% \iffalse meta-comment % %% File: l3fp-basics.dtx % % Copyright (C) 2011-2025 The LaTeX Project % % It may be distributed and/or modified under the conditions of the % LaTeX Project Public License (LPPL), either version 1.3c of this % license or (at your option) any later version. The latest version % of this license is in the file % % https://www.latex-project.org/lppl.txt % % This file is part of the "l3kernel bundle" (The Work in LPPL) % and all files in that bundle must be distributed together. % % ----------------------------------------------------------------------- % % The development version of the bundle can be found at % % https://github.com/latex3/latex3 % % for those people who are interested. % %<*driver> \documentclass[full,kernel]{l3doc} \begin{document} \DocInput{\jobname.dtx} \end{document} % % \fi % % \title{^^A % The \pkg{l3fp-basics} module\\ % Floating point arithmetic^^A % } % \author{^^A % The \LaTeX{} Project\thanks % {^^A % E-mail: % \href{mailto:latex-team@latex-project.org} % {latex-team@latex-project.org}^^A % }^^A % } % \date{Released 2025-01-14} % % \maketitle % % \begin{documentation} % % \end{documentation} % % \begin{implementation} % % \section{\pkg{l3fp-basics} implementation} % % \begin{macrocode} %<*package> % \end{macrocode} % % \begin{macrocode} %<@@=fp> % \end{macrocode} % % The \pkg{l3fp-basics} module implements addition, subtraction, % multiplication, and division of two floating points, and the absolute % value and sign-changing operations on one floating point. % All operations implemented in this module yield the outcome of % rounding the infinitely precise result of the operation to the % nearest floating point. % % Some algorithms used below end up being quite similar to some % described in \enquote{What Every Computer Scientist Should Know About % Floating Point Arithmetic}, by David Goldberg, which can be found at % \texttt{http://cr.yp.to/2005-590/goldberg.pdf}. % % \begin{macro}[EXP] % { % \@@_parse_word_abs:N , % \@@_parse_word_logb:N , % \@@_parse_word_sign:N , % \@@_parse_word_sqrt:N , % } % Unary functions. % \begin{macrocode} \cs_new:Npn \@@_parse_word_abs:N { \@@_parse_unary_function:NNN \@@_set_sign_o:w 0 } \cs_new:Npn \@@_parse_word_logb:N { \@@_parse_unary_function:NNN \@@_logb_o:w ? } \cs_new:Npn \@@_parse_word_sign:N { \@@_parse_unary_function:NNN \@@_sign_o:w ? } \cs_new:Npn \@@_parse_word_sqrt:N { \@@_parse_unary_function:NNN \@@_sqrt_o:w ? } % \end{macrocode} % \end{macro} % % \subsection{Addition and subtraction} % % We define here two functions, \cs{@@_-_o:ww} and \cs{@@_+_o:ww}, which % perform the subtraction and addition of their two floating point % operands, and expand the tokens following the result once. % % A more obscure function, \cs{@@_add_big_i_o:wNww}, is used in % \pkg{l3fp-expo}. % % The logic goes as follows: % \begin{itemize} % \item \cs{@@_-_o:ww} calls \cs{@@_+_o:ww} to do the work, with the % sign of the second operand flipped; % \item \cs{@@_+_o:ww} dispatches depending on the type of floating % point, calling specialized auxiliaries; % \item in all cases except summing two normal floating point numbers, % we return one or the other operands depending on the signs, or % detect an invalid operation in the case of $\infty - \infty$; % \item for normal floating point numbers, compare the signs; % \item to add two floating point numbers of the same sign or of % opposite signs, shift the significand of the smaller one to match the % bigger one, perform the addition or subtraction of significands, % check for a carry, round, and pack using the % \cs[no-index]{@@_basics_pack_\ldots{}} functions. % \end{itemize} % The trickiest part is to round correctly when adding or subtracting % normal floating point numbers. % % \subsubsection{Sign, exponent, and special numbers} % % \begin{macro}[EXP]{\@@_-_o:ww} % The \cs{@@_+_o:ww} auxiliary has a hook: it takes one argument % between the first \cs{s_@@} and \cs{@@_chk:w}, which is applied to % the sign of the second operand. Positioning the hook there means % that \cs{@@_+_o:ww} can still perform the sanity check that it was % followed by \cs{s_@@}. % \begin{macrocode} \cs_new:cpe { @@_-_o:ww } \s_@@ { \exp_not:c { @@_+_o:ww } \exp_not:n { \s_@@ \@@_neg_sign:N } } % \end{macrocode} % \end{macro} % % \begin{macro}[EXP]{\@@_+_o:ww} % This function is either called directly with an empty |#1| to % compute an addition, or it is called by \cs{@@_-_o:ww} with % \cs{@@_neg_sign:N} as |#1| to compute a subtraction, in which case % the second operand's sign should be changed. If the % \meta{types} |#2| and |#4| are the same, dispatch to case |#2| ($0$, % $1$, $2$, or $3$), where we call specialized functions: thanks to % \cs{int_value:w}, those receive the tweaked \meta{sign_2} % (expansion of |#1#5|) as an argument. If the \meta{types} are % distinct, the result is simply the floating point number with the % highest \meta{type}. Since case $3$ (used for two \texttt{nan}) % also picks the first operand, we can also use it when \meta{type_1} % is greater than \meta{type_2}. Also note that we don't need to % worry about \meta{sign_2} in that case since the second operand is % discarded. % \begin{macrocode} \cs_new:cpn { @@_+_o:ww } \s_@@ #1 \@@_chk:w #2 #3 ; \s_@@ \@@_chk:w #4 #5 { \if_case:w \if_meaning:w #2 #4 #2 \else: \if_int_compare:w #2 > #4 \exp_stop_f: 3 \else: 4 \fi: \fi: \exp_stop_f: \exp_after:wN \@@_add_zeros_o:Nww \int_value:w \or: \exp_after:wN \@@_add_normal_o:Nww \int_value:w \or: \exp_after:wN \@@_add_inf_o:Nww \int_value:w \or: \@@_case_return_i_o:ww \else: \exp_after:wN \@@_add_return_ii_o:Nww \int_value:w \fi: #1 #5 \s_@@ \@@_chk:w #2 #3 ; \s_@@ \@@_chk:w #4 #5 } % \end{macrocode} % \end{macro} % % \begin{macro}[EXP]{\@@_add_return_ii_o:Nww} % Ignore the first operand, and return the second, but using the sign % |#1| rather than |#4|. As usual, expand after the floating point. % \begin{macrocode} \cs_new:Npn \@@_add_return_ii_o:Nww #1 #2 ; \s_@@ \@@_chk:w #3 #4 { \@@_exp_after_o:w \s_@@ \@@_chk:w #3 #1 } % \end{macrocode} % \end{macro} % % \begin{macro}[EXP]{\@@_add_zeros_o:Nww} % Adding two zeros yields \cs{c_zero_fp}, except if both zeros were % $-0$. % \begin{macrocode} \cs_new:Npn \@@_add_zeros_o:Nww #1 \s_@@ \@@_chk:w 0 #2 { \if_int_compare:w #2 #1 = 20 \exp_stop_f: \exp_after:wN \@@_add_return_ii_o:Nww \else: \@@_case_return_i_o:ww \fi: #1 \s_@@ \@@_chk:w 0 #2 } % \end{macrocode} % \end{macro} % % \begin{macro}[EXP]{\@@_add_inf_o:Nww} % If both infinities have the same sign, just return that infinity, % otherwise, it is an invalid operation. We find out if that invalid % operation is an addition or a subtraction by testing whether the % tweaked \meta{sign_2} (|#1|) and the \meta{sign_2} (|#4|) are % identical. % \begin{macrocode} \cs_new:Npn \@@_add_inf_o:Nww #1 \s_@@ \@@_chk:w 2 #2 #3; \s_@@ \@@_chk:w 2 #4 { \if_meaning:w #1 #2 \@@_case_return_i_o:ww \else: \@@_case_use:nw { \exp_last_unbraced:Nf \@@_invalid_operation_o:Nww { \token_if_eq_meaning:NNTF #1 #4 + - } } \fi: \s_@@ \@@_chk:w 2 #2 #3; \s_@@ \@@_chk:w 2 #4 } % \end{macrocode} % \end{macro} % % \begin{macro}[EXP]{\@@_add_normal_o:Nww} % \begin{quote} % \cs{@@_add_normal_o:Nww} \meta{sign_2} % \cs{s_@@} \cs{@@_chk:w} |1| \meta{sign_1} % \meta{exp_1} \meta{body_1} |;| % \cs{s_@@} \cs{@@_chk:w} |1| \meta{initial sign_2} % \meta{exp_2} \meta{body_2} |;| % \end{quote} % We now have two normal numbers to add, and we have to check signs % and exponents more carefully before performing the addition. % \begin{macrocode} \cs_new:Npn \@@_add_normal_o:Nww #1 \s_@@ \@@_chk:w 1 #2 { \if_meaning:w #1#2 \exp_after:wN \@@_add_npos_o:NnwNnw \else: \exp_after:wN \@@_sub_npos_o:NnwNnw \fi: #2 } % \end{macrocode} % \end{macro} % % \subsubsection{Absolute addition} % % In this subsection, we perform the addition of two positive normal % numbers. % % \begin{macro}[EXP]{\@@_add_npos_o:NnwNnw} % \begin{quote} % \cs{@@_add_npos_o:NnwNnw} \meta{sign_1} \meta{exp_1} \meta{body_1} % |;| \cs{s_@@} \cs{@@_chk:w} |1| \meta{initial sign_2} \meta{exp_2} % \meta{body_2} |;| % \end{quote} % Since we are doing an addition, the final sign is \meta{sign_1}. % Start an \cs{@@_int_eval:w}, responsible for computing the exponent: % the result, and the \meta{final sign} are then given to % \cs{@@_sanitize:Nw} which checks for overflow. The exponent is % computed as the largest exponent |#2| or |#5|, incremented if there % is a carry. To add the significands, we decimate the smaller number by % the difference between the exponents. This is done by % \cs{@@_add_big_i:wNww} or \cs{@@_add_big_ii:wNww}. We need to bring % the final sign with us in the midst of the calculation to round % properly at the end. % \begin{macrocode} \cs_new:Npn \@@_add_npos_o:NnwNnw #1#2#3 ; \s_@@ \@@_chk:w 1 #4 #5 { \exp_after:wN \@@_sanitize:Nw \exp_after:wN #1 \int_value:w \@@_int_eval:w \if_int_compare:w #2 > #5 \exp_stop_f: #2 \exp_after:wN \@@_add_big_i_o:wNww \int_value:w - \else: #5 \exp_after:wN \@@_add_big_ii_o:wNww \int_value:w \fi: \@@_int_eval:w #5 - #2 ; #1 #3; } % \end{macrocode} % \end{macro} % % \begin{macro}[rEXP]{\@@_add_big_i_o:wNww} % \begin{macro}[rEXP]{\@@_add_big_ii_o:wNww} % \begin{quote} % \cs{@@_add_big_i_o:wNww} \meta{shift} |;| \meta{final sign} % \meta{body_1} |;| \meta{body_2} |;| % \end{quote} % Used in \pkg{l3fp-expo}. % Shift the significand of the small number, then add with % \cs{@@_add_significand_o:NnnwnnnnN}. % \begin{macrocode} \cs_new:Npn \@@_add_big_i_o:wNww #1; #2 #3; #4; { \@@_decimate:nNnnnn {#1} \@@_add_significand_o:NnnwnnnnN #4 #3 #2 } \cs_new:Npn \@@_add_big_ii_o:wNww #1; #2 #3; #4; { \@@_decimate:nNnnnn {#1} \@@_add_significand_o:NnnwnnnnN #3 #4 #2 } % \end{macrocode} % \end{macro} % \end{macro} % % \begin{macro}[rEXP]{\@@_add_significand_o:NnnwnnnnN} % \begin{macro}[rEXP] % {\@@_add_significand_pack:NNNNNNN, \@@_add_significand_test_o:N} % \begin{quote}\raggedright % \cs{@@_add_significand_o:NnnwnnnnN} % \meta{rounding digit} % \Arg{Y'_1} \Arg{Y'_2} \meta{extra-digits} |;| % \Arg{X_1} \Arg{X_2} \Arg{X_3} \Arg{X_4} % \meta{final sign} % \end{quote} % To round properly, we must know at which digit the rounding % should occur. This requires to know whether the addition % produces an overall carry or not. Thus, we do the computation % now and check for a carry, then go back and do the rounding. % The rounding may cause a carry in very rare cases such as % $0.99\cdots 95 \to 1.00\cdots 0$, but this situation always % give an exact power of $10$, for which it is easy to correct % the result at the end. % \begin{macrocode} \cs_new:Npn \@@_add_significand_o:NnnwnnnnN #1 #2#3 #4; #5#6#7#8 { \exp_after:wN \@@_add_significand_test_o:N \int_value:w \@@_int_eval:w 1#5#6 + #2 \exp_after:wN \@@_add_significand_pack:NNNNNNN \int_value:w \@@_int_eval:w 1#7#8 + #3 ; #1 } \cs_new:Npn \@@_add_significand_pack:NNNNNNN #1 #2#3#4#5#6#7 { \if_meaning:w 2 #1 + 1 \fi: ; #2 #3 #4 #5 #6 #7 ; } \cs_new:Npn \@@_add_significand_test_o:N #1 { \if_meaning:w 2 #1 \exp_after:wN \@@_add_significand_carry_o:wwwNN \else: \exp_after:wN \@@_add_significand_no_carry_o:wwwNN \fi: } % \end{macrocode} % \end{macro} % \end{macro} % % \begin{macro}[rEXP]{\@@_add_significand_no_carry_o:wwwNN} % \begin{quote} % \cs{@@_add_significand_no_carry_o:wwwNN} % \meta{8d} |;| \meta{6d} |;| \meta{2d} |;| % \meta{rounding digit} \meta{sign} % \end{quote} % If there's no carry, grab all the digits again and round. The % packing function \cs{@@_basics_pack_high:NNNNNw} takes care of the % case where rounding brings a carry. % \begin{macrocode} \cs_new:Npn \@@_add_significand_no_carry_o:wwwNN #1; #2; #3#4 ; #5#6 { \exp_after:wN \@@_basics_pack_high:NNNNNw \int_value:w \@@_int_eval:w 1 #1 \exp_after:wN \@@_basics_pack_low:NNNNNw \int_value:w \@@_int_eval:w 1 #2 #3#4 + \@@_round:NNN #6 #4 #5 \exp_after:wN ; } % \end{macrocode} % \end{macro} % % \begin{macro}[rEXP]{\@@_add_significand_carry_o:wwwNN} % \begin{quote} % \cs{@@_add_significand_carry_o:wwwNN} % \meta{8d} |;| \meta{6d} |;| \meta{2d} |;| % \meta{rounding digit} \meta{sign} % \end{quote} % The case where there is a carry is very similar. Rounding can even % raise the first digit from $1$ to $2$, but we don't care. % \begin{macrocode} \cs_new:Npn \@@_add_significand_carry_o:wwwNN #1; #2; #3#4; #5#6 { + 1 \exp_after:wN \@@_basics_pack_weird_high:NNNNNNNNw \int_value:w \@@_int_eval:w 1 1 #1 \exp_after:wN \@@_basics_pack_weird_low:NNNNw \int_value:w \@@_int_eval:w 1 #2#3 + \exp_after:wN \@@_round:NNN \exp_after:wN #6 \exp_after:wN #3 \int_value:w \@@_round_digit:Nw #4 #5 ; \exp_after:wN ; } % \end{macrocode} % \end{macro} % % \subsubsection{Absolute subtraction} % % \begin{macro}[EXP]{\@@_sub_npos_o:NnwNnw} % \begin{macro}[EXP]{\@@_sub_eq_o:Nnwnw, \@@_sub_npos_ii_o:Nnwnw} % \begin{quote} % \cs{@@_sub_npos_o:NnwNnw} % \meta{sign_1} \meta{exp_1} \meta{body_1} |;| % \cs{s_@@} \cs{@@_chk:w} |1| % \meta{initial sign_2} \meta{exp_2} \meta{body_2} |;| % \end{quote} % Rounding properly in some modes requires to know what the sign of % the result will be. Thus, we start by comparing the exponents and % significands. If the numbers coincide, return zero. If the second % number is larger, swap the numbers and call % \cs{@@_sub_npos_i_o:Nnwnw} with the opposite of \meta{sign_1}. % \begin{macrocode} \cs_new:Npn \@@_sub_npos_o:NnwNnw #1#2#3; \s_@@ \@@_chk:w 1 #4#5#6; { \if_case:w \@@_compare_npos:nwnw {#2} #3; {#5} #6; \exp_stop_f: \exp_after:wN \@@_sub_eq_o:Nnwnw \or: \exp_after:wN \@@_sub_npos_i_o:Nnwnw \else: \exp_after:wN \@@_sub_npos_ii_o:Nnwnw \fi: #1 {#2} #3; {#5} #6; } \cs_new:Npn \@@_sub_eq_o:Nnwnw #1#2; #3; { \exp_after:wN \c_zero_fp } \cs_new:Npn \@@_sub_npos_ii_o:Nnwnw #1 #2; #3; { \exp_after:wN \@@_sub_npos_i_o:Nnwnw \int_value:w \@@_neg_sign:N #1 #3; #2; } % \end{macrocode} % \end{macro} % \end{macro} % % \begin{macro}[EXP]{\@@_sub_npos_i_o:Nnwnw} % After the computation is done, \cs{@@_sanitize:Nw} checks for % overflow/underflow. It expects the \meta{final sign} and the % \meta{exponent} (delimited by |;|). Start an integer expression for % the exponent, which starts with the exponent of the largest number, % and may be decreased if the two numbers are very close. If the two % numbers have the same exponent, call the \texttt{near} auxiliary. % Otherwise, decimate $y$, then call the \texttt{far} auxiliary to % evaluate the difference between the two significands. Note that we % decimate by $1$ less than one could expect. % \begin{macrocode} \cs_new:Npn \@@_sub_npos_i_o:Nnwnw #1 #2#3; #4#5; { \exp_after:wN \@@_sanitize:Nw \exp_after:wN #1 \int_value:w \@@_int_eval:w #2 \if_int_compare:w #2 = #4 \exp_stop_f: \exp_after:wN \@@_sub_back_near_o:nnnnnnnnN \else: \exp_after:wN \@@_decimate:nNnnnn \exp_after:wN { \int_value:w \@@_int_eval:w #2 - #4 - 1 \exp_after:wN } \exp_after:wN \@@_sub_back_far_o:NnnwnnnnN \fi: #5 #3 #1 } % \end{macrocode} % \end{macro} % % \begin{macro}[rEXP]{\@@_sub_back_near_o:nnnnnnnnN} % \begin{macro}[rEXP] % {\@@_sub_back_near_pack:NNNNNNw, \@@_sub_back_near_after:wNNNNw} % \begin{quote} % \cs{@@_sub_back_near_o:nnnnnnnnN} % \Arg{Y_1} \Arg{Y_2} \Arg{Y_3} \Arg{Y_4} % \Arg{X_1} \Arg{X_2} \Arg{X_3} \Arg{X_4} % \meta{final sign} % \end{quote} % In this case, the subtraction is exact, so we discard the % \meta{final sign} |#9|. The very large shifts of $10^{9}$ and % $1.1\cdot10^{9}$ are unnecessary here, but allow the auxiliaries to % be reused later. Each integer expression produces a $10$ digit % result. If the resulting $16$ digits start with a $0$, then we need % to shift the group, padding with trailing zeros. % \begin{macrocode} \cs_new:Npn \@@_sub_back_near_o:nnnnnnnnN #1#2#3#4 #5#6#7#8 #9 { \exp_after:wN \@@_sub_back_near_after:wNNNNw \int_value:w \@@_int_eval:w 10#5#6 - #1#2 - 11 \exp_after:wN \@@_sub_back_near_pack:NNNNNNw \int_value:w \@@_int_eval:w 11#7#8 - #3#4 \exp_after:wN ; } \cs_new:Npn \@@_sub_back_near_pack:NNNNNNw #1#2#3#4#5#6#7 ; { + #1#2 ; {#3#4#5#6} {#7} ; } \cs_new:Npn \@@_sub_back_near_after:wNNNNw 10 #1#2#3#4 #5 ; { \if_meaning:w 0 #1 \exp_after:wN \@@_sub_back_shift:wnnnn \fi: ; {#1#2#3#4} {#5} } % \end{macrocode} % \end{macro} % \end{macro} % % \begin{macro}[rEXP]{\@@_sub_back_shift:wnnnn} % \begin{macro}[rEXP] % { % \@@_sub_back_shift_ii:ww, % \@@_sub_back_shift_iii:NNNNNNNNw, % \@@_sub_back_shift_iv:nnnnw % } % \begin{quote} % \cs{@@_sub_back_shift:wnnnn} |;| % \Arg{Z_1} \Arg{Z_2} \Arg{Z_3} \Arg{Z_4} |;| % \end{quote} % This function is called with $\meta{Z_1}\leq 999$. Act with % \tn{number} to trim leading zeros from \meta{Z_1} \meta{Z_2} (we % don't do all four blocks at once, since non-zero blocks would then % overflow \TeX{}'s integers). If the first two blocks are zero, the % auxiliary receives an empty |#1| and trims |#2#30| from leading % zeros, yielding a total shift between $7$ and~$16$ to the exponent. % Otherwise we get the shift from |#1| alone, yielding a result % between $1$ and~$6$. Once the exponent is taken care of, trim % leading zeros from |#1#2#3| (when |#1| is empty, the space before % |#2#3| is ignored), get four blocks of $4$~digits and finally clean % up. Trailing zeros are added so that digits can be grabbed safely. % \begin{macrocode} \cs_new:Npn \@@_sub_back_shift:wnnnn ; #1#2 { \exp_after:wN \@@_sub_back_shift_ii:ww \int_value:w #1 #2 0 ; } \cs_new:Npn \@@_sub_back_shift_ii:ww #1 0 ; #2#3 ; { \if_meaning:w @ #1 @ - 7 - \exp_after:wN \use_i:nnn \exp_after:wN \@@_sub_back_shift_iii:NNNNNNNNw \int_value:w #2#3 0 ~ 123456789; \else: - \@@_sub_back_shift_iii:NNNNNNNNw #1 123456789; \fi: \exp_after:wN \@@_pack_twice_four:wNNNNNNNN \exp_after:wN \@@_pack_twice_four:wNNNNNNNN \exp_after:wN \@@_sub_back_shift_iv:nnnnw \exp_after:wN ; \int_value:w #1 ~ #2#3 0 ~ 0000 0000 0000 000 ; } \cs_new:Npn \@@_sub_back_shift_iii:NNNNNNNNw #1#2#3#4#5#6#7#8#9; {#8} \cs_new:Npn \@@_sub_back_shift_iv:nnnnw #1 ; #2 ; { ; #1 ; } % \end{macrocode} % \end{macro} % \end{macro} % % \begin{macro}[rEXP]{\@@_sub_back_far_o:NnnwnnnnN} % \begin{quote}\raggedright % \cs{@@_sub_back_far_o:NnnwnnnnN} % \meta{rounding} \Arg{Y'_1} \Arg{Y'_2} \meta{extra-digits} |;| % \Arg{X_1} \Arg{X_2} \Arg{X_3} \Arg{X_4} % \meta{final sign} % \end{quote} % If the difference is greater than $10^{\meta{expo_x}}$, call the % \texttt{very_far} auxiliary. If the result is less than % $10^{\meta{expo_x}}$, call the \texttt{not_far} auxiliary. If it is % too close a call to know yet, namely if $1 \meta{Y'_1} \meta{Y'_2} = % \meta{X_1} \meta{X_2} \meta{X_3} \meta{X_4} 0$, then call the % \texttt{quite_far} auxiliary. We use the odd combination of space % and semi-colon delimiters to allow the \texttt{not_far} auxiliary to % grab each piece individually, the \texttt{very_far} auxiliary to use % \cs{@@_pack_eight:wNNNNNNNN}, and the \texttt{quite_far} to ignore % the significands easily (using the |;| delimiter). % \begin{macrocode} \cs_new:Npn \@@_sub_back_far_o:NnnwnnnnN #1 #2#3 #4; #5#6#7#8 { \if_case:w \if_int_compare:w 1 #2 = #5#6 \use_i:nnnn #7 \exp_stop_f: \if_int_compare:w #3 = \use_none:n #7#8 0 \exp_stop_f: 0 \else: \if_int_compare:w #3 > \use_none:n #7#8 0 - \fi: 1 \fi: \else: \if_int_compare:w 1 #2 > #5#6 \use_i:nnnn #7 - \fi: 1 \fi: \exp_stop_f: \exp_after:wN \@@_sub_back_quite_far_o:wwNN \or: \exp_after:wN \@@_sub_back_very_far_o:wwwwNN \else: \exp_after:wN \@@_sub_back_not_far_o:wwwwNN \fi: #2 ~ #3 ; #5 #6 ~ #7 #8 ; #1 } % \end{macrocode} % \end{macro} % % \begin{macro}[EXP]{\@@_sub_back_quite_far_o:wwNN} % \begin{macro}[EXP]{\@@_sub_back_quite_far_ii:NN} % The easiest case is when $x-y$ is extremely close to a power of % $10$, namely the first digit of $x$ is $1$, and all others vanish % when subtracting $y$. Then the \meta{rounding} |#3| and the % \meta{final sign} |#4| control whether we get $1$ or $0.9999 9999 % 9999 9999$. In the usual round-to-nearest mode, we get $1$ % whenever the \meta{rounding} digit is less than or equal to $5$ % (remember that the \meta{rounding} digit is only equal to $5$ if % there was no further non-zero digit). % \begin{macrocode} \cs_new:Npn \@@_sub_back_quite_far_o:wwNN #1; #2; #3#4 { \exp_after:wN \@@_sub_back_quite_far_ii:NN \exp_after:wN #3 \exp_after:wN #4 } \cs_new:Npn \@@_sub_back_quite_far_ii:NN #1#2 { \if_case:w \@@_round_neg:NNN #2 0 #1 \exp_after:wN \use_i:nn \else: \exp_after:wN \use_ii:nn \fi: { ; {1000} {0000} {0000} {0000} ; } { - 1 ; {9999} {9999} {9999} {9999} ; } } % \end{macrocode} % \end{macro} % \end{macro} % % \begin{macro}[rEXP]{\@@_sub_back_not_far_o:wwwwNN} % In the present case, $x$ and $y$ have different exponents, but % $y$~is large enough that $x-y$ has a smaller exponent than~$x$. % Decrement the exponent (with |-1|). Then proceed in a way % similar to the \texttt{near} auxiliaries seen earlier, but % multiplying $x$ by~$10$ (|#30| and |#40| below), and with the added % quirk that the \meta{rounding} digit has to be taken into account. % Namely, we may have to decrease the result by one unit if % \cs{@@_round_neg:NNN} returns~$1$. This function expects the % \meta{final sign}~|#6|, the last digit of |1100000000+#40-#2|, and % the \meta{rounding} digit. Instead of redoing the computation for % the second argument, we note that \cs{@@_round_neg:NNN} only cares % about its parity, which is identical to that of the last digit % of~|#2|. % \begin{macrocode} \cs_new:Npn \@@_sub_back_not_far_o:wwwwNN #1 ~ #2; #3 ~ #4; #5#6 { - 1 \exp_after:wN \@@_sub_back_near_after:wNNNNw \int_value:w \@@_int_eval:w 1#30 - #1 - 11 \exp_after:wN \@@_sub_back_near_pack:NNNNNNw \int_value:w \@@_int_eval:w 11 0000 0000 + #40 - #2 - \exp_after:wN \@@_round_neg:NNN \exp_after:wN #6 \use_none:nnnnnnn #2 #5 \exp_after:wN ; } % \end{macrocode} % \end{macro} % % \begin{macro}[EXP]{\@@_sub_back_very_far_o:wwwwNN} % \begin{macro}[EXP]{\@@_sub_back_very_far_ii_o:nnNwwNN} % The case where $x-y$ and $x$ have the same exponent is a bit more % tricky, mostly because it cannot reuse the same auxiliaries. Shift % the $y$~significand by adding a leading~$0$. Then the logic is similar % to the \texttt{not_far} functions above. Rounding is a bit more % complicated: we have two \meta{rounding} digits |#3| and |#6| (from % the decimation, and from the new shift) to take into account, and % getting the parity of the main result requires a computation. The % first \cs{int_value:w} triggers the second one because the number % is unfinished; we can thus not use $0$ in place of $2$ there. % \begin{macrocode} \cs_new:Npn \@@_sub_back_very_far_o:wwwwNN #1#2#3#4#5#6#7 { \@@_pack_eight:wNNNNNNNN \@@_sub_back_very_far_ii_o:nnNwwNN { 0 #1#2#3 #4#5#6#7 } ; } \cs_new:Npn \@@_sub_back_very_far_ii_o:nnNwwNN #1#2 ; #3 ; #4 ~ #5; #6#7 { \exp_after:wN \@@_basics_pack_high:NNNNNw \int_value:w \@@_int_eval:w 1#4 - #1 - 1 \exp_after:wN \@@_basics_pack_low:NNNNNw \int_value:w \@@_int_eval:w 2#5 - #2 - \exp_after:wN \@@_round_neg:NNN \exp_after:wN #7 \int_value:w \if_int_odd:w \@@_int_eval:w #5 - #2 \@@_int_eval_end: 1 \else: 2 \fi: \int_value:w \@@_round_digit:Nw #3 #6 ; \exp_after:wN ; } % \end{macrocode} % \end{macro} % \end{macro} % % \subsection{Multiplication} % % \subsubsection{Signs, and special numbers} % % \begin{macro}[EXP]{\@@_*_o:ww} % We go through an auxiliary, which is common with \cs{@@_/_o:ww}. % The first argument is the operation, used for the invalid operation % exception. The second is inserted in a formula to dispatch cases % slightly differently between multiplication and division. The third % is the operation for normal floating points. The fourth is there % for extra cases needed in \cs{@@_/_o:ww}. % \begin{macrocode} \cs_new:cpn { @@_*_o:ww } { \@@_mul_cases_o:NnNnww * { - 2 + } \@@_mul_npos_o:Nww { } } % \end{macrocode} % \end{macro} % % \begin{macro}[EXP]{\@@_mul_cases_o:nNnnww} % Split into $10$ cases ($12$ for division). % If both numbers are normal, go to case $0$ % (same sign) or case $1$ (opposite signs): in both cases, call % \cs{@@_mul_npos_o:Nww} to do the work. If the first operand is % \texttt{nan}, go to case $2$, in which the second operand is % discarded; if the second operand is \texttt{nan}, go to case $3$, in % which the first operand is discarded (note the weird interaction % with the final test on signs). Then we separate the case where the % first number is normal and the second is zero: this goes to cases % $4$ and $5$ for multiplication, $10$ and $11$ for division. % Otherwise, we do a computation which % dispatches the products $0\times 0 = 0\times 1 = 1\times 0 = 0$ to % case $4$ or $5$ depending on the combined sign, the products % $0\times\infty$ and $\infty\times0$ to case $6$ or $7$ (invalid % operation), and the products $1\times\infty = \infty\times1 = % \infty\times\infty = \infty$ to cases $8$ and $9$. Note that the % code for these two cases (which return $\pm\infty$) is inserted as % argument |#4|, because it differs in the case of divisions. % \begin{macrocode} \cs_new:Npn \@@_mul_cases_o:NnNnww #1#2#3#4 \s_@@ \@@_chk:w #5#6#7; \s_@@ \@@_chk:w #8#9 { \if_case:w \@@_int_eval:w \if_int_compare:w #5 #8 = 11 ~ 1 \else: \if_meaning:w 3 #8 3 \else: \if_meaning:w 3 #5 2 \else: \if_int_compare:w #5 #8 = 10 ~ 9 #2 - 2 \else: (#5 #2 #8) / 2 * 2 + 7 \fi: \fi: \fi: \fi: \if_meaning:w #6 #9 - 1 \fi: \@@_int_eval_end: \@@_case_use:nw { #3 0 } \or: \@@_case_use:nw { #3 2 } \or: \@@_case_return_i_o:ww \or: \@@_case_return_ii_o:ww \or: \@@_case_return_o:Nww \c_zero_fp \or: \@@_case_return_o:Nww \c_minus_zero_fp \or: \@@_case_use:nw { \@@_invalid_operation_o:Nww #1 } \or: \@@_case_use:nw { \@@_invalid_operation_o:Nww #1 } \or: \@@_case_return_o:Nww \c_inf_fp \or: \@@_case_return_o:Nww \c_minus_inf_fp #4 \fi: \s_@@ \@@_chk:w #5 #6 #7; \s_@@ \@@_chk:w #8 #9 } % \end{macrocode} % \end{macro} % % \subsubsection{Absolute multiplication} % % In this subsection, we perform the multiplication % of two positive normal numbers. % % \begin{macro}[EXP]{\@@_mul_npos_o:Nww} % \begin{quote} % \cs{@@_mul_npos_o:Nww} \meta{final sign} % \cs{s_@@} \cs{@@_chk:w} |1| \meta{sign_1} \Arg{exp_1} \meta{body_1} |;| % \cs{s_@@} \cs{@@_chk:w} |1| \meta{sign_2} \Arg{exp_2} \meta{body_2} |;| % \end{quote} % After the computation, \cs{@@_sanitize:Nw} checks for overflow or % underflow. As we did for addition, \cs{@@_int_eval:w} computes the % exponent, catching any shift coming from the computation in the % significand. The \meta{final sign} is needed to do the rounding % properly in the significand computation. We setup the post-expansion % here, triggered by \cs{@@_mul_significand_o:nnnnNnnnn}. % % This is also used in \pkg{l3fp-convert}. % \begin{macrocode} \cs_new:Npn \@@_mul_npos_o:Nww #1 \s_@@ \@@_chk:w #2 #3 #4 #5 ; \s_@@ \@@_chk:w #6 #7 #8 #9 ; { \exp_after:wN \@@_sanitize:Nw \exp_after:wN #1 \int_value:w \@@_int_eval:w #4 + #8 \@@_mul_significand_o:nnnnNnnnn #5 #1 #9 } % \end{macrocode} % \end{macro} % % \begin{macro}[rEXP]{\@@_mul_significand_o:nnnnNnnnn} % \begin{macro}[EXP] % {\@@_mul_significand_drop:NNNNNw, \@@_mul_significand_keep:NNNNNw} % \begin{quote} % \cs{@@_mul_significand_o:nnnnNnnnn} % \Arg{X_1} \Arg{X_2} \Arg{X_3} \Arg{X_4} \meta{sign} % \Arg{Y_1} \Arg{Y_2} \Arg{Y_3} \Arg{Y_4} % \end{quote} % Note the three semicolons at the end of the definition. One is for % the last \cs{@@_mul_significand_drop:NNNNNw}; one is for % \cs{@@_round_digit:Nw} later on; and one, preceded by % \cs{exp_after:wN}, which is correctly expanded (within an % \cs{@@_int_eval:w}), is used by \cs{@@_basics_pack_low:NNNNNw}. % % The product of two $16$ digit integers has $31$ or $32$ digits, % but it is impossible to know which one before computing. The place % where we round depends on that number of digits, and may depend % on all digits until the last in some rare cases. The approach is % thus to compute the $5$ first blocks of $4$ digits (the first one % is between $100$ and $9999$ inclusive), and a compact version of % the remaining $3$ blocks. Afterwards, the number of digits is % known, and we can do the rounding within yet another set of % \cs{@@_int_eval:w}. % \begin{macrocode} \cs_new:Npn \@@_mul_significand_o:nnnnNnnnn #1#2#3#4 #5 #6#7#8#9 { \exp_after:wN \@@_mul_significand_test_f:NNN \exp_after:wN #5 \int_value:w \@@_int_eval:w 99990000 + #1*#6 + \exp_after:wN \@@_mul_significand_keep:NNNNNw \int_value:w \@@_int_eval:w 99990000 + #1*#7 + #2*#6 + \exp_after:wN \@@_mul_significand_keep:NNNNNw \int_value:w \@@_int_eval:w 99990000 + #1*#8 + #2*#7 + #3*#6 + \exp_after:wN \@@_mul_significand_drop:NNNNNw \int_value:w \@@_int_eval:w 99990000 + #1*#9 + #2*#8 + #3*#7 + #4*#6 + \exp_after:wN \@@_mul_significand_drop:NNNNNw \int_value:w \@@_int_eval:w 99990000 + #2*#9 + #3*#8 + #4*#7 + \exp_after:wN \@@_mul_significand_drop:NNNNNw \int_value:w \@@_int_eval:w 99990000 + #3*#9 + #4*#8 + \exp_after:wN \@@_mul_significand_drop:NNNNNw \int_value:w \@@_int_eval:w 100000000 + #4*#9 ; ; \exp_after:wN ; } \cs_new:Npn \@@_mul_significand_drop:NNNNNw #1#2#3#4#5 #6; { #1#2#3#4#5 ; + #6 } \cs_new:Npn \@@_mul_significand_keep:NNNNNw #1#2#3#4#5 #6; { #1#2#3#4#5 ; #6 ; } % \end{macrocode} % \end{macro} % \end{macro} % % \begin{macro}[rEXP]{\@@_mul_significand_test_f:NNN} % \begin{quote} % \cs{@@_mul_significand_test_f:NNN} \meta{sign} |1| % \meta{digits 1--8} |;| \meta{digits 9--12} |;| \meta{digits 13--16} |;| % |+| \meta{digits 17--20} |+| \meta{digits 21--24} % |+| \meta{digits 25--28} |+| \meta{digits 29--32} |;| % \cs{exp_after:wN} |;| % \end{quote} % If the \meta{digit 1} is non-zero, then for rounding we only care % about the digits $16$ and $17$, and whether further digits are zero % or not (check for exact ties). On the other hand, if \meta{digit 1} % is zero, we care about digits $17$ and $18$, and whether further % digits are zero. % \begin{macrocode} \cs_new:Npn \@@_mul_significand_test_f:NNN #1 #2 #3 { \if_meaning:w 0 #3 \exp_after:wN \@@_mul_significand_small_f:NNwwwN \else: \exp_after:wN \@@_mul_significand_large_f:NwwNNNN \fi: #1 #3 } % \end{macrocode} % \end{macro} % % \begin{macro}[EXP]{\@@_mul_significand_large_f:NwwNNNN} % In this branch, \meta{digit 1} is non-zero. The result is thus % \meta{digits 1--16}, plus some rounding which depends on the digits % $16$, $17$, and whether all subsequent digits are zero or not. % Here, \cs{@@_round_digit:Nw} takes digits $17$ and further (as an % integer expression), and replaces it by a \meta{rounding digit}, % suitable for \cs{@@_round:NNN}. % \begin{macrocode} \cs_new:Npn \@@_mul_significand_large_f:NwwNNNN #1 #2; #3; #4#5#6#7; + { \exp_after:wN \@@_basics_pack_high:NNNNNw \int_value:w \@@_int_eval:w 1#2 \exp_after:wN \@@_basics_pack_low:NNNNNw \int_value:w \@@_int_eval:w 1#3#4#5#6#7 + \exp_after:wN \@@_round:NNN \exp_after:wN #1 \exp_after:wN #7 \int_value:w \@@_round_digit:Nw } % \end{macrocode} % \end{macro} % % \begin{macro}[rEXP]{\@@_mul_significand_small_f:NNwwwN} % In this branch, \meta{digit 1} is zero. Our result is thus % \meta{digits 2--17}, plus some rounding which depends on the digits % $17$, $18$, and whether all subsequent digits are zero or not. % The $8$ digits |1#3| are followed, after expansion of the % \texttt{small_pack} auxiliary, by the next digit, to form a $9$ % digit number. % \begin{macrocode} \cs_new:Npn \@@_mul_significand_small_f:NNwwwN #1 #2#3; #4#5; #6; + #7 { - 1 \exp_after:wN \@@_basics_pack_high:NNNNNw \int_value:w \@@_int_eval:w 1#3#4 \exp_after:wN \@@_basics_pack_low:NNNNNw \int_value:w \@@_int_eval:w 1#5#6#7 + \exp_after:wN \@@_round:NNN \exp_after:wN #1 \exp_after:wN #7 \int_value:w \@@_round_digit:Nw } % \end{macrocode} % \end{macro} % % \subsection{Division} % % \subsubsection{Signs, and special numbers} % % Time is now ripe to tackle the hardest of the four elementary % operations: division. % % \begin{macro}[EXP]{\@@_/_o:ww} % Filtering special floating point is very similar to what we did for % multiplications, with a few variations. Invalid operation % exceptions display |/| rather than |*|. In the formula for % dispatch, we replace |- 2 +| by |-|. The case of normal % numbers is treated using \cs{@@_div_npos_o:Nww} rather than % \cs{@@_mul_npos_o:Nww}. There are two additional cases: if the % first operand is normal and the second is a zero, then the division % by zero exception is raised: cases $10$ and $11$ of the % \cs{if_case:w} construction in \cs{@@_mul_cases_o:NnNnww} are % provided as the fourth argument here. % \begin{macrocode} \cs_new:cpn { @@_/_o:ww } { \@@_mul_cases_o:NnNnww / { - } \@@_div_npos_o:Nww { \or: \@@_case_use:nw { \@@_division_by_zero_o:NNww \c_inf_fp / } \or: \@@_case_use:nw { \@@_division_by_zero_o:NNww \c_minus_inf_fp / } } } % \end{macrocode} % \end{macro} % % \begin{macro}[EXP]{\@@_div_npos_o:Nww} % \begin{quote} % \cs{@@_div_npos_o:Nww} \meta{final sign} % \cs{s_@@} \cs{@@_chk:w} |1| \meta{sign_A} \Arg{exp A} % \Arg{A_1} \Arg{A_2} \Arg{A_3} \Arg{A_4} |;| % \cs{s_@@} \cs{@@_chk:w} |1| \meta{sign_Z} \Arg{exp Z} % \Arg{Z_1} \Arg{Z_2} \Arg{Z_3} \Arg{Z_4} |;| % \end{quote} % We want to compute $A/Z$. As for multiplication, % \cs{@@_sanitize:Nw} checks for overflow or underflow; we provide it % with the \meta{final sign}, and an integer expression in which we % compute the exponent. We set up the arguments of % \cs{@@_div_significand_i_o:wnnw}, namely an integer \meta{y} obtained % by adding $1$ to the first $5$ digits of $Z$ (explanation given soon % below), then the four \Arg{A_{i}}, then the four \Arg{Z_{i}}, a % semi-colon, and the \meta{final sign}, used for rounding at the end. % \begin{macrocode} \cs_new:Npn \@@_div_npos_o:Nww #1 \s_@@ \@@_chk:w 1 #2 #3 #4 ; \s_@@ \@@_chk:w 1 #5 #6 #7#8#9; { \exp_after:wN \@@_sanitize:Nw \exp_after:wN #1 \int_value:w \@@_int_eval:w #3 - #6 \exp_after:wN \@@_div_significand_i_o:wnnw \int_value:w \@@_int_eval:w #7 \use_i:nnnn #8 + 1 ; #4 {#7}{#8}#9 ; #1 } % \end{macrocode} % \end{macro} % % \subsubsection{Work plan} % % In this subsection, we explain how to avoid overflowing \TeX{}'s % integers when performing the division of two positive normal numbers. % % We are given two numbers, $A=0.A_{1}A_{2}A_{3}A_{4}$ and % $Z=0.Z_{1}Z_{2}Z_{3}Z_{4}$, in blocks of $4$ digits, and we know that % the first digits of $A_{1}$ and of $Z_{1}$ are non-zero. To compute % $A/Z$, we proceed as follows. % \begin{itemize} % \item Find an integer $Q_{A} \simeq 10^{4} A / Z$. % \item Replace $A$ by $B = 10^{4} A - Q_{A} Z$. % \item Find an integer $Q_{B} \simeq 10^{4} B / Z$. % \item Replace $B$ by $C = 10^{4} B - Q_{B} Z$. % \item Find an integer $Q_{C} \simeq 10^{4} C / Z$. % \item Replace $C$ by $D = 10^{4} C - Q_{C} Z$. % \item Find an integer $Q_{D} \simeq 10^{4} D / Z$. % \item Consider $E = 10^{4} D - Q_{D} Z$, and ensure % correct rounding. % \end{itemize} % The result is then $Q = 10^{-4} Q_{A} + 10^{-8} Q_{B} + 10^{-12} Q_{C} % + 10^{-16} Q_{D} + \text{rounding}$. Since the $Q_{i}$ are integers, % $B$, $C$, $D$, and~$E$ are all exact multiples of $10^{-16}$, in other % words, computing with $16$ digits after the decimal separator yields % exact results. The problem is the risk of overflow: in general $B$, $C$, % $D$, and $E$ may be greater than $1$. % % Unfortunately, things are not as easy as they seem. In particular, we % want all intermediate steps to be positive, since negative results % would require extra calculations at the end. This requires that % $Q_{A} \leq 10^{4} A / Z$ \emph{etc.} A reasonable attempt would be % to define $Q_{A}$ as % \begin{equation*} % \cs{int_eval:n} \left\{ % \frac{ A_{1} A_{2} }{ Z_{1} + 1 } - 1 \right\} % \leq 10^{4} \frac{A}{Z} % \end{equation*} % Subtracting $1$ at the end takes care of the fact that \eTeX{}'s % \cs{@@_int_eval:w} rounds divisions instead of truncating (really, % $1/2$ would be sufficient, but we work with integers). We add $1$ to % $Z_{1}$ because $Z_{1} \leq 10^{4}Z < Z_{1}+1$ and we need $Q_{A}$ to % be an underestimate. However, we are now underestimating $Q_{A}$ too % much: it can be wrong by up to $100$, for instance when $Z = 0.1$ and % $A \simeq 1$. Then $B$ could take values up to $10$ (maybe more), and % a few steps down the line, we would run into arithmetic overflow, % since \TeX{} can only handle integers less than roughly $2\cdot % 10^{9}$. % % A better formula is to take % \begin{equation*} % Q_{A} = \cs{int_eval:n} \left\{ % \frac{ 10 \cdot A_{1} A_{2} } % { \left\lfloor 10^{-3} \cdot Z_{1} Z_{2} \right\rfloor + 1 } % - 1 \right\}. % \end{equation*} % This is always less than $10^{9} A / (10^{5} Z)$, as we wanted. In % words, we take the $5$ first digits of $Z$ into account, and the $8$ % first digits of $A$, using $0$ as a $9$-th digit rather than the true % digit for efficiency reasons. We shall prove that using this formula % to define all the $Q_{i}$ avoids any overflow. For convenience, let % us denote % \begin{equation*} % y = \left\lfloor 10^{-3} \cdot Z_{1} Z_{2} \right\rfloor + 1, % \end{equation*} % so that, taking into account the fact that \eTeX{} rounds ties away % from zero, % \begin{align*} % Q_{A} % &= \left\lfloor \frac{A_{1}A_{2}0}{y} - \frac{1}{2} \right\rfloor % \\ % &>\frac{A_{1}A_{2}0}{y} - \frac{3}{2}. % \end{align*} % Note that $10^{4} 0 + 1 \fi: \else: \if_meaning:w - #1 - \else: + \fi: 1 \fi: ; } % \end{macrocode} % \end{macro} % % \begin{macro}[EXP]{\@@_div_significand_pack:NNN} % At this stage, we are in the following situation: \TeX{} is in the % process of expanding several integer expressions, thus functions at % the bottom expand before those above. % \begin{quote} % \cs{@@_div_significand_test_o:w} $10^{6} + Q_{A}$ % \cs{@@_div_significand_pack:NNN} $10^{6} + Q_{B}$ % \cs{@@_div_significand_pack:NNN} $10^{6} + Q_{C}$ % \cs{@@_div_significand_pack:NNN} % $10^{7} + 10\cdot Q_{D} + 5 \cdot P + \varepsilon$ |;| \meta{sign} % \end{quote} % Here, $\varepsilon = \operatorname{sign}(T)$ is $0$ in case $2E=PZ$, % $1$ in case $2E>PZ$, which means that $P$ was the correct value, but % not with an exact quotient, and $-1$ if $2E 3/2$, this last expression is % $\leq\delta/2+3/4<\delta$, hence $\delta$~decreases at each step: % since all~$x$ are integers, $\delta$~must reach a value % $-1/4<\delta\leq 3/2$. In this range of values, we get $\delta' % \leq \frac{3}{4} \frac{3}{2\sqrt{10^{8} a_1}} + \frac{3}{4} \leq % 0.75 + 1.125 \cdot 10^{-7}$. We deduce that the difference $\delta % = x - \sqrt{10^{8} a_1}$ eventually reaches a value in the interval % $[-0.25 + 0.25\cdot 10^{-8}, 0.75 + 11.25 \cdot 10^{-8}]$, whose % width is $1 + 11 \cdot 10^{-8}$. The corresponding interval for~$x$ % may contain two integers, hence $x$~might oscillate between those % two values. % % However, the fact that $x\mapsto x-1$ and $x-1 \mapsto x$ puts % stronger constraints, which are not compatible: the first implies % \[ % x + [10^{8} a_1 / x] \leq 2x - 2 % \] % hence $10^{8} a_1 / x \leq x - 3/2$, while the second implies % \[ % x - 1 + [10^{8} a_1 / (x - 1)] \geq 2x - 1 % \] % hence $10^{8} a_1 / (x - 1) \geq x - 1/2$. Combining the two % inequalities yields $x^2 - 3x/2 \geq 10^{8} a_1 \geq x - 3x/2 + % 1/2$, which cannot hold. Therefore, the iteration always converges % to a single integer~$x$. To stop the iteration when two consecutive % results are equal, the function \cs{@@_sqrt_Newton_o:wwn} receives % the newly computed result as~|#1|, the previous result as~|#2|, and % $a_1$ as~|#3|. Note that \eTeX{} combines the computation of a % multiplication and a following division, thus avoiding overflow in % |#3 * 100000000 / #1|. In any case, the result is within $[10^{7}, % 10^{8}]$. % \begin{macrocode} \cs_new:Npn \@@_sqrt_Newton_o:wwn #1; #2; #3 { \if_int_compare:w #1 = #2 \exp_stop_f: \exp_after:wN \@@_sqrt_auxi_o:NNNNwnnN \int_value:w \@@_int_eval:w 9999 9999 + \exp_after:wN \@@_use_none_until_s:w \fi: \exp_after:wN \@@_sqrt_Newton_o:wwn \int_value:w \@@_int_eval:w (#1 + #3 * 1 0000 0000 / #1) / 2 ; #1; {#3} } % \end{macrocode} % \end{macro} % % \begin{macro}[rEXP]{\@@_sqrt_auxi_o:NNNNwnnN} % This function is followed by $10^{8}+x-1$, which has~$9$ digits % starting with~$1$, then |;| \Arg{a_1} \Arg{a_2} \meta{a'}. Here, $x % \simeq \sqrt{10^{8} a_1}$ and we want to estimate the square root of % $a = 10^{-8} a_1 + 10^{-16} a_2 + 10^{-17} a'$. We set up an % initial underestimate % \[ % y = (x - 1) 10^{-8} + 0.2499998875 \cdot 10^{-8} \lesssim \sqrt{a}\,. % \] % From the inequalities shown earlier, we know that $y \leq % \sqrt{10^{-8} a_1} \leq \sqrt{a}$ and that $\sqrt{10^{-8} a_1} \leq % y + 10^{-8} + 11\cdot 10^{-16}$ hence (using $0.1\leq y\leq % \sqrt{a}\leq 1$) % \[ % a - y^2 \leq 10^{-8} a_1 + 10^{-8} - y^2 % \leq (y + 10^{-8} + 11\cdot 10^{-16})^2 - y^2 + 10^{-8} % < 3.2 \cdot 10^{-8} \,, % \] % and $\sqrt{a} - y = (a - y^2)/(\sqrt{a} + y) \leq 16 \cdot 10^{-8}$. % Next, \cs{@@_sqrt_auxii_o:NnnnnnnnN} is called several times to % get closer and closer underestimates of~$\sqrt{a}$. By % construction, the underestimates~$y$ are always increasing, $a - y^2 % < 3.2 \cdot 10^{-8}$ for all. Also, $y<1$. % \begin{macrocode} \cs_new:Npn \@@_sqrt_auxi_o:NNNNwnnN 1 #1#2#3#4#5; { \@@_sqrt_auxii_o:NnnnnnnnN \@@_sqrt_auxiii_o:wnnnnnnnn {#1#2#3#4} {#5} {2499} {9988} {7500} } % \end{macrocode} % \end{macro} % % \begin{macro}[rEXP]{\@@_sqrt_auxii_o:NnnnnnnnN} % This receives a continuation function~|#1|, then five blocks of~$4$ % digits for~$y$, then two $8$-digit blocks and a single digit % for~$a$. A common estimate of $\sqrt{a} - y = (a - y^2) / (\sqrt{a} % + y)$ is $(a - y^2)/(2y)$, which leads to alternating overestimates % and underestimates. We tweak this, to only work with underestimates % (no need then to worry about signs in the computation). Each step % finds the largest integer $j\leq 6$ such that $10^{4j}(a-y^2) < % 2\cdot 10^{8}$, then computes the integer (with \eTeX{}'s rounding % division) % \[ % 10^{4j} z = % \Bigl[\bigl(\lfloor 10^{4j}(a-y^2)\rfloor - 257\bigr) % \cdot (0.5\cdot 10^{8}) % \Bigm/ \lfloor 10^{8} y + 1\rfloor\Bigr] \,. % \] % The choice of~$j$ ensures that $10^{4j} z < 2\cdot 10^{8} \cdot % 0.5\cdot 10^{8} / 10^{7} = 10^{9}$, thus $10^{9} + 10^{4j} z$ has % exactly $10$~digits, does not overflow \TeX{}'s integer range, and % starts with~$1$. Incidentally, since all $a - y^2 \leq 3.2\cdot % 10^{-8}$, we know that $j\geq 3$. % % Let us show that $z$ is an underestimate of $\sqrt{a} - y$. On the % one hand, $\sqrt{a} - y \leq 16\cdot 10^{-8}$ because this holds for % the initial~$y$ and values of~$y$ can only increase. On the other % hand, the choice of~$j$ implies that $\sqrt{a} - y \leq % 5(\sqrt{a}+y)(\sqrt{a}-y) = 5(a - y^2) < 10^{9-4j}$. For $j=3$, the % first bound is better, while for larger~$j$, the second bound is % better. For all $j\in[3,6]$, we find $\sqrt{a}-y < 16\cdot % 10^{-2j}$. From this, we deduce that % \[ % 10^{4j} (\sqrt{a}-y) % = \frac{10^{4j}\bigl(a-y^2-(\sqrt{a}-y)^2\bigr)}{2y} % \geq \frac{\bigl\lfloor 10^{4j}(a-y^2)\bigr\rfloor-257} % {2\cdot 10^{-8} \lfloor 10^{8}y+1\rfloor} % + \frac{1}{2} % \] % where we have replaced the bound $10^{4j}(16\cdot 10^{-2j}) = 256$ % by~$257$ and extracted the corresponding term $1/\bigl(2\cdot % 10^{-8} \lfloor 10^{8}y+1\rfloor\bigr) \geq 1/2$. Given that % \eTeX{}'s integer division obeys $[b/c] \leq b/c + 1/2$, we deduce % that $10^{4j} z \leq 10^{4j} (\sqrt{a}-y)$, hence $y+z\leq\sqrt{a}$ % is an underestimate of~$\sqrt{a}$, as claimed. One implementation % detail: because the computation involves |-#4*#4| |-| |2*#3*#5| |-| % |2*#2*#6| which may be as low as $-5\cdot 10^{8}$, we need to use % the \texttt{pack_big} functions, and the \texttt{big} shifts. % \begin{macrocode} \cs_new:Npn \@@_sqrt_auxii_o:NnnnnnnnN #1 #2#3#4#5#6 #7#8#9 { \exp_after:wN #1 \int_value:w \@@_int_eval:w \c_@@_big_leading_shift_int + #7 - #2 * #2 \exp_after:wN \@@_pack_big:NNNNNNw \int_value:w \@@_int_eval:w \c_@@_big_middle_shift_int - 2 * #2 * #3 \exp_after:wN \@@_pack_big:NNNNNNw \int_value:w \@@_int_eval:w \c_@@_big_middle_shift_int + #8 - #3 * #3 - 2 * #2 * #4 \exp_after:wN \@@_pack_big:NNNNNNw \int_value:w \@@_int_eval:w \c_@@_big_middle_shift_int - 2 * #3 * #4 - 2 * #2 * #5 \exp_after:wN \@@_pack_big:NNNNNNw \int_value:w \@@_int_eval:w \c_@@_big_middle_shift_int + #9 000 0000 - #4 * #4 - 2 * #3 * #5 - 2 * #2 * #6 \exp_after:wN \@@_pack_big:NNNNNNw \int_value:w \@@_int_eval:w \c_@@_big_middle_shift_int - 2 * #4 * #5 - 2 * #3 * #6 \exp_after:wN \@@_pack_big:NNNNNNw \int_value:w \@@_int_eval:w \c_@@_big_middle_shift_int - #5 * #5 - 2 * #4 * #6 \exp_after:wN \@@_pack_big:NNNNNNw \int_value:w \@@_int_eval:w \c_@@_big_middle_shift_int - 2 * #5 * #6 \exp_after:wN \@@_pack_big:NNNNNNw \int_value:w \@@_int_eval:w \c_@@_big_trailing_shift_int - #6 * #6 ; % ( - 257 ) * 5000 0000 / (#2#3 + 1) + 10 0000 0000 ; {#2}{#3}{#4}{#5}{#6} {#7}{#8}#9 } % \end{macrocode} % \end{macro} % % \begin{macro}[rEXP] % { % \@@_sqrt_auxiii_o:wnnnnnnnn, % \@@_sqrt_auxiv_o:NNNNNw, % \@@_sqrt_auxv_o:NNNNNw, % \@@_sqrt_auxvi_o:NNNNNw, % \@@_sqrt_auxvii_o:NNNNNw % } % We receive here the difference $a-y^2=d=\sum_i d_i \cdot 10^{-4i}$, % as \meta{d_2} |;| \Arg{d_3} \ldots{} \Arg{d_{10}}, where each block % has~$4$ digits, except \meta{d_2}. This function finds the largest % $j\leq 6$ such that $10^{4j}(a-y^2) < 2\cdot 10^{8}$, then leaves an % open parenthesis and the integer % $\bigl\lfloor 10^{4j}(a-y^2)\bigr\rfloor$ in an integer % expression. The closing parenthesis is provided by the caller % \cs{@@_sqrt_auxii_o:NnnnnnnnN}, which completes the expression % \[ % 10^{4j} z = % \Bigl[\bigl(\lfloor 10^{4j}(a-y^2)\rfloor - 257\bigr) % \cdot (0.5\cdot 10^{8}) % \Bigm/ \lfloor 10^{8} y + 1\rfloor\Bigr] % \] % for an estimate of $10^{4j} (\sqrt{a} - y)$. If $d_2\geq 2$, $j=3$ % and the \texttt{auxiv} auxiliary receives $10^{12} z$. If $d_2\leq % 1$ but $10^{4} d_2 + d_3 \geq 2$, $j=4$ and the \texttt{auxv} % auxiliary is called, and receives $10^{16} z$, and so on. In all % those cases, the \texttt{auxviii} auxiliary is set up to add~$z$ % to~$y$, then go back to the \texttt{auxii} step with continuation % \texttt{auxiii} (the function we are currently describing). The % maximum value of $j$ is~$6$, regardless of whether $10^{12} d_2 + % 10^{8} d_3 + 10^{4} d_4 + d_5 \geq 1$. In this last case, we detect % when $10^{24} z < 10^{7}$, which essentially means $\sqrt{a} - y % \lesssim 10^{-17}$: once this threshold is reached, there is enough % information to find the correctly rounded~$\sqrt{a}$ with only one % more call to \cs{@@_sqrt_auxii_o:NnnnnnnnN}. Note that the % iteration cannot be stuck before reaching $j=6$, because for $j<6$, % one has $2\cdot 10^{8}\leq 10^{4(j+1)}(a-y^2)$, hence % \[ % 10^{4j} z % \geq \frac{(20000-257)(0.5\cdot 10^{8})}{\lfloor 10^{8} y + 1\rfloor} % \geq (20000-257)\cdot 0.5 > 0 \,. % \] % \begin{macrocode} \cs_new:Npn \@@_sqrt_auxiii_o:wnnnnnnnn #1; #2#3#4#5#6#7#8#9 { \if_int_compare:w #1 > \c_one_int \exp_after:wN \@@_sqrt_auxiv_o:NNNNNw \int_value:w \@@_int_eval:w (#1#2 %) \else: \if_int_compare:w #1#2 > \c_one_int \exp_after:wN \@@_sqrt_auxv_o:NNNNNw \int_value:w \@@_int_eval:w (#1#2#3 %) \else: \if_int_compare:w #1#2#3 > \c_one_int \exp_after:wN \@@_sqrt_auxvi_o:NNNNNw \int_value:w \@@_int_eval:w (#1#2#3#4 %) \else: \exp_after:wN \@@_sqrt_auxvii_o:NNNNNw \int_value:w \@@_int_eval:w (#1#2#3#4#5 %) \fi: \fi: \fi: } \cs_new:Npn \@@_sqrt_auxiv_o:NNNNNw 1#1#2#3#4#5#6; { \@@_sqrt_auxviii_o:nnnnnnn {#1#2#3#4#5#6} {00000000} } \cs_new:Npn \@@_sqrt_auxv_o:NNNNNw 1#1#2#3#4#5#6; { \@@_sqrt_auxviii_o:nnnnnnn {000#1#2#3#4#5} {#60000} } \cs_new:Npn \@@_sqrt_auxvi_o:NNNNNw 1#1#2#3#4#5#6; { \@@_sqrt_auxviii_o:nnnnnnn {0000000#1} {#2#3#4#5#6} } \cs_new:Npn \@@_sqrt_auxvii_o:NNNNNw 1#1#2#3#4#5#6; { \if_int_compare:w #1#2 = \c_zero_int \exp_after:wN \@@_sqrt_auxx_o:Nnnnnnnn \fi: \@@_sqrt_auxviii_o:nnnnnnn {00000000} {000#1#2#3#4#5} } % \end{macrocode} % \end{macro} % % \begin{macro}[rEXP] % {\@@_sqrt_auxviii_o:nnnnnnn, \@@_sqrt_auxix_o:wnwnw} % Simply add the two $8$-digit blocks of~$z$, aligned to the last four % of the five $4$-digit blocks of~$y$, then call the \texttt{auxii} % auxiliary to evaluate $y'^{2} = (y+z)^{2}$. % \begin{macrocode} \cs_new:Npn \@@_sqrt_auxviii_o:nnnnnnn #1#2 #3#4#5#6#7 { \exp_after:wN \@@_sqrt_auxix_o:wnwnw \int_value:w \@@_int_eval:w #3 \exp_after:wN \@@_basics_pack_low:NNNNNw \int_value:w \@@_int_eval:w #1 + 1#4#5 \exp_after:wN \@@_basics_pack_low:NNNNNw \int_value:w \@@_int_eval:w #2 + 1#6#7 ; } \cs_new:Npn \@@_sqrt_auxix_o:wnwnw #1; #2#3; #4#5; { \@@_sqrt_auxii_o:NnnnnnnnN \@@_sqrt_auxiii_o:wnnnnnnnn {#1}{#2}{#3}{#4}{#5} } % \end{macrocode} % \end{macro} % % \begin{macro}[rEXP] % {\@@_sqrt_auxx_o:Nnnnnnnn, \@@_sqrt_auxxi_o:wwnnN} % At this stage, $j=6$ and $10^{24} z < 10^{7}$, hence % \[ % 10^{7} + 1/2 > 10^{24} z + 1/2 \geq % \bigl(10^{24}(a-y^2) - 258\bigr) \cdot (0.5\cdot 10^{8}) % \Bigm/ (10^{8} y + 1) \,, % \] % then $10^{24}(a-y^2) - 258 < 2 (10^{7} + 1/2) (y + 10^{-8})$, and % \[ % 10^{24}(a-y^2) % < (10^{7} + 1290.5) (1 + 10^{-8}/y) (2y) % < (10^{7} + 1290.5) (1 + 10^{-7}) (y + \sqrt{a}) \,, % \] % which finally implies $0\leq\sqrt{a}-y < 0.2\cdot 10^{-16}$. In % particular, $y$~is an underestimate of~$\sqrt{a}$ and $y+0.5\cdot % 10^{-16}$ is a (strict) overestimate. There is at exactly one % multiple $m$~of $0.5\cdot 10^{-16}$ in the interval $[y, y+0.5\cdot % 10^{-16})$. If $m^2>a$, then the square root is inexact and is % obtained by rounding $m-\epsilon$ to a multiple of $10^{-16}$ (the % precise shift $0<\epsilon<0.5\cdot 10^{-16}$ is irrelevant for % rounding). If $m^2=a$ then the square root is exactly~$m$, and % there is no rounding. If $m^2 \c_zero_int \if_int_compare:w #1#2 = \c_one_int \if_int_compare:w #3#4 = \c_zero_int \if_int_compare:w #5#6 = \c_zero_int \if_int_compare:w #7#8 = \c_zero_int \@@_sqrt_auxxiii_o:w \fi: \fi: \fi: \fi: \exp_after:wN \@@_sqrt_auxxiv_o:wnnnnnnnN \int_value:w 9998 \else: \exp_after:wN \@@_sqrt_auxxiv_o:wnnnnnnnN \int_value:w 10000 \fi: ; } \cs_new:Npn \@@_sqrt_auxxiii_o:w \fi: \fi: \fi: \fi: #1 \fi: ; { \fi: \fi: \fi: \fi: \fi: \@@_sqrt_auxxiv_o:wnnnnnnnN 9999 ; } % \end{macrocode} % \end{macro} % % \begin{macro}[rEXP]{\@@_sqrt_auxxiv_o:wnnnnnnnN} % This receives $9998$, $9999$ or $10000$ as~|#1| when $m$~is an % underestimate, exact, or an overestimate, respectively. Then % comes~$m$ as five blocks of~$4$ digits, but where the last % block~|#6| may be $0$, $5000$, or~$10000$. In the latter case, we % need to add a carry, unless $m$~is an overestimate (|#1|~is then % $10000$). Then comes~$a$ as three arguments. Rounding is done by % \cs{@@_round:NNN}, whose first argument is the final sign~$0$ % (square roots are positive). We fake its second argument. It % should be the last digit kept, but this is only used when ties are % \enquote{rounded to even}, and only when the result is exactly % half-way between two representable numbers rational square roots of % numbers with $16$~significant digits have: this situation never % arises for the square root, as any exact square root of a $16$~digit % number has at most $8$~significant digits. Finally, the last % argument is the next digit, possibly shifted by~$1$ when there are % further nonzero digits. This is achieved by \cs{@@_round_digit:Nw}, % which receives (after removal of the $10000$'s digit) one of $0000$, % $0001$, $4999$, $5000$, $5001$, or~$9999$, which it converts to $0$, % $1$, $4$, $5$, $6$, and~$9$, respectively. % \begin{macrocode} \cs_new:Npn \@@_sqrt_auxxiv_o:wnnnnnnnN #1; #2#3#4#5#6 #7#8#9 { \exp_after:wN \@@_basics_pack_high:NNNNNw \int_value:w \@@_int_eval:w 1 0000 0000 + #2#3 \exp_after:wN \@@_basics_pack_low:NNNNNw \int_value:w \@@_int_eval:w 1 0000 0000 + #4#5 \if_int_compare:w #6 > #1 \exp_stop_f: + 1 \fi: + \exp_after:wN \@@_round:NNN \exp_after:wN 0 \exp_after:wN 0 \int_value:w \exp_after:wN \use_i:nn \exp_after:wN \@@_round_digit:Nw \int_value:w \@@_int_eval:w #6 + 19999 - #1 ; \exp_after:wN ; } % \end{macrocode} % \end{macro} % % \subsection{About the sign and exponent} % % \begin{macro}[EXP]{\@@_logb_o:w, \@@_logb_aux_o:w} % The exponent of a normal number is its \meta{exponent} minus one. % \begin{macrocode} \cs_new:Npn \@@_logb_o:w ? \s_@@ \@@_chk:w #1#2; @ { \if_case:w #1 \exp_stop_f: \@@_case_use:nw { \@@_division_by_zero_o:Nnw \c_minus_inf_fp { logb } } \or: \exp_after:wN \@@_logb_aux_o:w \or: \@@_case_return_o:Nw \c_inf_fp \else: \@@_case_return_same_o:w \fi: \s_@@ \@@_chk:w #1 #2; } \cs_new:Npn \@@_logb_aux_o:w \s_@@ \@@_chk:w #1 #2 #3 #4 ; { \exp_after:wN \@@_parse:n \exp_after:wN { \int_value:w \int_eval:w #3 - 1 \exp_after:wN } } % \end{macrocode} % \end{macro} % % \begin{macro}[EXP]{\@@_sign_o:w} % \begin{macro}[EXP]{\@@_sign_aux_o:w} % Find the sign of the floating point: \texttt{nan}, |+0|, |-0|, |+1| or |-1|. % \begin{macrocode} \cs_new:Npn \@@_sign_o:w ? \s_@@ \@@_chk:w #1#2; @ { \if_case:w #1 \exp_stop_f: \@@_case_return_same_o:w \or: \exp_after:wN \@@_sign_aux_o:w \or: \exp_after:wN \@@_sign_aux_o:w \else: \@@_case_return_same_o:w \fi: \s_@@ \@@_chk:w #1 #2; } \cs_new:Npn \@@_sign_aux_o:w \s_@@ \@@_chk:w #1 #2 #3 ; { \exp_after:wN \@@_set_sign_o:w \exp_after:wN #2 \c_one_fp @ } % \end{macrocode} % \end{macro} % \end{macro} % % \begin{macro}[EXP]{\@@_set_sign_o:w} % This function is used for the unary minus and for \texttt{abs}. It % leaves the sign of \texttt{nan} invariant, turns negative numbers % (sign~$2$) to positive numbers (sign~$0$) and positive numbers % (sign~$0$) to positive or negative numbers depending on~|#1|. It % also expands after itself in the input stream, just like % \cs{@@_+_o:ww}. % \begin{macrocode} \cs_new:Npn \@@_set_sign_o:w #1 \s_@@ \@@_chk:w #2#3#4; @ { \exp_after:wN \@@_exp_after_o:w \exp_after:wN \s_@@ \exp_after:wN \@@_chk:w \exp_after:wN #2 \int_value:w \if_case:w #3 \exp_stop_f: #1 \or: 1 \or: 0 \fi: \exp_stop_f: #4; } % \end{macrocode} % \end{macro} % % \subsection{Operations on tuples} % % \begin{macro}[EXP]{\@@_tuple_set_sign_o:w} % \begin{macro}[EXP]{\@@_tuple_set_sign_aux_o:Nnw, \@@_tuple_set_sign_aux_o:w} % Two cases: |abs(|\meta{tuple}|)| for which |#1| is $0$ (invalid for % tuples) and |-|\meta{tuple} for which |#1| is $2$. In that case, % map over all items in the tuple an auxiliary that dispatches to the % type-appropriate sign-flipping function. % \begin{macrocode} \cs_new:Npn \@@_tuple_set_sign_o:w #1#2 @ { \if_meaning:w 2 #1 \exp_after:wN \@@_tuple_set_sign_aux_o:Nnw \fi: \@@_invalid_operation_o:nw { abs } #2 } \cs_new:Npn \@@_tuple_set_sign_aux_o:Nnw #1#2 { \@@_tuple_map_o:nw \@@_tuple_set_sign_aux_o:w } \cs_new:Npn \@@_tuple_set_sign_aux_o:w #1#2 ; { \@@_change_func_type:NNN #1 \@@_set_sign_o:w \@@_parse_apply_unary_error:NNw 2 #1 #2 ; @ } % \end{macrocode} % \end{macro} % \end{macro} % % \begin{macro}[EXP]{\@@_*_tuple_o:ww, \@@_tuple_*_o:ww, \@@_tuple_/_o:ww} % For \meta{number}|*|\meta{tuple} and \meta{tuple}|*|\meta{number} % and \meta{tuple}|/|\meta{number}, loop through the \meta{tuple} some % code that multiplies or divides by the appropriate \meta{number}. % Importantly we need to dispatch according to the type, and we make % sure to apply the operator in the correct order. % \begin{macrocode} \cs_new:cpn { @@_*_tuple_o:ww } #1 ; { \@@_tuple_map_o:nw { \@@_binary_type_o:Nww * #1 ; } } \cs_new:cpn { @@_tuple_*_o:ww } #1 ; #2 ; { \@@_tuple_map_o:nw { \@@_binary_rev_type_o:Nww * #2 ; } #1 ; } \cs_new:cpn { @@_tuple_/_o:ww } #1 ; #2 ; { \@@_tuple_map_o:nw { \@@_binary_rev_type_o:Nww / #2 ; } #1 ; } % \end{macrocode} % \end{macro} % % \begin{macro}[EXP]{\@@_tuple_+_tuple_o:ww, \@@_tuple_-_tuple_o:ww} % Check the two tuples have the same number of items and map through % these a helper that dispatches appropriately depending on the types. % This means |(1,2)+((1,1),2)| gives |(nan,4)|. % \begin{macrocode} \cs_set_protected:Npn \@@_tmp:w #1 { \cs_new:cpn { @@_tuple_#1_tuple_o:ww } \s_@@_tuple \@@_tuple_chk:w ##1 ; \s_@@_tuple \@@_tuple_chk:w ##2 ; { \int_compare:nNnTF { \@@_array_count:n {##1} } = { \@@_array_count:n {##2} } { \@@_tuple_mapthread_o:nww { \@@_binary_type_o:Nww #1 } } { \@@_invalid_operation_o:nww #1 } \s_@@_tuple \@@_tuple_chk:w {##1} ; \s_@@_tuple \@@_tuple_chk:w {##2} ; } } \@@_tmp:w + \@@_tmp:w - % \end{macrocode} % \end{macro} % % \begin{macrocode} % % \end{macrocode} % % \end{implementation} % % \PrintChanges % % \PrintIndex