From 6fe4b549f5bd08461f5062bcd4572b254f407884 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 12 Dec 2025 18:51:13 +1100 Subject: [PATCH] FPU: Improve accuracy in multiply-add almost-cancellation cases There are two paths for multiply-add instructions; one where the product is larger or nearly the same as the addend, which does the addition/subtraction in the multiplier with 128-bit accuracy; the other is used when the addend is clearly larger, which shifts the product right before doing the addition/subtraction in 64-bit arithmetic. The threshold for the second path is that B_exp has to be greater than A_exp + C_exp + 1, the +1 being because the product mantissa can be greater than 2. This increases the +1 to +2 to make sure that the 128-bit path is used when there is any chance of cancellation of the high-order bits of the sum. With the +1 threshold we could still get close to cancellation when the mantissas of A and C were nearly 2 and the mantissa of B was 1. This improves accuracy and avoids the need to do a 120-bit subtraction in the second path. Signed-off-by: Paul Mackerras --- fpu.vhdl | 14 +++++++++----- tests/fpu/fpu.c | 2 ++ tests/test_fpu.bin | Bin 34432 -> 34496 bytes 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/fpu.vhdl b/fpu.vhdl index 3bc7b3e..272e475 100644 --- a/fpu.vhdl +++ b/fpu.vhdl @@ -1786,14 +1786,16 @@ begin re_set_result <= '1'; -- put b.exp into shift rs_sel1 <= RSH1_B; - if (r.a.exponent + r.c.exponent + 1) < r.b.exponent then - -- addend is bigger, do multiply first + if (r.a.exponent + r.c.exponent + 2) < r.b.exponent then + -- addend is definitely bigger, do multiply first -- if subtracting, sign is opposite to initial estimate f_to_multiply.valid <= '1'; v.first := '1'; v.state := FMADD_0; else - -- product is bigger, shift B first + -- product may be bigger, or the answer might be + -- close to 0; shift B first so the multiplier does + -- the add/subtract operation. 
v.state := FMADD_1; end if; @@ -1961,8 +1963,8 @@ begin end if; when FMADD_1 => - -- shift is b.exp, so new_exp is a.exp + c.exp - b.exp - -- product is bigger here + -- shift is b.exp, so new_exp is a.exp + c.exp - b.exp (>= -2) + -- product may be bigger here -- shift B right and use it as the addend to the multiplier -- for subtract, multiplier does B - A * C re_sel2 <= REXP2_B; @@ -3342,6 +3344,8 @@ begin ci := '0'; case opsel_c is when CIN_SUBEXT => + -- Used with opsel_b = BIN_ADDSUBR, which will invert it if + -- r.is_subtract = 1, hence we use r.x here, rather than not r.x. ci := r.is_subtract and r.x; when CIN_ABSEXT => ci := r.r(63) and (s_nz or r.x); diff --git a/tests/fpu/fpu.c b/tests/fpu/fpu.c index b72e069..535d77a 100644 --- a/tests/fpu/fpu.c +++ b/tests/fpu/fpu.c @@ -1622,6 +1622,8 @@ struct fmavals { 0x014fd79870000001, 0x014fd79870000000, 0x814fd79870000001, 0x814fd79870000000 }, { 0x00000000ffffffff, 0x1fc771af627f62ab, 0x8000000000000000, FPS_RN_ZERO, 0x0000000000000000, 0x0000000000000000, 0x8000000000000000, 0x8000000000000000 }, + { 0x41efffffffe00000, 0xc1efffffffe00000, 0x43f0000000000000, FPS_RN_CEIL, + 0x41fffffffff00000, 0xc3ffffffffe00000, 0xc1fffffffff00000, 0x43ffffffffe00000 }, }; int test23(long arg) diff --git a/tests/test_fpu.bin b/tests/test_fpu.bin index ed714b776907036c03b56b2d80d67a521a382909..3d6dcce8e98d9d097e26d3664fb1a4b82f2d3d1a 100755 GIT binary patch delta 698 zcmZvYPiPZS5XNU-B2A*0U<6BqSPO|wEJYKoi8f)~fPW&ygNLFAF@;_X3Ld0ZqFH;f z3f1ZZ$w9E^AQF9=*OOF5(F(R+REUCtimPJNf*^{n^OAs3Tv%r2`+Ym}vBnsi8e;(f zEdh|uK4?o1QyqlU=`6qj(sHEbNXrcx7Xnc7LtX*nWukt#iPWmJAPFH7JSh>lrWhM#$rRvT9urC>d*x73F4{NI*vP=HN(}Okf z)AJmi4#_AEo#U^T^)rX$z22~&Zkvqq(R~wkg!3=05$)KIw*^~W<0rjA-;IiHQtUdu z+{E?TT1yMxdm~P}95B>a7OR)#G*)qh8VsOx7J3L!Cbc;aa^pAY(X3qhoVD>hIqGOG2(=n-)KYp#=rK? 
z;6r|Y(+rDUeo*&q^U!~|YKMGjw{}AH>~4D44xxCf zW&BKOX(0VvmXdj>*|9c)p{w#nxN^cXj6{wr{uk+WkP6VN$8>n7h7 z3TNurHrY&AOAcuFOD3TEfcC#+@X)Hc^S{ZV;`ft!X0X`gI$<@bBA^t|*U+0$zj!GJ59CCHR=Z#86!5^YL+uG&e*(F+Zd$arGYymW58xZBWp%RkIlKp<&2CD zo1dC+GcqP@{%Fd~$hc$jSF`hs9g~lmpJ)0paq={aQ%pblCMQ_}sou?cR@WFgRT%7a zUi@ovm@)aIrSxPCn;k%{4{i7vnHsij{$%UL#MHEHvYmq(*NkmSogfEK*fu%Op$dt+ z`IAEwBNI^A&q<1D#PEu|YfZ?!)g`sAZze6pE&2-4Cl7&Ij5?PFy!NHOTO-ulV kKlzlC9LU61PDXqX>p68GPS~vFd_iimRj}x0|F}X80DtP}OaK4?