From f631dcd7001c9950d8ec680d09b470007020549d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 12 Dec 2025 12:44:13 +1100 Subject: [PATCH] FPU: Set FPRF correctly on multiply result that underflows rcls_op being set to RCLS_TZERO was not detecting a zero result after rounding for a multiply result that underflows, because S still had low bits of the product. To fix this, remove the 's_nz = 0' from the RCLS_TZERO test. We can't then use this test in the FMADD_6 state, but we really shouldn't be testing for zero there, before rounding, so remove that. Also simplify FMADD_6 state by not setting rs_norm and going always to FINISH state rather than going to NORMALIZE state. Add a test for this case (actually a fmadd with B=0). While here, remove a pointless assignment to f_to_multiply.valid in MULT_1 state, since r.first is never set here. Signed-off-by: Paul Mackerras --- fpu.vhdl | 16 ++++------------ tests/fpu/fpu.c | 2 ++ tests/test_fpu.bin | Bin 33624 -> 33688 bytes 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/fpu.vhdl b/fpu.vhdl index 7d8060a..48c021d 100644 --- a/fpu.vhdl +++ b/fpu.vhdl @@ -1931,7 +1931,6 @@ begin v.instr_done := '1'; when MULT_1 => - f_to_multiply.valid <= r.first; opsel_r <= RES_MULT; set_r := '1'; opsel_s <= S_MULT; @@ -2022,25 +2021,18 @@ begin v.state := FMADD_6; when FMADD_6 => - -- r.shift = UNIT_BIT (or 0, but only if r is now nonzero) + -- r.shift = UNIT_BIT set_r := '0'; opsel_r <= RES_SHIFT; re_sel2 <= REXP2_NE; - rs_norm <= '1'; - rcls_op <= RCLS_TZERO; if (r.r(UNIT_BIT + 2) or r_hi_nz or r_lo_nz or (or (r.r(DP_LSB - 1 downto 0)))) = '0' then - -- S = 0 case is handled by RCLS_TZERO logic, otherwise... - -- R is all zeroes but there are non-zero bits in S + -- R is all zeroes but there may be non-zero bits in S -- so shift them into R and set S to 0 set_r := '1'; re_set_result <= '1'; set_s := '1'; - v.state := FINISH; - elsif r.r(UNIT_BIT + 2 downto UNIT_BIT) = "001" then - v.state := FINISH; - else - v.state := NORMALIZE; end if; + v.state := FINISH; when DIV_2 => -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y @@ -3197,7 +3189,7 @@ begin when others => end case; when RCLS_TZERO => - if or (r.r(UNIT_BIT + 2 downto 0)) = '0' and s_nz = '0' then + if or (r.r(UNIT_BIT + 2 downto 0)) = '0' then v.result_class := ZERO; arith_done := '1'; end if; diff --git a/tests/fpu/fpu.c b/tests/fpu/fpu.c index 89fb44f..ccf07f8 100644 --- a/tests/fpu/fpu.c +++ b/tests/fpu/fpu.c @@ -1618,6 +1618,8 @@ struct fmavals { /* from random exec tests */ { 0x43eff79000000000, 0x00000000000000ff, 0x0000000000000081, FPS_RN_CEIL, 0x014fd79870000001, 0x014fd79870000000, 0x814fd79870000001, 0x814fd79870000000 }, + { 0x00000000ffffffff, 0x1fc771af627f62ab, 0x8000000000000000, FPS_RN_ZERO, + 0x0000000000000000, 0x0000000000000000, 0x8000000000000000, 0x8000000000000000 }, }; int test23(long arg) diff --git a/tests/test_fpu.bin b/tests/test_fpu.bin index e6a21b8d72d9e5f5afd9e0892d5f88ed437c4233..2a7845bd7939f18fa5a2444a80996f887ced14ab 100755 GIT binary patch delta 660 zcmcc7#x$dyNtJ=Y!Ipu+EAbe+S0#{6WoYqAWMD`Die&@EvVmgRsSOiVjTsv@M!x5- zpRifUl7YeVB?H6D(?Dew3JeW?3_u3Q7LfSM+i(8=_XhDn7^IeCixW`%Bv9?C+ZT}J zI-qjv{1((RFs!=01&iD}zXqUO*XY2e}lk0@lq`Edcy#zWNEIkJ* z%>t3$CoIR+vDv8+WGv9YXTnCDEL)NqK-`YaiXy>Wj3*|S%Dravn4Bm-pYg|LMup$3 zAm^xtGcrXipKPb8wz)?A03&0;W?Rj2CdLh$w`v=M6udNWXJpLSY-nW7$e6G>*SMXL z@xkP;CclB4kEYCwj5{WOH9ODPG5M(ZdBz`;r&^o_l8Kf;a{ z3i~-pF-dHhoaH3tCIAeAH7pD@tNa~mL2RZ&UX?5inwH37%nS~eJZNG9DE!H%oa8_z zzH&0+gILe0198G;E$2Qd$^SqAJs!v*9cw8RD27(60$rl;rChG?)Y(5{`sR{rQ Cu;lmv delta 634 zcmZutO=uHA6n<|KsR?3&5tImPtRyzEYE86uwF%1#x~>p~9*Q2s2)!5-FJfDwS!1!Q z2dNA~4uZW2MFe>YIf)1gwqW(75IqPYu8Pt^PeJ3Gq+k>eJ3HUcyziT-PKvizMI6A$ z0@#J;UG^CNN1Q8m0pKKaMdpgk6-VG^JcIDy*pE>8D0S-qhdcmpj%@=maPq(#x5az) zZ_Vamh1I9LE}86q!QR>Ws}9Xt*6!!0%K$U=hyT&O=N;Cr*Qi4;uu`RW&5AMF_*nL}IC&|Oh}tZ1?UsCdS@yT( z#}O@Pk$G8-<$<3f>A(b?aul>^)7qgRy7IjC6<5l_#yeOej5SuMI`Lr?>+i~3GC`#+ zPZ0Yko4g?K96j2aQ3kK?AYu>gOz%UC(V5I7;r1Jv=ig;-ynbD_W%F@