From 59992eab907f9431a99ab4de987abd722e9d3098 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 11 Dec 2025 13:15:00 +1100 Subject: [PATCH] FPU: Avoid doing overflow processing twice in OE=1 case Split the ROUND_OFLOW state into two, one which handles the OE=0 case (disabled overflow exception) and one which handles the OE=1 case (enabled overflow exception). This avoids a loop in the state diagram and prevents us from adding the exponent bias twice. Also correct a bug in ROUNDING_3 state where for single-precision operations which yield a result which is denormal in double-precision format, r.shift was set wrongly. Signed-off-by: Paul Mackerras --- fpu.vhdl | 109 +++++++++++++++++++++------------------------ tests/fpu/fpu.c | 3 ++ tests/test_fpu.bin | Bin 33464 -> 33560 bytes 3 files changed, 55 insertions(+), 57 deletions(-) diff --git a/fpu.vhdl b/fpu.vhdl index f49f02d..07617af 100644 --- a/fpu.vhdl +++ b/fpu.vhdl @@ -72,7 +72,7 @@ architecture behaviour of fpu is INT_SHIFT, INT_ROUND, INT_ISHIFT, INT_FINAL, INT_CHECK, INT_OFLOW, FINISH, NORMALIZE, - ROUND_UFLOW, NORM_UFLOW, ROUND_OFLOW, + ROUND_UFLOW, NORM_UFLOW, ROUND_OFLOW_DIS, ROUND_OFLOW_EN, ROUNDING, ROUND_INC, ROUNDING_2, ROUNDING_3, DENORM, RENORM_A, RENORM_B, RENORM_C, @@ -315,6 +315,7 @@ architecture behaviour of fpu is constant RSCON2_63 : std_ulogic_vector(3 downto 0) := "0111"; constant RSCON2_64 : std_ulogic_vector(3 downto 0) := "1000"; constant RSCON2_MINEXP : std_ulogic_vector(3 downto 0) := "1001"; + constant RSCON2_DPMINX : std_ulogic_vector(3 downto 0) := "1010"; signal rs_sel1 : std_ulogic_vector(1 downto 0); signal rs_sel2 : std_ulogic; @@ -1633,10 +1634,10 @@ begin rs_con2 <= RSCON2_MINEXP; rs_neg2 <= '1'; set_x := '1'; -- uses r.r and r.shift - if r.result_exp < to_signed(-126, EXP_BITS) then + if exp_tiny = '1' then v.state := ROUND_UFLOW; - elsif r.result_exp > to_signed(127, EXP_BITS) then - v.state := ROUND_OFLOW; + elsif exp_huge = '1' and r.fpscr(FPSCR_OE) = '0' then + 
v.state := ROUND_OFLOW_DIS; else v.state := ROUNDING; end if; @@ -2406,6 +2407,7 @@ begin v.state := ROUNDING; when FINISH => + -- r.shift = 0 if r.is_multiply = '1' and px_nz = '1' then v.x := '1'; end if; @@ -2420,8 +2422,8 @@ begin set_x := '1'; if exp_tiny = '1' then v.state := ROUND_UFLOW; - elsif exp_huge = '1' then - v.state := ROUND_OFLOW; + elsif exp_huge = '1' and r.fpscr(FPSCR_OE) = '0' then + v.state := ROUND_OFLOW_DIS; else v.state := ROUNDING; end if; @@ -2441,8 +2443,8 @@ begin set_x := '1'; if exp_tiny = '1' then v.state := ROUND_UFLOW; - elsif exp_huge = '1' then - v.state := ROUND_OFLOW; + elsif exp_huge = '1' and r.fpscr(FPSCR_OE) = '0' then + v.state := ROUND_OFLOW_DIS; else v.state := ROUNDING; end if; @@ -2485,30 +2487,20 @@ begin set_x := '1'; v.state := ROUNDING; - when ROUND_OFLOW => + when ROUND_OFLOW_DIS => + -- disabled overflow exception + -- result depends on rounding mode rcls_op <= RCLS_TINF; v.fpscr(FPSCR_OX) := '1'; opsel_r <= RES_MISC; misc_sel <= "010"; - set_r := '0'; - if r.fpscr(FPSCR_OE) = '0' then - -- disabled overflow exception - -- result depends on rounding mode - set_r := '1'; - v.fpscr(FPSCR_XX) := '1'; - v.fpscr(FPSCR_FI) := '1'; - -- construct largest representable number - re_con2 <= RECON2_MAX; - re_set_result <= '1'; - arith_done := '1'; - else - -- enabled overflow exception - re_sel1 <= REXP1_R; - re_con2 <= RECON2_BIAS; - re_neg2 <= '1'; - re_set_result <= '1'; - v.state := ROUNDING; - end if; + set_r := '1'; + v.fpscr(FPSCR_XX) := '1'; + v.fpscr(FPSCR_FI) := '1'; + -- construct largest representable number + re_con2 <= RECON2_MAX; + re_set_result <= '1'; + arith_done := '1'; when ROUNDING => opsel_mask <= '1'; @@ -2527,6 +2519,8 @@ begin -- denormalized result that needs to be renormalized rs_norm <= '1'; v.state := ROUNDING_3; + elsif r.result_exp > max_exp then + v.state := ROUND_OFLOW_EN; else arith_done := '1'; end if; @@ -2540,49 +2534,40 @@ begin when ROUND_INC => set_r := '1'; opsel_a <= AIN_RND; - -- 
set shift to -1 - rs_con2 <= RSCON2_1; - rs_neg2 <= '1'; v.state := ROUNDING_2; when ROUNDING_2 => -- Check for overflow during rounding - -- r.shift = -1 - v.x := '0'; - re_sel2 <= REXP2_NE; - opsel_r <= RES_SHIFT; - set_r := '0'; - if r.r(UNIT_BIT + 1) = '1' then - set_r := '1'; - re_set_result <= '1'; - if exp_huge = '1' then - v.state := ROUND_OFLOW; - else - arith_done := '1'; - end if; - elsif r.r(UNIT_BIT) = '0' then + -- r.shift = 0 + if r.r(UNIT_BIT + 1) = '1' or r.r(UNIT_BIT) = '0' then -- Do CLZ so we can renormalize the result rs_norm <= '1'; v.state := ROUNDING_3; + elsif exp_huge = '1' then + v.state := ROUND_OFLOW_EN; else arith_done := '1'; end if; when ROUNDING_3 => - -- r.shift = clz(r.r) - 9 + -- r.shift = clz(r.r) - 7 opsel_r <= RES_SHIFT; set_r := '1'; re_sel2 <= REXP2_NE; - -- set shift to new_exp - min_exp (== -1022) + -- set shift to new_exp - DP min_exp (== -1022) rs_sel1 <= RSH1_NE; - rs_con2 <= RSCON2_MINEXP; + rs_con2 <= RSCON2_DPMINX; rs_neg2 <= '1'; rcls_op <= RCLS_TZERO; -- If the result is zero, that's handled below. 
-- Renormalize result after rounding v.denorm := exp_tiny; re_set_result <= '1'; - if new_exp < to_signed(-1022, EXP_BITS) then + if exp_huge = '1' and r.fpscr(FPSCR_OE) = '0' then + v.state := ROUND_OFLOW_DIS; + elsif exp_huge = '1' and r.fpscr(FPSCR_OE) = '1' then + v.state := ROUND_OFLOW_EN; + elsif new_exp < to_signed(-1022, EXP_BITS) then v.state := DENORM; else arith_done := '1'; @@ -2596,6 +2581,16 @@ begin re_set_result <= '1'; arith_done := '1'; + when ROUND_OFLOW_EN => + -- enabled overflow exception + -- rounding and normalization have been done + v.fpscr(FPSCR_OX) := '1'; + re_sel1 <= REXP1_R; + re_con2 <= RECON2_BIAS; + re_neg2 <= '1'; + re_set_result <= '1'; + arith_done := '1'; + when DO_IDIVMOD => opsel_a <= AIN_B; opsel_aabs <= '1'; @@ -3201,14 +3196,12 @@ begin arith_done := '1'; end if; when RCLS_TINF => - if r.fpscr(FPSCR_OE) = '0' then - if r.round_mode(1 downto 0) = "00" or - (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then - v.result_class := INFINITY; - v.fpscr(FPSCR_FR) := '1'; - else - v.fpscr(FPSCR_FR) := '0'; - end if; + if r.round_mode(1 downto 0) = "00" or + (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then + v.result_class := INFINITY; + v.fpscr(FPSCR_FR) := '1'; + else + v.fpscr(FPSCR_FR) := '0'; end if; when others => end case; @@ -3593,6 +3586,8 @@ begin rsh_in2 := to_signed(64, EXP_BITS); when RSCON2_MINEXP => rsh_in2 := min_exp; + when RSCON2_DPMINX => + rsh_in2 := to_signed(-1022, EXP_BITS); when others => rsh_in2 := to_signed(0, EXP_BITS); end case; diff --git a/tests/fpu/fpu.c b/tests/fpu/fpu.c index 5c46b6f..5f0131c 100644 --- a/tests/fpu/fpu.c +++ b/tests/fpu/fpu.c @@ -682,6 +682,9 @@ struct roundvals { { FPS_RN_NEAR, 0x37c12345b0000000, 0x37c1234400000000, FPS_FI }, { FPS_RN_NEAR, 0x0000008800000088, 0, FPS_FI }, { FPS_RN_NEAR, 0xc2000000c2000000, 0xc2000000c0000000, FPS_FI }, + { FPS_RN_NEAR|FPS_OE, 0xefffffffffffffff, 0xe400000000000000, FPS_FR|FPS_FI }, + { FPS_RN_NEAR|FPS_OE, 
0xff0000ff43434343, 0xf30000ff40000000, FPS_FI }, + { FPS_RN_NEAR|FPS_OE, 0xfc00fc0139fffcff, 0xf000fc0140000000, FPS_FR|FPS_FI }, }; int test8(long arg) diff --git a/tests/test_fpu.bin b/tests/test_fpu.bin index f68ea11be39a32ba6dab54ab64e3109f8f4b185a..229e70f68afa50d2af321912e16afa1f774477ee 100755 GIT binary patch delta 548 zcmZ9GO(;ZB6vxlKGlO}N$5WanA59@&kx6-K>h^2~8D(L?24!JkD>4`wdUiA2%`7Y| zC8?P!Y-A&a6dN0|V(~0|WNGr8`)2ZW>fU?)=XdV;pTkv}TBBtEECkRuep=W!hwo%8 zS*B5lC+j;XoTNvdOMR*c=m5PIfOP>d)>R9!2w-CJaa9XRY6=58&XAtpUfFiPr1BKy z*=}TW31Itn??2;dwbEl^SDas}Z8V$_pXv=!OhM5N##F)Y%ZG`{JGJI2G_ok#qnQQa zrs$o$!e&1nmRz#N7qkYSf&_;-bR$pQ4n5LpyhXxTmP1Ycs0Ed{n26<86oHB%MOx2P zuN68g)a`p{dq(UxcX()cS3I>xDDewDc%YD&cnkGYvcf~G!J86+bvYx!r`i0E5^rpj zkOUtv{3OMv41Y-im)dbdbbF93BzV%$>SV;zPC5-5A>_J#ldy+N`dyaFn>&Tm0I1H-D@ zThQeuZxj$`<=Cub$ujw@fIicUjhp`oTw~}A~FtF?#s4NRucCWBJ=ZZ~EjUYog zCO;K6=3?2L)BthfW+jmzE|Aq_a&7EjC#Y^?WxB9&^EPcGR;CwgH@`CQV`7xpY-zlm zi;-dTM_X?uMyAQO4(gl|o0U32W(Z8qb*M)0Hh*-8W@HqY?CT`WC^0$HNt#yx=%_U; z3^l9#9cl$8PjnJi1&J^-I9T#9Xj;N$fMNp3{K+Ss