From 09965b91024faceab13446ab6801fdb2614da71e Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Fri, 27 May 2022 22:23:50 +1000
Subject: [PATCH] FPU: Set sign of 0 result of subtraction in pack_dp

When a floating-point subtraction results in a zero result, the sign
of the result is required to be positive in all rounding modes except
the round to minus infinity mode, when it is negative.  Consolidate
the logic for doing this in one place, in the pack_dp function,
instead of having it at each place where a zero result is generated.

Since fnmadd[s] and fnmsub[s] negate the result after this rule has
been applied, we use the r.negate signal to indicate a negation which
is now done in pack_dp.  Thus the EXC_RESULT state no longer uses
r.negate, and in fact doesn't set v.result_sign at all; that is now
done in the states that lead into EXC_RESULT.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 fpu.vhdl | 73 ++++++++++++++++++++++++++++----------------------------
 1 file changed, 36 insertions(+), 37 deletions(-)

diff --git a/fpu.vhdl b/fpu.vhdl
index 3ea51b9..417a318 100644
--- a/fpu.vhdl
+++ b/fpu.vhdl
@@ -169,6 +169,9 @@ architecture behaviour of fpu is
         oe           : std_ulogic;
         xerc         : xer_common_t;
         xerc_result  : xer_common_t;
+        res_negate   : std_ulogic;
+        res_subtract : std_ulogic;
+        res_rmode    : std_ulogic_vector(2 downto 0);
     end record;
 
     type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);
@@ -605,15 +608,21 @@ architecture behaviour of fpu is
     end;
 
     -- Construct a DP floating-point result from components
-    function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
-                     mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
+    function pack_dp(negative: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
+                     mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic;
+                     negate: std_ulogic; is_subtract: std_ulogic; round_mode: std_ulogic_vector)
         return std_ulogic_vector is
         variable dp_result : std_ulogic_vector(63 downto 0);
+        variable sign : std_ulogic;
     begin
         dp_result := (others => '0');
-        dp_result(63) := sign;
+        sign := negative;
         case class is
             when ZERO =>
+                if is_subtract = '1' then
+                    -- set result sign depending on rounding mode
+                    sign := round_mode(0) and round_mode(1);
+                end if;
             when FINITE =>
                 if mantissa(UNIT_BIT) = '1' then
                     -- normalized number
@@ -633,6 +642,7 @@ architecture behaviour of fpu is
                     dp_result(28 downto 0) := mantissa(SP_LSB - 1 downto DP_LSB);
                 end if;
         end case;
+        dp_result(63) := sign xor negate;
         return dp_result;
     end;
 
@@ -1468,8 +1478,8 @@ begin
                 v.fpscr(FPSCR_FR) := '0';
                 v.fpscr(FPSCR_FI) := '0';
                 is_add := r.a.negative xor r.b.negative xor r.insn(1);
+                v.is_subtract := not is_add;
                 if r.a.class = FINITE and r.b.class = FINITE then
-                    v.is_subtract := not is_add;
                     v.add_bsmall := r.exp_cmp;
                     v.opsel_a := AIN_B;
                     if r.exp_cmp = '0' then
@@ -1491,18 +1501,13 @@ begin
                         v.fpscr(FPSCR_VXISI) := '1';
                         qnan_result := '1';
                         arith_done := '1';
-                    elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
-                        -- return -0 for rounding to -infinity
-                        v.result_sign := r.round_mode(1) and r.round_mode(0);
-                        arith_done := '1';
                     elsif r.a.class = INFINITY or r.b.class = ZERO then
-                        -- result is A
-                        v.opsel_a := AIN_A;
-                        v.state := EXC_RESULT;
+                        -- result is A; we're already set up to put A into R
+                        arith_done := '1';
                     else
                         -- result is +/- B
                         v.opsel_a := AIN_B;
-                        v.negate := not r.insn(1);
+                        v.result_sign := r.b.negative xnor r.insn(1);
                         v.state := EXC_RESULT;
                     end if;
                 end if;
@@ -1541,7 +1546,6 @@ begin
                     else
                         -- r.c.class is ZERO or INFINITY
                         v.opsel_a := AIN_C;
-                        v.negate := r.a.negative;
                         v.state := EXC_RESULT;
                     end if;
                 end if;
@@ -1599,8 +1603,10 @@ begin
                 v.fpscr(FPSCR_FI) := '0';
                 if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then
                     v.opsel_a := AIN_C;
+                    v.result_sign := r.c.negative;
                 else
                     v.opsel_a := AIN_B;
+                    v.result_sign := r.b.negative;
                 end if;
                 v.quieten_nan := '0';
                 v.state := EXC_RESULT;
@@ -1720,9 +1726,10 @@ begin
                 v.fpscr(FPSCR_FR) := '0';
                 v.fpscr(FPSCR_FI) := '0';
                 is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1);
+                v.negate := r.insn(2);
+                v.is_subtract := not is_add;
                 if r.a.class = FINITE and r.c.class = FINITE and
                     (r.b.class = FINITE or r.b.class = ZERO) then
-                    v.is_subtract := not is_add;
                     -- Make sure A and C are normalized
                     if r.a.mantissa(UNIT_BIT) = '0' then
                         v.state := RENORM_A;
@@ -1730,13 +1737,13 @@ begin
                         v.state := RENORM_C;
                     elsif r.b.class = ZERO then
                         -- no addend, degenerates to multiply
-                        v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
+                        v.result_sign := r.a.negative xor r.c.negative;
                         f_to_multiply.valid <= '1';
                         v.is_multiply := '1';
                         v.state := MULT_1;
                     elsif r.madd_cmp = '0' then
                         -- addend is bigger, do multiply first
-                        v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
+                        v.result_sign := r.b.negative xnor r.insn(1);
                         f_to_multiply.valid <= '1';
                         v.first := '1';
                         v.state := FMADD_0;
@@ -1760,19 +1767,14 @@ begin
                         else
                             -- result is infinity
                             v.result_class := INFINITY;
-                            v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
+                            v.result_sign := r.a.negative xor r.c.negative;
                             arith_done := '1';
                         end if;
                     else
                         -- Here A is zero, C is zero, or B is infinity
                         -- Result is +/-B in all of those cases
                         v.opsel_a := AIN_B;
-                        if r.b.class /= ZERO or is_add = '1' then
-                            v.negate := not (r.insn(1) xor r.insn(2));
-                        else
-                            -- have to be careful about rule for 0 - 0 result sign
-                            v.negate := r.b.negative xor (r.round_mode(1) and r.round_mode(0)) xor r.insn(2);
-                        end if;
+                        v.result_sign := r.b.negative xnor r.insn(1);
                         v.state := EXC_RESULT;
                     end if;
                 end if;
@@ -1910,10 +1912,6 @@ begin
                 elsif (r_hi_nz or r_lo_nz or (or (r.r(DP_LSB - 1 downto 0)))) = '0' then
                     -- r.x must be zero at this point
                     v.result_class := ZERO;
-                    if r.is_subtract = '1' then
-                        -- set result sign depending on rounding mode
-                        v.result_sign := r.round_mode(1) and r.round_mode(0);
-                    end if;
                     arith_done := '1';
                 else
                     rs_norm <= '1';
@@ -1970,7 +1968,7 @@ begin
                 -- product is bigger here
                 -- shift B right and use it as the addend to the multiplier
                 -- for subtract, multiplier does B - A * C
-                v.result_sign := r.a.negative xor r.c.negative xor r.insn(2) xor r.is_subtract;
+                v.result_sign := r.a.negative xor r.c.negative xor r.is_subtract;
                 re_sel2 <= REXP2_B;
                 re_set_result <= '1';
                 -- set shift to b.exp - result_exp + 64
@@ -2030,7 +2028,6 @@ begin
                     if s_nz = '0' then
                         -- must be a subtraction, and r.x must be zero
                         v.result_class := ZERO;
-                        v.result_sign := r.round_mode(1) and r.round_mode(0);
                         arith_done := '1';
                     else
                         -- R is all zeroes but there are non-zero bits in S
@@ -2572,10 +2569,6 @@ begin
                 rs_neg2 <= '1';
                 if mant_nz = '0' then
                     v.result_class := ZERO;
-                    if r.is_subtract = '1' then
-                        -- set result sign depending on rounding mode
-                        v.result_sign := r.round_mode(1) and r.round_mode(0);
-                    end if;
                     arith_done := '1';
                 else
                     -- Renormalize result after rounding
@@ -2597,6 +2590,7 @@ begin
                 arith_done := '1';
 
             when NAN_RESULT =>
+                v.negate := '0';
                 if (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(QNAN_BIT) = '0') or
                     (r.use_b = '1' and r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0') or
                     (r.use_c = '1' and r.c.class = NAN and r.c.mantissa(QNAN_BIT) = '0') then
@@ -2606,10 +2600,13 @@ begin
                 end if;
                 if r.use_a = '1' and r.a.class = NAN then
                     v.opsel_a := AIN_A;
+                    v.result_sign := r.a.negative;
                 elsif r.use_b = '1' and r.b.class = NAN then
                     v.opsel_a := AIN_B;
+                    v.result_sign := r.b.negative;
                 elsif r.use_c = '1' and r.c.class = NAN then
                     v.opsel_a := AIN_C;
+                    v.result_sign := r.c.negative;
                 end if;
                 v.state := EXC_RESULT;
 
@@ -2617,15 +2614,12 @@ begin
                 -- r.opsel_a = AIN_A, AIN_B or AIN_C according to which input is the result
                 case r.opsel_a is
                     when AIN_B =>
-                        v.result_sign := r.b.negative xor r.negate;
                         re_sel2 <= REXP2_B;
                         v.result_class := r.b.class;
                     when AIN_C =>
-                        v.result_sign := r.c.negative xor r.negate;
                         re_sel2 <= REXP2_C;
                         v.result_class := r.c.class;
                     when others =>
-                        v.result_sign := r.a.negative xor r.negate;
                         re_sel1 <= REXP1_A;
                         v.result_class := r.a.class;
                 end case;
@@ -3171,6 +3165,7 @@ begin
             invalid := '1';
             v.result_class := NAN;
             v.result_sign := '0';
+            v.negate := '0';
             misc_sel <= "0001";
             opsel_r <= RES_MISC;
             arith_done := '1';
@@ -3550,6 +3545,9 @@ begin
                 v.int_result := int_result;
                 v.illegal := illegal;
                 v.nsnan_result := v.quieten_nan;
+                v.res_negate := v.negate;
+                v.res_subtract := v.is_subtract;
+                v.res_rmode := r.round_mode;
                 if r.integer_op = '1' then
                     v.cr_mask := num_to_fxm(0);
                 elsif r.is_cmp = '0' then
@@ -3581,7 +3579,8 @@ begin
             fp_result <= r.r;
         else
             fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
-                                 r.sp_result, r.nsnan_result);
+                                 r.sp_result, r.nsnan_result,
+                                 r.res_negate, r.res_subtract, r.res_rmode);
         end if;
 
         rin <= v;