From 5535257c71d5231ffdce98153abf1a27558052fe Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Mon, 21 Sep 2020 11:37:10 +1000
Subject: [PATCH] FPU: Don't use mask generator for rounding

Instead of using the mask generator in the rounding process, this uses
simpler logic to add in a 1 at the appropriate position (bit 2 or bit
31, depending on precision) and mask off the low-order bits.  Since
there are only two positions at which the masking and incrementing
need to be done, we don't need the full generality of the mask
generator.  This reduces the amount of logic and improves timing.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 fpu.vhdl | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/fpu.vhdl b/fpu.vhdl
index 38495a9..2e8096a 100644
--- a/fpu.vhdl
+++ b/fpu.vhdl
@@ -157,7 +157,7 @@ architecture behaviour of fpu is
 
     constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";
     constant BIN_R    : std_ulogic_vector(1 downto 0) := "01";
-    constant BIN_MASK : std_ulogic_vector(1 downto 0) := "10";
+    constant BIN_RND  : std_ulogic_vector(1 downto 0) := "10";
     constant BIN_PS6  : std_ulogic_vector(1 downto 0) := "11";
 
     constant RES_SUM   : std_ulogic_vector(1 downto 0) := "00";
@@ -632,6 +632,7 @@ begin
         variable mulexp      : signed(EXP_BITS-1 downto 0);
         variable maddend     : std_ulogic_vector(127 downto 0);
         variable sum         : std_ulogic_vector(63 downto 0);
+        variable round_inc   : std_ulogic_vector(63 downto 0);
     begin
         v := r;
         illegal := '0';
@@ -1117,7 +1118,6 @@ begin
                     elsif r.b.exponent > to_signed(127, EXP_BITS) then
                         v.state := ROUND_OFLOW;
                     else
-                        v.shift := to_signed(-2, EXP_BITS);
                         v.state := ROUNDING;
                     end if;
                 else
@@ -1619,7 +1619,6 @@ begin
                     -- sum overflowed, shift right
                     opsel_r <= RES_SHIFT;
                     set_x := '1';
-                    v.shift := to_signed(-2, EXP_BITS);
                     if exp_huge = '1' then
                         v.state := ROUND_OFLOW;
                     else
@@ -1627,7 +1626,6 @@ begin
                     end if;
                 elsif r.r(54) = '1' then
                     set_x := '1';
-                    v.shift := to_signed(-2, EXP_BITS);
                     v.state := ROUNDING;
                 elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
                     -- r.x must be zero at this point
@@ -2085,7 +2083,6 @@ begin
                 -- r.shift = b.exponent - 52
                 opsel_r <= RES_SHIFT;
                 set_x := '1';
-                v.shift := to_signed(-2, EXP_BITS);
                 v.state := ROUNDING;
 
             when FINISH =>
@@ -2103,7 +2100,6 @@ begin
                     elsif exp_huge = '1' then
                         v.state := ROUND_OFLOW;
                     else
-                        v.shift := to_signed(-2, EXP_BITS);
                         v.state := ROUNDING;
                     end if;
                 end if;
@@ -2119,7 +2115,6 @@ begin
                 elsif exp_huge = '1' then
                     v.state := ROUND_OFLOW;
                 else
-                    v.shift := to_signed(-2, EXP_BITS);
                     v.state := ROUNDING;
                 end if;
 
@@ -2131,7 +2126,6 @@ begin
                     -- have to denormalize before rounding
                     opsel_r <= RES_SHIFT;
                     set_x := '1';
-                    v.shift := to_signed(-2, EXP_BITS);
                     v.state := ROUNDING;
                 else
                     -- enabled underflow exception case
@@ -2142,7 +2136,6 @@ begin
                         renormalize := '1';
                         v.state := NORMALIZE;
                     else
-                        v.shift := to_signed(-2, EXP_BITS);
                         v.state := ROUNDING;
                     end if;
                 end if;
@@ -2169,7 +2162,6 @@ begin
                 else
                     -- enabled overflow exception
                     v.result_exp := r.result_exp - bias_exp;
-                    v.shift := to_signed(-2, EXP_BITS);
                     v.state := ROUNDING;
                 end if;
 
@@ -2178,9 +2170,8 @@ begin
                 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
                 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
                 if round(1) = '1' then
-                    -- set mask to increment the LSB for the precision
-                    opsel_b <= BIN_MASK;
-                    carry_in <= '1';
+                    -- increment the LSB for the precision
+                    opsel_b <= BIN_RND;
                     v.shift := to_signed(-1, EXP_BITS);
                     v.state := ROUNDING_2;
                 else
@@ -2402,8 +2393,9 @@ begin
                 in_b0 := (others => '0');
             when BIN_R =>
                 in_b0 := r.r;
-            when BIN_MASK =>
-                in_b0 := mask;
+            when BIN_RND =>
+                round_inc := (31 => r.single_prec, 2 => not r.single_prec, others => '0');
+                in_b0 := round_inc;
             when others =>
                 -- BIN_PS6, 6 LSBs of P/4 sign-extended to 64
                 in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 2)), 64));
@@ -2420,7 +2412,10 @@ begin
         end if;
         sum := std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
         if opsel_mask = '1' then
-            sum := sum and not mask;
+            sum(1 downto 0) := "00";
+            if r.single_prec = '1' then
+                sum(30 downto 2) := (others => '0');
+            end if;
         end if;
         case opsel_r is
             when RES_SUM =>