From 8da05e5331914674bd3e66a5728d435228d3ebf7 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Sat, 14 May 2022 21:46:40 +1000
Subject: [PATCH] FPU: Make an explicit exponent data path

With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift.  The plan is for the case
statement to turn into a microcode ROM eventually.

The value of v.result_exp is the sum of two values, either of which
can be negated (but not both).  The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0.  The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant.  The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2,
re_con2 (which selects the constant) and re_neg2, and the sum is
written into v.result_exp if re_set_result is 1.

For v.shift we also compute the sum of two values, either of which
can be negated (but not both).  The first value can be chosen from
new_exp, B exponent, r.shift, or 0.  The second value can be chosen
from the A exponent or a constant.  The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp).  These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2,
rs_con2 (which selects the constant) and rs_neg2.  After the adder
there is a multiplexer, controlled by rs_norm, which selects either
the sum or a shift count for normalization (derived from a
count-leading-zeroes operation on R) to be written into v.shift.  The
count-leading-zeroes result does not go through the adder for timing
reasons.

In order to simplify the logic and help improve timing, the control
signals are now set unconditionally within a state in many places,
even where those settings are only required when some condition is
met.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 fpu.vhdl | 490 ++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 395 insertions(+), 95 deletions(-)

diff --git a/fpu.vhdl b/fpu.vhdl
index d838872..44ab9aa 100644
--- a/fpu.vhdl
+++ b/fpu.vhdl
@@ -232,6 +232,55 @@ architecture behaviour of fpu is
     constant MULADD_A     : std_ulogic_vector(1 downto 0) := "10";
     constant MULADD_RS    : std_ulogic_vector(1 downto 0) := "11";
 
+    -- control signals and values for exponent data path
+    constant REXP1_ZERO  : std_ulogic_vector(1 downto 0) := "00";
+    constant REXP1_R     : std_ulogic_vector(1 downto 0) := "01";
+    constant REXP1_A     : std_ulogic_vector(1 downto 0) := "10";
+    constant REXP1_BHALF : std_ulogic_vector(1 downto 0) := "11";
+
+    constant REXP2_CON   : std_ulogic_vector(1 downto 0) := "00";
+    constant REXP2_NE    : std_ulogic_vector(1 downto 0) := "01";
+    constant REXP2_C     : std_ulogic_vector(1 downto 0) := "10";
+    constant REXP2_B     : std_ulogic_vector(1 downto 0) := "11";
+
+    constant RECON2_ZERO : std_ulogic_vector(1 downto 0) := "00";
+    constant RECON2_UNIT : std_ulogic_vector(1 downto 0) := "01";
+    constant RECON2_BIAS : std_ulogic_vector(1 downto 0) := "10";
+    constant RECON2_MAX  : std_ulogic_vector(1 downto 0) := "11";
+
+    signal re_sel1       : std_ulogic_vector(1 downto 0);
+    signal re_sel2       : std_ulogic_vector(1 downto 0);
+    signal re_con2       : std_ulogic_vector(1 downto 0);
+    signal re_neg1       : std_ulogic;
+    signal re_neg2       : std_ulogic;
+    signal re_set_result : std_ulogic;
+
+    constant RSH1_ZERO   : std_ulogic_vector(1 downto 0) := "00";
+    constant RSH1_B      : std_ulogic_vector(1 downto 0) := "01";
+    constant RSH1_NE     : std_ulogic_vector(1 downto 0) := "10";
+    constant RSH1_S      : std_ulogic_vector(1 downto 0) := "11";
+
+    constant RSH2_CON    : std_ulogic := '0';
+    constant RSH2_A      : std_ulogic := '1';
+
+    constant RSCON2_ZERO    : std_ulogic_vector(3 downto 0) := "0000";
+    constant RSCON2_1       : std_ulogic_vector(3 downto 0) := "0001";
+    constant RSCON2_UNIT_52 : std_ulogic_vector(3 downto 0) := "0010";
+    constant RSCON2_64_UNIT : std_ulogic_vector(3 downto 0) := "0011";
+    constant RSCON2_32      : std_ulogic_vector(3 downto 0) := "0100";
+    constant RSCON2_52      : std_ulogic_vector(3 downto 0) := "0101";
+    constant RSCON2_UNIT    : std_ulogic_vector(3 downto 0) := "0110";
+    constant RSCON2_63      : std_ulogic_vector(3 downto 0) := "0111";
+    constant RSCON2_64      : std_ulogic_vector(3 downto 0) := "1000";
+    constant RSCON2_MINEXP  : std_ulogic_vector(3 downto 0) := "1001";
+
+    signal rs_sel1       : std_ulogic_vector(1 downto 0);
+    signal rs_sel2       : std_ulogic;
+    signal rs_con2       : std_ulogic_vector(3 downto 0);
+    signal rs_neg1       : std_ulogic;
+    signal rs_neg2       : std_ulogic;
+    signal rs_norm       : std_ulogic;
+
     -- Inverse lookup table, indexed by the top 8 fraction bits
     -- The first 256 entries are the reciprocal (1/x) lookup table,
     -- and the remaining 768 entries are the reciprocal square root table.
@@ -705,7 +754,6 @@ begin
         variable new_exp     : signed(EXP_BITS-1 downto 0);
         variable exp_tiny    : std_ulogic;
         variable exp_huge    : std_ulogic;
-        variable renormalize : std_ulogic;
         variable clz         : std_ulogic_vector(5 downto 0);
         variable set_x       : std_ulogic;
         variable mshift      : signed(EXP_BITS-1 downto 0);
@@ -741,6 +789,12 @@ begin
         variable mult_mask   : std_ulogic;
         variable sign_bit    : std_ulogic;
         variable rnd_b32     : std_ulogic;
+        variable rexp_in1    : signed(EXP_BITS-1 downto 0);
+        variable rexp_in2    : signed(EXP_BITS-1 downto 0);
+        variable rexp_cin    : std_ulogic;
+        variable rexp_sum    : signed(EXP_BITS-1 downto 0);
+        variable rsh_in1     : signed(EXP_BITS-1 downto 0);
+        variable rsh_in2     : signed(EXP_BITS-1 downto 0);
         variable int_result  : std_ulogic;
         variable illegal     : std_ulogic;
     begin
@@ -884,7 +938,6 @@ begin
         end if;
 
         v.update_fprf := '0';
-        v.shift := to_signed(0, EXP_BITS);
         v.first := '0';
         v.opsel_a := AIN_R;
         opsel_ainv <= '0';
@@ -900,7 +953,6 @@ begin
         arith_done := '0';
         invalid := '0';
         zero_divide := '0';
-        renormalize := '0';
         set_x := '0';
         qnan_result := '0';
         set_a := '0';
@@ -928,6 +980,20 @@ begin
         rnd_b32 := '0';
         int_result := '0';
         illegal := '0';
+
+        re_sel1 <= REXP1_ZERO;
+        re_sel2 <= REXP2_CON;
+        re_con2 <= RECON2_ZERO;
+        re_neg1 <= '0';
+        re_neg2 <= '0';
+        re_set_result <= '0';
+        rs_sel1 <= RSH1_ZERO;
+        rs_sel2 <= RSH2_CON;
+        rs_con2 <= RSCON2_ZERO;
+        rs_neg1 <= '0';
+        rs_neg2 <= '0';
+        rs_norm <= '0';
+
         case r.state is
             when IDLE =>
                 v.use_a := '0';
@@ -1090,7 +1156,8 @@ begin
                 -- r.opsel_a = AIN_B
                 v.instr_done := '1';
                 update_fx := '1';
-                v.result_exp := r.b.exponent;
+                re_sel2 <= REXP2_B;
+                re_set_result <= '1';
                 if (r.a.class = NAN and r.a.mantissa(QNAN_BIT) = '0') or
                     (r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0') then
                     -- Signalling NAN
@@ -1221,7 +1288,8 @@ begin
             when DO_FMR =>
                 -- r.opsel_a = AIN_B
                 v.result_class := r.b.class;
-                v.result_exp := r.b.exponent;
+                re_sel2 <= REXP2_B;
+                re_set_result <= '1';
                 v.quieten_nan := '0';
                 if r.insn(9) = '1' then
                     v.result_sign := '0';              -- fabs
@@ -1241,7 +1309,12 @@ begin
                 -- r.opsel_a = AIN_B
                 v.result_class := r.b.class;
                 v.result_sign := r.b.negative;
-                v.result_exp := r.b.exponent;
+                re_sel2 <= REXP2_B;
+                re_set_result <= '1';
+                -- set shift to exponent - 52
+                rs_sel1 <= RSH1_B;
+                rs_con2 <= RSCON2_52;
+                rs_neg2 <= '1';
                 v.fpscr(FPSCR_FR) := '0';
                 v.fpscr(FPSCR_FI) := '0';
                 if r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0' then
@@ -1254,7 +1327,6 @@ begin
                         -- integer already, no rounding required
                         arith_done := '1';
                     else
-                        v.shift := r.b.exponent - to_signed(52, EXP_BITS);
                         v.state := FRI_1;
                         v.round_mode := '1' & r.insn(7 downto 6);
                     end if;
@@ -1266,7 +1338,12 @@ begin
                 -- r.opsel_a = AIN_B, r.shift = 0
                 v.result_class := r.b.class;
                 v.result_sign := r.b.negative;
-                v.result_exp := r.b.exponent;
+                re_sel2 <= REXP2_B;
+                re_set_result <= '1';
+                -- set shift to exponent - (-126)
+                rs_sel1 <= RSH1_B;
+                rs_con2 <= RSCON2_MINEXP;
+                rs_neg2 <= '1';
                 v.fpscr(FPSCR_FR) := '0';
                 v.fpscr(FPSCR_FI) := '0';
                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
@@ -1277,7 +1354,6 @@ begin
                 set_x := '1';
                 if r.b.class = FINITE then
                     if r.b.exponent < to_signed(-126, EXP_BITS) then
-                        v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
                         v.state := ROUND_UFLOW;
                     elsif r.b.exponent > to_signed(127, EXP_BITS) then
                         v.state := ROUND_OFLOW;
@@ -1295,7 +1371,10 @@ begin
                 -- r.opsel_a = AIN_B
                 v.result_class := r.b.class;
                 v.result_sign := r.b.negative;
-                v.result_exp := r.b.exponent;
+                re_sel2 <= REXP2_B;
+                re_set_result <= '1';
+                rs_sel1 <= RSH1_B;
+                rs_neg2 <= '1';
                 v.fpscr(FPSCR_FR) := '0';
                 v.fpscr(FPSCR_FI) := '0';
                 if r.b.class = NAN and r.b.mantissa(53) = '0' then
@@ -1305,6 +1384,7 @@ begin
                 end if;
 
                 int_result := '1';
+
                 case r.b.class is
                     when ZERO =>
                         arith_done := '1';
@@ -1315,14 +1395,16 @@ begin
                         elsif r.b.exponent >= to_signed(52, EXP_BITS) then
                             -- integer already, no rounding required,
                             -- shift into final position
-                            v.shift := r.b.exponent - to_signed(UNIT_BIT, EXP_BITS);
+                            -- set shift to exponent - 56
+                            rs_con2 <= RSCON2_UNIT;
                             if r.insn(8) = '1' and r.b.negative = '1' then
                                 v.state := INT_OFLOW;
                             else
                                 v.state := INT_ISHIFT;
                             end if;
                         else
-                            v.shift := r.b.exponent - to_signed(52, EXP_BITS);
+                            -- set shift to exponent - 52
+                            rs_con2 <= RSCON2_52;
                             v.state := INT_SHIFT;
                         end if;
                     when INFINITY | NAN =>
@@ -1339,7 +1421,8 @@ begin
                     v.result_sign := '1';
                 end if;
                 v.result_class := r.b.class;
-                v.result_exp := to_signed(UNIT_BIT, EXP_BITS);
+                re_con2 <= RECON2_UNIT;
+                re_set_result <= '1';
                 v.fpscr(FPSCR_FR) := '0';
                 v.fpscr(FPSCR_FI) := '0';
                 if r.b.class = ZERO then
@@ -1353,7 +1436,12 @@ begin
                 -- r.opsel_a = AIN_A
                 v.result_sign := r.a.negative;
                 v.result_class := r.a.class;
-                v.result_exp := r.a.exponent;
+                re_sel1 <= REXP1_A;
+                re_set_result <= '1';
+                -- set shift to a.exp - b.exp
+                rs_sel1 <= RSH1_B;
+                rs_neg1 <= '1';
+                rs_sel2 <= RSH2_A;
                 v.fpscr(FPSCR_FR) := '0';
                 v.fpscr(FPSCR_FI) := '0';
                 v.use_a := '1';
@@ -1364,7 +1452,6 @@ begin
                     v.add_bsmall := r.exp_cmp;
                     v.opsel_a := AIN_B;
                     if r.exp_cmp = '0' then
-                        v.shift := r.a.exponent - r.b.exponent;
                         v.result_sign := r.b.negative xnor r.insn(1);
                         if r.a.exponent = r.b.exponent then
                             v.state := ADD_2;
@@ -1408,8 +1495,10 @@ begin
                 v.fpscr(FPSCR_FI) := '0';
                 v.use_a := '1';
                 v.use_c := '1';
+                re_sel1 <= REXP1_A;
+                re_sel2 <= REXP2_C;
+                re_set_result <= '1';
                 if r.a.class = FINITE and r.c.class = FINITE then
-                    v.result_exp := r.a.exponent + r.c.exponent;
                     -- Renormalize denorm operands
                     if r.a.mantissa(UNIT_BIT) = '0' then
                         v.state := RENORM_A;
@@ -1446,7 +1535,10 @@ begin
                 v.use_a := '1';
                 v.use_b := '1';
                 v.result_sign := r.a.negative xor r.b.negative;
-                v.result_exp := r.a.exponent - r.b.exponent;
+                re_sel1 <= REXP1_A;
+                re_sel2 <= REXP2_B;
+                re_neg2 <= '1';
+                re_set_result <= '1';
                 v.count := "00";
                 if r.a.class = FINITE and r.b.class = FINITE then
                     -- Renormalize denorm operands
@@ -1503,9 +1595,10 @@ begin
                 v.fpscr(FPSCR_FR) := '0';
                 v.fpscr(FPSCR_FI) := '0';
                 v.use_b := '1';
+                re_sel2 <= REXP2_B;
+                re_set_result <= '1';
                 case r.b.class is
                     when FINITE =>
-                        v.result_exp := r.b.exponent;
                         if r.b.negative = '1' then
                             v.fpscr(FPSCR_VXSQRT) := '1';
                             qnan_result := '1';
@@ -1514,7 +1607,8 @@ begin
                         elsif r.b.exponent(0) = '0' then
                             v.state := SQRT_1;
                         else
-                            v.shift := to_signed(1, EXP_BITS);
+                            -- set shift to 1
+                            rs_con2 <= RSCON2_1;
                             v.state := RENORM_B2;
                         end if;
                     when NAN =>
@@ -1538,7 +1632,8 @@ begin
                 v.fpscr(FPSCR_FR) := '0';
                 v.fpscr(FPSCR_FI) := '0';
                 v.use_b := '1';
-                v.result_exp := r.b.exponent;
+                re_sel2 <= REXP2_B;
+                re_set_result <= '1';
                 case r.b.class is
                     when FINITE =>
                         if r.b.mantissa(UNIT_BIT) = '0' then
@@ -1564,10 +1659,12 @@ begin
                 v.fpscr(FPSCR_FR) := '0';
                 v.fpscr(FPSCR_FI) := '0';
                 v.use_b := '1';
-                v.shift := to_signed(1, EXP_BITS);
+                re_sel2 <= REXP2_B;
+                re_set_result <= '1';
+                -- set shift to 1
+                rs_con2 <= RSCON2_1;
                 case r.b.class is
                     when FINITE =>
-                        v.result_exp := r.b.exponent;
                         if r.b.negative = '1' then
                             v.fpscr(FPSCR_VXSQRT) := '1';
                             qnan_result := '1';
@@ -1600,7 +1697,12 @@ begin
                 -- else AIN_B
                 v.result_sign := r.a.negative;
                 v.result_class := r.a.class;
-                v.result_exp := r.a.exponent + r.c.exponent;
+                -- put a.exp + c.exp into result_exp
+                re_sel1 <= REXP1_A;
+                re_sel2 <= REXP2_C;
+                re_set_result <= '1';
+                -- put b.exp into shift
+                rs_sel1 <= RSH1_B;
                 v.fpscr(FPSCR_FR) := '0';
                 v.fpscr(FPSCR_FI) := '0';
                 v.use_a := '1';
@@ -1625,6 +1727,7 @@ begin
                         -- addend is bigger, do multiply first
                         v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
                         f_to_multiply.valid <= '1';
+                        v.first := '1';
                         v.state := FMADD_0;
                     else
                         -- product is bigger, shift B first
@@ -1664,7 +1767,7 @@ begin
                 end if;
 
             when RENORM_A =>
-                renormalize := '1';
+                rs_norm <= '1';
                 v.state := RENORM_A2;
                 if r.insn(4) = '1' then
                     v.opsel_a := AIN_C;
@@ -1675,7 +1778,8 @@ begin
             when RENORM_A2 =>
                 -- r.opsel_a = AIN_C for fmul/fmadd, AIN_B for fdiv
                 set_a := '1';
-                v.result_exp := new_exp;
+                re_sel2 <= REXP2_NE;
+                re_set_result <= '1';
                 if r.insn(4) = '1' then
                     if r.c.mantissa(UNIT_BIT) = '1' then
                         if r.insn(3) = '0' or r.b.class = ZERO then
@@ -1702,23 +1806,25 @@ begin
                 end if;
 
             when RENORM_B =>
-                renormalize := '1';
+                rs_norm <= '1';
                 renorm_sqrt := r.is_sqrt;
                 v.state := RENORM_B2;
 
             when RENORM_B2 =>
                 set_b := '1';
-                v.result_exp := new_exp;
+                re_sel2 <= REXP2_NE;
+                re_set_result <= '1';
                 v.opsel_a := AIN_B;
                 v.state := LOOKUP;
 
             when RENORM_C =>
-                renormalize := '1';
+                rs_norm <= '1';
                 v.state := RENORM_C2;
 
             when RENORM_C2 =>
                 set_c := '1';
-                v.result_exp := new_exp;
+                re_sel2 <= REXP2_NE;
+                re_set_result <= '1';
                 if r.insn(3) = '0' or r.b.class = ZERO then
                     v.first := '1';
                     v.state := MULT_1;
@@ -1733,14 +1839,20 @@ begin
 
             when ADD_1 =>
                 -- transferring B to R
-                v.shift := r.b.exponent - r.a.exponent;
-                v.result_exp := r.b.exponent;
+                re_sel2 <= REXP2_B;
+                re_set_result <= '1';
+                -- set shift to b.exp - a.exp
+                rs_sel1 <= RSH1_B;
+                rs_sel2 <= RSH2_A;
+                rs_neg2 <= '1';
                 v.longmask := '0';
                 v.state := ADD_SHIFT;
 
             when ADD_SHIFT =>
                 -- r.shift = - exponent difference, r.longmask = 0
                 opsel_r <= RES_SHIFT;
+                re_sel2 <= REXP2_NE;
+                re_set_result <= '1';
                 v.x := s_nz;
                 set_x := '1';
                 v.longmask := r.single_prec;
@@ -1756,12 +1868,15 @@ begin
                 opsel_b <= BIN_R;
                 opsel_binv <= r.is_subtract;
                 carry_in <= r.is_subtract and not r.x;
-                v.shift := to_signed(-1, EXP_BITS);
+                -- set shift to -1
+                rs_con2 <= RSCON2_1;
+                rs_neg2 <= '1';
                 v.state := ADD_3;
 
             when ADD_3 =>
                 -- check for overflow or negative result (can't get both)
                 -- r.shift = -1
+                re_sel2 <= REXP2_NE;
                 if r.r(63) = '1' then
                     -- result is opposite sign to expected
                     v.result_sign := not r.result_sign;
@@ -1771,6 +1886,7 @@ begin
                 elsif r.r(UNIT_BIT + 1) = '1' then
                     -- sum overflowed, shift right
                     opsel_r <= RES_SHIFT;
+                    re_set_result <= '1';
                     set_x := '1';
                     if exp_huge = '1' then
                         v.state := ROUND_OFLOW;
@@ -1789,7 +1905,7 @@ begin
                     end if;
                     arith_done := '1';
                 else
-                    renormalize := '1';
+                    rs_norm <= '1';
                     v.state := NORMALIZE;
                 end if;
 
@@ -1820,26 +1936,36 @@ begin
                 end if;
 
             when FMADD_0 =>
+                -- r.shift is b.exp, so new_exp is a.exp + c.exp - b.exp
+                -- (first time through; subsequent times we preserve v.shift)
                 -- Addend is bigger here
-                v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
+                -- set shift to a.exp + c.exp - b.exp
                 -- note v.shift is at most -2 here
-                v.shift := r.result_exp - r.b.exponent;
+                if r.first = '1' then
+                    rs_sel1 <= RSH1_NE;
+                else
+                    rs_sel1 <= RSH1_S;
+                end if;
                 opsel_r <= RES_MULT;
                 opsel_s <= S_MULT;
                 set_s := '1';
-                f_to_multiply.valid <= r.first;
                 if multiply_to_f.valid = '1' then
                     v.longmask := '0';
                     v.state := ADD_SHIFT;
                 end if;
 
             when FMADD_1 =>
+                -- r.shift is b.exp, so new_exp is a.exp + c.exp - b.exp
                 -- product is bigger here
                 -- shift B right and use it as the addend to the multiplier
-                v.shift := r.b.exponent - r.result_exp + to_signed(64, EXP_BITS);
                 -- for subtract, multiplier does B - A * C
                 v.result_sign := r.a.negative xor r.c.negative xor r.insn(2) xor r.is_subtract;
-                v.result_exp := r.b.exponent;
+                re_sel2 <= REXP2_B;
+                re_set_result <= '1';
+                -- set shift to b.exp - result_exp + 64
+                rs_sel1 <= RSH1_NE;
+                rs_neg1 <= '1';
+                rs_con2 <= RSCON2_64;
                 v.state := FMADD_2;
 
             when FMADD_2 =>
@@ -1847,12 +1973,17 @@ begin
                 -- r.shift = addend exp - product exp + 64, r.r = r.b.mantissa
                 set_s := '1';
                 opsel_s <= S_SHIFT;
-                v.shift := r.shift - to_signed(64, EXP_BITS);
+                -- set shift to r.shift - 64
+                rs_sel1 <= RSH1_S;
+                rs_con2 <= RSCON2_64;
+                rs_neg2 <= '1';
                 v.state := FMADD_3;
 
             when FMADD_3 =>
                 -- r.shift = addend exp - product exp
                 opsel_r <= RES_SHIFT;
+                re_sel2 <= REXP2_NE;
+                re_set_result <= '1';
                 v.first := '1';
                 v.state := FMADD_4;
 
@@ -1876,11 +2007,14 @@ begin
                     opsel_s <= S_NEG;
                     set_s := '1';
                 end if;
-                v.shift := to_signed(UNIT_BIT, EXP_BITS);
+                -- set shift to UNIT_BIT
+                rs_con2 <= RSCON2_UNIT;
                 v.state := FMADD_6;
 
             when FMADD_6 =>
                 -- r.shift = UNIT_BIT (or 0, but only if r is now nonzero)
+                re_sel2 <= REXP2_NE;
+                rs_norm <= '1';
                 if (r.r(UNIT_BIT + 2) or r_hi_nz or r_lo_nz or (or (r.r(DP_LSB - 1 downto 0)))) = '0' then
                     if s_nz = '0' then
                         -- must be a subtraction, and r.x must be zero
@@ -1891,13 +2025,13 @@ begin
                         -- R is all zeroes but there are non-zero bits in S
                         -- so shift them into R and set S to 0
                         opsel_r <= RES_SHIFT;
+                        re_set_result <= '1';
                         set_s := '1';
-                        -- stay in state FMADD_6
+                        v.state := FINISH;
                     end if;
                 elsif r.r(UNIT_BIT + 2 downto UNIT_BIT) = "001" then
                     v.state := FINISH;
                 else
-                    renormalize := '1';
                     v.state := NORMALIZE;
                 end if;
 
@@ -1991,27 +2125,33 @@ begin
                 v.state := FINISH;
 
             when FRE_1 =>
-                v.result_exp := - r.result_exp;
+                re_sel1 <= REXP1_R;
+                re_neg1 <= '1';
+                re_set_result <= '1';
                 opsel_r <= RES_MISC;
                 misc_sel <= "0111";
-                v.shift := to_signed(1, EXP_BITS);
+                -- set shift to 1
+                rs_con2 <= RSCON2_1;
                 v.state := NORMALIZE;
 
             when FTDIV_1 =>
                 v.cr_result(1) := exp_tiny or exp_huge;
+                -- set shift to a.exp
+                rs_sel2 <= RSH2_A;
                 if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then
                     v.instr_done := '1';
                 else
-                    v.shift := r.a.exponent;
                     v.doing_ftdiv := "10";
                 end if;
 
             when RSQRT_1 =>
                 opsel_r <= RES_MISC;
                 misc_sel <= "0111";
-                sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
-                v.result_exp := - sqrt_exp;
-                v.shift := to_signed(1, EXP_BITS);
+                re_sel1 <= REXP1_BHALF;
+                re_neg1 <= '1';
+                re_set_result <= '1';
+                -- set shift to 1
+                rs_con2 <= RSCON2_1;
                 v.state := NORMALIZE;
 
             when SQRT_1 =>
@@ -2023,7 +2163,9 @@ begin
                 msel_1 <= MUL1_B;
                 msel_2 <= MUL2_LUT;
                 f_to_multiply.valid <= '1';
-                v.shift := to_signed(-1, EXP_BITS);
+                -- set shift to -1
+                rs_con2 <= RSCON2_1;
+                rs_neg2 <= '1';
                 v.count := "00";
                 v.state := SQRT_2;
 
@@ -2032,6 +2174,8 @@ begin
                 -- not expecting multiplier result yet
                 -- r.shift = -1
                 opsel_r <= RES_SHIFT;
+                re_sel2 <= REXP2_NE;
+                re_set_result <= '1';
                 v.first := '1';
                 v.state := SQRT_3;
 
@@ -2132,9 +2276,10 @@ begin
             when SQRT_10 =>
                 -- Add the bottom 8 bits of P, sign-extended, onto R.
                 opsel_b <= BIN_PS8;
-                sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
-                v.result_exp := sqrt_exp;
-                v.shift := to_signed(1, EXP_BITS);
+                re_sel1 <= REXP1_BHALF;
+                re_set_result <= '1';
+                -- set shift to 1
+                rs_con2 <= RSCON2_1;
                 v.first := '1';
                 v.state := SQRT_11;
 
@@ -2167,13 +2312,19 @@ begin
             when INT_SHIFT =>
                 -- r.shift = b.exponent - 52
                 opsel_r <= RES_SHIFT;
+                re_sel2 <= REXP2_NE;
+                re_set_result <= '1';
                 set_x := '1';
                 v.state := INT_ROUND;
-                v.shift := to_signed(52 - UNIT_BIT, EXP_BITS);
+                -- set shift to -4 (== 52 - UNIT_BIT)
+                rs_con2 <= RSCON2_UNIT_52;
+                rs_neg2 <= '1';
 
             when INT_ROUND =>
                 -- r.shift = -4 (== 52 - UNIT_BIT)
                 opsel_r <= RES_SHIFT;
+                re_sel2 <= REXP2_NE;
+                re_set_result <= '1';
                 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
                 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
                 -- Check for negative values that don't round to 0 for fcti*u*
@@ -2187,6 +2338,8 @@ begin
             when INT_ISHIFT =>
                 -- r.shift = b.exponent - UNIT_BIT;
                 opsel_r <= RES_SHIFT;
+                re_sel2 <= REXP2_NE;
+                re_set_result <= '1';
                 v.state := INT_FINAL;
 
             when INT_FINAL =>
@@ -2248,6 +2401,8 @@ begin
             when FRI_1 =>
                 -- r.shift = b.exponent - 52
                 opsel_r <= RES_SHIFT;
+                re_sel2 <= REXP2_NE;
+                re_set_result <= '1';
                 set_x := '1';
                 v.state := ROUNDING;
 
@@ -2255,13 +2410,16 @@ begin
                 if r.is_multiply = '1' and px_nz = '1' then
                     v.x := '1';
                 end if;
+                -- set shift to new_exp - min_exp (N.B. rs_norm overrides this)
+                rs_sel1 <= RSH1_NE;
+                rs_con2 <= RSCON2_MINEXP;
+                rs_neg2 <= '1';
                 if r.r(63 downto UNIT_BIT) /= std_ulogic_vector(to_unsigned(1, 64 - UNIT_BIT)) then
-                    renormalize := '1';
+                    rs_norm <= '1';
                     v.state := NORMALIZE;
                 else
                     set_x := '1';
                     if exp_tiny = '1' then
-                        v.shift := new_exp - min_exp;
                         v.state := ROUND_UFLOW;
                     elsif exp_huge = '1' then
                         v.state := ROUND_OFLOW;
@@ -2272,11 +2430,16 @@ begin
 
             when NORMALIZE =>
                 -- Shift so we have 9 leading zeroes (we know R is non-zero)
-                -- r.shift = clz(r.r) - 9
+                -- r.shift = clz(r.r) - 7
                 opsel_r <= RES_SHIFT;
+                re_sel2 <= REXP2_NE;
+                re_set_result <= '1';
+                -- set shift to new_exp - min_exp
+                rs_sel1 <= RSH1_NE;
+                rs_con2 <= RSCON2_MINEXP;
+                rs_neg2 <= '1';
                 set_x := '1';
                 if exp_tiny = '1' then
-                    v.shift := new_exp - min_exp;
                     v.state := ROUND_UFLOW;
                 elsif exp_huge = '1' then
                     v.state := ROUND_OFLOW;
@@ -2291,15 +2454,19 @@ begin
                     -- disabled underflow exception case
                     -- have to denormalize before rounding
                     opsel_r <= RES_SHIFT;
+                    re_sel2 <= REXP2_NE;
+                    re_set_result <= '1';
                     set_x := '1';
                     v.state := ROUNDING;
                 else
                     -- enabled underflow exception case
                     -- if denormalized, have to normalize before rounding
                     v.fpscr(FPSCR_UX) := '1';
-                    v.result_exp := r.result_exp + bias_exp;
+                    re_sel1 <= REXP1_R;
+                    re_con2 <= RECON2_BIAS;
+                    re_set_result <= '1';
                     if r.r(UNIT_BIT) = '0' then
-                        renormalize := '1';
+                        rs_norm <= '1';
                         v.state := NORMALIZE;
                     else
                         v.state := ROUNDING;
@@ -2321,13 +2488,17 @@ begin
                         v.fpscr(FPSCR_FR) := '0';
                     end if;
                     -- construct largest representable number
-                    v.result_exp := max_exp;
+                    re_con2 <= RECON2_MAX;
+                    re_set_result <= '1';
                     opsel_r <= RES_MISC;
                     misc_sel <= "001" & r.single_prec;
                     arith_done := '1';
                 else
                     -- enabled overflow exception
-                    v.result_exp := r.result_exp - bias_exp;
+                    re_sel1 <= REXP1_R;
+                    re_con2 <= RECON2_BIAS;
+                    re_neg2 <= '1';
+                    re_set_result <= '1';
                     v.state := ROUNDING;
                 end if;
 
@@ -2338,13 +2509,15 @@ begin
                 if round(1) = '1' then
                     -- increment the LSB for the precision
                     opsel_b <= BIN_RND;
-                    v.shift := to_signed(-1, EXP_BITS);
+                    -- set shift to -1
+                    rs_con2 <= RSCON2_1;
+                    rs_neg2 <= '1';
                     v.state := ROUNDING_2;
                 else
                     if r.r(UNIT_BIT) = '0' then
                         -- result after masking could be zero, or could be a
                         -- denormalized result that needs to be renormalized
-                        renormalize := '1';
+                        rs_norm <= '1';
                         v.state := ROUNDING_3;
                     else
                         arith_done := '1';
@@ -2361,8 +2534,10 @@ begin
                 -- Check for overflow during rounding
                 -- r.shift = -1
                 v.x := '0';
+                re_sel2 <= REXP2_NE;
                 if r.r(UNIT_BIT + 1) = '1' then
                     opsel_r <= RES_SHIFT;
+                    re_set_result <= '1';
                     if exp_huge = '1' then
                         v.state := ROUND_OFLOW;
                     else
@@ -2370,7 +2545,7 @@ begin
                     end if;
                 elsif r.r(UNIT_BIT) = '0' then
                     -- Do CLZ so we can renormalize the result
-                    renormalize := '1';
+                    rs_norm <= '1';
                     v.state := ROUNDING_3;
                 else
                     arith_done := '1';
@@ -2379,6 +2554,11 @@ begin
             when ROUNDING_3 =>
-                -- r.shift = clz(r.r) - 9
+                -- r.shift = clz(r.r) - 7
                 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
+                re_sel2 <= REXP2_NE;
+                -- set shift to new_exp - min_exp (== -1022)
+                rs_sel1 <= RSH1_NE;
+                rs_con2 <= RSCON2_MINEXP;
+                rs_neg2 <= '1';
                 if mant_nz = '0' then
                     v.result_class := ZERO;
                     if r.is_subtract = '1' then
@@ -2389,8 +2569,8 @@ begin
                 else
                     -- Renormalize result after rounding
                     opsel_r <= RES_SHIFT;
+                    re_set_result <= '1';
                     v.denorm := exp_tiny;
-                    v.shift := new_exp - to_signed(-1022, EXP_BITS);
                     if new_exp < to_signed(-1022, EXP_BITS) then
                         v.state := DENORM;
                     else
@@ -2401,6 +2581,8 @@ begin
             when DENORM =>
                 -- r.shift = result_exp - -1022
                 opsel_r <= RES_SHIFT;
+                re_sel2 <= REXP2_NE;
+                re_set_result <= '1';
                 arith_done := '1';
 
             when NAN_RESULT =>
@@ -2425,17 +2607,18 @@ begin
                 case r.opsel_a is
                     when AIN_B =>
                         v.result_sign := r.b.negative xor r.negate;
-                        v.result_exp := r.b.exponent;
+                        re_sel2 <= REXP2_B;
                         v.result_class := r.b.class;
                     when AIN_C =>
                         v.result_sign := r.c.negative xor r.negate;
-                        v.result_exp := r.c.exponent;
+                        re_sel2 <= REXP2_C;
                         v.result_class := r.c.class;
                     when others =>
                         v.result_sign := r.a.negative xor r.negate;
-                        v.result_exp := r.a.exponent;
+                        re_sel1 <= REXP1_A;
                         v.result_class := r.a.class;
                 end case;
+                re_set_result <= '1';
                 arith_done := '1';
 
             when DO_IDIVMOD =>
@@ -2456,12 +2639,13 @@ begin
                         carry_in <= '1';
                     end if;
                     v.result_class := FINITE;
-                    v.result_exp := to_signed(UNIT_BIT, EXP_BITS);
+                    re_con2 <= RECON2_UNIT;
+                    re_set_result <= '1';
                     v.state := IDIV_NORMB;
                 end if;
             when IDIV_NORMB =>
                 -- do count-leading-zeroes on B (now in R)
-                renormalize := '1';
+                rs_norm <= '1';
                 -- save the original value of B or |B| in C
                 set_c := '1';
                 v.state := IDIV_NORMB2;
@@ -2469,6 +2653,8 @@ begin
                 -- get B into the range [1, 2) in 8.56 format
                 set_x := '1';           -- record if any 1 bits shifted out
                 opsel_r <= RES_SHIFT;
+                re_sel2 <= REXP2_NE;
+                re_set_result <= '1';
                 v.state := IDIV_NORMB3;
             when IDIV_NORMB3 =>
                 -- add the X bit onto R to round up B
@@ -2483,12 +2669,13 @@ begin
                     opsel_ainv <= '1';
                     carry_in <= '1';
                 end if;
-                v.result_exp := to_signed(UNIT_BIT, EXP_BITS);
+                re_con2 <= RECON2_UNIT;
+                re_set_result <= '1';
                 v.opsel_a := AIN_C;
                 v.state := IDIV_CLZA2;
             when IDIV_CLZA2 =>
                 -- r.opsel_a = AIN_C
-                renormalize := '1';
+                rs_norm <= '1';
                 -- write the dividend back into A in case we negated it
                 set_a_mant := '1';
                 -- while doing the count-leading-zeroes on A,
@@ -2500,7 +2687,9 @@ begin
                 v.state := IDIV_CLZA3;
             when IDIV_CLZA3 =>
                 -- save the exponent of A (but don't overwrite the mantissa)
-                v.a.exponent := new_exp;
+                set_a_exp := '1';
+                re_sel2 <= REXP2_NE;
+                re_set_result <= '1';
                 v.div_close := '0';
                 if new_exp = r.b.exponent then
                     v.div_close := '1';
@@ -2521,9 +2710,9 @@ begin
                 end if;
             when IDIV_NR0 =>
                 -- reduce number of Newton-Raphson iterations for small A
-                if r.divext = '1' or new_exp >= to_signed(32, EXP_BITS) then
+                if r.divext = '1' or r.result_exp >= to_signed(32, EXP_BITS) then
                     v.count := "00";
-                elsif new_exp >= to_signed(16, EXP_BITS) then
+                elsif r.result_exp >= to_signed(16, EXP_BITS) then
                     v.count := "01";
                 else
                     v.count := "10";
@@ -2567,7 +2756,8 @@ begin
                 f_to_multiply.valid <= r.first;
                 pshift := '1';
                 v.opsel_a := AIN_A;
-                v.shift := to_signed(64, EXP_BITS);
+                -- set shift to 64
+                rs_con2 <= RSCON2_64;
                 -- Get 0.5 into R in case the inverse estimate turns out to be
                 -- less than 0.5, in which case we want to use 0.5, to avoid
                 -- infinite loops in some cases.
@@ -2587,7 +2777,8 @@ begin
                 opsel_r <= RES_MISC;
                 misc_sel <= "0001";
                 v.opsel_a := AIN_A;
-                v.shift := to_signed(64, EXP_BITS);
+                -- set shift to 64
+                rs_con2 <= RSCON2_64;
                 v.state := IDIV_DODIV;
             when IDIV_DODIV =>
                 -- r.opsel_a = AIN_A
@@ -2604,15 +2795,19 @@ begin
                 -- put that into B, which now holds the quotient
                 set_b_mant := '1';
                 if r.divext = '0' then
-                    v.shift := to_signed(-UNIT_BIT, EXP_BITS);
+                    -- set shift to -56
+                    rs_con2 <= RSCON2_UNIT;
+                    rs_neg2 <= '1';
                     v.first := '1';
                     v.state := IDIV_DIV;
                 elsif r.single_prec = '1' then
                     -- divwe[u][o], shift A left 32 bits
-                    v.shift := to_signed(32, EXP_BITS);
+                    -- set shift to 32
+                    rs_con2 <= RSCON2_32;
                     v.state := IDIV_SH32;
                 elsif r.div_close = '0' then
-                    v.shift := to_signed(64 - UNIT_BIT, EXP_BITS);
+                    -- set shift to 64 - UNIT_BIT (== 8)
+                    rs_con2 <= RSCON2_64_UNIT;
                     v.state := IDIV_EXTDIV;
                 else
                     -- handle top bit of quotient specially
@@ -2623,7 +2818,9 @@ begin
             when IDIV_SH32 =>
                 -- r.shift = 32, R contains the dividend
                 opsel_r <= RES_SHIFT;
-                v.shift := to_signed(-UNIT_BIT, EXP_BITS);
+                -- set shift to -UNIT_BIT (== -56)
+                rs_con2 <= RSCON2_UNIT;
+                rs_neg2 <= '1';
                 v.first := '1';
                 v.state := IDIV_DIV;
             when IDIV_DIV =>
@@ -2637,7 +2834,9 @@ begin
                 f_to_multiply.valid <= r.first;
                 pshift := '1';
                 opsel_r <= RES_MULT;
-                v.shift := - r.b.exponent;
+                -- set shift to - b.exp
+                rs_sel1 <= RSH1_B;
+                rs_neg1 <= '1';
                 if multiply_to_f.valid = '1' then
                     v.state := IDIV_DIV2;
                 end if;
@@ -2670,7 +2869,8 @@ begin
                 if r.divmod = '0' then
                     v.opsel_a := AIN_B;
                 end if;
-                v.shift := to_signed(UNIT_BIT, EXP_BITS);
+                -- set shift to UNIT_BIT (== 56)
+                rs_con2 <= RSCON2_UNIT;
                 if pcmpc_lt = '1' or pcmpc_eq = '1' then
                     if r.divmod = '0' then
                         v.state := IDIV_DIVADJ;
@@ -2687,7 +2887,9 @@ begin
             when IDIV_DIV5 =>
                 pshift := '1';
                 opsel_r <= RES_MULT;
-                v.shift := - r.b.exponent;
+                -- set shift to - b.exp
+                rs_sel1 <= RSH1_B;
+                rs_neg1 <= '1';
                 if multiply_to_f.valid = '1' then
                     v.state := IDIV_DIV6;
                 end if;
@@ -2727,7 +2929,8 @@ begin
                 if r.divmod = '0' then
                     v.opsel_a := AIN_B;
                 end if;
-                v.shift := to_signed(UNIT_BIT, EXP_BITS);
+                -- set shift to UNIT_BIT (== 56)
+                rs_con2 <= RSCON2_UNIT;
                 if r.divmod = '0' then
                     v.state := IDIV_DIVADJ;
                 elsif pcmpc_eq = '1' then
@@ -2737,14 +2940,18 @@ begin
                 end if;
             when IDIV_EXT_TBH =>
                 -- r.opsel_a = AIN_C; get divisor into R and prepare to shift left
-                v.shift := to_signed(63, EXP_BITS) - r.b.exponent;
+                -- set shift to 63 - b.exp
+                rs_sel1 <= RSH1_B;
+                rs_neg1 <= '1';
+                rs_con2 <= RSCON2_63;
                 v.opsel_a := AIN_A;
                 v.state := IDIV_EXT_TBH2;
             when IDIV_EXT_TBH2 =>
                 -- r.opsel_a = AIN_A; divisor is in R
                 -- r.shift = 63 - b.exponent; shift and put into B
                 set_b_mant := '1';
-                v.shift := to_signed(64 - UNIT_BIT, EXP_BITS);
+                -- set shift to 64 - UNIT_BIT (== 8)
+                rs_con2 <= RSCON2_64_UNIT;
                 v.state := IDIV_EXT_TBH3;
             when IDIV_EXT_TBH3 =>
                 -- Dividing (A << 64) by C
@@ -2752,7 +2959,10 @@ begin
                 -- Put A in the top 64 bits of Ahi/A/Alo
                 set_a_hi := '1';
                 set_a_mant := '1';
-                v.shift := to_signed(64, EXP_BITS) - r.b.exponent;
+                -- set shift to 64 - b.exp
+                rs_sel1 <= RSH1_B;
+                rs_neg1 <= '1';
+                rs_con2 <= RSCON2_64;
                 v.state := IDIV_EXT_TBH4;
             when IDIV_EXT_TBH4 =>
                 -- dividend (A) is in R
@@ -2760,7 +2970,8 @@ begin
                 opsel_r <= RES_SHIFT;
                 -- top bit of A gets lost in the shift, so handle it specially
                 v.opsel_a := AIN_B;
-                v.shift := to_signed(63, EXP_BITS);
+                -- set shift to 63
+                rs_con2 <= RSCON2_63;
                 v.state := IDIV_EXT_TBH5;
             when IDIV_EXT_TBH5 =>
                 -- r.opsel_a = AIN_B, r.shift = 63
@@ -2779,7 +2990,10 @@ begin
                 -- Put A in the top 64 bits of Ahi/A/Alo
                 set_a_hi := '1';
                 set_a_mant := '1';
-                v.shift := to_signed(64, EXP_BITS) - r.b.exponent;
+                -- set shift to 64 - b.exp
+                rs_sel1 <= RSH1_B;
+                rs_neg1 <= '1';
+                rs_con2 <= RSCON2_64;
                 v.state := IDIV_EXTDIV1;
             when IDIV_EXTDIV1 =>
                 -- dividend is in R
@@ -2816,7 +3030,10 @@ begin
                 opsel_r <= RES_MULT;
                 opsel_s <= S_MULT;
                 set_s := '1';
-                v.shift := to_signed(UNIT_BIT, EXP_BITS) - r.b.exponent;
+                -- set shift to UNIT_BIT - b.exp
+                rs_sel1 <= RSH1_B;
+                rs_neg1 <= '1';
+                rs_con2 <= RSCON2_UNIT;
                 if multiply_to_f.valid = '1' then
                     v.state := IDIV_EXTDIV5;
                 end if;
@@ -3200,17 +3417,100 @@ begin
             v.c.mantissa := shift_res;
         end if;
 
-        if opsel_r = RES_SHIFT then
-            v.result_exp := new_exp;
+        -- exponent data path
+        case re_sel1 is
+            when REXP1_R =>
+                rexp_in1 := r.result_exp;
+            when REXP1_A =>
+                rexp_in1 := r.a.exponent;
+            when REXP1_BHALF =>
+                rexp_in1 := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
+            when others =>
+                rexp_in1 := to_signed(0, EXP_BITS);
+        end case;
+        if re_neg1 = '1' then
+            rexp_in1 := not rexp_in1;
         end if;
-
-        if renormalize = '1' then
+        case re_sel2 is
+            when REXP2_NE =>
+                rexp_in2 := new_exp;
+            when REXP2_C =>
+                rexp_in2 := r.c.exponent;
+            when REXP2_B =>
+                rexp_in2 := r.b.exponent;
+            when others =>
+                case re_con2 is
+                    when RECON2_UNIT =>
+                        rexp_in2 := to_signed(UNIT_BIT, EXP_BITS);
+                    when RECON2_MAX =>
+                        rexp_in2 := max_exp;
+                    when RECON2_BIAS =>
+                        rexp_in2 := bias_exp;
+                    when others =>
+                        rexp_in2 := to_signed(0, EXP_BITS);
+                end case;
+        end case;
+        if re_neg2 = '1' then
+            rexp_in2 := not rexp_in2;
+        end if;
+        rexp_cin := re_neg1 or re_neg2;
+        rexp_sum := rexp_in1 + rexp_in2 + rexp_cin;
+        if re_set_result = '1' then
+            v.result_exp := rexp_sum;
+        end if;
+        case rs_sel1 is
+            when RSH1_B =>
+                rsh_in1 := r.b.exponent;
+            when RSH1_NE =>
+                rsh_in1 := new_exp;
+            when RSH1_S =>
+                rsh_in1 := r.shift;
+            when others =>
+                rsh_in1 := to_signed(0, EXP_BITS);
+        end case;
+        if rs_neg1 = '1' then
+            rsh_in1 := not rsh_in1;
+        end if;
+        case rs_sel2 is
+            when RSH2_A =>
+                rsh_in2 := r.a.exponent;
+            when others =>
+                case rs_con2 is
+                    when RSCON2_1 =>
+                        rsh_in2 := to_signed(1, EXP_BITS);
+                    when RSCON2_UNIT_52 =>
+                        rsh_in2 := to_signed(UNIT_BIT - 52, EXP_BITS);
+                    when RSCON2_64_UNIT =>
+                        rsh_in2 := to_signed(64 - UNIT_BIT, EXP_BITS);
+                    when RSCON2_32 =>
+                        rsh_in2 := to_signed(32, EXP_BITS);
+                    when RSCON2_52 =>
+                        rsh_in2 := to_signed(52, EXP_BITS);
+                    when RSCON2_UNIT =>
+                        rsh_in2 := to_signed(UNIT_BIT, EXP_BITS);
+                    when RSCON2_63 =>
+                        rsh_in2 := to_signed(63, EXP_BITS);
+                    when RSCON2_64 =>
+                        rsh_in2 := to_signed(64, EXP_BITS);
+                    when RSCON2_MINEXP =>
+                        rsh_in2 := min_exp;
+                    when others =>
+                        rsh_in2 := to_signed(0, EXP_BITS);
+                end case;
+        end case;
+        if rs_neg2 = '1' then
+            rsh_in2 := not rsh_in2;
+        end if;
+        if rs_norm = '1' then
             clz := count_left_zeroes(r.r);
             if renorm_sqrt = '1' then
                 -- make denormalized value end up with even exponent
                 clz(0) := '1';
             end if;
+            -- do this as a separate dedicated 7-bit adder for timing reasons
             v.shift := resize(signed('0' & clz) - (63 - UNIT_BIT), EXP_BITS);
+        else
+            v.shift := rsh_in1 + rsh_in2 + (rs_neg1 or rs_neg2);
         end if;
 
         if r.update_fprf = '1' then