From 86b826cd7e4cc8ffde6a324a90d4481cbc910ebd Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Thu, 23 Jul 2020 17:56:15 +1000
Subject: [PATCH] FPU: Implement fadd[s] and fsub[s] and add tests for them

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 decode1.vhdl               |  19 ++++-
 fpu.vhdl                   | 150 +++++++++++++++++++++++++++++++++++-
 tests/fpu/fpu.c            | 154 +++++++++++++++++++++++++++++++++++++
 tests/test_fpu.bin         | Bin 21208 -> 24024 bytes
 tests/test_fpu.console_out |   2 +
 5 files changed, 322 insertions(+), 3 deletions(-)

diff --git a/decode1.vhdl b/decode1.vhdl
index 34170dd..737d83c 100644
--- a/decode1.vhdl
+++ b/decode1.vhdl
@@ -58,6 +58,7 @@ architecture behaviour of decode1 is
     type op_59_subop_array_t is array(0 to 31) of decode_rom_t;
     type minor_rom_array_2_t is array(0 to 3) of decode_rom_t;
     type op_63_subop_array_0_t is array(0 to 511) of decode_rom_t;
+    type op_63_subop_array_1_t is array(0 to 15) of decode_rom_t;  -- 16 entries: the table of this type is indexed by the 4-bit field insn(4 downto 1)
 
     constant major_decode_rom_array : major_rom_array_t := (
         --          unit     internal      in1         in2          in3   out   CR   CR   inv  inv  cry   cry  ldst  BR   sgn  upd  rsrv 32b  sgn  rc    lk   sgl
@@ -415,6 +416,8 @@ architecture behaviour of decode1 is
         --             unit   internal       in1   in2   in3   out   CR   CR   inv  inv  cry   cry  ldst  BR   sgn  upd  rsrv 32b  sgn  rc    lk   sgl
         --                          op                               in   out   A   out  in    out  len        ext                                pipe
         2#01110#  =>  (FPU,   OP_FPOP_I,     NONE, FRB,  NONE, FRT,  '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC,   '0', '0'), -- fcfid[u]s
+        2#10100#  =>  (FPU,   OP_FPOP,       FRA,  FRB,  NONE, FRT,  '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC,   '0', '0'), -- fsubs
+        2#10101#  =>  (FPU,   OP_FPOP,       FRA,  FRB,  NONE, FRT,  '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC,   '0', '0'), -- fadds
         others => illegal_inst
         );
 
@@ -461,6 +464,15 @@ architecture behaviour of decode1 is
         others => illegal_inst
         );
 
+    -- Decode table for primary opcode 63 when insn(5) = '1'; indexed by bits 4..1 of instruction word
+    constant decode_op_63h_array : op_63_subop_array_1_t := (
+        --            unit   internal       in1   in2   in3   out   CR   CR   inv  inv  cry   cry  ldst  BR   sgn  upd  rsrv 32b  sgn  rc    lk   sgl
+        --                         op                               in   out   A   out  in    out  len        ext                                pipe
+        2#0100#  =>  (FPU,   OP_FPOP,       FRA,  FRB,  NONE, FRT,  '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0'), -- fsub
+        2#0101#  =>  (FPU,   OP_FPOP,       FRA,  FRB,  NONE, FRT,  '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0'), -- fadd
+        others => illegal_inst
+        );
+
     --                                        unit   internal         in1         in2          in3   out   CR   CR   inv  inv  cry   cry  ldst  BR   sgn  upd  rsrv 32b  sgn  rc    lk   sgl
     --                                                     op                                              in   out   A   out  in    out  len        ext                                 pipe
     constant nop_instr      : decode_rom_t := (ALU,  OP_NOP,          NONE,       NONE,        NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0');
@@ -626,8 +638,11 @@ begin
         when 63 =>
             if HAS_FPU then
                 -- floating point operations, general and double-precision
-                v.decode := decode_op_63l_array(to_integer(unsigned(f_in.insn(4 downto 1) & f_in.insn(10 downto 6))));
-                vi.override := f_in.insn(5);
+                if f_in.insn(5) = '0' then  -- insn(5) = 0: table indexed by insn(4 downto 1) & insn(10 downto 6)
+                    v.decode := decode_op_63l_array(to_integer(unsigned(f_in.insn(4 downto 1) & f_in.insn(10 downto 6))));
+                else  -- insn(5) = 1: table indexed by insn(4 downto 1) only
+                    v.decode := decode_op_63h_array(to_integer(unsigned(f_in.insn(4 downto 1))));
+                end if;
             end if;
 
         when others =>
diff --git a/fpu.vhdl b/fpu.vhdl
index e97461c..e9edfb4 100644
--- a/fpu.vhdl
+++ b/fpu.vhdl
@@ -40,7 +40,9 @@ architecture behaviour of fpu is
                      DO_FMR, DO_FMRG,
                      DO_FCFID, DO_FCTI,
                      DO_FRSP, DO_FRI,
+                     DO_FADD,
                      FRI_1,
+                     ADD_SHIFT, ADD_2, ADD_3,
                      INT_SHIFT, INT_ROUND, INT_ISHIFT,
                      INT_FINAL, INT_CHECK, INT_OFLOW,
                      FINISH, NORMALIZE,
@@ -79,6 +81,9 @@ architecture behaviour of fpu is
         tiny         : std_ulogic;
         denorm       : std_ulogic;
         round_mode   : std_ulogic_vector(2 downto 0);
+        is_subtract  : std_ulogic;  -- set in DO_FADD (finite operands) to "not is_add": the operand magnitudes are subtracted
+        exp_cmp      : std_ulogic;  -- '1' when operand A's decoded exponent is greater than operand B's
+        add_bsmall   : std_ulogic;  -- copy of exp_cmp taken in DO_FADD: B is the operand with the smaller exponent
     end record;
 
     signal r, rin : reg_type;
@@ -89,6 +94,7 @@ architecture behaviour of fpu is
     signal opsel_r       : std_ulogic_vector(1 downto 0);
     signal opsel_ainv    : std_ulogic;
     signal opsel_amask   : std_ulogic;
+    signal opsel_binv    : std_ulogic;
     signal in_a          : std_ulogic_vector(63 downto 0);
     signal in_b          : std_ulogic_vector(63 downto 0);
     signal result        : std_ulogic_vector(63 downto 0);
@@ -368,6 +374,9 @@ begin
         variable mshift      : signed(EXP_BITS-1 downto 0);
         variable need_check  : std_ulogic;
         variable msb         : std_ulogic;
+        variable is_add      : std_ulogic;
+        variable qnan_result : std_ulogic;
+        variable longmask    : std_ulogic;
     begin
         v := r;
         illegal := '0';
@@ -397,10 +406,16 @@ begin
             v.tiny := '0';
             v.denorm := '0';
             v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
+            v.is_subtract := '0';
+            v.add_bsmall := '0';
             adec := decode_dp(e_in.fra, int_input);
             bdec := decode_dp(e_in.frb, int_input);
             v.a := adec;
             v.b := bdec;
+            v.exp_cmp := '0';
+            if adec.exponent > bdec.exponent then
+                v.exp_cmp := '1';
+            end if;
         end if;
 
         r_hi_nz <= or (r.r(55 downto 31));
@@ -433,6 +448,7 @@ begin
         opsel_ainv <= '0';
         opsel_amask <= '0';
         opsel_b <= BIN_ZERO;
+        opsel_binv <= '0';
         opsel_r <= RES_SUM;
         carry_in <= '0';
         misc_sel <= "0000";
@@ -442,6 +458,8 @@ begin
         invalid := '0';
         renormalize := '0';
         set_x := '0';
+        qnan_result := '0';
+        longmask := r.single_prec;
 
         case r.state is
             when IDLE =>
@@ -483,6 +501,8 @@ begin
                         when "01111" =>
                             v.round_mode := "001";
                             v.state := DO_FCTI;
+                        when "10100" | "10101" =>
+                            v.state := DO_FADD;
                         when others =>
                             illegal := '1';
                     end case;
@@ -717,6 +737,117 @@ begin
                     v.state := FINISH;
                 end if;
 
+            when DO_FADD =>
+                -- fadd[s] and fsub[s]
+                opsel_a <= AIN_A;
+                v.result_sign := r.a.negative;  -- defaults: result takes A's sign, class and exponent
+                v.result_class := r.a.class;
+                v.result_exp := r.a.exponent;
+                v.fpscr(FPSCR_FR) := '0';
+                v.fpscr(FPSCR_FI) := '0';
+                is_add := r.a.negative xor r.b.negative xor r.insn(1);  -- insn(1) is 1 for fadd[s], 0 for fsub[s]; is_add = 1 when the magnitudes add
+                if r.a.class = FINITE and r.b.class = FINITE then
+                    v.is_subtract := not is_add;
+                    v.add_bsmall := r.exp_cmp;
+                    if r.exp_cmp = '0' then  -- A's exponent <= B's exponent
+                        v.shift := r.a.exponent - r.b.exponent;  -- zero or negative in this branch
+                        v.result_sign := r.b.negative xnor r.insn(1);  -- B's sign for fadd[s], inverted for fsub[s]
+                        if r.a.exponent = r.b.exponent then
+                            v.state := ADD_2;  -- equal exponents: the ADD_SHIFT step is not needed
+                        else
+                            v.state := ADD_SHIFT;
+                        end if;
+                    else  -- A's exponent > B's exponent
+                        opsel_a <= AIN_B;
+                        v.shift := r.b.exponent - r.a.exponent;  -- negative in this branch
+                        v.result_exp := r.b.exponent;
+                        v.state := ADD_SHIFT;
+                    end if;
+                else
+                    if (r.a.class = NAN and r.a.mantissa(53) = '0') or
+                        (r.b.class = NAN and r.b.mantissa(53) = '0') then
+                        -- Signalling NAN
+                        v.fpscr(FPSCR_VXSNAN) := '1';
+                        invalid := '1';
+                    end if;
+                    if r.a.class = NAN then
+                        -- nothing to do, result is A
+                    elsif r.b.class = NAN then
+                        v.result_class := NAN;
+                        v.result_sign := r.b.negative;
+                        opsel_a <= AIN_B;
+                    elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
+                        -- invalid operation, construct QNaN
+                        v.fpscr(FPSCR_VXISI) := '1';
+                        qnan_result := '1';
+                    elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
+                        -- return -0 for rounding to -infinity
+                        v.result_sign := r.round_mode(1) and r.round_mode(0);
+                    elsif r.a.class = INFINITY or r.b.class = ZERO then
+                        -- nothing to do, result is A
+                    else
+                        -- result is +/- B
+                        v.result_sign := r.b.negative xnor r.insn(1);
+                        v.result_class := r.b.class;
+                        v.result_exp := r.b.exponent;
+                        opsel_a <= AIN_B;
+                    end if;
+                    arith_done := '1';  -- every non-finite/zero-operand case finishes here without entering the ADD_* states
+                end if;
+
+            when ADD_SHIFT =>
+                opsel_r <= RES_SHIFT;  -- presumably selects the shifter output in the result mux; r.shift was set in DO_FADD
+                set_x := '1';
+                longmask := '0';  -- overrides the default (r.single_prec) so that mshift = r.shift without the -29 adjustment
+                v.state := ADD_2;
+
+            when ADD_2 =>
+                if r.add_bsmall = '1' then
+                    opsel_a <= AIN_A;  -- DO_FADD selected B in this case, so A is the other operand
+                else
+                    opsel_a <= AIN_B;  -- DO_FADD selected A in this case, so B is the other operand
+                end if;
+                opsel_b <= BIN_R;
+                opsel_binv <= r.is_subtract;  -- the B adder input is bitwise inverted when subtracting
+                carry_in <= r.is_subtract and not r.x;  -- carry in only when subtracting and r.x is clear
+                v.shift := to_signed(-1, EXP_BITS);
+                v.state := ADD_3;
+
+            when ADD_3 =>
+                -- check for overflow or negative result (can't get both)
+                if r.r(63) = '1' then
+                    -- result is opposite sign to expected
+                    v.result_sign := not r.result_sign;
+                    opsel_ainv <= '1';  -- NOTE(review): inverting the A input with carry_in = 1 looks like a two's-complement negation - confirm against the data path
+                    carry_in <= '1';
+                    v.state := FINISH;
+                elsif r.r(55) = '1' then
+                    -- sum overflowed, shift right
+                    opsel_r <= RES_SHIFT;
+                    set_x := '1';
+                    v.shift := to_signed(-2, EXP_BITS);
+                    if exp_huge = '1' then
+                        v.state := ROUND_OFLOW;
+                    else
+                        v.state := ROUNDING;
+                    end if;
+                elsif r.r(54) = '1' then  -- bit 54 set with bits 63 and 55 clear: RES_SHIFT is not selected here, go straight to ROUNDING
+                    set_x := '1';
+                    v.shift := to_signed(-2, EXP_BITS);
+                    v.state := ROUNDING;
+                elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then
+                    -- r.x must be zero at this point
+                    v.result_class := ZERO;
+                    if r.is_subtract = '1' then
+                        -- set result sign depending on rounding mode
+                        v.result_sign := r.round_mode(1) and r.round_mode(0);  -- '1' only when both round_mode bits are set
+                    end if;
+                    arith_done := '1';
+                else  -- remaining case: bits 63, 55 and 54 clear but some tested bit is nonzero
+                    renormalize := '1';
+                    v.state := NORMALIZE;
+                end if;
+
             when INT_SHIFT =>
                 opsel_r <= RES_SHIFT;
                 set_x := '1';
@@ -927,6 +1058,10 @@ begin
                 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
                 if mant_nz = '0' then
                     v.result_class := ZERO;
+                    if r.is_subtract = '1' then
+                        -- set result sign depending on rounding mode
+                        v.result_sign := r.round_mode(1) and r.round_mode(0);
+                    end if;
                     arith_done := '1';
                 else
                     -- Renormalize result after rounding
@@ -946,6 +1081,13 @@ begin
 
         end case;
 
+        if qnan_result = '1' then  -- raised by DO_FADD for the infinity/infinity case with is_add = '0' (VXISI)
+            invalid := '1';
+            v.result_class := NAN;
+            v.result_sign := '0';
+            misc_sel <= "0001";  -- misc mux entry holding the generated QNaN mantissa constant
+            opsel_r <= RES_MISC;
+        end if;
         if arith_done = '1' then
             -- Enabled invalid exception doesn't write result or FPRF
             if (invalid and r.fpscr(FPSCR_VE)) = '0' then
@@ -960,7 +1102,7 @@ begin
         -- Data path.
         -- This has A and B input multiplexers, an adder, a shifter,
         -- count-leading-zeroes logic, and a result mux.
-        if r.single_prec = '1' then
+        if longmask = '1' then
             mshift := r.shift + to_signed(-29, EXP_BITS);
         else
             mshift := r.shift;
@@ -1000,6 +1142,9 @@ begin
             when others =>
                 in_b0 := (others => '0');
         end case;
+        if opsel_binv = '1' then
+            in_b0 := not in_b0;
+        end if;
         in_b <= in_b0;
         if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
             shift_res := shifter_64(r.r & x"00000000000000",
@@ -1016,6 +1161,9 @@ begin
                 case misc_sel is
                     when "0000" =>
                         misc := x"00000000" & (r.fpscr and fpscr_mask);
+                    when "0001" =>
+                        -- generated QNaN mantissa
+                        misc := x"0020000000000000";
                     when "0010" =>
                         -- mantissa of max representable DP number
                         misc := x"007ffffffffffffc";
diff --git a/tests/fpu/fpu.c b/tests/fpu/fpu.c
index e7a1334..8f7407a 100644
--- a/tests/fpu/fpu.c
+++ b/tests/fpu/fpu.c
@@ -843,6 +843,158 @@ int fpu_test_12(void)
 	return trapit(0, test12);
 }
 
+struct addvals {
+	unsigned long val_a;	/* first operand: raw 64-bit image loaded into f5 with lfd */
+	unsigned long val_b;	/* second operand: raw 64-bit image loaded into f6 with lfd */
+	unsigned long sum;	/* expected result image of the add instruction */
+	unsigned long diff;	/* expected result image of the subtract instruction */
+} addvals[] = {
+	{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 },
+	{ 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000 },
+	{ 0x3fdfffffffffffff, 0x0000000000000000, 0x3fdfffffffffffff, 0x3fdfffffffffffff },
+	{ 0x3ff0000000000000, 0x3ff0000000000000, 0x4000000000000000, 0x0000000000000000 },
+	{ 0xbff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x0000000000000000 },
+	{ 0x402123456789abcd, 0x4021000000000000, 0x403111a2b3c4d5e6, 0x3fb1a2b3c4d5e680 },
+	{ 0x4061200000000000, 0x406123456789abcd, 0x407121a2b3c4d5e6, 0xbfba2b3c4d5e6800 },
+	{ 0x4061230000000000, 0x3fa4560000000000, 0x4061244560000000, 0x406121baa0000000 },
+	{ 0xc061230000000000, 0x3fa4560000000000, 0xc06121baa0000000, 0xc061244560000000 },
+	{ 0x4061230000000000, 0xbfa4560000000000, 0x406121baa0000000, 0x4061244560000000 },
+	{ 0xc061230000000000, 0xbfa4560000000000, 0xc061244560000000, 0xc06121baa0000000 },
+	{ 0x3fa1230000000000, 0x4064560000000000, 0x4064571230000000, 0xc06454edd0000000 },
+	{ 0xbfa1230000000000, 0x4064560000000000, 0x406454edd0000000, 0xc064571230000000 },
+	{ 0x3fa1230000000000, 0xc064560000000000, 0xc06454edd0000000, 0x4064571230000000 },
+	{ 0xbfa1230000000000, 0xc064560000000000, 0xc064571230000000, 0x406454edd0000000 },
+	{ 0x6780000000000001, 0x6470000000000000, 0x6780000000000009, 0x677ffffffffffff2 },
+	{ 0x6780000000000001, 0x6460000000000000, 0x6780000000000005, 0x677ffffffffffffa },
+	{ 0x6780000000000001, 0x6450000000000000, 0x6780000000000003, 0x677ffffffffffffe },
+	{ 0x6780000000000001, 0x6440000000000000, 0x6780000000000002, 0x6780000000000000 },
+	{ 0x7ff8888888888888, 0x7ff9999999999999, 0x7ff8888888888888, 0x7ff8888888888888 },
+	{ 0xfff8888888888888, 0x7ff9999999999999, 0xfff8888888888888, 0xfff8888888888888 },
+	{ 0x7ff8888888888888, 0x7ff0000000000000, 0x7ff8888888888888, 0x7ff8888888888888 },
+	{ 0x7ff8888888888888, 0x0000000000000000, 0x7ff8888888888888, 0x7ff8888888888888 },
+	{ 0x7ff8888888888888, 0x0001111111111111, 0x7ff8888888888888, 0x7ff8888888888888 },
+	{ 0x7ff8888888888888, 0x3ff0000000000000, 0x7ff8888888888888, 0x7ff8888888888888 },
+	{ 0x7ff0000000000000, 0x7ff9999999999999, 0x7ff9999999999999, 0x7ff9999999999999 },
+	{ 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff8000000000000 },
+	{ 0x7ff0000000000000, 0xfff0000000000000, 0x7ff8000000000000, 0x7ff0000000000000 },
+	{ 0x7ff0000000000000, 0x0000000000000000, 0x7ff0000000000000, 0x7ff0000000000000 },
+	{ 0x7ff0000000000000, 0x8000000000000000, 0x7ff0000000000000, 0x7ff0000000000000 },
+	{ 0x7ff0000000000000, 0x8002222222222222, 0x7ff0000000000000, 0x7ff0000000000000 },
+	{ 0x7ff0000000000000, 0xc002222222222222, 0x7ff0000000000000, 0x7ff0000000000000 },
+	{ 0x0000000000000000, 0x7ff9999999999999, 0x7ff9999999999999, 0x7ff9999999999999 },
+	{ 0x0000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0xfff0000000000000 },
+	{ 0x8000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0xfff0000000000000 },
+	{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 },
+	{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 },
+	{ 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000 },
+	{ 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000 },
+	{ 0x8002222222222222, 0x0001111111111111, 0x8001111111111111, 0x8003333333333333 },
+	{ 0x0000022222222222, 0x0000111111111111, 0x0000133333333333, 0x80000eeeeeeeeeef },
+	{ 0x401ffffffbfffefe, 0x406b8265196bd89e, 0x406c8265194bd896, 0xc06a8265198bd8a6 },
+	{ 0x4030020000000004, 0xbf110001ffffffff, 0x403001fbbfff8004, 0x4030020440008004 },
+	{ 0x3fdfffffffffffff, 0x3fe0000000000000, 0x3ff0000000000000, 0xbc90000000000000 },
+};
+
+int test13(long arg)
+{
+	long i;
+	unsigned long results[2];	/* [0] receives the fadd result, [1] the fsub result */
+	struct addvals *vp = addvals;
+
+	set_fpscr(FPS_RN_NEAR);	/* presumably selects round-to-nearest; set_fpscr/FPS_RN_NEAR are defined elsewhere */
+	for (i = 0; i < sizeof(addvals) / sizeof(addvals[0]); ++i, ++vp) {
+		asm("lfd 5,0(%0); lfd 6,8(%0); fadd 7,5,6; fsub 8,5,6; stfd 7,0(%1); stfd 8,8(%1)"
+		    : : "b" (&vp->val_a), "b" (results) : "memory");
+		if (results[0] != vp->sum || results[1] != vp->diff) {
+			print_hex(i, 2, " ");	/* on mismatch, print the table index followed by both actual results */
+			print_hex(results[0], 16, " ");
+			print_hex(results[1], 16, "\r\n");
+			return i + 1;	/* nonzero failure code: 1-based index of the failing entry */
+		}
+	}
+	return 0;	/* every table entry matched */
+}
+
+int fpu_test_13(void)
+{
+	enable_fp();	/* defined elsewhere; presumably enables floating-point instructions before the test runs */
+	return trapit(0, test13);	/* runs test13 through the trapit wrapper (defined elsewhere) and returns its result */
+}
+
+struct addvals sp_addvals[] = {	/* cases for fadds/fsubs; test14 applies frsp to val_a and val_b before operating on them */
+	{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 },
+	{ 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000 },
+	{ 0x3fdfffffffffffff, 0x0000000000000000, 0x3fe0000000000000, 0x3fe0000000000000 },
+	{ 0x3ff0000000000000, 0x3ff0000000000000, 0x4000000000000000, 0x0000000000000000 },
+	{ 0xbff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x0000000000000000 },
+	{ 0x402123456789abcd, 0x4021000000000000, 0x403111a2c0000000, 0x3fb1a2b000000000 },
+	{ 0x4061200000000000, 0x406123456789abcd, 0x407121a2c0000000, 0xbfba2b0000000000 },
+	{ 0x4061230000000000, 0x3fa4560000000000, 0x4061244560000000, 0x406121baa0000000 },
+	{ 0xc061230000000000, 0x3fa4560000000000, 0xc06121baa0000000, 0xc061244560000000 },
+	{ 0x4061230000000000, 0xbfa4560000000000, 0x406121baa0000000, 0x4061244560000000 },
+	{ 0xc061230000000000, 0xbfa4560000000000, 0xc061244560000000, 0xc06121baa0000000 },
+	{ 0x3fa1230000000000, 0x4064560000000000, 0x4064571240000000, 0xc06454edc0000000 },
+	{ 0xbfa1230000000000, 0x4064560000000000, 0x406454edc0000000, 0xc064571240000000 },
+	{ 0x3fa1230000000000, 0xc064560000000000, 0xc06454edc0000000, 0x4064571240000000 },
+	{ 0xbfa1230000000000, 0xc064560000000000, 0xc064571240000000, 0x406454edc0000000 },
+	{ 0x6780000000000001, 0x6470000000000000, 0x7ff0000000000000, 0x7ff8000000000000 },
+	{ 0x6780000000000001, 0x6460000000000000, 0x7ff0000000000000, 0x7ff8000000000000 },
+	{ 0x6780000000000001, 0x6450000000000000, 0x7ff0000000000000, 0x7ff8000000000000 },
+	{ 0x6780000000000001, 0x6440000000000000, 0x7ff0000000000000, 0x7ff8000000000000 },
+	{ 0x7ff8888888888888, 0x7ff9999999999999, 0x7ff8888880000000, 0x7ff8888880000000 },
+	{ 0xfff8888888888888, 0x7ff9999999999999, 0xfff8888880000000, 0xfff8888880000000 },
+	{ 0x7ff8888888888888, 0x7ff0000000000000, 0x7ff8888880000000, 0x7ff8888880000000 },
+	{ 0x7ff8888888888888, 0x0000000000000000, 0x7ff8888880000000, 0x7ff8888880000000 },
+	{ 0x7ff8888888888888, 0x0001111111111111, 0x7ff8888880000000, 0x7ff8888880000000 },
+	{ 0x7ff8888888888888, 0x3ff0000000000000, 0x7ff8888880000000, 0x7ff8888880000000 },
+	{ 0x7ff0000000000000, 0x7ff9999999999999, 0x7ff9999980000000, 0x7ff9999980000000 },
+	{ 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff8000000000000 },
+	{ 0x7ff0000000000000, 0xfff0000000000000, 0x7ff8000000000000, 0x7ff0000000000000 },
+	{ 0x7ff0000000000000, 0x0000000000000000, 0x7ff0000000000000, 0x7ff0000000000000 },
+	{ 0x7ff0000000000000, 0x8000000000000000, 0x7ff0000000000000, 0x7ff0000000000000 },
+	{ 0x7ff0000000000000, 0x8002222222222222, 0x7ff0000000000000, 0x7ff0000000000000 },
+	{ 0x7ff0000000000000, 0xc002222222222222, 0x7ff0000000000000, 0x7ff0000000000000 },
+	{ 0x0000000000000000, 0x7ff9999999999999, 0x7ff9999980000000, 0x7ff9999980000000 },
+	{ 0x0000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0xfff0000000000000 },
+	{ 0x8000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0xfff0000000000000 },
+	{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 },
+	{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 },
+	{ 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000 },
+	{ 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000 },
+	{ 0x8002222222222222, 0x0001111111111111, 0x0000000000000000, 0x8000000000000000 },
+	{ 0x0000022222222222, 0x0000111111111111, 0x0000000000000000, 0x0000000000000000 },
+	{ 0x47dc000020000000, 0x47ec03ffe0000000, 0x7ff0000000000000, 0xc7dc07ffa0000000 },
+	{ 0x47dbffffe0000000, 0x47eff7ffe0000000, 0x7ff0000000000000, 0xc7e1f80000000000 },
+	{ 0x47efffffc0000000, 0xc7efffffc0000000, 0x0000000000000000, 0x7ff0000000000000 },
+};
+
+int test14(long arg)
+{
+	long i;
+	unsigned long results[2];	/* [0] receives the fadds result, [1] the fsubs result */
+	struct addvals *vp = sp_addvals;
+
+	set_fpscr(FPS_RN_NEAR);	/* presumably selects round-to-nearest; set_fpscr/FPS_RN_NEAR are defined elsewhere */
+	for (i = 0; i < sizeof(sp_addvals) / sizeof(sp_addvals[0]); ++i, ++vp) {
+		asm("lfd 5,0(%0); frsp 5,5; lfd 6,8(%0); frsp 6,6; "	/* frsp is applied to each loaded operand first */
+		    "fadds 7,5,6; fsubs 8,5,6; stfd 7,0(%1); stfd 8,8(%1)"
+		    : : "b" (&vp->val_a), "b" (results) : "memory");
+		if (results[0] != vp->sum || results[1] != vp->diff) {
+			print_hex(i, 2, " ");	/* on mismatch, print the table index followed by both actual results */
+			print_hex(results[0], 16, " ");
+			print_hex(results[1], 16, "\r\n");
+			return i + 1;	/* nonzero failure code: 1-based index of the failing entry */
+		}
+	}
+	return 0;	/* every table entry matched */
+}
+
+int fpu_test_14(void)
+{
+	enable_fp();	/* defined elsewhere; presumably enables floating-point instructions before the test runs */
+	return trapit(0, test14);	/* runs test14 through the trapit wrapper (defined elsewhere) and returns its result */
+}
+
 int fail = 0;
 
 void do_test(int num, int (*test)(void))
@@ -880,6 +1032,8 @@ int main(void)
 	do_test(10, fpu_test_10);
 	do_test(11, fpu_test_11);
 	do_test(12, fpu_test_12);
+	do_test(13, fpu_test_13);
+	do_test(14, fpu_test_14);
 
 	return fail;
 }
diff --git a/tests/test_fpu.bin b/tests/test_fpu.bin
index 668ff65367cbf02294b638e77fd93f9173db9532..623db3f690b0aad69472a9a8740964239799983c 100755
GIT binary patch
delta 4711
zcmd5;eNa@_6+iE>;9`{Q^0|BnyNCpkfdy={3d(~hA|!|=wMlB|q9`^x5ofHU4PoCf
zB!j6qn3J@ci5q0piPkz=Y{%M8T{BKKw&`f6A7uPOqX-Eo=@?xFxjpy3_mD4R@<(TS
zX6~MQf9H44J?DO$eeHXs_CH9;0LpBDlFC0%DA`1M1yqz&0=NlVP1tI}R<BSiwk79C
z-fb^u%L8eLw;p@av#}|0CP4RJi0QI}S_agycm;O8hp|#V*092v8ut_8o;iPZluac+
z2g_PK0Q=7$8OL_BOeNdf=eO%NZ4C%;+q`(jGF{q<cP!6KJKIXEWwN|I4&M)ZD%e{{
zp@Dm26P^jnh`bmJ#fO>0&zLW_dr*ta;>D2g?}wZv6F`2q&R!f|I8kkC%{5U@^-bsv
zUoLr3j>wMO83V;9gp1aE#_IC{e7WIEu_R)ayd?(Tig-1sW@Urx*I(fNs0>Vsv`J@i
zcH~0oBsN7Zl=EK3OOa)=DH=1PHUu7B+Tgl}UqwBQ9Z{2{cI=IsALw4z;QEQcYHX8a
zR~6RTiUQxQXmEXo*61AUh%mhqJ(nx%o_nm60?6aLe*4D_t_23c`pPg%ySu?9k>zPA
z0#8Iw!`Gr0%RBv85}h=`T?@g>-P5o)y3GU;P>da6HnhZ>&=(Uo`D8d0&$jX{0rxe$
z7#1rRgkw-_h5UO9u8d6!{cRW&@1kuG_T;*amYvudn<2lr7XKbwB(Dj>n7A}~`!+0!
zTO8?LyWBN}cH0h>U5vHnSwM9W@nBq$d>|C>#Jwh~@8QZRC83-c_<*O?nrG5TA5Te>
zk67{Mlx(@N2@~VD#<<t+ac%7X+_PX{jK?$aMRKzRLlY`^9&}giGc6$b`H9=XanBA{
zPH&wJgXy4%OR+a#!5g6Rd)kp}2Z{8)(`NYObei+}K#DW)0YF!~8O)zqpu5Ws1>N9E
zt1XB60v?_$!qXyg!Y;p~pOum(US${0&)Uz5N>hd<%)@?gpOs4BaUGekoE$k^Tb_hs
z8#zzWoqxpY6$uyxlR9%fgF=3^?R@jUbvn(z9GQ+SrDcGpWOMokD6Jk!tM|L4^*xGM
zPpeGiHI0DLX<0|5)e`dPwAPGBt0p1*xm9%N{FKhoDZNBD_h0>k9w%+mL|_SgqxMbI
zZee+!r`8F}#pb=IW3y<3HtT&SZRh}=OUMmfFd2#~^}ETRr}`$w+gm*~bOMv0Ug5{L
zk^20YUh(rHkekjip4NDrnZ%hxHh?_A8_hA3p9tY+wy?XPQXPjA>iX&~KmIzAK?_AY
z7Z*7&blnppqix<y(I#VDV(qvQzj^2J5myrDaS_iJejXEXG?9i!yarRJ-j!0(k(49N
zz#k;dplbVe(uM$!svTmNpI!NKZ-BYBy^l|{i)Y5zvb;vJgEw*83Bvha2hyH3>ks*B
z!uHBPtK~R9lkJ3G5!mZPSPNmF5;m=imHw2rQK9YQeEzCv|E>H64K2Q#i0F*6OZ)I}
z$|*XN`n<_>QD(f<;3}Xp#K$2tuBCC7C`%3(FJrQIS=T#}_7L8W6MtXeHhb|msh;4W
zQQNGv=cUQst1hsYRNA3r6A|8@o;%N**nPkRwfh&s8)oXuADcIs=n)Uc0;uXt8p0I8
zvy2#HO!~qIu~bqzK<Z|I&6zwHv$w4-y+ZQrzRcJ)0=QVH5ee3JFjme15{@a<4iVNz
znhVEXptj`-V;^yVgk!6y*{?EYC(VUpwbarHOX2_t*Ui-O2)h7)Kgw{(eOIwABSRke
z91oIOe+@5X#LFLC!$DFl*D*dbUOs&t=aA~VjuoVAomfXo=|nH7Rh{@JQq7%sDKlO=
ziWf6tgI<)W0C&>V*g)!;Z{T2NOwe}fx^G~-GiJ_F>K?tpSR9Xu#7^z6g!u@o;lSMB
z0{BpRCPc9o)Rl)o;pLzL9(o^fCUxNkXJU{ofHLbsuQM&En7WN!_%~;A(0b}ZH{K`0
z2h{a<V`5ff&`IiU`2tI_;uqP;-b2HaU+Nn_Kct-2_)uCgF$4-|nJ2VpEGB@49>#iz
z4+f|DA%{mZgE-IPx~!=&OR3k=`*7>$lC_4g1HE`Kt1##&b@OgfcTN^R6bPhj`yOV`
zv2eovK^ULO<ymSegqd(fc4^Q?>bY;@@3ND_cT+d~Yg$So#=H<a1^+tsWhbS<9mX2@
zR1)q{TqVL91VY|OE7Y#t!8ti;skXa}afl12tJKm7OXq$`IQA~Jxoz8WwgzLSV#fSp
zivyUD%k+_?i7_@bs>P8L#X`c4hl(RT;M2+Q>900d&ztH{w6R0!{O8HGR%@1`iq2up
ztG!auc$J#nu}*<i70=~OKXzQqn_8*OU$UF?-Sd*fxaF8b$EQ~M1W&WK9P>B?z=@jS
zp#$#Fm}ocxc<6xJjd`4>&)>xt23S(1(62M}heDt2eWA)nW17E<&ok_2J!DX5$caAE
zoM1TMV`HM>Xt)DzH|BAkq8*P~O*lzSRj|-%^?$?xc&bO*-Bom5JVa5e?~h9;uL{r|
z432A8s$|_R6!~cs;`i(ja?fLoM``xZE+~Gc6AaCxBI$dS1KmzfTZn%Q&niTEq+R!)
z-<UDVfo>Sez=%}W=qXKz_78=pvzey-WR1cli)^^H)J$KDsFvEyi9+NN10Pu;V*y4v
zA6gV}(V{81;Ta3QRce(Oo|EPqj|`tuEc~%~z;JA&s%tezNPw!X3!vcu0uBSXb(16n
z)WJd4&sc)uZC@R|qJFjVi}q5|Ym~k1KPUaFkLnVD?4<7}#w4&FH>vD+zvNU*Dkvs7
zQ8O@Ry!MWK&Zjfeb@Lx7?98@I<+c?&r0}8m7b;N`4Q@yNcdH2G-p4BfU*}Nk*oq+R
zydrS<KdlIgUJ-ncs0cozB5<NlWHPcM7!wUgqbq_jkMqV<1mTa~m8Sik(G`JkZhI7=
zyy}}BC}Zr#12wKX%~u;6#S0Qzw+~eWzCJfFoj9f<Ji>vVkqB?R1w6(x`R4g_doxBw
zC|uHx49{T4j(dDTmGu9nBIwSIMLgf4B4m!K2wLPrzoRMwJ<o_TJHUnc#OPoFJzSHB
vvu|M&E}WtPeOjK^?00Vsx6|j-6>j%2CUCA#@gO7c-_m4y?2|Ba@xK28+c^CS

delta 2035
zcmai#e@s(X6vxkdl-jaVU-?lAlv)rwK&+)8J_NBKu&yYexMXumip*qATv(<Kwn~}l
z6pS;%Sr%h9ha?(D*p#8k;*8N{Zqe-zml%V=Ii~1ZbOsj$oXXv~h2V4~p5*qN`#Im9
zd+xpOwY}d7y@vz~fMYR$tNO<ot_Je!q0&_i;3l?_*hXR-*EdzRS~3M}U2PXXOX=Qo
za;A4|XHpJ8;Ac|GW@st{3M34&zB`PS^7#ZS%$d-hC+&s4i_hesIVtDsvUV>(XW#K@
za(By`D7UL`-(+rL4G3`?5HCn`g*F_KDuuQlm#$0{4RN?X@<lZkQfuHI+ldz<t@^dG
z;Ou6JV~m~24UA88CLwC0vb7bI0$385RIiLG41?y5Tm|h(Y{mmoH3Fg-ZPzClz<D-U
zqGJhT+m`^G$@RUCuINRg#(+Db-%?6%_~fq!u`|Ys$$Ep}z{UD<Aqzj&my2Dm;Wd4k
z=+mP$W@~858lU_C@5L;{{uqtm#J^%*3I(@M-VkJU294NTjg5wqP(_tbzJj{gg>A7#
zp}*4Nhww`5d%`^WoHN7i0d<Z01a6D#QAm;C#QsPFwwn}K5pN8?6amiVGx^TIJ%GPO
z#)~T=@YDDz@w^sC;?p9|>A-oA3I_9*#(Jp@?MAEETZ`+AC89@%oyIiLup2KMSL*v~
zYvcqvG&5BD8SBWCph+fSenN?OO2QopZ;P-OM-p5STp0S8m2#0{LbTG9CjKxJ8%=gm
z)r{Ym_Qdtq9+K-H-tfNs=zkSg&ngkywb(JM3NI!`E%#G734}4e@zjT`^zy@TuMNC%
z7_eE#XViFF!Y1}tG352wpho6w%OqPw*SBFZwn3tN6*H1@BT~b`Sv|RHet*z}jY%Ee
zA!<nj+a0{`F^c&H=l#Q<lH7F8QfTATY1E|2ryM|DQ1`4xMRjgI;B^)U^D<O7O{0mB
z&yq-h#^4UxsoP}io{bSJUU<4&$w%E<(39+$w&PdEp5Ji>X`a_{c(BfMIu49J-SIMP
znKLS=@k9EFh{6+=EP5RWEn7ppuqk+n{xSB)?ZY7|&ym0R7tO(!ld*MqD#`;UwYC!H
zXB|psCT*H{M9vXAEWYpIoZpFocz2L}FvZNoJ}36kAS>NMO*!cJxSl^|bpAU21Whfz
znS^MIF$*oIoBNZ-Oj`Z|h`7$*rZ;POYz+Q>fm?-WPxq>)W<4j<D}`{pE`!-sZ3b5Z
z3F54|#h4W6Qh=wk9JZ+_7Qa_DC?GZGzbphCIhrZv2<oJ#=D6Ql9vxgNIUOLi1)ya<
zZ$^bZ<JKy{n|z(I0Rr448YF^YVig>aaps_5yTRBHd2XDoqM;dJ><9;BoV`h-otT+C
zH_kjXx`-unK*s%E8mEX|1;9RAqF8qm$8A>8a|^8*R&n?iR%Mt(<Db|_&g{nn<eYvy
zO|H(5*U0Vj<2bo9el*!lqTi4C<fH&rk&^>>+8(ETRit$Sc+DP{Ur(Vwz?hM*PiCgE
zo7fPsIu44|1@MKiRO`?cG*)P#@ONbaZ}S*w@&++GGfCM?;hI6L%1l$LLIA!U#66i7
zWfq0D+jx=$YbZ>)gV!naQF!VOhGm)5yh;~Mhp{1RcHAY3oOhobPZ4GP#8%zK@vM9`
zpV|}v1Nb;=>AZT1Hs5>FG(XGT#5}~flG`#G?Zmop(}Ggv5Jjr{czS`wXbhzm?lT4g
zDT=jb+Sj{UC{h;C6dg8Wbn^&f%{1Z0EAv}_gc;e9V(loFWTy%3_)d0Y?D0{?(s>Os
kF8hSWFZ54W-*Yf~hq~ujc2r2u(c)np9xc}3{Iy^H3;nCTwg3PC

diff --git a/tests/test_fpu.console_out b/tests/test_fpu.console_out
index d926abc..440cd77 100644
--- a/tests/test_fpu.console_out
+++ b/tests/test_fpu.console_out
@@ -10,3 +10,5 @@ test 09:PASS
 test 10:PASS
 test 11:PASS
 test 12:PASS
+test 13:PASS
+test 14:PASS