FPU: Implement floating convert to integer instructions

This implements fctiw, fctiwz, fctiwu, fctiwuz, fctid, fctidz, fctidu and fctiduz, and adds tests for them.

There are some subtleties around the setting of the inexact (XX) and invalid conversion (VXCVI) flags in the FPSCR. If the rounded value ends up being out of range, we need to set VXCVI and not XX. For a conversion to unsigned word or doubleword of a negative value that rounds to zero, we need to set XX and not VXCVI.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago · 03d1aa968a
parent 36130f1db3
commit 03d1aa968a
4 changed files with 321 additions and 3 deletions
--- a/decode1.vhdl
+++ b/decode1.vhdl
@ -442,8 +442,16 @@ architecture behaviour of decode1 is
        2#100000100#  => (FPU,   OP_FPOP,       NONE, FRB,  NONE, FRT,  '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0'), --  4/8=fnabs
        2#100001000#  => (FPU,   OP_FPOP,       NONE, FRB,  NONE, FRT,  '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0'), --  8/8=fabs
        2#110000000#  => (FPU,   OP_FPOP,       NONE, FRB,  NONE, FRT,  '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC,   '0', '0'), --  0/12=frsp
+        2#111000000#  => (FPU,   OP_FPOP,       NONE, FRB,  NONE, FRT,  '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0'), --  0/14=fctiw
+        2#111000100#  => (FPU,   OP_FPOP,       NONE, FRB,  NONE, FRT,  '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0'), --  4/14=fctiwu
+        2#111011001#  => (FPU,   OP_FPOP,       NONE, FRB,  NONE, FRT,  '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0'), -- 25/14=fctid
        2#111011010#  => (FPU,   OP_FPOP_I,     NONE, FRB,  NONE, FRT,  '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0'), -- 26/14=fcfid
+        2#111011101#  => (FPU,   OP_FPOP,       NONE, FRB,  NONE, FRT,  '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0'), -- 29/14=fctidu
        2#111011110#  => (FPU,   OP_FPOP_I,     NONE, FRB,  NONE, FRT,  '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0'), -- 30/14=fcfidu
+        2#111100000#  => (FPU,   OP_FPOP,       NONE, FRB,  NONE, FRT,  '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0'), --  0/15=fctiwz
+        2#111100100#  => (FPU,   OP_FPOP,       NONE, FRB,  NONE, FRT,  '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0'), --  4/15=fctiwuz
+        2#111111001#  => (FPU,   OP_FPOP,       NONE, FRB,  NONE, FRT,  '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0'), -- 25/15=fctidz
+        2#111111101#  => (FPU,   OP_FPOP,       NONE, FRB,  NONE, FRT,  '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0'), -- 29/15=fctiduz
        others => illegal_inst
        );

--- a/fpu.vhdl
+++ b/fpu.vhdl
@ -38,8 +38,10 @@ architecture behaviour of fpu is
    type state_t is (IDLE,
                     DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
                     DO_FMR,
-                     DO_FCFID,
+                     DO_FCFID, DO_FCTI,
                     DO_FRSP,
+                     INT_SHIFT, INT_ROUND, INT_ISHIFT,
+                     INT_FINAL, INT_CHECK, INT_OFLOW,
                     FINISH, NORMALIZE,
                     ROUND_UFLOW, ROUND_OFLOW,
                     ROUNDING, ROUNDING_2, ROUNDING_3,
@ -363,6 +365,8 @@ begin
        variable clz         : std_ulogic_vector(5 downto 0);
        variable set_x       : std_ulogic;
        variable mshift      : signed(EXP_BITS-1 downto 0);
+        variable need_check  : std_ulogic;
+        variable msb         : std_ulogic;
    begin
        v := r;
        illegal := '0';
@ -461,8 +465,15 @@ begin
                        when "01100" =>
                            v.state := DO_FRSP;
                        when "01110" =>
+                            if int_input = '1' then
                                -- fcfid[u][s]
                                v.state := DO_FCFID;
+                            else
+                                v.state := DO_FCTI;
+                            end if;
+                        when "01111" =>
+                            v.round_mode := "001";
+                            v.state := DO_FCTI;
                        when others =>
                            illegal := '1';
                    end case;
@ -603,6 +614,47 @@ begin
                    arith_done := '1';
                end if;

+            when DO_FCTI =>
+                -- instr bit 9: 1=dword 0=word
+                -- instr bit 8: 1=unsigned 0=signed
+                -- instr bit 1: 1=round to zero 0=use fpscr[RN]
+                opsel_a <= AIN_B;
+                v.result_class := r.b.class;
+                v.result_sign := r.b.negative;
+                v.result_exp := r.b.exponent;
+                v.fpscr(FPSCR_FR) := '0';
+                v.fpscr(FPSCR_FI) := '0';
+                if r.b.class = NAN and r.b.mantissa(53) = '0' then
+                    -- Signalling NAN
+                    v.fpscr(FPSCR_VXSNAN) := '1';
+                    invalid := '1';
+                end if;
+
+                v.int_result := '1';
+                case r.b.class is
+                    when ZERO =>
+                        arith_done := '1';
+                    when FINITE =>
+                        if r.b.exponent >= to_signed(64, EXP_BITS) or
+                            (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
+                            v.state := INT_OFLOW;
+                        elsif r.b.exponent >= to_signed(52, EXP_BITS) then
+                            -- integer already, no rounding required,
+                            -- shift into final position
+                            v.shift := r.b.exponent - to_signed(54, EXP_BITS);
+                            if r.insn(8) = '1' and r.b.negative = '1' then
+                                v.state := INT_OFLOW;
+                            else
+                                v.state := INT_ISHIFT;
+                            end if;
+                        else
+                            v.shift := r.b.exponent - to_signed(52, EXP_BITS);
+                            v.state := INT_SHIFT;
+                        end if;
+                    when INFINITY | NAN =>
+                        v.state := INT_OFLOW;
+                end case;
+
            when DO_FCFID =>
                v.result_sign := '0';
                opsel_a <= AIN_B;
@ -622,6 +674,81 @@ begin
                    v.state := FINISH;
                end if;

+            when INT_SHIFT =>
+                opsel_r <= RES_SHIFT;
+                set_x := '1';
+                v.state := INT_ROUND;
+                v.shift := to_signed(-2, EXP_BITS);
+
+            when INT_ROUND =>
+                opsel_r <= RES_SHIFT;
+                round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
+                v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
+                -- Check for negative values that don't round to 0 for fcti*u*
+                if r.insn(8) = '1' and r.result_sign = '1' and
+                    (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
+                    v.state := INT_OFLOW;
+                else
+                    v.state := INT_FINAL;
+                end if;
+
+            when INT_ISHIFT =>
+                opsel_r <= RES_SHIFT;
+                v.state := INT_FINAL;
+
+            when INT_FINAL =>
+                -- Negate if necessary, and increment for rounding if needed
+                opsel_ainv <= r.result_sign;
+                carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
+                -- Check for possible overflows
+                case r.insn(9 downto 8) is
+                    when "00" =>        -- fctiw[z]
+                        need_check := r.r(31) or (r.r(30) and not r.result_sign);
+                    when "01" =>        -- fctiwu[z]
+                        need_check := r.r(31);
+                    when "10" =>        -- fctid[z]
+                        need_check := r.r(63) or (r.r(62) and not r.result_sign);
+                    when others =>      -- fctidu[z]
+                        need_check := r.r(63);
+                end case;
+                if need_check = '1' then
+                    v.state := INT_CHECK;
+                else
+                    if r.fpscr(FPSCR_FI) = '1' then
+                        v.fpscr(FPSCR_XX) := '1';
+                    end if;
+                    arith_done := '1';
+                end if;
+
+            when INT_CHECK =>
+                if r.insn(9) = '0' then
+                    msb := r.r(31);
+                else
+                    msb := r.r(63);
+                end if;
+                misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
+                if (r.insn(8) = '0' and msb /= r.result_sign) or
+                    (r.insn(8) = '1' and msb /= '1') then
+                    opsel_r <= RES_MISC;
+                    v.fpscr(FPSCR_VXCVI) := '1';
+                    invalid := '1';
+                else
+                    if r.fpscr(FPSCR_FI) = '1' then
+                        v.fpscr(FPSCR_XX) := '1';
+                    end if;
+                end if;
+                arith_done := '1';
+
+            when INT_OFLOW =>
+                opsel_r <= RES_MISC;
+                misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
+                if r.b.class = NAN then
+                    misc_sel(0) <= '1';
+                end if;
+                v.fpscr(FPSCR_VXCVI) := '1';
+                invalid := '1';
+                arith_done := '1';
+
            when FINISH =>
                if r.r(63 downto 54) /= "0000000001" then
                    renormalize := '1';
@ -846,6 +973,30 @@ begin
                    when "0011" =>
                        -- mantissa of max representable SP number
                        misc := x"007fffff80000000";
+                    when "1000" =>
+                        -- max positive result for fctiw[z]
+                        misc := x"000000007fffffff";
+                    when "1001" =>
+                        -- max negative result for fctiw[z]
+                        misc := x"ffffffff80000000";
+                    when "1010" =>
+                        -- max positive result for fctiwu[z]
+                        misc := x"00000000ffffffff";
+                    when "1011" =>
+                        -- max negative result for fctiwu[z]
+                        misc := x"0000000000000000";
+                    when "1100" =>
+                        -- max positive result for fctid[z]
+                        misc := x"7fffffffffffffff";
+                    when "1101" =>
+                        -- max negative result for fctid[z]
+                        misc := x"8000000000000000";
+                    when "1110" =>
+                        -- max positive result for fctidu[z]
+                        misc := x"ffffffffffffffff";
+                    when "1111" =>
+                        -- max negative result for fctidu[z]
+                        misc := x"0000000000000000";
                    when others =>
                        misc := x"0000000000000000";
                end case;
--- a/tests/fpu/fpu.c
+++ b/tests/fpu/fpu.c
@ -19,6 +19,7 @@
 #define FPS_UE		0x20
 #define FPS_OE		0x40
 #define FPS_VE		0x80
+#define FPS_VXCVI	0x100
 #define FPS_VXSOFT	0x400

 extern int trapit(long arg, int (*func)(long));
@ -598,6 +599,160 @@ int fpu_test_8(void)
 	return trapit(0, test8);
 }

+struct cvtivals {	/* one floating-point-to-integer conversion test vector */
+	unsigned long dval;	/* input: bit pattern that the tests load into an FPR with lfd */
+	long lval;	/* expected fctid[z] result */
+	unsigned long ulval;	/* expected fctidu[z] result */
+	int ival;	/* expected fctiw[z] result */
+	unsigned int uival;	/* expected fctiwu[z] result */
+	unsigned char invalids[4];	/* expected VXCVI after the dword-signed, dword-unsigned, word-signed, word-unsigned conversion */
+} cvtivals[] = {	/* vectors consumed by test9 (fctid, fctidu, fctiw, fctiwu) */
+	{ 0x0000000000000000, 0, 0, 0, 0, {0, 0, 0, 0} },	/* +0.0 */
+	{ 0x8000000000000000, 0, 0, 0, 0, {0, 0, 0, 0} },	/* -0.0 */
+	{ 0x3fdfffffffffffff, 0, 0, 0, 0, {0, 0, 0, 0} },	/* just below 0.5: expected to round to 0 */
+	{ 0x3ff0000000000000, 1, 1, 1, 1, {0, 0, 0, 0} },	/* 1.0 */
+	{ 0xbff0000000000000, -1, 0, -1, 0, {0, 1, 0, 1} },	/* -1.0: unsigned conversions expect VXCVI and a 0 result */
+	{ 0x402123456789abcd, 9, 9, 9, 9, {0, 0, 0, 0} },	/* approx. 8.57: expected to round up to 9 */
+	{ 0x406123456789abcd, 137, 137, 137, 137, {0, 0, 0, 0} },
+	{ 0x409123456789abcd, 1097, 1097, 1097, 1097, {0, 0, 0, 0} },	/* approx. 1096.8: expected to round up to 1097 */
+	{ 0x41c123456789abcd, 0x22468acf, 0x22468acf, 0x22468acf, 0x22468acf, {0, 0, 0, 0} },
+	{ 0x41d123456789abcd, 0x448d159e, 0x448d159e, 0x448d159e, 0x448d159e, {0, 0, 0, 0} },
+	{ 0x41e123456789abcd, 0x891a2b3c, 0x891a2b3c, 0x7fffffff, 0x891a2b3c, {0, 0, 1, 0} },	/* above 0x7fffffff but within 32 unsigned bits: only the signed word conversion is invalid */
+	{ 0x41f123456789abcd, 0x112345679, 0x112345679, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },	/* needs more than 32 bits: both word conversions saturate */
+	{ 0xc1f123456789abcd, -0x112345679, 0, 0x80000000, 0, {0, 1, 1, 1} },	/* negative counterpart of the previous vector */
+	{ 0x432123456789abcd, 0x891a2b3c4d5e6, 0x891a2b3c4d5e6, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
+	{ 0x433123456789abcd, 0x1123456789abcd, 0x1123456789abcd, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
+	{ 0x434123456789abcd, 0x22468acf13579a, 0x22468acf13579a, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
+	{ 0x43c123456789abcd, 0x22468acf13579a00, 0x22468acf13579a00, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
+	{ 0x43d123456789abcd, 0x448d159e26af3400, 0x448d159e26af3400, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
+	{ 0x43e123456789abcd, 0x7fffffffffffffff, 0x891a2b3c4d5e6800, 0x7fffffff, 0xffffffff, {1, 0, 1, 1} },	/* above 0x7fffffffffffffff but within 64 unsigned bits */
+	{ 0x43f123456789abcd, 0x7fffffffffffffff, 0xffffffffffffffff, 0x7fffffff, 0xffffffff, {1, 1, 1, 1} },	/* too large for all four result formats */
+	{ 0xc3f123456789abcd, 0x8000000000000000, 0, 0x80000000, 0, {1, 1, 1, 1} },	/* too negative for all four result formats */
+	{ 0x7ff0000000000000, 0x7fffffffffffffff, 0xffffffffffffffff, 0x7fffffff, 0xffffffff, {1, 1, 1, 1} },	/* +infinity */
+	{ 0xfff0000000000000, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } },	/* -infinity */
+	{ 0x7ff923456789abcd, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } },	/* NaN, sign clear: expects the same results as the most-negative case */
+	{ 0xfff923456789abcd, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } },	/* NaN, sign set */
+	{ 0xbfd123456789abcd, 0, 0, 0, 0, {0, 0, 0, 0} },	/* approx. -0.27: rounds to 0, so no VXCVI is expected even for the unsigned conversions */
+};
+
+#define GET_VXCVI()	((get_fpscr() >> 8) & 1)	/* 1 if bit 0x100 (FPS_VXCVI) of the value returned by get_fpscr() is set, else 0 */
+
+int test9(long arg)	/* Run fctid/fctidu/fctiw/fctiwu over cvtivals[]; returns 0 if all match, else 1-based index of the first mismatch. arg is unused. */
+{
+	long i;
+	int ires;	/* fctiw result */
+	unsigned int ures;	/* fctiwu result */
+	long lres;	/* fctid result */
+	unsigned long ulres;	/* fctidu result */
+	unsigned char inv[4];	/* VXCVI observed after each of the four conversions */
+	struct cvtivals *vp = cvtivals;
+
+	for (i = 0; i < sizeof(cvtivals) / sizeof(cvtivals[0]); ++i, ++vp) {	/* one iteration per test vector */
+		set_fpscr(FPS_RN_NEAR);	/* NOTE(review): presumably selects round-to-nearest and clears sticky flags such as VXCVI - confirm against set_fpscr */
+		asm("lfd 3,0(%0); fctid 4,3; stfd 4,0(%1)"	/* f3 = input, f4 = fctid(f3), f4 stored to lres */
+		    : : "b" (&vp->dval), "b" (&lres) : "memory");
+		inv[0] = GET_VXCVI();
+		set_fpscr(FPS_RN_NEAR);
+		asm("fctidu 5,3; stfd 5,0(%0)" : : "b" (&ulres) : "memory");	/* relies on f3 still holding the input loaded by the first asm statement */
+		inv[1] = GET_VXCVI();
+		set_fpscr(FPS_RN_NEAR);
+		asm("fctiw 6,3; stfiwx 6,0,%0" : : "b" (&ires) : "memory");	/* word result of f6 stored to ires */
+		inv[2] = GET_VXCVI();
+		set_fpscr(FPS_RN_NEAR);
+		asm("fctiwu 7,3; stfiwx 7,0,%0" : : "b" (&ures) : "memory");	/* word result of f7 stored to ures */
+		inv[3] = GET_VXCVI();
+
+		if (lres != vp->lval || ulres != vp->ulval || ires != vp->ival || ures != vp->uival ||	/* any result or VXCVI mismatch fails the vector */
+		    inv[0] != vp->invalids[0] || inv[1] != vp->invalids[1] ||
+		    inv[2] != vp->invalids[2] || inv[3] != vp->invalids[3]) {
+			print_hex(lres, 16, inv[0]? "V ": "  ");	/* dump the actual results; "V " marks a conversion that set VXCVI */
+			print_hex(ulres, 16, inv[1]? "V ": "  ");
+			print_hex(ires, 8, inv[2]? "V ": "  ");
+			print_hex(ures, 8, inv[3]? "V ": "  ");
+			return i + 1;	/* 1-based index of the failing vector */
+		}
+	}
+	return 0;	/* every vector matched */
+}
+
+int fpu_test_9(void)	/* Entry point for test 9: calls enable_fp(), then runs test9 through trapit(). */
+{
+	enable_fp();
+	return trapit(0, test9);	/* NOTE(review): trapit presumably also reports unexpected traps - confirm against its definition */
+}
+
+struct cvtivals cvtizvals[] = {	/* vectors consumed by test10 (fctidz, fctiduz, fctiwz, fctiwuz) */
+	{ 0x0000000000000000, 0, 0, 0, 0, {0, 0, 0, 0} },	/* +0.0 */
+	{ 0x8000000000000000, 0, 0, 0, 0, {0, 0, 0, 0} },	/* -0.0 */
+	{ 0x3fdfffffffffffff, 0, 0, 0, 0, {0, 0, 0, 0} },	/* just below 0.5 */
+	{ 0x3ff0000000000000, 1, 1, 1, 1, {0, 0, 0, 0} },	/* 1.0 */
+	{ 0xbff0000000000000, -1, 0, -1, 0, {0, 1, 0, 1} },	/* -1.0: unsigned conversions expect VXCVI and a 0 result */
+	{ 0x402123456789abcd, 8, 8, 8, 8, {0, 0, 0, 0} },	/* approx. 8.57: expected to truncate to 8 */
+	{ 0x406123456789abcd, 137, 137, 137, 137, {0, 0, 0, 0} },
+	{ 0x409123456789abcd, 1096, 1096, 1096, 1096, {0, 0, 0, 0} },	/* approx. 1096.8: expected to truncate to 1096 */
+	{ 0x41c123456789abcd, 0x22468acf, 0x22468acf, 0x22468acf, 0x22468acf, {0, 0, 0, 0} },
+	{ 0x41d123456789abcd, 0x448d159e, 0x448d159e, 0x448d159e, 0x448d159e, {0, 0, 0, 0} },
+	{ 0x41e123456789abcd, 0x891a2b3c, 0x891a2b3c, 0x7fffffff, 0x891a2b3c, {0, 0, 1, 0} },	/* above 0x7fffffff but within 32 unsigned bits: only the signed word conversion is invalid */
+	{ 0x41f123456789abcd, 0x112345678, 0x112345678, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },	/* needs more than 32 bits: both word conversions saturate */
+	{ 0xc1f123456789abcd, -0x112345678, 0, 0x80000000, 0, {0, 1, 1, 1} },	/* negative counterpart of the previous vector */
+	{ 0x432123456789abcd, 0x891a2b3c4d5e6, 0x891a2b3c4d5e6, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
+	{ 0x433123456789abcd, 0x1123456789abcd, 0x1123456789abcd, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
+	{ 0x434123456789abcd, 0x22468acf13579a, 0x22468acf13579a, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
+	{ 0x43c123456789abcd, 0x22468acf13579a00, 0x22468acf13579a00, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
+	{ 0x43d123456789abcd, 0x448d159e26af3400, 0x448d159e26af3400, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
+	{ 0x43e123456789abcd, 0x7fffffffffffffff, 0x891a2b3c4d5e6800, 0x7fffffff, 0xffffffff, {1, 0, 1, 1} },	/* above 0x7fffffffffffffff but within 64 unsigned bits */
+	{ 0x43f123456789abcd, 0x7fffffffffffffff, 0xffffffffffffffff, 0x7fffffff, 0xffffffff, {1, 1, 1, 1} },	/* too large for all four result formats */
+	{ 0xc3f123456789abcd, 0x8000000000000000, 0, 0x80000000, 0, {1, 1, 1, 1} },	/* too negative for all four result formats */
+	{ 0x7ff0000000000000, 0x7fffffffffffffff, 0xffffffffffffffff, 0x7fffffff, 0xffffffff, {1, 1, 1, 1} },	/* +infinity */
+	{ 0xfff0000000000000, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } },	/* -infinity */
+	{ 0x7ff923456789abcd, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } },	/* NaN, sign clear: expects the same results as the most-negative case */
+	{ 0xfff923456789abcd, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } },	/* NaN, sign set */
+};
+
+int test10(long arg)	/* Run fctidz/fctiduz/fctiwz/fctiwuz over cvtizvals[]; returns 0 if all match, else 1-based index of the first mismatch. arg is unused. */
+{
+	long i;
+	int ires;	/* fctiwz result */
+	unsigned int ures;	/* fctiwuz result */
+	long lres;	/* fctidz result */
+	unsigned long ulres;	/* fctiduz result */
+	unsigned char inv[4];	/* VXCVI observed after each of the four conversions */
+	struct cvtivals *vp = cvtizvals;
+
+	for (i = 0; i < sizeof(cvtizvals) / sizeof(cvtizvals[0]); ++i, ++vp) {	/* one iteration per test vector */
+		set_fpscr(FPS_RN_NEAR);	/* NOTE(review): presumably resets the FPSCR (clearing VXCVI) before the conversion - confirm against set_fpscr */
+		asm("lfd 3,0(%0); fctidz 4,3; stfd 4,0(%1)"	/* f3 = input, f4 = fctidz(f3), f4 stored to lres */
+		    : : "b" (&vp->dval), "b" (&lres) : "memory");
+		inv[0] = GET_VXCVI();
+		set_fpscr(FPS_RN_NEAR);
+		asm("fctiduz 5,3; stfd 5,0(%0)" : : "b" (&ulres) : "memory");	/* relies on f3 still holding the input loaded by the first asm statement */
+		inv[1] = GET_VXCVI();
+		set_fpscr(FPS_RN_NEAR);
+		asm("fctiwz 6,3; stfiwx 6,0,%0" : : "b" (&ires) : "memory");	/* word result of f6 stored to ires */
+		inv[2] = GET_VXCVI();
+		set_fpscr(FPS_RN_NEAR);
+		asm("fctiwuz 7,3; stfiwx 7,0,%0" : : "b" (&ures) : "memory");	/* word result of f7 stored to ures */
+		inv[3] = GET_VXCVI();
+
+		if (lres != vp->lval || ulres != vp->ulval || ires != vp->ival || ures != vp->uival ||	/* any result or VXCVI mismatch fails the vector */
+		    inv[0] != vp->invalids[0] || inv[1] != vp->invalids[1] ||
+		    inv[2] != vp->invalids[2] || inv[3] != vp->invalids[3]) {
+			print_hex(lres, 16, inv[0]? "V ": "  ");	/* dump the actual results; "V " marks a conversion that set VXCVI */
+			print_hex(ulres, 16, inv[1]? "V ": "  ");
+			print_hex(ires, 8, inv[2]? "V ": "  ");
+			print_hex(ures, 8, inv[3]? "V ": "  ");
+			return i + 1;	/* 1-based index of the failing vector */
+		}
+	}
+	return 0;	/* every vector matched */
+}
+
+int fpu_test_10(void)	/* Entry point for test 10: calls enable_fp(), then runs test10 through trapit(). */
+{
+	enable_fp();
+	return trapit(0, test10);	/* NOTE(review): trapit presumably also reports unexpected traps - confirm against its definition */
+}
+
 int fail = 0;

 void do_test(int num, int (*test)(void))
@ -631,6 +786,8 @@ int main(void)
 	do_test(6, fpu_test_6);
 	do_test(7, fpu_test_7);
 	do_test(8, fpu_test_8);
+	do_test(9, fpu_test_9);
+	do_test(10, fpu_test_10);

 	return fail;
 }
--- a/tests/test_fpu.console_out
+++ b/tests/test_fpu.console_out
@ -6,3 +6,5 @@ test 05:PASS
 test 06:PASS
 test 07:PASS
 test 08:PASS
+test 09:PASS
+test 10:PASS