From 03d1aa968a76f338c4caf9c742e9e59d8a8d13e0 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 22 Jul 2020 12:19:12 +1000 Subject: [PATCH] FPU: Implement floating convert to integer instructions This implements fctiw, fctiwz, fctiwu, fctiwuz, fctid, fctidz, fctidu and fctiduz, and adds tests for them. There are some subtleties around the setting of the inexact (XX) and invalid conversion (VXCVI) flags in the FPSCR. If the rounded value ends up being out of range, we need to set VXCVI and not XX. For a conversion to unsigned word or doubleword of a negative value that rounds to zero, we need to set XX and not VXCVI. Signed-off-by: Paul Mackerras --- decode1.vhdl | 8 ++ fpu.vhdl | 157 ++++++++++++++++++++++++++++++++++++- tests/fpu/fpu.c | 157 +++++++++++++++++++++++++++++++++++++ tests/test_fpu.console_out | 2 + 4 files changed, 321 insertions(+), 3 deletions(-) diff --git a/decode1.vhdl b/decode1.vhdl index 284fb08..c659e3e 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -442,8 +442,16 @@ architecture behaviour of decode1 is 2#100000100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/8=fnabs 2#100001000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 8/8=fabs 2#110000000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- 0/12=frsp + 2#111000000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 0/14=fctiw + 2#111000100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/14=fctiwu + 2#111011001# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 25/14=fctid 2#111011010# => (FPU, OP_FPOP_I, NONE, FRB, NONE, 
FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 26/14=fcfid + 2#111011101# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 29/14=fctidu 2#111011110# => (FPU, OP_FPOP_I, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 30/14=fcfidu + 2#111100000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 0/15=fctiwz + 2#111100100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/15=fctiwuz + 2#111111001# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 25/15=fctidz + 2#111111101# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 29/15=fctiduz others => illegal_inst ); diff --git a/fpu.vhdl b/fpu.vhdl index 7576562..6301fa7 100644 --- a/fpu.vhdl +++ b/fpu.vhdl @@ -38,8 +38,10 @@ architecture behaviour of fpu is type state_t is (IDLE, DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF, DO_FMR, - DO_FCFID, + DO_FCFID, DO_FCTI, DO_FRSP, + INT_SHIFT, INT_ROUND, INT_ISHIFT, + INT_FINAL, INT_CHECK, INT_OFLOW, FINISH, NORMALIZE, ROUND_UFLOW, ROUND_OFLOW, ROUNDING, ROUNDING_2, ROUNDING_3, @@ -363,6 +365,8 @@ begin variable clz : std_ulogic_vector(5 downto 0); variable set_x : std_ulogic; variable mshift : signed(EXP_BITS-1 downto 0); + variable need_check : std_ulogic; + variable msb : std_ulogic; begin v := r; illegal := '0'; @@ -461,8 +465,15 @@ begin when "01100" => v.state := DO_FRSP; when "01110" => - -- fcfid[u][s] - v.state := DO_FCFID; + if int_input = '1' then + -- fcfid[u][s] + v.state := DO_FCFID; + else + v.state := DO_FCTI; + end if; + when "01111" => + v.round_mode := "001"; + v.state := DO_FCTI; 
when others => illegal := '1'; end case; @@ -603,6 +614,47 @@ begin arith_done := '1'; end if; + when DO_FCTI => + -- instr bit 9: 1=dword 0=word + -- instr bit 8: 1=unsigned 0=signed + -- instr bit 1: 1=round to zero 0=use fpscr[RN] + opsel_a <= AIN_B; + v.result_class := r.b.class; + v.result_sign := r.b.negative; + v.result_exp := r.b.exponent; + v.fpscr(FPSCR_FR) := '0'; + v.fpscr(FPSCR_FI) := '0'; + if r.b.class = NAN and r.b.mantissa(53) = '0' then + -- Signalling NAN + v.fpscr(FPSCR_VXSNAN) := '1'; + invalid := '1'; + end if; + + v.int_result := '1'; + case r.b.class is + when ZERO => + arith_done := '1'; + when FINITE => + if r.b.exponent >= to_signed(64, EXP_BITS) or + (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then + v.state := INT_OFLOW; + elsif r.b.exponent >= to_signed(52, EXP_BITS) then + -- integer already, no rounding required, + -- shift into final position + v.shift := r.b.exponent - to_signed(54, EXP_BITS); + if r.insn(8) = '1' and r.b.negative = '1' then + v.state := INT_OFLOW; + else + v.state := INT_ISHIFT; + end if; + else + v.shift := r.b.exponent - to_signed(52, EXP_BITS); + v.state := INT_SHIFT; + end if; + when INFINITY | NAN => + v.state := INT_OFLOW; + end case; + when DO_FCFID => v.result_sign := '0'; opsel_a <= AIN_B; @@ -622,6 +674,81 @@ begin v.state := FINISH; end if; + when INT_SHIFT => + opsel_r <= RES_SHIFT; + set_x := '1'; + v.state := INT_ROUND; + v.shift := to_signed(-2, EXP_BITS); + + when INT_ROUND => + opsel_r <= RES_SHIFT; + round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign); + v.fpscr(FPSCR_FR downto FPSCR_FI) := round; + -- Check for negative values that don't round to 0 for fcti*u* + if r.insn(8) = '1' and r.result_sign = '1' and + (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then + v.state := INT_OFLOW; + else + v.state := INT_FINAL; + end if; + + when INT_ISHIFT => + opsel_r <= RES_SHIFT; + v.state := INT_FINAL; + + when INT_FINAL => + -- Negate if necessary, and increment for 
rounding if needed + opsel_ainv <= r.result_sign; + carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign; + -- Check for possible overflows + case r.insn(9 downto 8) is + when "00" => -- fctiw[z] + need_check := r.r(31) or (r.r(30) and not r.result_sign); + when "01" => -- fctiwu[z] + need_check := r.r(31); + when "10" => -- fctid[z] + need_check := r.r(63) or (r.r(62) and not r.result_sign); + when others => -- fctidu[z] + need_check := r.r(63); + end case; + if need_check = '1' then + v.state := INT_CHECK; + else + if r.fpscr(FPSCR_FI) = '1' then + v.fpscr(FPSCR_XX) := '1'; + end if; + arith_done := '1'; + end if; + + when INT_CHECK => + if r.insn(9) = '0' then + msb := r.r(31); + else + msb := r.r(63); + end if; + misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign; + if (r.insn(8) = '0' and msb /= r.result_sign) or + (r.insn(8) = '1' and msb /= '1') then + opsel_r <= RES_MISC; + v.fpscr(FPSCR_VXCVI) := '1'; + invalid := '1'; + else + if r.fpscr(FPSCR_FI) = '1' then + v.fpscr(FPSCR_XX) := '1'; + end if; + end if; + arith_done := '1'; + + when INT_OFLOW => + opsel_r <= RES_MISC; + misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign; + if r.b.class = NAN then + misc_sel(0) <= '1'; + end if; + v.fpscr(FPSCR_VXCVI) := '1'; + invalid := '1'; + arith_done := '1'; + when FINISH => if r.r(63 downto 54) /= "0000000001" then renormalize := '1'; @@ -846,6 +973,30 @@ begin when "0011" => -- mantissa of max representable SP number misc := x"007fffff80000000"; + when "1000" => + -- max positive result for fctiw[z] + misc := x"000000007fffffff"; + when "1001" => + -- max negative result for fctiw[z] + misc := x"ffffffff80000000"; + when "1010" => + -- max positive result for fctiwu[z] + misc := x"00000000ffffffff"; + when "1011" => + -- max negative result for fctiwu[z] + misc := x"0000000000000000"; + when "1100" => + -- max positive result for fctid[z] + misc := x"7fffffffffffffff"; + when "1101" => + -- max negative result for fctid[z] + misc := x"8000000000000000"; + when 
"1110" => + -- max positive result for fctidu[z] + misc := x"ffffffffffffffff"; + when "1111" => + -- max negative result for fctidu[z] + misc := x"0000000000000000"; when others => misc := x"0000000000000000"; end case; diff --git a/tests/fpu/fpu.c b/tests/fpu/fpu.c index aff6d6c..3c6a9bd 100644 --- a/tests/fpu/fpu.c +++ b/tests/fpu/fpu.c @@ -19,6 +19,7 @@ #define FPS_UE 0x20 #define FPS_OE 0x40 #define FPS_VE 0x80 +#define FPS_VXCVI 0x100 /* mask for the FPSCR invalid-conversion (VXCVI) flag */ #define FPS_VXSOFT 0x400 extern int trapit(long arg, int (*func)(long)); @@ -598,6 +599,160 @@ int fpu_test_8(void) return trapit(0, test8); } +struct cvtivals { /* One conversion test vector: an input double plus, for each of the four convert-to-integer forms, the expected result and the expected VXCVI flag. */ + unsigned long dval; /* raw 64-bit pattern of the input; the tests load it into FPR 3 with lfd */ + long lval; /* expected signed doubleword result (fctid or fctidz) */ + unsigned long ulval; /* expected unsigned doubleword result (fctidu or fctiduz) */ + int ival; /* expected signed word result (fctiw or fctiwz), as stored by stfiwx */ + unsigned int uival; /* expected unsigned word result (fctiwu or fctiwuz), as stored by stfiwx */ + unsigned char invalids[4]; /* expected VXCVI bit (0 or 1) after each conversion, in the order: signed dword, unsigned dword, signed word, unsigned word */ +} cvtivals[] = { /* expectations for the non-z forms, which test9 runs with FPSCR set to FPS_RN_NEAR */ + { 0x0000000000000000, 0, 0, 0, 0, {0, 0, 0, 0} }, /* positive zero */ + { 0x8000000000000000, 0, 0, 0, 0, {0, 0, 0, 0} }, /* negative zero: result 0, no VXCVI even for the unsigned forms */ + { 0x3fdfffffffffffff, 0, 0, 0, 0, {0, 0, 0, 0} }, /* largest double below 0.5: result is 0 */ + { 0x3ff0000000000000, 1, 1, 1, 1, {0, 0, 0, 0} }, /* 1.0 */ + { 0xbff0000000000000, -1, 0, -1, 0, {0, 1, 0, 1} }, /* -1.0: unsigned forms give 0 and flag VXCVI */ + { 0x402123456789abcd, 9, 9, 9, 9, {0, 0, 0, 0} }, + { 0x406123456789abcd, 137, 137, 137, 137, {0, 0, 0, 0} }, + { 0x409123456789abcd, 1097, 1097, 1097, 1097, {0, 0, 0, 0} }, + { 0x41c123456789abcd, 0x22468acf, 0x22468acf, 0x22468acf, 0x22468acf, {0, 0, 0, 0} }, + { 0x41d123456789abcd, 0x448d159e, 0x448d159e, 0x448d159e, 0x448d159e, {0, 0, 0, 0} }, + { 0x41e123456789abcd, 0x891a2b3c, 0x891a2b3c, 0x7fffffff, 0x891a2b3c, {0, 0, 1, 0} }, /* fits an unsigned word but not a signed one: only fctiw saturates and flags VXCVI */ + { 0x41f123456789abcd, 0x112345679, 0x112345679, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} }, /* too large for either word form: both saturate */ + { 0xc1f123456789abcd, -0x112345679, 0, 0x80000000, 0, {0, 1, 1, 1} }, /* negative and out of word range: only the signed dword form is valid */ + { 0x432123456789abcd, 0x891a2b3c4d5e6, 0x891a2b3c4d5e6, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} }, + { 0x433123456789abcd, 0x1123456789abcd, 0x1123456789abcd, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} }, + { 0x434123456789abcd, 0x22468acf13579a, 0x22468acf13579a, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} }, + { 0x43c123456789abcd, 0x22468acf13579a00, 0x22468acf13579a00, 0x7fffffff, 0xffffffff, {0, 
0, 1, 1} }, + { 0x43d123456789abcd, 0x448d159e26af3400, 0x448d159e26af3400, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} }, + { 0x43e123456789abcd, 0x7fffffffffffffff, 0x891a2b3c4d5e6800, 0x7fffffff, 0xffffffff, {1, 0, 1, 1} }, /* too large for a signed dword only: fctid saturates and flags VXCVI, fctidu is exact */ + { 0x43f123456789abcd, 0x7fffffffffffffff, 0xffffffffffffffff, 0x7fffffff, 0xffffffff, {1, 1, 1, 1} }, /* too large for every form: all saturate to their maximum */ + { 0xc3f123456789abcd, 0x8000000000000000, 0, 0x80000000, 0, {1, 1, 1, 1} }, /* too negative for every form: signed forms give their minimum, unsigned forms give 0 */ + { 0x7ff0000000000000, 0x7fffffffffffffff, 0xffffffffffffffff, 0x7fffffff, 0xffffffff, {1, 1, 1, 1} }, /* positive infinity */ + { 0xfff0000000000000, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } }, /* negative infinity */ + { 0x7ff923456789abcd, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } }, /* NaN with sign bit clear: still expects the most negative value (0 for unsigned) with VXCVI */ + { 0xfff923456789abcd, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } }, /* NaN with sign bit set: same expectations */ + { 0xbfd123456789abcd, 0, 0, 0, 0, {0, 0, 0, 0} }, /* small negative value that rounds to zero: unsigned forms must give 0 without VXCVI */ +}; + +#define GET_VXCVI() ((get_fpscr() >> 8) & 1) /* current FPSCR value shifted so that the 0x100 (FPS_VXCVI) bit is returned as 0 or 1 */ + +int test9(long arg) /* Runs fctid, fctidu, fctiw and fctiwu on every cvtivals entry and compares results and VXCVI flags with the table. Returns 0 when all entries match, otherwise the 1-based index of the first failing entry. arg is not used. */ +{ + long i; + int ires; + unsigned int ures; + long lres; + unsigned long ulres; + unsigned char inv[4]; /* VXCVI bit observed after each of the four conversions */ + struct cvtivals *vp = cvtivals; + + for (i = 0; i < sizeof(cvtivals) / sizeof(cvtivals[0]); ++i, ++vp) { + set_fpscr(FPS_RN_NEAR); /* FPSCR is rewritten before every conversion; NOTE(review): presumably this clears a VXCVI left by the previous one, confirm against set_fpscr */ + asm("lfd 3,0(%0); fctid 4,3; stfd 4,0(%1)" + : : "b" (&vp->dval), "b" (&lres) : "memory"); /* loads the operand into FPR 3; the three asm statements below reuse FPR 3 without reloading it, so their order must be kept */ + inv[0] = GET_VXCVI(); + set_fpscr(FPS_RN_NEAR); + asm("fctidu 5,3; stfd 5,0(%0)" : : "b" (&ulres) : "memory"); + inv[1] = GET_VXCVI(); + set_fpscr(FPS_RN_NEAR); + asm("fctiw 6,3; stfiwx 6,0,%0" : : "b" (&ires) : "memory"); /* stfiwx stores only the low word of the FPR, which is where the word result is placed */ + inv[2] = GET_VXCVI(); + set_fpscr(FPS_RN_NEAR); + asm("fctiwu 7,3; stfiwx 7,0,%0" : : "b" (&ures) : "memory"); + inv[3] = GET_VXCVI(); + + if (lres != vp->lval || ulres != vp->ulval || ires != vp->ival || ures != vp->uival || + inv[0] != vp->invalids[0] || inv[1] != vp->invalids[1] || + inv[2] != vp->invalids[2] || inv[3] != vp->invalids[3]) { /* any mismatch in a value or a flag fails the entry; the four actual results are handed to print_hex, with "V " as the string argument when VXCVI was observed */ + print_hex(lres, 16, inv[0]? "V ": " "); + print_hex(ulres, 16, inv[1]? "V ": " "); + print_hex(ires, 8, inv[2]? "V ": " "); + print_hex(ures, 8, inv[3]? 
"V ": " "); + return i + 1; /* 1-based index of the first failing entry */ + } + } + return 0; /* every entry matched */ +} + +int fpu_test_9(void) /* Wrapper passed to do_test() by main: calls enable_fp(), then invokes test9 through trapit() with argument 0. NOTE(review): trapit presumably catches unexpected exceptions raised by the test; confirm against its definition. */ +{ + enable_fp(); + return trapit(0, test9); +} + +struct cvtivals cvtizvals[] = { /* Expectations for the z forms (fctidz, fctiduz, fctiwz, fctiwuz), same layout as cvtivals. Rows differ from cvtivals only where discarding the fraction differs from rounding, e.g. 8 instead of 9 for 0x402123456789abcd. */ + { 0x0000000000000000, 0, 0, 0, 0, {0, 0, 0, 0} }, /* positive zero */ + { 0x8000000000000000, 0, 0, 0, 0, {0, 0, 0, 0} }, /* negative zero */ + { 0x3fdfffffffffffff, 0, 0, 0, 0, {0, 0, 0, 0} }, /* largest double below 0.5 */ + { 0x3ff0000000000000, 1, 1, 1, 1, {0, 0, 0, 0} }, /* 1.0 */ + { 0xbff0000000000000, -1, 0, -1, 0, {0, 1, 0, 1} }, /* -1.0: unsigned forms give 0 and flag VXCVI */ + { 0x402123456789abcd, 8, 8, 8, 8, {0, 0, 0, 0} }, /* fraction discarded: 8, where cvtivals expects 9 */ + { 0x406123456789abcd, 137, 137, 137, 137, {0, 0, 0, 0} }, + { 0x409123456789abcd, 1096, 1096, 1096, 1096, {0, 0, 0, 0} }, /* fraction discarded: 1096, where cvtivals expects 1097 */ + { 0x41c123456789abcd, 0x22468acf, 0x22468acf, 0x22468acf, 0x22468acf, {0, 0, 0, 0} }, + { 0x41d123456789abcd, 0x448d159e, 0x448d159e, 0x448d159e, 0x448d159e, {0, 0, 0, 0} }, + { 0x41e123456789abcd, 0x891a2b3c, 0x891a2b3c, 0x7fffffff, 0x891a2b3c, {0, 0, 1, 0} }, /* only the signed word form overflows */ + { 0x41f123456789abcd, 0x112345678, 0x112345678, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} }, + { 0xc1f123456789abcd, -0x112345678, 0, 0x80000000, 0, {0, 1, 1, 1} }, + { 0x432123456789abcd, 0x891a2b3c4d5e6, 0x891a2b3c4d5e6, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} }, + { 0x433123456789abcd, 0x1123456789abcd, 0x1123456789abcd, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} }, + { 0x434123456789abcd, 0x22468acf13579a, 0x22468acf13579a, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} }, + { 0x43c123456789abcd, 0x22468acf13579a00, 0x22468acf13579a00, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} }, + { 0x43d123456789abcd, 0x448d159e26af3400, 0x448d159e26af3400, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} }, + { 0x43e123456789abcd, 0x7fffffffffffffff, 0x891a2b3c4d5e6800, 0x7fffffff, 0xffffffff, {1, 0, 1, 1} }, /* too large for a signed dword only */ + { 0x43f123456789abcd, 0x7fffffffffffffff, 0xffffffffffffffff, 0x7fffffff, 0xffffffff, {1, 1, 1, 1} }, + { 0xc3f123456789abcd, 0x8000000000000000, 0, 0x80000000, 0, {1, 1, 1, 1} }, + { 0x7ff0000000000000, 0x7fffffffffffffff, 0xffffffffffffffff, 0x7fffffff, 0xffffffff, {1, 1, 1, 1} }, /* positive infinity */ + { 0xfff0000000000000, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } }, /* negative infinity */ + 
{ 0x7ff923456789abcd, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } }, /* NaN with sign bit clear: most negative value (0 for unsigned) with VXCVI */ + { 0xfff923456789abcd, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } }, /* NaN with sign bit set: same expectations */ +}; + +int test10(long arg) /* Runs fctidz, fctiduz, fctiwz and fctiwuz on every cvtizvals entry and compares results and VXCVI flags with the table. Returns 0 when all entries match, otherwise the 1-based index of the first failing entry. arg is not used. */ +{ + long i; + int ires; + unsigned int ures; + long lres; + unsigned long ulres; + unsigned char inv[4]; /* VXCVI bit observed after each of the four conversions */ + struct cvtivals *vp = cvtizvals; + + for (i = 0; i < sizeof(cvtizvals) / sizeof(cvtizvals[0]); ++i, ++vp) { + set_fpscr(FPS_RN_NEAR); /* FPSCR is rewritten before every conversion; the expected values in cvtizvals are truncated ones even though FPS_RN_NEAR is what gets written here */ + asm("lfd 3,0(%0); fctidz 4,3; stfd 4,0(%1)" + : : "b" (&vp->dval), "b" (&lres) : "memory"); /* loads the operand into FPR 3; the three asm statements below reuse FPR 3 without reloading it, so their order must be kept */ + inv[0] = GET_VXCVI(); + set_fpscr(FPS_RN_NEAR); + asm("fctiduz 5,3; stfd 5,0(%0)" : : "b" (&ulres) : "memory"); + inv[1] = GET_VXCVI(); + set_fpscr(FPS_RN_NEAR); + asm("fctiwz 6,3; stfiwx 6,0,%0" : : "b" (&ires) : "memory"); /* stfiwx stores only the low word of the FPR, which is where the word result is placed */ + inv[2] = GET_VXCVI(); + set_fpscr(FPS_RN_NEAR); + asm("fctiwuz 7,3; stfiwx 7,0,%0" : : "b" (&ures) : "memory"); + inv[3] = GET_VXCVI(); + + if (lres != vp->lval || ulres != vp->ulval || ires != vp->ival || ures != vp->uival || + inv[0] != vp->invalids[0] || inv[1] != vp->invalids[1] || + inv[2] != vp->invalids[2] || inv[3] != vp->invalids[3]) { /* any mismatch in a value or a flag fails the entry; the four actual results are handed to print_hex, with "V " as the string argument when VXCVI was observed */ + print_hex(lres, 16, inv[0]? "V ": " "); + print_hex(ulres, 16, inv[1]? "V ": " "); + print_hex(ires, 8, inv[2]? "V ": " "); + print_hex(ures, 8, inv[3]? "V ": " "); + return i + 1; /* 1-based index of the first failing entry */ + } + } + return 0; /* every entry matched */ +} + +int fpu_test_10(void) /* Wrapper passed to do_test() by main: calls enable_fp(), then invokes test10 through trapit() with argument 0. NOTE(review): trapit presumably catches unexpected exceptions raised by the test; confirm against its definition. */ +{ + enable_fp(); + return trapit(0, test10); +} + int fail = 0; void do_test(int num, int (*test)(void)) @@ -631,6 +786,8 @@ int main(void) do_test(6, fpu_test_6); do_test(7, fpu_test_7); do_test(8, fpu_test_8); + do_test(9, fpu_test_9); /* the two new conversion tests; the console_out hunk that follows adds their expected PASS lines */ + do_test(10, fpu_test_10); return fail; } diff --git a/tests/test_fpu.console_out b/tests/test_fpu.console_out index 25e791c..3e84260 100644 --- a/tests/test_fpu.console_out +++ b/tests/test_fpu.console_out @@ -6,3 +6,5 @@ test 05:PASS test 06:PASS test 07:PASS test 08:PASS +test 09:PASS +test 10:PASS