# Patch overview (text ahead of the first "diff --git" header; not part of any hunk).
#
# decode1.vhdl
#   Adds eight decode-table rows, all dispatched to the FPU as OP_FPOP, labelled
#   fctiw, fctiwu, fctid, fctidu, fctiwz, fctiwuz, fctidz and fctiduz.
#
# fpu.vhdl
#   * state_t gains DO_FCTI plus INT_SHIFT, INT_ROUND, INT_ISHIFT, INT_FINAL,
#     INT_CHECK and INT_OFLOW.
#   * Decode case "01110" now picks DO_FCFID when int_input = '1' and DO_FCTI
#     otherwise; new case "01111" sets round_mode to "001" and enters DO_FCTI.
#   * DO_FCTI: a ZERO operand completes at once.  A FINITE operand goes to
#     INT_OFLOW when its exponent is >= 64 (or >= 32 when insn(9) = '0'), to
#     INT_ISHIFT when the exponent is >= 52 (INT_OFLOW instead if insn(8) = '1'
#     and the operand is negative), and to INT_SHIFT otherwise.  INFINITY and NAN
#     go to INT_OFLOW.
#   * INT_CHECK (on a failed range check) and INT_OFLOW set FPSCR_VXCVI, raise
#     'invalid' and select RES_MISC with
#     misc_sel = '1' & insn(9 downto 8) & result_sign; the new misc cases
#     "1000".."1111" provide the constants returned in that situation.
#
# tests/fpu/fpu.c, tests/test_fpu.console_out
#   Adds tests 9 and 10.  Each walks a table of struct cvtivals, runs the four
#   conversions (fctid/fctidu/fctiw/fctiwu in test9, the ...z forms in test10),
#   and compares both the stored result and FPSCR bit 8 (read through GET_VXCVI)
#   with the table; the first mismatching row index + 1 is returned.
#
# NOTE(review): this copy was rebuilt from a newline-collapsed dump.  Line breaks
# follow the @@ hunk counts, but indentation and column alignment are a best
# guess -- compare whitespace with the target tree (or apply with whitespace
# ignored) before relying on it.
diff --git a/decode1.vhdl b/decode1.vhdl
index 284fb08..c659e3e 100644
--- a/decode1.vhdl
+++ b/decode1.vhdl
@@ -442,8 +442,16 @@ architecture behaviour of decode1 is
         2#100000100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/8=fnabs
         2#100001000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 8/8=fabs
         2#110000000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- 0/12=frsp
+        2#111000000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 0/14=fctiw
+        2#111000100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/14=fctiwu
+        2#111011001# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 25/14=fctid
         2#111011010# => (FPU, OP_FPOP_I, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 26/14=fcfid
+        2#111011101# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 29/14=fctidu
         2#111011110# => (FPU, OP_FPOP_I, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 30/14=fcfidu
+        2#111100000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 0/15=fctiwz
+        2#111100100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/15=fctiwuz
+        2#111111001# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 25/15=fctidz
+        2#111111101# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 29/15=fctiduz
         others => illegal_inst
         );
 
diff --git a/fpu.vhdl b/fpu.vhdl
index 7576562..6301fa7 100644
--- a/fpu.vhdl
+++ b/fpu.vhdl
@@ -38,8 +38,10 @@ architecture behaviour of fpu is
     type state_t is (IDLE,
                      DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
                      DO_FMR,
-                     DO_FCFID,
+                     DO_FCFID, DO_FCTI,
                      DO_FRSP,
+                     INT_SHIFT, INT_ROUND, INT_ISHIFT,
+                     INT_FINAL, INT_CHECK, INT_OFLOW,
                      FINISH, NORMALIZE,
                      ROUND_UFLOW, ROUND_OFLOW,
                      ROUNDING, ROUNDING_2, ROUNDING_3,
@@ -363,6 +365,8 @@ begin
         variable clz : std_ulogic_vector(5 downto 0);
         variable set_x : std_ulogic;
         variable mshift : signed(EXP_BITS-1 downto 0);
+        variable need_check : std_ulogic;
+        variable msb : std_ulogic;
     begin
         v := r;
         illegal := '0';
@@ -461,8 +465,15 @@ begin
                         when "01100" =>
                             v.state := DO_FRSP;
                         when "01110" =>
-                            -- fcfid[u][s]
-                            v.state := DO_FCFID;
+                            if int_input = '1' then
+                                -- fcfid[u][s]
+                                v.state := DO_FCFID;
+                            else
+                                v.state := DO_FCTI;
+                            end if;
+                        when "01111" =>
+                            v.round_mode := "001";
+                            v.state := DO_FCTI;
                         when others =>
                             illegal := '1';
                     end case;
@@ -603,6 +614,47 @@ begin
                     arith_done := '1';
                 end if;
 
+            when DO_FCTI =>
+                -- instr bit 9: 1=dword 0=word
+                -- instr bit 8: 1=unsigned 0=signed
+                -- instr bit 1: 1=round to zero 0=use fpscr[RN]
+                opsel_a <= AIN_B;
+                v.result_class := r.b.class;
+                v.result_sign := r.b.negative;
+                v.result_exp := r.b.exponent;
+                v.fpscr(FPSCR_FR) := '0';
+                v.fpscr(FPSCR_FI) := '0';
+                if r.b.class = NAN and r.b.mantissa(53) = '0' then
+                    -- Signalling NAN
+                    v.fpscr(FPSCR_VXSNAN) := '1';
+                    invalid := '1';
+                end if;
+
+                v.int_result := '1';
+                case r.b.class is
+                    when ZERO =>
+                        arith_done := '1';
+                    when FINITE =>
+                        if r.b.exponent >= to_signed(64, EXP_BITS) or
+                            (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
+                            v.state := INT_OFLOW;
+                        elsif r.b.exponent >= to_signed(52, EXP_BITS) then
+                            -- integer already, no rounding required,
+                            -- shift into final position
+                            v.shift := r.b.exponent - to_signed(54, EXP_BITS);
+                            if r.insn(8) = '1' and r.b.negative = '1' then
+                                v.state := INT_OFLOW;
+                            else
+                                v.state := INT_ISHIFT;
+                            end if;
+                        else
+                            v.shift := r.b.exponent - to_signed(52, EXP_BITS);
+                            v.state := INT_SHIFT;
+                        end if;
+                    when INFINITY | NAN =>
+                        v.state := INT_OFLOW;
+                end case;
+
             when DO_FCFID =>
                 v.result_sign := '0';
                 opsel_a <= AIN_B;
@@ -622,6 +674,81 @@ begin
                     v.state := FINISH;
                 end if;
 
+            when INT_SHIFT =>
+                opsel_r <= RES_SHIFT;
+                set_x := '1';
+                v.state := INT_ROUND;
+                v.shift := to_signed(-2, EXP_BITS);
+
+            when INT_ROUND =>
+                opsel_r <= RES_SHIFT;
+                round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
+                v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
+                -- Check for negative values that don't round to 0 for fcti*u*
+                if r.insn(8) = '1' and r.result_sign = '1' and
+                    (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
+                    v.state := INT_OFLOW;
+                else
+                    v.state := INT_FINAL;
+                end if;
+
+            when INT_ISHIFT =>
+                opsel_r <= RES_SHIFT;
+                v.state := INT_FINAL;
+
+            when INT_FINAL =>
+                -- Negate if necessary, and increment for rounding if needed
+                opsel_ainv <= r.result_sign;
+                carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
+                -- Check for possible overflows
+                case r.insn(9 downto 8) is
+                    when "00" => -- fctiw[z]
+                        need_check := r.r(31) or (r.r(30) and not r.result_sign);
+                    when "01" => -- fctiwu[z]
+                        need_check := r.r(31);
+                    when "10" => -- fctid[z]
+                        need_check := r.r(63) or (r.r(62) and not r.result_sign);
+                    when others => -- fctidu[z]
+                        need_check := r.r(63);
+                end case;
+                if need_check = '1' then
+                    v.state := INT_CHECK;
+                else
+                    if r.fpscr(FPSCR_FI) = '1' then
+                        v.fpscr(FPSCR_XX) := '1';
+                    end if;
+                    arith_done := '1';
+                end if;
+
+            when INT_CHECK =>
+                if r.insn(9) = '0' then
+                    msb := r.r(31);
+                else
+                    msb := r.r(63);
+                end if;
+                misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
+                if (r.insn(8) = '0' and msb /= r.result_sign) or
+                    (r.insn(8) = '1' and msb /= '1') then
+                    opsel_r <= RES_MISC;
+                    v.fpscr(FPSCR_VXCVI) := '1';
+                    invalid := '1';
+                else
+                    if r.fpscr(FPSCR_FI) = '1' then
+                        v.fpscr(FPSCR_XX) := '1';
+                    end if;
+                end if;
+                arith_done := '1';
+
+            when INT_OFLOW =>
+                opsel_r <= RES_MISC;
+                misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
+                if r.b.class = NAN then
+                    misc_sel(0) <= '1';
+                end if;
+                v.fpscr(FPSCR_VXCVI) := '1';
+                invalid := '1';
+                arith_done := '1';
+
             when FINISH =>
                 if r.r(63 downto 54) /= "0000000001" then
                     renormalize := '1';
@@ -846,6 +973,30 @@ begin
             when "0011" =>
                 -- mantissa of max representable SP number
                 misc := x"007fffff80000000";
+            when "1000" =>
+                -- max positive result for fctiw[z]
+                misc := x"000000007fffffff";
+            when "1001" =>
+                -- max negative result for fctiw[z]
+                misc := x"ffffffff80000000";
+            when "1010" =>
+                -- max positive result for fctiwu[z]
+                misc := x"00000000ffffffff";
+            when "1011" =>
+                -- max negative result for fctiwu[z]
+                misc := x"0000000000000000";
+            when "1100" =>
+                -- max positive result for fctid[z]
+                misc := x"7fffffffffffffff";
+            when "1101" =>
+                -- max negative result for fctid[z]
+                misc := x"8000000000000000";
+            when "1110" =>
+                -- max positive result for fctidu[z]
+                misc := x"ffffffffffffffff";
+            when "1111" =>
+                -- max negative result for fctidu[z]
+                misc := x"0000000000000000";
             when others =>
                 misc := x"0000000000000000";
         end case;
diff --git a/tests/fpu/fpu.c b/tests/fpu/fpu.c
index aff6d6c..3c6a9bd 100644
--- a/tests/fpu/fpu.c
+++ b/tests/fpu/fpu.c
@@ -19,6 +19,7 @@
 #define FPS_UE		0x20
 #define FPS_OE		0x40
 #define FPS_VE		0x80
+#define FPS_VXCVI	0x100
 #define FPS_VXSOFT	0x400
 
 extern int trapit(long arg, int (*func)(long));
@@ -598,6 +599,160 @@ int fpu_test_8(void)
 	return trapit(0, test8);
 }
 
+struct cvtivals {
+	unsigned long dval;
+	long lval;
+	unsigned long ulval;
+	int ival;
+	unsigned int uival;
+	unsigned char invalids[4];
+} cvtivals[] = {
+	{ 0x0000000000000000, 0, 0, 0, 0, {0, 0, 0, 0} },
+	{ 0x8000000000000000, 0, 0, 0, 0, {0, 0, 0, 0} },
+	{ 0x3fdfffffffffffff, 0, 0, 0, 0, {0, 0, 0, 0} },
+	{ 0x3ff0000000000000, 1, 1, 1, 1, {0, 0, 0, 0} },
+	{ 0xbff0000000000000, -1, 0, -1, 0, {0, 1, 0, 1} },
+	{ 0x402123456789abcd, 9, 9, 9, 9, {0, 0, 0, 0} },
+	{ 0x406123456789abcd, 137, 137, 137, 137, {0, 0, 0, 0} },
+	{ 0x409123456789abcd, 1097, 1097, 1097, 1097, {0, 0, 0, 0} },
+	{ 0x41c123456789abcd, 0x22468acf, 0x22468acf, 0x22468acf, 0x22468acf, {0, 0, 0, 0} },
+	{ 0x41d123456789abcd, 0x448d159e, 0x448d159e, 0x448d159e, 0x448d159e, {0, 0, 0, 0} },
+	{ 0x41e123456789abcd, 0x891a2b3c, 0x891a2b3c, 0x7fffffff, 0x891a2b3c, {0, 0, 1, 0} },
+	{ 0x41f123456789abcd, 0x112345679, 0x112345679, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
+	{ 0xc1f123456789abcd, -0x112345679, 0, 0x80000000, 0, {0, 1, 1, 1} },
+	{ 0x432123456789abcd, 0x891a2b3c4d5e6, 0x891a2b3c4d5e6, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
+	{ 0x433123456789abcd, 0x1123456789abcd, 0x1123456789abcd, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
+	{ 0x434123456789abcd, 0x22468acf13579a, 0x22468acf13579a, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
+	{ 0x43c123456789abcd, 0x22468acf13579a00, 0x22468acf13579a00, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
+	{ 0x43d123456789abcd, 0x448d159e26af3400, 0x448d159e26af3400, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
+	{ 0x43e123456789abcd, 0x7fffffffffffffff, 0x891a2b3c4d5e6800, 0x7fffffff, 0xffffffff, {1, 0, 1, 1} },
+	{ 0x43f123456789abcd, 0x7fffffffffffffff, 0xffffffffffffffff, 0x7fffffff, 0xffffffff, {1, 1, 1, 1} },
+	{ 0xc3f123456789abcd, 0x8000000000000000, 0, 0x80000000, 0, {1, 1, 1, 1} },
+	{ 0x7ff0000000000000, 0x7fffffffffffffff, 0xffffffffffffffff, 0x7fffffff, 0xffffffff, {1, 1, 1, 1} },
+	{ 0xfff0000000000000, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } },
+	{ 0x7ff923456789abcd, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } },
+	{ 0xfff923456789abcd, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } },
+	{ 0xbfd123456789abcd, 0, 0, 0, 0, {0, 0, 0, 0} },
+};
+
+#define GET_VXCVI() ((get_fpscr() >> 8) & 1)
+
+int test9(long arg)
+{
+	long i;
+	int ires;
+	unsigned int ures;
+	long lres;
+	unsigned long ulres;
+	unsigned char inv[4];
+	struct cvtivals *vp = cvtivals;
+
+	for (i = 0; i < sizeof(cvtivals) / sizeof(cvtivals[0]); ++i, ++vp) {
+		set_fpscr(FPS_RN_NEAR);
+		asm("lfd 3,0(%0); fctid 4,3; stfd 4,0(%1)"
+		    : : "b" (&vp->dval), "b" (&lres) : "memory");
+		inv[0] = GET_VXCVI();
+		set_fpscr(FPS_RN_NEAR);
+		asm("fctidu 5,3; stfd 5,0(%0)" : : "b" (&ulres) : "memory");
+		inv[1] = GET_VXCVI();
+		set_fpscr(FPS_RN_NEAR);
+		asm("fctiw 6,3; stfiwx 6,0,%0" : : "b" (&ires) : "memory");
+		inv[2] = GET_VXCVI();
+		set_fpscr(FPS_RN_NEAR);
+		asm("fctiwu 7,3; stfiwx 7,0,%0" : : "b" (&ures) : "memory");
+		inv[3] = GET_VXCVI();
+
+		if (lres != vp->lval || ulres != vp->ulval || ires != vp->ival || ures != vp->uival ||
+		    inv[0] != vp->invalids[0] || inv[1] != vp->invalids[1] ||
+		    inv[2] != vp->invalids[2] || inv[3] != vp->invalids[3]) {
+			print_hex(lres, 16, inv[0]? "V ": " ");
+			print_hex(ulres, 16, inv[1]? "V ": " ");
+			print_hex(ires, 8, inv[2]? "V ": " ");
+			print_hex(ures, 8, inv[3]? "V ": " ");
+			return i + 1;
+		}
+	}
+	return 0;
+}
+
+int fpu_test_9(void)
+{
+	enable_fp();
+	return trapit(0, test9);
+}
+
+struct cvtivals cvtizvals[] = {
+	{ 0x0000000000000000, 0, 0, 0, 0, {0, 0, 0, 0} },
+	{ 0x8000000000000000, 0, 0, 0, 0, {0, 0, 0, 0} },
+	{ 0x3fdfffffffffffff, 0, 0, 0, 0, {0, 0, 0, 0} },
+	{ 0x3ff0000000000000, 1, 1, 1, 1, {0, 0, 0, 0} },
+	{ 0xbff0000000000000, -1, 0, -1, 0, {0, 1, 0, 1} },
+	{ 0x402123456789abcd, 8, 8, 8, 8, {0, 0, 0, 0} },
+	{ 0x406123456789abcd, 137, 137, 137, 137, {0, 0, 0, 0} },
+	{ 0x409123456789abcd, 1096, 1096, 1096, 1096, {0, 0, 0, 0} },
+	{ 0x41c123456789abcd, 0x22468acf, 0x22468acf, 0x22468acf, 0x22468acf, {0, 0, 0, 0} },
+	{ 0x41d123456789abcd, 0x448d159e, 0x448d159e, 0x448d159e, 0x448d159e, {0, 0, 0, 0} },
+	{ 0x41e123456789abcd, 0x891a2b3c, 0x891a2b3c, 0x7fffffff, 0x891a2b3c, {0, 0, 1, 0} },
+	{ 0x41f123456789abcd, 0x112345678, 0x112345678, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
+	{ 0xc1f123456789abcd, -0x112345678, 0, 0x80000000, 0, {0, 1, 1, 1} },
+	{ 0x432123456789abcd, 0x891a2b3c4d5e6, 0x891a2b3c4d5e6, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
+	{ 0x433123456789abcd, 0x1123456789abcd, 0x1123456789abcd, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
+	{ 0x434123456789abcd, 0x22468acf13579a, 0x22468acf13579a, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
+	{ 0x43c123456789abcd, 0x22468acf13579a00, 0x22468acf13579a00, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
+	{ 0x43d123456789abcd, 0x448d159e26af3400, 0x448d159e26af3400, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
+	{ 0x43e123456789abcd, 0x7fffffffffffffff, 0x891a2b3c4d5e6800, 0x7fffffff, 0xffffffff, {1, 0, 1, 1} },
+	{ 0x43f123456789abcd, 0x7fffffffffffffff, 0xffffffffffffffff, 0x7fffffff, 0xffffffff, {1, 1, 1, 1} },
+	{ 0xc3f123456789abcd, 0x8000000000000000, 0, 0x80000000, 0, {1, 1, 1, 1} },
+	{ 0x7ff0000000000000, 0x7fffffffffffffff, 0xffffffffffffffff, 0x7fffffff, 0xffffffff, {1, 1, 1, 1} },
+	{ 0xfff0000000000000, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } },
+	{ 0x7ff923456789abcd, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } },
+	{ 0xfff923456789abcd, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } },
+};
+
+int test10(long arg)
+{
+	long i;
+	int ires;
+	unsigned int ures;
+	long lres;
+	unsigned long ulres;
+	unsigned char inv[4];
+	struct cvtivals *vp = cvtizvals;
+
+	for (i = 0; i < sizeof(cvtizvals) / sizeof(cvtizvals[0]); ++i, ++vp) {
+		set_fpscr(FPS_RN_NEAR);
+		asm("lfd 3,0(%0); fctidz 4,3; stfd 4,0(%1)"
+		    : : "b" (&vp->dval), "b" (&lres) : "memory");
+		inv[0] = GET_VXCVI();
+		set_fpscr(FPS_RN_NEAR);
+		asm("fctiduz 5,3; stfd 5,0(%0)" : : "b" (&ulres) : "memory");
+		inv[1] = GET_VXCVI();
+		set_fpscr(FPS_RN_NEAR);
+		asm("fctiwz 6,3; stfiwx 6,0,%0" : : "b" (&ires) : "memory");
+		inv[2] = GET_VXCVI();
+		set_fpscr(FPS_RN_NEAR);
+		asm("fctiwuz 7,3; stfiwx 7,0,%0" : : "b" (&ures) : "memory");
+		inv[3] = GET_VXCVI();
+
+		if (lres != vp->lval || ulres != vp->ulval || ires != vp->ival || ures != vp->uival ||
+		    inv[0] != vp->invalids[0] || inv[1] != vp->invalids[1] ||
+		    inv[2] != vp->invalids[2] || inv[3] != vp->invalids[3]) {
+			print_hex(lres, 16, inv[0]? "V ": " ");
+			print_hex(ulres, 16, inv[1]? "V ": " ");
+			print_hex(ires, 8, inv[2]? "V ": " ");
+			print_hex(ures, 8, inv[3]? "V ": " ");
+			return i + 1;
+		}
+	}
+	return 0;
+}
+
+int fpu_test_10(void)
+{
+	enable_fp();
+	return trapit(0, test10);
+}
+
 int fail = 0;
 
 void do_test(int num, int (*test)(void))
@@ -631,6 +786,8 @@ int main(void)
 	do_test(6, fpu_test_6);
 	do_test(7, fpu_test_7);
 	do_test(8, fpu_test_8);
+	do_test(9, fpu_test_9);
+	do_test(10, fpu_test_10);
 
 	return fail;
 }
diff --git a/tests/test_fpu.console_out b/tests/test_fpu.console_out
index 25e791c..3e84260 100644
--- a/tests/test_fpu.console_out
+++ b/tests/test_fpu.console_out
@@ -6,3 +6,5 @@ test 05:PASS
 test 06:PASS
 test 07:PASS
 test 08:PASS
+test 09:PASS
+test 10:PASS