FPU: Implement floating convert to integer instructions

This implements fctiw, fctiwz, fctiwu, fctiwuz, fctid, fctidz, fctidu
and fctiduz, and adds tests for them.

There are some subtleties around the setting of the inexact (XX) and
invalid conversion (VXCVI) flags in the FPSCR.  If the rounded value
ends up being out of range, we need to set VXCVI and not XX.  For a
conversion to unsigned word or doubleword of a negative value that
rounds to zero, we need to set XX and not VXCVI.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/245/head
Paul Mackerras 4 years ago
parent 36130f1db3
commit 03d1aa968a

@ -442,8 +442,16 @@ architecture behaviour of decode1 is
2#100000100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/8=fnabs
2#100001000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 8/8=fabs
2#110000000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- 0/12=frsp
2#111000000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 0/14=fctiw
2#111000100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/14=fctiwu
2#111011001# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 25/14=fctid
2#111011010# => (FPU, OP_FPOP_I, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 26/14=fcfid
2#111011101# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 29/14=fctidu
2#111011110# => (FPU, OP_FPOP_I, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 30/14=fcfidu
2#111100000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 0/15=fctiwz
2#111100100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/15=fctiwuz
2#111111001# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 25/15=fctidz
2#111111101# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 29/15=fctiduz
others => illegal_inst
);


@ -38,8 +38,10 @@ architecture behaviour of fpu is
type state_t is (IDLE,
DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
DO_FMR,
DO_FCFID,
DO_FCFID, DO_FCTI,
DO_FRSP,
INT_SHIFT, INT_ROUND, INT_ISHIFT,
INT_FINAL, INT_CHECK, INT_OFLOW,
FINISH, NORMALIZE,
ROUND_UFLOW, ROUND_OFLOW,
ROUNDING, ROUNDING_2, ROUNDING_3,
@ -363,6 +365,8 @@ begin
variable clz : std_ulogic_vector(5 downto 0);
variable set_x : std_ulogic;
variable mshift : signed(EXP_BITS-1 downto 0);
variable need_check : std_ulogic;
variable msb : std_ulogic;
begin
v := r;
illegal := '0';
@ -461,8 +465,15 @@ begin
when "01100" =>
v.state := DO_FRSP;
when "01110" =>
-- fcfid[u][s]
v.state := DO_FCFID;
if int_input = '1' then
-- fcfid[u][s]
v.state := DO_FCFID;
else
v.state := DO_FCTI;
end if;
when "01111" =>
v.round_mode := "001";
v.state := DO_FCTI;
when others =>
illegal := '1';
end case;
@ -603,6 +614,47 @@ begin
arith_done := '1';
end if;

when DO_FCTI =>
-- instr bit 9: 1=dword 0=word
-- instr bit 8: 1=unsigned 0=signed
-- instr bit 1: 1=round to zero 0=use fpscr[RN]
opsel_a <= AIN_B;
v.result_class := r.b.class;
v.result_sign := r.b.negative;
v.result_exp := r.b.exponent;
v.fpscr(FPSCR_FR) := '0';
v.fpscr(FPSCR_FI) := '0';
if r.b.class = NAN and r.b.mantissa(53) = '0' then
-- Signalling NAN
v.fpscr(FPSCR_VXSNAN) := '1';
invalid := '1';
end if;

v.int_result := '1';
case r.b.class is
when ZERO =>
arith_done := '1';
when FINITE =>
if r.b.exponent >= to_signed(64, EXP_BITS) or
(r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
v.state := INT_OFLOW;
elsif r.b.exponent >= to_signed(52, EXP_BITS) then
-- integer already, no rounding required,
-- shift into final position
v.shift := r.b.exponent - to_signed(54, EXP_BITS);
if r.insn(8) = '1' and r.b.negative = '1' then
v.state := INT_OFLOW;
else
v.state := INT_ISHIFT;
end if;
else
v.shift := r.b.exponent - to_signed(52, EXP_BITS);
v.state := INT_SHIFT;
end if;
when INFINITY | NAN =>
v.state := INT_OFLOW;
end case;

when DO_FCFID =>
v.result_sign := '0';
opsel_a <= AIN_B;
@ -622,6 +674,81 @@ begin
v.state := FINISH;
end if;

when INT_SHIFT =>
opsel_r <= RES_SHIFT;
set_x := '1';
v.state := INT_ROUND;
v.shift := to_signed(-2, EXP_BITS);

when INT_ROUND =>
opsel_r <= RES_SHIFT;
round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
-- Check for negative values that don't round to 0 for fcti*u*
if r.insn(8) = '1' and r.result_sign = '1' and
(r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
v.state := INT_OFLOW;
else
v.state := INT_FINAL;
end if;

when INT_ISHIFT =>
opsel_r <= RES_SHIFT;
v.state := INT_FINAL;

when INT_FINAL =>
-- Negate if necessary, and increment for rounding if needed
opsel_ainv <= r.result_sign;
carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
-- Check for possible overflows
case r.insn(9 downto 8) is
when "00" => -- fctiw[z]
need_check := r.r(31) or (r.r(30) and not r.result_sign);
when "01" => -- fctiwu[z]
need_check := r.r(31);
when "10" => -- fctid[z]
need_check := r.r(63) or (r.r(62) and not r.result_sign);
when others => -- fctidu[z]
need_check := r.r(63);
end case;
if need_check = '1' then
v.state := INT_CHECK;
else
if r.fpscr(FPSCR_FI) = '1' then
v.fpscr(FPSCR_XX) := '1';
end if;
arith_done := '1';
end if;

when INT_CHECK =>
if r.insn(9) = '0' then
msb := r.r(31);
else
msb := r.r(63);
end if;
misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
if (r.insn(8) = '0' and msb /= r.result_sign) or
(r.insn(8) = '1' and msb /= '1') then
opsel_r <= RES_MISC;
v.fpscr(FPSCR_VXCVI) := '1';
invalid := '1';
else
if r.fpscr(FPSCR_FI) = '1' then
v.fpscr(FPSCR_XX) := '1';
end if;
end if;
arith_done := '1';

when INT_OFLOW =>
opsel_r <= RES_MISC;
misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
if r.b.class = NAN then
misc_sel(0) <= '1';
end if;
v.fpscr(FPSCR_VXCVI) := '1';
invalid := '1';
arith_done := '1';

when FINISH =>
if r.r(63 downto 54) /= "0000000001" then
renormalize := '1';
@ -846,6 +973,30 @@ begin
when "0011" =>
-- mantissa of max representable SP number
misc := x"007fffff80000000";
when "1000" =>
-- max positive result for fctiw[z]
misc := x"000000007fffffff";
when "1001" =>
-- max negative result for fctiw[z]
misc := x"ffffffff80000000";
when "1010" =>
-- max positive result for fctiwu[z]
misc := x"00000000ffffffff";
when "1011" =>
-- max negative result for fctiwu[z]
misc := x"0000000000000000";
when "1100" =>
-- max positive result for fctid[z]
misc := x"7fffffffffffffff";
when "1101" =>
-- max negative result for fctid[z]
misc := x"8000000000000000";
when "1110" =>
-- max positive result for fctidu[z]
misc := x"ffffffffffffffff";
when "1111" =>
-- max negative result for fctidu[z]
misc := x"0000000000000000";
when others =>
misc := x"0000000000000000";
end case;

@ -19,6 +19,7 @@
#define FPS_UE 0x20
#define FPS_OE 0x40
#define FPS_VE 0x80
#define FPS_VXCVI 0x100
#define FPS_VXSOFT 0x400

extern int trapit(long arg, int (*func)(long));
@ -598,6 +599,160 @@ int fpu_test_8(void)
return trapit(0, test8);
}

struct cvtivals {
unsigned long dval;
long lval;
unsigned long ulval;
int ival;
unsigned int uival;
unsigned char invalids[4];
} cvtivals[] = {
{ 0x0000000000000000, 0, 0, 0, 0, {0, 0, 0, 0} },
{ 0x8000000000000000, 0, 0, 0, 0, {0, 0, 0, 0} },
{ 0x3fdfffffffffffff, 0, 0, 0, 0, {0, 0, 0, 0} },
{ 0x3ff0000000000000, 1, 1, 1, 1, {0, 0, 0, 0} },
{ 0xbff0000000000000, -1, 0, -1, 0, {0, 1, 0, 1} },
{ 0x402123456789abcd, 9, 9, 9, 9, {0, 0, 0, 0} },
{ 0x406123456789abcd, 137, 137, 137, 137, {0, 0, 0, 0} },
{ 0x409123456789abcd, 1097, 1097, 1097, 1097, {0, 0, 0, 0} },
{ 0x41c123456789abcd, 0x22468acf, 0x22468acf, 0x22468acf, 0x22468acf, {0, 0, 0, 0} },
{ 0x41d123456789abcd, 0x448d159e, 0x448d159e, 0x448d159e, 0x448d159e, {0, 0, 0, 0} },
{ 0x41e123456789abcd, 0x891a2b3c, 0x891a2b3c, 0x7fffffff, 0x891a2b3c, {0, 0, 1, 0} },
{ 0x41f123456789abcd, 0x112345679, 0x112345679, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
{ 0xc1f123456789abcd, -0x112345679, 0, 0x80000000, 0, {0, 1, 1, 1} },
{ 0x432123456789abcd, 0x891a2b3c4d5e6, 0x891a2b3c4d5e6, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
{ 0x433123456789abcd, 0x1123456789abcd, 0x1123456789abcd, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
{ 0x434123456789abcd, 0x22468acf13579a, 0x22468acf13579a, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
{ 0x43c123456789abcd, 0x22468acf13579a00, 0x22468acf13579a00, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
{ 0x43d123456789abcd, 0x448d159e26af3400, 0x448d159e26af3400, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
{ 0x43e123456789abcd, 0x7fffffffffffffff, 0x891a2b3c4d5e6800, 0x7fffffff, 0xffffffff, {1, 0, 1, 1} },
{ 0x43f123456789abcd, 0x7fffffffffffffff, 0xffffffffffffffff, 0x7fffffff, 0xffffffff, {1, 1, 1, 1} },
{ 0xc3f123456789abcd, 0x8000000000000000, 0, 0x80000000, 0, {1, 1, 1, 1} },
{ 0x7ff0000000000000, 0x7fffffffffffffff, 0xffffffffffffffff, 0x7fffffff, 0xffffffff, {1, 1, 1, 1} },
{ 0xfff0000000000000, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } },
{ 0x7ff923456789abcd, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } },
{ 0xfff923456789abcd, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } },
{ 0xbfd123456789abcd, 0, 0, 0, 0, {0, 0, 0, 0} },
};

#define GET_VXCVI() ((get_fpscr() >> 8) & 1)

int test9(long arg)
{
long i;
int ires;
unsigned int ures;
long lres;
unsigned long ulres;
unsigned char inv[4];
struct cvtivals *vp = cvtivals;

for (i = 0; i < sizeof(cvtivals) / sizeof(cvtivals[0]); ++i, ++vp) {
set_fpscr(FPS_RN_NEAR);
asm("lfd 3,0(%0); fctid 4,3; stfd 4,0(%1)"
: : "b" (&vp->dval), "b" (&lres) : "memory");
inv[0] = GET_VXCVI();
set_fpscr(FPS_RN_NEAR);
asm("fctidu 5,3; stfd 5,0(%0)" : : "b" (&ulres) : "memory");
inv[1] = GET_VXCVI();
set_fpscr(FPS_RN_NEAR);
asm("fctiw 6,3; stfiwx 6,0,%0" : : "b" (&ires) : "memory");
inv[2] = GET_VXCVI();
set_fpscr(FPS_RN_NEAR);
asm("fctiwu 7,3; stfiwx 7,0,%0" : : "b" (&ures) : "memory");
inv[3] = GET_VXCVI();

if (lres != vp->lval || ulres != vp->ulval || ires != vp->ival || ures != vp->uival ||
inv[0] != vp->invalids[0] || inv[1] != vp->invalids[1] ||
inv[2] != vp->invalids[2] || inv[3] != vp->invalids[3]) {
print_hex(lres, 16, inv[0]? "V ": " ");
print_hex(ulres, 16, inv[1]? "V ": " ");
print_hex(ires, 8, inv[2]? "V ": " ");
print_hex(ures, 8, inv[3]? "V ": " ");
return i + 1;
}
}
return 0;
}

int fpu_test_9(void)
{
enable_fp();
return trapit(0, test9);
}

struct cvtivals cvtizvals[] = {
{ 0x0000000000000000, 0, 0, 0, 0, {0, 0, 0, 0} },
{ 0x8000000000000000, 0, 0, 0, 0, {0, 0, 0, 0} },
{ 0x3fdfffffffffffff, 0, 0, 0, 0, {0, 0, 0, 0} },
{ 0x3ff0000000000000, 1, 1, 1, 1, {0, 0, 0, 0} },
{ 0xbff0000000000000, -1, 0, -1, 0, {0, 1, 0, 1} },
{ 0x402123456789abcd, 8, 8, 8, 8, {0, 0, 0, 0} },
{ 0x406123456789abcd, 137, 137, 137, 137, {0, 0, 0, 0} },
{ 0x409123456789abcd, 1096, 1096, 1096, 1096, {0, 0, 0, 0} },
{ 0x41c123456789abcd, 0x22468acf, 0x22468acf, 0x22468acf, 0x22468acf, {0, 0, 0, 0} },
{ 0x41d123456789abcd, 0x448d159e, 0x448d159e, 0x448d159e, 0x448d159e, {0, 0, 0, 0} },
{ 0x41e123456789abcd, 0x891a2b3c, 0x891a2b3c, 0x7fffffff, 0x891a2b3c, {0, 0, 1, 0} },
{ 0x41f123456789abcd, 0x112345678, 0x112345678, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
{ 0xc1f123456789abcd, -0x112345678, 0, 0x80000000, 0, {0, 1, 1, 1} },
{ 0x432123456789abcd, 0x891a2b3c4d5e6, 0x891a2b3c4d5e6, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
{ 0x433123456789abcd, 0x1123456789abcd, 0x1123456789abcd, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
{ 0x434123456789abcd, 0x22468acf13579a, 0x22468acf13579a, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
{ 0x43c123456789abcd, 0x22468acf13579a00, 0x22468acf13579a00, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
{ 0x43d123456789abcd, 0x448d159e26af3400, 0x448d159e26af3400, 0x7fffffff, 0xffffffff, {0, 0, 1, 1} },
{ 0x43e123456789abcd, 0x7fffffffffffffff, 0x891a2b3c4d5e6800, 0x7fffffff, 0xffffffff, {1, 0, 1, 1} },
{ 0x43f123456789abcd, 0x7fffffffffffffff, 0xffffffffffffffff, 0x7fffffff, 0xffffffff, {1, 1, 1, 1} },
{ 0xc3f123456789abcd, 0x8000000000000000, 0, 0x80000000, 0, {1, 1, 1, 1} },
{ 0x7ff0000000000000, 0x7fffffffffffffff, 0xffffffffffffffff, 0x7fffffff, 0xffffffff, {1, 1, 1, 1} },
{ 0xfff0000000000000, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } },
{ 0x7ff923456789abcd, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } },
{ 0xfff923456789abcd, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } },
};

int test10(long arg)
{
long i;
int ires;
unsigned int ures;
long lres;
unsigned long ulres;
unsigned char inv[4];
struct cvtivals *vp = cvtizvals;

for (i = 0; i < sizeof(cvtizvals) / sizeof(cvtizvals[0]); ++i, ++vp) {
set_fpscr(FPS_RN_NEAR);
asm("lfd 3,0(%0); fctidz 4,3; stfd 4,0(%1)"
: : "b" (&vp->dval), "b" (&lres) : "memory");
inv[0] = GET_VXCVI();
set_fpscr(FPS_RN_NEAR);
asm("fctiduz 5,3; stfd 5,0(%0)" : : "b" (&ulres) : "memory");
inv[1] = GET_VXCVI();
set_fpscr(FPS_RN_NEAR);
asm("fctiwz 6,3; stfiwx 6,0,%0" : : "b" (&ires) : "memory");
inv[2] = GET_VXCVI();
set_fpscr(FPS_RN_NEAR);
asm("fctiwuz 7,3; stfiwx 7,0,%0" : : "b" (&ures) : "memory");
inv[3] = GET_VXCVI();

if (lres != vp->lval || ulres != vp->ulval || ires != vp->ival || ures != vp->uival ||
inv[0] != vp->invalids[0] || inv[1] != vp->invalids[1] ||
inv[2] != vp->invalids[2] || inv[3] != vp->invalids[3]) {
print_hex(lres, 16, inv[0]? "V ": " ");
print_hex(ulres, 16, inv[1]? "V ": " ");
print_hex(ires, 8, inv[2]? "V ": " ");
print_hex(ures, 8, inv[3]? "V ": " ");
return i + 1;
}
}
return 0;
}

int fpu_test_10(void)
{
enable_fp();
return trapit(0, test10);
}

int fail = 0;

void do_test(int num, int (*test)(void))
@ -631,6 +786,8 @@ int main(void)
do_test(6, fpu_test_6);
do_test(7, fpu_test_7);
do_test(8, fpu_test_8);
do_test(9, fpu_test_9);
do_test(10, fpu_test_10);

return fail;
}

@ -6,3 +6,5 @@ test 05:PASS
test 06:PASS
test 07:PASS
test 08:PASS
test 09:PASS
test 10:PASS

Loading…
Cancel
Save