Merge pull request #457 from paulusmack/fixes

FPU fixes, mostly for bugs found by comparing results from random instruction
sequences (generated by simple_random) with POWER9.
master
Paul Mackerras 3 weeks ago committed by GitHub
commit a1624a50da
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -51,7 +51,7 @@ architecture behaviour of fpu is
DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF, DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT, DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
DO_FCFID, DO_FCTI, DO_FCFID, DO_FCTI,
DO_FRSP, DO_FRSP_2, DO_FRI, DO_FRSP, DO_FRI,
DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD, DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
DO_FRE, DO_FRE,
DO_FSEL, DO_FSEL,
@ -72,9 +72,9 @@ architecture behaviour of fpu is
INT_SHIFT, INT_ROUND, INT_ISHIFT, INT_SHIFT, INT_ROUND, INT_ISHIFT,
INT_FINAL, INT_CHECK, INT_OFLOW, INT_FINAL, INT_CHECK, INT_OFLOW,
FINISH, NORMALIZE, FINISH, NORMALIZE,
ROUND_UFLOW, ROUND_OFLOW, ROUND_UFLOW_DIS, ROUND_UFLOW_EN,
ROUND_OFLOW_DIS, ROUND_OFLOW_EN,
ROUNDING, ROUND_INC, ROUNDING_2, ROUNDING_3, ROUNDING, ROUND_INC, ROUNDING_2, ROUNDING_3,
DENORM,
RENORM_A, RENORM_B, RENORM_C, RENORM_A, RENORM_B, RENORM_C,
RENORM_1, RENORM_2, RENORM_1, RENORM_2,
IDIV_NORMB, IDIV_NORMB2, IDIV_NORMB3, IDIV_NORMB, IDIV_NORMB2, IDIV_NORMB3,
@ -98,6 +98,7 @@ architecture behaviour of fpu is
zero_divide : std_ulogic; zero_divide : std_ulogic;
new_fpscr : std_ulogic_vector(31 downto 0); new_fpscr : std_ulogic_vector(31 downto 0);
immed_result : std_ulogic; -- result is an input, zero, infinity or NaN immed_result : std_ulogic; -- result is an input, zero, infinity or NaN
need_finish : std_ulogic; -- result needs further processing
qnan_result : std_ulogic; qnan_result : std_ulogic;
result_sel : std_ulogic_vector(2 downto 0); result_sel : std_ulogic_vector(2 downto 0);
result_class : fp_number_class; result_class : fp_number_class;
@ -144,7 +145,7 @@ architecture behaviour of fpu is
int_result : std_ulogic; int_result : std_ulogic;
cr_result : std_ulogic_vector(3 downto 0); cr_result : std_ulogic_vector(3 downto 0);
cr_mask : std_ulogic_vector(7 downto 0); cr_mask : std_ulogic_vector(7 downto 0);
old_exc : std_ulogic_vector(4 downto 0); old_exc : std_ulogic_vector(12 downto 0);
update_fprf : std_ulogic; update_fprf : std_ulogic;
quieten_nan : std_ulogic; quieten_nan : std_ulogic;
nsnan_result : std_ulogic; nsnan_result : std_ulogic;
@ -158,6 +159,7 @@ architecture behaviour of fpu is
is_multiply : std_ulogic; is_multiply : std_ulogic;
is_inverse : std_ulogic; is_inverse : std_ulogic;
is_sqrt : std_ulogic; is_sqrt : std_ulogic;
do_renorm_b : std_ulogic;
first : std_ulogic; first : std_ulogic;
count : unsigned(1 downto 0); count : unsigned(1 downto 0);
doing_ftdiv : std_ulogic_vector(1 downto 0); doing_ftdiv : std_ulogic_vector(1 downto 0);
@ -187,6 +189,7 @@ architecture behaviour of fpu is
cycle_1_ar : std_ulogic; cycle_1_ar : std_ulogic;
regsel : std_ulogic_vector(2 downto 0); regsel : std_ulogic_vector(2 downto 0);
is_nan_inf : std_ulogic; is_nan_inf : std_ulogic;
zero_fri : std_ulogic;
end record; end record;


type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0); type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);
@ -312,6 +315,7 @@ architecture behaviour of fpu is
constant RSCON2_63 : std_ulogic_vector(3 downto 0) := "0111"; constant RSCON2_63 : std_ulogic_vector(3 downto 0) := "0111";
constant RSCON2_64 : std_ulogic_vector(3 downto 0) := "1000"; constant RSCON2_64 : std_ulogic_vector(3 downto 0) := "1000";
constant RSCON2_MINEXP : std_ulogic_vector(3 downto 0) := "1001"; constant RSCON2_MINEXP : std_ulogic_vector(3 downto 0) := "1001";
constant RSCON2_DPMINX : std_ulogic_vector(3 downto 0) := "1010";


signal rs_sel1 : std_ulogic_vector(1 downto 0); signal rs_sel1 : std_ulogic_vector(1 downto 0);
signal rs_sel2 : std_ulogic; signal rs_sel2 : std_ulogic;
@ -713,9 +717,13 @@ architecture behaviour of fpu is
end; end;


-- Determine result flags to write into the FPSCR -- Determine result flags to write into the FPSCR
function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic) function result_flags(sign: std_ulogic; class: fp_number_class; int_result: std_ulogic;
unitbit: std_ulogic)
return std_ulogic_vector is return std_ulogic_vector is
begin begin
if int_result = '1' then
return "00000";
else
case class is case class is
when ZERO => when ZERO =>
return sign & "0010"; return sign & "0010";
@ -726,6 +734,7 @@ architecture behaviour of fpu is
when NAN => when NAN =>
return "10001"; return "10001";
end case; end case;
end if;
end; end;


begin begin
@ -767,6 +776,9 @@ begin
end if; end if;
else else
assert not (r.state /= IDLE and e_in.valid = '1') severity failure; assert not (r.state /= IDLE and e_in.valid = '1') severity failure;
assert not (rin.state = FINISH and rin.r = 64x"0" and rin.x = '1');
assert not (rin.state = ROUNDING and rin.r(UNIT_BIT) = '0' and
not (rin.tiny = '1' or rin.zero_fri = '1'));
r <= rin; r <= rin;
end if; end if;
end if; end if;
@ -827,6 +839,7 @@ begin
e.zero_divide := '0'; e.zero_divide := '0';
e.new_fpscr := (others => '0'); e.new_fpscr := (others => '0');
e.immed_result := '0'; e.immed_result := '0';
e.need_finish := '0';
e.qnan_result := '0'; e.qnan_result := '0';
e.result_sel := AIN_ZERO; e.result_sel := AIN_ZERO;
e.result_class := FINITE; e.result_class := FINITE;
@ -905,6 +918,11 @@ begin
-- result is +/- B -- result is +/- B
e.result_sel := AIN_B; e.result_sel := AIN_B;
e.result_class := r.b.class; e.result_class := r.b.class;
-- r.result_sign is already correct
if r.b.class = FINITE and r.int_result = '0' and
(r.single_prec = '1' or (r.fpscr(FPSCR_UE) = '1' and r.b.denorm = '1')) then
e.need_finish := '1';
end if;
else else
e.result_class := ZERO; e.result_class := ZERO;
end if; end if;
@ -919,6 +937,10 @@ begin
e.immed_result := '1'; e.immed_result := '1';
e.result_sel := AIN_B; e.result_sel := AIN_B;
e.result_class := r.b.class; e.result_class := r.b.class;
if r.b.class = FINITE and r.int_result = '0' and
(r.single_prec = '1' or (r.fpscr(FPSCR_UE) = '1' and r.b.denorm = '1')) then
e.need_finish := '1';
end if;


elsif r.use_b = '1' and r.b.class = ZERO and r.is_multiply = '0' then elsif r.use_b = '1' and r.b.class = ZERO and r.is_multiply = '0' then
-- B is zero, other operands are finite -- B is zero, other operands are finite
@ -931,6 +953,11 @@ begin
elsif r.is_addition = '1' then elsif r.is_addition = '1' then
-- fadd, result is A -- fadd, result is A
e.result_sel := AIN_A; e.result_sel := AIN_A;
e.rsgn_op := RSGN_SEL;
if r.a.class = FINITE and r.int_result = '0' and
(r.single_prec = '1' or (r.fpscr(FPSCR_UE) = '1' and r.a.denorm = '1')) then
e.need_finish := '1';
end if;
else else
-- other things, result is zero -- other things, result is zero
e.result_class := ZERO; e.result_class := ZERO;
@ -979,6 +1006,7 @@ begin
variable exp_huge : std_ulogic; variable exp_huge : std_ulogic;
variable clz : std_ulogic_vector(5 downto 0); variable clz : std_ulogic_vector(5 downto 0);
variable set_x : std_ulogic; variable set_x : std_ulogic;
variable set_xs : std_ulogic;
variable mshift : signed(EXP_BITS-1 downto 0); variable mshift : signed(EXP_BITS-1 downto 0);
variable need_check : std_ulogic; variable need_check : std_ulogic;
variable msb : std_ulogic; variable msb : std_ulogic;
@ -1032,6 +1060,7 @@ begin
variable bneg : std_ulogic; variable bneg : std_ulogic;
variable ci : std_ulogic; variable ci : std_ulogic;
variable rormr : std_ulogic_vector(63 downto 0); variable rormr : std_ulogic_vector(63 downto 0);
variable sorms : std_ulogic_vector(55 downto 0);
begin begin
v := r; v := r;
v.complete := '0'; v.complete := '0';
@ -1048,7 +1077,6 @@ begin
v.writing_fpr := '0'; v.writing_fpr := '0';
v.writing_cr := '0'; v.writing_cr := '0';
v.writing_xer := '0'; v.writing_xer := '0';
v.comm_fpscr := r.fpscr;
v.illegal := '0'; v.illegal := '0';
end if; end if;


@ -1076,6 +1104,8 @@ begin
v.is_addition := '0'; v.is_addition := '0';
v.is_subtract := '0'; v.is_subtract := '0';
v.is_inverse := '0'; v.is_inverse := '0';
v.add_bsmall := '0';
v.do_renorm_b := '0';
fpin_a := '0'; fpin_a := '0';
fpin_b := '0'; fpin_b := '0';
fpin_c := '0'; fpin_c := '0';
@ -1088,6 +1118,7 @@ begin
v.quieten_nan := '1'; v.quieten_nan := '1';
v.int_result := '0'; v.int_result := '0';
v.is_arith := '0'; v.is_arith := '0';
v.zero_fri := '0';
case e_in.op is case e_in.op is
when OP_FP_ARITH => when OP_FP_ARITH =>
fpin_a := e_in.valid_a; fpin_a := e_in.valid_a;
@ -1112,6 +1143,8 @@ begin
v.result_sign := e_in.fra(63); v.result_sign := e_in.fra(63);
if unsigned(e_in.fra(62 downto 52)) <= unsigned(e_in.frb(62 downto 52)) then if unsigned(e_in.fra(62 downto 52)) <= unsigned(e_in.frb(62 downto 52)) then
v.result_sign := e_in.frb(63) xnor e_in.insn(1); v.result_sign := e_in.frb(63) xnor e_in.insn(1);
else
v.add_bsmall := '1';
end if; end if;
v.is_subtract := not (e_in.fra(63) xor e_in.frb(63) xor e_in.insn(1)); v.is_subtract := not (e_in.fra(63) xor e_in.frb(63) xor e_in.insn(1));
when "11001" => -- fmul when "11001" => -- fmul
@ -1124,17 +1157,24 @@ begin
v.is_subtract := not (e_in.fra(63) xor e_in.frb(63) xor v.is_subtract := not (e_in.fra(63) xor e_in.frb(63) xor
e_in.frc(63) xor e_in.insn(1)); e_in.frc(63) xor e_in.insn(1));
v.negate := e_in.insn(2); v.negate := e_in.insn(2);
v.do_renorm_b := '1';
when "10010" => -- fdiv when "10010" => -- fdiv
v.is_inverse := '1'; v.is_inverse := '1';
v.result_sign := e_in.fra(63) xor e_in.frb(63); v.result_sign := e_in.fra(63) xor e_in.frb(63);
v.do_renorm_b := '1';
when "11000" | "11010" => -- fre and frsqrte when "11000" | "11010" => -- fre and frsqrte
v.is_inverse := '1'; v.is_inverse := '1';
v.result_sign := e_in.frb(63); v.result_sign := e_in.frb(63);
v.do_renorm_b := '1';
when "01110" | "01111" => -- fcti* when "01110" | "01111" => -- fcti*
v.int_result := '1'; v.int_result := '1';
v.result_sign := e_in.frb(63); v.result_sign := e_in.frb(63);
when others => -- fri* and frsp when "01000" => -- fri*
v.zero_fri := '1';
v.result_sign := e_in.frb(63); v.result_sign := e_in.frb(63);
when others => -- frsp and fsqrt
v.result_sign := e_in.frb(63);
v.do_renorm_b := '1';
end case; end case;
when OP_FP_CMP => when OP_FP_CMP =>
fpin_a := e_in.valid_a; fpin_a := e_in.valid_a;
@ -1145,12 +1185,21 @@ begin
opcbits := e_in.insn(10) & e_in.insn(8) & e_in.insn(4) & e_in.insn(2) & e_in.insn(1); opcbits := e_in.insn(10) & e_in.insn(8) & e_in.insn(4) & e_in.insn(2) & e_in.insn(1);
exec_state := misc_decode(to_integer(unsigned(opcbits))); exec_state := misc_decode(to_integer(unsigned(opcbits)));
case opcbits is case opcbits is
when "10010" | "11010" | "10011" => when "10010" | "11010" =>
-- fmrg*, mffs -- fmrg*
v.int_result := '1';
v.result_sign := '0';
when "10011" =>
-- mffs*
v.int_result := '1'; v.int_result := '1';
v.result_sign := '0'; v.result_sign := '0';
if e_in.insn(20 downto 16) /= "00000" then
-- mffs* variants other than mffs have bit 0 reserved
v.rc := '0';
end if;
when "10110" => -- fcfid when "10110" => -- fcfid
v.result_sign := e_in.frb(63); v.result_sign := e_in.frb(63);
v.longmask := e_in.single;
when others => when others =>
v.result_sign := '0'; v.result_sign := '0';
end case; end case;
@ -1211,7 +1260,6 @@ begin
end case; end case;
v.tiny := '0'; v.tiny := '0';
v.denorm := '0'; v.denorm := '0';
v.add_bsmall := '0';
v.int_ovf := '0'; v.int_ovf := '0';
v.div_close := '0'; v.div_close := '0';


@ -1268,6 +1316,9 @@ begin
end if; end if;


-- Compare P with zero and with B -- Compare P with zero and with B
-- This has a 2-bit shift in it (p(59..4) compared to b(57..2))
-- because it's used in the FP division code to determine whether
-- to increment the quotient at bit 2 (DP_RBIT).
px_nz := or (r.p(UNIT_BIT + 1 downto 4)); px_nz := or (r.p(UNIT_BIT + 1 downto 4));
pcmpb_eq := '0'; pcmpb_eq := '0';
if r.p(59 downto 4) = r.b.mantissa(UNIT_BIT + 1 downto DP_RBIT) then if r.p(59 downto 4) = r.b.mantissa(UNIT_BIT + 1 downto DP_RBIT) then
@ -1279,6 +1330,9 @@ begin
elsif unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(UNIT_BIT + 1 downto DP_RBIT)) then elsif unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(UNIT_BIT + 1 downto DP_RBIT)) then
pcmpb_lt := '1'; pcmpb_lt := '1';
end if; end if;
-- Compare P with zero and with C
-- This is used in the square root and integer division code
-- to decide whether to increment the result by 1
pcmpc_eq := '0'; pcmpc_eq := '0';
if r.p = r.c.mantissa then if r.p = r.c.mantissa then
pcmpc_eq := '1'; pcmpc_eq := '1';
@ -1303,13 +1357,14 @@ begin
opsel_s <= S_ZERO; opsel_s <= S_ZERO;
misc_sel <= "000"; misc_sel <= "000";
opsel_sel <= AIN_ZERO; opsel_sel <= AIN_ZERO;
fpscr_mask := (others => '1'); fpscr_mask := x"FFFFFFFF";
cr_op := CROP_NONE; cr_op := CROP_NONE;
update_fx := '0'; update_fx := '0';
arith_done := '0'; arith_done := '0';
invalid := '0'; invalid := '0';
zero_divide := '0'; zero_divide := '0';
set_x := '0'; set_x := '0';
set_xs := '0';
qnan_result := '0'; qnan_result := '0';
set_a := '0'; set_a := '0';
set_a_exp := '0'; set_a_exp := '0';
@ -1354,12 +1409,6 @@ begin
rsgn_op := RSGN_NOP; rsgn_op := RSGN_NOP;
rcls_op <= RCLS_NOP; rcls_op <= RCLS_NOP;


if r.cycle_1_ar = '1' then
v.fpscr(FPSCR_FR) := '0';
v.fpscr(FPSCR_FI) := '0';
v.result_class := FINITE;
end if;

case r.state is case r.state is
when IDLE => when IDLE =>
v.invalid := '0'; v.invalid := '0';
@ -1374,7 +1423,7 @@ begin
end if; end if;
end if; end if;
v.x := '0'; v.x := '0';
v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX); v.old_exc := r.fpscr(FPSCR_OX downto FPSCR_VXVC) & r.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI);
set_s := '1'; set_s := '1';
v.regsel := AIN_ZERO; v.regsel := AIN_ZERO;


@ -1391,7 +1440,7 @@ begin
v.state := RENORM_A; v.state := RENORM_A;
elsif r.c.denorm = '1' then elsif r.c.denorm = '1' then
v.state := RENORM_C; v.state := RENORM_C;
elsif r.b.denorm = '1' and (r.is_inverse = '1' or r.is_sqrt = '1') then elsif r.b.denorm = '1' and r.do_renorm_b = '1' then
v.state := RENORM_B; v.state := RENORM_B;
elsif r.is_multiply = '1' and r.b.class = ZERO then elsif r.is_multiply = '1' and r.b.class = ZERO then
v.state := DO_FMUL; v.state := DO_FMUL;
@ -1410,11 +1459,10 @@ begin
for i in 0 to 7 loop for i in 0 to 7 loop
if i = j then if i = j then
k := (7 - i) * 4; k := (7 - i) * 4;
v.cr_result := r.fpscr(k + 3 downto k);
fpscr_mask(k + 3 downto k) := "0000"; fpscr_mask(k + 3 downto k) := "0000";
end if; end if;
end loop; end loop;
v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF"); v.fpscr := r.fpscr and (fpscr_mask or x"6007F0FF");
v.instr_done := '1'; v.instr_done := '1';


when DO_FTDIV => when DO_FTDIV =>
@ -1477,6 +1525,7 @@ begin
v.fpscr(31 - i) := r.insn(6); v.fpscr(31 - i) := r.insn(6);
end if; end if;
end loop; end loop;
update_fx := '1';
v.instr_done := '1'; v.instr_done := '1';


when DO_MTFSFI => when DO_MTFSFI =>
@ -1583,22 +1632,7 @@ begin
set_r := '1'; set_r := '1';
re_sel2 <= REXP2_B; re_sel2 <= REXP2_B;
re_set_result <= '1'; re_set_result <= '1';
v.state := DO_FRSP_2; v.state := FINISH;

when DO_FRSP_2 =>
-- r.shift = 0
-- set shift to exponent - -126 (for ROUND_UFLOW state)
rs_sel1 <= RSH1_B;
rs_con2 <= RSCON2_MINEXP;
rs_neg2 <= '1';
set_x := '1'; -- uses r.r and r.shift
if r.b.exponent < to_signed(-126, EXP_BITS) then
v.state := ROUND_UFLOW;
elsif r.b.exponent > to_signed(127, EXP_BITS) then
v.state := ROUND_OFLOW;
else
v.state := ROUNDING;
end if;


when DO_FCTI => when DO_FCTI =>
-- instr bit 9: 1=dword 0=word -- instr bit 9: 1=dword 0=word
@ -1611,6 +1645,7 @@ begin
re_set_result <= '1'; re_set_result <= '1';
rs_sel1 <= RSH1_B; rs_sel1 <= RSH1_B;
rs_neg2 <= '1'; rs_neg2 <= '1';
v.single_prec := not r.insn(9);


if r.b.exponent >= to_signed(64, EXP_BITS) or if r.b.exponent >= to_signed(64, EXP_BITS) or
(r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
@ -1640,6 +1675,8 @@ begin
rcls_op <= RCLS_SEL; rcls_op <= RCLS_SEL;
re_con2 <= RECON2_UNIT; re_con2 <= RECON2_UNIT;
re_set_result <= '1'; re_set_result <= '1';
v.fpscr(FPSCR_FR) := '0';
v.fpscr(FPSCR_FI) := '0';
if r.b.class = ZERO then if r.b.class = ZERO then
arith_done := '1'; arith_done := '1';
else else
@ -1657,15 +1694,13 @@ begin
rs_sel1 <= RSH1_B; rs_sel1 <= RSH1_B;
rs_neg1 <= '1'; rs_neg1 <= '1';
rs_sel2 <= RSH2_A; rs_sel2 <= RSH2_A;
v.add_bsmall := '0'; if r.add_bsmall = '1' then
if r.a.exponent = r.b.exponent then v.state := ADD_1;
elsif r.a.exponent = r.b.exponent then
v.state := ADD_2B; v.state := ADD_2B;
elsif r.a.exponent < r.b.exponent then elsif v.add_bsmall = '0' then
v.longmask := '0'; v.longmask := '0';
v.state := ADD_SHIFT; v.state := ADD_SHIFT;
else
v.add_bsmall := '1';
v.state := ADD_1;
end if; end if;


when DO_FMUL => when DO_FMUL =>
@ -1705,7 +1740,8 @@ begin
misc_sel <= "111"; misc_sel <= "111";
set_r := '1'; set_r := '1';
re_set_result <= '1'; re_set_result <= '1';
arith_done := '1'; v.writing_fpr := '1';
v.instr_done := '1';


when DO_FSQRT => when DO_FSQRT =>
opsel_a <= AIN_B; opsel_a <= AIN_B;
@ -1737,14 +1773,16 @@ begin
re_set_result <= '1'; re_set_result <= '1';
-- put b.exp into shift -- put b.exp into shift
rs_sel1 <= RSH1_B; rs_sel1 <= RSH1_B;
if (r.a.exponent + r.c.exponent + 1) < r.b.exponent then if (r.a.exponent + r.c.exponent + 2) < r.b.exponent then
-- addend is bigger, do multiply first -- addend is definitely bigger, do multiply first
-- if subtracting, sign is opposite to initial estimate -- if subtracting, sign is opposite to initial estimate
f_to_multiply.valid <= '1'; f_to_multiply.valid <= '1';
v.first := '1'; v.first := '1';
v.state := FMADD_0; v.state := FMADD_0;
else else
-- product is bigger, shift B first -- product may be bigger, or the answer might be
-- close to 0; shift B first so the multiplier does
-- the add/subtract operation.
v.state := FMADD_1; v.state := FMADD_1;
end if; end if;


@ -1791,7 +1829,7 @@ begin
if r.c.denorm = '1' then if r.c.denorm = '1' then
-- must be either fmul or fmadd/sub -- must be either fmul or fmadd/sub
v.state := RENORM_C; v.state := RENORM_C;
elsif r.b.denorm = '1' and r.is_addition = '0' then elsif r.b.denorm = '1' and r.do_renorm_b = '1' then
v.state := RENORM_B; v.state := RENORM_B;
elsif r.is_multiply = '1' and r.b.class = ZERO then elsif r.is_multiply = '1' and r.b.class = ZERO then
v.state := DO_FMUL; v.state := DO_FMUL;
@ -1807,6 +1845,7 @@ begin
re_sel2 <= REXP2_B; re_sel2 <= REXP2_B;
re_set_result <= '1'; re_set_result <= '1';
-- set shift to b.exp - a.exp -- set shift to b.exp - a.exp
-- (N.B., shift can be 0 if B is denorm and A's exp is -1022)
rs_sel1 <= RSH1_B; rs_sel1 <= RSH1_B;
rs_sel2 <= RSH2_A; rs_sel2 <= RSH2_A;
rs_neg2 <= '1'; rs_neg2 <= '1';
@ -1821,6 +1860,7 @@ begin
re_set_result <= '1'; re_set_result <= '1';
v.x := s_nz; v.x := s_nz;
set_x := '1'; set_x := '1';
set_s := '1';
v.longmask := r.single_prec; v.longmask := r.single_prec;
if r.add_bsmall = '1' then if r.add_bsmall = '1' then
v.state := ADD_2; v.state := ADD_2;
@ -1859,25 +1899,14 @@ begin
-- result is opposite sign to expected -- result is opposite sign to expected
rsgn_op := RSGN_INV; rsgn_op := RSGN_INV;
set_r := '1'; set_r := '1';
v.state := FINISH;
elsif r.r(UNIT_BIT + 1) = '1' then elsif r.r(UNIT_BIT + 1) = '1' then
-- sum overflowed, shift right -- sum overflowed, shift right
opsel_r <= RES_SHIFT; opsel_r <= RES_SHIFT;
set_r := '1'; set_r := '1';
re_set_result <= '1'; re_set_result <= '1';
set_x := '1'; set_x := '1';
if exp_huge = '1' then
v.state := ROUND_OFLOW;
else
v.state := ROUNDING;
end if;
elsif r.r(UNIT_BIT) = '1' then
set_x := '1';
v.state := ROUNDING;
else
rs_norm <= '1';
v.state := NORMALIZE;
end if; end if;
v.state := FINISH;


when CMP_1 => when CMP_1 =>
opsel_a <= AIN_A; opsel_a <= AIN_A;
@ -1892,9 +1921,10 @@ begin
v.instr_done := '1'; v.instr_done := '1';


when MULT_1 => when MULT_1 =>
f_to_multiply.valid <= r.first;
opsel_r <= RES_MULT; opsel_r <= RES_MULT;
set_r := '1'; set_r := '1';
opsel_s <= S_MULT;
set_s := '1';
if multiply_to_f.valid = '1' then if multiply_to_f.valid = '1' then
v.state := FINISH; v.state := FINISH;
end if; end if;
@ -1920,8 +1950,8 @@ begin
end if; end if;


when FMADD_1 => when FMADD_1 =>
-- shift is b.exp, so new_exp is a.exp + c.exp - b.exp -- shift is b.exp, so new_exp is a.exp + c.exp - b.exp (>= -2)
-- product is bigger here -- product may be bigger here
-- shift B right and use it as the addend to the multiplier -- shift B right and use it as the addend to the multiplier
-- for subtract, multiplier does B - A * C -- for subtract, multiplier does B - A * C
re_sel2 <= REXP2_B; re_sel2 <= REXP2_B;
@ -1935,8 +1965,10 @@ begin
when FMADD_2 => when FMADD_2 =>
-- Product is potentially bigger here -- Product is potentially bigger here
-- r.shift = addend exp - product exp + 64, r.r = r.b.mantissa -- r.shift = addend exp - product exp + 64, r.r = r.b.mantissa
-- R contains B, S contains 0
set_s := '1'; set_s := '1';
opsel_s <= S_SHIFT; opsel_s <= S_SHIFT;
set_x := '1';
-- set shift to r.shift - 64 -- set shift to r.shift - 64
rs_sel1 <= RSH1_S; rs_sel1 <= RSH1_S;
rs_con2 <= RSCON2_64; rs_con2 <= RSCON2_64;
@ -1979,25 +2011,18 @@ begin
v.state := FMADD_6; v.state := FMADD_6;


when FMADD_6 => when FMADD_6 =>
-- r.shift = UNIT_BIT (or 0, but only if r is now nonzero) -- r.shift = UNIT_BIT
set_r := '0'; set_r := '0';
opsel_r <= RES_SHIFT; opsel_r <= RES_SHIFT;
re_sel2 <= REXP2_NE; re_sel2 <= REXP2_NE;
rs_norm <= '1';
rcls_op <= RCLS_TZERO;
if (r.r(UNIT_BIT + 2) or r_hi_nz or r_lo_nz or (or (r.r(DP_LSB - 1 downto 0)))) = '0' then if (r.r(UNIT_BIT + 2) or r_hi_nz or r_lo_nz or (or (r.r(DP_LSB - 1 downto 0)))) = '0' then
-- S = 0 case is handled by RCLS_TZERO logic, otherwise... -- R is all zeroes but there may be non-zero bits in S
-- R is all zeroes but there are non-zero bits in S
-- so shift them into R and set S to 0 -- so shift them into R and set S to 0
set_r := '1'; set_r := '1';
re_set_result <= '1'; re_set_result <= '1';
set_s := '1'; set_s := '1';
v.state := FINISH;
elsif r.r(UNIT_BIT + 2 downto UNIT_BIT) = "001" then
v.state := FINISH;
else
v.state := NORMALIZE;
end if; end if;
v.state := FINISH;


when DIV_2 => when DIV_2 =>
-- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
@ -2248,29 +2273,29 @@ begin


when SQRT_11 => when SQRT_11 =>
-- compute P = A - R * R (remainder) -- compute P = A - R * R (remainder)
-- also put 2 * R + 1 into B for comparison with P -- also put 2 * R + 1 into C for comparison with P
msel_1 <= MUL1_R; msel_1 <= MUL1_R;
msel_2 <= MUL2_R; msel_2 <= MUL2_R;
msel_add <= MULADD_A; msel_add <= MULADD_A;
msel_inv <= '1'; msel_inv <= '1';
f_to_multiply.valid <= r.first; f_to_multiply.valid <= r.first;
shiftin := '1'; shiftin := '1';
set_b := r.first; set_c := r.first;
if multiply_to_f.valid = '1' then if multiply_to_f.valid = '1' then
v.state := SQRT_12; v.state := SQRT_12;
end if; end if;


when SQRT_12 => when SQRT_12 =>
-- test if remainder is 0 or >= B = 2*R + 1 -- test if remainder is 0 or >= C = 2*R + 1
set_r := '0'; set_r := '0';
opsel_c <= CIN_INC; opsel_c <= CIN_INC;
if pcmpb_lt = '1' then if pcmpc_lt = '1' then
-- square root is correct, set X if remainder non-zero -- square root is correct, set X if remainder non-zero
v.x := r.p(UNIT_BIT + 2) or px_nz; v.x := r.p(UNIT_BIT + 2) or px_nz;
else else
-- square root needs to be incremented by 1 -- square root needs to be incremented by 1
set_r := '1'; set_r := '1';
v.x := not pcmpb_eq; v.x := not pcmpc_eq;
end if; end if;
v.state := FINISH; v.state := FINISH;


@ -2318,10 +2343,13 @@ begin
-- Check for possible overflows -- Check for possible overflows
case r.insn(9 downto 8) is case r.insn(9 downto 8) is
when "00" => -- fctiw[z] when "00" => -- fctiw[z]
-- check bit 32 in case of rounding overflow
need_check := r.r(31) or (r.r(30) and not r.result_sign); need_check := r.r(31) or (r.r(30) and not r.result_sign);
when "01" => -- fctiwu[z] when "01" => -- fctiwu[z]
need_check := r.r(31); -- check bit 32 in case of rounding overflow
need_check := r.r(31) or r.r(31);
when "10" => -- fctid[z] when "10" => -- fctid[z]
-- can't get rounding overflow for 64-bit conversion
need_check := r.r(63) or (r.r(62) and not r.result_sign); need_check := r.r(63) or (r.r(62) and not r.result_sign);
when others => -- fctidu[z] when others => -- fctidu[z]
need_check := r.r(63); need_check := r.r(63);
@ -2341,26 +2369,23 @@ begin
else else
msb := r.r(63); msb := r.r(63);
end if; end if;
opsel_r <= RES_MISC;
misc_sel <= "110";
if (r.insn(8) = '0' and msb /= r.result_sign) or if (r.insn(8) = '0' and msb /= r.result_sign) or
(r.insn(8) = '1' and msb /= '1') then (r.insn(8) = '1' and msb /= '1') or
set_r := '1'; (r.insn(9) = '0' and r.r(32) /= r.result_sign) then
v.fpscr(FPSCR_VXCVI) := '1'; v.state := INT_OFLOW;
invalid := '1';
else else
set_r := '0';
if r.fpscr(FPSCR_FI) = '1' then if r.fpscr(FPSCR_FI) = '1' then
v.fpscr(FPSCR_XX) := '1'; v.fpscr(FPSCR_XX) := '1';
end if; end if;
end if;
arith_done := '1'; arith_done := '1';
end if;


when INT_OFLOW => when INT_OFLOW =>
opsel_r <= RES_MISC; opsel_r <= RES_MISC;
misc_sel <= "110"; misc_sel <= "110";
set_r := '1'; set_r := '1';
v.fpscr(FPSCR_VXCVI) := '1'; v.fpscr(FPSCR_VXCVI) := '1';
v.fpscr(FPSCR_FR downto FPSCR_FI) := "00";
invalid := '1'; invalid := '1';
arith_done := '1'; arith_done := '1';


@ -2374,22 +2399,24 @@ begin
v.state := ROUNDING; v.state := ROUNDING;


when FINISH => when FINISH =>
if r.is_multiply = '1' and px_nz = '1' then -- r.shift = 0
v.x := '1';
end if;
-- set shift to new_exp - min_exp (N.B. rs_norm overrides this) -- set shift to new_exp - min_exp (N.B. rs_norm overrides this)
-- assert that if r.r = 0 then r.x = 0 also
rs_sel1 <= RSH1_NE; rs_sel1 <= RSH1_NE;
rs_con2 <= RSCON2_MINEXP; rs_con2 <= RSCON2_MINEXP;
rs_neg2 <= '1'; rs_neg2 <= '1';
rcls_op <= RCLS_TZERO;
if r.r(63 downto UNIT_BIT) /= std_ulogic_vector(to_unsigned(1, 64 - UNIT_BIT)) then if r.r(63 downto UNIT_BIT) /= std_ulogic_vector(to_unsigned(1, 64 - UNIT_BIT)) then
rs_norm <= '1'; rs_norm <= '1';
v.state := NORMALIZE; v.state := NORMALIZE;
else else
set_x := '1'; set_x := '1';
if exp_tiny = '1' then set_xs := r.is_multiply;
v.state := ROUND_UFLOW; v.tiny := exp_tiny;
elsif exp_huge = '1' then if exp_tiny = '1' and r.fpscr(FPSCR_UE) = '0' then
v.state := ROUND_OFLOW; v.state := ROUND_UFLOW_DIS;
elsif exp_huge = '1' and r.fpscr(FPSCR_OE) = '0' then
v.state := ROUND_OFLOW_DIS;
else else
v.state := ROUNDING; v.state := ROUNDING;
end if; end if;
@ -2407,51 +2434,35 @@ begin
rs_con2 <= RSCON2_MINEXP; rs_con2 <= RSCON2_MINEXP;
rs_neg2 <= '1'; rs_neg2 <= '1';
set_x := '1'; set_x := '1';
if exp_tiny = '1' then set_xs := r.is_multiply;
v.state := ROUND_UFLOW; v.tiny := exp_tiny;
elsif exp_huge = '1' then if exp_tiny = '1' and r.fpscr(FPSCR_UE) = '0' then
v.state := ROUND_OFLOW; v.state := ROUND_UFLOW_DIS;
elsif exp_huge = '1' and r.fpscr(FPSCR_OE) = '0' then
v.state := ROUND_OFLOW_DIS;
else else
v.state := ROUNDING; v.state := ROUNDING;
end if; end if;


when ROUND_UFLOW => when ROUND_UFLOW_DIS =>
-- r.shift = - amount by which exponent underflows -- r.shift = - amount by which exponent underflows
v.tiny := '1';
opsel_r <= RES_SHIFT;
set_r := '0';
if r.fpscr(FPSCR_UE) = '0' then
-- disabled underflow exception case -- disabled underflow exception case
-- have to denormalize before rounding -- have to denormalize before rounding
opsel_r <= RES_SHIFT;
set_r := '0';
set_r := '1'; set_r := '1';
re_sel2 <= REXP2_NE; re_sel2 <= REXP2_NE;
re_set_result <= '1'; re_set_result <= '1';
set_x := '1'; set_x := '1';
v.state := ROUNDING; v.state := ROUNDING;
else
-- enabled underflow exception case
-- if denormalized, have to normalize before rounding
v.fpscr(FPSCR_UX) := '1';
re_sel1 <= REXP1_R;
re_con2 <= RECON2_BIAS;
re_set_result <= '1';
if r.r(UNIT_BIT) = '0' then
rs_norm <= '1';
v.state := NORMALIZE;
else
v.state := ROUNDING;
end if;
end if;


when ROUND_OFLOW => when ROUND_OFLOW_DIS =>
-- disabled overflow exception
-- result depends on rounding mode
rcls_op <= RCLS_TINF; rcls_op <= RCLS_TINF;
v.fpscr(FPSCR_OX) := '1'; v.fpscr(FPSCR_OX) := '1';
opsel_r <= RES_MISC; opsel_r <= RES_MISC;
misc_sel <= "010"; misc_sel <= "010";
set_r := '0';
if r.fpscr(FPSCR_OE) = '0' then
-- disabled overflow exception
-- result depends on rounding mode
set_r := '1'; set_r := '1';
v.fpscr(FPSCR_XX) := '1'; v.fpscr(FPSCR_XX) := '1';
v.fpscr(FPSCR_FI) := '1'; v.fpscr(FPSCR_FI) := '1';
@ -2459,94 +2470,120 @@ begin
re_con2 <= RECON2_MAX; re_con2 <= RECON2_MAX;
re_set_result <= '1'; re_set_result <= '1';
arith_done := '1'; arith_done := '1';
else
-- enabled overflow exception
re_sel1 <= REXP1_R;
re_con2 <= RECON2_BIAS;
re_neg2 <= '1';
re_set_result <= '1';
v.state := ROUNDING;
end if;


when ROUNDING => when ROUNDING =>
-- r.r can be zero or denorm here for fri* instructions,
-- and for disabled underflow exception cases.
opsel_mask <= '1'; opsel_mask <= '1';
set_r := '1'; set_r := '1';
round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign); round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
if r.zero_fri = '0' then
v.fpscr(FPSCR_FR downto FPSCR_FI) := round; v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
else
v.fpscr(FPSCR_FR downto FPSCR_FI) := "00"; -- for fri* instructions
end if;
if round(1) = '1' then if round(1) = '1' then
-- increment the LSB for the precision -- increment the LSB for the precision
v.state := ROUND_INC; v.state := ROUND_INC;
elsif r.r(UNIT_BIT) = '0' then elsif r.r(UNIT_BIT) = '0' then
-- result after masking could be zero, or could be a -- Result after masking could be zero, or could be a
-- denormalized result that needs to be renormalized -- denormalized result that needs to be renormalized,
-- but only for fri* instructions and for disabled
-- underflow exception cases.
-- For fri* instructions, result_exp is 52.
-- For disabled underflow exception cases for DP operations,
-- result_exp is -1022 and there is no point renormalizing
-- since it will just get denormalized again, but we do need
-- to check for a zero result in a subsequent cycle
-- after R is masked.
if r.result_exp > to_signed(-1022, EXP_BITS) then
rs_norm <= '1'; rs_norm <= '1';
end if;
v.state := ROUNDING_3; v.state := ROUNDING_3;
elsif r.tiny = '1' and r.fpscr(FPSCR_UE) = '1' then
v.state := ROUND_UFLOW_EN;
elsif r.result_exp > max_exp then
v.state := ROUND_OFLOW_EN;
else else
arith_done := '1'; arith_done := '1';
end if; end if;
if round(0) = '1' then if round(0) = '1' and r.zero_fri = '0' then
v.fpscr(FPSCR_XX) := '1'; v.fpscr(FPSCR_XX) := '1';
if r.tiny = '1' then
v.fpscr(FPSCR_UX) := '1';
end if; end if;
if round(0) = '1' and r.tiny = '1' then
v.fpscr(FPSCR_UX) := '1';
end if; end if;


when ROUND_INC => when ROUND_INC =>
set_r := '1'; set_r := '1';
opsel_a <= AIN_RND; opsel_a <= AIN_RND;
-- set shift to -1
rs_con2 <= RSCON2_1;
rs_neg2 <= '1';
v.state := ROUNDING_2; v.state := ROUNDING_2;


when ROUNDING_2 => when ROUNDING_2 =>
-- Check for overflow during rounding -- Check for overflow during rounding
-- r.shift = -1 -- r.shift = 0
v.x := '0';
re_sel2 <= REXP2_NE;
opsel_r <= RES_SHIFT;
set_r := '0';
if r.r(UNIT_BIT + 1) = '1' then if r.r(UNIT_BIT + 1) = '1' then
set_r := '1';
re_set_result <= '1';
if exp_huge = '1' then
v.state := ROUND_OFLOW;
else
arith_done := '1';
end if;
elsif r.r(UNIT_BIT) = '0' then
-- Do CLZ so we can renormalize the result -- Do CLZ so we can renormalize the result
rs_norm <= '1'; rs_norm <= '1';
v.state := ROUNDING_3; v.state := ROUNDING_3;
elsif r.r(UNIT_BIT) = '0' then
-- R is non-zero (we just incremented it)
-- If result_exp is -1022 here, don't normalize since
-- we would then need to denormalize again.
if r.result_exp > to_signed(-1022, EXP_BITS) then
rs_norm <= '1';
end if;
v.state := ROUNDING_3;
elsif exp_huge = '1' then
v.state := ROUND_OFLOW_EN;
elsif r.tiny = '1' and r.fpscr(FPSCR_UE) = '1' then
v.state := ROUND_UFLOW_EN;
else else
arith_done := '1'; arith_done := '1';
end if; end if;


when ROUNDING_3 => when ROUNDING_3 =>
-- r.shift = clz(r.r) - 9 -- r.shift = clz(r.r) - 7 (or 0, or -7, if r.r is 0)
-- Note clz may be done on the value before being masked
-- to the result precision.
opsel_r <= RES_SHIFT; opsel_r <= RES_SHIFT;
set_r := '1'; set_r := '1';
re_sel2 <= REXP2_NE; re_sel2 <= REXP2_NE;
-- set shift to new_exp - min_exp (== -1022) -- set shift to new_exp - DP min_exp (== -1022)
rs_sel1 <= RSH1_NE; rs_sel1 <= RSH1_NE;
rs_con2 <= RSCON2_MINEXP; rs_con2 <= RSCON2_DPMINX;
rs_neg2 <= '1'; rs_neg2 <= '1';
rcls_op <= RCLS_TZERO; rcls_op <= RCLS_TZERO;
-- If the result is zero, that's handled below. -- If the result is zero, that's handled below.
-- Renormalize result after rounding -- Renormalize result after rounding
re_set_result <= '1';
v.denorm := exp_tiny; v.denorm := exp_tiny;
if new_exp < to_signed(-1022, EXP_BITS) then re_set_result <= '1';
v.state := DENORM; if exp_huge = '1' and r.fpscr(FPSCR_OE) = '0' then
v.state := ROUND_OFLOW_DIS;
elsif exp_huge = '1' and r.fpscr(FPSCR_OE) = '1' then
v.state := ROUND_OFLOW_EN;
elsif r.tiny = '1' and r.fpscr(FPSCR_UE) = '1' then
v.state := ROUND_UFLOW_EN;
else else
arith_done := '1'; arith_done := '1';
end if; end if;


when DENORM => when ROUND_OFLOW_EN =>
-- r.shift = result_exp - -1022 -- enabled overflow exception
opsel_r <= RES_SHIFT; -- rounding and normalization have been done
set_r := '1'; v.fpscr(FPSCR_OX) := '1';
re_sel2 <= REXP2_NE; re_sel1 <= REXP1_R;
re_con2 <= RECON2_BIAS;
re_neg2 <= '1';
re_set_result <= '1';
arith_done := '1';

when ROUND_UFLOW_EN =>
-- enabled underflow exception
-- rounding and normalization have been done
v.fpscr(FPSCR_UX) := '1';
re_sel1 <= REXP1_R;
re_con2 <= RECON2_BIAS;
re_set_result <= '1'; re_set_result <= '1';
arith_done := '1'; arith_done := '1';


@ -3077,13 +3114,16 @@ begin
-- Handle exceptions and special cases for arithmetic operations -- Handle exceptions and special cases for arithmetic operations
if r.cycle_1_ar = '1' then if r.cycle_1_ar = '1' then
v.fpscr := r.fpscr or scinfo.new_fpscr; v.fpscr := r.fpscr or scinfo.new_fpscr;
v.fpscr(FPSCR_FR) := '0';
v.fpscr(FPSCR_FI) := '0';
v.result_class := FINITE;
invalid := scinfo.invalid; invalid := scinfo.invalid;
zero_divide := scinfo.zero_divide; zero_divide := scinfo.zero_divide;
qnan_result := scinfo.qnan_result; qnan_result := scinfo.qnan_result;
if scinfo.immed_result = '1' then if scinfo.immed_result = '1' then
-- state machine is in the DO_SPECIAL or DO_FSQRT state here -- state machine is in the DO_SPECIAL or DO_FSQRT state here
arith_done := '1';
set_r := '1'; set_r := '1';
v.is_multiply := '0'; -- P is not valid
opsel_r <= RES_MISC; opsel_r <= RES_MISC;
opsel_sel <= scinfo.result_sel; opsel_sel <= scinfo.result_sel;
if scinfo.qnan_result = '1' then if scinfo.qnan_result = '1' then
@ -3092,8 +3132,15 @@ begin
else else
misc_sel <= "110"; misc_sel <= "110";
end if; end if;
arith_done := '1';
else else
misc_sel <= "111"; misc_sel <= "111";
if scinfo.need_finish = '1' then
-- we have to do rounding or underflow exception processing on the result
v.state := FINISH;
else
arith_done := '1';
end if;
end if; end if;
rsgn_op := scinfo.rsgn_op; rsgn_op := scinfo.rsgn_op;
v.result_class := scinfo.result_class; v.result_class := scinfo.result_class;
@ -3140,12 +3187,11 @@ begin
when others => when others =>
end case; end case;
when RCLS_TZERO => when RCLS_TZERO =>
if or (r.r(UNIT_BIT + 2 downto 0)) = '0' and s_nz = '0' then if or (r.r) = '0' then
v.result_class := ZERO; v.result_class := ZERO;
arith_done := '1'; arith_done := '1';
end if; end if;
when RCLS_TINF => when RCLS_TINF =>
if r.fpscr(FPSCR_OE) = '0' then
if r.round_mode(1 downto 0) = "00" or if r.round_mode(1 downto 0) = "00" or
(r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
v.result_class := INFINITY; v.result_class := INFINITY;
@ -3153,7 +3199,6 @@ begin
else else
v.fpscr(FPSCR_FR) := '0'; v.fpscr(FPSCR_FR) := '0';
end if; end if;
end if;
when others => when others =>
end case; end case;


@ -3171,7 +3216,7 @@ begin
v.writing_fpr := '1'; v.writing_fpr := '1';
v.update_fprf := '1'; v.update_fprf := '1';
end if; end if;
if r.is_subtract = '1' and v.result_class = ZERO then if r.is_subtract = '1' and v.result_class = ZERO and v.fpscr(FPSCR_FI) = '0' then
rsign := r.round_mode(0) and r.round_mode(1); rsign := r.round_mode(0) and r.round_mode(1);
end if; end if;
if r.negate = '1' and v.result_class /= NAN then if r.negate = '1' and v.result_class /= NAN then
@ -3242,7 +3287,7 @@ begin


-- If shifting right, test if bits of R will be shifted out of significance -- If shifting right, test if bits of R will be shifted out of significance
if r.longmask = '1' then if r.longmask = '1' then
mshift := to_signed(28, EXP_BITS); mshift := to_signed(SP_RBIT - 1, EXP_BITS);
else else
mshift := to_signed(-1, EXP_BITS); mshift := to_signed(-1, EXP_BITS);
end if; end if;
@ -3258,7 +3303,17 @@ begin
if mshift >= to_signed(64, EXP_BITS) then if mshift >= to_signed(64, EXP_BITS) then
mshift := to_signed(63, EXP_BITS); mshift := to_signed(63, EXP_BITS);
end if; end if;
v.x := v.x or r.r(to_integer(unsigned(mshift(5 downto 0)))); v.x := v.x or rormr(to_integer(unsigned(mshift(5 downto 0))));
end if;
-- Test if there are non-zero bits in S which won't get shifted into R
if set_xs = '1' and not is_X(r.shift) and r.shift < to_signed(56, EXP_BITS) then
if r.shift > to_signed(0, EXP_BITS) then
mshift := to_signed(55, EXP_BITS) - r.shift;
else
mshift := to_signed(55, EXP_BITS);
end if;
sorms := r.s or std_ulogic_vector(- signed(r.s));
v.x := v.x or sorms(to_integer(unsigned(mshift(5 downto 0))));
end if; end if;
asign := '0'; asign := '0';
case opsel_a is case opsel_a is
@ -3284,6 +3339,8 @@ begin
ci := '0'; ci := '0';
case opsel_c is case opsel_c is
when CIN_SUBEXT => when CIN_SUBEXT =>
-- Used with opsel_b = BIN_ADDSUBR, which will invert it if
-- r.is_subtract = 1, hence we use r.x here, rather than not r.x.
ci := r.is_subtract and r.x; ci := r.is_subtract and r.x;
when CIN_ABSEXT => when CIN_ABSEXT =>
ci := r.r(63) and (s_nz or r.x); ci := r.r(63) and (s_nz or r.x);
@ -3537,6 +3594,8 @@ begin
rsh_in2 := to_signed(64, EXP_BITS); rsh_in2 := to_signed(64, EXP_BITS);
when RSCON2_MINEXP => when RSCON2_MINEXP =>
rsh_in2 := min_exp; rsh_in2 := min_exp;
when RSCON2_DPMINX =>
rsh_in2 := to_signed(-1022, EXP_BITS);
when others => when others =>
rsh_in2 := to_signed(0, EXP_BITS); rsh_in2 := to_signed(0, EXP_BITS);
end case; end case;
@ -3654,7 +3713,7 @@ begin
end if; end if;


if r.update_fprf = '1' then if r.update_fprf = '1' then
v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.res_sign, r.result_class, v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.res_sign, r.result_class, r.int_result,
r.r(UNIT_BIT) and not r.denorm); r.r(UNIT_BIT) and not r.denorm);
end if; end if;


@ -3663,10 +3722,15 @@ begin
v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
v.fpscr(FPSCR_VE downto FPSCR_XE)); v.fpscr(FPSCR_VE downto FPSCR_XE));
if update_fx = '1' and if update_fx = '1' and
(v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then ((v.fpscr(FPSCR_OX downto FPSCR_VXVC) & v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)) and
not r.old_exc) /= 13x"0" then
v.fpscr(FPSCR_FX) := '1'; v.fpscr(FPSCR_FX) := '1';
end if; end if;


if r.complete = '1' or r.do_intr = '1' then
v.comm_fpscr := v.fpscr;
end if;

if v.instr_done = '1' then if v.instr_done = '1' then
if r.state /= IDLE then if r.state /= IDLE then
v.state := IDLE; v.state := IDLE;
@ -3675,7 +3739,8 @@ begin
if r.fp_rc = '1' then if r.fp_rc = '1' then
v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX); v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
end if; end if;
v.sp_result := r.single_prec; -- set sp_result for fctiw*
v.sp_result := r.single_prec and not r.integer_op;
v.res_int := r.int_result or r.integer_op; v.res_int := r.int_result or r.integer_op;
v.illegal := illegal; v.illegal := illegal;
v.nsnan_result := r.quieten_nan; v.nsnan_result := r.quieten_nan;
@ -3709,11 +3774,17 @@ begin
-- This mustn't depend on any fields of r that are modified in IDLE state. -- This mustn't depend on any fields of r that are modified in IDLE state.
if r.res_int = '1' then if r.res_int = '1' then
fp_result <= r.r; fp_result <= r.r;
if r.sp_result = '1' then
fp_result(63 downto 32) <= r.r(31 downto 0);
end if;
else else
fp_result <= pack_dp(r.res_sign, r.result_class, r.result_exp, r.r, fp_result <= pack_dp(r.res_sign, r.result_class, r.result_exp, r.r,
r.sp_result, r.nsnan_result); r.sp_result, r.nsnan_result);
end if; end if;


-- Make sure the reserved bit 11 (52) of FPSCR can never be set
v.fpscr(11) := '0';

rin <= v; rin <= v;
end process; end process;



@ -21,6 +21,8 @@
#define FPS_VE 0x80 #define FPS_VE 0x80
#define FPS_VXCVI 0x100 #define FPS_VXCVI 0x100
#define FPS_VXSOFT 0x400 #define FPS_VXSOFT 0x400
#define FPS_FI 0x20000
#define FPS_FR 0x40000


extern int trapit(long arg, int (*func)(long)); extern int trapit(long arg, int (*func)(long));
extern void do_rfid(unsigned long msr); extern void do_rfid(unsigned long msr);
@ -272,6 +274,7 @@ void set_fpscr(unsigned long fpscr)
unsigned long fpscr_eval(unsigned long val) unsigned long fpscr_eval(unsigned long val)
{ {
val &= ~0x60000000; /* clear FEX and VX */ val &= ~0x60000000; /* clear FEX and VX */
val &= ~0x00000800; /* clear reserved bit 52 (BE) */
if (val & 0x1f80700) /* test all VX* bits */ if (val & 0x1f80700) /* test all VX* bits */
val |= 0x20000000; val |= 0x20000000;
if ((val >> 25) & (val >> 3) & 0x1f) if ((val >> 25) & (val >> 3) & 0x1f)
@ -348,15 +351,15 @@ int test4(long arg)
fpscr = fpscr_eval((fpscr & 0x0fffffff) | 0x70000000); fpscr = fpscr_eval((fpscr & 0x0fffffff) | 0x70000000);
if (get_fpscr() != fpscr) if (get_fpscr() != fpscr)
return 16 * i + 27; return 16 * i + 27;
asm("mtfsb0 21"); asm("mtfsb0 21"); /* VXSOFT */
fpscr = fpscr_eval(fpscr & ~(1 << (31-21))); fpscr = fpscr_eval(fpscr & ~(1 << (31-21)));
if (get_fpscr() != fpscr) if (get_fpscr() != fpscr)
return 16 * i + 28; return 16 * i + 28;
asm("mtfsb1 21"); asm("mtfsb1 21");
fpscr = fpscr_eval(fpscr | (1 << (31-21))); fpscr = fpscr_eval(fpscr | (1 << (31-21)) | (1ul << 31));
if (get_fpscr() != fpscr) if (get_fpscr() != fpscr)
return 16 * i + 29; return 16 * i + 29;
asm("mtfsb0 24"); asm("mtfsb0 24"); /* OE */
fpscr = fpscr_eval(fpscr & ~(1 << (31-24))); fpscr = fpscr_eval(fpscr & ~(1 << (31-24)));
if (get_fpscr() != fpscr) if (get_fpscr() != fpscr)
return 16 * i + 30; return 16 * i + 30;
@ -653,29 +656,35 @@ struct roundvals {
unsigned long fpscr; unsigned long fpscr;
unsigned long dpval; unsigned long dpval;
unsigned long spval; unsigned long spval;
unsigned long fpscr_fir;
} roundvals[] = { } roundvals[] = {
{ FPS_RN_NEAR, 0, 0 }, { FPS_RN_NEAR|FPS_FI|FPS_FR, 0, 0, 0 },
{ FPS_RN_CEIL, 0x8000000000000000, 0x8000000000000000 }, { FPS_RN_CEIL|FPS_FI|FPS_FR, 0x8000000000000000, 0x8000000000000000, 0 },
{ FPS_RN_NEAR, 0x402123456789abcd, 0x4021234560000000 }, { FPS_RN_NEAR|FPS_FR, 0x402123456789abcd, 0x4021234560000000, FPS_FI },
{ FPS_RN_ZERO, 0x402123456789abcd, 0x4021234560000000 }, { FPS_RN_ZERO|FPS_FR, 0x402123456789abcd, 0x4021234560000000, FPS_FI },
{ FPS_RN_CEIL, 0x402123456789abcd, 0x4021234580000000 }, { FPS_RN_CEIL, 0x402123456789abcd, 0x4021234580000000, FPS_FR|FPS_FI },
{ FPS_RN_FLOOR, 0x402123456789abcd, 0x4021234560000000 }, { FPS_RN_FLOOR, 0x402123456789abcd, 0x4021234560000000, FPS_FI },
{ FPS_RN_NEAR, 0x402123457689abcd, 0x4021234580000000 }, { FPS_RN_NEAR, 0x402123457689abcd, 0x4021234580000000, FPS_FR|FPS_FI },
{ FPS_RN_ZERO, 0x402123457689abcd, 0x4021234560000000 }, { FPS_RN_ZERO|FPS_FR|FPS_FI, 0x402123457689abcd, 0x4021234560000000, FPS_FI },
{ FPS_RN_CEIL, 0x402123457689abcd, 0x4021234580000000 }, { FPS_RN_CEIL|FPS_FR, 0x402123457689abcd, 0x4021234580000000, FPS_FR|FPS_FI },
{ FPS_RN_FLOOR, 0x402123457689abcd, 0x4021234560000000 }, { FPS_RN_FLOOR, 0x402123457689abcd, 0x4021234560000000, FPS_FI },
{ FPS_RN_NEAR, 0x4021234570000000, 0x4021234580000000 }, { FPS_RN_NEAR, 0x4021234570000000, 0x4021234580000000, FPS_FR|FPS_FI },
{ FPS_RN_NEAR, 0x4021234550000000, 0x4021234540000000 }, { FPS_RN_NEAR, 0x4021234550000000, 0x4021234540000000, FPS_FI },
{ FPS_RN_NEAR, 0x7ff123456789abcd, 0x7ff9234560000000 }, { FPS_RN_NEAR|FPS_FR|FPS_FI, 0x7ff123456789abcd, 0x7ff9234560000000, 0 },
{ FPS_RN_ZERO, 0x7ffa3456789abcde, 0x7ffa345660000000 }, { FPS_RN_ZERO|FPS_FR, 0x7ffa3456789abcde, 0x7ffa345660000000, 0 },
{ FPS_RN_FLOOR, 0x7ff0000000000000, 0x7ff0000000000000 }, { FPS_RN_FLOOR|FPS_FR|FPS_FI, 0x7ff0000000000000, 0x7ff0000000000000, 0 },
{ FPS_RN_NEAR, 0x47e1234550000000, 0x47e1234540000000 }, { FPS_RN_NEAR, 0x47e1234550000000, 0x47e1234540000000, FPS_FI },
{ FPS_RN_NEAR, 0x47f1234550000000, 0x7ff0000000000000 }, { FPS_RN_NEAR, 0x47f1234550000000, 0x7ff0000000000000, FPS_FR|FPS_FI },
{ FPS_RN_ZERO, 0x47f1234550000000, 0x47efffffe0000000 }, { FPS_RN_ZERO, 0x47f1234550000000, 0x47efffffe0000000, FPS_FI },
{ FPS_RN_CEIL, 0x47f1234550000000, 0x7ff0000000000000 }, { FPS_RN_CEIL, 0x47f1234550000000, 0x7ff0000000000000, FPS_FR|FPS_FI },
{ FPS_RN_FLOOR, 0x47f1234550000000, 0x47efffffe0000000 }, { FPS_RN_FLOOR, 0x47f1234550000000, 0x47efffffe0000000, FPS_FI },
{ FPS_RN_NEAR, 0x38012345b0000000, 0x38012345c0000000 }, { FPS_RN_NEAR, 0x38012345b0000000, 0x38012345c0000000, FPS_FR|FPS_FI },
{ FPS_RN_NEAR, 0x37c12345b0000000, 0x37c1234400000000 }, { FPS_RN_NEAR, 0x37c12345b0000000, 0x37c1234400000000, FPS_FI },
{ FPS_RN_NEAR, 0x0000008800000088, 0, FPS_FI },
{ FPS_RN_NEAR, 0xc2000000c2000000, 0xc2000000c0000000, FPS_FI },
{ FPS_RN_NEAR|FPS_OE, 0xefffffffffffffff, 0xe400000000000000, FPS_FR|FPS_FI },
{ FPS_RN_NEAR|FPS_OE, 0xff0000ff43434343, 0xf30000ff40000000, FPS_FI },
{ FPS_RN_NEAR|FPS_OE, 0xfc00fc0139fffcff, 0xf000fc0140000000, FPS_FR|FPS_FI },
}; };


int test8(long arg) int test8(long arg)
@ -696,6 +705,13 @@ int test8(long arg)
} }
if (check_fprf(result, true, fpscr)) if (check_fprf(result, true, fpscr))
return i + 0x101; return i + 0x101;
if ((fpscr & (FPS_FR|FPS_FI)) != roundvals[i].fpscr_fir) {
print_string("\r\n");
print_hex(i, 4, " ");
print_hex(fpscr, 8, " ");
print_hex(roundvals[i].fpscr_fir, 8, " ");
return i + 0x201;
}
} }
return 0; return 0;
} }
@ -740,6 +756,8 @@ struct cvtivals {
{ 0x7ff923456789abcd, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } }, { 0x7ff923456789abcd, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } },
{ 0xfff923456789abcd, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } }, { 0xfff923456789abcd, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } },
{ 0xbfd123456789abcd, 0, 0, 0, 0, {0, 0, 0, 0} }, { 0xbfd123456789abcd, 0, 0, 0, 0, {0, 0, 0, 0} },
{ 0x41effffffff00081, 0x100000000, 0x100000000, 0x7fffffff, 0xffffffff, { 0, 0, 1, 1 } },
{ 0xc1e0000000000000, 0xffffffff80000000, 0x0000000000000000, 0x80000000, 0x00000000, { 0, 1, 0, 1 } },
}; };


#define GET_VXCVI() ((get_fpscr() >> 8) & 1) #define GET_VXCVI() ((get_fpscr() >> 8) & 1)
@ -814,6 +832,7 @@ struct cvtivals cvtizvals[] = {
{ 0xfff0000000000000, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } }, { 0xfff0000000000000, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } },
{ 0x7ff923456789abcd, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } }, { 0x7ff923456789abcd, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } },
{ 0xfff923456789abcd, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } }, { 0xfff923456789abcd, 0x8000000000000000, 0, 0x80000000, 0, { 1, 1, 1, 1 } },
{ 0xc1e0000000000000, 0xffffffff80000000, 0x0000000000000000, 0x80000000, 0x00000000, { 0, 1, 0, 1 } },
}; };


int test10(long arg) int test10(long arg)
@ -959,51 +978,53 @@ struct addvals {
unsigned long val_b; unsigned long val_b;
unsigned long sum; unsigned long sum;
unsigned long diff; unsigned long diff;
unsigned long fpscr;
} addvals[] = { } addvals[] = {
{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, FPS_RN_NEAR },
{ 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000 }, { 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000, FPS_RN_NEAR },
{ 0x3fdfffffffffffff, 0x0000000000000000, 0x3fdfffffffffffff, 0x3fdfffffffffffff }, { 0x3fdfffffffffffff, 0x0000000000000000, 0x3fdfffffffffffff, 0x3fdfffffffffffff, FPS_RN_NEAR },
{ 0x3ff0000000000000, 0x3ff0000000000000, 0x4000000000000000, 0x0000000000000000 }, { 0x3ff0000000000000, 0x3ff0000000000000, 0x4000000000000000, 0x0000000000000000, FPS_RN_NEAR },
{ 0xbff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x0000000000000000 }, { 0xbff0000000000000, 0xbff0000000000000, 0xc000000000000000, 0x0000000000000000, FPS_RN_NEAR },
{ 0x402123456789abcd, 0x4021000000000000, 0x403111a2b3c4d5e6, 0x3fb1a2b3c4d5e680 }, { 0x402123456789abcd, 0x4021000000000000, 0x403111a2b3c4d5e6, 0x3fb1a2b3c4d5e680, FPS_RN_NEAR },
{ 0x4061200000000000, 0x406123456789abcd, 0x407121a2b3c4d5e6, 0xbfba2b3c4d5e6800 }, { 0x4061200000000000, 0x406123456789abcd, 0x407121a2b3c4d5e6, 0xbfba2b3c4d5e6800, FPS_RN_NEAR },
{ 0x4061230000000000, 0x3fa4560000000000, 0x4061244560000000, 0x406121baa0000000 }, { 0x4061230000000000, 0x3fa4560000000000, 0x4061244560000000, 0x406121baa0000000, FPS_RN_NEAR },
{ 0xc061230000000000, 0x3fa4560000000000, 0xc06121baa0000000, 0xc061244560000000 }, { 0xc061230000000000, 0x3fa4560000000000, 0xc06121baa0000000, 0xc061244560000000, FPS_RN_NEAR },
{ 0x4061230000000000, 0xbfa4560000000000, 0x406121baa0000000, 0x4061244560000000 }, { 0x4061230000000000, 0xbfa4560000000000, 0x406121baa0000000, 0x4061244560000000, FPS_RN_NEAR },
{ 0xc061230000000000, 0xbfa4560000000000, 0xc061244560000000, 0xc06121baa0000000 }, { 0xc061230000000000, 0xbfa4560000000000, 0xc061244560000000, 0xc06121baa0000000, FPS_RN_NEAR },
{ 0x3fa1230000000000, 0x4064560000000000, 0x4064571230000000, 0xc06454edd0000000 }, { 0x3fa1230000000000, 0x4064560000000000, 0x4064571230000000, 0xc06454edd0000000, FPS_RN_NEAR },
{ 0xbfa1230000000000, 0x4064560000000000, 0x406454edd0000000, 0xc064571230000000 }, { 0xbfa1230000000000, 0x4064560000000000, 0x406454edd0000000, 0xc064571230000000, FPS_RN_NEAR },
{ 0x3fa1230000000000, 0xc064560000000000, 0xc06454edd0000000, 0x4064571230000000 }, { 0x3fa1230000000000, 0xc064560000000000, 0xc06454edd0000000, 0x4064571230000000, FPS_RN_NEAR },
{ 0xbfa1230000000000, 0xc064560000000000, 0xc064571230000000, 0x406454edd0000000 }, { 0xbfa1230000000000, 0xc064560000000000, 0xc064571230000000, 0x406454edd0000000, FPS_RN_NEAR },
{ 0x6780000000000001, 0x6470000000000000, 0x6780000000000009, 0x677ffffffffffff2 }, { 0x6780000000000001, 0x6470000000000000, 0x6780000000000009, 0x677ffffffffffff2, FPS_RN_NEAR },
{ 0x6780000000000001, 0x6460000000000000, 0x6780000000000005, 0x677ffffffffffffa }, { 0x6780000000000001, 0x6460000000000000, 0x6780000000000005, 0x677ffffffffffffa, FPS_RN_NEAR },
{ 0x6780000000000001, 0x6450000000000000, 0x6780000000000003, 0x677ffffffffffffe }, { 0x6780000000000001, 0x6450000000000000, 0x6780000000000003, 0x677ffffffffffffe, FPS_RN_NEAR },
{ 0x6780000000000001, 0x6440000000000000, 0x6780000000000002, 0x6780000000000000 }, { 0x6780000000000001, 0x6440000000000000, 0x6780000000000002, 0x6780000000000000, FPS_RN_NEAR },
{ 0x7ff8888888888888, 0x7ff9999999999999, 0x7ff8888888888888, 0x7ff8888888888888 }, { 0x7ff8888888888888, 0x7ff9999999999999, 0x7ff8888888888888, 0x7ff8888888888888, FPS_RN_NEAR },
{ 0xfff8888888888888, 0x7ff9999999999999, 0xfff8888888888888, 0xfff8888888888888 }, { 0xfff8888888888888, 0x7ff9999999999999, 0xfff8888888888888, 0xfff8888888888888, FPS_RN_NEAR },
{ 0x7ff8888888888888, 0x7ff0000000000000, 0x7ff8888888888888, 0x7ff8888888888888 }, { 0x7ff8888888888888, 0x7ff0000000000000, 0x7ff8888888888888, 0x7ff8888888888888, FPS_RN_NEAR },
{ 0x7ff8888888888888, 0x0000000000000000, 0x7ff8888888888888, 0x7ff8888888888888 }, { 0x7ff8888888888888, 0x0000000000000000, 0x7ff8888888888888, 0x7ff8888888888888, FPS_RN_NEAR },
{ 0x7ff8888888888888, 0x0001111111111111, 0x7ff8888888888888, 0x7ff8888888888888 }, { 0x7ff8888888888888, 0x0001111111111111, 0x7ff8888888888888, 0x7ff8888888888888, FPS_RN_NEAR },
{ 0x7ff8888888888888, 0x3ff0000000000000, 0x7ff8888888888888, 0x7ff8888888888888 }, { 0x7ff8888888888888, 0x3ff0000000000000, 0x7ff8888888888888, 0x7ff8888888888888, FPS_RN_NEAR },
{ 0x7ff0000000000000, 0x7ff9999999999999, 0x7ff9999999999999, 0x7ff9999999999999 }, { 0x7ff0000000000000, 0x7ff9999999999999, 0x7ff9999999999999, 0x7ff9999999999999, FPS_RN_NEAR },
{ 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff8000000000000 }, { 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff8000000000000, FPS_RN_NEAR },
{ 0x7ff0000000000000, 0xfff0000000000000, 0x7ff8000000000000, 0x7ff0000000000000 }, { 0x7ff0000000000000, 0xfff0000000000000, 0x7ff8000000000000, 0x7ff0000000000000, FPS_RN_NEAR },
{ 0x7ff0000000000000, 0x0000000000000000, 0x7ff0000000000000, 0x7ff0000000000000 }, { 0x7ff0000000000000, 0x0000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, FPS_RN_NEAR },
{ 0x7ff0000000000000, 0x8000000000000000, 0x7ff0000000000000, 0x7ff0000000000000 }, { 0x7ff0000000000000, 0x8000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, FPS_RN_NEAR },
{ 0x7ff0000000000000, 0x8002222222222222, 0x7ff0000000000000, 0x7ff0000000000000 }, { 0x7ff0000000000000, 0x8002222222222222, 0x7ff0000000000000, 0x7ff0000000000000, FPS_RN_NEAR },
{ 0x7ff0000000000000, 0xc002222222222222, 0x7ff0000000000000, 0x7ff0000000000000 }, { 0x7ff0000000000000, 0xc002222222222222, 0x7ff0000000000000, 0x7ff0000000000000, FPS_RN_NEAR },
{ 0x0000000000000000, 0x7ff9999999999999, 0x7ff9999999999999, 0x7ff9999999999999 }, { 0x0000000000000000, 0x7ff9999999999999, 0x7ff9999999999999, 0x7ff9999999999999, FPS_RN_NEAR },
{ 0x0000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0xfff0000000000000 }, { 0x0000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0xfff0000000000000, FPS_RN_NEAR },
{ 0x8000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0xfff0000000000000 }, { 0x8000000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0xfff0000000000000, FPS_RN_NEAR },
{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, FPS_RN_NEAR },
{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 }, { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, FPS_RN_NEAR },
{ 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000 }, { 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000, FPS_RN_NEAR },
{ 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000 }, { 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000, FPS_RN_NEAR },
{ 0x8002222222222222, 0x0001111111111111, 0x8001111111111111, 0x8003333333333333 }, { 0x8002222222222222, 0x0001111111111111, 0x8001111111111111, 0x8003333333333333, FPS_RN_NEAR },
{ 0x0000022222222222, 0x0000111111111111, 0x0000133333333333, 0x80000eeeeeeeeeef }, { 0x0000022222222222, 0x0000111111111111, 0x0000133333333333, 0x80000eeeeeeeeeef, FPS_RN_NEAR },
{ 0x401ffffffbfffefe, 0x406b8265196bd89e, 0x406c8265194bd896, 0xc06a8265198bd8a6 }, { 0x401ffffffbfffefe, 0x406b8265196bd89e, 0x406c8265194bd896, 0xc06a8265198bd8a6, FPS_RN_NEAR },
{ 0x4030020000000004, 0xbf110001ffffffff, 0x403001fbbfff8004, 0x4030020440008004 }, { 0x4030020000000004, 0xbf110001ffffffff, 0x403001fbbfff8004, 0x4030020440008004, FPS_RN_NEAR },
{ 0x3fdfffffffffffff, 0x3fe0000000000000, 0x3ff0000000000000, 0xbc90000000000000 }, { 0x3fdfffffffffffff, 0x3fe0000000000000, 0x3ff0000000000000, 0xbc90000000000000, FPS_RN_NEAR },
{ 0x001000100010000f, 0x00000000000000ff, 0x001000100010010e, 0x00100010000fff10, FPS_RN_CEIL },
}; };


int test13(long arg) int test13(long arg)
@ -1013,8 +1034,8 @@ int test13(long arg)
struct addvals *vp = addvals; struct addvals *vp = addvals;
unsigned long fpscr; unsigned long fpscr;


set_fpscr(FPS_RN_NEAR);
for (i = 0; i < sizeof(addvals) / sizeof(addvals[0]); ++i, ++vp) { for (i = 0; i < sizeof(addvals) / sizeof(addvals[0]); ++i, ++vp) {
set_fpscr(vp->fpscr);
asm("lfd 5,0(%0); lfd 6,8(%0); fadd 7,5,6; fsub 8,5,6; stfd 7,0(%1); stfd 8,8(%1)" asm("lfd 5,0(%0); lfd 6,8(%0); fadd 7,5,6; fsub 8,5,6; stfd 7,0(%1); stfd 8,8(%1)"
: : "b" (&vp->val_a), "b" (results) : "memory"); : : "b" (&vp->val_a), "b" (results) : "memory");
fpscr = get_fpscr(); fpscr = get_fpscr();
@ -1491,110 +1512,123 @@ struct fmavals {
unsigned long ra; unsigned long ra;
unsigned long rc; unsigned long rc;
unsigned long rb; unsigned long rb;
unsigned long fpscr;
unsigned long fma; unsigned long fma;
unsigned long fms; unsigned long fms;
unsigned long nfma; unsigned long nfma;
unsigned long nfms; unsigned long nfms;
} fmavals[] = { } fmavals[] = {
/* +0 * +0 +- +0 -> +0, +0, -0, -0 */ /* +0 * +0 +- +0 -> +0, +0, -0, -0 */
{ 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, FPS_RN_NEAR,
0x0000000000000000, 0x0000000000000000, 0x8000000000000000, 0x8000000000000000 }, 0x0000000000000000, 0x0000000000000000, 0x8000000000000000, 0x8000000000000000 },
/* +0 * NaNC +- +0 -> NaNC, NaNC, NaNC, NaNC */ /* +0 * NaNC +- +0 -> NaNC, NaNC, NaNC, NaNC */
{ 0x0000000000000000, 0x7ffc000000000000, 0x0000000000000000, { 0x0000000000000000, 0x7ffc000000000000, 0x0000000000000000, FPS_RN_NEAR,
0x7ffc000000000000, 0x7ffc000000000000, 0x7ffc000000000000, 0x7ffc000000000000 }, 0x7ffc000000000000, 0x7ffc000000000000, 0x7ffc000000000000, 0x7ffc000000000000 },
/* +0 * NaNC +- NaNB -> NaNB, NaNB, NaNB, NaNB */ /* +0 * NaNC +- NaNB -> NaNB, NaNB, NaNB, NaNB */
{ 0x0000000000000000, 0x7ffc000000000000, 0x7ffb000000000000, { 0x0000000000000000, 0x7ffc000000000000, 0x7ffb000000000000, FPS_RN_NEAR,
0x7ffb000000000000, 0x7ffb000000000000, 0x7ffb000000000000, 0x7ffb000000000000 }, 0x7ffb000000000000, 0x7ffb000000000000, 0x7ffb000000000000, 0x7ffb000000000000 },
/* NaNA * NaNC +- NaNB -> NaNA, NaNA, NaNA, NaNA */ /* NaNA * NaNC +- NaNB -> NaNA, NaNA, NaNA, NaNA */
{ 0x7ffa000000000000, 0x7ffc000000000000, 0x7ffb000000000000, { 0x7ffa000000000000, 0x7ffc000000000000, 0x7ffb000000000000, FPS_RN_NEAR,
0x7ffa000000000000, 0x7ffa000000000000, 0x7ffa000000000000, 0x7ffa000000000000 }, 0x7ffa000000000000, 0x7ffa000000000000, 0x7ffa000000000000, 0x7ffa000000000000 },
/* +1.0 * -0 +- +finite B -> +B, -B, -B, +B */ /* +1.0 * -0 +- +finite B -> +B, -B, -B, +B */
{ 0x3ff0000000000000, 0x8000000000000000, 0x678123456789abcd, { 0x3ff0000000000000, 0x8000000000000000, 0x678123456789abcd, FPS_RN_NEAR,
0x678123456789abcd, 0xe78123456789abcd, 0xe78123456789abcd, 0x678123456789abcd }, 0x678123456789abcd, 0xe78123456789abcd, 0xe78123456789abcd, 0x678123456789abcd },
/* +1.0 * -1.0 +- (B = +3.818e+190) -> +B, -B, -B, +B */ /* +1.0 * -1.0 +- (B = +3.818e+190) -> +B, -B, -B, +B */
{ 0x3ff0000000000000, 0xbff0000000000000, 0x678123456789abcd, { 0x3ff0000000000000, 0xbff0000000000000, 0x678123456789abcd, FPS_RN_NEAR,
0x678123456789abcd, 0xe78123456789abcd, 0xe78123456789abcd, 0x678123456789abcd }, 0x678123456789abcd, 0xe78123456789abcd, 0xe78123456789abcd, 0x678123456789abcd },
/* +inf * -1.0 +- +finite B -> -inf, -inf, +inf, +inf */ /* +inf * -1.0 +- +finite B -> -inf, -inf, +inf, +inf */
{ 0x7ff0000000000000, 0xbff0000000000000, 0x678123456789abcd, { 0x7ff0000000000000, 0xbff0000000000000, 0x678123456789abcd, FPS_RN_NEAR,
0xfff0000000000000, 0xfff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000 }, 0xfff0000000000000, 0xfff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000 },
/* +inf * +0 +- +finite B -> NaNQ, NaNQ, NaNQ, NaNQ */ /* +inf * +0 +- +finite B -> NaNQ, NaNQ, NaNQ, NaNQ */
{ 0x7ff0000000000000, 0x0000000000000000, 0x678123456789abcd, { 0x7ff0000000000000, 0x0000000000000000, 0x678123456789abcd, FPS_RN_NEAR,
0x7ff8000000000000, 0x7ff8000000000000, 0x7ff8000000000000, 0x7ff8000000000000 }, 0x7ff8000000000000, 0x7ff8000000000000, 0x7ff8000000000000, 0x7ff8000000000000 },
/* +1.0 * +1.0 +- 1.00000012 -> +2.00000012, +1.2e-7, -2.00000012, -1.2e-7 */ /* +1.0 * +1.0 +- 1.00000012 -> +2.00000012, +1.2e-7, -2.00000012, -1.2e-7 */
{ 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000020000000, { 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000020000000, FPS_RN_NEAR,
0x4000000010000000, 0xbe80000000000000, 0xc000000010000000, 0x3e80000000000000 }, 0x4000000010000000, 0xbe80000000000000, 0xc000000010000000, 0x3e80000000000000 },
/* +(1 + 2^-52) * +(1 + 2^-52) +- +1.0 -> +(2 + 2^-51), +2^-51, -(2 + 2^-51), -2^-51 */ /* +(1 + 2^-52) * +(1 + 2^-52) +- +1.0 -> +(2 + 2^-51), +2^-51, -(2 + 2^-51), -2^-51 */
{ 0x3ff0000000000001, 0x3ff0000000000001, 0x3ff0000000000000, { 0x3ff0000000000001, 0x3ff0000000000001, 0x3ff0000000000000, FPS_RN_NEAR,
0x4000000000000001, 0x3cc0000000000000, 0xc000000000000001, 0xbcc0000000000000 }, 0x4000000000000001, 0x3cc0000000000000, 0xc000000000000001, 0xbcc0000000000000 },
/* +(1 + 3*2^-52) * +(1 + 2^-51) +- +1.0 -> +(2 + 2^-50), +5 * 2^-52 + 2^-101, -, - */ /* +(1 + 3*2^-52) * +(1 + 2^-51) +- +1.0 -> +(2 + 3*2^-51), +5 * 2^-52 + 2^-101, -, - */
{ 0x3ff0000000000003, 0x3ff0000000000002, 0x3ff0000000000000, { 0x3ff0000000000003, 0x3ff0000000000002, 0x3ff0000000000000, FPS_RN_NEAR,
0x4000000000000002, 0x3cd4000000000002, 0xc000000000000002, 0xbcd4000000000002 }, 0x4000000000000003, 0x3cd4000000000002, 0xc000000000000003, 0xbcd4000000000002 },
/* +2.443e-77 * 2.828 +- 6.909e-77 -> -1.402e-93, +1.382e-76, +1.402e-93, -1.382e-76 */ /* +2.443e-77 * 2.828 +- 6.909e-77 -> -1.402e-93, +1.382e-76, +1.402e-93, -1.382e-76 */
{ 0x3006a09e667f3bcc, 0x4006a09e667f3bcd, 0xb020000000000000, { 0x3006a09e667f3bcc, 0x4006a09e667f3bcd, 0xb020000000000000, FPS_RN_NEAR,
0xaca765753908cd20, 0x3030000000000000, 0x2ca765753908cd20, 0xb030000000000000 }, 0xaca765753908cd20, 0x3030000000000000, 0x2ca765753908cd20, 0xb030000000000000 },
/* +2.443e-77 * 2.828 +- 6.909e-77 -> +9.446e-93, +1.382e-76, -9.446e-93, -1.382e-76 */ /* +2.443e-77 * 2.828 +- 6.909e-77 -> +9.446e-93, +1.382e-76, -9.446e-93, -1.382e-76 */
{ 0x3006a09e667f3bcd, 0x4006a09e667f3bcd, 0xb020000000000000, { 0x3006a09e667f3bcd, 0x4006a09e667f3bcd, 0xb020000000000000, FPS_RN_NEAR,
0x2cd3b3efbf5e2229, 0x3030000000000000, 0xacd3b3efbf5e2229, 0xb030000000000000 }, 0x2cd3b3efbf5e2229, 0x3030000000000000, 0xacd3b3efbf5e2229, 0xb030000000000000 },
/* +2.443e-77 * 2.828 +- -1.1055e-75 -> -1.0364e-75, +1.1746e-75, +1.0364e-75, -1.1746e-75 */ /* +2.443e-77 * 2.828 +- -1.1055e-75 -> -1.0364e-75, +1.1746e-75, +1.0364e-75, -1.1746e-75 */
{ 0x3006a09e667f3bcc, 0x4006a09e667f3bcd, 0xb060003450000000, { 0x3006a09e667f3bcc, 0x4006a09e667f3bcd, 0xb060003450000000, FPS_RN_NEAR,
0xb05e0068a0000000, 0x3061003450000000, 0x305e0068a0000000, 0xb061003450000000 }, 0xb05e0068a0000000, 0x3061003450000000, 0x305e0068a0000000, 0xb061003450000000 },
/* +2 * +3 +- 3 -> +9, +3, -9, -3 */ /* +2 * +3 +- 3 -> +9, +3, -9, -3 */
{ 0x4000000000000000, 0x4008000000000000, 0x4008000000000000, { 0x4000000000000000, 0x4008000000000000, 0x4008000000000000, FPS_RN_NEAR,
0x4022000000000000, 0x4008000000000000, 0xc022000000000000, 0xc008000000000000 }, 0x4022000000000000, 0x4008000000000000, 0xc022000000000000, 0xc008000000000000 },
/* +2 * +3 +- 5 -> +11, +1, -11, -1 */ /* +2 * +3 +- 5 -> +11, +1, -11, -1 */
{ 0x4000000000000000, 0x4008000000000000, 0x4014000000000000, { 0x4000000000000000, 0x4008000000000000, 0x4014000000000000, FPS_RN_NEAR,
0x4026000000000000, 0x3ff0000000000000, 0xc026000000000000, 0xbff0000000000000 }, 0x4026000000000000, 0x3ff0000000000000, 0xc026000000000000, 0xbff0000000000000 },
/* +2 * +3 +- 7 -> +13, -1, -13, +1 */ /* +2 * +3 +- 7 -> +13, -1, -13, +1 */
{ 0x4000000000000000, 0x4008000000000000, 0x401c000000000000, { 0x4000000000000000, 0x4008000000000000, 0x401c000000000000, FPS_RN_NEAR,
0x402a000000000000, 0xbff0000000000000, 0xc02a000000000000, 0x3ff0000000000000 }, 0x402a000000000000, 0xbff0000000000000, 0xc02a000000000000, 0x3ff0000000000000 },
/* +2 * +3 +- 9 -> +15, -3, -15, +3 */ /* +2 * +3 +- 9 -> +15, -3, -15, +3 */
{ 0x4000000000000000, 0x4008000000000000, 0x4022000000000000, { 0x4000000000000000, 0x4008000000000000, 0x4022000000000000, FPS_RN_NEAR,
0x402e000000000000, 0xc008000000000000, 0xc02e000000000000, 0x4008000000000000 }, 0x402e000000000000, 0xc008000000000000, 0xc02e000000000000, 0x4008000000000000 },
/* +2 * +3 +- -3 -> +3, +9, -3, -9 */ /* +2 * +3 +- -3 -> +3, +9, -3, -9 */
{ 0x4000000000000000, 0x4008000000000000, 0xc008000000000000, { 0x4000000000000000, 0x4008000000000000, 0xc008000000000000, FPS_RN_NEAR,
0x4008000000000000, 0x4022000000000000, 0xc008000000000000, 0xc022000000000000 }, 0x4008000000000000, 0x4022000000000000, 0xc008000000000000, 0xc022000000000000 },
/* +2 * +3 +- -5 -> +1, +11, -1, -11 */ /* +2 * +3 +- -5 -> +1, +11, -1, -11 */
{ 0x4000000000000000, 0x4008000000000000, 0xc014000000000000, { 0x4000000000000000, 0x4008000000000000, 0xc014000000000000, FPS_RN_NEAR,
0x3ff0000000000000, 0x4026000000000000, 0xbff0000000000000, 0xc026000000000000 }, 0x3ff0000000000000, 0x4026000000000000, 0xbff0000000000000, 0xc026000000000000 },
/* +2 * +3 +- -7 -> -1, +13, +1, -13 */ /* +2 * +3 +- -7 -> -1, +13, +1, -13 */
{ 0x4000000000000000, 0x4008000000000000, 0xc01c000000000000, { 0x4000000000000000, 0x4008000000000000, 0xc01c000000000000, FPS_RN_NEAR,
0xbff0000000000000, 0x402a000000000000, 0x3ff0000000000000, 0xc02a000000000000 }, 0xbff0000000000000, 0x402a000000000000, 0x3ff0000000000000, 0xc02a000000000000 },
/* +2 * +3 +- -9 -> -3, +15, +3, -15 */ /* +2 * +3 +- -9 -> -3, +15, +3, -15 */
{ 0x4000000000000000, 0x4008000000000000, 0xc022000000000000, { 0x4000000000000000, 0x4008000000000000, 0xc022000000000000, FPS_RN_NEAR,
0xc008000000000000, 0x402e000000000000, 0x4008000000000000, 0xc02e000000000000 }, 0xc008000000000000, 0x402e000000000000, 0x4008000000000000, 0xc02e000000000000 },
/* +2 * -3 +- 3 -> -3, -9, +3, +9 */ /* +2 * -3 +- 3 -> -3, -9, +3, +9 */
{ 0x4000000000000000, 0xc008000000000000, 0x4008000000000000, { 0x4000000000000000, 0xc008000000000000, 0x4008000000000000, FPS_RN_NEAR,
0xc008000000000000, 0xc022000000000000, 0x4008000000000000, 0x4022000000000000 }, 0xc008000000000000, 0xc022000000000000, 0x4008000000000000, 0x4022000000000000 },
/* +2 * -3 +- 5 -> -1, -11, +1, +11 */ /* +2 * -3 +- 5 -> -1, -11, +1, +11 */
{ 0x4000000000000000, 0xc008000000000000, 0x4014000000000000, { 0x4000000000000000, 0xc008000000000000, 0x4014000000000000, FPS_RN_NEAR,
0xbff0000000000000, 0xc026000000000000, 0x3ff0000000000000, 0x4026000000000000 }, 0xbff0000000000000, 0xc026000000000000, 0x3ff0000000000000, 0x4026000000000000 },
/* +2 * -3 +- 7 -> +1, -13, -1, +13 */ /* +2 * -3 +- 7 -> +1, -13, -1, +13 */
{ 0x4000000000000000, 0xc008000000000000, 0x401c000000000000, { 0x4000000000000000, 0xc008000000000000, 0x401c000000000000, FPS_RN_NEAR,
0x3ff0000000000000, 0xc02a000000000000, 0xbff0000000000000, 0x402a000000000000 }, 0x3ff0000000000000, 0xc02a000000000000, 0xbff0000000000000, 0x402a000000000000 },
/* +2 * -3 +- 9 -> +3, -15, -3, +15 */ /* +2 * -3 +- 9 -> +3, -15, -3, +15 */
{ 0x4000000000000000, 0xc008000000000000, 0x4022000000000000, { 0x4000000000000000, 0xc008000000000000, 0x4022000000000000, FPS_RN_NEAR,
0x4008000000000000, 0xc02e000000000000, 0xc008000000000000, 0x402e000000000000 }, 0x4008000000000000, 0xc02e000000000000, 0xc008000000000000, 0x402e000000000000 },
/* -2 * +3 +- -3 -> -9, -3, +9, +3 */ /* -2 * +3 +- -3 -> -9, -3, +9, +3 */
{ 0xc000000000000000, 0x4008000000000000, 0xc008000000000000, { 0xc000000000000000, 0x4008000000000000, 0xc008000000000000, FPS_RN_NEAR,
0xc022000000000000, 0xc008000000000000, 0x4022000000000000, 0x4008000000000000 }, 0xc022000000000000, 0xc008000000000000, 0x4022000000000000, 0x4008000000000000 },
/* -2 * +3 +- -5 -> -11, -1, +11, +1 */ /* -2 * +3 +- -5 -> -11, -1, +11, +1 */
{ 0xc000000000000000, 0x4008000000000000, 0xc014000000000000, { 0xc000000000000000, 0x4008000000000000, 0xc014000000000000, FPS_RN_NEAR,
0xc026000000000000, 0xbff0000000000000, 0x4026000000000000, 0x3ff0000000000000 }, 0xc026000000000000, 0xbff0000000000000, 0x4026000000000000, 0x3ff0000000000000 },
/* -2 * +3 +- -7 -> -13, +1, +13, -1 */ /* -2 * +3 +- -7 -> -13, +1, +13, -1 */
{ 0xc000000000000000, 0x4008000000000000, 0xc01c000000000000, { 0xc000000000000000, 0x4008000000000000, 0xc01c000000000000, FPS_RN_NEAR,
0xc02a000000000000, 0x3ff0000000000000, 0x402a000000000000, 0xbff0000000000000 }, 0xc02a000000000000, 0x3ff0000000000000, 0x402a000000000000, 0xbff0000000000000 },
/* -2 * +3 +- -9 -> -15, +3, +15, -3 */ /* -2 * +3 +- -9 -> -15, +3, +15, -3 */
{ 0xc000000000000000, 0x4008000000000000, 0xc022000000000000, { 0xc000000000000000, 0x4008000000000000, 0xc022000000000000, FPS_RN_NEAR,
0xc02e000000000000, 0x4008000000000000, 0x402e000000000000, 0xc008000000000000 }, 0xc02e000000000000, 0x4008000000000000, 0x402e000000000000, 0xc008000000000000 },
/* -2 * +3 +- +0 -> -6, -6, +6, +6 */ /* -2 * +3 +- +0 -> -6, -6, +6, +6 */
{ 0xc000000000000000, 0x4008000000000000, 0x0000000000000000, { 0xc000000000000000, 0x4008000000000000, 0x0000000000000000, FPS_RN_NEAR,
0xc018000000000000, 0xc018000000000000, 0x4018000000000000, 0x4018000000000000 }, 0xc018000000000000, 0xc018000000000000, 0x4018000000000000, 0x4018000000000000 },
/* +2 * -3 +- -0 -> -6, -6, +6, +6 */ /* +2 * -3 +- -0 -> -6, -6, +6, +6 */
{ 0x4000000000000000, 0xc008000000000000, 0x8000000000000000, { 0x4000000000000000, 0xc008000000000000, 0x8000000000000000, FPS_RN_NEAR,
0xc018000000000000, 0xc018000000000000, 0x4018000000000000, 0x4018000000000000 }, 0xc018000000000000, 0xc018000000000000, 0x4018000000000000, 0x4018000000000000 },
/* 2^-1026 * (1.5 * 2^1023) +- -0 -> (1.5 * 2^-3), ditto, -ditto, -ditto */ /* 2^-1026 * (1.5 * 2^1023) +- -0 -> (1.5 * 2^-3), ditto, -ditto, -ditto */
{ 0x0001000000000000, 0x7fe8000000000000, 0x8000000000000000, { 0x0001000000000000, 0x7fe8000000000000, 0x8000000000000000, FPS_RN_NEAR,
0x3fc8000000000000, 0x3fc8000000000000, 0xbfc8000000000000, 0xbfc8000000000000 }, 0x3fc8000000000000, 0x3fc8000000000000, 0xbfc8000000000000, 0xbfc8000000000000 },
/* 1 * -1 + tiny -> -1 + delta, -1, 1 - delta, 1 */
{ 0x3ff0000000000000, 0xbff0000000000000, 0x00000000b2200102, FPS_RN_CEIL,
0xbfefffffffffffff, 0xbff0000000000000, 0x3fefffffffffffff, 0x3ff0000000000000 },
/* from random exec tests */
{ 0x43eff79000000000, 0x00000000000000ff, 0x0000000000000081, FPS_RN_CEIL,
0x014fd79870000001, 0x014fd79870000000, 0x814fd79870000001, 0x814fd79870000000 },
{ 0x00000000ffffffff, 0x1fc771af627f62ab, 0x8000000000000000, FPS_RN_ZERO,
0x0000000000000000, 0x0000000000000000, 0x8000000000000000, 0x8000000000000000 },
{ 0x41efffffffe00000, 0xc1efffffffe00000, 0x43f0000000000000, FPS_RN_CEIL,
0x41fffffffff00000, 0xc3ffffffffe00000, 0xc1fffffffff00000, 0x43ffffffffe00000 },
{ 0x3ff0000000000000, 0x000060fbffffefc1, 0x000060fbffffefc1, FPS_RN_NEAR,
0x0000c1f7ffffdf82, 0x0000000000000000, 0x8000c1f7ffffdf82, 0x8000000000000000 },
}; };


int test23(long arg) int test23(long arg)
@ -1604,8 +1638,8 @@ int test23(long arg)
struct fmavals *vp = fmavals; struct fmavals *vp = fmavals;
unsigned long fpscr; unsigned long fpscr;


set_fpscr(FPS_RN_NEAR);
for (i = 0; i < sizeof(fmavals) / sizeof(fmavals[0]); ++i, ++vp) { for (i = 0; i < sizeof(fmavals) / sizeof(fmavals[0]); ++i, ++vp) {
set_fpscr(vp->fpscr);
asm("lfd 6,0(%0); lfd 7,8(%0); lfd 8,16(%0); fmadd 0,6,7,8; stfd 0,0(%1)" asm("lfd 6,0(%0); lfd 7,8(%0); lfd 8,16(%0); fmadd 0,6,7,8; stfd 0,0(%1)"
: : "b" (&vp->ra), "b" (results) : "memory"); : : "b" (&vp->ra), "b" (results) : "memory");
asm("fmsub 1,6,7,8; fnmadd 2,6,7,8; fnmsub 3,6,7,8; stfd 1,8(%0); stfd 2,16(%0); stfd 3,24(%0)" asm("fmsub 1,6,7,8; fnmadd 2,6,7,8; fnmsub 3,6,7,8; stfd 1,8(%0); stfd 2,16(%0); stfd 3,24(%0)"

Binary file not shown.
Loading…
Cancel
Save