From 9e8fb293edd59f355cc1fd020f96dafee0af867c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 16 Jul 2020 15:51:57 +1000 Subject: [PATCH] FPU: Implement floating convert from integer instructions This implements fcfid, fcfidu, fcfids and fcfidus, which convert 64-bit integer values in an FPR into a floating-point value. This brings in a lot of the datapath that will be needed in future, including the shifter, adder, mask generator and count-leading-zeroes logic, along with the machinery for rounding to single-precision or double-precision, detecting inexact results, signalling inexact-result exceptions, and updating result flags in the FPSCR. Signed-off-by: Paul Mackerras --- decode1.vhdl | 19 ++ fpu.vhdl | 506 ++++++++++++++++++++++++++++++++++++- tests/fpu/fpu.c | 87 ++++++- tests/test_fpu.bin | Bin 12504 -> 13504 bytes tests/test_fpu.console_out | 1 + 5 files changed, 587 insertions(+), 26 deletions(-) diff --git a/decode1.vhdl b/decode1.vhdl index 5f5fb80..83444cf 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -55,6 +55,7 @@ architecture behaviour of decode1 is type op_19_subop_array_t is array(0 to 7) of decode_rom_t; type op_30_subop_array_t is array(0 to 15) of decode_rom_t; type op_31_subop_array_t is array(0 to 1023) of decode_rom_t; + type op_59_subop_array_t is array(0 to 31) of decode_rom_t; type minor_rom_array_2_t is array(0 to 3) of decode_rom_t; type op_63_subop_array_0_t is array(0 to 511) of decode_rom_t; @@ -410,6 +411,13 @@ architecture behaviour of decode1 is others => decode_rom_init ); + constant decode_op_59_array : op_59_subop_array_t := ( + -- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl + -- op in out A out in out len ext pipe + 2#01110# => (FPU, OP_FPOP_I, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fcfid[u]s + others => illegal_inst + ); + constant decode_op_62_array : minor_rom_array_2_t := ( -- unit internal in1 in2 
in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl -- op in out A out in out len ext pipe @@ -433,6 +441,8 @@ architecture behaviour of decode1 is 2#100000010# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 2/8=fmr 2#100000100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/8=fnabs 2#100001000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 8/8=fabs + 2#111011010# => (FPU, OP_FPOP_I, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 26/14=fcfid + 2#111011110# => (FPU, OP_FPOP_I, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 30/14=fcfidu others => illegal_inst ); @@ -586,6 +596,15 @@ begin when 58 => v.decode := decode_op_58_array(to_integer(unsigned(f_in.insn(1 downto 0)))); + when 59 => + if HAS_FPU then + -- floating point operations, mostly single-precision + v.decode := decode_op_59_array(to_integer(unsigned(f_in.insn(5 downto 1)))); + if f_in.insn(5) = '0' and not std_match(f_in.insn(10 downto 1), "11-1001110") then + vi.override := '1'; + end if; + end if; + when 62 => v.decode := decode_op_62_array(to_integer(unsigned(f_in.insn(1 downto 0)))); diff --git a/fpu.vhdl b/fpu.vhdl index 3711b35..fecb7bb 100644 --- a/fpu.vhdl +++ b/fpu.vhdl @@ -37,7 +37,12 @@ architecture behaviour of fpu is type state_t is (IDLE, DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF, - DO_FMR); + DO_FMR, + DO_FCFID, + FINISH, NORMALIZE, + ROUND_UFLOW, ROUND_OFLOW, + ROUNDING, ROUNDING_2, ROUNDING_3, + DENORM); type reg_type is record state : state_t; @@ -54,21 +59,121 @@ architecture behaviour of fpu is fpscr : std_ulogic_vector(31 downto 0); a : fpu_reg_type; b : fpu_reg_type; - r : std_ulogic_vector(63 downto 0); + r 
: std_ulogic_vector(63 downto 0); -- 10.54 format + x : std_ulogic; result_sign : std_ulogic; result_class : fp_number_class; result_exp : signed(EXP_BITS-1 downto 0); + shift : signed(EXP_BITS-1 downto 0); writing_back : std_ulogic; int_result : std_ulogic; cr_result : std_ulogic_vector(3 downto 0); cr_mask : std_ulogic_vector(7 downto 0); + old_exc : std_ulogic_vector(4 downto 0); + update_fprf : std_ulogic; + tiny : std_ulogic; + denorm : std_ulogic; + round_mode : std_ulogic_vector(2 downto 0); end record; signal r, rin : reg_type; signal fp_result : std_ulogic_vector(63 downto 0); + signal opsel_a : std_ulogic_vector(1 downto 0); + signal opsel_b : std_ulogic_vector(1 downto 0); signal opsel_r : std_ulogic_vector(1 downto 0); + signal opsel_ainv : std_ulogic; + signal opsel_amask : std_ulogic; + signal in_a : std_ulogic_vector(63 downto 0); + signal in_b : std_ulogic_vector(63 downto 0); signal result : std_ulogic_vector(63 downto 0); + signal carry_in : std_ulogic; + signal lost_bits : std_ulogic; + signal r_hi_nz : std_ulogic; + signal r_lo_nz : std_ulogic; + signal misc_sel : std_ulogic_vector(3 downto 0); + + -- opsel values + constant AIN_R : std_ulogic_vector(1 downto 0) := "00"; + constant AIN_A : std_ulogic_vector(1 downto 0) := "01"; + constant AIN_B : std_ulogic_vector(1 downto 0) := "10"; + + constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00"; + constant BIN_R : std_ulogic_vector(1 downto 0) := "01"; + constant BIN_MASK : std_ulogic_vector(1 downto 0) := "10"; + + constant RES_SUM : std_ulogic_vector(1 downto 0) := "00"; + constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01"; + constant RES_MISC : std_ulogic_vector(1 downto 0) := "11"; + + -- Left and right shifter with 120 bit input and 64 bit output. + -- Shifts inp left by shift bits and returns the upper 64 bits of + -- the result. The shift parameter is interpreted as a signed + -- number in the range -64..63, with negative values indicating + -- right shifts. 
+ function shifter_64(inp: std_ulogic_vector(119 downto 0); + shift: std_ulogic_vector(6 downto 0)) + return std_ulogic_vector is + variable s1 : std_ulogic_vector(94 downto 0); + variable s2 : std_ulogic_vector(70 downto 0); + variable result : std_ulogic_vector(63 downto 0); + begin + case shift(6 downto 5) is + when "00" => + s1 := inp(119 downto 25); + when "01" => + s1 := inp(87 downto 0) & "0000000"; + when "10" => + s1 := x"0000000000000000" & inp(119 downto 89); + when others => + s1 := x"00000000" & inp(119 downto 57); + end case; + case shift(4 downto 3) is + when "00" => + s2 := s1(94 downto 24); + when "01" => + s2 := s1(86 downto 16); + when "10" => + s2 := s1(78 downto 8); + when others => + s2 := s1(70 downto 0); + end case; + case shift(2 downto 0) is + when "000" => + result := s2(70 downto 7); + when "001" => + result := s2(69 downto 6); + when "010" => + result := s2(68 downto 5); + when "011" => + result := s2(67 downto 4); + when "100" => + result := s2(66 downto 3); + when "101" => + result := s2(65 downto 2); + when "110" => + result := s2(64 downto 1); + when others => + result := s2(63 downto 0); + end case; + return result; + end; + + -- Generate a mask with 0-bits on the left and 1-bits on the right which + -- selects the bits that will be lost in doing a right shift. The shift + -- parameter is the bottom 6 bits of a negative shift count, + -- indicating a right shift; the low (64 - shift) bits of the result are set. + function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is + variable result: std_ulogic_vector(63 downto 0); + begin + result := (others => '0'); + for i in 0 to 63 loop + if i >= shift then + result(63 - i) := '1'; + end if; + end loop; + return result; + end; -- Split a DP floating-point number into components and work out its class. 
-- If is_int = 1, the input is considered an integer @@ -112,7 +217,8 @@ architecture behaviour of fpu is -- Construct a DP floating-point result from components function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0); - mantissa: std_ulogic_vector) return std_ulogic_vector is + mantissa: std_ulogic_vector; single_prec: std_ulogic) + return std_ulogic_vector is variable result : std_ulogic_vector(63 downto 0); begin result := (others => '0'); @@ -124,16 +230,76 @@ architecture behaviour of fpu is -- normalized number result(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023); end if; - result(51 downto 0) := mantissa(53 downto 2); + result(51 downto 29) := mantissa(53 downto 31); + if single_prec = '0' then + result(28 downto 0) := mantissa(30 downto 2); + end if; when INFINITY => result(62 downto 52) := "11111111111"; when NAN => result(62 downto 52) := "11111111111"; - result(51 downto 0) := mantissa(53 downto 2); + result(51 downto 29) := mantissa(53 downto 31); + if single_prec = '0' then + result(28 downto 0) := mantissa(30 downto 2); + end if; end case; return result; end; + -- Determine whether to increment when rounding + -- Returns rounding_inc & inexact + -- Assumes x includes the bottom 29 bits of the mantissa already + -- if single_prec = 1 (usually arranged by setting set_x = 1 earlier). 
+ function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic; + single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0); + sign: std_ulogic) + return std_ulogic_vector is + variable grx : std_ulogic_vector(2 downto 0); + variable ret : std_ulogic_vector(1 downto 0); + variable lsb : std_ulogic; + begin + if single_prec = '0' then + grx := mantissa(1 downto 0) & x; + lsb := mantissa(2); + else + grx := mantissa(30 downto 29) & x; + lsb := mantissa(31); + end if; + ret(1) := '0'; + ret(0) := or (grx); + case rn(1 downto 0) is + when "00" => -- round to nearest + if grx = "100" and rn(2) = '0' then + ret(1) := lsb; -- tie, round to even + else + ret(1) := grx(2); + end if; + when "01" => -- round towards zero + when others => -- round towards +/- inf + if rn(0) = sign then + -- round towards greater magnitude + ret(1) := ret(0); + end if; + end case; + return ret; + end; + + -- Determine result flags to write into the FPSCR + function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic) + return std_ulogic_vector is + begin + case class is + when ZERO => + return sign & "0010"; + when FINITE => + return (not unitbit) & sign & (not sign) & "00"; + when INFINITY => + return '0' & sign & (not sign) & "01"; + when NAN => + return "10001"; + end case; + end; + begin fpu_0: process(clk) begin @@ -174,6 +340,25 @@ begin variable j, k : integer; variable flm : std_ulogic_vector(7 downto 0); variable int_input : std_ulogic; + variable mask : std_ulogic_vector(63 downto 0); + variable in_a0 : std_ulogic_vector(63 downto 0); + variable in_b0 : std_ulogic_vector(63 downto 0); + variable misc : std_ulogic_vector(63 downto 0); + variable shift_res : std_ulogic_vector(63 downto 0); + variable round : std_ulogic_vector(1 downto 0); + variable update_fx : std_ulogic; + variable arith_done : std_ulogic; + variable mant_nz : std_ulogic; + variable min_exp : signed(EXP_BITS-1 downto 0); + variable max_exp : signed(EXP_BITS-1 downto 0); + 
variable bias_exp : signed(EXP_BITS-1 downto 0); + variable new_exp : signed(EXP_BITS-1 downto 0); + variable exp_tiny : std_ulogic; + variable exp_huge : std_ulogic; + variable renormalize : std_ulogic; + variable clz : std_ulogic_vector(5 downto 0); + variable set_x : std_ulogic; + variable mshift : signed(EXP_BITS-1 downto 0); begin v := r; illegal := '0'; @@ -199,16 +384,53 @@ begin if e_in.op = OP_FPOP_I then int_input := '1'; end if; + v.tiny := '0'; + v.denorm := '0'; + v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN); adec := decode_dp(e_in.fra, int_input); bdec := decode_dp(e_in.frb, int_input); v.a := adec; v.b := bdec; end if; + r_hi_nz <= or (r.r(55 downto 31)); + r_lo_nz <= or (r.r(30 downto 2)); + + if r.single_prec = '0' then + max_exp := to_signed(1023, EXP_BITS); + min_exp := to_signed(-1022, EXP_BITS); + bias_exp := to_signed(1536, EXP_BITS); + else + max_exp := to_signed(127, EXP_BITS); + min_exp := to_signed(-126, EXP_BITS); + bias_exp := to_signed(192, EXP_BITS); + end if; + new_exp := r.result_exp - r.shift; + exp_tiny := '0'; + exp_huge := '0'; + if new_exp < min_exp then + exp_tiny := '1'; + end if; + if new_exp > max_exp then + exp_huge := '1'; + end if; + v.writing_back := '0'; v.instr_done := '0'; - opsel_r <= "00"; + v.update_fprf := '0'; + v.shift := to_signed(0, EXP_BITS); + opsel_a <= AIN_R; + opsel_ainv <= '0'; + opsel_amask <= '0'; + opsel_b <= BIN_ZERO; + opsel_r <= RES_SUM; + carry_in <= '0'; + misc_sel <= "0000"; fpscr_mask := (others => '1'); + update_fx := '0'; + arith_done := '0'; + renormalize := '0'; + set_x := '0'; case r.state is when IDLE => @@ -230,10 +452,15 @@ begin end if; when "01000" => v.state := DO_FMR; + when "01110" => + -- fcfid[u][s] + v.state := DO_FCFID; when others => illegal := '1'; end case; end if; + v.x := '0'; + v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX); when DO_MCRFS => j := to_integer(unsigned(insn_bfa(r.insn))); @@ -276,7 +503,7 @@ begin when DO_MFFS => v.int_result := '1'; 
v.writing_back := '1'; - opsel_r <= "10"; + opsel_r <= RES_MISC; case r.insn(20 downto 16) is when "00000" => -- mffs @@ -322,6 +549,7 @@ begin v.state := IDLE; when DO_FMR => + opsel_a <= AIN_B; v.result_class := r.b.class; v.result_exp := r.b.exponent; if r.insn(9) = '1' then @@ -339,29 +567,281 @@ begin v.instr_done := '1'; v.state := IDLE; + when DO_FCFID => + v.result_sign := '0'; + opsel_a <= AIN_B; + if r.insn(8) = '0' and r.b.negative = '1' then + -- fcfid[s] with negative operand, set R = -B + opsel_ainv <= '1'; + carry_in <= '1'; + v.result_sign := '1'; + end if; + v.result_class := r.b.class; + v.result_exp := to_signed(54, EXP_BITS); + v.fpscr(FPSCR_FR) := '0'; + v.fpscr(FPSCR_FI) := '0'; + if r.b.class = ZERO then + arith_done := '1'; + else + v.state := FINISH; + end if; + + when FINISH => + if r.r(63 downto 54) /= "0000000001" then + renormalize := '1'; + v.state := NORMALIZE; + else + set_x := '1'; + if exp_tiny = '1' then + v.shift := new_exp - min_exp; + v.state := ROUND_UFLOW; + elsif exp_huge = '1' then + v.state := ROUND_OFLOW; + else + v.shift := to_signed(-2, EXP_BITS); + v.state := ROUNDING; + end if; + end if; + + when NORMALIZE => + -- Shift so we have 9 leading zeroes (we know R is non-zero) + opsel_r <= RES_SHIFT; + set_x := '1'; + if exp_tiny = '1' then + v.shift := new_exp - min_exp; + v.state := ROUND_UFLOW; + elsif exp_huge = '1' then + v.state := ROUND_OFLOW; + else + v.shift := to_signed(-2, EXP_BITS); + v.state := ROUNDING; + end if; + + when ROUND_UFLOW => + v.tiny := '1'; + if r.fpscr(FPSCR_UE) = '0' then + -- disabled underflow exception case + -- have to denormalize before rounding + opsel_r <= RES_SHIFT; + set_x := '1'; + v.shift := to_signed(-2, EXP_BITS); + v.state := ROUNDING; + else + -- enabled underflow exception case + -- if denormalized, have to normalize before rounding + v.fpscr(FPSCR_UX) := '1'; + v.result_exp := r.result_exp + bias_exp; + if r.r(54) = '0' then + renormalize := '1'; + v.state := NORMALIZE; + else + 
v.shift := to_signed(-2, EXP_BITS); + v.state := ROUNDING; + end if; + end if; + + when ROUND_OFLOW => + v.fpscr(FPSCR_OX) := '1'; + if r.fpscr(FPSCR_OE) = '0' then + -- disabled overflow exception + -- result depends on rounding mode + v.fpscr(FPSCR_XX) := '1'; + v.fpscr(FPSCR_FI) := '1'; + if r.round_mode(1 downto 0) = "00" or + (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then + v.result_class := INFINITY; + v.fpscr(FPSCR_FR) := '1'; + else + v.fpscr(FPSCR_FR) := '0'; + end if; + -- construct largest representable number + v.result_exp := max_exp; + opsel_r <= RES_MISC; + misc_sel <= "001" & r.single_prec; + arith_done := '1'; + else + -- enabled overflow exception + v.result_exp := r.result_exp - bias_exp; + v.shift := to_signed(-2, EXP_BITS); + v.state := ROUNDING; + end if; + + when ROUNDING => + opsel_amask <= '1'; + round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign); + v.fpscr(FPSCR_FR downto FPSCR_FI) := round; + if round(1) = '1' then + -- set mask to increment the LSB for the precision + opsel_b <= BIN_MASK; + carry_in <= '1'; + v.shift := to_signed(-1, EXP_BITS); + v.state := ROUNDING_2; + else + if r.r(54) = '0' then + -- result after masking could be zero, or could be a + -- denormalized result that needs to be renormalized + renormalize := '1'; + v.state := ROUNDING_3; + else + arith_done := '1'; + end if; + end if; + if round(0) = '1' then + v.fpscr(FPSCR_XX) := '1'; + if r.tiny = '1' then + v.fpscr(FPSCR_UX) := '1'; + end if; + end if; + + when ROUNDING_2 => + -- Check for overflow during rounding + v.x := '0'; + if r.r(55) = '1' then + opsel_r <= RES_SHIFT; + if exp_huge = '1' then + v.state := ROUND_OFLOW; + else + arith_done := '1'; + end if; + elsif r.r(54) = '0' then + -- Do CLZ so we can renormalize the result + renormalize := '1'; + v.state := ROUNDING_3; + else + arith_done := '1'; + end if; + + when ROUNDING_3 => + mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec); + if mant_nz = '0' then + 
v.result_class := ZERO; + arith_done := '1'; + else + -- Renormalize result after rounding + opsel_r <= RES_SHIFT; + v.denorm := exp_tiny; + v.shift := new_exp - to_signed(-1022, EXP_BITS); + if new_exp < to_signed(-1022, EXP_BITS) then + v.state := DENORM; + else + arith_done := '1'; + end if; + end if; + + when DENORM => + opsel_r <= RES_SHIFT; + arith_done := '1'; + end case; + if arith_done = '1' then + v.writing_back := '1'; + v.update_fprf := '1'; + v.instr_done := '1'; + v.state := IDLE; + update_fx := '1'; + end if; + -- Data path. + -- This has A and B input multiplexers, an adder, a shifter, + -- count-leading-zeroes logic, and a result mux. + if r.single_prec = '1' then + mshift := r.shift + to_signed(-29, EXP_BITS); + else + mshift := r.shift; + end if; + if mshift < to_signed(-64, EXP_BITS) then + mask := (others => '1'); + elsif mshift >= to_signed(0, EXP_BITS) then + mask := (others => '0'); + else + mask := right_mask(unsigned(mshift(5 downto 0))); + end if; + case opsel_a is + when AIN_R => + in_a0 := r.r; + when AIN_A => + in_a0 := r.a.mantissa; + when others => + in_a0 := r.b.mantissa; + end case; + if (or (mask and in_a0)) = '1' and set_x = '1' then + v.x := '1'; + end if; + if opsel_ainv = '1' then + in_a0 := not in_a0; + end if; + if opsel_amask = '1' then + in_a0 := in_a0 and not mask; + end if; + in_a <= in_a0; + case opsel_b is + when BIN_ZERO => + in_b0 := (others => '0'); + when BIN_R => + in_b0 := r.r; + when BIN_MASK => + in_b0 := mask; + when others => + in_b0 := (others => '0'); + end case; + in_b <= in_b0; + if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then + shift_res := shifter_64(r.r & x"00000000000000", + std_ulogic_vector(r.shift(6 downto 0))); + else + shift_res := (others => '0'); + end if; case opsel_r is - when "00" => - result <= r.b.mantissa; - when "10" => - result <= x"00000000" & (r.fpscr and fpscr_mask); + when RES_SUM => + result <= std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + 
carry_in); + when RES_SHIFT => + result <= shift_res; when others => - result <= (others => '0'); + case misc_sel is + when "0000" => + misc := x"00000000" & (r.fpscr and fpscr_mask); + when "0010" => + -- mantissa of max representable DP number + misc := x"007ffffffffffffc"; + when "0011" => + -- mantissa of max representable SP number + misc := x"007fffff80000000"; + when others => + misc := x"0000000000000000"; + end case; + result <= misc; end case; v.r := result; + if opsel_r = RES_SHIFT then + v.result_exp := new_exp; + end if; + + if renormalize = '1' then + clz := count_left_zeroes(r.r); + v.shift := resize(signed('0' & clz) - 9, EXP_BITS); + end if; + if r.int_result = '1' then fp_result <= r.r; else - fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r); + fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r, + r.single_prec); + end if; + if r.update_fprf = '1' then + v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class, + r.r(54) and not r.denorm); end if; v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI))); v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and v.fpscr(FPSCR_VE downto FPSCR_XE)); + if update_fx = '1' and + (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then + v.fpscr(FPSCR_FX) := '1'; + end if; if r.rc = '1' then v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX); end if; diff --git a/tests/fpu/fpu.c b/tests/fpu/fpu.c index 46668f8..80751d1 100644 --- a/tests/fpu/fpu.c +++ b/tests/fpu/fpu.c @@ -64,7 +64,7 @@ void print_string(const char *str) putchar(*str); } -void print_hex(unsigned long val, int ndigits) +void print_hex(unsigned long val, int ndigits, const char *str) { int i, x; @@ -75,6 +75,7 @@ void print_hex(unsigned long val, int ndigits) else putchar(x + '0'); } + print_string(str); } // i < 100 @@ -201,12 +202,9 @@ int sp_to_dp(long arg) asm("lfs 20,0(%0); stfd 20,0(%1)" : : 
"b" (&sp_dp_equiv[arg].sp), "b" (&dp) : "memory"); if (dp != sp_dp_equiv[arg].dp) { - print_hex(sp_dp_equiv[arg].sp, 8); - print_string(" "); - print_hex(dp, 16); - print_string(" "); - print_hex(sp_dp_equiv[arg].dp, 16); - print_string(" "); + print_hex(sp_dp_equiv[arg].sp, 8, " "); + print_hex(dp, 16, " "); + print_hex(sp_dp_equiv[arg].dp, 16, " "); } return dp != sp_dp_equiv[arg].dp; } @@ -465,12 +463,77 @@ int test6(long arg) return 0; } +struct int_fp_equiv { + long ival; + unsigned long fp; + unsigned long fp_u; + unsigned long fp_s; + unsigned long fp_us; +} intvals[] = { + { 0, 0, 0, 0, 0 }, + { 1, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000 }, + { -1, 0xbff0000000000000, 0x43f0000000000000, 0xbff0000000000000, 0x43f0000000000000 }, + { 2, 0x4000000000000000, 0x4000000000000000, 0x4000000000000000, 0x4000000000000000 }, + { -2, 0xc000000000000000, 0x43f0000000000000, 0xc000000000000000, 0x43f0000000000000 }, + { 0x12345678, 0x41b2345678000000, 0x41b2345678000000, 0x41b2345680000000, 0x41b2345680000000 }, + { 0x0008000000000000, 0x4320000000000000, 0x4320000000000000, 0x4320000000000000, 0x4320000000000000 }, + { 0x0010000000000000, 0x4330000000000000, 0x4330000000000000, 0x4330000000000000, 0x4330000000000000 }, + { 0x0020000000000000, 0x4340000000000000, 0x4340000000000000, 0x4340000000000000, 0x4340000000000000 }, + { 0x0020000000000001, 0x4340000000000000, 0x4340000000000000, 0x4340000000000000, 0x4340000000000000 }, + { 0x0020000000000002, 0x4340000000000001, 0x4340000000000001, 0x4340000000000000, 0x4340000000000000 }, + { 0x0020000000000003, 0x4340000000000002, 0x4340000000000002, 0x4340000000000000, 0x4340000000000000 }, + { 0x0020000010000000, 0x4340000008000000, 0x4340000008000000, 0x4340000000000000, 0x4340000000000000 }, + { 0x0020000020000000, 0x4340000010000000, 0x4340000010000000, 0x4340000000000000, 0x4340000000000000 }, + { 0x0020000030000000, 0x4340000018000000, 0x4340000018000000, 0x4340000020000000, 
0x4340000020000000 }, + { 0x0020000040000000, 0x4340000020000000, 0x4340000020000000, 0x4340000020000000, 0x4340000020000000 }, + { 0x0020000080000000, 0x4340000040000000, 0x4340000040000000, 0x4340000040000000, 0x4340000040000000 }, + { 0x0040000000000000, 0x4350000000000000, 0x4350000000000000, 0x4350000000000000, 0x4350000000000000 }, + { 0x0040000000000001, 0x4350000000000000, 0x4350000000000000, 0x4350000000000000, 0x4350000000000000 }, + { 0x0040000000000002, 0x4350000000000000, 0x4350000000000000, 0x4350000000000000, 0x4350000000000000 }, + { 0x0040000000000003, 0x4350000000000001, 0x4350000000000001, 0x4350000000000000, 0x4350000000000000 }, + { 0x0040000000000004, 0x4350000000000001, 0x4350000000000001, 0x4350000000000000, 0x4350000000000000 }, + { 0x0040000000000005, 0x4350000000000001, 0x4350000000000001, 0x4350000000000000, 0x4350000000000000 }, + { 0x0040000000000006, 0x4350000000000002, 0x4350000000000002, 0x4350000000000000, 0x4350000000000000 }, + { 0x0040000000000007, 0x4350000000000002, 0x4350000000000002, 0x4350000000000000, 0x4350000000000000 }, +}; + +int test7(long arg) +{ + long i; + unsigned long results[4]; + + for (i = 0; i < sizeof(intvals) / sizeof(intvals[0]); ++i) { + asm("lfd%U0%X0 3,%0; fcfid 6,3; fcfidu 7,3; stfd 6,0(%1); stfd 7,8(%1)" + : : "m" (intvals[i].ival), "b" (results) : "memory"); + asm("fcfids 9,3; stfd 9,16(%0); fcfidus 10,3; stfd 10,24(%0)" + : : "b" (results) : "memory"); + if (results[0] != intvals[i].fp || + results[1] != intvals[i].fp_u || + results[2] != intvals[i].fp_s || + results[3] != intvals[i].fp_us) { + print_string("\r\n"); + print_hex(results[0], 16, " "); + print_hex(results[1], 16, " "); + print_hex(results[2], 16, " "); + print_hex(results[3], 16, " "); + return i + 1; + } + } + return 0; +} + int fpu_test_6(void) { enable_fp(); return trapit(0, test6); } +int fpu_test_7(void) +{ + enable_fp(); + return trapit(0, test7); +} + int fail = 0; void do_test(int num, int (*test)(void)) @@ -484,12 +547,9 @@ 
void do_test(int num, int (*test)(void)) } else { fail = 1; print_string("FAIL "); - print_hex(ret, 5); - print_string(" SRR0="); - print_hex(mfspr(SRR0), 16); - print_string(" SRR1="); - print_hex(mfspr(SRR1), 16); - print_string("\r\n"); + print_hex(ret, 5, " SRR0="); + print_hex(mfspr(SRR0), 16, " SRR1="); + print_hex(mfspr(SRR1), 16, "\r\n"); } } @@ -503,6 +563,7 @@ int main(void) do_test(4, fpu_test_4); do_test(5, fpu_test_5); do_test(6, fpu_test_6); + do_test(7, fpu_test_7); return fail; } diff --git a/tests/test_fpu.bin b/tests/test_fpu.bin index 4fb260e1d5a3e4f37deea64098b82e55151aceb2..25d50c77a0d990a40320f8baa84752efe3a6d996 100755 GIT binary patch delta 3104 zcma)7du&tJ9sb?x;5dP}H{{X0h;I^;#MH~&Qjmi~xz2;RrVUw}wi^PGlu~FxT_;h= zY)-FB?TJnu3Oa1+9|0OfqpYeX7@4#OZPSOQ(rH^eX;TF_;6>=8lZB@7#Nw zB?Ocm>FRgB?|kR?I_KUy(LerFBayL|DBAza{ODfT?X)V|Pt*r)7r0&Ec5P3rI@q{U zdG6q*d(=VwV-PwYO42@MnHS^x?UW=B%*;T6G;1E-0>a zCrqO6o_j?qG|LMB|~&Y`8R}wrT9s(x=s$ z-7H!bRrQ_h2W8!=cL)1RS+{bCtt#J`H|C}Y(>yvGE$=M<3{D=o&NskG8;T6l)5+lQ zBT>tujh>~v0PNJ2P*WUBtT^WMMRS>vLa8l zserMwyBLa955!}4y%k@h@yEvUIU3{fdnsWUR2Ph0d(T9abea!e%J6qCW%*~9MjWpA z9yD(C^bgJZ@_mmKP=UXOUN9abU+Y+Y!@xA>q3^rwXeXcxfKS_vMgpWH9K_7hBJ;V$ z6d6YLR#uJA$FeVe9xA>1wy9Gcj_~liL@)i(MQbN@3QtmxFWyG`!Yvsy&iS!67bSwR zJ+B1=G@SKWF^1DR^`|xPl@hyXz`_NGYyCXhS zJ3c++xiC%i!Gw#_6GOH7WsRE84tb2r6%;w^qBBSo1qI9%@tvrM^T*o~d$t9a;Vy{` zMRlSIQJsP69t01YZkE-pl{g^rmwLb^DQO?%H+zhi~}9^&4gY;R=a+ zM96QlsmhYdEl&IR*I@5gS*#*WA!r*Q(QLDib30d zdgh{KML9Yy1oesaSFld0`h#1*i&iZ{$*-bSvFm>}4}u#}zhzwz(TY*<0~Yt~xb6QK zTKyXGPly_3Vtg&j*&UubrIp?7DKW+M5$~Fc+&i}=gW~!K>;O(Ykqmx;1`C`A+`K&* z6dwVB7Xd#Dd{$y7@S!J@!7~!)1AhYiGhiLnii*_K<2W=WEnX>|65Zg~j&{2f=wICu|(gtKdEnTxUU;jw+q*Kymo_ zuNS(h{XE<1EmC_gvuC_5YTp%h1ZLt2`;E6&rN6T)FnJl~US6yC*{{9T&ZI)rn_(@> ztDMI%J)U9R%d75tAJcziIIj^QFmyZ<;HJTelw{Zrw}G(TBC0biQk#axm|`Q#YfVwL zS4E?yIDc7e=|HC8!;T~BQ3adAZX<$p*E_@KKmAj9F;O!lz0jYA{`jr*eVg6X1+g-{Ugj0*XB0(BDj~qjF+^O^vNymb1%V*~8F*^gmZrOWJzsDx7%_x8f#(Tue$fC7 
z!5zXAKXtO{1t)U`maz*;nJHI=0waUJ?yW7CL$1FqZO6?Ry_r1>3Fg@wwm*|g!MJTR z(jm>@*rWF@l^RTc9L79n6B6evmRwS*OFy@>q8nnE0V%wl)h!tsEp_v)-NA)*c4zSG zvpgCdmNjqT*kK8DvNsqVIcsM!CjpdU1xDExpRKng1)pR?F) zZOmCi>T>vYHX(J;Tl;Ue_>KK9Q?f^oZ1sFATeqN2c5FeNL)4E1$adUgp9#$moM*XO iShwi^)H$V2{faf|k!L(VaDGm>fR25vl(2@tPyP$jogY;I delta 1800 zcmZvce{54#6vxkd+q&0{br09FK?}TYrK~z;2TI=PVC~qjlx15~z@WyJ!Q_W1XiNr{ zy_VEum`GeMA^tJhM6(1!%*du8@ka(B>hSkJhRE0=PP2*3QHc(jT)($1izJ@p_TKY3 z-+S&o=e^g#otMIHBL8ZlQ25jA(0h#Ew%U4ApIP?OoH zxhTdis`txkl>Mnbim5*|w^x!cs4Q{cQYpq36s(+QRw;OfJEu7x^iAAJ9j?Y$Yr#r+ zCZ8QI_z=1+*5$S#3kA<5btkGhKU7UL=p**F)hC~|uo3Gn`SdOpvW4VJ&$45-26?!Z z{bFm7>e<8d8f9-DJ5aQM^~|f!`VJ{JvdY32q@UTz!V-Dmuk7o>Q*t~PidA{RTnb*v zrl6>PjLo%|7@Uv!+U)I-RVv@Ck~A&$4`g#xySQu)Mg3lK>QgxsTsbpjtjYFlG?Tf^ zK{5Yk@>C9FmmHko+_xv4@pE*?3qGFe5)L6LD&vRzRn%9b_sU9SJ#vY5ByZquH`>(b zRi8D{ZD{1ggXlU>bod8_swOlFOi+_25~M?c@+l+2`H5LH2?8hZ-x@-y2Uq0q@j9h78(^AiWPUh3Z8|^ z+HU`3oWokgiV)j_b+3FNBC0-_<{J3MB!4BzJJY-r{EgJ(V4oG)@QNsgM^4=wZ&{Uu z+wa&K*B3$~d>MXiOI*)O`x)?~+v55^=!1x7!Y|z(*RQAjEckoji>`~f3I5U6xc(J< zJW*ZLB*(VmX-MOuR*j;pu+U}^+>!o>lo;FOG|aPJ|A_N3DKBIHvJCQ9&Y5NYq)xrZXP=Zb+LA}P%7{FN@wz=IE`u|Q zU7Ji|sS!KmDwp$e*n6%IS~dJ*i~$Q%F|doKz?uYr3FgNryUzI(W?_Ooj-gC)u3;7? z*bWRUm`ebdm?L7IWEYpXBEAjrKE(H?;$nZ3h1Sl-<05b3|Kqr_o!*6@ z2?CK!n1*o{?1=<(FI{9B#kHHms@69H8TJw^WzBc5RM@0;40Q zKrTX}i(Y7zKRE9b+O^OIx-#F&5F2pGfTigf3)PK|Tp1R36`g8^z5{yEvDw_ImY}2n z%P%$8bU-r z{mIVPqX;x_6h(-1Iy4Uawf6{3s{2o}u~L_(1F%Ctm^e?=I|CLM0493n$9Rracpk9z o+~QmiVL~0n7-0K61zBJIjb8S5dR{j5_jraSX7Vm%xevYnFKc-|0{{R3 diff --git a/tests/test_fpu.console_out b/tests/test_fpu.console_out index a49bb9b..340756c 100644 --- a/tests/test_fpu.console_out +++ b/tests/test_fpu.console_out @@ -4,3 +4,4 @@ test 03:PASS test 04:PASS test 05:PASS test 06:PASS +test 07:PASS