@ -37,7 +37,12 @@ architecture behaviour of fpu is
type state_t is (IDLE,
type state_t is (IDLE,
DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
DO_FMR);
DO_FMR,
DO_FCFID,
FINISH, NORMALIZE,
ROUND_UFLOW, ROUND_OFLOW,
ROUNDING, ROUNDING_2, ROUNDING_3,
DENORM);
type reg_type is record
type reg_type is record
state : state_t;
state : state_t;
@ -54,21 +59,121 @@ architecture behaviour of fpu is
fpscr : std_ulogic_vector(31 downto 0);
fpscr : std_ulogic_vector(31 downto 0);
a : fpu_reg_type;
a : fpu_reg_type;
b : fpu_reg_type;
b : fpu_reg_type;
r : std_ulogic_vector(63 downto 0);
r : std_ulogic_vector(63 downto 0); -- 10.54 format
x : std_ulogic;
result_sign : std_ulogic;
result_sign : std_ulogic;
result_class : fp_number_class;
result_class : fp_number_class;
result_exp : signed(EXP_BITS-1 downto 0);
result_exp : signed(EXP_BITS-1 downto 0);
shift : signed(EXP_BITS-1 downto 0);
writing_back : std_ulogic;
writing_back : std_ulogic;
int_result : std_ulogic;
int_result : std_ulogic;
cr_result : std_ulogic_vector(3 downto 0);
cr_result : std_ulogic_vector(3 downto 0);
cr_mask : std_ulogic_vector(7 downto 0);
cr_mask : std_ulogic_vector(7 downto 0);
old_exc : std_ulogic_vector(4 downto 0);
update_fprf : std_ulogic;
tiny : std_ulogic;
denorm : std_ulogic;
round_mode : std_ulogic_vector(2 downto 0);
end record;
end record;
signal r, rin : reg_type;
signal r, rin : reg_type;
signal fp_result : std_ulogic_vector(63 downto 0);
signal fp_result : std_ulogic_vector(63 downto 0);
signal opsel_a : std_ulogic_vector(1 downto 0);
signal opsel_b : std_ulogic_vector(1 downto 0);
signal opsel_r : std_ulogic_vector(1 downto 0);
signal opsel_r : std_ulogic_vector(1 downto 0);
signal opsel_ainv : std_ulogic;
signal opsel_amask : std_ulogic;
signal in_a : std_ulogic_vector(63 downto 0);
signal in_b : std_ulogic_vector(63 downto 0);
signal result : std_ulogic_vector(63 downto 0);
signal result : std_ulogic_vector(63 downto 0);
signal carry_in : std_ulogic;
signal lost_bits : std_ulogic;
signal r_hi_nz : std_ulogic;
signal r_lo_nz : std_ulogic;
signal misc_sel : std_ulogic_vector(3 downto 0);
-- opsel values
-- Encodings for the data path multiplexer select signals (opsel_a,
-- opsel_b and opsel_r).
-- opsel_a: selects the A input of the adder.  The A mux decodes AIN_R
-- and AIN_A explicitly; any other value (including AIN_B) selects
-- r.b.mantissa.
constant AIN_R : std_ulogic_vector(1 downto 0) := "00";      -- r.r
constant AIN_A : std_ulogic_vector(1 downto 0) := "01";      -- r.a.mantissa
constant AIN_B : std_ulogic_vector(1 downto 0) := "10";      -- r.b.mantissa
-- opsel_b: selects the B input of the adder.  Any value not listed
-- here yields all zeroes, the same as BIN_ZERO.
constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";   -- all zeroes
constant BIN_R : std_ulogic_vector(1 downto 0) := "01";      -- r.r
constant BIN_MASK : std_ulogic_vector(1 downto 0) := "10";   -- right-shift mask
-- opsel_r: selects what is loaded into the result.  The result mux
-- decodes RES_SUM and RES_SHIFT explicitly; every other value
-- (including RES_MISC) selects the miscellaneous value chosen by misc_sel.
constant RES_SUM : std_ulogic_vector(1 downto 0) := "00";    -- in_a + in_b + carry_in
constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";  -- shifter output
constant RES_MISC : std_ulogic_vector(1 downto 0) := "11";   -- misc_sel-selected value
-- Left and right shifter with 120 bit input and 64 bit output.
-- Shifts inp left by shift bits and returns the upper 64 bits of
-- the result. The shift parameter is interpreted as a signed
-- number in the range -64..63, with negative values indicating
-- right shifts.
function shifter_64(inp: std_ulogic_vector(119 downto 0);
                    shift: std_ulogic_vector(6 downto 0))
    return std_ulogic_vector is
    -- Zero padding used by the coarse stage.
    constant PAD7   : std_ulogic_vector(6 downto 0)  := (others => '0');
    constant PAD32  : std_ulogic_vector(31 downto 0) := (others => '0');
    constant PAD64  : std_ulogic_vector(63 downto 0) := (others => '0');
    -- The shift count split into its three fields.
    variable sel32  : std_ulogic_vector(1 downto 0);
    variable sel8   : std_ulogic_vector(1 downto 0);
    variable sel1   : std_ulogic_vector(2 downto 0);
    -- Intermediate results; each stage narrows the window.
    variable coarse : std_ulogic_vector(94 downto 0);
    variable medium : std_ulogic_vector(70 downto 0);
    variable fine   : std_ulogic_vector(63 downto 0);
begin
    sel32 := shift(6 downto 5);
    sel8  := shift(4 downto 3);
    sel1  := shift(2 downto 0);

    -- Stage 1: shift by a multiple of 32 (0, +32, -64 or -32),
    -- keeping a 95-bit window for the later stages.
    case sel32 is
        when "00" =>
            -- no coarse shift
            coarse := inp(119 downto 25);
        when "01" =>
            -- left by 32, zero-filled on the right
            coarse := inp(87 downto 0) & PAD7;
        when "10" =>
            -- right by 64, zero-filled on the left
            coarse := PAD64 & inp(119 downto 89);
        when others =>
            -- right by 32, zero-filled on the left
            coarse := PAD32 & inp(119 downto 57);
    end case;

    -- Stage 2: shift left by 0, 8 or 16; the default covers a further
    -- left shift of 24 ("11") and any select that matches nothing.
    medium := coarse(70 downto 0);
    for k in 0 to 2 loop
        if sel8 = std_ulogic_vector(to_unsigned(k, 2)) then
            medium := coarse(94 - 8 * k downto 24 - 8 * k);
        end if;
    end loop;

    -- Stage 3: shift left by 0..6; the default covers a left shift
    -- of 7 ("111") and any select that matches nothing.
    fine := medium(63 downto 0);
    for k in 0 to 6 loop
        if sel1 = std_ulogic_vector(to_unsigned(k, 3)) then
            fine := medium(70 - k downto 7 - k);
        end if;
    end loop;

    return fine;
end;
-- Generate a mask with 0-bits on the left and 1-bits on the right which
-- selects the bits that will be lost in doing a right shift.  The shift
-- parameter is the bottom 6 bits of a negative shift count,
-- indicating a right shift.
function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
    -- Bits 63 - shift down to 0 end up set; everything above is clear.
    variable lost : std_ulogic_vector(63 downto 0);
begin
    for pos in lost'range loop
        -- (63 - pos) is the distance of this bit from the MSB; the bit
        -- belongs to the mask once that distance reaches the shift value.
        if (63 - pos) >= shift then
            lost(pos) := '1';
        else
            lost(pos) := '0';
        end if;
    end loop;
    return lost;
end;
-- Split a DP floating-point number into components and work out its class.
-- Split a DP floating-point number into components and work out its class.
-- If is_int = 1, the input is considered an integer
-- If is_int = 1, the input is considered an integer
@ -112,7 +217,8 @@ architecture behaviour of fpu is
-- Construct a DP floating-point result from components
-- Construct a DP floating-point result from components
function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
mantissa: std_ulogic_vector) return std_ulogic_vector is
mantissa: std_ulogic_vector; single_prec: std_ulogic)
return std_ulogic_vector is
variable result : std_ulogic_vector(63 downto 0);
variable result : std_ulogic_vector(63 downto 0);
begin
begin
result := (others => '0');
result := (others => '0');
@ -124,16 +230,76 @@ architecture behaviour of fpu is
-- normalized number
-- normalized number
result(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
result(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
end if;
end if;
result(51 downto 0) := mantissa(53 downto 2);
result(51 downto 29) := mantissa(53 downto 31);
if single_prec = '0' then
result(28 downto 0) := mantissa(30 downto 2);
end if;
when INFINITY =>
when INFINITY =>
result(62 downto 52) := "11111111111";
result(62 downto 52) := "11111111111";
when NAN =>
when NAN =>
result(62 downto 52) := "11111111111";
result(62 downto 52) := "11111111111";
result(51 downto 0) := mantissa(53 downto 2);
result(51 downto 29) := mantissa(53 downto 31);
if single_prec = '0' then
result(28 downto 0) := mantissa(30 downto 2);
end if;
end case;
end case;
return result;
return result;
end;
end;
-- Determine whether to increment when rounding
-- Returns rounding_inc & inexact
-- Assumes x includes the bottom 29 bits of the mantissa already
-- if single_prec = 1 (usually arranged by setting set_x = 1 earlier).
function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
                     single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
                     sign: std_ulogic)
    return std_ulogic_vector is
    -- The two mantissa bits just below the result LSB, followed by x.
    variable low_bits  : std_ulogic_vector(2 downto 0);
    -- Least significant mantissa bit that is kept in the result.
    variable unit_lsb  : std_ulogic;
    variable inexact   : std_ulogic;
    variable increment : std_ulogic;
    variable mode      : std_ulogic_vector(1 downto 0);
    -- Bit 1 = increment, bit 0 = inexact.
    variable verdict   : std_ulogic_vector(1 downto 0);
begin
    -- Pick the rounding position for the selected precision.
    if single_prec = '0' then
        unit_lsb := mantissa(2);
        low_bits := mantissa(1 downto 0) & x;
    else
        unit_lsb := mantissa(31);
        low_bits := mantissa(30 downto 29) & x;
    end if;

    -- Any non-zero bit below the LSB makes the result inexact.
    inexact := or (low_bits);
    increment := '0';

    mode := rn(1 downto 0);
    if mode = "00" then
        -- Round to nearest.
        if low_bits = "100" and rn(2) = '0' then
            -- Exactly half way: round to even.
            increment := unit_lsb;
        else
            increment := low_bits(2);
        end if;
    elsif mode = "01" then
        -- Round towards zero: never increment.
        null;
    elsif rn(0) = sign then
        -- Round towards +/- infinity in the direction of greater
        -- magnitude: increment whenever bits would be discarded.
        increment := inexact;
    end if;

    verdict(1) := increment;
    verdict(0) := inexact;
    return verdict;
end;
-- Determine result flags to write into the FPSCR
function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
    return std_ulogic_vector is
    -- Five flag bits, leftmost first.  The caller stores them in
    -- fpscr(FPSCR_C downto FPSCR_FU).  The ascending 0 to 4 range matches
    -- the range of a concatenation result of type std_ulogic_vector.
    variable flags : std_ulogic_vector(0 to 4);
begin
    case class is
        when NAN =>
            flags := "10001";
        when INFINITY =>
            flags := '0' & sign & (not sign) & "01";
        when FINITE =>
            -- The leading bit is set when the unit bit is clear.
            flags := (not unitbit) & sign & (not sign) & "00";
        when ZERO =>
            flags := sign & "0010";
    end case;
    return flags;
end;
begin
begin
fpu_0: process(clk)
fpu_0: process(clk)
begin
begin
@ -174,6 +340,25 @@ begin
variable j, k : integer;
variable j, k : integer;
variable flm : std_ulogic_vector(7 downto 0);
variable flm : std_ulogic_vector(7 downto 0);
variable int_input : std_ulogic;
variable int_input : std_ulogic;
variable mask : std_ulogic_vector(63 downto 0);
variable in_a0 : std_ulogic_vector(63 downto 0);
variable in_b0 : std_ulogic_vector(63 downto 0);
variable misc : std_ulogic_vector(63 downto 0);
variable shift_res : std_ulogic_vector(63 downto 0);
variable round : std_ulogic_vector(1 downto 0);
variable update_fx : std_ulogic;
variable arith_done : std_ulogic;
variable mant_nz : std_ulogic;
variable min_exp : signed(EXP_BITS-1 downto 0);
variable max_exp : signed(EXP_BITS-1 downto 0);
variable bias_exp : signed(EXP_BITS-1 downto 0);
variable new_exp : signed(EXP_BITS-1 downto 0);
variable exp_tiny : std_ulogic;
variable exp_huge : std_ulogic;
variable renormalize : std_ulogic;
variable clz : std_ulogic_vector(5 downto 0);
variable set_x : std_ulogic;
variable mshift : signed(EXP_BITS-1 downto 0);
begin
begin
v := r;
v := r;
illegal := '0';
illegal := '0';
@ -199,16 +384,53 @@ begin
if e_in.op = OP_FPOP_I then
if e_in.op = OP_FPOP_I then
int_input := '1';
int_input := '1';
end if;
end if;
v.tiny := '0';
v.denorm := '0';
v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
adec := decode_dp(e_in.fra, int_input);
adec := decode_dp(e_in.fra, int_input);
bdec := decode_dp(e_in.frb, int_input);
bdec := decode_dp(e_in.frb, int_input);
v.a := adec;
v.a := adec;
v.b := bdec;
v.b := bdec;
end if;
end if;
r_hi_nz <= or (r.r(55 downto 31));
r_lo_nz <= or (r.r(30 downto 2));
if r.single_prec = '0' then
max_exp := to_signed(1023, EXP_BITS);
min_exp := to_signed(-1022, EXP_BITS);
bias_exp := to_signed(1536, EXP_BITS);
else
max_exp := to_signed(127, EXP_BITS);
min_exp := to_signed(-126, EXP_BITS);
bias_exp := to_signed(192, EXP_BITS);
end if;
new_exp := r.result_exp - r.shift;
exp_tiny := '0';
exp_huge := '0';
if new_exp < min_exp then
exp_tiny := '1';
end if;
if new_exp > max_exp then
exp_huge := '1';
end if;
v.writing_back := '0';
v.writing_back := '0';
v.instr_done := '0';
v.instr_done := '0';
opsel_r <= "00";
v.update_fprf := '0';
v.shift := to_signed(0, EXP_BITS);
opsel_a <= AIN_R;
opsel_ainv <= '0';
opsel_amask <= '0';
opsel_b <= BIN_ZERO;
opsel_r <= RES_SUM;
carry_in <= '0';
misc_sel <= "0000";
fpscr_mask := (others => '1');
fpscr_mask := (others => '1');
update_fx := '0';
arith_done := '0';
renormalize := '0';
set_x := '0';
case r.state is
case r.state is
when IDLE =>
when IDLE =>
@ -230,10 +452,15 @@ begin
end if;
end if;
when "01000" =>
when "01000" =>
v.state := DO_FMR;
v.state := DO_FMR;
when "01110" =>
-- fcfid[u][s]
v.state := DO_FCFID;
when others =>
when others =>
illegal := '1';
illegal := '1';
end case;
end case;
end if;
end if;
v.x := '0';
v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
when DO_MCRFS =>
when DO_MCRFS =>
j := to_integer(unsigned(insn_bfa(r.insn)));
j := to_integer(unsigned(insn_bfa(r.insn)));
@ -276,7 +503,7 @@ begin
when DO_MFFS =>
when DO_MFFS =>
v.int_result := '1';
v.int_result := '1';
v.writing_back := '1';
v.writing_back := '1';
opsel_r <= "10";
opsel_r <= RES_MISC;
case r.insn(20 downto 16) is
case r.insn(20 downto 16) is
when "00000" =>
when "00000" =>
-- mffs
-- mffs
@ -322,6 +549,7 @@ begin
v.state := IDLE;
v.state := IDLE;
when DO_FMR =>
when DO_FMR =>
opsel_a <= AIN_B;
v.result_class := r.b.class;
v.result_class := r.b.class;
v.result_exp := r.b.exponent;
v.result_exp := r.b.exponent;
if r.insn(9) = '1' then
if r.insn(9) = '1' then
@ -339,29 +567,281 @@ begin
v.instr_done := '1';
v.instr_done := '1';
v.state := IDLE;
v.state := IDLE;
when DO_FCFID =>
v.result_sign := '0';
opsel_a <= AIN_B;
if r.insn(8) = '0' and r.b.negative = '1' then
-- fcfid[s] with negative operand, set R = -B
opsel_ainv <= '1';
carry_in <= '1';
v.result_sign := '1';
end if;
v.result_class := r.b.class;
v.result_exp := to_signed(54, EXP_BITS);
v.fpscr(FPSCR_FR) := '0';
v.fpscr(FPSCR_FI) := '0';
if r.b.class = ZERO then
arith_done := '1';
else
v.state := FINISH;
end if;
when FINISH =>
if r.r(63 downto 54) /= "0000000001" then
renormalize := '1';
v.state := NORMALIZE;
else
set_x := '1';
if exp_tiny = '1' then
v.shift := new_exp - min_exp;
v.state := ROUND_UFLOW;
elsif exp_huge = '1' then
v.state := ROUND_OFLOW;
else
v.shift := to_signed(-2, EXP_BITS);
v.state := ROUNDING;
end if;
end if;
when NORMALIZE =>
-- Shift so we have 9 leading zeroes (we know R is non-zero)
opsel_r <= RES_SHIFT;
set_x := '1';
if exp_tiny = '1' then
v.shift := new_exp - min_exp;
v.state := ROUND_UFLOW;
elsif exp_huge = '1' then
v.state := ROUND_OFLOW;
else
v.shift := to_signed(-2, EXP_BITS);
v.state := ROUNDING;
end if;
when ROUND_UFLOW =>
v.tiny := '1';
if r.fpscr(FPSCR_UE) = '0' then
-- disabled underflow exception case
-- have to denormalize before rounding
opsel_r <= RES_SHIFT;
set_x := '1';
v.shift := to_signed(-2, EXP_BITS);
v.state := ROUNDING;
else
-- enabled underflow exception case
-- if denormalized, have to normalize before rounding
v.fpscr(FPSCR_UX) := '1';
v.result_exp := r.result_exp + bias_exp;
if r.r(54) = '0' then
renormalize := '1';
v.state := NORMALIZE;
else
v.shift := to_signed(-2, EXP_BITS);
v.state := ROUNDING;
end if;
end if;
when ROUND_OFLOW =>
v.fpscr(FPSCR_OX) := '1';
if r.fpscr(FPSCR_OE) = '0' then
-- disabled overflow exception
-- result depends on rounding mode
v.fpscr(FPSCR_XX) := '1';
v.fpscr(FPSCR_FI) := '1';
if r.round_mode(1 downto 0) = "00" or
(r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
v.result_class := INFINITY;
v.fpscr(FPSCR_FR) := '1';
else
v.fpscr(FPSCR_FR) := '0';
end if;
-- construct largest representable number
v.result_exp := max_exp;
opsel_r <= RES_MISC;
misc_sel <= "001" & r.single_prec;
arith_done := '1';
else
-- enabled overflow exception
v.result_exp := r.result_exp - bias_exp;
v.shift := to_signed(-2, EXP_BITS);
v.state := ROUNDING;
end if;
when ROUNDING =>
opsel_amask <= '1';
round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
if round(1) = '1' then
-- set mask to increment the LSB for the precision
opsel_b <= BIN_MASK;
carry_in <= '1';
v.shift := to_signed(-1, EXP_BITS);
v.state := ROUNDING_2;
else
if r.r(54) = '0' then
-- result after masking could be zero, or could be a
-- denormalized result that needs to be renormalized
renormalize := '1';
v.state := ROUNDING_3;
else
arith_done := '1';
end if;
end if;
if round(0) = '1' then
v.fpscr(FPSCR_XX) := '1';
if r.tiny = '1' then
v.fpscr(FPSCR_UX) := '1';
end if;
end if;
when ROUNDING_2 =>
-- Check for overflow during rounding
v.x := '0';
if r.r(55) = '1' then
opsel_r <= RES_SHIFT;
if exp_huge = '1' then
v.state := ROUND_OFLOW;
else
arith_done := '1';
end if;
elsif r.r(54) = '0' then
-- Do CLZ so we can renormalize the result
renormalize := '1';
v.state := ROUNDING_3;
else
arith_done := '1';
end if;
when ROUNDING_3 =>
mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
if mant_nz = '0' then
v.result_class := ZERO;
arith_done := '1';
else
-- Renormalize result after rounding
opsel_r <= RES_SHIFT;
v.denorm := exp_tiny;
v.shift := new_exp - to_signed(-1022, EXP_BITS);
if new_exp < to_signed(-1022, EXP_BITS) then
v.state := DENORM;
else
arith_done := '1';
end if;
end if;
when DENORM =>
opsel_r <= RES_SHIFT;
arith_done := '1';
end case;
end case;
if arith_done = '1' then
v.writing_back := '1';
v.update_fprf := '1';
v.instr_done := '1';
v.state := IDLE;
update_fx := '1';
end if;
-- Data path.
-- Data path.
-- This has A and B input multiplexers, an adder, a shifter,
-- count-leading-zeroes logic, and a result mux.
if r.single_prec = '1' then
mshift := r.shift + to_signed(-29, EXP_BITS);
else
mshift := r.shift;
end if;
if mshift < to_signed(-64, EXP_BITS) then
mask := (others => '1');
elsif mshift >= to_signed(0, EXP_BITS) then
mask := (others => '0');
else
mask := right_mask(unsigned(mshift(5 downto 0)));
end if;
case opsel_a is
when AIN_R =>
in_a0 := r.r;
when AIN_A =>
in_a0 := r.a.mantissa;
when others =>
in_a0 := r.b.mantissa;
end case;
if (or (mask and in_a0)) = '1' and set_x = '1' then
v.x := '1';
end if;
if opsel_ainv = '1' then
in_a0 := not in_a0;
end if;
if opsel_amask = '1' then
in_a0 := in_a0 and not mask;
end if;
in_a <= in_a0;
case opsel_b is
when BIN_ZERO =>
in_b0 := (others => '0');
when BIN_R =>
in_b0 := r.r;
when BIN_MASK =>
in_b0 := mask;
when others =>
in_b0 := (others => '0');
end case;
in_b <= in_b0;
if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
shift_res := shifter_64(r.r & x"00000000000000",
std_ulogic_vector(r.shift(6 downto 0)));
else
shift_res := (others => '0');
end if;
case opsel_r is
case opsel_r is
when "00" =>
when RES_SUM =>
result <= r.b.mantissa;
result <= std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
when "10" =>
when RES_SHIFT =>
result <= x"00000000" & (r.fpscr and fpscr_mask);
result <= shift_res;
when others =>
when others =>
result <= (others => '0');
case misc_sel is
when "0000" =>
misc := x"00000000" & (r.fpscr and fpscr_mask);
when "0010" =>
-- mantissa of max representable DP number
misc := x"007ffffffffffffc";
when "0011" =>
-- mantissa of max representable SP number
misc := x"007fffff80000000";
when others =>
misc := x"0000000000000000";
end case;
result <= misc;
end case;
end case;
v.r := result;
v.r := result;
if opsel_r = RES_SHIFT then
v.result_exp := new_exp;
end if;
if renormalize = '1' then
clz := count_left_zeroes(r.r);
v.shift := resize(signed('0' & clz) - 9, EXP_BITS);
end if;
if r.int_result = '1' then
if r.int_result = '1' then
fp_result <= r.r;
fp_result <= r.r;
else
else
fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r);
fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
r.single_prec);
end if;
if r.update_fprf = '1' then
v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
r.r(54) and not r.denorm);
end if;
end if;
v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
(or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
(or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
v.fpscr(FPSCR_VE downto FPSCR_XE));
v.fpscr(FPSCR_VE downto FPSCR_XE));
if update_fx = '1' and
(v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
v.fpscr(FPSCR_FX) := '1';
end if;
if r.rc = '1' then
if r.rc = '1' then
v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
end if;
end if;