core: Add framework for an FPU

This adds the skeleton of a floating-point unit and implements the
mffs and mtfsf instructions.

Execute1 sends FP instructions to the FPU and receives busy,
exception, FP interrupt and illegal interrupt signals from it.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
jtag-port
Paul Mackerras 4 years ago
parent 76ec1a2f0a
commit 856e9e955f

@ -48,7 +48,7 @@ core_files = decode_types.vhdl common.vhdl wishbone_types.vhdl fetch1.vhdl \
cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \ cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \
logical.vhdl countzero.vhdl multiply.vhdl divider.vhdl execute1.vhdl \ logical.vhdl countzero.vhdl multiply.vhdl divider.vhdl execute1.vhdl \
loadstore1.vhdl mmu.vhdl dcache.vhdl writeback.vhdl core_debug.vhdl \ loadstore1.vhdl mmu.vhdl dcache.vhdl writeback.vhdl core_debug.vhdl \
core.vhdl core.vhdl fpu.vhdl


soc_files = $(core_files) wishbone_arbiter.vhdl wishbone_bram_wrapper.vhdl sync_fifo.vhdl \ soc_files = $(core_files) wishbone_arbiter.vhdl wishbone_bram_wrapper.vhdl sync_fifo.vhdl \
wishbone_debug_master.vhdl xics.vhdl syscon.vhdl soc.vhdl \ wishbone_debug_master.vhdl xics.vhdl syscon.vhdl soc.vhdl \

@ -94,6 +94,38 @@ package common is
end record; end record;
constant xerc_init : xer_common_t := (others => '0'); constant xerc_init : xer_common_t := (others => '0');


-- FPSCR bit numbers
constant FPSCR_FX : integer := 63 - 32;
constant FPSCR_FEX : integer := 63 - 33;
constant FPSCR_VX : integer := 63 - 34;
constant FPSCR_OX : integer := 63 - 35;
constant FPSCR_UX : integer := 63 - 36;
constant FPSCR_ZX : integer := 63 - 37;
constant FPSCR_XX : integer := 63 - 38;
constant FPSCR_VXSNAN : integer := 63 - 39;
constant FPSCR_VXISI : integer := 63 - 40;
constant FPSCR_VXIDI : integer := 63 - 41;
constant FPSCR_VXZDZ : integer := 63 - 42;
constant FPSCR_VXIMZ : integer := 63 - 43;
constant FPSCR_VXVC : integer := 63 - 44;
constant FPSCR_FR : integer := 63 - 45;
constant FPSCR_FI : integer := 63 - 46;
constant FPSCR_C : integer := 63 - 47;
constant FPSCR_FL : integer := 63 - 48;
constant FPSCR_FG : integer := 63 - 49;
constant FPSCR_FE : integer := 63 - 50;
constant FPSCR_FU : integer := 63 - 51;
constant FPSCR_VXSOFT : integer := 63 - 53;
constant FPSCR_VXSQRT : integer := 63 - 54;
constant FPSCR_VXCVI : integer := 63 - 55;
constant FPSCR_VE : integer := 63 - 56;
constant FPSCR_OE : integer := 63 - 57;
constant FPSCR_UE : integer := 63 - 58;
constant FPSCR_ZE : integer := 63 - 59;
constant FPSCR_XE : integer := 63 - 60;
constant FPSCR_NI : integer := 63 - 61;
constant FPSCR_RN : integer := 63 - 63;

type irq_state_t is (WRITE_SRR0, WRITE_SRR1); type irq_state_t is (WRITE_SRR0, WRITE_SRR1);


-- For now, fixed 16 sources, make this either a parametric -- For now, fixed 16 sources, make this either a parametric
@ -413,6 +445,43 @@ package common is
write_cr_data => (others => '0'), write_reg => (others => '0'), write_cr_data => (others => '0'), write_reg => (others => '0'),
exc_write_reg => (others => '0'), exc_write_data => (others => '0')); exc_write_reg => (others => '0'), exc_write_data => (others => '0'));


type Execute1ToFPUType is record
valid : std_ulogic;
op : insn_type_t;
nia : std_ulogic_vector(63 downto 0);
insn : std_ulogic_vector(31 downto 0);
single : std_ulogic;
fe_mode : std_ulogic_vector(1 downto 0);
fra : std_ulogic_vector(63 downto 0);
frb : std_ulogic_vector(63 downto 0);
frc : std_ulogic_vector(63 downto 0);
frt : gspr_index_t;
rc : std_ulogic;
out_cr : std_ulogic;
end record;
constant Execute1ToFPUInit : Execute1ToFPUType := (valid => '0', op => OP_ILLEGAL, nia => (others => '0'),
insn => (others => '0'), fe_mode => "00", rc => '0',
fra => (others => '0'), frb => (others => '0'),
frc => (others => '0'), frt => (others => '0'),
single => '0', out_cr => '0');

type FPUToExecute1Type is record
busy : std_ulogic;
exception : std_ulogic;
interrupt : std_ulogic;
illegal : std_ulogic;
end record;

type FPUToWritebackType is record
valid : std_ulogic;
write_enable : std_ulogic;
write_reg : gspr_index_t;
write_data : std_ulogic_vector(63 downto 0);
write_cr_enable : std_ulogic;
write_cr_mask : std_ulogic_vector(7 downto 0);
write_cr_data : std_ulogic_vector(31 downto 0);
end record;

type DividerToExecute1Type is record type DividerToExecute1Type is record
valid: std_ulogic; valid: std_ulogic;
write_reg_data: std_ulogic_vector(63 downto 0); write_reg_data: std_ulogic_vector(63 downto 0);

@ -80,6 +80,11 @@ architecture behave of core is
signal mmu_to_dcache: MmuToDcacheType; signal mmu_to_dcache: MmuToDcacheType;
signal dcache_to_mmu: DcacheToMmuType; signal dcache_to_mmu: DcacheToMmuType;


-- FPU signals
signal execute1_to_fpu: Execute1ToFPUType;
signal fpu_to_execute1: FPUToExecute1Type;
signal fpu_to_writeback: FPUToWritebackType;

-- local signals -- local signals
signal fetch1_stall_in : std_ulogic; signal fetch1_stall_in : std_ulogic;
signal icache_stall_out : std_ulogic; signal icache_stall_out : std_ulogic;
@ -109,6 +114,7 @@ architecture behave of core is
signal rst_dec1 : std_ulogic := '1'; signal rst_dec1 : std_ulogic := '1';
signal rst_dec2 : std_ulogic := '1'; signal rst_dec2 : std_ulogic := '1';
signal rst_ex1 : std_ulogic := '1'; signal rst_ex1 : std_ulogic := '1';
signal rst_fpu : std_ulogic := '1';
signal rst_ls1 : std_ulogic := '1'; signal rst_ls1 : std_ulogic := '1';
signal rst_dbg : std_ulogic := '1'; signal rst_dbg : std_ulogic := '1';
signal alt_reset_d : std_ulogic; signal alt_reset_d : std_ulogic;
@ -171,6 +177,7 @@ begin
rst_dec1 <= core_rst; rst_dec1 <= core_rst;
rst_dec2 <= core_rst; rst_dec2 <= core_rst;
rst_ex1 <= core_rst; rst_ex1 <= core_rst;
rst_fpu <= core_rst;
rst_ls1 <= core_rst; rst_ls1 <= core_rst;
rst_dbg <= rst; rst_dbg <= rst;
alt_reset_d <= alt_reset; alt_reset_d <= alt_reset;
@ -225,6 +232,7 @@ begin


decode1_0: entity work.decode1 decode1_0: entity work.decode1
generic map( generic map(
HAS_FPU => HAS_FPU,
LOG_LENGTH => LOG_LENGTH LOG_LENGTH => LOG_LENGTH
) )
port map ( port map (
@ -313,9 +321,11 @@ begin
busy_out => ex1_busy_out, busy_out => ex1_busy_out,
e_in => decode2_to_execute1, e_in => decode2_to_execute1,
l_in => loadstore1_to_execute1, l_in => loadstore1_to_execute1,
fp_in => fpu_to_execute1,
ext_irq_in => ext_irq, ext_irq_in => ext_irq,
l_out => execute1_to_loadstore1, l_out => execute1_to_loadstore1,
f_out => execute1_to_fetch1, f_out => execute1_to_fetch1,
fp_out => execute1_to_fpu,
e_out => execute1_to_writeback, e_out => execute1_to_writeback,
icache_inval => ex1_icache_inval, icache_inval => ex1_icache_inval,
dbg_msr_out => msr, dbg_msr_out => msr,
@ -326,6 +336,29 @@ begin
log_wr_addr => log_wr_addr log_wr_addr => log_wr_addr
); );


with_fpu: if HAS_FPU generate
begin
fpu_0: entity work.fpu
port map (
clk => clk,
rst => rst_fpu,
e_in => execute1_to_fpu,
e_out => fpu_to_execute1,
w_out => fpu_to_writeback
);
end generate;

no_fpu: if not HAS_FPU generate
begin
fpu_to_execute1.busy <= '0';
fpu_to_execute1.exception <= '0';
fpu_to_execute1.interrupt <= '0';
fpu_to_execute1.illegal <= '0';
fpu_to_writeback.valid <= '0';
fpu_to_writeback.write_enable <= '0';
fpu_to_writeback.write_cr_enable <= '0';
end generate;

loadstore1_0: entity work.loadstore1 loadstore1_0: entity work.loadstore1
generic map ( generic map (
HAS_FPU => HAS_FPU, HAS_FPU => HAS_FPU,
@ -381,6 +414,7 @@ begin
clk => clk, clk => clk,
e_in => execute1_to_writeback, e_in => execute1_to_writeback,
l_in => loadstore1_to_writeback, l_in => loadstore1_to_writeback,
fp_in => fpu_to_writeback,
w_out => writeback_to_register_file, w_out => writeback_to_register_file,
c_out => writeback_to_cr_file, c_out => writeback_to_cr_file,
complete_out => complete complete_out => complete

@ -8,6 +8,7 @@ use work.decode_types.all;


entity decode1 is entity decode1 is
generic ( generic (
HAS_FPU : boolean := true;
-- Non-zero to enable log data collection -- Non-zero to enable log data collection
LOG_LENGTH : natural := 0 LOG_LENGTH : natural := 0
); );
@ -55,6 +56,7 @@ architecture behaviour of decode1 is
type op_30_subop_array_t is array(0 to 15) of decode_rom_t; type op_30_subop_array_t is array(0 to 15) of decode_rom_t;
type op_31_subop_array_t is array(0 to 1023) of decode_rom_t; type op_31_subop_array_t is array(0 to 1023) of decode_rom_t;
type minor_rom_array_2_t is array(0 to 3) of decode_rom_t; type minor_rom_array_2_t is array(0 to 3) of decode_rom_t;
type op_63_subop_array_0_t is array(0 to 511) of decode_rom_t;


constant major_decode_rom_array : major_rom_array_t := ( constant major_decode_rom_array : major_rom_array_t := (
-- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl -- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl
@ -416,6 +418,15 @@ architecture behaviour of decode1 is
others => decode_rom_init others => decode_rom_init
); );


-- indexed by bits 4..1 and 10..6 of instruction word
constant decode_op_63l_array : op_63_subop_array_0_t := (
-- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl
-- op in out A out in out len ext pipe
2#011110010# => (FPU, OP_FPOP_I, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 18/7=mffs family
2#011110110# => (FPU, OP_FPOP_I, NONE, FRB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 22/7=mtfsf
others => illegal_inst
);

-- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl -- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl
-- op in out A out in out len ext pipe -- op in out A out in out len ext pipe
constant nop_instr : decode_rom_t := (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'); constant nop_instr : decode_rom_t := (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0');
@ -569,6 +580,13 @@ begin
when 62 => when 62 =>
v.decode := decode_op_62_array(to_integer(unsigned(f_in.insn(1 downto 0)))); v.decode := decode_op_62_array(to_integer(unsigned(f_in.insn(1 downto 0))));


when 63 =>
if HAS_FPU then
-- floating point operations, general and double-precision
v.decode := decode_op_63l_array(to_integer(unsigned(f_in.insn(4 downto 1) & f_in.insn(10 downto 6))));
vi.override := f_in.insn(5);
end if;

when others => when others =>
end case; end case;



@ -93,6 +93,12 @@ architecture behaviour of decode2 is
case t is case t is
when RB => when RB =>
ret := ('1', gpr_to_gspr(insn_rb(insn_in)), reg_data); ret := ('1', gpr_to_gspr(insn_rb(insn_in)), reg_data);
when FRB =>
if HAS_FPU then
ret := ('1', fpr_to_gspr(insn_frb(insn_in)), reg_data);
else
ret := ('0', (others => '0'), (others => '0'));
end if;
when CONST_UI => when CONST_UI =>
ret := ('0', (others => '0'), std_ulogic_vector(resize(unsigned(insn_ui(insn_in)), 64))); ret := ('0', (others => '0'), std_ulogic_vector(resize(unsigned(insn_ui(insn_in)), 64)));
when CONST_SI => when CONST_SI =>
@ -296,6 +302,7 @@ begin
r_out.read1_reg <= d_in.ispr1 when d_in.decode.input_reg_a = SPR r_out.read1_reg <= d_in.ispr1 when d_in.decode.input_reg_a = SPR
else gpr_to_gspr(insn_ra(d_in.insn)); else gpr_to_gspr(insn_ra(d_in.insn));
r_out.read2_reg <= d_in.ispr2 when d_in.decode.input_reg_b = SPR r_out.read2_reg <= d_in.ispr2 when d_in.decode.input_reg_b = SPR
else fpr_to_gspr(insn_frb(d_in.insn)) when d_in.decode.input_reg_b = FRB and HAS_FPU
else gpr_to_gspr(insn_rb(d_in.insn)); else gpr_to_gspr(insn_rb(d_in.insn));
r_out.read3_reg <= gpr_to_gspr(insn_rcreg(d_in.insn)) when d_in.decode.input_reg_c = RCR r_out.read3_reg <= gpr_to_gspr(insn_rcreg(d_in.insn)) when d_in.decode.input_reg_c = RCR
else fpr_to_gspr(insn_frt(d_in.insn)) when d_in.decode.input_reg_c = FRS and HAS_FPU else fpr_to_gspr(insn_frt(d_in.insn)) when d_in.decode.input_reg_c = FRS and HAS_FPU
@ -321,7 +328,7 @@ begin
mul_b := (others => '0'); mul_b := (others => '0');


--v.e.input_cr := d_in.decode.input_cr; --v.e.input_cr := d_in.decode.input_cr;
--v.e.output_cr := d_in.decode.output_cr; v.e.output_cr := d_in.decode.output_cr;
decoded_reg_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, r_in.read1_data, d_in.ispr1, decoded_reg_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, r_in.read1_data, d_in.ispr1,
d_in.nia); d_in.nia);
@ -412,7 +419,7 @@ begin


cr_write_valid <= d_in.decode.output_cr or decode_rc(d_in.decode.rc, d_in.insn); cr_write_valid <= d_in.decode.output_cr or decode_rc(d_in.decode.rc, d_in.insn);
cr_bypass_avail <= '0'; cr_bypass_avail <= '0';
if EX1_BYPASS then if EX1_BYPASS and d_in.decode.unit = ALU then
cr_bypass_avail <= d_in.decode.output_cr; cr_bypass_avail <= d_in.decode.output_cr;
end if; end if;



@ -7,8 +7,9 @@ package decode_types is
OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB, OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB,
OP_CNTZ, OP_CROP, OP_CNTZ, OP_CROP,
OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST, OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST,
OP_DCBZ, OP_DIV, OP_DIVE, OP_EXTS, OP_DCBZ, OP_DIV, OP_DIVE, OP_EXTS, OP_EXTSWSLI,
OP_EXTSWSLI, OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC, OP_FPOP, OP_FPOP_I,
OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC,
OP_LOAD, OP_STORE, OP_LOAD, OP_STORE,
OP_FPLOAD, OP_FPSTORE, OP_FPLOAD, OP_FPSTORE,
OP_MCRXRX, OP_MFCR, OP_MFMSR, OP_MFSPR, OP_MOD, OP_MCRXRX, OP_MFCR, OP_MFMSR, OP_MFSPR, OP_MOD,
@ -24,7 +25,7 @@ package decode_types is
); );
type input_reg_a_t is (NONE, RA, RA_OR_ZERO, SPR, CIA); type input_reg_a_t is (NONE, RA, RA_OR_ZERO, SPR, CIA);
type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD, type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD,
CONST_DXHI4, CONST_DS, CONST_M1, CONST_SH, CONST_SH32, SPR); CONST_DXHI4, CONST_DS, CONST_M1, CONST_SH, CONST_SH32, SPR, FRB);
type input_reg_c_t is (NONE, RS, RCR, FRS); type input_reg_c_t is (NONE, RS, RCR, FRS);
type output_reg_a_t is (NONE, RT, RA, SPR, FRT); type output_reg_a_t is (NONE, RT, RA, SPR, FRT);
type rc_t is (NONE, ONE, RC); type rc_t is (NONE, ONE, RC);
@ -48,7 +49,7 @@ package decode_types is


constant TOO_OFFSET : integer := 0; constant TOO_OFFSET : integer := 0;


type unit_t is (NONE, ALU, LDST); type unit_t is (NONE, ALU, LDST, FPU);
type length_t is (NONE, is1B, is2B, is4B, is8B); type length_t is (NONE, is1B, is2B, is4B, is8B);


type decode_rom_t is record type decode_rom_t is record

@ -27,12 +27,14 @@ entity execute1 is


e_in : in Decode2ToExecute1Type; e_in : in Decode2ToExecute1Type;
l_in : in Loadstore1ToExecute1Type; l_in : in Loadstore1ToExecute1Type;
fp_in : in FPUToExecute1Type;


ext_irq_in : std_ulogic; ext_irq_in : std_ulogic;


-- asynchronous -- asynchronous
l_out : out Execute1ToLoadstore1Type; l_out : out Execute1ToLoadstore1Type;
f_out : out Execute1ToFetch1Type; f_out : out Execute1ToFetch1Type;
fp_out : out Execute1ToFPUType;


e_out : out Execute1ToWritebackType; e_out : out Execute1ToWritebackType;


@ -54,6 +56,7 @@ architecture behaviour of execute1 is
f : Execute1ToFetch1Type; f : Execute1ToFetch1Type;
busy: std_ulogic; busy: std_ulogic;
terminate: std_ulogic; terminate: std_ulogic;
fp_exception_next : std_ulogic;
trace_next : std_ulogic; trace_next : std_ulogic;
prev_op : insn_type_t; prev_op : insn_type_t;
lr_update : std_ulogic; lr_update : std_ulogic;
@ -72,7 +75,8 @@ architecture behaviour of execute1 is
end record; end record;
constant reg_type_init : reg_type := constant reg_type_init : reg_type :=
(e => Execute1ToWritebackInit, f => Execute1ToFetch1Init, (e => Execute1ToWritebackInit, f => Execute1ToFetch1Init,
busy => '0', lr_update => '0', terminate => '0', trace_next => '0', prev_op => OP_ILLEGAL, busy => '0', lr_update => '0', terminate => '0',
fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL,
mul_in_progress => '0', mul_finish => '0', div_in_progress => '0', cntz_in_progress => '0', mul_in_progress => '0', mul_finish => '0', div_in_progress => '0', cntz_in_progress => '0',
slow_op_insn => OP_ILLEGAL, slow_op_rc => '0', slow_op_oe => '0', slow_op_xerc => xerc_init, slow_op_insn => OP_ILLEGAL, slow_op_rc => '0', slow_op_oe => '0', slow_op_xerc => xerc_init,
next_lr => (others => '0'), last_nia => (others => '0'), others => (others => '0')); next_lr => (others => '0'), last_nia => (others => '0'), others => (others => '0'));
@ -268,7 +272,7 @@ begin
b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2; b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2;
c_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data3 = '1' else e_in.read_data3; c_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data3 = '1' else e_in.read_data3;


busy_out <= l_in.busy or r.busy; busy_out <= l_in.busy or r.busy or fp_in.busy;
valid_in <= e_in.valid and not busy_out; valid_in <= e_in.valid and not busy_out;


terminate_out <= r.terminate; terminate_out <= r.terminate;
@ -334,6 +338,7 @@ begin
variable spr_val : std_ulogic_vector(63 downto 0); variable spr_val : std_ulogic_vector(63 downto 0);
variable addend : std_ulogic_vector(127 downto 0); variable addend : std_ulogic_vector(127 downto 0);
variable do_trace : std_ulogic; variable do_trace : std_ulogic;
variable fv : Execute1ToFPUType;
begin begin
result := (others => '0'); result := (others => '0');
sum_with_carry := (others => '0'); sum_with_carry := (others => '0');
@ -347,6 +352,7 @@ begin
v.e := Execute1ToWritebackInit; v.e := Execute1ToWritebackInit;
lv := Execute1ToLoadstore1Init; lv := Execute1ToLoadstore1Init;
v.f.redirect := '0'; v.f.redirect := '0';
fv := Execute1ToFPUInit;


-- XER forwarding. To avoid having to track XER hazards, we -- XER forwarding. To avoid having to track XER hazards, we
-- use the previously latched value. -- use the previously latched value.
@ -522,9 +528,11 @@ begin
exception_nextpc := '0'; exception_nextpc := '0';
v.e.exc_write_enable := '0'; v.e.exc_write_enable := '0';
v.e.exc_write_reg := fast_spr_num(SPR_SRR0); v.e.exc_write_reg := fast_spr_num(SPR_SRR0);
v.e.exc_write_data := e_in.nia;
if valid_in = '1' then if valid_in = '1' then
v.e.exc_write_data := e_in.nia;
v.last_nia := e_in.nia; v.last_nia := e_in.nia;
else
v.e.exc_write_data := r.last_nia;
end if; end if;


v.e.mode_32bit := not ctrl.msr(MSR_SF); v.e.mode_32bit := not ctrl.msr(MSR_SF);
@ -552,18 +560,27 @@ begin
ctrl_tmp.msr(MSR_LE) <= '1'; ctrl_tmp.msr(MSR_LE) <= '1';
v.e.valid := '1'; v.e.valid := '1';
v.trace_next := '0'; v.trace_next := '0';
v.fp_exception_next := '0';
report "Writing SRR1: " & to_hstring(ctrl.srr1); report "Writing SRR1: " & to_hstring(ctrl.srr1);


elsif r.trace_next = '1' and valid_in = '1' then elsif valid_in = '1' and ((HAS_FPU and r.fp_exception_next = '1') or r.trace_next = '1') then
-- Generate a trace interrupt rather than executing the next instruction if HAS_FPU and r.fp_exception_next = '1' then
-- or taking any asynchronous interrupt -- This is used for FP-type program interrupts that
v.f.redirect_nia := std_logic_vector(to_unsigned(16#d00#, 64)); -- become pending due to MSR[FE0,FE1] changing from 00 to non-zero.
ctrl_tmp.srr1(63 - 33) <= '1'; v.f.redirect_nia := std_logic_vector(to_unsigned(16#700#, 64));
if r.prev_op = OP_LOAD or r.prev_op = OP_ICBI or r.prev_op = OP_ICBT or ctrl_tmp.srr1(63 - 43) <= '1';
r.prev_op = OP_DCBT or r.prev_op = OP_DCBST or r.prev_op = OP_DCBF then ctrl_tmp.srr1(63 - 47) <= '1';
ctrl_tmp.srr1(63 - 35) <= '1'; else
elsif r.prev_op = OP_STORE or r.prev_op = OP_DCBZ or r.prev_op = OP_DCBTST then -- Generate a trace interrupt rather than executing the next instruction
ctrl_tmp.srr1(63 - 36) <= '1'; -- or taking any asynchronous interrupt
v.f.redirect_nia := std_logic_vector(to_unsigned(16#d00#, 64));
ctrl_tmp.srr1(63 - 33) <= '1';
if r.prev_op = OP_LOAD or r.prev_op = OP_ICBI or r.prev_op = OP_ICBT or
r.prev_op = OP_DCBT or r.prev_op = OP_DCBST or r.prev_op = OP_DCBF then
ctrl_tmp.srr1(63 - 35) <= '1';
elsif r.prev_op = OP_STORE or r.prev_op = OP_DCBZ or r.prev_op = OP_DCBTST then
ctrl_tmp.srr1(63 - 36) <= '1';
end if;
end if; end if;
exception := '1'; exception := '1';


@ -589,7 +606,7 @@ begin
illegal := '1'; illegal := '1';


elsif HAS_FPU and valid_in = '1' and ctrl.msr(MSR_FP) = '0' and elsif HAS_FPU and valid_in = '1' and ctrl.msr(MSR_FP) = '0' and
(e_in.insn_type = OP_FPLOAD or e_in.insn_type = OP_FPSTORE) then (e_in.unit = FPU or e_in.insn_type = OP_FPLOAD or e_in.insn_type = OP_FPSTORE) then
-- generate a floating-point unavailable interrupt -- generate a floating-point unavailable interrupt
exception := '1'; exception := '1';
v.f.redirect_nia := std_logic_vector(to_unsigned(16#800#, 64)); v.f.redirect_nia := std_logic_vector(to_unsigned(16#800#, 64));
@ -809,6 +826,10 @@ begin
is_branch := '1'; is_branch := '1';
taken_branch := '1'; taken_branch := '1';
abs_branch := '1'; abs_branch := '1';
if HAS_FPU then
v.fp_exception_next := fp_in.exception and
(a_in(MSR_FE0) or a_in(MSR_FE1));
end if;
do_trace := '0'; do_trace := '0';


when OP_CNTZ => when OP_CNTZ =>
@ -980,6 +1001,10 @@ begin
ctrl_tmp.msr(MSR_IR) <= '1'; ctrl_tmp.msr(MSR_IR) <= '1';
ctrl_tmp.msr(MSR_DR) <= '1'; ctrl_tmp.msr(MSR_DR) <= '1';
end if; end if;
if HAS_FPU then
v.fp_exception_next := fp_in.exception and
(c_in(MSR_FE0) or c_in(MSR_FE1));
end if;
end if; end if;
when OP_MTSPR => when OP_MTSPR =>
report "MTSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) & report "MTSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) &
@ -1096,6 +1121,8 @@ begin
lv.valid := '1'; lv.valid := '1';
elsif e_in.unit = NONE then elsif e_in.unit = NONE then
illegal := '1'; illegal := '1';
elsif HAS_FPU and e_in.unit = FPU then
fv.valid := '1';
end if; end if;


elsif r.f.redirect = '1' then elsif r.f.redirect = '1' then
@ -1170,7 +1197,17 @@ begin
v.e.valid := '1'; v.e.valid := '1';
end if; end if;


if illegal = '1' then -- Generate FP-type program interrupt. fp_in.interrupt will only
-- be set during the execution of a FP instruction.
-- The case where MSR[FE0,FE1] goes from zero to non-zero is
-- handled above by mtmsrd and rfid setting v.fp_exception_next.
if HAS_FPU and fp_in.interrupt = '1' then
v.f.redirect_nia := std_logic_vector(to_unsigned(16#700#, 64));
ctrl_tmp.srr1(63 - 43) <= '1';
exception := '1';
end if;

if illegal = '1' or (HAS_FPU and fp_in.illegal = '1') then
exception := '1'; exception := '1';
v.f.redirect_nia := std_logic_vector(to_unsigned(16#700#, 64)); v.f.redirect_nia := std_logic_vector(to_unsigned(16#700#, 64));
-- Since we aren't doing Hypervisor emulation assist (0xe40) we -- Since we aren't doing Hypervisor emulation assist (0xe40) we
@ -1216,7 +1253,6 @@ begin
end if; end if;
v.e.exc_write_enable := '1'; v.e.exc_write_enable := '1';
v.e.exc_write_reg := fast_spr_num(SPR_SRR0); v.e.exc_write_reg := fast_spr_num(SPR_SRR0);
v.e.exc_write_data := r.last_nia;
report "ldst exception writing srr0=" & to_hstring(r.last_nia); report "ldst exception writing srr0=" & to_hstring(r.last_nia);
end if; end if;


@ -1261,6 +1297,19 @@ begin
lv.mode_32bit := not ctrl.msr(MSR_SF); lv.mode_32bit := not ctrl.msr(MSR_SF);
lv.is_32bit := e_in.is_32bit; lv.is_32bit := e_in.is_32bit;


-- Outputs to FPU
fv.op := e_in.insn_type;
fv.nia := e_in.nia;
fv.insn := e_in.insn;
fv.single := e_in.is_32bit;
fv.fe_mode := ctrl.msr(MSR_FE0) & ctrl.msr(MSR_FE1);
fv.fra := a_in;
fv.frb := b_in;
fv.frc := c_in;
fv.frt := e_in.write_reg;
fv.rc := e_in.rc;
fv.out_cr := e_in.output_cr;

-- Update registers -- Update registers
rin <= v; rin <= v;


@ -1268,6 +1317,7 @@ begin
f_out <= r.f; f_out <= r.f;
l_out <= lv; l_out <= lv;
e_out <= r.e; e_out <= r.e;
fp_out <= fv;
flush_out <= f_out.redirect; flush_out <= f_out.redirect;


exception_log <= exception; exception_log <= exception;

@ -0,0 +1,185 @@
-- Floating-point unit for Microwatt

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.insn_helpers.all;
use work.decode_types.all;
use work.crhelpers.all;
use work.helpers.all;
use work.common.all;

entity fpu is
port (
clk : in std_ulogic;
rst : in std_ulogic;

e_in : in Execute1toFPUType;
e_out : out FPUToExecute1Type;

w_out : out FPUToWritebackType
);
end entity fpu;

architecture behaviour of fpu is

type state_t is (IDLE,
DO_MFFS, DO_MTFSF);

type reg_type is record
state : state_t;
busy : std_ulogic;
instr_done : std_ulogic;
do_intr : std_ulogic;
op : insn_type_t;
insn : std_ulogic_vector(31 downto 0);
dest_fpr : gspr_index_t;
fe_mode : std_ulogic;
rc : std_ulogic;
is_cmp : std_ulogic;
single_prec : std_ulogic;
fpscr : std_ulogic_vector(31 downto 0);
b : std_ulogic_vector(63 downto 0);
writing_back : std_ulogic;
cr_result : std_ulogic_vector(3 downto 0);
cr_mask : std_ulogic_vector(7 downto 0);
end record;

signal r, rin : reg_type;

signal fp_result : std_ulogic_vector(63 downto 0);

begin
fpu_0: process(clk)
begin
if rising_edge(clk) then
if rst = '1' then
r.state <= IDLE;
r.busy <= '0';
r.instr_done <= '0';
r.do_intr <= '0';
r.fpscr <= (others => '0');
r.writing_back <= '0';
else
assert not (r.state /= IDLE and e_in.valid = '1') severity failure;
r <= rin;
end if;
end if;
end process;

e_out.busy <= r.busy;
e_out.exception <= r.fpscr(FPSCR_FEX);
e_out.interrupt <= r.do_intr;

w_out.valid <= r.instr_done and not r.do_intr;
w_out.write_enable <= r.writing_back;
w_out.write_reg <= r.dest_fpr;
w_out.write_data <= fp_result;
w_out.write_cr_enable <= r.instr_done and r.rc;
w_out.write_cr_mask <= r.cr_mask;
w_out.write_cr_data <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &
r.cr_result & r.cr_result & r.cr_result & r.cr_result;

fpu_1: process(all)
variable v : reg_type;
variable illegal : std_ulogic;
variable j, k : integer;
variable flm : std_ulogic_vector(7 downto 0);
begin
v := r;
illegal := '0';
v.busy := '0';

-- capture incoming instruction
if e_in.valid = '1' then
v.insn := e_in.insn;
v.op := e_in.op;
v.fe_mode := or (e_in.fe_mode);
v.dest_fpr := e_in.frt;
v.single_prec := e_in.single;
v.rc := e_in.rc;
v.is_cmp := e_in.out_cr;
v.cr_mask := num_to_fxm(1);
v.b := e_in.frb;
end if;

v.writing_back := '0';
v.instr_done := '0';

case r.state is
when IDLE =>
if e_in.valid = '1' then
case e_in.insn(5 downto 1) is
when "00111" =>
if e_in.insn(8) = '0' then
v.state := DO_MFFS;
else
v.state := DO_MTFSF;
end if;
when others =>
illegal := '1';
end case;
end if;

when DO_MFFS =>
v.writing_back := '1';
case r.insn(20 downto 16) is
when "00000" =>
-- mffs
when others =>
illegal := '1';
end case;
v.instr_done := '1';
v.state := IDLE;

when DO_MTFSF =>
if r.insn(25) = '1' then
flm := x"FF";
elsif r.insn(16) = '1' then
flm := x"00";
else
flm := r.insn(24 downto 17);
end if;
for i in 0 to 7 loop
k := i * 4;
if flm(i) = '1' then
v.fpscr(k + 3 downto k) := r.b(k + 3 downto k);
end if;
end loop;
v.instr_done := '1';
v.state := IDLE;

end case;

-- Data path.
-- Just enough to read FPSCR for now.
fp_result <= x"00000000" & r.fpscr;

v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
(or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
v.fpscr(FPSCR_VE downto FPSCR_XE));
if r.rc = '1' then
v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
end if;

if illegal = '1' then
v.instr_done := '0';
v.do_intr := '0';
v.writing_back := '0';
v.busy := '0';
v.state := IDLE;
else
v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;
if v.state /= IDLE or v.do_intr = '1' then
v.busy := '1';
end if;
end if;

rin <= v;
e_out.illegal <= illegal;
end process;

end architecture behaviour;

@ -23,6 +23,7 @@ filesets:
- cr_hazard.vhdl - cr_hazard.vhdl
- control.vhdl - control.vhdl
- execute1.vhdl - execute1.vhdl
- fpu.vhdl
- loadstore1.vhdl - loadstore1.vhdl
- mmu.vhdl - mmu.vhdl
- dcache.vhdl - dcache.vhdl

@ -84,17 +84,17 @@ struct log_entry {
#define FLGA(i, y, z) (log.i? y: z) #define FLGA(i, y, z) (log.i? y: z)
#define PNIA(f) (full_nia[log.f] & 0xff) #define PNIA(f) (full_nia[log.f] & 0xff)


const char *units[4] = { "--", "al", "ls", "?3" }; const char *units[4] = { "--", "al", "ls", "fp" };
const char *ops[64] = const char *ops[64] =
{ {
"illegal", "nop ", "add ", "and ", "attn ", "b ", "bc ", "bcreg ", "illegal", "nop ", "add ", "and ", "attn ", "b ", "bc ", "bcreg ",
"bperm ", "cmp ", "cmpb ", "cmpeqb ", "cmprb ", "cntz ", "crop ", "darn ", "bperm ", "cmp ", "cmpb ", "cmpeqb ", "cmprb ", "cntz ", "crop ", "darn ",
"dcbf ", "dcbst ", "dcbt ", "dcbtst ", "dcbz ", "div ", "dive ", "exts ", "dcbf ", "dcbst ", "dcbt ", "dcbtst ", "dcbz ", "div ", "dive ", "exts ",
"extswsl", "icbi ", "icbt ", "isel ", "isync ", "ld ", "st ", "fpload ", "extswsl", "fpop ", "fpopi ", "icbi ", "icbt ", "isel ", "isync ", "ld ",
"fpstore", "mcrxrx ", "mfcr ", "mfmsr ", "mfspr ", "mod ", "mtcrf ", "mtmsr ", "st ", "fpload ", "fpstore", "mcrxrx ", "mfcr ", "mfmsr ", "mfspr ", "mod ",
"mtspr ", "mull64 ", "mulh64 ", "mulh32 ", "or ", "popcnt ", "prty ", "rfid ", "mtcrf ", "mtmsr ", "mtspr ", "mull64 ", "mulh64 ", "mulh32 ", "or ", "popcnt ",
"rlc ", "rlcl ", "rlcr ", "sc ", "setb ", "shl ", "shr ", "sync ", "prty ", "rfid ", "rlc ", "rlcl ", "rlcr ", "sc ", "setb ", "shl ",
"tlbie ", "trap ", "xor ", "bcd ", "addg6s ", "ffail ", "?62 ", "?63 " "shr ", "sync ", "tlbie ", "trap ", "xor ", "bcd ", "addg6s ", "ffail ",
}; };


const char *spr_names[13] = const char *spr_names[13] =

@ -12,6 +12,7 @@ entity writeback is


e_in : in Execute1ToWritebackType; e_in : in Execute1ToWritebackType;
l_in : in Loadstore1ToWritebackType; l_in : in Loadstore1ToWritebackType;
fp_in : in FPUToWritebackType;


w_out : out WritebackToRegisterFileType; w_out : out WritebackToRegisterFileType;
c_out : out WritebackToCrFileType; c_out : out WritebackToCrFileType;
@ -31,15 +32,21 @@ begin
-- Do consistency checks only on the clock edge -- Do consistency checks only on the clock edge
x(0) := e_in.valid; x(0) := e_in.valid;
y(0) := l_in.valid; y(0) := l_in.valid;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure; w(0) := fp_in.valid;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) +
to_integer(unsigned(w))) <= 1 severity failure;


x(0) := e_in.write_enable or e_in.exc_write_enable; x(0) := e_in.write_enable or e_in.exc_write_enable;
y(0) := l_in.write_enable; y(0) := l_in.write_enable;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure; w(0) := fp_in.write_enable;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) +
to_integer(unsigned(w))) <= 1 severity failure;


w(0) := e_in.write_cr_enable; w(0) := e_in.write_cr_enable;
x(0) := (e_in.write_enable and e_in.rc); x(0) := (e_in.write_enable and e_in.rc);
assert (to_integer(unsigned(w)) + to_integer(unsigned(x))) <= 1 severity failure; y(0) := fp_in.write_cr_enable;
assert (to_integer(unsigned(w)) + to_integer(unsigned(x)) +
to_integer(unsigned(y))) <= 1 severity failure;
end if; end if;
end process; end process;


@ -53,7 +60,7 @@ begin
c_out <= WritebackToCrFileInit; c_out <= WritebackToCrFileInit;


complete_out <= '0'; complete_out <= '0';
if e_in.valid = '1' or l_in.valid = '1' then if e_in.valid = '1' or l_in.valid = '1' or fp_in.valid = '1' then
complete_out <= '1'; complete_out <= '1';
end if; end if;


@ -79,6 +86,18 @@ begin
c_out.write_xerc_data <= e_in.xerc; c_out.write_xerc_data <= e_in.xerc;
end if; end if;


if fp_in.write_enable = '1' then
w_out.write_reg <= fp_in.write_reg;
w_out.write_data <= fp_in.write_data;
w_out.write_enable <= '1';
end if;

if fp_in.write_cr_enable = '1' then
c_out.write_cr_enable <= '1';
c_out.write_cr_mask <= fp_in.write_cr_mask;
c_out.write_cr_data <= fp_in.write_cr_data;
end if;

if l_in.write_enable = '1' then if l_in.write_enable = '1' then
w_out.write_reg <= l_in.write_reg; w_out.write_reg <= l_in.write_reg;
w_out.write_data <= l_in.write_data; w_out.write_data <= l_in.write_data;

Loading…
Cancel
Save