core: Use a busy signal rather than a stall

This changes the instruction dependency tracking so that we can
generate a "busy" signal from execute1 and loadstore1, which arrives
one cycle later than the current "stall" signal.  This will enable
loadstore1 to signal busy cycles only when it actually needs to.

The "busy" signal from execute1/loadstore1 indicates "I didn't take
the thing you gave me on this cycle", as distinct from the previous
stall signal which meant "I took that but don't give me anything
next cycle".  That means that decode2 proactively gives execute1
a new instruction as soon as execute1 has taken the previous one
(assuming there is a valid instruction available from decode1), and
that instruction then sits in decode2's output until execute1 can
take it.  So instructions are issued by decode2 somewhat earlier than
they used to be.

Decode2 now only signals a stall upstream when its output buffer is
full, meaning that we can fill up bubbles in the upstream pipe while a
long instruction is executing.  This gives a small boost in
performance.

This also adds dependency tracking for rA updates by update-form
load/store instructions.

The GPR and CR hazard detection machinery now has one extra stage,
which may not be strictly necessary.  Some of the hazard-tracking
code (marked with "XXX assumes PIPELINE_DEPTH = 1" comments) now
really only works for PIPELINE_DEPTH=1.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/208/head
Paul Mackerras 4 years ago
parent 62b24a8dae
commit 6701e7346b

@ -244,6 +244,7 @@ package common is
others => (others => '0')); others => (others => '0'));


type Loadstore1ToExecute1Type is record type Loadstore1ToExecute1Type is record
busy : std_ulogic;
exception : std_ulogic; exception : std_ulogic;
invalid : std_ulogic; invalid : std_ulogic;
perm_error : std_ulogic; perm_error : std_ulogic;

@ -15,7 +15,8 @@ entity control is
complete_in : in std_ulogic; complete_in : in std_ulogic;
valid_in : in std_ulogic; valid_in : in std_ulogic;
flush_in : in std_ulogic; flush_in : in std_ulogic;
stall_in : in std_ulogic; busy_in : in std_ulogic;
deferred : in std_ulogic;
sgl_pipe_in : in std_ulogic; sgl_pipe_in : in std_ulogic;
stop_mark_in : in std_ulogic; stop_mark_in : in std_ulogic;


@ -23,6 +24,9 @@ entity control is
gpr_write_in : in gspr_index_t; gpr_write_in : in gspr_index_t;
gpr_bypassable : in std_ulogic; gpr_bypassable : in std_ulogic;


update_gpr_write_valid : in std_ulogic;
update_gpr_write_reg : in gspr_index_t;

gpr_a_read_valid_in : in std_ulogic; gpr_a_read_valid_in : in std_ulogic;
gpr_a_read_in : in gspr_index_t; gpr_a_read_in : in gspr_index_t;


@ -72,7 +76,11 @@ begin
) )
port map ( port map (
clk => clk, clk => clk,
stall_in => stall_in, busy_in => busy_in,
deferred => deferred,
complete_in => complete_in,
flush_in => flush_in,
issuing => valid_out,


gpr_write_valid_in => gpr_write_valid, gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in, gpr_write_in => gpr_write_in,
@ -80,6 +88,9 @@ begin
gpr_read_valid_in => gpr_a_read_valid_in, gpr_read_valid_in => gpr_a_read_valid_in,
gpr_read_in => gpr_a_read_in, gpr_read_in => gpr_a_read_in,


ugpr_write_valid => update_gpr_write_valid,
ugpr_write_reg => update_gpr_write_reg,

stall_out => stall_a_out, stall_out => stall_a_out,
use_bypass => gpr_bypass_a use_bypass => gpr_bypass_a
); );
@ -90,7 +101,11 @@ begin
) )
port map ( port map (
clk => clk, clk => clk,
stall_in => stall_in, busy_in => busy_in,
deferred => deferred,
complete_in => complete_in,
flush_in => flush_in,
issuing => valid_out,


gpr_write_valid_in => gpr_write_valid, gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in, gpr_write_in => gpr_write_in,
@ -98,6 +113,9 @@ begin
gpr_read_valid_in => gpr_b_read_valid_in, gpr_read_valid_in => gpr_b_read_valid_in,
gpr_read_in => gpr_b_read_in, gpr_read_in => gpr_b_read_in,


ugpr_write_valid => update_gpr_write_valid,
ugpr_write_reg => update_gpr_write_reg,

stall_out => stall_b_out, stall_out => stall_b_out,
use_bypass => gpr_bypass_b use_bypass => gpr_bypass_b
); );
@ -110,7 +128,11 @@ begin
) )
port map ( port map (
clk => clk, clk => clk,
stall_in => stall_in, busy_in => busy_in,
deferred => deferred,
complete_in => complete_in,
flush_in => flush_in,
issuing => valid_out,


gpr_write_valid_in => gpr_write_valid, gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in, gpr_write_in => gpr_write_in,
@ -118,6 +140,9 @@ begin
gpr_read_valid_in => gpr_c_read_valid_in, gpr_read_valid_in => gpr_c_read_valid_in,
gpr_read_in => gpr_c_read_in_fmt, gpr_read_in => gpr_c_read_in_fmt,


ugpr_write_valid => update_gpr_write_valid,
ugpr_write_reg => update_gpr_write_reg,

stall_out => stall_c_out, stall_out => stall_c_out,
use_bypass => gpr_bypass_c use_bypass => gpr_bypass_c
); );
@ -128,7 +153,11 @@ begin
) )
port map ( port map (
clk => clk, clk => clk,
stall_in => stall_in, busy_in => busy_in,
deferred => deferred,
complete_in => complete_in,
flush_in => flush_in,
issuing => valid_out,


cr_read_in => cr_read_in, cr_read_in => cr_read_in,
cr_write_in => cr_write_valid, cr_write_in => cr_write_valid,
@ -139,7 +168,8 @@ begin
control0: process(clk) control0: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
assert r_int.outstanding >= 0 and r_int.outstanding <= (PIPELINE_DEPTH+1) report "Outstanding bad " & integer'image(r_int.outstanding) severity failure; assert rin_int.outstanding >= 0 and rin_int.outstanding <= (PIPELINE_DEPTH+1)
report "Outstanding bad " & integer'image(rin_int.outstanding) severity failure;
r_int <= rin_int; r_int <= rin_int;
end if; end if;
end process; end process;
@ -152,17 +182,18 @@ begin
v_int := r_int; v_int := r_int;


-- asynchronous -- asynchronous
valid_tmp := valid_in and not flush_in and not stall_in; valid_tmp := valid_in and not flush_in;
stall_tmp := stall_in; stall_tmp := '0';


if complete_in = '1' then if flush_in = '1' then
-- expect to see complete_in next cycle
v_int.outstanding := 1;
elsif complete_in = '1' then
v_int.outstanding := r_int.outstanding - 1; v_int.outstanding := r_int.outstanding - 1;
end if; end if;


if rst = '1' then if rst = '1' then
v_int.state := IDLE; v_int := reg_internal_init;
v_int.outstanding := 0;
stall_tmp := '0';
valid_tmp := '0'; valid_tmp := '0';
end if; end if;


@ -227,7 +258,9 @@ begin
end if; end if;


if valid_tmp = '1' then if valid_tmp = '1' then
v_int.outstanding := v_int.outstanding + 1; if deferred = '0' then
v_int.outstanding := v_int.outstanding + 1;
end if;
gpr_write_valid <= gpr_write_valid_in; gpr_write_valid <= gpr_write_valid_in;
cr_write_valid <= cr_write_in; cr_write_valid <= cr_write_in;
else else
@ -237,7 +270,7 @@ begin


-- update outputs -- update outputs
valid_out <= valid_tmp; valid_out <= valid_tmp;
stall_out <= stall_tmp; stall_out <= stall_tmp or deferred;


-- update registers -- update registers
rin_int <= v_int; rin_int <= v_int;

@ -82,11 +82,10 @@ architecture behave of core is
signal icache_stall_out : std_ulogic; signal icache_stall_out : std_ulogic;
signal icache_stall_in : std_ulogic; signal icache_stall_in : std_ulogic;
signal decode1_stall_in : std_ulogic; signal decode1_stall_in : std_ulogic;
signal decode2_stall_in : std_ulogic; signal decode2_busy_in : std_ulogic;
signal decode2_stall_out : std_ulogic; signal decode2_stall_out : std_ulogic;
signal ex1_icache_inval: std_ulogic; signal ex1_icache_inval: std_ulogic;
signal ex1_stall_out: std_ulogic; signal ex1_busy_out: std_ulogic;
signal ls1_stall_out: std_ulogic;
signal dcache_stall_out: std_ulogic; signal dcache_stall_out: std_ulogic;


signal flush: std_ulogic; signal flush: std_ulogic;
@ -235,7 +234,7 @@ begin
port map ( port map (
clk => clk, clk => clk,
rst => rst_dec2, rst => rst_dec2,
stall_in => decode2_stall_in, busy_in => decode2_busy_in,
stall_out => decode2_stall_out, stall_out => decode2_stall_out,
flush_in => flush, flush_in => flush,
complete_in => complete, complete_in => complete,
@ -248,7 +247,7 @@ begin
c_out => decode2_to_cr_file, c_out => decode2_to_cr_file,
log_out => log_data(119 downto 110) log_out => log_data(119 downto 110)
); );
decode2_stall_in <= ex1_stall_out or ls1_stall_out; decode2_busy_in <= ex1_busy_out;


register_file_0: entity work.register_file register_file_0: entity work.register_file
generic map ( generic map (
@ -289,7 +288,7 @@ begin
clk => clk, clk => clk,
rst => rst_ex1, rst => rst_ex1,
flush_out => flush, flush_out => flush,
stall_out => ex1_stall_out, busy_out => ex1_busy_out,
e_in => decode2_to_execute1, e_in => decode2_to_execute1,
l_in => loadstore1_to_execute1, l_in => loadstore1_to_execute1,
ext_irq_in => ext_irq, ext_irq_in => ext_irq,
@ -317,7 +316,6 @@ begin
m_out => loadstore1_to_mmu, m_out => loadstore1_to_mmu,
m_in => mmu_to_loadstore1, m_in => mmu_to_loadstore1,
dc_stall => dcache_stall_out, dc_stall => dcache_stall_out,
stall_out => ls1_stall_out,
log_out => log_data(149 downto 140) log_out => log_data(149 downto 140)
); );



@ -4,11 +4,15 @@ use ieee.numeric_std.all;


entity cr_hazard is entity cr_hazard is
generic ( generic (
PIPELINE_DEPTH : natural := 2 PIPELINE_DEPTH : natural := 1
); );
port( port(
clk : in std_ulogic; clk : in std_ulogic;
stall_in : in std_ulogic; busy_in : in std_ulogic;
deferred : in std_ulogic;
complete_in : in std_ulogic;
flush_in : in std_ulogic;
issuing : in std_ulogic;


cr_read_in : in std_ulogic; cr_read_in : in std_ulogic;
cr_write_in : in std_ulogic; cr_write_in : in std_ulogic;
@ -22,7 +26,7 @@ architecture behaviour of cr_hazard is
end record; end record;
constant pipeline_entry_init : pipeline_entry_type := (valid => '0'); constant pipeline_entry_init : pipeline_entry_type := (valid => '0');


type pipeline_t is array(0 to PIPELINE_DEPTH-1) of pipeline_entry_type; type pipeline_t is array(0 to PIPELINE_DEPTH) of pipeline_entry_type;
constant pipeline_t_init : pipeline_t := (others => pipeline_entry_init); constant pipeline_t_init : pipeline_t := (others => pipeline_entry_init);


signal r, rin : pipeline_t := pipeline_t_init; signal r, rin : pipeline_t := pipeline_t_init;
@ -30,9 +34,7 @@ begin
cr_hazard0: process(clk) cr_hazard0: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
if stall_in = '0' then r <= rin;
r <= rin;
end if;
end if; end if;
end process; end process;


@ -41,22 +43,23 @@ begin
begin begin
v := r; v := r;


stall_out <= '0'; -- XXX assumes PIPELINE_DEPTH = 1
loop_0: for i in 0 to PIPELINE_DEPTH-1 loop if complete_in = '1' then
if (r(i).valid = cr_read_in) then v(1).valid := '0';
stall_out <= '1'; end if;
end if; stall_out <= cr_read_in and (v(0).valid or v(1).valid);
end loop;

v(0).valid := cr_write_in;
loop_1: for i in 0 to PIPELINE_DEPTH-2 loop
-- propagate to next slot
v(i+1) := r(i);
end loop;


-- asynchronous output -- XXX assumes PIPELINE_DEPTH = 1
if cr_read_in = '0' then if busy_in = '0' then
stall_out <= '0'; v(1) := r(0);
v(0).valid := '0';
end if;
if deferred = '0' and issuing = '1' then
v(0).valid := cr_write_in;
end if;
if flush_in = '1' then
v(0).valid := '0';
v(1).valid := '0';
end if; end if;


-- update registers -- update registers

@ -17,7 +17,7 @@ entity decode2 is
rst : in std_ulogic; rst : in std_ulogic;


complete_in : in std_ulogic; complete_in : in std_ulogic;
stall_in : in std_ulogic; busy_in : in std_ulogic;
stall_out : out std_ulogic; stall_out : out std_ulogic;


stopped_out : out std_ulogic; stopped_out : out std_ulogic;
@ -45,6 +45,8 @@ architecture behaviour of decode2 is


signal r, rin : reg_type; signal r, rin : reg_type;


signal deferred : std_ulogic;

signal log_data : std_ulogic_vector(9 downto 0); signal log_data : std_ulogic_vector(9 downto 0);


type decode_input_reg_t is record type decode_input_reg_t is record
@ -200,6 +202,9 @@ architecture behaviour of decode2 is
signal gpr_write : gspr_index_t; signal gpr_write : gspr_index_t;
signal gpr_bypassable : std_ulogic; signal gpr_bypassable : std_ulogic;


signal update_gpr_write_valid : std_ulogic;
signal update_gpr_write_reg : gspr_index_t;

signal gpr_a_read_valid : std_ulogic; signal gpr_a_read_valid : std_ulogic;
signal gpr_a_read :gspr_index_t; signal gpr_a_read :gspr_index_t;
signal gpr_a_bypass : std_ulogic; signal gpr_a_bypass : std_ulogic;
@ -224,7 +229,8 @@ begin


complete_in => complete_in, complete_in => complete_in,
valid_in => control_valid_in, valid_in => control_valid_in,
stall_in => stall_in, busy_in => busy_in,
deferred => deferred,
flush_in => flush_in, flush_in => flush_in,
sgl_pipe_in => control_sgl_pipe, sgl_pipe_in => control_sgl_pipe,
stop_mark_in => d_in.stop_mark, stop_mark_in => d_in.stop_mark,
@ -233,6 +239,9 @@ begin
gpr_write_in => gpr_write, gpr_write_in => gpr_write,
gpr_bypassable => gpr_bypassable, gpr_bypassable => gpr_bypassable,


update_gpr_write_valid => update_gpr_write_valid,
update_gpr_write_reg => update_gpr_write_reg,

gpr_a_read_valid_in => gpr_a_read_valid, gpr_a_read_valid_in => gpr_a_read_valid,
gpr_a_read_in => gpr_a_read, gpr_a_read_in => gpr_a_read,


@ -254,13 +263,17 @@ begin
gpr_bypass_c => gpr_c_bypass gpr_bypass_c => gpr_c_bypass
); );


deferred <= r.e.valid and busy_in;

decode2_0: process(clk) decode2_0: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
if rin.e.valid = '1' then if rst = '1' or flush_in = '1' or deferred = '0' then
report "execute " & to_hstring(rin.e.nia); if rin.e.valid = '1' then
report "execute " & to_hstring(rin.e.nia);
end if;
r <= rin;
end if; end if;
r <= rin;
end if; end if;
end process; end process;


@ -358,6 +371,8 @@ begin
if EX1_BYPASS and d_in.decode.unit = ALU then if EX1_BYPASS and d_in.decode.unit = ALU then
gpr_bypassable <= '1'; gpr_bypassable <= '1';
end if; end if;
update_gpr_write_valid <= d_in.decode.update;
update_gpr_write_reg <= decoded_reg_a.reg;


gpr_a_read_valid <= decoded_reg_a.reg_valid; gpr_a_read_valid <= decoded_reg_a.reg_valid;
gpr_a_read <= decoded_reg_a.reg; gpr_a_read <= decoded_reg_a.reg;
@ -375,7 +390,7 @@ begin
v.e.insn_type := OP_ILLEGAL; v.e.insn_type := OP_ILLEGAL;
end if; end if;


if rst = '1' then if rst = '1' or flush_in = '1' then
v.e := Decode2ToExecute1Init; v.e := Decode2ToExecute1Init;
end if; end if;



@ -20,7 +20,7 @@ entity execute1 is


-- asynchronous -- asynchronous
flush_out : out std_ulogic; flush_out : out std_ulogic;
stall_out : out std_ulogic; busy_out : out std_ulogic;


e_in : in Decode2ToExecute1Type; e_in : in Decode2ToExecute1Type;
l_in : in Loadstore1ToExecute1Type; l_in : in Loadstore1ToExecute1Type;
@ -48,6 +48,8 @@ end entity execute1;
architecture behaviour of execute1 is architecture behaviour of execute1 is
type reg_type is record type reg_type is record
e : Execute1ToWritebackType; e : Execute1ToWritebackType;
busy: std_ulogic;
terminate: std_ulogic;
lr_update : std_ulogic; lr_update : std_ulogic;
next_lr : std_ulogic_vector(63 downto 0); next_lr : std_ulogic_vector(63 downto 0);
mul_in_progress : std_ulogic; mul_in_progress : std_ulogic;
@ -62,7 +64,7 @@ architecture behaviour of execute1 is
log_addr_spr : std_ulogic_vector(31 downto 0); log_addr_spr : std_ulogic_vector(31 downto 0);
end record; end record;
constant reg_type_init : reg_type := constant reg_type_init : reg_type :=
(e => Execute1ToWritebackInit, lr_update => '0', (e => Execute1ToWritebackInit, busy => '0', lr_update => '0', terminate => '0',
mul_in_progress => '0', div_in_progress => '0', cntz_in_progress => '0', mul_in_progress => '0', div_in_progress => '0', cntz_in_progress => '0',
slow_op_insn => OP_ILLEGAL, slow_op_rc => '0', slow_op_oe => '0', slow_op_xerc => xerc_init, slow_op_insn => OP_ILLEGAL, slow_op_rc => '0', slow_op_oe => '0', slow_op_xerc => xerc_init,
next_lr => (others => '0'), ldst_nia => (others => '0'), others => (others => '0')); next_lr => (others => '0'), ldst_nia => (others => '0'), others => (others => '0'));
@ -71,6 +73,7 @@ architecture behaviour of execute1 is


signal a_in, b_in, c_in : std_ulogic_vector(63 downto 0); signal a_in, b_in, c_in : std_ulogic_vector(63 downto 0);


signal valid_in : std_ulogic;
signal ctrl: ctrl_t := (irq_state => WRITE_SRR0, others => (others => '0')); signal ctrl: ctrl_t := (irq_state => WRITE_SRR0, others => (others => '0'));
signal ctrl_tmp: ctrl_t := (irq_state => WRITE_SRR0, others => (others => '0')); signal ctrl_tmp: ctrl_t := (irq_state => WRITE_SRR0, others => (others => '0'));
signal right_shift, rot_clear_left, rot_clear_right: std_ulogic; signal right_shift, rot_clear_left, rot_clear_right: std_ulogic;
@ -241,6 +244,11 @@ begin
b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2; b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2;
c_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data3 = '1' else e_in.read_data3; c_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data3 = '1' else e_in.read_data3;


busy_out <= l_in.busy or r.busy;
valid_in <= e_in.valid and not busy_out;

terminate_out <= r.terminate;

execute1_0: process(clk) execute1_0: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
@ -251,7 +259,7 @@ begin
else else
r <= rin; r <= rin;
ctrl <= ctrl_tmp; ctrl <= ctrl_tmp;
assert not (r.lr_update = '1' and e_in.valid = '1') assert not (r.lr_update = '1' and valid_in = '1')
report "LR update collision with valid in EX1" report "LR update collision with valid in EX1"
severity failure; severity failure;
if r.lr_update = '1' then if r.lr_update = '1' then
@ -423,9 +431,9 @@ begin
end if; end if;
end if; end if;


terminate_out <= '0'; v.terminate := '0';
icache_inval <= '0'; icache_inval <= '0';
stall_out <= '0'; v.busy := '0';
f_out <= Execute1ToFetch1TypeInit; f_out <= Execute1ToFetch1TypeInit;
-- send MSR[IR] and ~MSR[PR] up to fetch1 -- send MSR[IR] and ~MSR[PR] up to fetch1
f_out.virt_mode <= ctrl.msr(MSR_IR); f_out.virt_mode <= ctrl.msr(MSR_IR);
@ -463,10 +471,10 @@ begin
f_out.virt_mode <= '0'; f_out.virt_mode <= '0';
f_out.priv_mode <= '1'; f_out.priv_mode <= '1';
f_out.redirect_nia <= ctrl.irq_nia; f_out.redirect_nia <= ctrl.irq_nia;
v.e.valid := e_in.valid; v.e.valid := '1';
report "Writing SRR1: " & to_hstring(ctrl.srr1); report "Writing SRR1: " & to_hstring(ctrl.srr1);


elsif irq_valid = '1' and e_in.valid = '1' then elsif irq_valid = '1' and valid_in = '1' then
-- we need two cycles to write srr0 and 1 -- we need two cycles to write srr0 and 1
-- will need more when we have to write HEIR -- will need more when we have to write HEIR
-- Don't deliver the interrupt until we have a valid instruction -- Don't deliver the interrupt until we have a valid instruction
@ -474,7 +482,7 @@ begin
exception := '1'; exception := '1';
ctrl_tmp.srr1 <= msr_copy(ctrl.msr); ctrl_tmp.srr1 <= msr_copy(ctrl.msr);


elsif e_in.valid = '1' and ctrl.msr(MSR_PR) = '1' and elsif valid_in = '1' and ctrl.msr(MSR_PR) = '1' and
instr_is_privileged(e_in.insn_type, e_in.insn) then instr_is_privileged(e_in.insn_type, e_in.insn) then
-- generate a program interrupt -- generate a program interrupt
exception := '1'; exception := '1';
@ -484,7 +492,7 @@ begin
ctrl_tmp.srr1(63 - 45) <= '1'; ctrl_tmp.srr1(63 - 45) <= '1';
report "privileged instruction"; report "privileged instruction";
elsif e_in.valid = '1' and e_in.unit = ALU then elsif valid_in = '1' and e_in.unit = ALU then


report "execute nia " & to_hstring(e_in.nia); report "execute nia " & to_hstring(e_in.nia);


@ -519,7 +527,7 @@ begin
-- check bits 1-10 of the instruction to make sure it's attn -- check bits 1-10 of the instruction to make sure it's attn
-- if not then it is illegal -- if not then it is illegal
if e_in.insn(10 downto 1) = "0100000000" then if e_in.insn(10 downto 1) = "0100000000" then
terminate_out <= '1'; v.terminate := '1';
report "ATTN"; report "ATTN";
else else
illegal := '1'; illegal := '1';
@ -674,7 +682,7 @@ begin
when OP_CNTZ => when OP_CNTZ =>
v.e.valid := '0'; v.e.valid := '0';
v.cntz_in_progress := '1'; v.cntz_in_progress := '1';
stall_out <= '1'; v.busy := '1';
when OP_EXTS => when OP_EXTS =>
-- note data_len is a 1-hot encoding -- note data_len is a 1-hot encoding
negative := (e_in.data_len(0) and c_in(7)) or negative := (e_in.data_len(0) and c_in(7)) or
@ -876,21 +884,21 @@ begin
when OP_MUL_L64 | OP_MUL_H64 | OP_MUL_H32 => when OP_MUL_L64 | OP_MUL_H64 | OP_MUL_H32 =>
v.e.valid := '0'; v.e.valid := '0';
v.mul_in_progress := '1'; v.mul_in_progress := '1';
stall_out <= '1'; v.busy := '1';
x_to_multiply.valid <= '1'; x_to_multiply.valid <= '1';


when OP_DIV | OP_DIVE | OP_MOD => when OP_DIV | OP_DIVE | OP_MOD =>
v.e.valid := '0'; v.e.valid := '0';
v.div_in_progress := '1'; v.div_in_progress := '1';
stall_out <= '1'; v.busy := '1';
x_to_divider.valid <= '1'; x_to_divider.valid <= '1';


when others => when others =>
terminate_out <= '1'; v.terminate := '1';
report "illegal"; report "illegal";
end case; end case;


v.e.rc := e_in.rc and e_in.valid; v.e.rc := e_in.rc and valid_in;


-- Update LR on the next cycle after a branch link -- Update LR on the next cycle after a branch link
-- --
@ -908,10 +916,10 @@ begin
v.next_lr := next_nia; v.next_lr := next_nia;
v.e.valid := '0'; v.e.valid := '0';
report "Delayed LR update to " & to_hstring(next_nia); report "Delayed LR update to " & to_hstring(next_nia);
stall_out <= '1'; v.busy := '1';
end if; end if;


elsif e_in.valid = '1' then elsif valid_in = '1' then
-- instruction for other units, i.e. LDST -- instruction for other units, i.e. LDST
v.ldst_nia := e_in.nia; v.ldst_nia := e_in.nia;
v.e.valid := '0'; v.e.valid := '0';
@ -967,7 +975,7 @@ begin
end if; end if;
v.e.valid := '1'; v.e.valid := '1';
else else
stall_out <= '1'; v.busy := '1';
v.mul_in_progress := r.mul_in_progress; v.mul_in_progress := r.mul_in_progress;
v.div_in_progress := r.div_in_progress; v.div_in_progress := r.div_in_progress;
end if; end if;
@ -988,7 +996,8 @@ begin
v.e.exc_write_data := next_nia; v.e.exc_write_data := next_nia;
end if; end if;
ctrl_tmp.irq_state <= WRITE_SRR1; ctrl_tmp.irq_state <= WRITE_SRR1;
v.e.valid := '1'; v.busy := '1';
v.e.valid := '0';
end if; end if;


v.e.write_data := result; v.e.write_data := result;
@ -1020,7 +1029,6 @@ begin
v.e.exc_write_data := r.ldst_nia; v.e.exc_write_data := r.ldst_nia;
report "ldst exception writing srr0=" & to_hstring(r.ldst_nia); report "ldst exception writing srr0=" & to_hstring(r.ldst_nia);
ctrl_tmp.irq_state <= WRITE_SRR1; ctrl_tmp.irq_state <= WRITE_SRR1;
v.e.valid := '1'; -- complete the original load or store
end if; end if;


-- Outputs to loadstore1 (async) -- Outputs to loadstore1 (async)
@ -1072,7 +1080,7 @@ begin
r.e.write_enable & r.e.write_enable &
r.e.valid & r.e.valid &
f_out.redirect & f_out.redirect &
stall_out & r.busy &
flush_out; flush_out;
end if; end if;
end process; end process;

@ -4,11 +4,15 @@ use ieee.numeric_std.all;


entity gpr_hazard is entity gpr_hazard is
generic ( generic (
PIPELINE_DEPTH : natural := 2 PIPELINE_DEPTH : natural := 1
); );
port( port(
clk : in std_ulogic; clk : in std_ulogic;
stall_in : in std_ulogic; busy_in : in std_ulogic;
deferred : in std_ulogic;
complete_in : in std_ulogic;
flush_in : in std_ulogic;
issuing : in std_ulogic;


gpr_write_valid_in : in std_ulogic; gpr_write_valid_in : in std_ulogic;
gpr_write_in : in std_ulogic_vector(5 downto 0); gpr_write_in : in std_ulogic_vector(5 downto 0);
@ -16,6 +20,9 @@ entity gpr_hazard is
gpr_read_valid_in : in std_ulogic; gpr_read_valid_in : in std_ulogic;
gpr_read_in : in std_ulogic_vector(5 downto 0); gpr_read_in : in std_ulogic_vector(5 downto 0);


ugpr_write_valid : in std_ulogic;
ugpr_write_reg : in std_ulogic_vector(5 downto 0);

stall_out : out std_ulogic; stall_out : out std_ulogic;
use_bypass : out std_ulogic use_bypass : out std_ulogic
); );
@ -25,10 +32,13 @@ architecture behaviour of gpr_hazard is
valid : std_ulogic; valid : std_ulogic;
bypass : std_ulogic; bypass : std_ulogic;
gpr : std_ulogic_vector(5 downto 0); gpr : std_ulogic_vector(5 downto 0);
ugpr_valid : std_ulogic;
ugpr : std_ulogic_vector(5 downto 0);
end record; end record;
constant pipeline_entry_init : pipeline_entry_type := (valid => '0', bypass => '0', gpr => (others => '0')); constant pipeline_entry_init : pipeline_entry_type := (valid => '0', bypass => '0', gpr => (others => '0'),
ugpr_valid => '0', ugpr => (others => '0'));


type pipeline_t is array(0 to PIPELINE_DEPTH-1) of pipeline_entry_type; type pipeline_t is array(0 to PIPELINE_DEPTH) of pipeline_entry_type;
constant pipeline_t_init : pipeline_t := (others => pipeline_entry_init); constant pipeline_t_init : pipeline_t := (others => pipeline_entry_init);


signal r, rin : pipeline_t := pipeline_t_init; signal r, rin : pipeline_t := pipeline_t_init;
@ -45,50 +55,46 @@ begin
begin begin
v := r; v := r;


if complete_in = '1' then
v(PIPELINE_DEPTH).valid := '0';
v(PIPELINE_DEPTH).ugpr_valid := '0';
end if;

stall_out <= '0'; stall_out <= '0';
use_bypass <= '0'; use_bypass <= '0';
if gpr_read_valid_in = '1' then if gpr_read_valid_in = '1' then
if r(0).valid = '1' and r(0).gpr = gpr_read_in then loop_0: for i in 0 to PIPELINE_DEPTH loop
if r(0).bypass = '1' and stall_in = '0' then if v(i).valid = '1' and r(i).gpr = gpr_read_in then
use_bypass <= '1';
else
stall_out <= '1';
end if;
end if;
loop_0: for i in 1 to PIPELINE_DEPTH-1 loop
if r(i).valid = '1' and r(i).gpr = gpr_read_in then
if r(i).bypass = '1' then if r(i).bypass = '1' then
use_bypass <= '1'; use_bypass <= '1';
else else
stall_out <= '1'; stall_out <= '1';
end if; end if;
end if; end if;
if v(i).ugpr_valid = '1' and r(i).ugpr = gpr_read_in then
stall_out <= '1';
end if;
end loop; end loop;
end if; end if;


if stall_in = '0' then -- XXX assumes PIPELINE_DEPTH = 1
if busy_in = '0' then
v(1) := v(0);
v(0).valid := '0';
v(0).ugpr_valid := '0';
end if;
if deferred = '0' and issuing = '1' then
v(0).valid := gpr_write_valid_in; v(0).valid := gpr_write_valid_in;
v(0).bypass := bypass_avail; v(0).bypass := bypass_avail;
v(0).gpr := gpr_write_in; v(0).gpr := gpr_write_in;
loop_1: for i in 1 to PIPELINE_DEPTH-1 loop v(0).ugpr_valid := ugpr_write_valid;
-- propagate to next slot v(0).ugpr := ugpr_write_reg;
v(i).valid := r(i-1).valid; end if;
v(i).bypass := r(i-1).bypass; if flush_in = '1' then
v(i).gpr := r(i-1).gpr; v(0).valid := '0';
end loop; v(0).ugpr_valid := '0';

v(1).valid := '0';
else v(1).ugpr_valid := '0';
-- stage 0 stalled, so stage 1 becomes empty
loop_1b: for i in 1 to PIPELINE_DEPTH-1 loop
-- propagate to next slot
if i = 1 then
v(i).valid := '0';
else
v(i).valid := r(i-1).valid;
v(i).bypass := r(i-1).bypass;
v(i).gpr := r(i-1).gpr;
end if;
end loop;
end if; end if;


-- update registers -- update registers

@ -25,7 +25,6 @@ entity loadstore1 is
m_in : in MmuToLoadstore1Type; m_in : in MmuToLoadstore1Type;


dc_stall : in std_ulogic; dc_stall : in std_ulogic;
stall_out : out std_ulogic;


log_out : out std_ulogic_vector(9 downto 0) log_out : out std_ulogic_vector(9 downto 0)
); );
@ -47,6 +46,7 @@ architecture behave of loadstore1 is
); );


type reg_stage_t is record type reg_stage_t is record
busy : std_ulogic;
-- latch most of the input request -- latch most of the input request
load : std_ulogic; load : std_ulogic;
tlbie : std_ulogic; tlbie : std_ulogic;
@ -123,6 +123,7 @@ begin
if rising_edge(clk) then if rising_edge(clk) then
if rst = '1' then if rst = '1' then
r.state <= IDLE; r.state <= IDLE;
r.busy <= '0';
else else
r <= rin; r <= rin;
end if; end if;
@ -499,6 +500,7 @@ begin
l_out.store_done <= d_in.store_done; l_out.store_done <= d_in.store_done;


-- update exception info back to execute1 -- update exception info back to execute1
e_out.busy <= r.busy;
e_out.exception <= exception; e_out.exception <= exception;
e_out.instr_fault <= r.instr_fault; e_out.instr_fault <= r.instr_fault;
e_out.invalid <= m_in.invalid; e_out.invalid <= m_in.invalid;
@ -513,7 +515,7 @@ begin
end if; end if;
end if; end if;


stall_out <= stall; v.busy := stall;


-- Update registers -- Update registers
rin <= v; rin <= v;
@ -523,7 +525,7 @@ begin
ls1_log: process(clk) ls1_log: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
log_data <= stall_out & log_data <= r.busy &
e_out.exception & e_out.exception &
l_out.valid & l_out.valid &
m_out.valid & m_out.valid &

@ -22,27 +22,33 @@ end entity writeback;


architecture behaviour of writeback is architecture behaviour of writeback is
begin begin
writeback_1: process(all) writeback_0: process(clk)
variable x : std_ulogic_vector(0 downto 0); variable x : std_ulogic_vector(0 downto 0);
variable y : std_ulogic_vector(0 downto 0); variable y : std_ulogic_vector(0 downto 0);
variable w : std_ulogic_vector(0 downto 0); variable w : std_ulogic_vector(0 downto 0);
begin
if rising_edge(clk) then
-- Do consistency checks only on the clock edge
x(0) := e_in.valid;
y(0) := l_in.valid;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure;

x(0) := e_in.write_enable or e_in.exc_write_enable;
y(0) := l_in.write_enable;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure;

w(0) := e_in.write_cr_enable;
x(0) := (e_in.write_enable and e_in.rc);
assert (to_integer(unsigned(w)) + to_integer(unsigned(x))) <= 1 severity failure;
end if;
end process;

writeback_1: process(all)
variable cf: std_ulogic_vector(3 downto 0); variable cf: std_ulogic_vector(3 downto 0);
variable zero : std_ulogic; variable zero : std_ulogic;
variable sign : std_ulogic; variable sign : std_ulogic;
variable scf : std_ulogic_vector(3 downto 0); variable scf : std_ulogic_vector(3 downto 0);
begin begin
x(0) := e_in.valid;
y(0) := l_in.valid;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure;

x(0) := e_in.write_enable or e_in.exc_write_enable;
y(0) := l_in.write_enable;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure;

w(0) := e_in.write_cr_enable;
x(0) := (e_in.write_enable and e_in.rc);
assert (to_integer(unsigned(w)) + to_integer(unsigned(x))) <= 1 severity failure;

w_out <= WritebackToRegisterFileInit; w_out <= WritebackToRegisterFileInit;
c_out <= WritebackToCrFileInit; c_out <= WritebackToCrFileInit;



Loading…
Cancel
Save