control: Reimplement serialization using tags

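This lets us get rid of r_int and its 'outstanding' counter.  Each tag
register now carries a 'valid' bit, and an instruction that needs
serialization is held back while the tag of the previously issued
instruction (prev_tag) is still valid and is not completing in the
current cycle.  We now test more directly for excess completions by
asserting that a completion only arrives for a tag that is still valid,
i.e. that we don't get duplicate completions for the same tag.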

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/379/head
Paul Mackerras 2 years ago
parent 2da08bcf2e
commit e598c2aef8

@ -17,7 +17,7 @@ entity control is
valid_in : in std_ulogic; valid_in : in std_ulogic;
flush_in : in std_ulogic; flush_in : in std_ulogic;
deferred : in std_ulogic; deferred : in std_ulogic;
sgl_pipe_in : in std_ulogic; serialize : in std_ulogic;
stop_mark_in : in std_ulogic; stop_mark_in : in std_ulogic;


gpr_write_valid_in : in std_ulogic; gpr_write_valid_in : in std_ulogic;
@ -53,16 +53,6 @@ entity control is
end entity control; end entity control;


architecture rtl of control is architecture rtl of control is
type state_type is (IDLE, WAIT_FOR_PREV_TO_COMPLETE, WAIT_FOR_CURR_TO_COMPLETE);

type reg_internal_type is record
state : state_type;
outstanding : integer range -1 to PIPELINE_DEPTH+2;
end record;
constant reg_internal_init : reg_internal_type := (state => IDLE, outstanding => 0);

signal r_int, rin_int : reg_internal_type := reg_internal_init;

signal gpr_write_valid : std_ulogic; signal gpr_write_valid : std_ulogic;
signal cr_write_valid : std_ulogic; signal cr_write_valid : std_ulogic;


@ -71,6 +61,7 @@ architecture rtl of control is
reg : gspr_index_t; reg : gspr_index_t;
recent : std_ulogic; recent : std_ulogic;
wr_cr : std_ulogic; wr_cr : std_ulogic;
valid : std_ulogic;
end record; end record;


type tag_regs_array is array(tag_number_t) of tag_register; type tag_regs_array is array(tag_number_t) of tag_register;
@ -80,27 +71,29 @@ architecture rtl of control is


signal gpr_tag_stall : std_ulogic; signal gpr_tag_stall : std_ulogic;
signal cr_tag_stall : std_ulogic; signal cr_tag_stall : std_ulogic;
signal serial_stall : std_ulogic;


signal curr_tag : tag_number_t; signal curr_tag : tag_number_t;
signal next_tag : tag_number_t; signal next_tag : tag_number_t;


signal curr_cr_tag : tag_number_t; signal curr_cr_tag : tag_number_t;
signal prev_tag : tag_number_t;


begin begin
control0: process(clk) control0: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
assert rin_int.outstanding >= 0 and rin_int.outstanding <= (PIPELINE_DEPTH+1)
report "Outstanding bad " & integer'image(rin_int.outstanding) severity failure;
r_int <= rin_int;
for i in tag_number_t loop for i in tag_number_t loop
if rst = '1' or flush_in = '1' then if rst = '1' or flush_in = '1' then
tag_regs(i).wr_gpr <= '0'; tag_regs(i).wr_gpr <= '0';
tag_regs(i).wr_cr <= '0'; tag_regs(i).wr_cr <= '0';
tag_regs(i).valid <= '0';
else else
if complete_in.valid = '1' and i = complete_in.tag then if complete_in.valid = '1' and i = complete_in.tag then
assert tag_regs(i).valid = '1' report "spurious completion" severity failure;
tag_regs(i).wr_gpr <= '0'; tag_regs(i).wr_gpr <= '0';
tag_regs(i).wr_cr <= '0'; tag_regs(i).wr_cr <= '0';
tag_regs(i).valid <= '0';
report "tag " & integer'image(i) & " not valid"; report "tag " & integer'image(i) & " not valid";
end if; end if;
if instr_tag.valid = '1' and gpr_write_valid = '1' and if instr_tag.valid = '1' and gpr_write_valid = '1' and
@ -115,6 +108,7 @@ begin
tag_regs(i).reg <= gpr_write_in; tag_regs(i).reg <= gpr_write_in;
tag_regs(i).recent <= gpr_write_valid; tag_regs(i).recent <= gpr_write_valid;
tag_regs(i).wr_cr <= cr_write_valid; tag_regs(i).wr_cr <= cr_write_valid;
tag_regs(i).valid <= '1';
if gpr_write_valid = '1' then if gpr_write_valid = '1' then
report "tag " & integer'image(i) & " valid for gpr " & to_hstring(gpr_write_in); report "tag " & integer'image(i) & " valid for gpr " & to_hstring(gpr_write_in);
end if; end if;
@ -124,11 +118,15 @@ begin
if rst = '1' then if rst = '1' then
curr_tag <= 0; curr_tag <= 0;
curr_cr_tag <= 0; curr_cr_tag <= 0;
prev_tag <= 0;
else else
curr_tag <= next_tag; curr_tag <= next_tag;
if instr_tag.valid = '1' and cr_write_valid = '1' then if instr_tag.valid = '1' and cr_write_valid = '1' then
curr_cr_tag <= instr_tag.tag; curr_cr_tag <= instr_tag.tag;
end if; end if;
if valid_out = '1' then
prev_tag <= instr_tag.tag;
end if;
end if; end if;
end if; end if;
end process; end process;
@ -146,6 +144,7 @@ begin
variable byp_c : std_ulogic_vector(1 downto 0); variable byp_c : std_ulogic_vector(1 downto 0);
variable tag_cr : instr_tag_t; variable tag_cr : instr_tag_t;
variable byp_cr : std_ulogic_vector(1 downto 0); variable byp_cr : std_ulogic_vector(1 downto 0);
variable tag_prev : instr_tag_t;
begin begin
tag_a := instr_tag_init; tag_a := instr_tag_init;
for i in tag_number_t loop for i in tag_number_t loop
@ -226,107 +225,40 @@ begin


cr_bypass <= byp_cr; cr_bypass <= byp_cr;
cr_tag_stall <= tag_cr.valid and not byp_cr(1); cr_tag_stall <= tag_cr.valid and not byp_cr(1);

tag_prev.tag := prev_tag;
tag_prev.valid := tag_regs(prev_tag).valid;
if tag_match(tag_prev, complete_in) then
tag_prev.valid := '0';
end if;
serial_stall <= tag_prev.valid;
end process; end process;


control1 : process(all) control1 : process(all)
variable v_int : reg_internal_type;
variable valid_tmp : std_ulogic; variable valid_tmp : std_ulogic;
variable stall_tmp : std_ulogic;
begin begin
v_int := r_int;

-- asynchronous -- asynchronous
valid_tmp := valid_in and not flush_in; valid_tmp := valid_in and not flush_in;
stall_tmp := '0';

if flush_in = '1' then
v_int.outstanding := 0;
elsif complete_in.valid = '1' then
v_int.outstanding := r_int.outstanding - 1;
end if;
if r_int.outstanding >= PIPELINE_DEPTH + 1 then
valid_tmp := '0';
stall_tmp := '1';
end if;


if rst = '1' then if rst = '1' then
gpr_write_valid <= '0'; gpr_write_valid <= '0';
cr_write_valid <= '0'; cr_write_valid <= '0';
v_int := reg_internal_init;
valid_tmp := '0'; valid_tmp := '0';
end if; end if;


-- Handle debugger stop -- Handle debugger stop
stopped_out <= '0'; stopped_out <= stop_mark_in and not serial_stall;
if stop_mark_in = '1' and v_int.outstanding = 0 then
stopped_out <= '1';
end if;

-- state machine to handle instructions that must be single
-- through the pipeline.
case r_int.state is
when IDLE =>
if valid_tmp = '1' then
if (sgl_pipe_in = '1') then
if v_int.outstanding /= 0 then
v_int.state := WAIT_FOR_PREV_TO_COMPLETE;
stall_tmp := '1';
else
-- send insn out and wait on it to complete
v_int.state := WAIT_FOR_CURR_TO_COMPLETE;
end if;
else
-- let it go out if there are no GPR or CR hazards
stall_tmp := gpr_tag_stall or cr_tag_stall;
end if;
end if;

when WAIT_FOR_PREV_TO_COMPLETE =>
if v_int.outstanding = 0 then
-- send insn out and wait on it to complete
v_int.state := WAIT_FOR_CURR_TO_COMPLETE;
else
stall_tmp := '1';
end if;

when WAIT_FOR_CURR_TO_COMPLETE =>
if v_int.outstanding = 0 then
v_int.state := IDLE;
-- XXX Don't replicate this
if valid_tmp = '1' then
if (sgl_pipe_in = '1') then
if v_int.outstanding /= 0 then
v_int.state := WAIT_FOR_PREV_TO_COMPLETE;
stall_tmp := '1';
else
-- send insn out and wait on it to complete
v_int.state := WAIT_FOR_CURR_TO_COMPLETE;
end if;
else
-- let it go out if there are no GPR or CR hazards
stall_tmp := gpr_tag_stall or cr_tag_stall;
end if;
end if;
else
stall_tmp := '1';
end if;
end case;


if stall_tmp = '1' then -- Don't let it go out if there are GPR or CR hazards
-- or we are waiting for the previous instruction to complete
if (gpr_tag_stall or cr_tag_stall or (serialize and serial_stall)) = '1' then
valid_tmp := '0'; valid_tmp := '0';
end if; end if;


gpr_write_valid <= gpr_write_valid_in and valid_tmp; gpr_write_valid <= gpr_write_valid_in and valid_tmp;
cr_write_valid <= cr_write_in and valid_tmp; cr_write_valid <= cr_write_in and valid_tmp;


if valid_tmp = '1' and deferred = '0' then
v_int.outstanding := v_int.outstanding + 1;
end if;

-- update outputs -- update outputs
valid_out <= valid_tmp; valid_out <= valid_tmp;

-- update registers
rin_int <= v_int;
end process; end process;
end; end;

@ -53,6 +53,7 @@ architecture behaviour of decode2 is
repeat : repeat_t; repeat : repeat_t;
busy : std_ulogic; busy : std_ulogic;
sgl_pipe : std_ulogic; sgl_pipe : std_ulogic;
prev_sgl : std_ulogic;
reg_a_valid : std_ulogic; reg_a_valid : std_ulogic;
reg_b_valid : std_ulogic; reg_b_valid : std_ulogic;
reg_c_valid : std_ulogic; reg_c_valid : std_ulogic;
@ -281,7 +282,7 @@ architecture behaviour of decode2 is
-- issue control signals -- issue control signals
signal control_valid_in : std_ulogic; signal control_valid_in : std_ulogic;
signal control_valid_out : std_ulogic; signal control_valid_out : std_ulogic;
signal control_sgl_pipe : std_logic; signal control_serialize : std_logic;


signal gpr_write_valid : std_ulogic; signal gpr_write_valid : std_ulogic;
signal gpr_write : gspr_index_t; signal gpr_write : gspr_index_t;
@ -317,7 +318,7 @@ begin
valid_in => control_valid_in, valid_in => control_valid_in,
deferred => deferred, deferred => deferred,
flush_in => flush_in, flush_in => flush_in,
sgl_pipe_in => control_sgl_pipe, serialize => control_serialize,
stop_mark_in => d_in.stop_mark, stop_mark_in => d_in.stop_mark,


gpr_write_valid_in => gpr_write_valid, gpr_write_valid_in => gpr_write_valid,
@ -405,7 +406,10 @@ begin
if dc2.busy = '0' then if dc2.busy = '0' then
v.e := Decode2ToExecute1Init; v.e := Decode2ToExecute1Init;


v.sgl_pipe := d_in.decode.sgl_pipe; if d_in.valid = '1' then
v.prev_sgl := dc2.sgl_pipe;
v.sgl_pipe := d_in.decode.sgl_pipe;
end if;


v.e.input_cr := d_in.decode.input_cr; v.e.input_cr := d_in.decode.input_cr;
v.e.output_cr := d_in.decode.output_cr; v.e.output_cr := d_in.decode.output_cr;
@ -527,7 +531,7 @@ begin


-- issue control -- issue control
control_valid_in <= valid_in; control_valid_in <= valid_in;
control_sgl_pipe <= v.sgl_pipe; control_serialize <= v.sgl_pipe or v.prev_sgl;


gpr_write_valid <= v.reg_o_valid; gpr_write_valid <= v.reg_o_valid;
gpr_write <= v.e.write_reg; gpr_write <= v.e.write_reg;

Loading…
Cancel
Save