decode2: Rework to make the stall_out signal come from a register

At present the busy/stall signal going to decode1 depends on whether
control thinks it can issue the current instruction, and that depends
on completion and bypass signals coming from execute1 and writeback.

To improve the timing of stall_out, this rearranges decode2 so that
stall_out is asserted when we have a valid instruction that couldn't
be issued in the previous cycle.  Because stall_out now comes from a
register, it is asserted one cycle later than before, which means that
decode1 could give us a new instruction before we have issued the
previous instruction.

This in turn means that we can only use d_in in the first cycle of
processing an instruction.  After the first cycle, we get register
addresses etc. from dc2 rather than d_in.

Then, to avoid the need to read register operands from register_file
in each cycle until the instruction issues, we bring the bypass path
for data being written to the register file into decode2 explicitly
rather than having it in register_file.

A new process called decode2_addrs calls decode_input_reg_* and
decode_output_reg and sets up the register file read addresses.  This
was split out (and decode_input_reg_* reworked so that they no longer
take the register data as a parameter) to try to reduce the number of
passes through the decode2_1 process that need to be done in
simulation.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/379/head
Paul Mackerras 2 years ago
parent c9e838b656
commit 2f45e545ed

@ -288,6 +288,7 @@ package common is
write_reg_enable: std_ulogic; write_reg_enable: std_ulogic;
read_reg1: gspr_index_t; read_reg1: gspr_index_t;
read_reg2: gspr_index_t; read_reg2: gspr_index_t;
read_reg3: gspr_index_t;
read_data1: std_ulogic_vector(63 downto 0); read_data1: std_ulogic_vector(63 downto 0);
read_data2: std_ulogic_vector(63 downto 0); read_data2: std_ulogic_vector(63 downto 0);
read_data3: std_ulogic_vector(63 downto 0); read_data3: std_ulogic_vector(63 downto 0);

@ -15,9 +15,7 @@ entity control is


complete_in : in instr_tag_t; complete_in : in instr_tag_t;
valid_in : in std_ulogic; valid_in : in std_ulogic;
repeated : in std_ulogic;
flush_in : in std_ulogic; flush_in : in std_ulogic;
busy_in : in std_ulogic;
deferred : in std_ulogic; deferred : in std_ulogic;
sgl_pipe_in : in std_ulogic; sgl_pipe_in : in std_ulogic;
stop_mark_in : in std_ulogic; stop_mark_in : in std_ulogic;
@ -43,7 +41,6 @@ entity control is
cr_write_in : in std_ulogic; cr_write_in : in std_ulogic;


valid_out : out std_ulogic; valid_out : out std_ulogic;
stall_out : out std_ulogic;
stopped_out : out std_ulogic; stopped_out : out std_ulogic;


gpr_bypass_a : out std_ulogic_vector(1 downto 0); gpr_bypass_a : out std_ulogic_vector(1 downto 0);
@ -157,9 +154,6 @@ begin
tag_a.tag := i; tag_a.tag := i;
end if; end if;
end loop; end loop;
if tag_match(tag_a, complete_in) then
tag_a.valid := '0';
end if;
tag_b := instr_tag_init; tag_b := instr_tag_init;
for i in tag_number_t loop for i in tag_number_t loop
if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_b_read_in then if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_b_read_in then
@ -167,9 +161,6 @@ begin
tag_b.tag := i; tag_b.tag := i;
end if; end if;
end loop; end loop;
if tag_match(tag_b, complete_in) then
tag_b.valid := '0';
end if;
tag_c := instr_tag_init; tag_c := instr_tag_init;
for i in tag_number_t loop for i in tag_number_t loop
if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_c_read_in then if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_c_read_in then
@ -177,26 +168,29 @@ begin
tag_c.tag := i; tag_c.tag := i;
end if; end if;
end loop; end loop;
if tag_match(tag_c, complete_in) then
tag_c.valid := '0';
end if;


byp_a := "00"; byp_a := "00";
if EX1_BYPASS and tag_match(execute_next_tag, tag_a) then if EX1_BYPASS and tag_match(execute_next_tag, tag_a) then
byp_a := "10"; byp_a := "01";
elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_a) then elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_a) then
byp_a := "10";
elsif tag_match(complete_in, tag_a) then
byp_a := "11"; byp_a := "11";
end if; end if;
byp_b := "00"; byp_b := "00";
if EX1_BYPASS and tag_match(execute_next_tag, tag_b) then if EX1_BYPASS and tag_match(execute_next_tag, tag_b) then
byp_b := "10"; byp_b := "01";
elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_b) then elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_b) then
byp_b := "10";
elsif tag_match(complete_in, tag_b) then
byp_b := "11"; byp_b := "11";
end if; end if;
byp_c := "00"; byp_c := "00";
if EX1_BYPASS and tag_match(execute_next_tag, tag_c) then if EX1_BYPASS and tag_match(execute_next_tag, tag_c) then
byp_c := "10"; byp_c := "01";
elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_c) then elsif EX1_BYPASS and tag_match(execute2_next_tag, tag_c) then
byp_c := "10";
elsif tag_match(complete_in, tag_c) then
byp_c := "11"; byp_c := "11";
end if; end if;


@ -204,9 +198,9 @@ begin
gpr_bypass_b <= byp_b; gpr_bypass_b <= byp_b;
gpr_bypass_c <= byp_c; gpr_bypass_c <= byp_c;


gpr_tag_stall <= (tag_a.valid and not byp_a(1)) or gpr_tag_stall <= (tag_a.valid and not (or (byp_a))) or
(tag_b.valid and not byp_b(1)) or (tag_b.valid and not (or (byp_b))) or
(tag_c.valid and not byp_c(1)); (tag_c.valid and not (or (byp_c)));


incr_tag := curr_tag; incr_tag := curr_tag;
instr_tag.tag <= curr_tag; instr_tag.tag <= curr_tag;
@ -331,7 +325,6 @@ begin


-- update outputs -- update outputs
valid_out <= valid_tmp; valid_out <= valid_tmp;
stall_out <= stall_tmp or deferred;


-- update registers -- update registers
rin_int <= v_int; rin_int <= v_int;

@ -100,6 +100,9 @@ architecture behave of core is
signal fpu_to_execute1: FPUToExecute1Type; signal fpu_to_execute1: FPUToExecute1Type;
signal fpu_to_writeback: FPUToWritebackType; signal fpu_to_writeback: FPUToWritebackType;


-- Writeback signals
signal writeback_bypass: bypass_data_t;

-- local signals -- local signals
signal fetch1_stall_in : std_ulogic; signal fetch1_stall_in : std_ulogic;
signal icache_stall_out : std_ulogic; signal icache_stall_out : std_ulogic;
@ -302,6 +305,7 @@ begin
execute_cr_bypass => execute1_cr_bypass, execute_cr_bypass => execute1_cr_bypass,
execute2_bypass => execute2_bypass, execute2_bypass => execute2_bypass,
execute2_cr_bypass => execute2_cr_bypass, execute2_cr_bypass => execute2_cr_bypass,
writeback_bypass => writeback_bypass,
log_out => log_data(119 downto 110) log_out => log_data(119 downto 110)
); );
decode2_busy_in <= ex1_busy_out; decode2_busy_in <= ex1_busy_out;
@ -463,6 +467,7 @@ begin
w_out => writeback_to_register_file, w_out => writeback_to_register_file,
c_out => writeback_to_cr_file, c_out => writeback_to_cr_file,
f_out => writeback_to_fetch1, f_out => writeback_to_fetch1,
wb_bypass => writeback_bypass,
events => writeback_events, events => writeback_events,
interrupt_out => do_interrupt, interrupt_out => do_interrupt,
complete_out => complete complete_out => complete

@ -41,6 +41,7 @@ entity decode2 is
execute_cr_bypass : in cr_bypass_data_t; execute_cr_bypass : in cr_bypass_data_t;
execute2_bypass : in bypass_data_t; execute2_bypass : in bypass_data_t;
execute2_cr_bypass : in cr_bypass_data_t; execute2_cr_bypass : in cr_bypass_data_t;
writeback_bypass : in bypass_data_t;


log_out : out std_ulogic_vector(9 downto 0) log_out : out std_ulogic_vector(9 downto 0)
); );
@ -49,8 +50,16 @@ end entity decode2;
architecture behaviour of decode2 is architecture behaviour of decode2 is
type reg_type is record type reg_type is record
e : Decode2ToExecute1Type; e : Decode2ToExecute1Type;
repeat : std_ulogic; repeat : repeat_t;
busy : std_ulogic;
sgl_pipe : std_ulogic;
reg_a_valid : std_ulogic;
reg_b_valid : std_ulogic;
reg_c_valid : std_ulogic;
reg_o_valid : std_ulogic;
end record; end record;
constant reg_type_init : reg_type :=
(e => Decode2ToExecute1Init, repeat => NONE, others => '0');


signal dc2, dc2in : reg_type; signal dc2, dc2in : reg_type;


@ -61,20 +70,21 @@ architecture behaviour of decode2 is
reg : gspr_index_t; reg : gspr_index_t;
data : std_ulogic_vector(63 downto 0); data : std_ulogic_vector(63 downto 0);
end record; end record;
constant decode_input_reg_init : decode_input_reg_t := ('0', (others => '0'), (others => '0'));


type decode_output_reg_t is record type decode_output_reg_t is record
reg_valid : std_ulogic; reg_valid : std_ulogic;
reg : gspr_index_t; reg : gspr_index_t;
end record; end record;
constant decode_output_reg_init : decode_output_reg_t := ('0', (others => '0'));


function decode_input_reg_a (t : input_reg_a_t; insn_in : std_ulogic_vector(31 downto 0); function decode_input_reg_a (t : input_reg_a_t; insn_in : std_ulogic_vector(31 downto 0);
reg_data : std_ulogic_vector(63 downto 0);
ispr : gspr_index_t; ispr : gspr_index_t;
instr_addr : std_ulogic_vector(63 downto 0)) instr_addr : std_ulogic_vector(63 downto 0))
return decode_input_reg_t is return decode_input_reg_t is
begin begin
if t = RA or (t = RA_OR_ZERO and insn_ra(insn_in) /= "00000") then if t = RA or (t = RA_OR_ZERO and insn_ra(insn_in) /= "00000") then
return ('1', gpr_to_gspr(insn_ra(insn_in)), reg_data); return ('1', gpr_to_gspr(insn_ra(insn_in)), (others => '0'));
elsif t = SPR then elsif t = SPR then
-- ISPR must be either a valid fast SPR number or all 0 for a slow SPR. -- ISPR must be either a valid fast SPR number or all 0 for a slow SPR.
-- If it's all 0, we don't treat it as a dependency as slow SPRs -- If it's all 0, we don't treat it as a dependency as slow SPRs
@ -83,27 +93,26 @@ architecture behaviour of decode2 is
assert is_fast_spr(ispr) = '1' or ispr = "0000000" assert is_fast_spr(ispr) = '1' or ispr = "0000000"
report "Decode A says SPR but ISPR is invalid:" & report "Decode A says SPR but ISPR is invalid:" &
to_hstring(ispr) severity failure; to_hstring(ispr) severity failure;
return (is_fast_spr(ispr), ispr, reg_data); return (is_fast_spr(ispr), ispr, (others => '0'));
elsif t = CIA then elsif t = CIA then
return ('0', (others => '0'), instr_addr); return ('0', (others => '0'), instr_addr);
elsif HAS_FPU and t = FRA then elsif HAS_FPU and t = FRA then
return ('1', fpr_to_gspr(insn_fra(insn_in)), reg_data); return ('1', fpr_to_gspr(insn_fra(insn_in)), (others => '0'));
else else
return ('0', (others => '0'), (others => '0')); return ('0', (others => '0'), (others => '0'));
end if; end if;
end; end;


function decode_input_reg_b (t : input_reg_b_t; insn_in : std_ulogic_vector(31 downto 0); function decode_input_reg_b (t : input_reg_b_t; insn_in : std_ulogic_vector(31 downto 0);
reg_data : std_ulogic_vector(63 downto 0);
ispr : gspr_index_t) return decode_input_reg_t is ispr : gspr_index_t) return decode_input_reg_t is
variable ret : decode_input_reg_t; variable ret : decode_input_reg_t;
begin begin
case t is case t is
when RB => when RB =>
ret := ('1', gpr_to_gspr(insn_rb(insn_in)), reg_data); ret := ('1', gpr_to_gspr(insn_rb(insn_in)), (others => '0'));
when FRB => when FRB =>
if HAS_FPU then if HAS_FPU then
ret := ('1', fpr_to_gspr(insn_frb(insn_in)), reg_data); ret := ('1', fpr_to_gspr(insn_frb(insn_in)), (others => '0'));
else else
ret := ('0', (others => '0'), (others => '0')); ret := ('0', (others => '0'), (others => '0'));
end if; end if;
@ -138,7 +147,7 @@ architecture behaviour of decode2 is
assert is_fast_spr(ispr) = '1' or ispr = "0000000" assert is_fast_spr(ispr) = '1' or ispr = "0000000"
report "Decode B says SPR but ISPR is invalid:" & report "Decode B says SPR but ISPR is invalid:" &
to_hstring(ispr) severity failure; to_hstring(ispr) severity failure;
ret := (is_fast_spr(ispr), ispr, reg_data); ret := (is_fast_spr(ispr), ispr, (others => '0'));
when NONE => when NONE =>
ret := ('0', (others => '0'), (others => '0')); ret := ('0', (others => '0'), (others => '0'));
end case; end case;
@ -146,23 +155,23 @@ architecture behaviour of decode2 is
return ret; return ret;
end; end;


function decode_input_reg_c (t : input_reg_c_t; insn_in : std_ulogic_vector(31 downto 0); function decode_input_reg_c (t : input_reg_c_t; insn_in : std_ulogic_vector(31 downto 0))
reg_data : std_ulogic_vector(63 downto 0)) return decode_input_reg_t is return decode_input_reg_t is
begin begin
case t is case t is
when RS => when RS =>
return ('1', gpr_to_gspr(insn_rs(insn_in)), reg_data); return ('1', gpr_to_gspr(insn_rs(insn_in)), (others => '0'));
when RCR => when RCR =>
return ('1', gpr_to_gspr(insn_rcreg(insn_in)), reg_data); return ('1', gpr_to_gspr(insn_rcreg(insn_in)), (others => '0'));
when FRS => when FRS =>
if HAS_FPU then if HAS_FPU then
return ('1', fpr_to_gspr(insn_frt(insn_in)), reg_data); return ('1', fpr_to_gspr(insn_frt(insn_in)), (others => '0'));
else else
return ('0', (others => '0'), (others => '0')); return ('0', (others => '0'), (others => '0'));
end if; end if;
when FRC => when FRC =>
if HAS_FPU then if HAS_FPU then
return ('1', fpr_to_gspr(insn_frc(insn_in)), reg_data); return ('1', fpr_to_gspr(insn_frc(insn_in)), (others => '0'));
else else
return ('0', (others => '0'), (others => '0')); return ('0', (others => '0'), (others => '0'));
end if; end if;
@ -264,10 +273,14 @@ architecture behaviour of decode2 is
others => "000" others => "000"
); );


signal decoded_reg_a : decode_input_reg_t;
signal decoded_reg_b : decode_input_reg_t;
signal decoded_reg_c : decode_input_reg_t;
signal decoded_reg_o : decode_output_reg_t;

-- issue control signals -- issue control signals
signal control_valid_in : std_ulogic; signal control_valid_in : std_ulogic;
signal control_valid_out : std_ulogic; signal control_valid_out : std_ulogic;
signal control_stall_out : std_ulogic;
signal control_sgl_pipe : std_logic; signal control_sgl_pipe : std_logic;


signal gpr_write_valid : std_ulogic; signal gpr_write_valid : std_ulogic;
@ -302,8 +315,6 @@ begin


complete_in => complete_in, complete_in => complete_in,
valid_in => control_valid_in, valid_in => control_valid_in,
repeated => dc2.repeat,
busy_in => busy_in,
deferred => deferred, deferred => deferred,
flush_in => flush_in, flush_in => flush_in,
sgl_pipe_in => control_sgl_pipe, sgl_pipe_in => control_sgl_pipe,
@ -331,7 +342,6 @@ begin
cr_bypass => cr_bypass, cr_bypass => cr_bypass,


valid_out => control_valid_out, valid_out => control_valid_out,
stall_out => control_stall_out,
stopped_out => stopped_out, stopped_out => stopped_out,


gpr_bypass_a => gpr_a_bypass, gpr_bypass_a => gpr_a_bypass,
@ -346,9 +356,12 @@ begin
decode2_0: process(clk) decode2_0: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
if rst = '1' or flush_in = '1' or deferred = '0' then if rst = '1' or flush_in = '1' then
dc2 <= reg_type_init;
elsif deferred = '0' then
if dc2in.e.valid = '1' then if dc2in.e.valid = '1' then
report "execute " & to_hstring(dc2in.e.nia); report "execute " & to_hstring(dc2in.e.nia) &
" tag=" & integer'image(dc2in.e.instr_tag.tag) & std_ulogic'image(dc2in.e.instr_tag.valid);
end if; end if;
dc2 <= dc2in; dc2 <= dc2in;
end if; end if;
@ -357,20 +370,44 @@ begin


c_out.read <= d_in.decode.input_cr; c_out.read <= d_in.decode.input_cr;


-- decode2_addrs: works out which registers the instruction on d_in reads and
-- writes (decoded_reg_a/b/c/o) and drives the register file read-port
-- enables and addresses on r_out from the decoded input registers.
decode2_addrs: process(all)
begin
-- Default every decoded register to its *_init constant; these defaults
-- are what remain in effect whenever d_in.valid = '0'.
decoded_reg_a <= decode_input_reg_init;
decoded_reg_b <= decode_input_reg_init;
decoded_reg_c <= decode_input_reg_init;
decoded_reg_o <= decode_output_reg_init;
if d_in.valid = '1' then
-- Only a valid instruction on d_in is decoded.  The input-register
-- functions are called without any register data here.
decoded_reg_a <= decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, d_in.ispr1, d_in.nia);
decoded_reg_b <= decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn, d_in.ispr2);
decoded_reg_c <= decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn);
decoded_reg_o <= decode_output_reg (d_in.decode.output_reg_a, d_in.insn, d_in.ispro);
end if;

-- Register file read ports: enable and address for each of the three
-- input operands.  No explicit "and d_in.valid" gating is applied here;
-- presumably reg_valid is '0' in decode_input_reg_init so the enables
-- are low when d_in is not valid -- TODO confirm against the record layout.
-- NOTE(review): decoded_reg_* are signals assigned earlier in this same
-- process, so the values read below are the ones from before this
-- activation; process(all) should re-run once they update -- confirm
-- the extra delta cycle is intended.
r_out.read1_enable <= decoded_reg_a.reg_valid;
r_out.read1_reg <= decoded_reg_a.reg;
r_out.read2_enable <= decoded_reg_b.reg_valid;
r_out.read2_reg <= decoded_reg_b.reg;
r_out.read3_enable <= decoded_reg_c.reg_valid;
r_out.read3_reg <= decoded_reg_c.reg;

end process;

decode2_1: process(all) decode2_1: process(all)
variable v : reg_type; variable v : reg_type;
variable decoded_reg_a : decode_input_reg_t;
variable decoded_reg_b : decode_input_reg_t;
variable decoded_reg_c : decode_input_reg_t;
variable decoded_reg_o : decode_output_reg_t;
variable length : std_ulogic_vector(3 downto 0); variable length : std_ulogic_vector(3 downto 0);
variable op : insn_type_t; variable op : insn_type_t;
variable valid_in : std_ulogic;
begin begin
v := dc2; v := dc2;


valid_in := d_in.valid or dc2.busy;

if dc2.busy = '0' then
v.e := Decode2ToExecute1Init; v.e := Decode2ToExecute1Init;


--v.e.input_cr := d_in.decode.input_cr; v.sgl_pipe := d_in.decode.sgl_pipe;

v.e.input_cr := d_in.decode.input_cr;
v.e.output_cr := d_in.decode.output_cr; v.e.output_cr := d_in.decode.output_cr;


-- Work out whether XER common bits are set -- Work out whether XER common bits are set
@ -389,11 +426,10 @@ begin
when others => when others =>
end case; end case;


decoded_reg_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, r_in.read1_data, d_in.ispr1, v.reg_a_valid := decoded_reg_a.reg_valid;
d_in.nia); v.reg_b_valid := decoded_reg_b.reg_valid;
decoded_reg_b := decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn, r_in.read2_data, d_in.ispr2); v.reg_c_valid := decoded_reg_c.reg_valid;
decoded_reg_c := decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn, r_in.read3_data); v.reg_o_valid := decoded_reg_o.reg_valid;
decoded_reg_o := decode_output_reg (d_in.decode.output_reg_a, d_in.insn, d_in.ispro);


if d_in.decode.lr = '1' then if d_in.decode.lr = '1' then
v.e.lr := insn_lk(d_in.insn); v.e.lr := insn_lk(d_in.insn);
@ -402,34 +438,16 @@ begin
end if; end if;
op := d_in.decode.insn_type; op := d_in.decode.insn_type;


v.repeat := d_in.decode.repeat;
if d_in.decode.repeat /= NONE then if d_in.decode.repeat /= NONE then
v.e.repeat := '1'; v.e.repeat := '1';
v.e.second := dc2.repeat;
case d_in.decode.repeat is
when DUPD =>
-- update-form loads, 2nd instruction writes RA
if dc2.repeat = '1' then
decoded_reg_o.reg := decoded_reg_a.reg;
end if;
when others =>
end case;
elsif v.e.lr = '1' and decoded_reg_a.reg_valid = '1' then elsif v.e.lr = '1' and decoded_reg_a.reg_valid = '1' then
-- bcl/bclrl/bctarl that needs to write both CTR and LR has to be doubled -- bcl/bclrl/bctarl that needs to write both CTR and LR has to be doubled
v.e.repeat := '1'; v.e.repeat := '1';
v.e.second := dc2.repeat;
-- first one does CTR, second does LR
decoded_reg_o.reg(0) := not dc2.repeat;
end if; end if;


v.e.spr_select := d_in.spr_info; v.e.spr_select := d_in.spr_info;


r_out.read1_enable <= decoded_reg_a.reg_valid and d_in.valid;
r_out.read1_reg <= decoded_reg_a.reg;
r_out.read2_enable <= decoded_reg_b.reg_valid and d_in.valid;
r_out.read2_reg <= decoded_reg_b.reg;
r_out.read3_enable <= decoded_reg_c.reg_valid and d_in.valid;
r_out.read3_reg <= decoded_reg_c.reg;

case d_in.decode.length is case d_in.decode.length is
when is1B => when is1B =>
length := "0001"; length := "0001";
@ -447,9 +465,9 @@ begin
v.e.nia := d_in.nia; v.e.nia := d_in.nia;
v.e.unit := d_in.decode.unit; v.e.unit := d_in.decode.unit;
v.e.fac := d_in.decode.facility; v.e.fac := d_in.decode.facility;
v.e.instr_tag := instr_tag;
v.e.read_reg1 := decoded_reg_a.reg; v.e.read_reg1 := decoded_reg_a.reg;
v.e.read_reg2 := decoded_reg_b.reg; v.e.read_reg2 := decoded_reg_b.reg;
v.e.read_reg3 := decoded_reg_c.reg;
v.e.write_reg := decoded_reg_o.reg; v.e.write_reg := decoded_reg_o.reg;
v.e.write_reg_enable := decoded_reg_o.reg_valid; v.e.write_reg_enable := decoded_reg_o.reg_valid;
v.e.rc := decode_rc(d_in.decode.rc, d_in.insn); v.e.rc := decode_rc(d_in.decode.rc, d_in.insn);
@ -472,7 +490,7 @@ begin
v.e.result_sel := result_select(op); v.e.result_sel := result_select(op);
v.e.sub_select := subresult_select(op); v.e.sub_select := subresult_select(op);
if op = OP_BC or op = OP_BCREG then if op = OP_BC or op = OP_BCREG then
if d_in.insn(23) = '0' and dc2.repeat = '0' and if d_in.insn(23) = '0' and
not (d_in.decode.insn_type = OP_BCREG and d_in.insn(10) = '0') then not (d_in.decode.insn_type = OP_BCREG and d_in.insn(10) = '0') then
-- decrement CTR if BO(2) = 0 and not bcctr -- decrement CTR if BO(2) = 0 and not bcctr
v.e.addm1 := '1'; v.e.addm1 := '1';
@ -491,71 +509,107 @@ begin
end if; end if;
end if; end if;


elsif dc2.e.valid = '1' then
-- dc2.busy = 1 and dc2.e.valid = 1, thus this must be a repeated instruction.
-- Set up for the second iteration (if deferred = 1 this will all be ignored)
v.e.second := '1';
case dc2.repeat is
when DUPD =>
-- update-form loads, 2nd instruction writes RA
v.e.write_reg := dc2.e.read_reg1;
when NONE =>
-- bcl/bclrl/bctarl that needs to write both CTR and LR
v.e.write_reg(0) := '0'; -- point to LR
v.e.result_sel := "110"; -- select NIA (to go to LR)
when others =>
end case;
end if;

-- issue control
control_valid_in <= valid_in;
control_sgl_pipe <= v.sgl_pipe;

gpr_write_valid <= v.reg_o_valid;
gpr_write <= v.e.write_reg;

gpr_a_read_valid <= v.reg_a_valid;
gpr_a_read <= v.e.read_reg1;

gpr_b_read_valid <= v.reg_b_valid;
gpr_b_read <= v.e.read_reg2;

gpr_c_read_valid <= v.reg_c_valid;
gpr_c_read <= v.e.read_reg3;

cr_write_valid <= v.e.output_cr or v.e.rc;
-- Since ops that write CR only write some of the fields,
-- any op that writes CR effectively also reads it.
cr_read_valid <= cr_write_valid or v.e.input_cr;

-- See if any of the operands can get their value via the bypass path. -- See if any of the operands can get their value via the bypass path.
if dc2.busy = '0' or gpr_a_bypass /= "00" then
case gpr_a_bypass is case gpr_a_bypass is
when "10" => when "01" =>
v.e.read_data1 := execute_bypass.data; v.e.read_data1 := execute_bypass.data;
when "11" => when "10" =>
v.e.read_data1 := execute2_bypass.data; v.e.read_data1 := execute2_bypass.data;
when "11" =>
v.e.read_data1 := writeback_bypass.data;
when others => when others =>
if decoded_reg_a.reg_valid = '1' then
v.e.read_data1 := r_in.read1_data;
else
v.e.read_data1 := decoded_reg_a.data; v.e.read_data1 := decoded_reg_a.data;
end if;
end case; end case;
end if;
if dc2.busy = '0' or gpr_b_bypass /= "00" then
case gpr_b_bypass is case gpr_b_bypass is
when "10" => when "01" =>
v.e.read_data2 := execute_bypass.data; v.e.read_data2 := execute_bypass.data;
when "11" => when "10" =>
v.e.read_data2 := execute2_bypass.data; v.e.read_data2 := execute2_bypass.data;
when "11" =>
v.e.read_data2 := writeback_bypass.data;
when others => when others =>
if decoded_reg_b.reg_valid = '1' then
v.e.read_data2 := r_in.read2_data;
else
v.e.read_data2 := decoded_reg_b.data; v.e.read_data2 := decoded_reg_b.data;
end if;
end case; end case;
end if;
if dc2.busy = '0' or gpr_c_bypass /= "00" then
case gpr_c_bypass is case gpr_c_bypass is
when "10" => when "01" =>
v.e.read_data3 := execute_bypass.data; v.e.read_data3 := execute_bypass.data;
when "11" => when "10" =>
v.e.read_data3 := execute2_bypass.data; v.e.read_data3 := execute2_bypass.data;
when "11" =>
v.e.read_data3 := writeback_bypass.data;
when others => when others =>
if decoded_reg_c.reg_valid = '1' then
v.e.read_data3 := r_in.read3_data;
else
v.e.read_data3 := decoded_reg_c.data; v.e.read_data3 := decoded_reg_c.data;
end if;
end case; end case;
end if;


v.e.cr := c_in.read_cr_data; case cr_bypass is
if cr_bypass = "10" then when "10" =>
v.e.cr := execute_cr_bypass.data; v.e.cr := execute_cr_bypass.data;
elsif cr_bypass = "11" then when "11" =>
v.e.cr := execute2_cr_bypass.data; v.e.cr := execute2_cr_bypass.data;
end if; when others =>

v.e.cr := c_in.read_cr_data;
-- issue control end case;
control_valid_in <= d_in.valid;
control_sgl_pipe <= d_in.decode.sgl_pipe;

gpr_write_valid <= v.e.write_reg_enable;
gpr_write <= decoded_reg_o.reg;

gpr_a_read_valid <= decoded_reg_a.reg_valid;
gpr_a_read <= decoded_reg_a.reg;

gpr_b_read_valid <= decoded_reg_b.reg_valid;
gpr_b_read <= decoded_reg_b.reg;

gpr_c_read_valid <= decoded_reg_c.reg_valid;
gpr_c_read <= decoded_reg_c.reg;

cr_write_valid <= d_in.decode.output_cr or decode_rc(d_in.decode.rc, d_in.insn);
-- Since ops that write CR only write some of the fields,
-- any op that writes CR effectively also reads it.
cr_read_valid <= cr_write_valid or d_in.decode.input_cr;


v.e.valid := control_valid_out; v.e.valid := control_valid_out;
if control_valid_out = '1' then v.e.instr_tag := instr_tag;
v.repeat := v.e.repeat and not dc2.repeat; v.busy := valid_in and (not control_valid_out or (v.e.repeat and not v.e.second));
end if;

stall_out <= control_stall_out or v.repeat;


if rst = '1' or flush_in = '1' then stall_out <= dc2.busy or deferred;
v.e := Decode2ToExecute1Init;
v.repeat := '0';
end if;


-- Update registers -- Update registers
dc2in <= v; dc2in <= v;
@ -574,9 +628,9 @@ begin
dc2.e.valid & dc2.e.valid &
stopped_out & stopped_out &
stall_out & stall_out &
(gpr_a_bypass(1) or gpr_a_bypass(0)) & (gpr_a_bypass(1) xor gpr_a_bypass(0)) &
(gpr_b_bypass(1) or gpr_b_bypass(0)) & (gpr_b_bypass(1) xor gpr_b_bypass(0)) &
(gpr_c_bypass(1) or gpr_c_bypass(0)); (gpr_c_bypass(1) xor gpr_c_bypass(0));
end if; end if;
end process; end process;
log_out <= log_data; log_out <= log_data;

@ -100,18 +100,8 @@ begin
d_out.read2_data <= rd_port_b; d_out.read2_data <= rd_port_b;
d_out.read3_data <= registers(to_integer(unsigned(c_addr))); d_out.read3_data <= registers(to_integer(unsigned(c_addr)));


-- Forward any written data -- Forwarding of written data is now done explicitly with a bypass path
if w_in.write_enable = '1' then -- from writeback to decode2.
if a_addr = w_addr then
d_out.read1_data <= w_in.write_data;
end if;
if b_addr = w_addr then
d_out.read2_data <= w_in.write_data;
end if;
if c_addr = w_addr then
d_out.read3_data <= w_in.write_data;
end if;
end if;
end process register_read_0; end process register_read_0;


-- Latch read data and ack if dbg read requested and B port not busy -- Latch read data and ack if dbg read requested and B port not busy

@ -19,6 +19,8 @@ entity writeback is
c_out : out WritebackToCrFileType; c_out : out WritebackToCrFileType;
f_out : out WritebackToFetch1Type; f_out : out WritebackToFetch1Type;


wb_bypass : out bypass_data_t;

-- PMU event bus -- PMU event bus
events : out WritebackEventType; events : out WritebackEventType;


@ -215,6 +217,11 @@ begin
f_out <= f; f_out <= f;
flush_out <= f_out.redirect; flush_out <= f_out.redirect;


-- Register write data bypass to decode2
wb_bypass.tag.tag <= complete_out.tag;
wb_bypass.tag.valid <= complete_out.valid and w_out.write_enable;
wb_bypass.data <= w_out.write_data;

rin <= v; rin <= v;
end process; end process;
end; end;

Loading…
Cancel
Save