core: Track CR hazards and bypasses using tags

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/269/head
Paul Mackerras 3 years ago
parent d290d2a9bb
commit ae2afeca5c

@ -44,7 +44,7 @@ all: $(all)
core_files = decode_types.vhdl common.vhdl wishbone_types.vhdl fetch1.vhdl \
utils.vhdl plru.vhdl cache_ram.vhdl icache.vhdl \
decode1.vhdl helpers.vhdl insn_helpers.vhdl \
cr_hazard.vhdl control.vhdl decode2.vhdl register_file.vhdl \
control.vhdl decode2.vhdl register_file.vhdl \
cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \
logical.vhdl countzero.vhdl multiply.vhdl divider.vhdl execute1.vhdl \
loadstore1.vhdl mmu.vhdl dcache.vhdl writeback.vhdl core_debug.vhdl \

@ -210,6 +210,12 @@ package common is
end record;
constant bypass_data_init : bypass_data_t := (tag => instr_tag_init, data => (others => '0'));

type cr_bypass_data_t is record
tag : instr_tag_t;
data : std_ulogic_vector(31 downto 0);
end record;
constant cr_bypass_data_init : cr_bypass_data_t := (tag => instr_tag_init, data => (others => '0'));

type Decode2ToExecute1Type is record
valid: std_ulogic;
unit : unit_t;
@ -225,7 +231,6 @@ package common is
read_data2: std_ulogic_vector(63 downto 0);
read_data3: std_ulogic_vector(63 downto 0);
cr: std_ulogic_vector(31 downto 0);
bypass_cr : std_ulogic;
xerc: xer_common_t;
lr: std_ulogic;
br_abs: std_ulogic;
@ -255,7 +260,7 @@ package common is
constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
(valid => '0', unit => NONE, fac => NONE, insn_type => OP_ILLEGAL, instr_tag => instr_tag_init,
write_reg_enable => '0',
bypass_cr => '0', lr => '0', br_abs => '0', rc => '0', oe => '0', invert_a => '0', addm1 => '0',
lr => '0', br_abs => '0', rc => '0', oe => '0', invert_a => '0', addm1 => '0',
invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0',
is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', br_pred => '0',
byte_reverse => '0', sign_extend => '0', update => '0', nia => (others => '0'),

@ -35,10 +35,10 @@ entity control is
gpr_c_read_in : in gspr_index_t;

execute_next_tag : in instr_tag_t;
execute_next_cr_tag : in instr_tag_t;

cr_read_in : in std_ulogic;
cr_write_in : in std_ulogic;
cr_bypassable : in std_ulogic;

valid_out : out std_ulogic;
stall_out : out std_ulogic;
@ -64,11 +64,6 @@ architecture rtl of control is

signal r_int, rin_int : reg_internal_type := reg_internal_init;

signal stall_a_out : std_ulogic;
signal stall_b_out : std_ulogic;
signal stall_c_out : std_ulogic;
signal cr_stall_out : std_ulogic;

signal gpr_write_valid : std_ulogic := '0';
signal cr_write_valid : std_ulogic := '0';

@ -76,6 +71,7 @@ architecture rtl of control is
wr_gpr : std_ulogic;
reg : gspr_index_t;
recent : std_ulogic;
wr_cr : std_ulogic;
end record;

type tag_regs_array is array(tag_number_t) of tag_register;
@ -84,31 +80,14 @@ architecture rtl of control is
signal instr_tag : instr_tag_t;

signal gpr_tag_stall : std_ulogic;
signal cr_tag_stall : std_ulogic;

signal curr_tag : tag_number_t;
signal next_tag : tag_number_t;

begin
cr_hazard0: entity work.cr_hazard
generic map (
PIPELINE_DEPTH => PIPELINE_DEPTH
)
port map (
clk => clk,
busy_in => busy_in,
deferred => deferred,
complete_in => complete_in.valid,
flush_in => flush_in,
issuing => valid_out,

cr_read_in => cr_read_in,
cr_write_in => cr_write_valid,
bypassable => cr_bypassable,

stall_out => cr_stall_out,
use_bypass => cr_bypass
);
signal curr_cr_tag : tag_number_t;

begin
control0: process(clk)
begin
if rising_edge(clk) then
@ -118,9 +97,11 @@ begin
for i in tag_number_t loop
if rst = '1' or flush_in = '1' then
tag_regs(i).wr_gpr <= '0';
tag_regs(i).wr_cr <= '0';
else
if complete_in.valid = '1' and i = complete_in.tag then
tag_regs(i).wr_gpr <= '0';
tag_regs(i).wr_cr <= '0';
report "tag " & integer'image(i) & " not valid";
end if;
if gpr_write_valid = '1' and tag_regs(i).reg = gpr_write_in then
@ -133,6 +114,7 @@ begin
tag_regs(i).wr_gpr <= gpr_write_valid;
tag_regs(i).reg <= gpr_write_in;
tag_regs(i).recent <= gpr_write_valid;
tag_regs(i).wr_cr <= cr_write_valid;
if gpr_write_valid = '1' then
report "tag " & integer'image(i) & " valid for gpr " & to_hstring(gpr_write_in);
end if;
@ -141,8 +123,12 @@ begin
end loop;
if rst = '1' then
curr_tag <= 0;
curr_cr_tag <= 0;
else
curr_tag <= next_tag;
if cr_write_valid = '1' then
curr_cr_tag <= instr_tag.tag;
end if;
end if;
end if;
end process;
@ -158,6 +144,8 @@ begin
variable byp_a : std_ulogic;
variable byp_b : std_ulogic;
variable byp_c : std_ulogic;
variable tag_cr : instr_tag_t;
variable byp_cr : std_ulogic;
begin
tag_a := instr_tag_init;
for i in tag_number_t loop
@ -219,6 +207,20 @@ begin
end if;
next_tag <= incr_tag;
instr_tag_out <= instr_tag;

-- CR hazards
tag_cr.tag := curr_cr_tag;
tag_cr.valid := cr_read_in and tag_regs(curr_cr_tag).wr_cr;
if tag_match(tag_cr, complete_in) then
tag_cr.valid := '0';
end if;
byp_cr := '0';
if EX1_BYPASS and tag_match(execute_next_cr_tag, tag_cr) then
byp_cr := '1';
end if;

cr_bypass <= byp_cr;
cr_tag_stall <= tag_cr.valid and not byp_cr;
end process;

control1 : process(all)
@ -265,7 +267,7 @@ begin
end if;
else
-- let it go out if there are no GPR or CR hazards
stall_tmp := gpr_tag_stall or cr_stall_out;
stall_tmp := gpr_tag_stall or cr_tag_stall;
end if;
end if;

@ -292,7 +294,7 @@ begin
end if;
else
-- let it go out if there are no GPR or CR hazards
stall_tmp := gpr_tag_stall or cr_stall_out;
stall_tmp := gpr_tag_stall or cr_tag_stall;
end if;
end if;
else

@ -68,6 +68,7 @@ architecture behave of core is
signal execute1_to_writeback: Execute1ToWritebackType;
signal execute1_to_fetch1: Execute1ToFetch1Type;
signal execute1_bypass: bypass_data_t;
signal execute1_cr_bypass: cr_bypass_data_t;

-- load store signals
signal execute1_to_loadstore1: Execute1ToLoadstore1Type;
@ -275,6 +276,7 @@ begin
c_in => cr_file_to_decode2,
c_out => decode2_to_cr_file,
execute_bypass => execute1_bypass,
execute_cr_bypass => execute1_cr_bypass,
log_out => log_data(119 downto 110)
);
decode2_busy_in <= ex1_busy_out;
@ -333,6 +335,7 @@ begin
fp_out => execute1_to_fpu,
e_out => execute1_to_writeback,
bypass_data => execute1_bypass,
bypass_cr_data => execute1_cr_bypass,
icache_inval => ex1_icache_inval,
dbg_msr_out => msr,
terminate_out => terminate,

@ -1,86 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

entity cr_hazard is
generic (
PIPELINE_DEPTH : natural := 1
);
port(
clk : in std_ulogic;
busy_in : in std_ulogic;
deferred : in std_ulogic;
complete_in : in std_ulogic;
flush_in : in std_ulogic;
issuing : in std_ulogic;

cr_read_in : in std_ulogic;
cr_write_in : in std_ulogic;
bypassable : in std_ulogic;

stall_out : out std_ulogic;
use_bypass : out std_ulogic
);
end entity cr_hazard;
architecture behaviour of cr_hazard is
type pipeline_entry_type is record
valid : std_ulogic;
bypass : std_ulogic;
end record;
constant pipeline_entry_init : pipeline_entry_type := (valid => '0', bypass => '0');

type pipeline_t is array(0 to PIPELINE_DEPTH) of pipeline_entry_type;
constant pipeline_t_init : pipeline_t := (others => pipeline_entry_init);

signal r, rin : pipeline_t := pipeline_t_init;
begin
cr_hazard0: process(clk)
begin
if rising_edge(clk) then
r <= rin;
end if;
end process;

cr_hazard1: process(all)
variable v : pipeline_t;
begin
v := r;

-- XXX assumes PIPELINE_DEPTH = 1
if complete_in = '1' then
v(1).valid := '0';
end if;

use_bypass <= '0';
stall_out <= '0';
if cr_read_in = '1' then
loop_0: for i in 0 to PIPELINE_DEPTH loop
if v(i).valid = '1' then
if r(i).bypass = '1' then
use_bypass <= '1';
else
stall_out <= '1';
end if;
end if;
end loop;
end if;

-- XXX assumes PIPELINE_DEPTH = 1
if busy_in = '0' then
v(1) := r(0);
v(0).valid := '0';
end if;
if deferred = '0' and issuing = '1' then
v(0).valid := cr_write_in;
v(0).bypass := bypassable;
end if;
if flush_in = '1' then
v(0).valid := '0';
v(1).valid := '0';
end if;

-- update registers
rin <= v;

end process;
end;

@ -214,7 +214,7 @@ architecture behaviour of decode1 is
2#0100111010# => (ALU, NONE, OP_BCD, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- cbcdtd
2#0100011010# => (ALU, NONE, OP_BCD, NONE, NONE, RS, RA, '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- cdtbcd
2#0000000000# => (ALU, NONE, OP_CMP, RA, RB, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE), -- cmp
2#0111111100# => (ALU, NONE, OP_CMPB, NONE, RB, RS, RA, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- cmpb
2#0111111100# => (ALU, NONE, OP_CMPB, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- cmpb
2#0011100000# => (ALU, NONE, OP_CMPEQB, RA, RB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- cmpeqb
2#0000100000# => (ALU, NONE, OP_CMP, RA, RB, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- cmpl
2#0011000000# => (ALU, NONE, OP_CMPRB, RA, RB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- cmprb

@ -37,7 +37,8 @@ entity decode2 is
c_in : in CrFileToDecode2Type;
c_out : out Decode2ToCrFileType;

execute_bypass : in bypass_data_t;
execute_bypass : in bypass_data_t;
execute_cr_bypass : in cr_bypass_data_t;

log_out : out std_ulogic_vector(9 downto 0)
);
@ -300,9 +301,9 @@ architecture behaviour of decode2 is
signal gpr_c_read : gspr_index_t;
signal gpr_c_bypass : std_ulogic;

signal cr_read_valid : std_ulogic;
signal cr_write_valid : std_ulogic;
signal cr_bypass : std_ulogic;
signal cr_bypass_avail : std_ulogic;

signal instr_tag : instr_tag_t;

@ -338,11 +339,11 @@ begin
gpr_c_read_in => gpr_c_read,

execute_next_tag => execute_bypass.tag,
execute_next_cr_tag => execute_cr_bypass.tag,

cr_read_in => d_in.decode.input_cr,
cr_read_in => cr_read_valid,
cr_write_in => cr_write_valid,
cr_bypass => cr_bypass,
cr_bypassable => cr_bypass_avail,

valid_out => control_valid_out,
stall_out => control_stall_out,
@ -391,7 +392,7 @@ begin

--v.e.input_cr := d_in.decode.input_cr;
v.e.output_cr := d_in.decode.output_cr;

decoded_reg_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, r_in.read1_data, d_in.ispr1,
d_in.nia);
decoded_reg_b := decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn, r_in.read2_data, d_in.ispr2);
@ -467,8 +468,6 @@ begin
if not (d_in.decode.insn_type = OP_MUL_H32 or d_in.decode.insn_type = OP_MUL_H64) then
v.e.oe := decode_oe(d_in.decode.rc, d_in.insn);
end if;
v.e.cr := c_in.read_cr_data;
v.e.bypass_cr := cr_bypass;
v.e.xerc := c_in.read_xerc_data;
v.e.invert_a := d_in.decode.invert_a;
v.e.addm1 := '0';
@ -516,6 +515,11 @@ begin
v.e.read_data3 := decoded_reg_c.data;
end case;

v.e.cr := c_in.read_cr_data;
if cr_bypass = '1' then
v.e.cr := execute_cr_bypass.data;
end if;

-- issue control
control_valid_in <= d_in.valid;
control_sgl_pipe <= d_in.decode.sgl_pipe;
@ -533,10 +537,9 @@ begin
gpr_c_read <= decoded_reg_c.reg;

cr_write_valid <= d_in.decode.output_cr or decode_rc(d_in.decode.rc, d_in.insn);
cr_bypass_avail <= '0';
if EX1_BYPASS and d_in.decode.unit = ALU then
cr_bypass_avail <= d_in.decode.output_cr;
end if;
-- Since ops that write CR only write some of the fields,
-- any op that writes CR effectively also reads it.
cr_read_valid <= cr_write_valid or d_in.decode.input_cr;

v.e.valid := control_valid_out;
if control_valid_out = '1' then

@ -38,6 +38,7 @@ entity execute1 is

e_out : out Execute1ToWritebackType;
bypass_data : out bypass_data_t;
bypass_cr_data : out cr_bypass_data_t;

dbg_msr_out : out std_ulogic_vector(63 downto 0);

@ -412,15 +413,7 @@ begin
v.e.xerc := e_in.xerc;
end if;

-- CR forwarding
cr_in <= e_in.cr;
if EX1_BYPASS and e_in.bypass_cr = '1' and r.e.write_cr_enable = '1' then
for i in 0 to 7 loop
if r.e.write_cr_mask(i) = '1' then
cr_in(i * 4 + 3 downto i * 4) <= r.e.write_cr_data(i * 4 + 3 downto i * 4);
end if;
end loop;
end if;

v.mul_in_progress := '0';
v.div_in_progress := '0';
@ -809,7 +802,6 @@ begin
end if;
bf := insn_bf(e_in.insn);
crnum := to_integer(unsigned(bf));
v.e.write_cr_enable := '1';
v.e.write_cr_mask := num_to_fxm(crnum);
for i in 0 to 7 loop
lo := i*4;
@ -831,7 +823,6 @@ begin
newcrf := ppc_cmprb(a_in, b_in, insn_l(e_in.insn));
bf := insn_bf(e_in.insn);
crnum := to_integer(unsigned(bf));
v.e.write_cr_enable := '1';
v.e.write_cr_mask := num_to_fxm(crnum);
v.e.write_cr_data := newcrf & newcrf & newcrf & newcrf &
newcrf & newcrf & newcrf & newcrf;
@ -839,7 +830,6 @@ begin
newcrf := ppc_cmpeqb(a_in, b_in);
bf := insn_bf(e_in.insn);
crnum := to_integer(unsigned(bf));
v.e.write_cr_enable := '1';
v.e.write_cr_mask := num_to_fxm(crnum);
v.e.write_cr_data := newcrf & newcrf & newcrf & newcrf &
newcrf & newcrf & newcrf & newcrf;
@ -913,7 +903,6 @@ begin
if cr_op(0) = '0' then -- MCRF
bf := insn_bf(e_in.insn);
bfa := insn_bfa(e_in.insn);
v.e.write_cr_enable := '1';
crnum := to_integer(unsigned(bf));
scrnum := to_integer(unsigned(bfa));
v.e.write_cr_mask := num_to_fxm(crnum);
@ -930,7 +919,6 @@ begin
v.e.write_cr_data(hi downto lo) := newcrf;
end loop;
else
v.e.write_cr_enable := '1';
bt := insn_bt(e_in.insn);
ba := insn_ba(e_in.insn);
bb := insn_bb(e_in.insn);
@ -954,7 +942,6 @@ begin
newcrf := v.e.xerc.ov & v.e.xerc.ca & v.e.xerc.ov32 & v.e.xerc.ca32;
bf := insn_bf(e_in.insn);
crnum := to_integer(unsigned(bf));
v.e.write_cr_enable := '1';
v.e.write_cr_mask := num_to_fxm(crnum);
v.e.write_cr_data := newcrf & newcrf & newcrf & newcrf &
newcrf & newcrf & newcrf & newcrf;
@ -1007,7 +994,6 @@ begin

when OP_MFCR =>
when OP_MTCRF =>
v.e.write_cr_enable := '1';
if e_in.insn(20) = '0' then
-- mtcrf
v.e.write_cr_mask := insn_fxm(e_in.insn);
@ -1269,12 +1255,23 @@ begin
end if;
v.e.write_reg := current.write_reg;
v.e.write_enable := current.write_reg_enable and v.e.valid and not exception;
v.e.write_cr_enable := current.output_cr and v.e.valid and not exception;
v.e.rc := current.rc and v.e.valid and not exception;

bypass_data.tag.valid <= current.instr_tag.valid and current.write_reg_enable and v.e.valid;
bypass_data.tag.tag <= current.instr_tag.tag;
bypass_data.data <= v.e.write_data;

bypass_cr_data.tag.valid <= current.instr_tag.valid and current.output_cr and v.e.valid;
bypass_cr_data.tag.tag <= current.instr_tag.tag;
for i in 0 to 7 loop
if v.e.write_cr_mask(i) = '1' then
bypass_cr_data.data(i*4 + 3 downto i*4) <= v.e.write_cr_data(i*4 + 3 downto i*4);
else
bypass_cr_data.data(i*4 + 3 downto i*4) <= cr_in(i*4 + 3 downto i*4);
end if;
end loop;

-- Defer completion for one cycle when redirecting.
-- This also ensures r.busy = 1 when ctrl.irq_state = WRITE_SRR1
if v.redirect = '1' then

@ -19,7 +19,6 @@ filesets:
- sim_console.vhdl
- logical.vhdl
- countzero.vhdl
- cr_hazard.vhdl
- control.vhdl
- execute1.vhdl
- fpu.vhdl

Loading…
Cancel
Save