core: Restore bypass path from execute1

This changes the bypass path.  Previously it went from after
execute1's output to after decode2's output.  Now it goes from before
execute1's output register to before decode2's output register.  The
reason is that the new path will be simpler to manage when there are
possibly multiple instructions in flight.  This means that the
bypassing can be managed inside decode2 and control.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/269/head
Paul Mackerras 4 years ago
parent c0b45e153b
commit d290d2a9bb

@ -137,6 +137,7 @@ package common is
valid : std_ulogic; valid : std_ulogic;
end record; end record;
constant instr_tag_init : instr_tag_t := (tag => 0, valid => '0'); constant instr_tag_init : instr_tag_t := (tag => 0, valid => '0');
function tag_match(tag1 : instr_tag_t; tag2 : instr_tag_t) return boolean;


type irq_state_t is (WRITE_SRR0, WRITE_SRR1); type irq_state_t is (WRITE_SRR0, WRITE_SRR1);


@ -203,6 +204,12 @@ package common is
redirect_nia : std_ulogic_vector(63 downto 0); redirect_nia : std_ulogic_vector(63 downto 0);
end record; end record;


type bypass_data_t is record
tag : instr_tag_t;
data : std_ulogic_vector(63 downto 0);
end record;
constant bypass_data_init : bypass_data_t := (tag => instr_tag_init, data => (others => '0'));

type Decode2ToExecute1Type is record type Decode2ToExecute1Type is record
valid: std_ulogic; valid: std_ulogic;
unit : unit_t; unit : unit_t;
@ -217,9 +224,6 @@ package common is
read_data1: std_ulogic_vector(63 downto 0); read_data1: std_ulogic_vector(63 downto 0);
read_data2: std_ulogic_vector(63 downto 0); read_data2: std_ulogic_vector(63 downto 0);
read_data3: std_ulogic_vector(63 downto 0); read_data3: std_ulogic_vector(63 downto 0);
bypass_data1: std_ulogic;
bypass_data2: std_ulogic;
bypass_data3: std_ulogic;
cr: std_ulogic_vector(31 downto 0); cr: std_ulogic_vector(31 downto 0);
bypass_cr : std_ulogic; bypass_cr : std_ulogic;
xerc: xer_common_t; xerc: xer_common_t;
@ -250,7 +254,7 @@ package common is
end record; end record;
constant Decode2ToExecute1Init : Decode2ToExecute1Type := constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
(valid => '0', unit => NONE, fac => NONE, insn_type => OP_ILLEGAL, instr_tag => instr_tag_init, (valid => '0', unit => NONE, fac => NONE, insn_type => OP_ILLEGAL, instr_tag => instr_tag_init,
write_reg_enable => '0', bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0', write_reg_enable => '0',
bypass_cr => '0', lr => '0', br_abs => '0', rc => '0', oe => '0', invert_a => '0', addm1 => '0', bypass_cr => '0', lr => '0', br_abs => '0', rc => '0', oe => '0', invert_a => '0', addm1 => '0',
invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0', invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0',
is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', br_pred => '0', is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', br_pred => '0',
@ -644,4 +648,9 @@ package body common is
begin begin
return "10" & f; return "10" & f;
end; end;

function tag_match(tag1 : instr_tag_t; tag2 : instr_tag_t) return boolean is
begin
return tag1.valid = '1' and tag2.valid = '1' and tag1.tag = tag2.tag;
end;
end common; end common;

@ -6,6 +6,7 @@ use work.common.all;


entity control is entity control is
generic ( generic (
EX1_BYPASS : boolean := true;
PIPELINE_DEPTH : natural := 2 PIPELINE_DEPTH : natural := 2
); );
port ( port (
@ -23,7 +24,6 @@ entity control is


gpr_write_valid_in : in std_ulogic; gpr_write_valid_in : in std_ulogic;
gpr_write_in : in gspr_index_t; gpr_write_in : in gspr_index_t;
gpr_bypassable : in std_ulogic;


gpr_a_read_valid_in : in std_ulogic; gpr_a_read_valid_in : in std_ulogic;
gpr_a_read_in : in gspr_index_t; gpr_a_read_in : in gspr_index_t;
@ -34,6 +34,8 @@ entity control is
gpr_c_read_valid_in : in std_ulogic; gpr_c_read_valid_in : in std_ulogic;
gpr_c_read_in : in gspr_index_t; gpr_c_read_in : in gspr_index_t;


execute_next_tag : in instr_tag_t;

cr_read_in : in std_ulogic; cr_read_in : in std_ulogic;
cr_write_in : in std_ulogic; cr_write_in : in std_ulogic;
cr_bypassable : in std_ulogic; cr_bypassable : in std_ulogic;
@ -81,19 +83,11 @@ architecture rtl of control is


signal instr_tag : instr_tag_t; signal instr_tag : instr_tag_t;


signal gpr_tag_a : instr_tag_t;
signal gpr_tag_b : instr_tag_t;
signal gpr_tag_c : instr_tag_t;
signal gpr_tag_stall : std_ulogic; signal gpr_tag_stall : std_ulogic;


signal curr_tag : tag_number_t; signal curr_tag : tag_number_t;
signal next_tag : tag_number_t; signal next_tag : tag_number_t;


function tag_match(tag1 : instr_tag_t; tag2 : instr_tag_t) return boolean is
begin
return tag1.valid = '1' and tag2.valid = '1' and tag1.tag = tag2.tag;
end;

begin begin
cr_hazard0: entity work.cr_hazard cr_hazard0: entity work.cr_hazard
generic map ( generic map (
@ -115,10 +109,6 @@ begin
use_bypass => cr_bypass use_bypass => cr_bypass
); );


gpr_bypass_a <= '0';
gpr_bypass_b <= '0';
gpr_bypass_c <= '0';

control0: process(clk) control0: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
@ -165,6 +155,9 @@ begin
variable tag_s : instr_tag_t; variable tag_s : instr_tag_t;
variable tag_t : instr_tag_t; variable tag_t : instr_tag_t;
variable incr_tag : tag_number_t; variable incr_tag : tag_number_t;
variable byp_a : std_ulogic;
variable byp_b : std_ulogic;
variable byp_c : std_ulogic;
begin begin
tag_a := instr_tag_init; tag_a := instr_tag_init;
for i in tag_number_t loop for i in tag_number_t loop
@ -196,10 +189,27 @@ begin
if tag_match(tag_c, complete_in) then if tag_match(tag_c, complete_in) then
tag_c.valid := '0'; tag_c.valid := '0';
end if; end if;
gpr_tag_a <= tag_a;
gpr_tag_b <= tag_b; byp_a := '0';
gpr_tag_c <= tag_c; if EX1_BYPASS and tag_match(execute_next_tag, tag_a) then
gpr_tag_stall <= tag_a.valid or tag_b.valid or tag_c.valid; byp_a := '1';
end if;
byp_b := '0';
if EX1_BYPASS and tag_match(execute_next_tag, tag_b) then
byp_b := '1';
end if;
byp_c := '0';
if EX1_BYPASS and tag_match(execute_next_tag, tag_c) then
byp_c := '1';
end if;

gpr_bypass_a <= byp_a;
gpr_bypass_b <= byp_b;
gpr_bypass_c <= byp_c;

gpr_tag_stall <= (tag_a.valid and not byp_a) or
(tag_b.valid and not byp_b) or
(tag_c.valid and not byp_c);


incr_tag := curr_tag; incr_tag := curr_tag;
instr_tag.tag <= curr_tag; instr_tag.tag <= curr_tag;

@ -67,6 +67,7 @@ architecture behave of core is
-- execute signals -- execute signals
signal execute1_to_writeback: Execute1ToWritebackType; signal execute1_to_writeback: Execute1ToWritebackType;
signal execute1_to_fetch1: Execute1ToFetch1Type; signal execute1_to_fetch1: Execute1ToFetch1Type;
signal execute1_bypass: bypass_data_t;


-- load store signals -- load store signals
signal execute1_to_loadstore1: Execute1ToLoadstore1Type; signal execute1_to_loadstore1: Execute1ToLoadstore1Type;
@ -273,6 +274,7 @@ begin
r_out => decode2_to_register_file, r_out => decode2_to_register_file,
c_in => cr_file_to_decode2, c_in => cr_file_to_decode2,
c_out => decode2_to_cr_file, c_out => decode2_to_cr_file,
execute_bypass => execute1_bypass,
log_out => log_data(119 downto 110) log_out => log_data(119 downto 110)
); );
decode2_busy_in <= ex1_busy_out; decode2_busy_in <= ex1_busy_out;
@ -330,6 +332,7 @@ begin
f_out => execute1_to_fetch1, f_out => execute1_to_fetch1,
fp_out => execute1_to_fpu, fp_out => execute1_to_fpu,
e_out => execute1_to_writeback, e_out => execute1_to_writeback,
bypass_data => execute1_bypass,
icache_inval => ex1_icache_inval, icache_inval => ex1_icache_inval,
dbg_msr_out => msr, dbg_msr_out => msr,
terminate_out => terminate, terminate_out => terminate,

@ -37,6 +37,8 @@ entity decode2 is
c_in : in CrFileToDecode2Type; c_in : in CrFileToDecode2Type;
c_out : out Decode2ToCrFileType; c_out : out Decode2ToCrFileType;


execute_bypass : in bypass_data_t;

log_out : out std_ulogic_vector(9 downto 0) log_out : out std_ulogic_vector(9 downto 0)
); );
end entity decode2; end entity decode2;
@ -285,19 +287,18 @@ architecture behaviour of decode2 is


signal gpr_write_valid : std_ulogic; signal gpr_write_valid : std_ulogic;
signal gpr_write : gspr_index_t; signal gpr_write : gspr_index_t;
signal gpr_bypassable : std_ulogic;


signal gpr_a_read_valid : std_ulogic; signal gpr_a_read_valid : std_ulogic;
signal gpr_a_read :gspr_index_t; signal gpr_a_read : gspr_index_t;
signal gpr_a_bypass : std_ulogic; signal gpr_a_bypass : std_ulogic;


signal gpr_b_read_valid : std_ulogic; signal gpr_b_read_valid : std_ulogic;
signal gpr_b_read : gspr_index_t; signal gpr_b_read : gspr_index_t;
signal gpr_b_bypass : std_ulogic; signal gpr_b_bypass : std_ulogic;


signal gpr_c_read_valid : std_ulogic; signal gpr_c_read_valid : std_ulogic;
signal gpr_c_read : gspr_index_t; signal gpr_c_read : gspr_index_t;
signal gpr_c_bypass : std_ulogic; signal gpr_c_bypass : std_ulogic;


signal cr_write_valid : std_ulogic; signal cr_write_valid : std_ulogic;
signal cr_bypass : std_ulogic; signal cr_bypass : std_ulogic;
@ -308,6 +309,7 @@ architecture behaviour of decode2 is
begin begin
control_0: entity work.control control_0: entity work.control
generic map ( generic map (
EX1_BYPASS => EX1_BYPASS,
PIPELINE_DEPTH => 1 PIPELINE_DEPTH => 1
) )
port map ( port map (
@ -325,7 +327,6 @@ begin


gpr_write_valid_in => gpr_write_valid, gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write, gpr_write_in => gpr_write,
gpr_bypassable => gpr_bypassable,


gpr_a_read_valid_in => gpr_a_read_valid, gpr_a_read_valid_in => gpr_a_read_valid,
gpr_a_read_in => gpr_a_read, gpr_a_read_in => gpr_a_read,
@ -336,6 +337,8 @@ begin
gpr_c_read_valid_in => gpr_c_read_valid, gpr_c_read_valid_in => gpr_c_read_valid,
gpr_c_read_in => gpr_c_read, gpr_c_read_in => gpr_c_read,


execute_next_tag => execute_bypass.tag,

cr_read_in => d_in.decode.input_cr, cr_read_in => d_in.decode.input_cr,
cr_write_in => cr_write_valid, cr_write_in => cr_write_valid,
cr_bypass => cr_bypass, cr_bypass => cr_bypass,
@ -457,13 +460,7 @@ begin
v.e.fac := d_in.decode.facility; v.e.fac := d_in.decode.facility;
v.e.instr_tag := instr_tag; v.e.instr_tag := instr_tag;
v.e.read_reg1 := decoded_reg_a.reg; v.e.read_reg1 := decoded_reg_a.reg;
v.e.read_data1 := decoded_reg_a.data;
v.e.bypass_data1 := gpr_a_bypass;
v.e.read_reg2 := decoded_reg_b.reg; v.e.read_reg2 := decoded_reg_b.reg;
v.e.read_data2 := decoded_reg_b.data;
v.e.bypass_data2 := gpr_b_bypass;
v.e.read_data3 := decoded_reg_c.data;
v.e.bypass_data3 := gpr_c_bypass;
v.e.write_reg := decoded_reg_o.reg; v.e.write_reg := decoded_reg_o.reg;
v.e.write_reg_enable := decoded_reg_o.reg_valid; v.e.write_reg_enable := decoded_reg_o.reg_valid;
v.e.rc := decode_rc(d_in.decode.rc, d_in.insn); v.e.rc := decode_rc(d_in.decode.rc, d_in.insn);
@ -499,16 +496,32 @@ begin
end if; end if;
end if; end if;


-- See if any of the operands can get their value via the bypass path.
case gpr_a_bypass is
when '1' =>
v.e.read_data1 := execute_bypass.data;
when others =>
v.e.read_data1 := decoded_reg_a.data;
end case;
case gpr_b_bypass is
when '1' =>
v.e.read_data2 := execute_bypass.data;
when others =>
v.e.read_data2 := decoded_reg_b.data;
end case;
case gpr_c_bypass is
when '1' =>
v.e.read_data3 := execute_bypass.data;
when others =>
v.e.read_data3 := decoded_reg_c.data;
end case;

-- issue control -- issue control
control_valid_in <= d_in.valid; control_valid_in <= d_in.valid;
control_sgl_pipe <= d_in.decode.sgl_pipe; control_sgl_pipe <= d_in.decode.sgl_pipe;


gpr_write_valid <= v.e.write_reg_enable; gpr_write_valid <= v.e.write_reg_enable;
gpr_write <= decoded_reg_o.reg; gpr_write <= decoded_reg_o.reg;
gpr_bypassable <= '0';
if EX1_BYPASS and d_in.decode.unit = ALU then
gpr_bypassable <= '1';
end if;


gpr_a_read_valid <= decoded_reg_a.reg_valid; gpr_a_read_valid <= decoded_reg_a.reg_valid;
gpr_a_read <= decoded_reg_a.reg; gpr_a_read <= decoded_reg_a.reg;
@ -554,9 +567,9 @@ begin
r.e.valid & r.e.valid &
stopped_out & stopped_out &
stall_out & stall_out &
r.e.bypass_data3 & gpr_a_bypass &
r.e.bypass_data2 & gpr_b_bypass &
r.e.bypass_data1; gpr_c_bypass;
end if; end if;
end process; end process;
log_out <= log_data; log_out <= log_data;

@ -37,6 +37,7 @@ entity execute1 is
fp_out : out Execute1ToFPUType; fp_out : out Execute1ToFPUType;


e_out : out Execute1ToWritebackType; e_out : out Execute1ToWritebackType;
bypass_data : out bypass_data_t;


dbg_msr_out : out std_ulogic_vector(63 downto 0); dbg_msr_out : out std_ulogic_vector(63 downto 0);


@ -283,9 +284,9 @@ begin
dbg_msr_out <= ctrl.msr; dbg_msr_out <= ctrl.msr;
log_rd_addr <= r.log_addr_spr; log_rd_addr <= r.log_addr_spr;


a_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data1 = '1' else e_in.read_data1; a_in <= e_in.read_data1;
b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2; b_in <= e_in.read_data2;
c_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data3 = '1' else e_in.read_data3; c_in <= e_in.read_data3;


busy_out <= l_in.busy or r.busy or fp_in.busy; busy_out <= l_in.busy or r.busy or fp_in.busy;
valid_in <= e_in.valid and not busy_out; valid_in <= e_in.valid and not busy_out;
@ -1270,6 +1271,10 @@ begin
v.e.write_enable := current.write_reg_enable and v.e.valid and not exception; v.e.write_enable := current.write_reg_enable and v.e.valid and not exception;
v.e.rc := current.rc and v.e.valid and not exception; v.e.rc := current.rc and v.e.valid and not exception;


bypass_data.tag.valid <= current.instr_tag.valid and current.write_reg_enable and v.e.valid;
bypass_data.tag.tag <= current.instr_tag.tag;
bypass_data.data <= v.e.write_data;

-- Defer completion for one cycle when redirecting. -- Defer completion for one cycle when redirecting.
-- This also ensures r.busy = 1 when ctrl.irq_state = WRITE_SRR1 -- This also ensures r.busy = 1 when ctrl.irq_state = WRITE_SRR1
if v.redirect = '1' then if v.redirect = '1' then

Loading…
Cancel
Save