From d290d2a9bbddcfe52faa9427088bf6c4f225a711 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 11 Nov 2020 09:42:17 +1100 Subject: [PATCH] core: Restore bypass path from execute1 This changes the bypass path. Previously it went from after execute1's output register to after decode2's output register. Now it goes from before execute1's output register to before decode2's output register. The reason is that the new path will be simpler to manage when there are possibly multiple instructions in flight. This means that the bypassing can be managed inside decode2 and control. Signed-off-by: Paul Mackerras --- common.vhdl | 17 ++++++++++++---- control.vhdl | 44 +++++++++++++++++++++++++---------------- core.vhdl | 3 +++ decode2.vhdl | 55 +++++++++++++++++++++++++++++++-------------------- execute1.vhdl | 11 ++++++++--- 5 files changed, 85 insertions(+), 45 deletions(-) diff --git a/common.vhdl b/common.vhdl index 8792944..8d1ca29 100644 --- a/common.vhdl +++ b/common.vhdl @@ -137,6 +137,7 @@ package common is valid : std_ulogic; end record; constant instr_tag_init : instr_tag_t := (tag => 0, valid => '0'); + function tag_match(tag1 : instr_tag_t; tag2 : instr_tag_t) return boolean; type irq_state_t is (WRITE_SRR0, WRITE_SRR1); @@ -203,6 +204,12 @@ package common is redirect_nia : std_ulogic_vector(63 downto 0); end record; + type bypass_data_t is record + tag : instr_tag_t; + data : std_ulogic_vector(63 downto 0); + end record; + constant bypass_data_init : bypass_data_t := (tag => instr_tag_init, data => (others => '0')); + type Decode2ToExecute1Type is record valid: std_ulogic; unit : unit_t; @@ -217,9 +224,6 @@ package common is read_data1: std_ulogic_vector(63 downto 0); read_data2: std_ulogic_vector(63 downto 0); read_data3: std_ulogic_vector(63 downto 0); - bypass_data1: std_ulogic; - bypass_data2: std_ulogic; - bypass_data3: std_ulogic; cr: std_ulogic_vector(31 downto 0); bypass_cr : std_ulogic; xerc: xer_common_t; @@ -250,7 +254,7 @@ package common is end record; constant 
Decode2ToExecute1Init : Decode2ToExecute1Type := (valid => '0', unit => NONE, fac => NONE, insn_type => OP_ILLEGAL, instr_tag => instr_tag_init, - write_reg_enable => '0', bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0', + write_reg_enable => '0', bypass_cr => '0', lr => '0', br_abs => '0', rc => '0', oe => '0', invert_a => '0', addm1 => '0', invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0', is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', br_pred => '0', @@ -644,4 +648,9 @@ package body common is begin return "10" & f; end; + + function tag_match(tag1 : instr_tag_t; tag2 : instr_tag_t) return boolean is + begin + return tag1.valid = '1' and tag2.valid = '1' and tag1.tag = tag2.tag; + end; end common; diff --git a/control.vhdl b/control.vhdl index 576627b..c4b8d4e 100644 --- a/control.vhdl +++ b/control.vhdl @@ -6,6 +6,7 @@ use work.common.all; entity control is generic ( + EX1_BYPASS : boolean := true; PIPELINE_DEPTH : natural := 2 ); port ( @@ -23,7 +24,6 @@ entity control is gpr_write_valid_in : in std_ulogic; gpr_write_in : in gspr_index_t; - gpr_bypassable : in std_ulogic; gpr_a_read_valid_in : in std_ulogic; gpr_a_read_in : in gspr_index_t; @@ -34,6 +34,8 @@ entity control is gpr_c_read_valid_in : in std_ulogic; gpr_c_read_in : in gspr_index_t; + execute_next_tag : in instr_tag_t; + cr_read_in : in std_ulogic; cr_write_in : in std_ulogic; cr_bypassable : in std_ulogic; @@ -81,19 +83,11 @@ architecture rtl of control is signal instr_tag : instr_tag_t; - signal gpr_tag_a : instr_tag_t; - signal gpr_tag_b : instr_tag_t; - signal gpr_tag_c : instr_tag_t; signal gpr_tag_stall : std_ulogic; signal curr_tag : tag_number_t; signal next_tag : tag_number_t; - function tag_match(tag1 : instr_tag_t; tag2 : instr_tag_t) return boolean is - begin - return tag1.valid = '1' and tag2.valid = '1' and tag1.tag = tag2.tag; - end; - begin cr_hazard0: entity work.cr_hazard generic map ( @@ -115,10 
+109,6 @@ begin use_bypass => cr_bypass ); - gpr_bypass_a <= '0'; - gpr_bypass_b <= '0'; - gpr_bypass_c <= '0'; - control0: process(clk) begin if rising_edge(clk) then @@ -165,6 +155,9 @@ begin variable tag_s : instr_tag_t; variable tag_t : instr_tag_t; variable incr_tag : tag_number_t; + variable byp_a : std_ulogic; + variable byp_b : std_ulogic; + variable byp_c : std_ulogic; begin tag_a := instr_tag_init; for i in tag_number_t loop @@ -196,10 +189,27 @@ begin if tag_match(tag_c, complete_in) then tag_c.valid := '0'; end if; - gpr_tag_a <= tag_a; - gpr_tag_b <= tag_b; - gpr_tag_c <= tag_c; - gpr_tag_stall <= tag_a.valid or tag_b.valid or tag_c.valid; + + byp_a := '0'; + if EX1_BYPASS and tag_match(execute_next_tag, tag_a) then + byp_a := '1'; + end if; + byp_b := '0'; + if EX1_BYPASS and tag_match(execute_next_tag, tag_b) then + byp_b := '1'; + end if; + byp_c := '0'; + if EX1_BYPASS and tag_match(execute_next_tag, tag_c) then + byp_c := '1'; + end if; + + gpr_bypass_a <= byp_a; + gpr_bypass_b <= byp_b; + gpr_bypass_c <= byp_c; + + gpr_tag_stall <= (tag_a.valid and not byp_a) or + (tag_b.valid and not byp_b) or + (tag_c.valid and not byp_c); incr_tag := curr_tag; instr_tag.tag <= curr_tag; diff --git a/core.vhdl b/core.vhdl index 2ac2ece..71bf2c8 100644 --- a/core.vhdl +++ b/core.vhdl @@ -67,6 +67,7 @@ architecture behave of core is -- execute signals signal execute1_to_writeback: Execute1ToWritebackType; signal execute1_to_fetch1: Execute1ToFetch1Type; + signal execute1_bypass: bypass_data_t; -- load store signals signal execute1_to_loadstore1: Execute1ToLoadstore1Type; @@ -273,6 +274,7 @@ begin r_out => decode2_to_register_file, c_in => cr_file_to_decode2, c_out => decode2_to_cr_file, + execute_bypass => execute1_bypass, log_out => log_data(119 downto 110) ); decode2_busy_in <= ex1_busy_out; @@ -330,6 +332,7 @@ begin f_out => execute1_to_fetch1, fp_out => execute1_to_fpu, e_out => execute1_to_writeback, + bypass_data => execute1_bypass, icache_inval => 
ex1_icache_inval, dbg_msr_out => msr, terminate_out => terminate, diff --git a/decode2.vhdl b/decode2.vhdl index a5d7f67..51c8ef1 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -37,6 +37,8 @@ entity decode2 is c_in : in CrFileToDecode2Type; c_out : out Decode2ToCrFileType; + execute_bypass : in bypass_data_t; + log_out : out std_ulogic_vector(9 downto 0) ); end entity decode2; @@ -285,19 +287,18 @@ architecture behaviour of decode2 is signal gpr_write_valid : std_ulogic; signal gpr_write : gspr_index_t; - signal gpr_bypassable : std_ulogic; signal gpr_a_read_valid : std_ulogic; - signal gpr_a_read :gspr_index_t; - signal gpr_a_bypass : std_ulogic; + signal gpr_a_read : gspr_index_t; + signal gpr_a_bypass : std_ulogic; signal gpr_b_read_valid : std_ulogic; - signal gpr_b_read : gspr_index_t; - signal gpr_b_bypass : std_ulogic; + signal gpr_b_read : gspr_index_t; + signal gpr_b_bypass : std_ulogic; signal gpr_c_read_valid : std_ulogic; - signal gpr_c_read : gspr_index_t; - signal gpr_c_bypass : std_ulogic; + signal gpr_c_read : gspr_index_t; + signal gpr_c_bypass : std_ulogic; signal cr_write_valid : std_ulogic; signal cr_bypass : std_ulogic; @@ -308,6 +309,7 @@ architecture behaviour of decode2 is begin control_0: entity work.control generic map ( + EX1_BYPASS => EX1_BYPASS, PIPELINE_DEPTH => 1 ) port map ( @@ -325,7 +327,6 @@ begin gpr_write_valid_in => gpr_write_valid, gpr_write_in => gpr_write, - gpr_bypassable => gpr_bypassable, gpr_a_read_valid_in => gpr_a_read_valid, gpr_a_read_in => gpr_a_read, @@ -336,6 +337,8 @@ begin gpr_c_read_valid_in => gpr_c_read_valid, gpr_c_read_in => gpr_c_read, + execute_next_tag => execute_bypass.tag, + cr_read_in => d_in.decode.input_cr, cr_write_in => cr_write_valid, cr_bypass => cr_bypass, @@ -457,13 +460,7 @@ begin v.e.fac := d_in.decode.facility; v.e.instr_tag := instr_tag; v.e.read_reg1 := decoded_reg_a.reg; - v.e.read_data1 := decoded_reg_a.data; - v.e.bypass_data1 := gpr_a_bypass; v.e.read_reg2 := decoded_reg_b.reg; - 
v.e.read_data2 := decoded_reg_b.data; - v.e.bypass_data2 := gpr_b_bypass; - v.e.read_data3 := decoded_reg_c.data; - v.e.bypass_data3 := gpr_c_bypass; v.e.write_reg := decoded_reg_o.reg; v.e.write_reg_enable := decoded_reg_o.reg_valid; v.e.rc := decode_rc(d_in.decode.rc, d_in.insn); @@ -499,16 +496,32 @@ begin end if; end if; + -- See if any of the operands can get their value via the bypass path. + case gpr_a_bypass is + when '1' => + v.e.read_data1 := execute_bypass.data; + when others => + v.e.read_data1 := decoded_reg_a.data; + end case; + case gpr_b_bypass is + when '1' => + v.e.read_data2 := execute_bypass.data; + when others => + v.e.read_data2 := decoded_reg_b.data; + end case; + case gpr_c_bypass is + when '1' => + v.e.read_data3 := execute_bypass.data; + when others => + v.e.read_data3 := decoded_reg_c.data; + end case; + -- issue control control_valid_in <= d_in.valid; control_sgl_pipe <= d_in.decode.sgl_pipe; gpr_write_valid <= v.e.write_reg_enable; gpr_write <= decoded_reg_o.reg; - gpr_bypassable <= '0'; - if EX1_BYPASS and d_in.decode.unit = ALU then - gpr_bypassable <= '1'; - end if; gpr_a_read_valid <= decoded_reg_a.reg_valid; gpr_a_read <= decoded_reg_a.reg; @@ -554,9 +567,9 @@ begin r.e.valid & stopped_out & stall_out & - r.e.bypass_data3 & - r.e.bypass_data2 & - r.e.bypass_data1; + gpr_a_bypass & + gpr_b_bypass & + gpr_c_bypass; end if; end process; log_out <= log_data; diff --git a/execute1.vhdl b/execute1.vhdl index e1fc240..c0cc32f 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -37,6 +37,7 @@ entity execute1 is fp_out : out Execute1ToFPUType; e_out : out Execute1ToWritebackType; + bypass_data : out bypass_data_t; dbg_msr_out : out std_ulogic_vector(63 downto 0); @@ -283,9 +284,9 @@ begin dbg_msr_out <= ctrl.msr; log_rd_addr <= r.log_addr_spr; - a_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data1 = '1' else e_in.read_data1; - b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2; - c_in <= 
r.e.write_data when EX1_BYPASS and e_in.bypass_data3 = '1' else e_in.read_data3; + a_in <= e_in.read_data1; + b_in <= e_in.read_data2; + c_in <= e_in.read_data3; busy_out <= l_in.busy or r.busy or fp_in.busy; valid_in <= e_in.valid and not busy_out; @@ -1270,6 +1271,10 @@ begin v.e.write_enable := current.write_reg_enable and v.e.valid and not exception; v.e.rc := current.rc and v.e.valid and not exception; + bypass_data.tag.valid <= current.instr_tag.valid and current.write_reg_enable and v.e.valid; + bypass_data.tag.tag <= current.instr_tag.tag; + bypass_data.data <= v.e.write_data; + -- Defer completion for one cycle when redirecting. -- This also ensures r.busy = 1 when ctrl.irq_state = WRITE_SRR1 if v.redirect = '1' then