diff --git a/common.vhdl b/common.vhdl index 8792944..8d1ca29 100644 --- a/common.vhdl +++ b/common.vhdl @@ -137,6 +137,7 @@ package common is valid : std_ulogic; end record; constant instr_tag_init : instr_tag_t := (tag => 0, valid => '0'); + function tag_match(tag1 : instr_tag_t; tag2 : instr_tag_t) return boolean; type irq_state_t is (WRITE_SRR0, WRITE_SRR1); @@ -203,6 +204,12 @@ package common is redirect_nia : std_ulogic_vector(63 downto 0); end record; + type bypass_data_t is record + tag : instr_tag_t; + data : std_ulogic_vector(63 downto 0); + end record; + constant bypass_data_init : bypass_data_t := (tag => instr_tag_init, data => (others => '0')); + type Decode2ToExecute1Type is record valid: std_ulogic; unit : unit_t; @@ -217,9 +224,6 @@ package common is read_data1: std_ulogic_vector(63 downto 0); read_data2: std_ulogic_vector(63 downto 0); read_data3: std_ulogic_vector(63 downto 0); - bypass_data1: std_ulogic; - bypass_data2: std_ulogic; - bypass_data3: std_ulogic; cr: std_ulogic_vector(31 downto 0); bypass_cr : std_ulogic; xerc: xer_common_t; @@ -250,7 +254,7 @@ package common is end record; constant Decode2ToExecute1Init : Decode2ToExecute1Type := (valid => '0', unit => NONE, fac => NONE, insn_type => OP_ILLEGAL, instr_tag => instr_tag_init, - write_reg_enable => '0', bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0', + write_reg_enable => '0', bypass_cr => '0', lr => '0', br_abs => '0', rc => '0', oe => '0', invert_a => '0', addm1 => '0', invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0', is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', br_pred => '0', @@ -644,4 +648,9 @@ package body common is begin return "10" & f; end; + + function tag_match(tag1 : instr_tag_t; tag2 : instr_tag_t) return boolean is + begin + return tag1.valid = '1' and tag2.valid = '1' and tag1.tag = tag2.tag; + end; end common; diff --git a/control.vhdl b/control.vhdl index 576627b..c4b8d4e 100644 --- 
a/control.vhdl +++ b/control.vhdl @@ -6,6 +6,7 @@ use work.common.all; entity control is generic ( + EX1_BYPASS : boolean := true; PIPELINE_DEPTH : natural := 2 ); port ( @@ -23,7 +24,6 @@ entity control is gpr_write_valid_in : in std_ulogic; gpr_write_in : in gspr_index_t; - gpr_bypassable : in std_ulogic; gpr_a_read_valid_in : in std_ulogic; gpr_a_read_in : in gspr_index_t; @@ -34,6 +34,8 @@ entity control is gpr_c_read_valid_in : in std_ulogic; gpr_c_read_in : in gspr_index_t; + execute_next_tag : in instr_tag_t; + cr_read_in : in std_ulogic; cr_write_in : in std_ulogic; cr_bypassable : in std_ulogic; @@ -81,19 +83,11 @@ architecture rtl of control is signal instr_tag : instr_tag_t; - signal gpr_tag_a : instr_tag_t; - signal gpr_tag_b : instr_tag_t; - signal gpr_tag_c : instr_tag_t; signal gpr_tag_stall : std_ulogic; signal curr_tag : tag_number_t; signal next_tag : tag_number_t; - function tag_match(tag1 : instr_tag_t; tag2 : instr_tag_t) return boolean is - begin - return tag1.valid = '1' and tag2.valid = '1' and tag1.tag = tag2.tag; - end; - begin cr_hazard0: entity work.cr_hazard generic map ( @@ -115,10 +109,6 @@ begin use_bypass => cr_bypass ); - gpr_bypass_a <= '0'; - gpr_bypass_b <= '0'; - gpr_bypass_c <= '0'; - control0: process(clk) begin if rising_edge(clk) then @@ -165,6 +155,9 @@ begin variable tag_s : instr_tag_t; variable tag_t : instr_tag_t; variable incr_tag : tag_number_t; + variable byp_a : std_ulogic; + variable byp_b : std_ulogic; + variable byp_c : std_ulogic; begin tag_a := instr_tag_init; for i in tag_number_t loop @@ -196,10 +189,27 @@ begin if tag_match(tag_c, complete_in) then tag_c.valid := '0'; end if; - gpr_tag_a <= tag_a; - gpr_tag_b <= tag_b; - gpr_tag_c <= tag_c; - gpr_tag_stall <= tag_a.valid or tag_b.valid or tag_c.valid; + + byp_a := '0'; + if EX1_BYPASS and tag_match(execute_next_tag, tag_a) then + byp_a := '1'; + end if; + byp_b := '0'; + if EX1_BYPASS and tag_match(execute_next_tag, tag_b) then + byp_b := '1'; + end if; + 
byp_c := '0'; + if EX1_BYPASS and tag_match(execute_next_tag, tag_c) then + byp_c := '1'; + end if; + + gpr_bypass_a <= byp_a; + gpr_bypass_b <= byp_b; + gpr_bypass_c <= byp_c; + + gpr_tag_stall <= (tag_a.valid and not byp_a) or + (tag_b.valid and not byp_b) or + (tag_c.valid and not byp_c); incr_tag := curr_tag; instr_tag.tag <= curr_tag; diff --git a/core.vhdl b/core.vhdl index 2ac2ece..71bf2c8 100644 --- a/core.vhdl +++ b/core.vhdl @@ -67,6 +67,7 @@ architecture behave of core is -- execute signals signal execute1_to_writeback: Execute1ToWritebackType; signal execute1_to_fetch1: Execute1ToFetch1Type; + signal execute1_bypass: bypass_data_t; -- load store signals signal execute1_to_loadstore1: Execute1ToLoadstore1Type; @@ -273,6 +274,7 @@ begin r_out => decode2_to_register_file, c_in => cr_file_to_decode2, c_out => decode2_to_cr_file, + execute_bypass => execute1_bypass, log_out => log_data(119 downto 110) ); decode2_busy_in <= ex1_busy_out; @@ -330,6 +332,7 @@ begin f_out => execute1_to_fetch1, fp_out => execute1_to_fpu, e_out => execute1_to_writeback, + bypass_data => execute1_bypass, icache_inval => ex1_icache_inval, dbg_msr_out => msr, terminate_out => terminate, diff --git a/decode2.vhdl b/decode2.vhdl index a5d7f67..51c8ef1 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -37,6 +37,8 @@ entity decode2 is c_in : in CrFileToDecode2Type; c_out : out Decode2ToCrFileType; + execute_bypass : in bypass_data_t; + log_out : out std_ulogic_vector(9 downto 0) ); end entity decode2; @@ -285,19 +287,18 @@ architecture behaviour of decode2 is signal gpr_write_valid : std_ulogic; signal gpr_write : gspr_index_t; - signal gpr_bypassable : std_ulogic; signal gpr_a_read_valid : std_ulogic; - signal gpr_a_read :gspr_index_t; - signal gpr_a_bypass : std_ulogic; + signal gpr_a_read : gspr_index_t; + signal gpr_a_bypass : std_ulogic; signal gpr_b_read_valid : std_ulogic; - signal gpr_b_read : gspr_index_t; - signal gpr_b_bypass : std_ulogic; + signal gpr_b_read : gspr_index_t; + 
signal gpr_b_bypass : std_ulogic; signal gpr_c_read_valid : std_ulogic; - signal gpr_c_read : gspr_index_t; - signal gpr_c_bypass : std_ulogic; + signal gpr_c_read : gspr_index_t; + signal gpr_c_bypass : std_ulogic; signal cr_write_valid : std_ulogic; signal cr_bypass : std_ulogic; @@ -308,6 +309,7 @@ architecture behaviour of decode2 is begin control_0: entity work.control generic map ( + EX1_BYPASS => EX1_BYPASS, PIPELINE_DEPTH => 1 ) port map ( @@ -325,7 +327,6 @@ begin gpr_write_valid_in => gpr_write_valid, gpr_write_in => gpr_write, - gpr_bypassable => gpr_bypassable, gpr_a_read_valid_in => gpr_a_read_valid, gpr_a_read_in => gpr_a_read, @@ -336,6 +337,8 @@ begin gpr_c_read_valid_in => gpr_c_read_valid, gpr_c_read_in => gpr_c_read, + execute_next_tag => execute_bypass.tag, + cr_read_in => d_in.decode.input_cr, cr_write_in => cr_write_valid, cr_bypass => cr_bypass, @@ -457,13 +460,7 @@ begin v.e.fac := d_in.decode.facility; v.e.instr_tag := instr_tag; v.e.read_reg1 := decoded_reg_a.reg; - v.e.read_data1 := decoded_reg_a.data; - v.e.bypass_data1 := gpr_a_bypass; v.e.read_reg2 := decoded_reg_b.reg; - v.e.read_data2 := decoded_reg_b.data; - v.e.bypass_data2 := gpr_b_bypass; - v.e.read_data3 := decoded_reg_c.data; - v.e.bypass_data3 := gpr_c_bypass; v.e.write_reg := decoded_reg_o.reg; v.e.write_reg_enable := decoded_reg_o.reg_valid; v.e.rc := decode_rc(d_in.decode.rc, d_in.insn); @@ -499,16 +496,32 @@ begin end if; end if; + -- See if any of the operands can get their value via the bypass path. 
+ case gpr_a_bypass is + when '1' => + v.e.read_data1 := execute_bypass.data; + when others => + v.e.read_data1 := decoded_reg_a.data; + end case; + case gpr_b_bypass is + when '1' => + v.e.read_data2 := execute_bypass.data; + when others => + v.e.read_data2 := decoded_reg_b.data; + end case; + case gpr_c_bypass is + when '1' => + v.e.read_data3 := execute_bypass.data; + when others => + v.e.read_data3 := decoded_reg_c.data; + end case; + -- issue control control_valid_in <= d_in.valid; control_sgl_pipe <= d_in.decode.sgl_pipe; gpr_write_valid <= v.e.write_reg_enable; gpr_write <= decoded_reg_o.reg; - gpr_bypassable <= '0'; - if EX1_BYPASS and d_in.decode.unit = ALU then - gpr_bypassable <= '1'; - end if; gpr_a_read_valid <= decoded_reg_a.reg_valid; gpr_a_read <= decoded_reg_a.reg; @@ -554,9 +567,10 @@ begin r.e.valid & stopped_out & stall_out & - r.e.bypass_data3 & - r.e.bypass_data2 & - r.e.bypass_data1; + -- Keep the previous log bit layout: operand C (was bypass_data3) first, operand A (was bypass_data1) last. + gpr_c_bypass & + gpr_b_bypass & + gpr_a_bypass; end if; end process; log_out <= log_data; diff --git a/execute1.vhdl b/execute1.vhdl index e1fc240..c0cc32f 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -37,6 +37,7 @@ entity execute1 is fp_out : out Execute1ToFPUType; e_out : out Execute1ToWritebackType; + bypass_data : out bypass_data_t; dbg_msr_out : out std_ulogic_vector(63 downto 0); @@ -283,9 +284,9 @@ begin dbg_msr_out <= ctrl.msr; log_rd_addr <= r.log_addr_spr; - a_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data1 = '1' else e_in.read_data1; - b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2; - c_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data3 = '1' else e_in.read_data3; + a_in <= e_in.read_data1; + b_in <= e_in.read_data2; + c_in <= e_in.read_data3; busy_out <= l_in.busy or r.busy or fp_in.busy; valid_in <= e_in.valid and not busy_out; @@ -1270,6 +1271,10 @@ begin v.e.write_enable := current.write_reg_enable and v.e.valid and not exception; v.e.rc := current.rc and v.e.valid and not exception; 
+ bypass_data.tag.valid <= current.instr_tag.valid and current.write_reg_enable and v.e.valid; + bypass_data.tag.tag <= current.instr_tag.tag; + bypass_data.data <= v.e.write_data; + -- Defer completion for one cycle when redirecting. -- This also ensures r.busy = 1 when ctrl.irq_state = WRITE_SRR1 if v.redirect = '1' then