From bdc26b7527c3fa66de099473df07e6bb0272333e Mon Sep 17 00:00:00 2001
From: Anton Blanchard
Date: Mon, 14 Oct 2019 13:27:45 +1100
Subject: [PATCH] Add GPR hazard detection

Check GPRs against any writers in the pipeline. All instructions
are still marked single in pipeline at this stage.

Signed-off-by: Anton Blanchard
---
Review notes (ignored by git am, not part of the commit message):
 * gpr_hazard: a tracked writer now matches on valid = '1' and the whole
   scan is gated on gpr_read_valid_in, instead of comparing the two valid
   bits for equality and overriding the result afterwards. Same result
   for '0'/'1' inputs.
 * gpr_hazard: clk is std_ulogic, like every other port in this patch.
 * gpr_hazard: header and port comments added.
 * decode2: dropped the is_reg variable and the commented-out return in
   decode_input_reg_a; nothing read is_reg.
 * control: review notes added as trailing comments only, no logic change.
 * Whitespace was reconstructed; the blob ids on the "index" lines are
   those of the original submission.

 Makefile        |   1 +
 control.vhdl    | 142 ++++++++++++++++++++++++++++++++++++++----------
 decode2.vhdl    |  37 +++++++++++++
 gpr_hazard.vhdl |  75 +++++++++++++++++++++++++
 microwatt.core  |   1 +
 5 files changed, 228 insertions(+), 28 deletions(-)
 create mode 100644 gpr_hazard.vhdl

diff --git a/Makefile b/Makefile
index 8a946f8..20a79a1 100644
--- a/Makefile
+++ b/Makefile
@@ -14,6 +14,7 @@ all: $(all)
 	$(GHDL) -a $(GHDLFLAGS) $<
 
 common.o: decode_types.o
+control.o: gpr_hazard.o
 sim_jtag.o: sim_jtag_socket.o
 core_tb.o: common.o wishbone_types.o core.o soc.o sim_jtag.o
 core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o execute2.o loadstore1.o loadstore2.o multiply.o writeback.o core_debug.o divider.o
diff --git a/control.vhdl b/control.vhdl
index 35a7e17..d62a140 100644
--- a/control.vhdl
+++ b/control.vhdl
@@ -6,18 +6,30 @@ entity control is
         PIPELINE_DEPTH : natural := 2
         );
     port (
-        clk          : in std_ulogic;
-        rst          : in std_ulogic;
-
-        complete_in  : in std_ulogic;
-        valid_in     : in std_ulogic;
-        flush_in     : in std_ulogic;
-        sgl_pipe_in  : in std_ulogic;
-        stop_mark_in : in std_ulogic;
-
-        valid_out    : out std_ulogic;
-        stall_out    : out std_ulogic;
-        stopped_out  : out std_ulogic
+        clk                 : in std_ulogic;
+        rst                 : in std_ulogic;
+
+        complete_in         : in std_ulogic;
+        valid_in            : in std_ulogic;
+        flush_in            : in std_ulogic;
+        sgl_pipe_in         : in std_ulogic;
+        stop_mark_in        : in std_ulogic;
+
+        gpr_write_valid_in  : in std_ulogic;  -- decoded instruction has a GPR destination
+        gpr_write_in        : in std_ulogic_vector(4 downto 0);
+
+        gpr_a_read_valid_in : in std_ulogic;  -- decoded operand A is a GPR read
+        gpr_a_read_in       : in std_ulogic_vector(4 downto 0);
+
+        gpr_b_read_valid_in : in std_ulogic;  -- decoded operand B is a GPR read
+        gpr_b_read_in       : in std_ulogic_vector(4 downto 0);
+
+        gpr_c_read_valid_in : in std_ulogic;  -- decoded operand C is a GPR read
+        gpr_c_read_in       : in std_ulogic_vector(4 downto 0);
+
+        valid_out           : out std_ulogic;
+        stall_out           : out std_ulogic;
+        stopped_out         : out std_ulogic
         );
 end entity control;
 
@@ -26,12 +38,61 @@ architecture rtl of control is
 
     type reg_internal_type is record
         state : state_type;
-        outstanding : integer range -1 to PIPELINE_DEPTH+1;
+        outstanding : integer range -1 to PIPELINE_DEPTH+2; -- XXX ?
     end record;
     constant reg_internal_init : reg_internal_type := (state => IDLE, outstanding => 0);
 
     signal r_int, rin_int : reg_internal_type := reg_internal_init;
+
+    signal stall_a_out, stall_b_out, stall_c_out : std_ulogic;
+
+    signal gpr_write_valid : std_ulogic := '0';  -- gpr_write_valid_in, forced to '0' unless the instruction issues this cycle
 begin
+    gpr_hazard0: entity work.gpr_hazard
+        generic map (
+            PIPELINE_DEPTH => 2  -- NOTE(review): literal, not this entity's PIPELINE_DEPTH generic (same on gpr_hazard1/2) - confirm intended
+            )
+        port map (
+            clk                => clk,
+
+            gpr_write_valid_in => gpr_write_valid,
+            gpr_write_in       => gpr_write_in,
+            gpr_read_valid_in  => gpr_a_read_valid_in,
+            gpr_read_in        => gpr_a_read_in,
+
+            stall_out          => stall_a_out
+            );
+
+    gpr_hazard1: entity work.gpr_hazard
+        generic map (
+            PIPELINE_DEPTH => 2
+            )
+        port map (
+            clk                => clk,
+
+            gpr_write_valid_in => gpr_write_valid,
+            gpr_write_in       => gpr_write_in,
+            gpr_read_valid_in  => gpr_b_read_valid_in,
+            gpr_read_in        => gpr_b_read_in,
+
+            stall_out          => stall_b_out
+            );
+
+    gpr_hazard2: entity work.gpr_hazard
+        generic map (
+            PIPELINE_DEPTH => 2
+            )
+        port map (
+            clk                => clk,
+
+            gpr_write_valid_in => gpr_write_valid,
+            gpr_write_in       => gpr_write_in,
+            gpr_read_valid_in  => gpr_c_read_valid_in,
+            gpr_read_in        => gpr_c_read_in,
+
+            stall_out          => stall_c_out
+            );
+
     control0: process(clk)
     begin
         if rising_edge(clk) then
@@ -42,15 +103,16 @@ begin
     control1 : process(all)
         variable v_int : reg_internal_type;
         variable valid_tmp : std_ulogic;
+        variable stall_tmp : std_ulogic;  -- stall decision; copied to stall_out once, at the end of the process
     begin
         v_int := r_int;
 
         -- asynchronous
         valid_tmp := valid_in and not flush_in;
-        stall_out <= '0';
+        stall_tmp := '0';
 
         if complete_in = '1' then
-            assert r_int.outstanding <= 1 report "Outstanding bad " & integer'image(r_int.outstanding) severity failure;
+            assert r_int.outstanding >= 0 and r_int.outstanding <= (PIPELINE_DEPTH+1) report "Outstanding bad " & integer'image(r_int.outstanding) severity failure;  -- NOTE(review): passes with outstanding = 0, which the decrement below turns into -1 - confirm intended
             v_int.outstanding := r_int.outstanding - 1;
         end if;
 
@@ -64,14 +126,18 @@ begin
         -- through the pipeline.
         case r_int.state is
             when IDLE =>
-                if (flush_in = '0') and (valid_tmp = '1') and (sgl_pipe_in = '1') then
-                    if v_int.outstanding /= 0 then
-                        v_int.state := WAIT_FOR_PREV_TO_COMPLETE;
-                        valid_tmp := '0';
-                        stall_out <= '1';
+                if valid_tmp = '1' then
+                    if (sgl_pipe_in = '1') then
+                        if v_int.outstanding /= 0 then
+                            v_int.state := WAIT_FOR_PREV_TO_COMPLETE;
+                            stall_tmp := '1';
+                        else
+                            -- send insn out and wait on it to complete
+                            v_int.state := WAIT_FOR_CURR_TO_COMPLETE;
+                        end if;
                     else
-                        -- send insn out and wait on it to complete
-                        v_int.state := WAIT_FOR_CURR_TO_COMPLETE;
+                        -- let it go out if there are no GPR hazards
+                        stall_tmp := stall_a_out or stall_b_out or stall_c_out;
                     end if;
                 end if;
 
@@ -80,32 +146,52 @@ begin
                     -- send insn out and wait on it to complete
                     v_int.state := WAIT_FOR_CURR_TO_COMPLETE;
                 else
-                    valid_tmp := '0';
-                    stall_out <= '1';
+                    stall_tmp := '1';
                 end if;
 
             when WAIT_FOR_CURR_TO_COMPLETE =>
                 if v_int.outstanding = 0 then
                     v_int.state := IDLE;
+                    -- XXX Don't replicate this
+                    if valid_tmp = '1' then
+                        if (sgl_pipe_in = '1') then
+                            if v_int.outstanding /= 0 then  -- NOTE(review): cannot be true here, outstanding = 0 was tested just above
+                                v_int.state := WAIT_FOR_PREV_TO_COMPLETE;
+                                stall_tmp := '1';
+                            else
+                                -- send insn out and wait on it to complete
+                                v_int.state := WAIT_FOR_CURR_TO_COMPLETE;
+                            end if;
+                        else
+                            -- let it go out if there are no GPR hazards
+                            stall_tmp := stall_a_out or stall_b_out or stall_c_out;
+                        end if;
+                    end if;
                 else
-                    valid_tmp := '0';
-                    stall_out <= '1';
+                    stall_tmp := '1';
                 end if;
         end case;
 
-        -- track outstanding instructions
+        if stall_tmp = '1' then  -- a stalled instruction is never presented as valid
+            valid_tmp := '0';
+        end if;
+
         if valid_tmp = '1' then
             v_int.outstanding := v_int.outstanding + 1;
+            gpr_write_valid <= gpr_write_valid_in;  -- only an issuing instruction is recorded as a writer
+        else
+            gpr_write_valid <= '0';
         end if;
 
         if rst = '1' then
             v_int.state := IDLE;
             v_int.outstanding := 0;
-            stall_out <= '0';
+            stall_tmp := '0';
         end if;
 
         -- update outputs
         valid_out <= valid_tmp;
+        stall_out <= stall_tmp;
 
         -- update registers
         rin_int <= v_int;
diff --git a/decode2.vhdl b/decode2.vhdl
index 3ba1079..2cd2da4 100644
--- a/decode2.vhdl
+++ b/decode2.vhdl
@@ -127,9 +127,22 @@ architecture behaviour of decode2 is
         end case;
     end;
 
+    -- issue control signals
     signal control_valid_in : std_ulogic;
     signal control_valid_out : std_ulogic;
     signal control_sgl_pipe : std_logic;
+
+    signal gpr_write_valid : std_ulogic;
+    signal gpr_write       : std_ulogic_vector(4 downto 0);
+
+    signal gpr_a_read_valid : std_ulogic;
+    signal gpr_a_read       : std_ulogic_vector(4 downto 0);
+
+    signal gpr_b_read_valid : std_ulogic;
+    signal gpr_b_read       : std_ulogic_vector(4 downto 0);
+
+    signal gpr_c_read_valid : std_ulogic;
+    signal gpr_c_read       : std_ulogic_vector(4 downto 0);
 begin
     control_0: entity work.control
         generic map (
@@ -145,6 +158,18 @@ begin
             sgl_pipe_in => control_sgl_pipe,
             stop_mark_in => d_in.stop_mark,
 
+            gpr_write_valid_in => gpr_write_valid,
+            gpr_write_in => gpr_write,
+
+            gpr_a_read_valid_in => gpr_a_read_valid,
+            gpr_a_read_in => gpr_a_read,
+
+            gpr_b_read_valid_in => gpr_b_read_valid,
+            gpr_b_read_in => gpr_b_read,
+
+            gpr_c_read_valid_in => gpr_c_read_valid,
+            gpr_c_read_in => gpr_c_read,
+
             valid_out => control_valid_out,
             stall_out => stall_out,
             stopped_out => stopped_out
@@ -323,6 +348,18 @@ begin
         control_valid_in <= d_in.valid;
         control_sgl_pipe <= d_in.decode.sgl_pipe;
 
+        gpr_write_valid <= '1' when d_in.decode.output_reg_a /= NONE else '0';  -- not qualified by d_in.valid here; control only records it for an issuing instruction
+        gpr_write <= decode_output_reg(d_in.decode.output_reg_a, d_in.insn);
+
+        gpr_a_read_valid <= decoded_reg_a.reg_valid;
+        gpr_a_read <= decoded_reg_a.reg;
+
+        gpr_b_read_valid <= decoded_reg_b.reg_valid;
+        gpr_b_read <= decoded_reg_b.reg;
+
+        gpr_c_read_valid <= decoded_reg_c.reg_valid;
+        gpr_c_read <= decoded_reg_c.reg;
+
         v.e.valid := '0';
         v.m.valid := '0';
         v.d.valid := '0';
diff --git a/gpr_hazard.vhdl b/gpr_hazard.vhdl
new file mode 100644
index 0000000..6c8614b
--- /dev/null
+++ b/gpr_hazard.vhdl
@@ -0,0 +1,75 @@
+-- GPR hazard detector.
+--
+-- Records the destination GPR presented on gpr_write_in for each of the
+-- last PIPELINE_DEPTH clock cycles and asserts stall_out while the GPR
+-- being read matches any recorded, still-tracked writer.
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.numeric_std.all;
+
+entity gpr_hazard is
+    generic (
+        -- number of clock cycles a writer stays tracked
+        PIPELINE_DEPTH : natural := 2
+        );
+    port(
+        clk                : in std_ulogic;
+
+        -- GPR written by the instruction entering the pipeline
+        gpr_write_valid_in : in std_ulogic;
+        gpr_write_in       : in std_ulogic_vector(4 downto 0);
+        -- GPR the instruction being issued wants to read
+        gpr_read_valid_in  : in std_ulogic;
+        gpr_read_in        : in std_ulogic_vector(4 downto 0);
+
+        -- combinational: '1' when the read matches a tracked writer
+        stall_out          : out std_ulogic
+        );
+end entity gpr_hazard;
+architecture behaviour of gpr_hazard is
+    type pipeline_entry_type is record
+        valid : std_ulogic;
+        gpr   : std_ulogic_vector(4 downto 0);
+    end record;
+    constant pipeline_entry_init : pipeline_entry_type := (valid => '0', gpr => (others => '0'));
+
+    type pipeline_t is array(0 to PIPELINE_DEPTH-1) of pipeline_entry_type;
+    constant pipeline_t_init : pipeline_t := (others => pipeline_entry_init);
+
+    signal r, rin : pipeline_t := pipeline_t_init;
+begin
+    gpr_hazard0: process(clk)
+    begin
+        if rising_edge(clk) then
+            r <= rin;
+        end if;
+    end process;
+
+    gpr_hazard1: process(all)
+        variable v : pipeline_t;
+    begin
+        v := r;
+
+        -- asynchronous output: stall only when a valid read hits a slot
+        -- that holds a valid writer of the same register
+        stall_out <= '0';
+        if gpr_read_valid_in = '1' then
+            loop_0: for i in 0 to PIPELINE_DEPTH-1 loop
+                if r(i).valid = '1' and r(i).gpr = gpr_read_in then
+                    stall_out <= '1';
+                end if;
+            end loop;
+        end if;
+
+        -- slot 0 takes this cycle's writer
+        v(0).valid := gpr_write_valid_in;
+        v(0).gpr := gpr_write_in;
+        loop_1: for i in 0 to PIPELINE_DEPTH-2 loop
+            -- propagate to next slot
+            v(i+1) := r(i);
+        end loop;
+
+        -- update registers
+        rin <= v;
+    end process;
+end;
diff --git a/microwatt.core b/microwatt.core
index 5081453..ae75fa6 100644
--- a/microwatt.core
+++ b/microwatt.core
@@ -20,6 +20,7 @@ filesets:
       - sim_console.vhdl
       - logical.vhdl
       - countzero.vhdl
+      - gpr_hazard.vhdl
       - control.vhdl
       - execute1.vhdl
       - execute2.vhdl