From b3799c432ba51b4c2aceeefbe9de209e8e935362 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 15 Jun 2020 09:28:03 +1000 Subject: [PATCH] decode1: Add a stash buffer to the output This means that the busy signal from execute1 (which can be driven combinatorially from mmu or dcache) now stops at decode1 and doesn't go on to icache or fetch1. This helps with timing. Signed-off-by: Paul Mackerras --- core.vhdl | 6 ++++-- decode1.vhdl | 31 ++++++++++++++++++++----------- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/core.vhdl b/core.vhdl index 13f3ce7..019660c 100644 --- a/core.vhdl +++ b/core.vhdl @@ -82,6 +82,7 @@ architecture behave of core is signal icache_stall_out : std_ulogic; signal icache_stall_in : std_ulogic; signal decode1_stall_in : std_ulogic; + signal decode1_busy : std_ulogic; signal decode2_busy_in : std_ulogic; signal decode2_stall_out : std_ulogic; signal ex1_icache_inval: std_ulogic; @@ -188,7 +189,7 @@ begin log_out => log_data(42 downto 0) ); - fetch1_stall_in <= icache_stall_out or decode2_stall_out; + fetch1_stall_in <= icache_stall_out or decode1_busy; icache_0: entity work.icache generic map( @@ -212,7 +213,7 @@ begin log_out => log_data(96 downto 43) ); - icache_stall_in <= decode2_stall_out; + icache_stall_in <= decode1_busy; decode1_0: entity work.decode1 port map ( @@ -220,6 +221,7 @@ begin rst => rst_dec1, stall_in => decode1_stall_in, flush_in => flush, + busy_out => decode1_busy, f_in => icache_to_decode1, d_out => decode1_to_decode2, log_out => log_data(109 downto 97) diff --git a/decode1.vhdl b/decode1.vhdl index ae3e970..21596f6 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -13,6 +13,7 @@ entity decode1 is stall_in : in std_ulogic; flush_in : in std_ulogic; + busy_out : out std_ulogic; f_in : in IcacheToDecode1Type; d_out : out Decode1ToDecode2Type; @@ -22,6 +23,7 @@ end entity decode1; architecture behaviour of decode1 is signal r, rin : Decode1ToDecode2Type; + signal s : Decode1ToDecode2Type; subtype major_opcode_t is unsigned(5 downto 0); type major_rom_array_t is array(0 to 63) of decode_rom_t; @@ -359,12 +361,27 @@ begin decode1_0: process(clk) begin if rising_edge(clk) then - -- Output state remains unchanged on stall, unless we are flushing - if rst = '1' or flush_in = '1' or stall_in = '0' then - r <= rin; + if rst = '1' then + r <= Decode1ToDecode2Init; + s <= Decode1ToDecode2Init; + elsif flush_in = '1' then + r.valid <= '0'; + s.valid <= '0'; + elsif s.valid = '1' then + if stall_in = '0' then + r <= s; + s.valid <= '0'; + end if; + else + s <= rin; + s.valid <= rin.valid and r.valid and stall_in; + if r.valid = '0' or stall_in = '0' then + r <= rin; + end if; end if; end if; end process; + busy_out <= s.valid; decode1_1: process(all) variable v : Decode1ToDecode2Type; @@ -472,14 +489,6 @@ begin end if; end if; - if flush_in = '1' then - v.valid := '0'; - end if; - - if rst = '1' then - v := Decode1ToDecode2Init; - end if; - -- Update registers rin <= v;