From 89849a6856b127718f6d5fd2034abe62628777bd Mon Sep 17 00:00:00 2001
From: Anton Blanchard
Date: Wed, 11 Sep 2019 13:05:17 +1000
Subject: [PATCH] Add a simple direct mapped icache

Signed-off-by: Anton Blanchard
---
Review notes (text between the "---" line above and the first "diff --git"
line is not part of the commit):

* The wishbone instruction fetch logic and the one-entry cache are removed
  from fetch2; the wishbone_insn_in/out ports of the core are now driven by
  the new icache entity instead.
* fetch2 drives i_out.req to '1' every cycle, forwards f_in.nia as the lookup
  address, takes v.valid and v.insn from the icache response and sets
  stall_out to "not i_in.ack".
* The removed "if rst = '1' then v := Fetch2ToDecode1Init" branch has no
  replacement: after this patch the comb process of fetch2 does not read rst.
* core instantiates the cache with LINE_SIZE_DW => 8 and NUM_LINES => 16; the
  entity defaults in icache.vhdl are 8 and 32.
* Line breaks and indentation of this patch were reconstructed. If a hunk is
  rejected because of whitespace, apply with "git am --ignore-whitespace".

 Makefile       |   5 +-
 common.vhdl    |  10 +++
 core.vhdl      |  22 ++++++-
 core_tb.vhdl   |   2 -
 fetch2.vhdl    | 114 +++-----------------------------
 icache.vhdl    | 173 +++++++++++++++++++++++++++++++++++++++++++++++++
 microwatt.core |   1 +
 7 files changed, 217 insertions(+), 110 deletions(-)
 create mode 100644 icache.vhdl

diff --git a/Makefile b/Makefile
index 51ab903..3c3836f 100644
--- a/Makefile
+++ b/Makefile
@@ -12,8 +12,8 @@ all: $(all)
 	$(GHDL) -a $(GHDLFLAGS) $<
 
 common.o: decode_types.o
-core_tb.o: common.o wishbone_types.o soc.o
-core.o: common.o wishbone_types.o fetch1.o fetch2.o decode1.o decode2.o register_file.o cr_file.o execute1.o execute2.o loadstore1.o loadstore2.o multiply.o writeback.o
+core_tb.o: common.o core.o soc.o
+core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o execute2.o loadstore1.o loadstore2.o multiply.o writeback.o
 cr_file.o: common.o
 crhelpers.o: common.o
 decode1.o: common.o decode_types.o
@@ -26,6 +26,7 @@ fetch2.o: common.o wishbone_types.o
 glibc_random_helpers.o:
 glibc_random.o: glibc_random_helpers.o
 helpers.o:
+icache.o: common.o wishbone_types.o
 insn_helpers.o:
 loadstore1.o: common.o
 loadstore2.o: common.o helpers.o wishbone_types.o
diff --git a/common.vhdl b/common.vhdl
index 399ab98..bf383ca 100644
--- a/common.vhdl
+++ b/common.vhdl
@@ -31,6 +31,16 @@ package common is
 	end record;
 	constant Decode1ToDecode2Init : Decode1ToDecode2Type := (valid => '0', decode => decode_rom_init, others => (others => '0'));
 
+	type Fetch2ToIcacheType is record  -- lookup request, fetch2 -> icache
+		req: std_ulogic;  -- '1' when addr should be looked up
+		addr: std_ulogic_vector(63 downto 0);  -- instruction address (fetch2 passes f_in.nia)
+	end record;
+
+	type IcacheToFetch2Type is record  -- lookup response, icache -> fetch2
+		ack: std_ulogic;  -- '1' on a hit, i.e. insn belongs to the requested address
+		insn: std_ulogic_vector(31 downto 0);  -- 32-bit word read from the selected cache line
+	end record;
+
 	type Decode2ToExecute1Type is record
 		valid: std_ulogic;
 		insn_type: insn_type_t;
diff --git a/core.vhdl b/core.vhdl
index 87401f0..d34bf71 100644
--- a/core.vhdl
+++ b/core.vhdl
@@ -31,6 +31,10 @@ architecture behave of core is
     signal fetch1_to_fetch2: Fetch1ToFetch2Type;
     signal fetch2_to_decode1: Fetch2ToDecode1Type;
 
+    -- icache signals
+    signal fetch2_to_icache : Fetch2ToIcacheType;
+    signal icache_to_fetch2 : IcacheToFetch2Type;
+
     -- decode signals
     signal decode1_to_decode2: Decode1ToDecode2Type;
     signal decode2_to_execute1: Decode2ToExecute1Type;
@@ -97,14 +101,28 @@ begin
             stall_in => fetch2_stall_in,
             stall_out => fetch2_stall_out,
             flush_in => flush,
-            wishbone_in => wishbone_insn_in,
-            wishbone_out => wishbone_insn_out,
+            i_in => icache_to_fetch2,
+            i_out => fetch2_to_icache,
             f_in => fetch1_to_fetch2,
             f_out => fetch2_to_decode1
             );
 
     fetch2_stall_in <= decode2_stall_out;
 
+    icache_0: entity work.icache  -- now owns the instruction wishbone port
+        generic map(
+            LINE_SIZE_DW => 8,
+            NUM_LINES => 16
+            )
+        port map(
+            clk => clk,
+            rst => rst,
+            i_in => fetch2_to_icache,
+            i_out => icache_to_fetch2,
+            wishbone_out => wishbone_insn_out,
+            wishbone_in => wishbone_insn_in
+            );
+
     decode1_0: entity work.decode1
         port map (
             clk => clk,
diff --git a/core_tb.vhdl b/core_tb.vhdl
index cb11d08..4522da4 100644
--- a/core_tb.vhdl
+++ b/core_tb.vhdl
@@ -1,7 +1,5 @@
 library ieee;
 use ieee.std_logic_1164.all;
-use ieee.numeric_std.all;
-use std.textio.all;
 
 library work;
 use work.common.all;
diff --git a/fetch2.vhdl b/fetch2.vhdl
index 6ae7269..37cb66c 100644
--- a/fetch2.vhdl
+++ b/fetch2.vhdl
@@ -16,9 +16,8 @@ entity fetch2 is
 
 		flush_in     : in std_ulogic;
 
-		-- instruction memory interface
-		wishbone_in  : in wishbone_slave_out;
-		wishbone_out : out wishbone_master_out;
+		i_in         : in IcacheToFetch2Type;  -- icache response (ack, insn)
+		i_out        : out Fetch2ToIcacheType;  -- icache request (req, addr)
 
 		f_in         : in Fetch1ToFetch2Type;
 
@@ -27,35 +26,7 @@ entity fetch2 is
 end entity fetch2;
 
 architecture behaviour of fetch2 is
-	type state_type is (IDLE, JUST_IDLE, WAIT_ACK, WAIT_ACK_THROWAWAY);
-
-	type reg_internal_type is record
-		state     : state_type;
-		nia       : std_ulogic_vector(63 downto 0);
-		w         : wishbone_master_out;
-		-- Trivial 64B cache
-		cache     : std_ulogic_vector(63 downto 0);
-		tag       : std_ulogic_vector(60 downto 0);
-		tag_valid : std_ulogic;
-	end record;
-
-	function wishbone_fetch(nia : std_ulogic_vector(63 downto 0)) return wishbone_master_out is
-		variable w : wishbone_master_out;
-	begin
-		assert nia(2 downto 0) = "000";
-
-		w.adr := nia;
-		w.dat := (others => '0');
-		w.cyc := '1';
-		w.stb := '1';
-		w.sel := "11111111";
-		w.we  := '0';
-
-		return w;
-	end;
-
-	signal r, rin         : Fetch2ToDecode1Type;
-	signal r_int, rin_int : reg_internal_type;
+	signal r, rin : Fetch2ToDecode1Type;
 begin
 	regs : process(clk)
 	begin
@@ -64,96 +35,31 @@ begin
 			if rst = '1' or flush_in = '1' or stall_in = '0' then
 				r <= rin;
 			end if;
-			r_int <= rin_int;
 		end if;
 	end process;
 
 	comb : process(all)
-		variable v     : Fetch2ToDecode1Type;
-		variable v_int : reg_internal_type;
+		variable v : Fetch2ToDecode1Type;
 	begin
 		v := r;
-		v_int := r_int;
 
-		v.valid := '0';
+		-- asynchronous icache lookup
+		i_out.req <= '1';  -- a lookup is requested every cycle
+		i_out.addr <= f_in.nia;
+		v.valid := i_in.ack;  -- valid only when the icache reports a hit
 		v.nia := f_in.nia;
+		v.insn := i_in.insn;
+		stall_out <= not i_in.ack;  -- stall while the icache has not acked
 
-		case v_int.state is
-		when IDLE | JUST_IDLE =>
-			v_int.state := IDLE;
-
-			if (v_int.tag_valid = '1') and (v_int.tag = f_in.nia(63 downto 3)) then
-				v.valid := '1';
-				if f_in.nia(2) = '0' then
-					v.insn := v_int.cache(31 downto 0);
-				else
-					v.insn := v_int.cache(63 downto 32);
-				end if;
-			else
-				v_int.state := WAIT_ACK;
-				v_int.nia := f_in.nia;
-				v_int.w := wishbone_fetch(f_in.nia(63 downto 3) & "000");
-			end if;
-
-		when WAIT_ACK =>
-			if wishbone_in.ack = '1' then
-				v_int.state := IDLE;
-				v_int.w := wishbone_master_out_init;
-				v_int.cache := wishbone_in.dat;
-				v_int.tag := v_int.nia(63 downto 3);
-				v_int.tag_valid := '1';
-
-				v.valid := '1';
-				if v_int.nia(2) = '0' then
-					v.insn := v_int.cache(31 downto 0);
-				else
-					v.insn := v_int.cache(63 downto 32);
-				end if;
-			end if;
-
-		when WAIT_ACK_THROWAWAY =>
-			if wishbone_in.ack = '1' then
-				-- Should we put the returned data in the cache? We went to the
-				-- trouble of fetching it and it might be useful in the future
-
-				v_int.w := wishbone_master_out_init;
-
-				-- We need to stall fetch1 for one more cycle, so transition through JUST_IDLE
-				v_int.state := JUST_IDLE;
-			end if;
-		end case;
-
-		stall_out <= '0';
-		if v_int.state /= IDLE then
-			stall_out <= '1';
-		end if;
 
 		if flush_in = '1' then
 			v.valid := '0';
-
-			-- Throw away in flight data
-			if v_int.state = WAIT_ACK then
-				v_int.state := WAIT_ACK_THROWAWAY;
-			end if;
-		end if;
-
-		if rst = '1' then
-			v := Fetch2ToDecode1Init;
-
-			v_int.state := IDLE;
-			v_int.nia := (others => '0');
-			v_int.w := wishbone_master_out_init;
-			v_int.cache := (others => '0');
-			v_int.tag := (others => '0');
-			v_int.tag_valid := '0';
 		end if;
 
 		-- Update registers
-		rin_int <= v_int;
 		rin <= v;
 
 		-- Update outputs
 		f_out <= r;
-		wishbone_out <= r_int.w;
 	end process;
 end architecture behaviour;
diff --git a/icache.vhdl b/icache.vhdl
new file mode 100644
index 0000000..2565219
--- /dev/null
+++ b/icache.vhdl
@@ -0,0 +1,173 @@
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.numeric_std.all;
+
+library work;
+use work.common.all;
+use work.wishbone_types.all;
+
+-- 64 bit direct mapped icache. All instructions are 4B aligned.
+
+entity icache is
+    generic (
+        -- Line size in 64bit doublewords
+        LINE_SIZE_DW : natural := 8;
+        -- Number of lines
+        NUM_LINES    : natural := 32
+        );
+    port (
+        clk          : in std_ulogic;
+        rst          : in std_ulogic;  -- synchronous, only sampled on rising_edge(clk)
+
+        i_in         : in Fetch2ToIcacheType;   -- lookup request: req, addr
+        i_out        : out IcacheToFetch2Type;  -- lookup response: ack, insn
+
+        wishbone_out : out wishbone_master_out;  -- line refill requests
+        wishbone_in  : in wishbone_slave_out     -- refill ack and data
+        );
+end entity icache;
+
+architecture rtl of icache is
+    function log2(i : natural) return integer is  -- floor(log2(i)); 0 for i <= 1
+        variable tmp : integer := i;
+        variable ret : integer := 0;
+    begin
+        while tmp > 1 loop
+            ret := ret + 1;
+            tmp := tmp / 2;
+        end loop;
+        return ret;
+    end function;
+
+    function ispow2(i : integer) return boolean is  -- i and (i - 1) is 0 for a power of two
+    begin
+        return to_integer(to_unsigned(i, 32) and to_unsigned(i - 1, 32)) = 0;
+    end function;
+
+    constant LINE_SIZE   : natural := LINE_SIZE_DW*8;  -- line size in bytes
+    constant OFFSET_BITS : natural := log2(LINE_SIZE);  -- address bits selecting a byte in a line
+    constant INDEX_BITS  : natural := log2(NUM_LINES);  -- address bits selecting the line
+    constant TAG_BITS    : natural := 64 - OFFSET_BITS - INDEX_BITS;  -- remaining upper address bits
+
+    subtype cacheline_type is std_logic_vector((LINE_SIZE*8)-1 downto 0);
+    type cacheline_array is array(0 to NUM_LINES-1) of cacheline_type;
+
+    subtype cacheline_tag_type is std_logic_vector(TAG_BITS-1 downto 0);
+    type cacheline_tag_array is array(0 to NUM_LINES-1) of cacheline_tag_type;
+
+    signal cachelines : cacheline_array := (others => (others => '0'));
+    signal tags       : cacheline_tag_array := (others => (others => '0'));
+    signal tags_valid : std_ulogic_vector(NUM_LINES-1 downto 0) := (others => '0');
+
+    attribute ram_style : string;
+    attribute ram_style of cachelines : signal is "block";
+
+    attribute ram_decomp : string;
+    attribute ram_decomp of cachelines : signal is "power";
+
+    type state_type is (IDLE, WAIT_ACK);
+
+    type reg_internal_type is record
+        state       : state_type;
+        w           : wishbone_master_out;  -- registered copy of wishbone_out
+        store_index : integer range 0 to (NUM_LINES-1);  -- line being refilled
+        store_word  : integer range 0 to (LINE_SIZE-1);  -- next 64-bit word of that line to store
+    end record;
+
+    signal r : reg_internal_type;
+
+    signal read_index : integer range 0 to NUM_LINES-1;
+    signal read_tag   : std_ulogic_vector(63-OFFSET_BITS-INDEX_BITS downto 0);
+    signal read_miss  : boolean;
+
+    function get_index(addr: std_ulogic_vector(63 downto 0)) return integer is
+    begin
+        return to_integer(unsigned(addr((OFFSET_BITS+INDEX_BITS-1) downto OFFSET_BITS)));
+    end;
+
+    function get_word(addr: std_ulogic_vector(63 downto 0); data: cacheline_type) return std_ulogic_vector is
+        variable word : integer;
+    begin
+        word := to_integer(unsigned(addr(OFFSET_BITS-1 downto 2)));  -- 32-bit word number within the line
+        return data((word+1)*32-1 downto word*32);
+    end;
+
+    function get_tag(addr: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
+    begin
+        return addr(63 downto OFFSET_BITS+INDEX_BITS);
+    end;
+begin
+    assert ispow2(LINE_SIZE) report "LINE_SIZE not power of 2" severity FAILURE;
+    assert ispow2(NUM_LINES) report "NUM_LINES not power of 2" severity FAILURE;
+
+    -- Combinational lookup: a hit is acked in the same cycle, a miss raises read_miss.
+    icache_read : process(all)
+    begin
+        read_index <= get_index(i_in.addr);
+        read_tag <= get_tag(i_in.addr);
+        read_miss <= false;
+
+        i_out.ack <= '0';
+        i_out.insn <= get_word(i_in.addr, cachelines(read_index));
+
+        if i_in.req = '1' then
+            if (tags_valid(read_index) = '1') and (tags(read_index) = read_tag) then
+                -- report hit asynchronously
+                i_out.ack <= '1';
+            else
+                read_miss <= true;
+            end if;
+        end if;
+    end process;
+
+    wishbone_out <= r.w;
+
+    -- Refill state machine: on a miss, read the whole line over wishbone,
+    -- one 64-bit word per ack, and mark the line valid after the last word.
+    icache_write : process(clk)
+    begin
+        if rising_edge(clk) then
+            r.w.dat <= (others => '0');  -- the icache only reads, so dat/we stay idle
+            r.w.sel <= "11111111";
+            r.w.we  <= '0';
+
+            -- Reset excludes the state machine: a miss must not raise cyc/stb while rst = '1'.
+            if rst = '1' then
+                tags_valid <= (others => '0');
+                r.state <= IDLE;
+                r.w.cyc <= '0';
+                r.w.stb <= '0';
+            else
+                case r.state is
+                    when IDLE =>
+                        if read_miss = true then
+                            r.state <= WAIT_ACK;
+                            r.store_word <= 0;
+                            r.store_index <= read_index;
+
+                            tags(read_index) <= read_tag;
+                            tags_valid(read_index) <= '0';  -- stays invalid until the refill completes
+
+                            r.w.adr <= i_in.addr(63 downto OFFSET_BITS) & (OFFSET_BITS-1 downto 0 => '0');
+                            r.w.cyc <= '1';
+                            r.w.stb <= '1';
+                        end if;
+                    when WAIT_ACK =>
+                        if wishbone_in.ack = '1' then
+                            cachelines(r.store_index)((r.store_word+1)*64-1 downto ((r.store_word)*64)) <= wishbone_in.dat;
+                            r.store_word <= r.store_word + 1;
+
+                            if r.store_word = (LINE_SIZE_DW-1) then  -- last doubleword of the line
+                                r.state <= IDLE;
+                                tags_valid(r.store_index) <= '1';
+                                r.w.cyc <= '0';
+                                r.w.stb <= '0';
+                            else
+                                r.w.adr(OFFSET_BITS-1 downto 3) <= std_ulogic_vector(to_unsigned(r.store_word+1, OFFSET_BITS-3));
+                            end if;
+                        end if;
+                end case;
+            end if;
+        end if;
+    end process;
+end;
diff --git a/microwatt.core b/microwatt.core
index 58485a7..b62aef9 100644
--- a/microwatt.core
+++ b/microwatt.core
@@ -26,6 +26,7 @@ filesets:
       - writeback.vhdl
       - insn_helpers.vhdl
       - core.vhdl
+      - icache.vhdl
     file_type : vhdlSource-2008
 
   soc: