Add a simple direct mapped icache

Signed-off-by: Anton Blanchard <anton@linux.ibm.com>
nia-debug
Anton Blanchard 5 years ago committed by Anton Blanchard
parent 6cbf456388
commit 89849a6856

@ -12,8 +12,8 @@ all: $(all)
$(GHDL) -a $(GHDLFLAGS) $< $(GHDL) -a $(GHDLFLAGS) $<


common.o: decode_types.o common.o: decode_types.o
core_tb.o: common.o wishbone_types.o soc.o core_tb.o: common.o core.o soc.o
core.o: common.o wishbone_types.o fetch1.o fetch2.o decode1.o decode2.o register_file.o cr_file.o execute1.o execute2.o loadstore1.o loadstore2.o multiply.o writeback.o core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o execute2.o loadstore1.o loadstore2.o multiply.o writeback.o
cr_file.o: common.o cr_file.o: common.o
crhelpers.o: common.o crhelpers.o: common.o
decode1.o: common.o decode_types.o decode1.o: common.o decode_types.o
@ -26,6 +26,7 @@ fetch2.o: common.o wishbone_types.o
glibc_random_helpers.o: glibc_random_helpers.o:
glibc_random.o: glibc_random_helpers.o glibc_random.o: glibc_random_helpers.o
helpers.o: helpers.o:
icache.o: common.o wishbone_types.o
insn_helpers.o: insn_helpers.o:
loadstore1.o: common.o loadstore1.o: common.o
loadstore2.o: common.o helpers.o wishbone_types.o loadstore2.o: common.o helpers.o wishbone_types.o

@ -31,6 +31,16 @@ package common is
end record; end record;
constant Decode1ToDecode2Init : Decode1ToDecode2Type := (valid => '0', decode => decode_rom_init, others => (others => '0')); constant Decode1ToDecode2Init : Decode1ToDecode2Type := (valid => '0', decode => decode_rom_init, others => (others => '0'));


type Fetch2ToIcacheType is record
req: std_ulogic;
addr: std_ulogic_vector(63 downto 0);
end record;

type IcacheToFetch2Type is record
ack: std_ulogic;
insn: std_ulogic_vector(31 downto 0);
end record;

type Decode2ToExecute1Type is record type Decode2ToExecute1Type is record
valid: std_ulogic; valid: std_ulogic;
insn_type: insn_type_t; insn_type: insn_type_t;

@ -31,6 +31,10 @@ architecture behave of core is
signal fetch1_to_fetch2: Fetch1ToFetch2Type; signal fetch1_to_fetch2: Fetch1ToFetch2Type;
signal fetch2_to_decode1: Fetch2ToDecode1Type; signal fetch2_to_decode1: Fetch2ToDecode1Type;


-- icache signals
signal fetch2_to_icache : Fetch2ToIcacheType;
signal icache_to_fetch2 : IcacheToFetch2Type;

-- decode signals -- decode signals
signal decode1_to_decode2: Decode1ToDecode2Type; signal decode1_to_decode2: Decode1ToDecode2Type;
signal decode2_to_execute1: Decode2ToExecute1Type; signal decode2_to_execute1: Decode2ToExecute1Type;
@ -97,14 +101,28 @@ begin
stall_in => fetch2_stall_in, stall_in => fetch2_stall_in,
stall_out => fetch2_stall_out, stall_out => fetch2_stall_out,
flush_in => flush, flush_in => flush,
wishbone_in => wishbone_insn_in, i_in => icache_to_fetch2,
wishbone_out => wishbone_insn_out, i_out => fetch2_to_icache,
f_in => fetch1_to_fetch2, f_in => fetch1_to_fetch2,
f_out => fetch2_to_decode1 f_out => fetch2_to_decode1
); );


fetch2_stall_in <= decode2_stall_out; fetch2_stall_in <= decode2_stall_out;


icache_0: entity work.icache
generic map(
LINE_SIZE_DW => 8,
NUM_LINES => 16
)
port map(
clk => clk,
rst => rst,
i_in => fetch2_to_icache,
i_out => icache_to_fetch2,
wishbone_out => wishbone_insn_out,
wishbone_in => wishbone_insn_in
);

decode1_0: entity work.decode1 decode1_0: entity work.decode1
port map ( port map (
clk => clk, clk => clk,

@ -1,7 +1,5 @@
library ieee; library ieee;
use ieee.std_logic_1164.all; use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use std.textio.all;


library work; library work;
use work.common.all; use work.common.all;

@ -16,9 +16,8 @@ entity fetch2 is


flush_in : in std_ulogic; flush_in : in std_ulogic;


-- instruction memory interface i_in : in IcacheToFetch2Type;
wishbone_in : in wishbone_slave_out; i_out : out Fetch2ToIcacheType;
wishbone_out : out wishbone_master_out;


f_in : in Fetch1ToFetch2Type; f_in : in Fetch1ToFetch2Type;


@ -27,35 +26,7 @@ entity fetch2 is
end entity fetch2; end entity fetch2;


architecture behaviour of fetch2 is architecture behaviour of fetch2 is
type state_type is (IDLE, JUST_IDLE, WAIT_ACK, WAIT_ACK_THROWAWAY); signal r, rin : Fetch2ToDecode1Type;

type reg_internal_type is record
state : state_type;
nia : std_ulogic_vector(63 downto 0);
w : wishbone_master_out;
-- Trivial 64B cache
cache : std_ulogic_vector(63 downto 0);
tag : std_ulogic_vector(60 downto 0);
tag_valid : std_ulogic;
end record;

function wishbone_fetch(nia : std_ulogic_vector(63 downto 0)) return wishbone_master_out is
variable w : wishbone_master_out;
begin
assert nia(2 downto 0) = "000";

w.adr := nia;
w.dat := (others => '0');
w.cyc := '1';
w.stb := '1';
w.sel := "11111111";
w.we := '0';

return w;
end;

signal r, rin : Fetch2ToDecode1Type;
signal r_int, rin_int : reg_internal_type;
begin begin
regs : process(clk) regs : process(clk)
begin begin
@ -64,96 +35,31 @@ begin
if rst = '1' or flush_in = '1' or stall_in = '0' then if rst = '1' or flush_in = '1' or stall_in = '0' then
r <= rin; r <= rin;
end if; end if;
r_int <= rin_int;
end if; end if;
end process; end process;


comb : process(all) comb : process(all)
variable v : Fetch2ToDecode1Type; variable v : Fetch2ToDecode1Type;
variable v_int : reg_internal_type;
begin begin
v := r; v := r;
v_int := r_int;


v.valid := '0'; -- asynchronous icache lookup
i_out.req <= '1';
i_out.addr <= f_in.nia;
v.valid := i_in.ack;
v.nia := f_in.nia; v.nia := f_in.nia;
v.insn := i_in.insn;
stall_out <= not i_in.ack;


case v_int.state is
when IDLE | JUST_IDLE =>
v_int.state := IDLE;

if (v_int.tag_valid = '1') and (v_int.tag = f_in.nia(63 downto 3)) then
v.valid := '1';
if f_in.nia(2) = '0' then
v.insn := v_int.cache(31 downto 0);
else
v.insn := v_int.cache(63 downto 32);
end if;
else
v_int.state := WAIT_ACK;
v_int.nia := f_in.nia;
v_int.w := wishbone_fetch(f_in.nia(63 downto 3) & "000");
end if;

when WAIT_ACK =>
if wishbone_in.ack = '1' then
v_int.state := IDLE;
v_int.w := wishbone_master_out_init;
v_int.cache := wishbone_in.dat;
v_int.tag := v_int.nia(63 downto 3);
v_int.tag_valid := '1';

v.valid := '1';
if v_int.nia(2) = '0' then
v.insn := v_int.cache(31 downto 0);
else
v.insn := v_int.cache(63 downto 32);
end if;
end if;

when WAIT_ACK_THROWAWAY =>
if wishbone_in.ack = '1' then
-- Should we put the returned data in the cache? We went to the
-- trouble of fetching it and it might be useful in the future

v_int.w := wishbone_master_out_init;

-- We need to stall fetch1 for one more cycle, so transition through JUST_IDLE
v_int.state := JUST_IDLE;
end if;
end case;

stall_out <= '0';
if v_int.state /= IDLE then
stall_out <= '1';
end if;


if flush_in = '1' then if flush_in = '1' then
v.valid := '0'; v.valid := '0';

-- Throw away in flight data
if v_int.state = WAIT_ACK then
v_int.state := WAIT_ACK_THROWAWAY;
end if;
end if;

if rst = '1' then
v := Fetch2ToDecode1Init;

v_int.state := IDLE;
v_int.nia := (others => '0');
v_int.w := wishbone_master_out_init;
v_int.cache := (others => '0');
v_int.tag := (others => '0');
v_int.tag_valid := '0';
end if; end if;


-- Update registers -- Update registers
rin_int <= v_int;
rin <= v; rin <= v;


-- Update outputs -- Update outputs
f_out <= r; f_out <= r;
wishbone_out <= r_int.w;
end process; end process;
end architecture behaviour; end architecture behaviour;

@ -0,0 +1,173 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.common.all;
use work.wishbone_types.all;

-- 64 bit direct mapped icache. All instructions are 4B aligned.

entity icache is
generic (
-- Line size in 64bit doublewords
LINE_SIZE_DW : natural := 8;
-- Number of lines
NUM_LINES : natural := 32
);
port (
clk : in std_ulogic;
rst : in std_ulogic;

i_in : in Fetch2ToIcacheType;
i_out : out IcacheToFetch2Type;

wishbone_out : out wishbone_master_out;
wishbone_in : in wishbone_slave_out
);
end entity icache;

architecture rtl of icache is
function log2(i : natural) return integer is
variable tmp : integer := i;
variable ret : integer := 0;
begin
while tmp > 1 loop
ret := ret + 1;
tmp := tmp / 2;
end loop;
return ret;
end function;

function ispow2(i : integer) return boolean is
begin
if to_integer(to_unsigned(i, 32) and to_unsigned(i - 1, 32)) = 0 then
return true;
else
return false;
end if;
end function;

constant LINE_SIZE : natural := LINE_SIZE_DW*8;
constant OFFSET_BITS : natural := log2(LINE_SIZE);
constant INDEX_BITS : natural := log2(NUM_LINES);
constant TAG_BITS : natural := 64 - OFFSET_BITS - INDEX_BITS;

subtype cacheline_type is std_logic_vector((LINE_SIZE*8)-1 downto 0);
type cacheline_array is array(0 to NUM_LINES-1) of cacheline_type;

subtype cacheline_tag_type is std_logic_vector(TAG_BITS-1 downto 0);
type cacheline_tag_array is array(0 to NUM_LINES-1) of cacheline_tag_type;

signal cachelines : cacheline_array := (others => (others => '0'));
signal tags : cacheline_tag_array := (others => (others => '0'));
signal tags_valid : std_ulogic_vector(NUM_LINES-1 downto 0) := (others => '0');

attribute ram_style : string;
attribute ram_style of cachelines : signal is "block";

attribute ram_decomp : string;
attribute ram_decomp of cachelines : signal is "power";

type state_type is (IDLE, WAIT_ACK);

type reg_internal_type is record
state : state_type;
w : wishbone_master_out;
store_index : integer range 0 to (NUM_LINES-1);
store_word : integer range 0 to (LINE_SIZE-1);
end record;

signal r : reg_internal_type;

signal read_index : integer range 0 to NUM_LINES-1;
signal read_tag : std_ulogic_vector(63-OFFSET_BITS-INDEX_BITS downto 0);
signal read_miss : boolean;

function get_index(addr: std_ulogic_vector(63 downto 0)) return integer is
begin
return to_integer(unsigned(addr((OFFSET_BITS+INDEX_BITS-1) downto OFFSET_BITS)));
end;

function get_word(addr: std_ulogic_vector(63 downto 0); data: cacheline_type) return std_ulogic_vector is
variable word : integer;
begin
word := to_integer(unsigned(addr(OFFSET_BITS-1 downto 2)));
return data((word+1)*32-1 downto word*32);
end;

function get_tag(addr: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
begin
return addr(63 downto OFFSET_BITS+INDEX_BITS);
end;
begin
assert ispow2(LINE_SIZE) report "LINE_SIZE not power of 2" severity FAILURE;
assert ispow2(NUM_LINES) report "NUM_LINES not power of 2" severity FAILURE;

icache_read : process(all)
begin
read_index <= get_index(i_in.addr);
read_tag <= get_tag(i_in.addr);
read_miss <= false;

i_out.ack <= '0';
i_out.insn <= get_word(i_in.addr, cachelines(read_index));

if i_in.req = '1' then
if (tags_valid(read_index) = '1') and (tags(read_index) = read_tag) then
-- report hit asynchronously
i_out.ack <= '1';
else
read_miss <= true;
end if;
end if;
end process;

wishbone_out <= r.w;

icache_write : process(clk)
begin
if rising_edge(clk) then
if rst = '1' then
tags_valid <= (others => '0');
r.state <= IDLE;
r.w.cyc <= '0';
r.w.stb <= '0';
end if;

r.w.dat <= (others => '0');
r.w.sel <= "11111111";
r.w.we <= '0';

case r.state is
when IDLE =>
if read_miss = true then
r.state <= WAIT_ACK;
r.store_word <= 0;
r.store_index <= read_index;

tags(read_index) <= read_tag;
tags_valid(read_index) <= '0';

r.w.adr <= i_in.addr(63 downto OFFSET_BITS) & (OFFSET_BITS-1 downto 0 => '0');
r.w.cyc <= '1';
r.w.stb <= '1';
end if;
when WAIT_ACK =>
if wishbone_in.ack = '1' then
cachelines(r.store_index)((r.store_word+1)*64-1 downto ((r.store_word)*64)) <= wishbone_in.dat;
r.store_word <= r.store_word + 1;

if r.store_word = (LINE_SIZE_DW-1) then
r.state <= IDLE;
tags_valid(r.store_index) <= '1';
r.w.cyc <= '0';
r.w.stb <= '0';
else
r.w.adr(OFFSET_BITS-1 downto 3) <= std_ulogic_vector(to_unsigned(r.store_word+1, OFFSET_BITS-3));
end if;
end if;
end case;
end if;
end process;
end;

@ -26,6 +26,7 @@ filesets:
- writeback.vhdl - writeback.vhdl
- insn_helpers.vhdl - insn_helpers.vhdl
- core.vhdl - core.vhdl
- icache.vhdl
file_type : vhdlSource-2008 file_type : vhdlSource-2008


soc: soc:

Loading…
Cancel
Save