Add TLB to icache

This adds a direct-mapped TLB to the icache, with 64 entries by default.
Execute1 now sends a "virt_mode" signal from MSR[IR] to fetch1 along
with redirects to indicate whether instruction addresses should be
translated through the TLB, and fetch1 sends that on to icache.
Similarly a "priv_mode" signal is sent to indicate the privilege
mode for instruction fetches.  This means that changes to MSR[IR]
or MSR[PR] don't take effect until the next redirect, meaning an
isync, rfid, branch, etc.

The icache uses a hash of the effective address (i.e. next instruction
address) to index the TLB.  The hash is an XOR of three fields of the
address; with a 64-entry TLB, the fields are bits 12--17, 18--23 and
24--29 of the address.  TLB invalidations simply invalidate the
indexed TLB entry without checking the contents.

If the icache detects a TLB miss with virt_mode=1, it will send a
fetch_failed indication through fetch2 to decode1, which will turn it
into a special OP_FETCH_FAILED opcode with unit=LDST.  That will get
sent down to loadstore1 which will currently just raise an Instruction
Storage Interrupt (0x400) exception.

One bit in the PTE obtained from the TLB is used to check whether an
instruction access is allowed -- the privilege bit (bit 3).  If bit 3
is 1 and priv_mode=0, then a fetch_failed indication is sent down to
fetch2 and to decode1, which generates an OP_FETCH_FAILED.  Any PTEs
with PTE bit 0 (EAA[3]) clear or bit 8 (R) clear should not be put
into the iTLB since such PTEs would not allow execution by any
context.

Tlbie operations get sent from mmu to icache over a new connection.

Unfortunately the privileged instruction tests are broken for now.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/169/head
Paul Mackerras 4 years ago
parent 882a5a0dc0
commit 3d4712ad43

@ -89,6 +89,8 @@ package common is


type Fetch1ToIcacheType is record type Fetch1ToIcacheType is record
req: std_ulogic; req: std_ulogic;
virt_mode : std_ulogic;
priv_mode : std_ulogic;
stop_mark: std_ulogic; stop_mark: std_ulogic;
nia: std_ulogic_vector(63 downto 0); nia: std_ulogic_vector(63 downto 0);
end record; end record;
@ -96,6 +98,7 @@ package common is
type IcacheToFetch2Type is record type IcacheToFetch2Type is record
valid: std_ulogic; valid: std_ulogic;
stop_mark: std_ulogic; stop_mark: std_ulogic;
fetch_failed: std_ulogic;
nia: std_ulogic_vector(63 downto 0); nia: std_ulogic_vector(63 downto 0);
insn: std_ulogic_vector(31 downto 0); insn: std_ulogic_vector(31 downto 0);
end record; end record;
@ -103,10 +106,12 @@ package common is
type Fetch2ToDecode1Type is record type Fetch2ToDecode1Type is record
valid: std_ulogic; valid: std_ulogic;
stop_mark : std_ulogic; stop_mark : std_ulogic;
fetch_failed: std_ulogic;
nia: std_ulogic_vector(63 downto 0); nia: std_ulogic_vector(63 downto 0);
insn: std_ulogic_vector(31 downto 0); insn: std_ulogic_vector(31 downto 0);
end record; end record;
constant Fetch2ToDecode1Init : Fetch2ToDecode1Type := (valid => '0', stop_mark => '0', others => (others => '0')); constant Fetch2ToDecode1Init : Fetch2ToDecode1Type := (valid => '0', stop_mark => '0', fetch_failed => '0',
others => (others => '0'));


type Decode1ToDecode2Type is record type Decode1ToDecode2Type is record
valid: std_ulogic; valid: std_ulogic;
@ -211,13 +216,17 @@ package common is


type Execute1ToFetch1Type is record type Execute1ToFetch1Type is record
redirect: std_ulogic; redirect: std_ulogic;
virt_mode: std_ulogic;
priv_mode: std_ulogic;
redirect_nia: std_ulogic_vector(63 downto 0); redirect_nia: std_ulogic_vector(63 downto 0);
end record; end record;
constant Execute1ToFetch1TypeInit : Execute1ToFetch1Type := (redirect => '0', others => (others => '0')); constant Execute1ToFetch1TypeInit : Execute1ToFetch1Type := (redirect => '0', virt_mode => '0',
priv_mode => '0', others => (others => '0'));


type Execute1ToLoadstore1Type is record type Execute1ToLoadstore1Type is record
valid : std_ulogic; valid : std_ulogic;
op : insn_type_t; -- what ld/st or m[tf]spr or TLB op to do op : insn_type_t; -- what ld/st or m[tf]spr or TLB op to do
nia : std_ulogic_vector(63 downto 0);
addr1 : std_ulogic_vector(63 downto 0); addr1 : std_ulogic_vector(63 downto 0);
addr2 : std_ulogic_vector(63 downto 0); addr2 : std_ulogic_vector(63 downto 0);
data : std_ulogic_vector(63 downto 0); -- data to write, unused for read data : std_ulogic_vector(63 downto 0); -- data to write, unused for read
@ -243,6 +252,7 @@ package common is
type Loadstore1ToExecute1Type is record type Loadstore1ToExecute1Type is record
exception : std_ulogic; exception : std_ulogic;
segment_fault : std_ulogic; segment_fault : std_ulogic;
instr_fault : std_ulogic;
end record; end record;


type Loadstore1ToDcacheType is record type Loadstore1ToDcacheType is record
@ -270,6 +280,7 @@ package common is
valid : std_ulogic; valid : std_ulogic;
tlbie : std_ulogic; tlbie : std_ulogic;
mtspr : std_ulogic; mtspr : std_ulogic;
iside : std_ulogic;
load : std_ulogic; load : std_ulogic;
priv : std_ulogic; priv : std_ulogic;
sprn : std_ulogic_vector(3 downto 0); sprn : std_ulogic_vector(3 downto 0);
@ -302,6 +313,13 @@ package common is
data : std_ulogic_vector(63 downto 0); data : std_ulogic_vector(63 downto 0);
end record; end record;


-- Signals sent from the MMU to the icache to maintain the icache's iTLB.
type MmuToIcacheType is record
-- Request to write an entry into the iTLB (the mmu currently ties this to '0')
tlbld : std_ulogic;
-- Request to invalidate iTLB entries (driven from the mmu's tlbie_req)
tlbie : std_ulogic;
-- Effective address; the icache hashes it to pick the iTLB entry and
-- stores its upper bits as the entry's tag
addr : std_ulogic_vector(63 downto 0);
-- PTE value stored in the selected iTLB entry on a tlbld
pte : std_ulogic_vector(63 downto 0);
end record;

type Loadstore1ToWritebackType is record type Loadstore1ToWritebackType is record
valid : std_ulogic; valid : std_ulogic;
write_enable: std_ulogic; write_enable: std_ulogic;

@ -42,6 +42,7 @@ architecture behave of core is
-- icache signals -- icache signals
signal fetch1_to_icache : Fetch1ToIcacheType; signal fetch1_to_icache : Fetch1ToIcacheType;
signal icache_to_fetch2 : IcacheToFetch2Type; signal icache_to_fetch2 : IcacheToFetch2Type;
signal mmu_to_icache : MmuToIcacheType;


-- decode signals -- decode signals
signal decode1_to_decode2: Decode1ToDecode2Type; signal decode1_to_decode2: Decode1ToDecode2Type;
@ -164,6 +165,7 @@ begin
rst => icache_rst, rst => icache_rst,
i_in => fetch1_to_icache, i_in => fetch1_to_icache,
i_out => icache_to_fetch2, i_out => icache_to_fetch2,
m_in => mmu_to_icache,
flush_in => flush, flush_in => flush,
stall_out => icache_stall_out, stall_out => icache_stall_out,
wishbone_out => wishbone_insn_out, wishbone_out => wishbone_insn_out,
@ -288,7 +290,8 @@ begin
l_in => loadstore1_to_mmu, l_in => loadstore1_to_mmu,
l_out => mmu_to_loadstore1, l_out => mmu_to_loadstore1,
d_out => mmu_to_dcache, d_out => mmu_to_dcache,
d_in => dcache_to_mmu d_in => dcache_to_mmu,
i_out => mmu_to_icache
); );


dcache_0: entity work.dcache dcache_0: entity work.dcache

@ -345,9 +345,10 @@ architecture behaviour of decode1 is
others => decode_rom_init others => decode_rom_init
); );


-- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl -- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl
-- op in out A out in out len ext pipe -- op in out A out in out len ext pipe
constant nop_instr : decode_rom_t := (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'); constant nop_instr : decode_rom_t := (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0');
constant fetch_fail_inst: decode_rom_t := (LDST, OP_FETCH_FAILED, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0');


begin begin
decode1_0: process(clk) decode1_0: process(clk)
@ -380,7 +381,15 @@ begin
end if; end if;


majorop := unsigned(f_in.insn(31 downto 26)); majorop := unsigned(f_in.insn(31 downto 26));
if majorop = "011111" then if f_in.fetch_failed = '1' then
v.valid := '1';
-- Only send down a single OP_FETCH_FAILED
if r.decode.insn_type = OP_FETCH_FAILED then
v.valid := '0';
end if;
v.decode := fetch_fail_inst;

elsif majorop = "011111" then
-- major opcode 31, lots of things -- major opcode 31, lots of things
v.decode := decode_op_31_array(to_integer(unsigned(f_in.insn(10 downto 1)))); v.decode := decode_op_31_array(to_integer(unsigned(f_in.insn(10 downto 1))));



@ -17,7 +17,8 @@ package decode_types is
OP_RLC, OP_RLCL, OP_RLCR, OP_SC, OP_SETB, OP_RLC, OP_RLCL, OP_RLCR, OP_SC, OP_SETB,
OP_SHL, OP_SHR, OP_SHL, OP_SHR,
OP_SYNC, OP_TLBIE, OP_TRAP, OP_SYNC, OP_TLBIE, OP_TRAP,
OP_XOR OP_XOR,
OP_FETCH_FAILED
); );
type input_reg_a_t is (NONE, RA, RA_OR_ZERO, SPR); type input_reg_a_t is (NONE, RA, RA_OR_ZERO, SPR);
type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD, CONST_DS, CONST_M1, CONST_SH, CONST_SH32, SPR); type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD, CONST_DS, CONST_M1, CONST_SH, CONST_SH32, SPR);

@ -430,6 +430,9 @@ begin
icache_inval <= '0'; icache_inval <= '0';
stall_out <= '0'; stall_out <= '0';
f_out <= Execute1ToFetch1TypeInit; f_out <= Execute1ToFetch1TypeInit;
-- send MSR[IR] and ~MSR[PR] up to fetch1
f_out.virt_mode <= ctrl.msr(MSR_IR);
f_out.priv_mode <= not ctrl.msr(MSR_PR);


-- Next insn adder used in a couple of places -- Next insn adder used in a couple of places
next_nia := std_ulogic_vector(unsigned(e_in.nia) + 4); next_nia := std_ulogic_vector(unsigned(e_in.nia) + 4);
@ -460,6 +463,8 @@ begin
ctrl_tmp.msr(MSR_RI) <= '0'; ctrl_tmp.msr(MSR_RI) <= '0';
ctrl_tmp.msr(MSR_LE) <= '1'; ctrl_tmp.msr(MSR_LE) <= '1';
f_out.redirect <= '1'; f_out.redirect <= '1';
f_out.virt_mode <= '0';
f_out.priv_mode <= '1';
f_out.redirect_nia <= ctrl.irq_nia; f_out.redirect_nia <= ctrl.irq_nia;
v.e.valid := e_in.valid; v.e.valid := e_in.valid;
report "Writing SRR1: " & to_hstring(ctrl.srr1); report "Writing SRR1: " & to_hstring(ctrl.srr1);
@ -651,6 +656,8 @@ begin


when OP_RFID => when OP_RFID =>
f_out.redirect <= '1'; f_out.redirect <= '1';
f_out.virt_mode <= b_in(MSR_IR) or b_in(MSR_PR);
f_out.priv_mode <= not b_in(MSR_PR);
f_out.redirect_nia <= a_in(63 downto 2) & "00"; -- srr0 f_out.redirect_nia <= a_in(63 downto 2) & "00"; -- srr0
-- Can't use msr_copy here because the partial function MSR -- Can't use msr_copy here because the partial function MSR
-- bits should be left unchanged, not zeroed. -- bits should be left unchanged, not zeroed.
@ -972,23 +979,35 @@ begin
v.e.write_data := result; v.e.write_data := result;
v.e.write_enable := result_en; v.e.write_enable := result_en;


-- generate DSI for load/store exceptions -- generate DSI or DSegI for load/store exceptions
-- or ISI or ISegI for instruction fetch exceptions
if l_in.exception = '1' then if l_in.exception = '1' then
if l_in.segment_fault = '0' then ctrl_tmp.srr1 <= msr_copy(ctrl.msr);
ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#300#, 64)); if l_in.instr_fault = '0' then
if l_in.segment_fault = '0' then
ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#300#, 64));
else
ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#380#, 64));
end if;
else else
ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#380#, 64)); if l_in.segment_fault = '0' then
ctrl_tmp.srr1(63 - 33) <= '1';
ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#400#, 64));
else
ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#480#, 64));
end if;
end if; end if;
ctrl_tmp.srr1 <= msr_copy(ctrl.msr);
v.e.exc_write_enable := '1'; v.e.exc_write_enable := '1';
v.e.exc_write_reg := fast_spr_num(SPR_SRR0); v.e.exc_write_reg := fast_spr_num(SPR_SRR0);
v.e.exc_write_data := r.ldst_nia; v.e.exc_write_data := r.ldst_nia;
report "ldst exception writing srr0=" & to_hstring(r.ldst_nia);
ctrl_tmp.irq_state <= WRITE_SRR1; ctrl_tmp.irq_state <= WRITE_SRR1;
v.e.valid := '1'; -- complete the original load or store v.e.valid := '1'; -- complete the original load or store
end if; end if;


-- Outputs to loadstore1 (async) -- Outputs to loadstore1 (async)
lv.op := e_in.insn_type; lv.op := e_in.insn_type;
lv.nia := e_in.nia;
lv.addr1 := a_in; lv.addr1 := a_in;
lv.addr2 := b_in; lv.addr2 := b_in;
lv.data := c_in; lv.data := c_in;

@ -40,6 +40,8 @@ begin
if rising_edge(clk) then if rising_edge(clk) then
if r /= r_next then if r /= r_next then
report "fetch1 rst:" & std_ulogic'image(rst) & report "fetch1 rst:" & std_ulogic'image(rst) &
" IR:" & std_ulogic'image(e_in.virt_mode) &
" P:" & std_ulogic'image(e_in.priv_mode) &
" R:" & std_ulogic'image(e_in.redirect) & " R:" & std_ulogic'image(e_in.redirect) &
" S:" & std_ulogic'image(stall_in) & " S:" & std_ulogic'image(stall_in) &
" T:" & std_ulogic'image(stop_in) & " T:" & std_ulogic'image(stop_in) &
@ -61,9 +63,13 @@ begin


if rst = '1' then if rst = '1' then
v.nia := RESET_ADDRESS; v.nia := RESET_ADDRESS;
v.virt_mode := '0';
v.priv_mode := '1';
v_int.stop_state := RUNNING; v_int.stop_state := RUNNING;
elsif e_in.redirect = '1' then elsif e_in.redirect = '1' then
v.nia := e_in.redirect_nia; v.nia := e_in.redirect_nia;
v.virt_mode := e_in.virt_mode;
v.priv_mode := e_in.priv_mode;
elsif stall_in = '0' then elsif stall_in = '0' then


-- For debug stop/step to work properly we need a little bit of -- For debug stop/step to work properly we need a little bit of

@ -46,6 +46,7 @@ begin
" F:" & std_ulogic'image(flush_in) & " F:" & std_ulogic'image(flush_in) &
" T:" & std_ulogic'image(rin.stop_mark) & " T:" & std_ulogic'image(rin.stop_mark) &
" V:" & std_ulogic'image(rin.valid) & " V:" & std_ulogic'image(rin.valid) &
" FF:" & std_ulogic'image(rin.fetch_failed) &
" nia:" & to_hstring(rin.nia); " nia:" & to_hstring(rin.nia);
end if; end if;


@ -84,6 +85,7 @@ begin


v.valid := v_i_in.valid; v.valid := v_i_in.valid;
v.stop_mark := v_i_in.stop_mark; v.stop_mark := v_i_in.stop_mark;
v.fetch_failed := v_i_in.fetch_failed;
v.nia := v_i_in.nia; v.nia := v_i_in.nia;
v.insn := v_i_in.insn; v.insn := v_i_in.insn;


@ -94,12 +96,14 @@ begin
-- --
if flush_in = '1' then if flush_in = '1' then
v_int.stash.valid := '0'; v_int.stash.valid := '0';
v_int.stash.fetch_failed := '0';
end if; end if;


-- If we are flushing or the instruction comes with a stop mark -- If we are flushing or the instruction comes with a stop mark
-- we tag it as invalid so it doesn't get decoded and executed -- we tag it as invalid so it doesn't get decoded and executed
if flush_in = '1' or v.stop_mark = '1' then if flush_in = '1' or v.stop_mark = '1' then
v.valid := '0'; v.valid := '0';
v.fetch_failed := '0';
end if; end if;


-- Clear stash on reset -- Clear stash on reset

@ -35,7 +35,13 @@ entity icache is
-- Number of lines in a set -- Number of lines in a set
NUM_LINES : positive := 32; NUM_LINES : positive := 32;
-- Number of ways -- Number of ways
NUM_WAYS : positive := 4 NUM_WAYS : positive := 4;
-- L1 ITLB number of entries (direct mapped)
TLB_SIZE : positive := 64;
-- L1 ITLB log_2(page_size)
TLB_LG_PGSZ : positive := 12;
-- Number of real address bits that we store
REAL_ADDR_BITS : positive := 56
); );
port ( port (
clk : in std_ulogic; clk : in std_ulogic;
@ -44,6 +50,8 @@ entity icache is
i_in : in Fetch1ToIcacheType; i_in : in Fetch1ToIcacheType;
i_out : out IcacheToFetch2Type; i_out : out IcacheToFetch2Type;


m_in : in MmuToIcacheType;

stall_out : out std_ulogic; stall_out : out std_ulogic;
flush_in : in std_ulogic; flush_in : in std_ulogic;


@ -78,10 +86,12 @@ architecture rtl of icache is
constant LINE_OFF_BITS : natural := log2(LINE_SIZE); constant LINE_OFF_BITS : natural := log2(LINE_SIZE);
-- ROW_OFF_BITS is the number of bits for the offset in a row -- ROW_OFF_BITS is the number of bits for the offset in a row
constant ROW_OFF_BITS : natural := log2(ROW_SIZE); constant ROW_OFF_BITS : natural := log2(ROW_SIZE);
-- INDEX_BITS is the number if bits to select a cache line -- INDEX_BITS is the number of bits to select a cache line
constant INDEX_BITS : natural := log2(NUM_LINES); constant INDEX_BITS : natural := log2(NUM_LINES);
-- SET_SIZE_BITS is the log base 2 of the set size
constant SET_SIZE_BITS : natural := LINE_OFF_BITS + INDEX_BITS;
-- TAG_BITS is the number of bits of the tag part of the address -- TAG_BITS is the number of bits of the tag part of the address
constant TAG_BITS : natural := 64 - LINE_OFF_BITS - INDEX_BITS; constant TAG_BITS : natural := REAL_ADDR_BITS - SET_SIZE_BITS;
-- WAY_BITS is the number of bits to select a way -- WAY_BITS is the number of bits to select a way
constant WAY_BITS : natural := log2(NUM_WAYS); constant WAY_BITS : natural := log2(NUM_WAYS);


@ -126,6 +136,27 @@ architecture rtl of icache is
attribute ram_style : string; attribute ram_style : string;
attribute ram_style of cache_tags : signal is "distributed"; attribute ram_style of cache_tags : signal is "distributed";


-- L1 ITLB.
constant TLB_BITS : natural := log2(TLB_SIZE);
constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_BITS);
constant TLB_PTE_BITS : natural := 64;

subtype tlb_index_t is integer range 0 to TLB_SIZE - 1;
type tlb_valids_t is array(tlb_index_t) of std_ulogic;
subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
type tlb_tags_t is array(tlb_index_t) of tlb_tag_t;
subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t;

signal itlb_valids : tlb_valids_t;
signal itlb_tags : tlb_tags_t;
signal itlb_ptes : tlb_ptes_t;
attribute ram_style of itlb_tags : signal is "distributed";
attribute ram_style of itlb_ptes : signal is "distributed";

-- Privilege bit from PTE EAA field
signal eaa_priv : std_ulogic;

-- Cache reload state machine -- Cache reload state machine
type state_t is (IDLE, WAIT_ACK); type state_t is (IDLE, WAIT_ACK);


@ -142,6 +173,9 @@ architecture rtl of icache is
store_way : way_t; store_way : way_t;
store_index : index_t; store_index : index_t;
store_row : row_t; store_row : row_t;

-- TLB miss state
fetch_failed : std_ulogic;
end record; end record;


signal r : reg_internal_t; signal r : reg_internal_t;
@ -155,6 +189,12 @@ architecture rtl of icache is
signal req_is_miss : std_ulogic; signal req_is_miss : std_ulogic;
signal req_laddr : std_ulogic_vector(63 downto 0); signal req_laddr : std_ulogic_vector(63 downto 0);


signal tlb_req_index : tlb_index_t;
signal real_addr : std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0);
signal ra_valid : std_ulogic;
signal priv_fault : std_ulogic;
signal access_ok : std_ulogic;

-- Cache RAM interface -- Cache RAM interface
type cache_ram_out_t is array(way_t) of cache_row_t; type cache_ram_out_t is array(way_t) of cache_row_t;
signal cache_out : cache_ram_out_t; signal cache_out : cache_ram_out_t;
@ -167,13 +207,13 @@ architecture rtl of icache is
-- Return the cache line index (tag index) for an address -- Return the cache line index (tag index) for an address
function get_index(addr: std_ulogic_vector(63 downto 0)) return index_t is function get_index(addr: std_ulogic_vector(63 downto 0)) return index_t is
begin begin
return to_integer(unsigned(addr(63-TAG_BITS downto LINE_OFF_BITS))); return to_integer(unsigned(addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS)));
end; end;


-- Return the cache row index (data memory) for an address -- Return the cache row index (data memory) for an address
function get_row(addr: std_ulogic_vector(63 downto 0)) return row_t is function get_row(addr: std_ulogic_vector(63 downto 0)) return row_t is
begin begin
return to_integer(unsigned(addr(63-TAG_BITS downto ROW_OFF_BITS))); return to_integer(unsigned(addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS)));
end; end;


-- Returns whether this is the last row of a line -- Returns whether this is the last row of a line
@ -231,9 +271,9 @@ architecture rtl of icache is
end; end;


-- Get the tag value from the address -- Get the tag value from the address
function get_tag(addr: std_ulogic_vector(63 downto 0)) return cache_tag_t is function get_tag(addr: std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0)) return cache_tag_t is
begin begin
return addr(63 downto 64-TAG_BITS); return addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);
end; end;


-- Read a tag from a tag memory row -- Read a tag from a tag memory row
@ -249,6 +289,15 @@ architecture rtl of icache is
tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag; tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag;
end; end;


-- Simple hash for direct-mapped TLB index.
-- XORs together three adjacent TLB_BITS-wide fields of addr, the lowest
-- one starting at bit TLB_LG_PGSZ (i.e. just above the page offset), and
-- returns the result as an integer TLB index.  With a 64-entry TLB and
-- TLB_LG_PGSZ = 12 the fields are address bits 12-17, 18-23 and 24-29.
-- NOTE(review): the top field ends at bit TLB_LG_PGSZ + 3 * TLB_BITS - 1,
-- which must not exceed 63 -- confirm if the generics are changed.
function hash_ea(addr: std_ulogic_vector(63 downto 0)) return tlb_index_t is
variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
begin
hash := addr(TLB_LG_PGSZ + TLB_BITS - 1 downto TLB_LG_PGSZ)
xor addr(TLB_LG_PGSZ + 2 * TLB_BITS - 1 downto TLB_LG_PGSZ + TLB_BITS)
xor addr(TLB_LG_PGSZ + 3 * TLB_BITS - 1 downto TLB_LG_PGSZ + 2 * TLB_BITS);
return to_integer(unsigned(hash));
end;
begin begin


assert LINE_SIZE mod ROW_SIZE = 0; assert LINE_SIZE mod ROW_SIZE = 0;
@ -260,9 +309,9 @@ begin
report "geometry bits don't add up" severity FAILURE; report "geometry bits don't add up" severity FAILURE;
assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS) assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS)
report "geometry bits don't add up" severity FAILURE; report "geometry bits don't add up" severity FAILURE;
assert (64 = TAG_BITS + INDEX_BITS + LINE_OFF_BITS) assert (REAL_ADDR_BITS = TAG_BITS + INDEX_BITS + LINE_OFF_BITS)
report "geometry bits don't add up" severity FAILURE; report "geometry bits don't add up" severity FAILURE;
assert (64 = TAG_BITS + ROW_BITS + ROW_OFF_BITS) assert (REAL_ADDR_BITS = TAG_BITS + ROW_BITS + ROW_OFF_BITS)
report "geometry bits don't add up" severity FAILURE; report "geometry bits don't add up" severity FAILURE;


sim_debug: if SIM generate sim_debug: if SIM generate
@ -356,6 +405,69 @@ begin
end generate; end generate;
end generate; end generate;


-- TLB hit detection and real address generation
-- Combinational process: from the fetch request (i_in) it produces
-- real_addr, ra_valid (translation available), priv_fault and access_ok.
itlb_lookup : process(all)
variable pte : tlb_pte_t;
variable ttag : tlb_tag_t;
begin
-- Select the (single) candidate TLB entry from a hash of the fetch address
tlb_req_index <= hash_ea(i_in.nia);
pte := itlb_ptes(tlb_req_index);
ttag := itlb_tags(tlb_req_index);
if i_in.virt_mode = '1' then
-- Translated fetch: real page number comes from the PTE, page offset
-- from the effective address
real_addr <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
i_in.nia(TLB_LG_PGSZ - 1 downto 0);
-- The translation is usable only if the stored tag matches the upper
-- effective address bits and the entry is marked valid
if ttag = i_in.nia(63 downto TLB_LG_PGSZ + TLB_BITS) then
ra_valid <= itlb_valids(tlb_req_index);
else
ra_valid <= '0';
end if;
-- PTE bit 3 is the privilege bit of the EAA field
eaa_priv <= pte(3);
else
-- Untranslated fetch: the effective address is used directly as the
-- real address (truncated to REAL_ADDR_BITS) and is always valid
real_addr <= i_in.nia(REAL_ADDR_BITS - 1 downto 0);
ra_valid <= '1';
eaa_priv <= '1';
end if;

-- no IAMR, so no KUEP support for now
-- Fault when the page is marked privileged but the fetch is not privileged.
-- eaa_priv, ra_valid and priv_fault are signals, so the values read here
-- are those from the previous evaluation; process(all) re-runs the
-- process when they change.
priv_fault <= eaa_priv and not i_in.priv_mode;
access_ok <= ra_valid and not priv_fault;
end process;

-- iTLB update
-- Clocked process that applies reset, invalidations and loads requested
-- by the MMU (m_in) to the iTLB valid bits, tags and PTEs.
itlb_update: process(clk)
variable tlbie : std_ulogic;
variable tlbia : std_ulogic;
variable tlbwe : std_ulogic;
variable wr_index : tlb_index_t;
begin
if rising_edge(clk) then
tlbie := '0';
tlbia := '0';
tlbwe := m_in.tlbld;
-- Decode the invalidation request: non-zero addr(11 downto 10) means
-- invalidate everything, otherwise invalidate a single entry.
-- NOTE(review): presumably addr(11 downto 10) is the IS field of the
-- tlbie operand -- confirm against the mmu / ISA.
if m_in.tlbie = '1' then
if m_in.addr(11 downto 10) /= "00" then
tlbia := '1';
else
tlbie := '1';
end if;
end if;
-- Entry selected by the same hash that the lookup side uses
wr_index := hash_ea(m_in.addr);
-- Priority: reset / invalidate-all, then single invalidate, then load
if rst = '1' or tlbia = '1' then
-- clear all valid bits
for i in tlb_index_t loop
itlb_valids(i) <= '0';
end loop;
elsif tlbie = '1' then
-- clear entry regardless of hit or miss
itlb_valids(wr_index) <= '0';
elsif tlbwe = '1' then
-- Install the new translation: tag is the effective address bits above
-- the page offset and index field, data is the PTE from the MMU
itlb_tags(wr_index) <= m_in.addr(63 downto TLB_LG_PGSZ + TLB_BITS);
itlb_ptes(wr_index) <= m_in.pte;
itlb_valids(wr_index) <= '1';
end if;
end if;
end process;

-- Cache hit detection, output to fetch2 and other misc logic -- Cache hit detection, output to fetch2 and other misc logic
icache_comb : process(all) icache_comb : process(all)
variable is_hit : std_ulogic; variable is_hit : std_ulogic;
@ -364,12 +476,13 @@ begin
-- Extract line, row and tag from request -- Extract line, row and tag from request
req_index <= get_index(i_in.nia); req_index <= get_index(i_in.nia);
req_row <= get_row(i_in.nia); req_row <= get_row(i_in.nia);
req_tag <= get_tag(i_in.nia); req_tag <= get_tag(real_addr);


-- Calculate address of beginning of cache line, will be -- Calculate address of beginning of cache line, will be
-- used for cache miss processing if needed -- used for cache miss processing if needed
-- --
req_laddr <= i_in.nia(63 downto LINE_OFF_BITS) & req_laddr <= (63 downto REAL_ADDR_BITS => '0') &
real_addr(REAL_ADDR_BITS - 1 downto LINE_OFF_BITS) &
(LINE_OFF_BITS-1 downto 0 => '0'); (LINE_OFF_BITS-1 downto 0 => '0');


-- Test if pending request is a hit on any way -- Test if pending request is a hit on any way
@ -385,8 +498,13 @@ begin
end loop; end loop;


-- Generate the "hit" and "miss" signals for the synchronous blocks -- Generate the "hit" and "miss" signals for the synchronous blocks
req_is_hit <= i_in.req and is_hit and not flush_in; if i_in.req = '1' and access_ok = '1' and flush_in = '0' then
req_is_miss <= i_in.req and not is_hit and not flush_in; req_is_hit <= is_hit;
req_is_miss <= not is_hit;
else
req_is_hit <= '0';
req_is_miss <= '0';
end if;
req_hit_way <= hit_way; req_hit_way <= hit_way;


-- The way to replace on a miss -- The way to replace on a miss
@ -404,9 +522,10 @@ begin
i_out.valid <= r.hit_valid; i_out.valid <= r.hit_valid;
i_out.nia <= r.hit_nia; i_out.nia <= r.hit_nia;
i_out.stop_mark <= r.hit_smark; i_out.stop_mark <= r.hit_smark;
i_out.fetch_failed <= r.fetch_failed;


-- Stall fetch1 if we have a miss -- Stall fetch1 if we have a miss on cache or TLB or a protection fault
stall_out <= not is_hit; stall_out <= not (is_hit and access_ok);


-- Wishbone requests output (from the cache miss reload machine) -- Wishbone requests output (from the cache miss reload machine)
wishbone_out <= r.wb; wishbone_out <= r.wb;
@ -419,22 +538,21 @@ begin
-- On a hit, latch the request for the next cycle, when the BRAM data -- On a hit, latch the request for the next cycle, when the BRAM data
-- will be available on the cache_out output of the corresponding way -- will be available on the cache_out output of the corresponding way
-- --
r.hit_valid <= req_is_hit;
-- Send stop marks and NIA down regardless of validity
r.hit_smark <= i_in.stop_mark;
r.hit_nia <= i_in.nia;
if req_is_hit = '1' then if req_is_hit = '1' then
r.hit_way <= req_hit_way; r.hit_way <= req_hit_way;
r.hit_nia <= i_in.nia;
r.hit_smark <= i_in.stop_mark; r.hit_smark <= i_in.stop_mark;
r.hit_valid <= '1';


report "cache hit nia:" & to_hstring(i_in.nia) & report "cache hit nia:" & to_hstring(i_in.nia) &
" IR:" & std_ulogic'image(i_in.virt_mode) &
" SM:" & std_ulogic'image(i_in.stop_mark) & " SM:" & std_ulogic'image(i_in.stop_mark) &
" idx:" & integer'image(req_index) & " idx:" & integer'image(req_index) &
" tag:" & to_hstring(req_tag) & " tag:" & to_hstring(req_tag) &
" way: " & integer'image(req_hit_way); " way:" & integer'image(req_hit_way) &
else " RA:" & to_hstring(real_addr);
r.hit_valid <= '0';

-- Send stop marks down regardless of validity
r.hit_smark <= i_in.stop_mark;
end if; end if;
end if; end if;
end process; end process;
@ -468,10 +586,12 @@ begin
-- We need to read a cache line -- We need to read a cache line
if req_is_miss = '1' then if req_is_miss = '1' then
report "cache miss nia:" & to_hstring(i_in.nia) & report "cache miss nia:" & to_hstring(i_in.nia) &
" IR:" & std_ulogic'image(i_in.virt_mode) &
" SM:" & std_ulogic'image(i_in.stop_mark) & " SM:" & std_ulogic'image(i_in.stop_mark) &
" idx:" & integer'image(req_index) & " idx:" & integer'image(req_index) &
" way:" & integer'image(replace_way) & " way:" & integer'image(replace_way) &
" tag:" & to_hstring(req_tag); " tag:" & to_hstring(req_tag) &
" RA:" & to_hstring(real_addr);


-- Force misses on that way while reloading that line -- Force misses on that way while reloading that line
cache_valids(req_index)(replace_way) <= '0'; cache_valids(req_index)(replace_way) <= '0';
@ -539,6 +659,13 @@ begin
end if; end if;
end case; end case;
end if; end if;

-- TLB miss and protection fault processing
if rst = '1' or flush_in = '1' or m_in.tlbld = '1' then
r.fetch_failed <= '0';
elsif i_in.req = '1' and access_ok = '0' then
r.fetch_failed <= '1';
end if;
end if; end if;
end process; end process;
end; end;

@ -15,6 +15,8 @@ architecture behave of icache_tb is
signal i_out : Fetch1ToIcacheType; signal i_out : Fetch1ToIcacheType;
signal i_in : IcacheToFetch2Type; signal i_in : IcacheToFetch2Type;


signal m_out : MmuToIcacheType;

signal wb_bram_in : wishbone_master_out; signal wb_bram_in : wishbone_master_out;
signal wb_bram_out : wishbone_slave_out; signal wb_bram_out : wishbone_slave_out;


@ -30,6 +32,7 @@ begin
rst => rst, rst => rst,
i_in => i_out, i_in => i_out,
i_out => i_in, i_out => i_in,
m_in => m_out,
flush_in => '0', flush_in => '0',
wishbone_out => wb_bram_in, wishbone_out => wb_bram_in,
wishbone_in => wb_bram_out wishbone_in => wb_bram_out
@ -70,6 +73,11 @@ begin
i_out.nia <= (others => '0'); i_out.nia <= (others => '0');
i_out.stop_mark <= '0'; i_out.stop_mark <= '0';


m_out.tlbld <= '0';
m_out.tlbie <= '0';
m_out.addr <= (others => '0');
m_out.pte <= (others => '0');

wait until rising_edge(clk); wait until rising_edge(clk);
wait until rising_edge(clk); wait until rising_edge(clk);
wait until rising_edge(clk); wait until rising_edge(clk);

@ -41,7 +41,8 @@ architecture behave of loadstore1 is
ACK_WAIT, -- waiting for ack from dcache ACK_WAIT, -- waiting for ack from dcache
LD_UPDATE, -- writing rA with computed addr on load LD_UPDATE, -- writing rA with computed addr on load
MMU_LOOKUP, -- waiting for MMU to look up translation MMU_LOOKUP, -- waiting for MMU to look up translation
TLBIE_WAIT -- waiting for MMU to finish doing a tlbie TLBIE_WAIT, -- waiting for MMU to finish doing a tlbie
DO_ISI
); );


type reg_stage_t is record type reg_stage_t is record
@ -70,6 +71,7 @@ architecture behave of loadstore1 is
second_bytes : std_ulogic_vector(7 downto 0); second_bytes : std_ulogic_vector(7 downto 0);
dar : std_ulogic_vector(63 downto 0); dar : std_ulogic_vector(63 downto 0);
dsisr : std_ulogic_vector(31 downto 0); dsisr : std_ulogic_vector(31 downto 0);
instr_fault : std_ulogic;
end record; end record;


type byte_sel_t is array(0 to 7) of std_ulogic; type byte_sel_t is array(0 to 7) of std_ulogic;
@ -154,6 +156,7 @@ begin
variable mmureq : std_ulogic; variable mmureq : std_ulogic;
variable dsisr : std_ulogic_vector(31 downto 0); variable dsisr : std_ulogic_vector(31 downto 0);
variable mmu_mtspr : std_ulogic; variable mmu_mtspr : std_ulogic;
variable itlb_fault : std_ulogic;
begin begin
v := r; v := r;
req := '0'; req := '0';
@ -163,6 +166,7 @@ begin
addr := lsu_sum; addr := lsu_sum;
mfspr := '0'; mfspr := '0';
mmu_mtspr := '0'; mmu_mtspr := '0';
itlb_fault := '0';
sprn := std_ulogic_vector(to_unsigned(l_in.spr_num, 10)); sprn := std_ulogic_vector(to_unsigned(l_in.spr_num, 10));
sprval := (others => '0'); -- avoid inferred latches sprval := (others => '0'); -- avoid inferred latches
exception := '0'; exception := '0';
@ -230,6 +234,7 @@ begin
v.load := '0'; v.load := '0';
v.dcbz := '0'; v.dcbz := '0';
v.tlbie := '0'; v.tlbie := '0';
v.instr_fault := '0';
v.dwords_done := '0'; v.dwords_done := '0';
case l_in.op is case l_in.op is
when OP_STORE => when OP_STORE =>
@ -272,6 +277,10 @@ begin
-- writing one of the SPRs in the MMU -- writing one of the SPRs in the MMU
mmu_mtspr := '1'; mmu_mtspr := '1';
end if; end if;
when OP_FETCH_FAILED =>
-- for now, always signal an ISI in the next cycle
v.instr_fault := '1';
v.state := DO_ISI;
when others => when others =>
assert false report "unknown op sent to loadstore1"; assert false report "unknown op sent to loadstore1";
end case; end case;
@ -425,6 +434,10 @@ begin
do_update := '1'; do_update := '1';
v.state := IDLE; v.state := IDLE;
done := '1'; done := '1';

when DO_ISI =>
exception := '1';
v.state := IDLE;
end case; end case;


-- Update outputs to dcache -- Update outputs to dcache
@ -441,6 +454,7 @@ begin


-- Update outputs to MMU -- Update outputs to MMU
m_out.valid <= mmureq; m_out.valid <= mmureq;
m_out.iside <= itlb_fault;
m_out.load <= r.load; m_out.load <= r.load;
m_out.priv <= r.priv_mode; m_out.priv <= r.priv_mode;
m_out.tlbie <= v.tlbie; m_out.tlbie <= v.tlbie;
@ -472,9 +486,11 @@ begin


-- update exception info back to execute1 -- update exception info back to execute1
e_out.exception <= exception; e_out.exception <= exception;
e_out.segment_fault <= m_in.segerr; e_out.segment_fault <= '0';
if exception = '1' then e_out.instr_fault <= r.instr_fault;
if exception = '1' and r.instr_fault = '0' then
v.dar := addr; v.dar := addr;
e_out.segment_fault <= m_in.segerr;
if m_in.segerr = '0' then if m_in.segerr = '0' then
v.dsisr := dsisr; v.dsisr := dsisr;
end if; end if;

@ -18,7 +18,9 @@ entity mmu is
l_out : out MmuToLoadstore1Type; l_out : out MmuToLoadstore1Type;


d_out : out MmuToDcacheType; d_out : out MmuToDcacheType;
d_in : in DcacheToMmuType d_in : in DcacheToMmuType;

i_out : out MmuToIcacheType
); );
end mmu; end mmu;


@ -336,5 +338,11 @@ begin
d_out.addr <= pgtable_addr; d_out.addr <= pgtable_addr;
d_out.pte <= (others => '0'); d_out.pte <= (others => '0');
end if; end if;

i_out.tlbld <= '0';
i_out.tlbie <= tlbie_req;
i_out.addr <= l_in.addr;
i_out.pte <= l_in.rs;

end process; end process;
end; end;

Loading…
Cancel
Save