From 3d4712ad436bf4465014fd418d895286989e3823 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 27 Apr 2020 17:43:19 +1000 Subject: [PATCH] Add TLB to icache This adds a direct-mapped TLB to the icache, with 64 entries by default. Execute1 now sends a "virt_mode" signal from MSR[IR] to fetch1 along with redirects to indicate whether instruction addresses should be translated through the TLB, and fetch1 sends that on to icache. Similarly a "priv_mode" signal is sent to indicate the privilege mode for instruction fetches. This means that changes to MSR[IR] or MSR[PR] don't take effect until the next redirect, meaning an isync, rfid, branch, etc. The icache uses a hash of the effective address (i.e. next instruction address) to index the TLB. The hash is an XOR of three fields of the address; with a 64-entry TLB, the fields are bits 12--17, 18--23 and 24--29 of the address. TLB invalidations simply invalidate the indexed TLB entry without checking the contents. If the icache detects a TLB miss with virt_mode=1, it will send a fetch_failed indication through fetch2 to decode1, which will turn it into a special OP_FETCH_FAILED opcode with unit=LDST. That will get sent down to loadstore1 which will currently just raise an Instruction Storage Interrupt (0x400) exception. One bit in the PTE obtained from the TLB is used to check whether an instruction access is allowed -- the privilege bit (bit 3). If bit 3 is 1 and priv_mode=0, then a fetch_failed indication is sent down to fetch2 and to decode1, which generates an OP_FETCH_FAILED. Any PTEs with PTE bit 0 (EAA[3]) clear or bit 8 (R) clear should not be put into the iTLB since such PTEs would not allow execution by any context. Tlbie operations get sent from mmu to icache over a new connection. Unfortunately the privileged instruction tests are broken for now. 
Signed-off-by: Paul Mackerras --- common.vhdl | 22 +++++- core.vhdl | 5 +- decode1.vhdl | 17 +++-- decode_types.vhdl | 3 +- execute1.vhdl | 29 ++++++-- fetch1.vhdl | 6 ++ fetch2.vhdl | 4 ++ icache.vhdl | 175 +++++++++++++++++++++++++++++++++++++++------- icache_tb.vhdl | 8 +++ loadstore1.vhdl | 22 +++++- mmu.vhdl | 10 ++- 11 files changed, 260 insertions(+), 41 deletions(-) diff --git a/common.vhdl b/common.vhdl index 424259b..ba8aab3 100644 --- a/common.vhdl +++ b/common.vhdl @@ -89,6 +89,8 @@ package common is type Fetch1ToIcacheType is record req: std_ulogic; + virt_mode : std_ulogic; + priv_mode : std_ulogic; stop_mark: std_ulogic; nia: std_ulogic_vector(63 downto 0); end record; @@ -96,6 +98,7 @@ package common is type IcacheToFetch2Type is record valid: std_ulogic; stop_mark: std_ulogic; + fetch_failed: std_ulogic; nia: std_ulogic_vector(63 downto 0); insn: std_ulogic_vector(31 downto 0); end record; @@ -103,10 +106,12 @@ package common is type Fetch2ToDecode1Type is record valid: std_ulogic; stop_mark : std_ulogic; + fetch_failed: std_ulogic; nia: std_ulogic_vector(63 downto 0); insn: std_ulogic_vector(31 downto 0); end record; - constant Fetch2ToDecode1Init : Fetch2ToDecode1Type := (valid => '0', stop_mark => '0', others => (others => '0')); + constant Fetch2ToDecode1Init : Fetch2ToDecode1Type := (valid => '0', stop_mark => '0', fetch_failed => '0', + others => (others => '0')); type Decode1ToDecode2Type is record valid: std_ulogic; @@ -211,13 +216,17 @@ package common is type Execute1ToFetch1Type is record redirect: std_ulogic; + virt_mode: std_ulogic; + priv_mode: std_ulogic; redirect_nia: std_ulogic_vector(63 downto 0); end record; - constant Execute1ToFetch1TypeInit : Execute1ToFetch1Type := (redirect => '0', others => (others => '0')); + constant Execute1ToFetch1TypeInit : Execute1ToFetch1Type := (redirect => '0', virt_mode => '0', + priv_mode => '0', others => (others => '0')); type Execute1ToLoadstore1Type is record valid : std_ulogic; op : 
insn_type_t; -- what ld/st or m[tf]spr or TLB op to do + nia : std_ulogic_vector(63 downto 0); addr1 : std_ulogic_vector(63 downto 0); addr2 : std_ulogic_vector(63 downto 0); data : std_ulogic_vector(63 downto 0); -- data to write, unused for read @@ -243,6 +252,7 @@ package common is type Loadstore1ToExecute1Type is record exception : std_ulogic; segment_fault : std_ulogic; + instr_fault : std_ulogic; end record; type Loadstore1ToDcacheType is record @@ -270,6 +280,7 @@ package common is valid : std_ulogic; tlbie : std_ulogic; mtspr : std_ulogic; + iside : std_ulogic; load : std_ulogic; priv : std_ulogic; sprn : std_ulogic_vector(3 downto 0); @@ -302,6 +313,13 @@ package common is data : std_ulogic_vector(63 downto 0); end record; + type MmuToIcacheType is record + tlbld : std_ulogic; + tlbie : std_ulogic; + addr : std_ulogic_vector(63 downto 0); + pte : std_ulogic_vector(63 downto 0); + end record; + type Loadstore1ToWritebackType is record valid : std_ulogic; write_enable: std_ulogic; diff --git a/core.vhdl b/core.vhdl index c870404..05fb328 100644 --- a/core.vhdl +++ b/core.vhdl @@ -42,6 +42,7 @@ architecture behave of core is -- icache signals signal fetch1_to_icache : Fetch1ToIcacheType; signal icache_to_fetch2 : IcacheToFetch2Type; + signal mmu_to_icache : MmuToIcacheType; -- decode signals signal decode1_to_decode2: Decode1ToDecode2Type; @@ -164,6 +165,7 @@ begin rst => icache_rst, i_in => fetch1_to_icache, i_out => icache_to_fetch2, + m_in => mmu_to_icache, flush_in => flush, stall_out => icache_stall_out, wishbone_out => wishbone_insn_out, @@ -288,7 +290,8 @@ begin l_in => loadstore1_to_mmu, l_out => mmu_to_loadstore1, d_out => mmu_to_dcache, - d_in => dcache_to_mmu + d_in => dcache_to_mmu, + i_out => mmu_to_icache ); dcache_0: entity work.dcache diff --git a/decode1.vhdl b/decode1.vhdl index b7212c2..598e59c 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -345,9 +345,10 @@ architecture behaviour of decode1 is others => decode_rom_init ); - -- unit 
internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl - -- op in out A out in out len ext pipe - constant nop_instr : decode_rom_t := (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'); + -- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl + -- op in out A out in out len ext pipe + constant nop_instr : decode_rom_t := (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'); + constant fetch_fail_inst: decode_rom_t := (LDST, OP_FETCH_FAILED, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'); begin decode1_0: process(clk) @@ -380,7 +381,15 @@ begin end if; majorop := unsigned(f_in.insn(31 downto 26)); - if majorop = "011111" then + if f_in.fetch_failed = '1' then + v.valid := '1'; + -- Only send down a single OP_FETCH_FAILED + if r.decode.insn_type = OP_FETCH_FAILED then + v.valid := '0'; + end if; + v.decode := fetch_fail_inst; + + elsif majorop = "011111" then -- major opcode 31, lots of things v.decode := decode_op_31_array(to_integer(unsigned(f_in.insn(10 downto 1)))); diff --git a/decode_types.vhdl b/decode_types.vhdl index ef51bd0..8f000a0 100644 --- a/decode_types.vhdl +++ b/decode_types.vhdl @@ -17,7 +17,8 @@ package decode_types is OP_RLC, OP_RLCL, OP_RLCR, OP_SC, OP_SETB, OP_SHL, OP_SHR, OP_SYNC, OP_TLBIE, OP_TRAP, - OP_XOR + OP_XOR, + OP_FETCH_FAILED ); type input_reg_a_t is (NONE, RA, RA_OR_ZERO, SPR); type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD, CONST_DS, CONST_M1, CONST_SH, CONST_SH32, SPR); diff --git a/execute1.vhdl b/execute1.vhdl index 7181f7f..71c79ee 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -430,6 +430,9 @@ begin icache_inval <= '0'; stall_out <= '0'; f_out <= Execute1ToFetch1TypeInit; + -- send MSR[IR] and ~MSR[PR] up to 
fetch1 + f_out.virt_mode <= ctrl.msr(MSR_IR); + f_out.priv_mode <= not ctrl.msr(MSR_PR); -- Next insn adder used in a couple of places next_nia := std_ulogic_vector(unsigned(e_in.nia) + 4); @@ -460,6 +463,8 @@ begin ctrl_tmp.msr(MSR_RI) <= '0'; ctrl_tmp.msr(MSR_LE) <= '1'; f_out.redirect <= '1'; + f_out.virt_mode <= '0'; + f_out.priv_mode <= '1'; f_out.redirect_nia <= ctrl.irq_nia; v.e.valid := e_in.valid; report "Writing SRR1: " & to_hstring(ctrl.srr1); @@ -651,6 +656,8 @@ begin when OP_RFID => f_out.redirect <= '1'; + f_out.virt_mode <= b_in(MSR_IR) or b_in(MSR_PR); + f_out.priv_mode <= not b_in(MSR_PR); f_out.redirect_nia <= a_in(63 downto 2) & "00"; -- srr0 -- Can't use msr_copy here because the partial function MSR -- bits should be left unchanged, not zeroed. @@ -972,23 +979,35 @@ begin v.e.write_data := result; v.e.write_enable := result_en; - -- generate DSI for load/store exceptions + -- generate DSI or DSegI for load/store exceptions + -- or ISI or ISegI for instruction fetch exceptions if l_in.exception = '1' then - if l_in.segment_fault = '0' then - ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#300#, 64)); + ctrl_tmp.srr1 <= msr_copy(ctrl.msr); + if l_in.instr_fault = '0' then + if l_in.segment_fault = '0' then + ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#300#, 64)); + else + ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#380#, 64)); + end if; else - ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#380#, 64)); + if l_in.segment_fault = '0' then + ctrl_tmp.srr1(63 - 33) <= '1'; + ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#400#, 64)); + else + ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#480#, 64)); + end if; end if; - ctrl_tmp.srr1 <= msr_copy(ctrl.msr); v.e.exc_write_enable := '1'; v.e.exc_write_reg := fast_spr_num(SPR_SRR0); v.e.exc_write_data := r.ldst_nia; + report "ldst exception writing srr0=" & to_hstring(r.ldst_nia); ctrl_tmp.irq_state <= WRITE_SRR1; v.e.valid := '1'; -- complete the original load or store 
end if; -- Outputs to loadstore1 (async) lv.op := e_in.insn_type; + lv.nia := e_in.nia; lv.addr1 := a_in; lv.addr2 := b_in; lv.data := c_in; diff --git a/fetch1.vhdl b/fetch1.vhdl index 9cd5445..936e830 100644 --- a/fetch1.vhdl +++ b/fetch1.vhdl @@ -40,6 +40,8 @@ begin if rising_edge(clk) then if r /= r_next then report "fetch1 rst:" & std_ulogic'image(rst) & + " IR:" & std_ulogic'image(e_in.virt_mode) & + " P:" & std_ulogic'image(e_in.priv_mode) & " R:" & std_ulogic'image(e_in.redirect) & " S:" & std_ulogic'image(stall_in) & " T:" & std_ulogic'image(stop_in) & @@ -61,9 +63,13 @@ begin if rst = '1' then v.nia := RESET_ADDRESS; + v.virt_mode := '0'; + v.priv_mode := '1'; v_int.stop_state := RUNNING; elsif e_in.redirect = '1' then v.nia := e_in.redirect_nia; + v.virt_mode := e_in.virt_mode; + v.priv_mode := e_in.priv_mode; elsif stall_in = '0' then -- For debug stop/step to work properly we need a little bit of diff --git a/fetch2.vhdl b/fetch2.vhdl index 99f92ee..cc0727d 100644 --- a/fetch2.vhdl +++ b/fetch2.vhdl @@ -46,6 +46,7 @@ begin " F:" & std_ulogic'image(flush_in) & " T:" & std_ulogic'image(rin.stop_mark) & " V:" & std_ulogic'image(rin.valid) & + " FF:" & std_ulogic'image(rin.fetch_failed) & " nia:" & to_hstring(rin.nia); end if; @@ -84,6 +85,7 @@ begin v.valid := v_i_in.valid; v.stop_mark := v_i_in.stop_mark; + v.fetch_failed := v_i_in.fetch_failed; v.nia := v_i_in.nia; v.insn := v_i_in.insn; @@ -94,12 +96,14 @@ begin -- if flush_in = '1' then v_int.stash.valid := '0'; + v_int.stash.fetch_failed := '0'; end if; -- If we are flushing or the instruction comes with a stop mark -- we tag it as invalid so it doesn't get decoded and executed if flush_in = '1' or v.stop_mark = '1' then v.valid := '0'; + v.fetch_failed := '0'; end if; -- Clear stash on reset diff --git a/icache.vhdl b/icache.vhdl index 343c73a..7d7973d 100644 --- a/icache.vhdl +++ b/icache.vhdl @@ -35,7 +35,13 @@ entity icache is -- Number of lines in a set NUM_LINES : positive := 32; -- Number of 
ways - NUM_WAYS : positive := 4 + NUM_WAYS : positive := 4; + -- L1 ITLB number of entries (direct mapped) + TLB_SIZE : positive := 64; + -- L1 ITLB log_2(page_size) + TLB_LG_PGSZ : positive := 12; + -- Number of real address bits that we store + REAL_ADDR_BITS : positive := 56 ); port ( clk : in std_ulogic; @@ -44,6 +50,8 @@ entity icache is i_in : in Fetch1ToIcacheType; i_out : out IcacheToFetch2Type; + m_in : in MmuToIcacheType; + stall_out : out std_ulogic; flush_in : in std_ulogic; @@ -78,10 +86,12 @@ architecture rtl of icache is constant LINE_OFF_BITS : natural := log2(LINE_SIZE); -- ROW_OFF_BITS is the number of bits for the offset in a row constant ROW_OFF_BITS : natural := log2(ROW_SIZE); - -- INDEX_BITS is the number if bits to select a cache line + -- INDEX_BITS is the number of bits to select a cache line constant INDEX_BITS : natural := log2(NUM_LINES); + -- SET_SIZE_BITS is the log base 2 of the set size + constant SET_SIZE_BITS : natural := LINE_OFF_BITS + INDEX_BITS; -- TAG_BITS is the number of bits of the tag part of the address - constant TAG_BITS : natural := 64 - LINE_OFF_BITS - INDEX_BITS; + constant TAG_BITS : natural := REAL_ADDR_BITS - SET_SIZE_BITS; -- WAY_BITS is the number of bits to select a way constant WAY_BITS : natural := log2(NUM_WAYS); @@ -126,6 +136,27 @@ architecture rtl of icache is attribute ram_style : string; attribute ram_style of cache_tags : signal is "distributed"; + -- L1 ITLB. 
+ constant TLB_BITS : natural := log2(TLB_SIZE); + constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_BITS); + constant TLB_PTE_BITS : natural := 64; + + subtype tlb_index_t is integer range 0 to TLB_SIZE - 1; + type tlb_valids_t is array(tlb_index_t) of std_ulogic; + subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0); + type tlb_tags_t is array(tlb_index_t) of tlb_tag_t; + subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0); + type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t; + + signal itlb_valids : tlb_valids_t; + signal itlb_tags : tlb_tags_t; + signal itlb_ptes : tlb_ptes_t; + attribute ram_style of itlb_tags : signal is "distributed"; + attribute ram_style of itlb_ptes : signal is "distributed"; + + -- Privilege bit from PTE EAA field + signal eaa_priv : std_ulogic; + -- Cache reload state machine type state_t is (IDLE, WAIT_ACK); @@ -142,6 +173,9 @@ architecture rtl of icache is store_way : way_t; store_index : index_t; store_row : row_t; + + -- TLB miss state + fetch_failed : std_ulogic; end record; signal r : reg_internal_t; @@ -155,6 +189,12 @@ architecture rtl of icache is signal req_is_miss : std_ulogic; signal req_laddr : std_ulogic_vector(63 downto 0); + signal tlb_req_index : tlb_index_t; + signal real_addr : std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0); + signal ra_valid : std_ulogic; + signal priv_fault : std_ulogic; + signal access_ok : std_ulogic; + -- Cache RAM interface type cache_ram_out_t is array(way_t) of cache_row_t; signal cache_out : cache_ram_out_t; @@ -167,13 +207,13 @@ architecture rtl of icache is -- Return the cache line index (tag index) for an address function get_index(addr: std_ulogic_vector(63 downto 0)) return index_t is begin - return to_integer(unsigned(addr(63-TAG_BITS downto LINE_OFF_BITS))); + return to_integer(unsigned(addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS))); end; -- Return the cache row index (data memory) for an address function get_row(addr: std_ulogic_vector(63 
downto 0)) return row_t is begin - return to_integer(unsigned(addr(63-TAG_BITS downto ROW_OFF_BITS))); + return to_integer(unsigned(addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS))); end; -- Returns whether this is the last row of a line @@ -231,9 +271,9 @@ architecture rtl of icache is end; -- Get the tag value from the address - function get_tag(addr: std_ulogic_vector(63 downto 0)) return cache_tag_t is + function get_tag(addr: std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0)) return cache_tag_t is begin - return addr(63 downto 64-TAG_BITS); + return addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS); end; -- Read a tag from a tag memory row @@ -249,6 +289,15 @@ architecture rtl of icache is tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag; end; + -- Simple hash for direct-mapped TLB index + function hash_ea(addr: std_ulogic_vector(63 downto 0)) return tlb_index_t is + variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0); + begin + hash := addr(TLB_LG_PGSZ + TLB_BITS - 1 downto TLB_LG_PGSZ) + xor addr(TLB_LG_PGSZ + 2 * TLB_BITS - 1 downto TLB_LG_PGSZ + TLB_BITS) + xor addr(TLB_LG_PGSZ + 3 * TLB_BITS - 1 downto TLB_LG_PGSZ + 2 * TLB_BITS); + return to_integer(unsigned(hash)); + end; begin assert LINE_SIZE mod ROW_SIZE = 0; @@ -260,9 +309,9 @@ begin report "geometry bits don't add up" severity FAILURE; assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS) report "geometry bits don't add up" severity FAILURE; - assert (64 = TAG_BITS + INDEX_BITS + LINE_OFF_BITS) + assert (REAL_ADDR_BITS = TAG_BITS + INDEX_BITS + LINE_OFF_BITS) report "geometry bits don't add up" severity FAILURE; - assert (64 = TAG_BITS + ROW_BITS + ROW_OFF_BITS) + assert (REAL_ADDR_BITS = TAG_BITS + ROW_BITS + ROW_OFF_BITS) report "geometry bits don't add up" severity FAILURE; sim_debug: if SIM generate @@ -356,6 +405,69 @@ begin end generate; end generate; + -- TLB hit detection and real address generation + itlb_lookup : process(all) + variable pte : tlb_pte_t; + variable ttag : tlb_tag_t; + 
begin + tlb_req_index <= hash_ea(i_in.nia); + pte := itlb_ptes(tlb_req_index); + ttag := itlb_tags(tlb_req_index); + if i_in.virt_mode = '1' then + real_addr <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) & + i_in.nia(TLB_LG_PGSZ - 1 downto 0); + if ttag = i_in.nia(63 downto TLB_LG_PGSZ + TLB_BITS) then + ra_valid <= itlb_valids(tlb_req_index); + else + ra_valid <= '0'; + end if; + eaa_priv <= pte(3); + else + real_addr <= i_in.nia(REAL_ADDR_BITS - 1 downto 0); + ra_valid <= '1'; + eaa_priv <= '1'; + end if; + + -- no IAMR, so no KUEP support for now + priv_fault <= eaa_priv and not i_in.priv_mode; + access_ok <= ra_valid and not priv_fault; + end process; + + -- iTLB update + itlb_update: process(clk) + variable tlbie : std_ulogic; + variable tlbia : std_ulogic; + variable tlbwe : std_ulogic; + variable wr_index : tlb_index_t; + begin + if rising_edge(clk) then + tlbie := '0'; + tlbia := '0'; + tlbwe := m_in.tlbld; + if m_in.tlbie = '1' then + if m_in.addr(11 downto 10) /= "00" then + tlbia := '1'; + else + tlbie := '1'; + end if; + end if; + wr_index := hash_ea(m_in.addr); + if rst = '1' or tlbia = '1' then + -- clear all valid bits + for i in tlb_index_t loop + itlb_valids(i) <= '0'; + end loop; + elsif tlbie = '1' then + -- clear entry regardless of hit or miss + itlb_valids(wr_index) <= '0'; + elsif tlbwe = '1' then + itlb_tags(wr_index) <= m_in.addr(63 downto TLB_LG_PGSZ + TLB_BITS); + itlb_ptes(wr_index) <= m_in.pte; + itlb_valids(wr_index) <= '1'; + end if; + end if; + end process; + -- Cache hit detection, output to fetch2 and other misc logic icache_comb : process(all) variable is_hit : std_ulogic; @@ -364,12 +476,13 @@ begin -- Extract line, row and tag from request req_index <= get_index(i_in.nia); req_row <= get_row(i_in.nia); - req_tag <= get_tag(i_in.nia); + req_tag <= get_tag(real_addr); -- Calculate address of beginning of cache line, will be -- used for cache miss processing if needed -- - req_laddr <= i_in.nia(63 downto LINE_OFF_BITS) & + 
req_laddr <= (63 downto REAL_ADDR_BITS => '0') & + real_addr(REAL_ADDR_BITS - 1 downto LINE_OFF_BITS) & (LINE_OFF_BITS-1 downto 0 => '0'); -- Test if pending request is a hit on any way @@ -385,8 +498,13 @@ begin end loop; -- Generate the "hit" and "miss" signals for the synchronous blocks - req_is_hit <= i_in.req and is_hit and not flush_in; - req_is_miss <= i_in.req and not is_hit and not flush_in; + if i_in.req = '1' and access_ok = '1' and flush_in = '0' then + req_is_hit <= is_hit; + req_is_miss <= not is_hit; + else + req_is_hit <= '0'; + req_is_miss <= '0'; + end if; req_hit_way <= hit_way; -- The way to replace on a miss @@ -404,9 +522,10 @@ begin i_out.valid <= r.hit_valid; i_out.nia <= r.hit_nia; i_out.stop_mark <= r.hit_smark; + i_out.fetch_failed <= r.fetch_failed; - -- Stall fetch1 if we have a miss - stall_out <= not is_hit; + -- Stall fetch1 if we have a miss on cache or TLB or a protection fault + stall_out <= not (is_hit and access_ok); -- Wishbone requests output (from the cache miss reload machine) wishbone_out <= r.wb; @@ -419,22 +538,21 @@ begin -- On a hit, latch the request for the next cycle, when the BRAM data -- will be available on the cache_out output of the corresponding way -- + r.hit_valid <= req_is_hit; + -- Send stop marks and NIA down regardless of validity + r.hit_smark <= i_in.stop_mark; + r.hit_nia <= i_in.nia; if req_is_hit = '1' then r.hit_way <= req_hit_way; - r.hit_nia <= i_in.nia; r.hit_smark <= i_in.stop_mark; - r.hit_valid <= '1'; report "cache hit nia:" & to_hstring(i_in.nia) & + " IR:" & std_ulogic'image(i_in.virt_mode) & " SM:" & std_ulogic'image(i_in.stop_mark) & " idx:" & integer'image(req_index) & " tag:" & to_hstring(req_tag) & - " way: " & integer'image(req_hit_way); - else - r.hit_valid <= '0'; - - -- Send stop marks down regardless of validity - r.hit_smark <= i_in.stop_mark; + " way:" & integer'image(req_hit_way) & + " RA:" & to_hstring(real_addr); end if; end if; end process; @@ -468,10 +586,12 @@ begin -- We 
need to read a cache line if req_is_miss = '1' then report "cache miss nia:" & to_hstring(i_in.nia) & + " IR:" & std_ulogic'image(i_in.virt_mode) & " SM:" & std_ulogic'image(i_in.stop_mark) & " idx:" & integer'image(req_index) & " way:" & integer'image(replace_way) & - " tag:" & to_hstring(req_tag); + " tag:" & to_hstring(req_tag) & + " RA:" & to_hstring(real_addr); -- Force misses on that way while reloading that line cache_valids(req_index)(replace_way) <= '0'; @@ -539,6 +659,13 @@ begin end if; end case; end if; + + -- TLB miss and protection fault processing + if rst = '1' or flush_in = '1' or m_in.tlbld = '1' then + r.fetch_failed <= '0'; + elsif i_in.req = '1' and access_ok = '0' then + r.fetch_failed <= '1'; + end if; end if; end process; end; diff --git a/icache_tb.vhdl b/icache_tb.vhdl index ea5cf3a..09a644b 100644 --- a/icache_tb.vhdl +++ b/icache_tb.vhdl @@ -15,6 +15,8 @@ architecture behave of icache_tb is signal i_out : Fetch1ToIcacheType; signal i_in : IcacheToFetch2Type; + signal m_out : MmuToIcacheType; + signal wb_bram_in : wishbone_master_out; signal wb_bram_out : wishbone_slave_out; @@ -30,6 +32,7 @@ begin rst => rst, i_in => i_out, i_out => i_in, + m_in => m_out, flush_in => '0', wishbone_out => wb_bram_in, wishbone_in => wb_bram_out @@ -70,6 +73,11 @@ begin i_out.nia <= (others => '0'); i_out.stop_mark <= '0'; + m_out.tlbld <= '0'; + m_out.tlbie <= '0'; + m_out.addr <= (others => '0'); + m_out.pte <= (others => '0'); + wait until rising_edge(clk); wait until rising_edge(clk); wait until rising_edge(clk); diff --git a/loadstore1.vhdl b/loadstore1.vhdl index c56346f..666cf4e 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -41,7 +41,8 @@ architecture behave of loadstore1 is ACK_WAIT, -- waiting for ack from dcache LD_UPDATE, -- writing rA with computed addr on load MMU_LOOKUP, -- waiting for MMU to look up translation - TLBIE_WAIT -- waiting for MMU to finish doing a tlbie + TLBIE_WAIT, -- waiting for MMU to finish doing a tlbie + DO_ISI ); 
type reg_stage_t is record @@ -70,6 +71,7 @@ architecture behave of loadstore1 is second_bytes : std_ulogic_vector(7 downto 0); dar : std_ulogic_vector(63 downto 0); dsisr : std_ulogic_vector(31 downto 0); + instr_fault : std_ulogic; end record; type byte_sel_t is array(0 to 7) of std_ulogic; @@ -154,6 +156,7 @@ begin variable mmureq : std_ulogic; variable dsisr : std_ulogic_vector(31 downto 0); variable mmu_mtspr : std_ulogic; + variable itlb_fault : std_ulogic; begin v := r; req := '0'; @@ -163,6 +166,7 @@ begin addr := lsu_sum; mfspr := '0'; mmu_mtspr := '0'; + itlb_fault := '0'; sprn := std_ulogic_vector(to_unsigned(l_in.spr_num, 10)); sprval := (others => '0'); -- avoid inferred latches exception := '0'; @@ -230,6 +234,7 @@ begin v.load := '0'; v.dcbz := '0'; v.tlbie := '0'; + v.instr_fault := '0'; v.dwords_done := '0'; case l_in.op is when OP_STORE => @@ -272,6 +277,10 @@ begin -- writing one of the SPRs in the MMU mmu_mtspr := '1'; end if; + when OP_FETCH_FAILED => + -- for now, always signal an ISI in the next cycle + v.instr_fault := '1'; + v.state := DO_ISI; when others => assert false report "unknown op sent to loadstore1"; end case; @@ -425,6 +434,10 @@ begin do_update := '1'; v.state := IDLE; done := '1'; + + when DO_ISI => + exception := '1'; + v.state := IDLE; end case; -- Update outputs to dcache @@ -441,6 +454,7 @@ begin -- Update outputs to MMU m_out.valid <= mmureq; + m_out.iside <= itlb_fault; m_out.load <= r.load; m_out.priv <= r.priv_mode; m_out.tlbie <= v.tlbie; @@ -472,9 +486,11 @@ begin -- update exception info back to execute1 e_out.exception <= exception; - e_out.segment_fault <= m_in.segerr; - if exception = '1' then + e_out.segment_fault <= '0'; + e_out.instr_fault <= r.instr_fault; + if exception = '1' and r.instr_fault = '0' then v.dar := addr; + e_out.segment_fault <= m_in.segerr; if m_in.segerr = '0' then v.dsisr := dsisr; end if; diff --git a/mmu.vhdl b/mmu.vhdl index 3a1003c..e26c5a7 100644 --- a/mmu.vhdl +++ b/mmu.vhdl @@ -18,7 
+18,9 @@ entity mmu is l_out : out MmuToLoadstore1Type; d_out : out MmuToDcacheType; - d_in : in DcacheToMmuType + d_in : in DcacheToMmuType; + + i_out : out MmuToIcacheType ); end mmu; @@ -336,5 +338,11 @@ begin d_out.addr <= pgtable_addr; d_out.pte <= (others => '0'); end if; + + i_out.tlbld <= '0'; + i_out.tlbie <= tlbie_req; + i_out.addr <= l_in.addr; + i_out.pte <= l_in.rs; + end process; end;