diff --git a/Makefile b/Makefile index ed74176..48be5b4 100644 --- a/Makefile +++ b/Makefile @@ -31,7 +31,7 @@ common.o: decode_types.o control.o: gpr_hazard.o cr_hazard.o common.o sim_jtag.o: sim_jtag_socket.o core_tb.o: common.o wishbone_types.o core.o soc.o sim_jtag.o -core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o loadstore1.o dcache.o writeback.o core_debug.o +core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o loadstore1.o mmu.o dcache.o writeback.o core_debug.o core_debug.o: common.o countzero.o: countzero_tb.o: common.o glibc_random.o countzero.o @@ -58,10 +58,11 @@ icache_tb.o: common.o wishbone_types.o icache.o wishbone_bram_wrapper.o dcache.o: utils.o common.o wishbone_types.o plru.o cache_ram.o utils.o dcache_tb.o: common.o wishbone_types.o dcache.o wishbone_bram_wrapper.o insn_helpers.o: -loadstore1.o: common.o helpers.o decode_types.o +loadstore1.o: common.o decode_types.o logical.o: decode_types.o multiply_tb.o: decode_types.o common.o glibc_random.o ppc_fx_insns.o multiply.o multiply.o: common.o decode_types.o +mmu.o: common.o divider_tb.o: decode_types.o common.o glibc_random.o ppc_fx_insns.o divider.o divider.o: common.o decode_types.o ppc_fx_insns.o: helpers.o diff --git a/common.vhdl b/common.vhdl index e8ec19e..3ee19d7 100644 --- a/common.vhdl +++ b/common.vhdl @@ -246,7 +246,6 @@ package common is type Loadstore1ToDcacheType is record valid : std_ulogic; load : std_ulogic; -- is this a load - tlbie : std_ulogic; -- is this a tlbie dcbz : std_ulogic; nc : std_ulogic; reserve : std_ulogic; @@ -267,6 +266,30 @@ package common is rc_error : std_ulogic; end record; + type Loadstore1ToMmuType is record + valid : std_ulogic; + tlbie : std_ulogic; + addr : std_ulogic_vector(63 downto 0); + rs : std_ulogic_vector(63 downto 0); + end record; + + type MmuToLoadstore1Type is record + done : std_ulogic; + error : 
std_ulogic; + end record; + + type MmuToDcacheType is record + valid : std_ulogic; + tlbie : std_ulogic; + addr : std_ulogic_vector(63 downto 0); + pte : std_ulogic_vector(63 downto 0); + end record; + + type DcacheToMmuType is record + stall : std_ulogic; + done : std_ulogic; + end record; + type Loadstore1ToWritebackType is record valid : std_ulogic; write_enable: std_ulogic; diff --git a/core.vhdl b/core.vhdl index 0cb2ecd..c870404 100644 --- a/core.vhdl +++ b/core.vhdl @@ -65,10 +65,14 @@ architecture behave of core is signal execute1_to_loadstore1: Execute1ToLoadstore1Type; signal loadstore1_to_execute1: Loadstore1ToExecute1Type; signal loadstore1_to_writeback: Loadstore1ToWritebackType; + signal loadstore1_to_mmu: Loadstore1ToMmuType; + signal mmu_to_loadstore1: MmuToLoadstore1Type; -- dcache signals signal loadstore1_to_dcache: Loadstore1ToDcacheType; signal dcache_to_loadstore1: DcacheToLoadstore1Type; + signal mmu_to_dcache: MmuToDcacheType; + signal dcache_to_mmu: DcacheToMmuType; -- local signals signal fetch1_stall_in : std_ulogic; @@ -124,6 +128,7 @@ architecture behave of core is attribute keep_hierarchy of cr_file_0 : label is keep_h(DISABLE_FLATTEN); attribute keep_hierarchy of execute1_0 : label is keep_h(DISABLE_FLATTEN); attribute keep_hierarchy of loadstore1_0 : label is keep_h(DISABLE_FLATTEN); + attribute keep_hierarchy of mmu_0 : label is keep_h(DISABLE_FLATTEN); attribute keep_hierarchy of dcache_0 : label is keep_h(DISABLE_FLATTEN); attribute keep_hierarchy of writeback_0 : label is keep_h(DISABLE_FLATTEN); attribute keep_hierarchy of debug_0 : label is keep_h(DISABLE_FLATTEN); @@ -270,10 +275,22 @@ begin l_out => loadstore1_to_writeback, d_out => loadstore1_to_dcache, d_in => dcache_to_loadstore1, + m_out => loadstore1_to_mmu, + m_in => mmu_to_loadstore1, dc_stall => dcache_stall_out, stall_out => ls1_stall_out ); + mmu_0: entity work.mmu + port map ( + clk => clk, + rst => core_rst, + l_in => loadstore1_to_mmu, + l_out => 
mmu_to_loadstore1, + d_out => mmu_to_dcache, + d_in => dcache_to_mmu + ); + dcache_0: entity work.dcache generic map( LINE_SIZE => 64, @@ -285,6 +302,8 @@ begin rst => core_rst, d_in => loadstore1_to_dcache, d_out => dcache_to_loadstore1, + m_in => mmu_to_dcache, + m_out => dcache_to_mmu, stall_out => dcache_stall_out, wishbone_in => wishbone_data_in, wishbone_out => wishbone_data_out diff --git a/dcache.vhdl b/dcache.vhdl index 03b3886..126df48 100644 --- a/dcache.vhdl +++ b/dcache.vhdl @@ -40,6 +40,9 @@ entity dcache is d_in : in Loadstore1ToDcacheType; d_out : out DcacheToLoadstore1Type; + m_in : in MmuToDcacheType; + m_out : out DcacheToMmuType; + stall_out : out std_ulogic; wishbone_out : out wishbone_master_out; @@ -146,9 +149,6 @@ architecture rtl of dcache is attribute ram_style of dtlb_tags : signal is "distributed"; attribute ram_style of dtlb_ptes : signal is "distributed"; - signal r0 : Loadstore1ToDcacheType; - signal r0_valid : std_ulogic; - -- Record for storing permission, attribute, etc. bits from a PTE type perm_attr_t is record reference : std_ulogic; @@ -205,6 +205,15 @@ architecture rtl of dcache is -- first stage emits a stall for a complex op. 
-- + -- Stage 0 register, basically contains just the latched request + type reg_stage_0_t is record + req : Loadstore1ToDcacheType; + tlbie : std_ulogic; + end record; + + signal r0 : reg_stage_0_t; + signal r0_valid : std_ulogic; + -- First stage register, contains state for stage 1 of load hits -- and for the state machine used by all other operations -- @@ -424,35 +433,61 @@ begin assert (64 = wishbone_data_bits) report "Can't yet handle a wishbone width that isn't 64-bits" severity FAILURE; - -- Latch the request in r0 as long as we're not stalling + -- Latch the request in r0.req as long as we're not stalling stage_0 : process(clk) begin if rising_edge(clk) then if rst = '1' then - r0.valid <= '0'; + r0.req.valid <= '0'; elsif stall_out = '0' then - r0 <= d_in; + assert (d_in.valid and m_in.valid) = '0' report + "request collision loadstore vs MMU"; + if m_in.valid = '1' then + r0.req.valid <= '1'; + r0.req.load <= '0'; + r0.req.dcbz <= '0'; + r0.req.nc <= '0'; + r0.req.reserve <= '0'; + r0.req.virt_mode <= '0'; + r0.req.priv_mode <= '1'; + r0.req.addr <= m_in.addr; + r0.req.data <= m_in.pte; + r0.req.byte_sel <= (others => '1'); + r0.tlbie <= m_in.tlbie; + assert m_in.tlbie = '1' report "unknown request from MMU"; + else + r0.req <= d_in; + r0.tlbie <= '0'; + end if; end if; end if; end process; + -- we don't yet handle collisions between loadstore1 requests and MMU requests + m_out.stall <= '0'; + -- Hold off the request in r0 when stalling, -- and cancel it if we get an error in a previous request. - r0_valid <= r0.valid and not stall_out and not r1.error_done; + r0_valid <= r0.req.valid and not stall_out and not r1.error_done; -- TLB - -- Operates in the second cycle on the request latched in r0. + -- Operates in the second cycle on the request latched in r0.req. -- TLB updates write the entry at the end of the second cycle. 
tlb_read : process(clk) variable index : tlb_index_t; + variable addrbits : std_ulogic_vector(TLB_SET_BITS - 1 downto 0); begin if rising_edge(clk) then if stall_out = '1' then -- keep reading the same thing while stalled index := tlb_req_index; else - index := to_integer(unsigned(d_in.addr(TLB_LG_PGSZ + TLB_SET_BITS - 1 - downto TLB_LG_PGSZ))); + if m_in.valid = '1' then + addrbits := m_in.addr(TLB_LG_PGSZ + TLB_SET_BITS - 1 downto TLB_LG_PGSZ); + else + addrbits := d_in.addr(TLB_LG_PGSZ + TLB_SET_BITS - 1 downto TLB_LG_PGSZ); + end if; + index := to_integer(unsigned(addrbits)); end if; tlb_valid_way <= dtlb_valids(index); tlb_tag_way <= dtlb_tags(index); @@ -500,11 +535,11 @@ begin variable hit : std_ulogic; variable eatag : tlb_tag_t; begin - tlb_req_index <= to_integer(unsigned(r0.addr(TLB_LG_PGSZ + TLB_SET_BITS - 1 + tlb_req_index <= to_integer(unsigned(r0.req.addr(TLB_LG_PGSZ + TLB_SET_BITS - 1 downto TLB_LG_PGSZ))); hitway := 0; hit := '0'; - eatag := r0.addr(63 downto TLB_LG_PGSZ + TLB_SET_BITS); + eatag := r0.req.addr(63 downto TLB_LG_PGSZ + TLB_SET_BITS); for i in tlb_way_t loop if tlb_valid_way(i) = '1' and read_tlb_tag(i, tlb_tag_way) = eatag then @@ -515,13 +550,13 @@ begin tlb_hit <= hit and r0_valid; tlb_hit_way <= hitway; pte <= read_tlb_pte(hitway, tlb_pte_way); - valid_ra <= tlb_hit or not r0.virt_mode; - if r0.virt_mode = '1' then + valid_ra <= tlb_hit or not r0.req.virt_mode; + if r0.req.virt_mode = '1' then ra <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) & - r0.addr(TLB_LG_PGSZ - 1 downto 0); + r0.req.addr(TLB_LG_PGSZ - 1 downto 0); perm_attr <= extract_perm_attr(pte); else - ra <= r0.addr(REAL_ADDR_BITS - 1 downto 0); + ra <= r0.req.addr(REAL_ADDR_BITS - 1 downto 0); perm_attr <= real_mode_perm_attr; end if; end process; @@ -540,9 +575,9 @@ begin tlbia := '0'; tlbwe := '0'; if r0_valid = '1' and r0.tlbie = '1' then - if r0.addr(11 downto 10) /= "00" then + if r0.req.addr(11 downto 10) /= "00" then tlbia := '1'; - elsif r0.addr(9) = '1' then + 
elsif r0.req.addr(9) = '1' then tlbwe := '1'; else tlbie := '1'; @@ -563,15 +598,16 @@ begin else repl_way := to_integer(unsigned(tlb_plru_victim(tlb_req_index))); end if; - eatag := r0.addr(63 downto TLB_LG_PGSZ + TLB_SET_BITS); + eatag := r0.req.addr(63 downto TLB_LG_PGSZ + TLB_SET_BITS); tagset := tlb_tag_way; write_tlb_tag(repl_way, tagset, eatag); dtlb_tags(tlb_req_index) <= tagset; pteset := tlb_pte_way; - write_tlb_pte(repl_way, pteset, r0.data); + write_tlb_pte(repl_way, pteset, r0.req.data); dtlb_ptes(tlb_req_index) <= pteset; dtlb_valids(tlb_req_index)(repl_way) <= '1'; end if; + m_out.done <= r0_valid and r0.tlbie; end if; end process; @@ -628,8 +664,8 @@ begin variable hit_way_set : hit_way_set_t; begin -- Extract line, row and tag from request - req_index <= get_index(r0.addr); - req_row <= get_row(r0.addr); + req_index <= get_index(r0.req.addr); + req_row <= get_row(r0.req.addr); req_tag <= get_tag(ra); -- Only do anything if not being stalled by stage 1 @@ -648,13 +684,13 @@ begin -- the TLB, and then decide later which match to use. 
hit_way := 0; is_hit := '0'; - if r0.virt_mode = '1' then + if r0.req.virt_mode = '1' then for j in tlb_way_t loop hit_way_set(j) := 0; s_hit := '0'; s_pte := read_tlb_pte(j, tlb_pte_way); s_ra := s_pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) & - r0.addr(TLB_LG_PGSZ - 1 downto 0); + r0.req.addr(TLB_LG_PGSZ - 1 downto 0); s_tag := get_tag(s_ra); for i in way_t loop if go = '1' and cache_valids(req_index)(i) = '1' and @@ -671,7 +707,7 @@ begin hit_way := hit_way_set(tlb_hit_way); end if; else - s_tag := get_tag(r0.addr(REAL_ADDR_BITS - 1 downto 0)); + s_tag := get_tag(r0.req.addr(REAL_ADDR_BITS - 1 downto 0)); for i in way_t loop if go = '1' and cache_valids(req_index)(i) = '1' and read_tag(i, cache_tags(req_index)) = s_tag then @@ -689,18 +725,18 @@ begin -- work out whether we have permission for this access -- NB we don't yet implement AMR, thus no KUAP - rc_ok <= perm_attr.reference and (r0.load or perm_attr.changed); - perm_ok <= (r0.priv_mode or not perm_attr.priv) and - (perm_attr.wr_perm or (r0.load and perm_attr.rd_perm)); + rc_ok <= perm_attr.reference and (r0.req.load or perm_attr.changed); + perm_ok <= (r0.req.priv_mode or not perm_attr.priv) and + (perm_attr.wr_perm or (r0.req.load and perm_attr.rd_perm)); -- Combine the request and cache hit status to decide what -- operation needs to be done -- - nc := r0.nc or perm_attr.nocache; + nc := r0.req.nc or perm_attr.nocache; op := OP_NONE; if go = '1' then if valid_ra = '1' and rc_ok = '1' and perm_ok = '1' then - opsel := r0.load & nc & is_hit; + opsel := r0.req.load & nc & is_hit; case opsel is when "101" => op := OP_LOAD_HIT; when "100" => op := OP_LOAD_MISS; @@ -723,7 +759,11 @@ begin -- If we're stalling then we need to keep reading the last -- row requested. 
if stall_out = '0' then - early_req_row <= get_row(d_in.addr); + if m_in.valid = '1' then + early_req_row <= get_row(m_in.addr); + else + early_req_row <= get_row(d_in.addr); + end if; else early_req_row <= req_row; end if; @@ -741,17 +781,17 @@ begin cancel_store <= '0'; set_rsrv <= '0'; clear_rsrv <= '0'; - if r0_valid = '1' and r0.reserve = '1' then + if r0_valid = '1' and r0.req.reserve = '1' then -- XXX generate alignment interrupt if address is not aligned - -- XXX or if r0.nc = '1' - if r0.load = '1' then + -- XXX or if r0.req.nc = '1' + if r0.req.load = '1' then -- load with reservation set_rsrv <= '1'; else -- store conditional clear_rsrv <= '1'; if reservation.valid = '0' or - r0.addr(63 downto LINE_OFF_BITS) /= reservation.addr then + r0.req.addr(63 downto LINE_OFF_BITS) /= reservation.addr then cancel_store <= '1'; end if; end if; @@ -765,7 +805,7 @@ begin reservation.valid <= '0'; elsif set_rsrv = '1' then reservation.valid <= '1'; - reservation.addr <= r0.addr(63 downto LINE_OFF_BITS); + reservation.addr <= r0.req.addr(63 downto LINE_OFF_BITS); end if; end if; end process; @@ -818,12 +858,6 @@ begin d_out.valid <= '1'; end if; - -- tlbie is handled above and doesn't go through the cache state machine - if r1.tlbie_done = '1' then - report "completing tlbie"; - d_out.valid <= '1'; - end if; - -- Slow ops (load miss, NC, stores) if r1.slow_valid = '1' then -- If it's a load, enable register writeback and switch @@ -900,8 +934,8 @@ begin if r1.state = IDLE then -- In IDLE state, the only write path is the store-hit update case wr_addr <= std_ulogic_vector(to_unsigned(req_row, ROW_BITS)); - wr_data <= r0.data; - wr_sel <= r0.byte_sel; + wr_data <= r0.req.data; + wr_sel <= r0.req.byte_sel; else -- Otherwise, we might be doing a reload or a DCBZ if r1.req.dcbz = '1' then @@ -936,17 +970,17 @@ begin dcache_fast_hit : process(clk) begin if rising_edge(clk) then - -- If we have a request incoming, we have to latch it as r0.valid + -- If we have a request 
incoming, we have to latch it as r0.req.valid -- is only set for a single cycle. It's up to the control logic to -- ensure we don't override an uncompleted request (for now we are -- single issue on load/stores so we are fine, later, we can generate -- a stall output if necessary). if req_op /= OP_NONE and stall_out = '0' then - r1.req <= r0; + r1.req <= r0.req; report "op:" & op_t'image(req_op) & - " addr:" & to_hstring(r0.addr) & - " nc:" & std_ulogic'image(r0.nc) & + " addr:" & to_hstring(r0.req.addr) & + " nc:" & std_ulogic'image(r0.req.nc) & " idx:" & integer'image(req_index) & " tag:" & to_hstring(req_tag) & " way: " & integer'image(req_hit_way); @@ -1018,7 +1052,7 @@ begin when OP_LOAD_MISS => -- Normal load cache miss, start the reload machine -- - report "cache miss addr:" & to_hstring(r0.addr) & + report "cache miss addr:" & to_hstring(r0.req.addr) & " idx:" & integer'image(req_index) & " way:" & integer'image(replace_way) & " tag:" & to_hstring(req_tag); @@ -1053,7 +1087,7 @@ begin r1.state <= RELOAD_WAIT_ACK; when OP_LOAD_NC => - r1.wb.sel <= r0.byte_sel; + r1.wb.sel <= r0.req.byte_sel; r1.wb.adr <= ra(r1.wb.adr'left downto 3) & "000"; r1.wb.cyc <= '1'; r1.wb.stb <= '1'; @@ -1061,10 +1095,10 @@ begin r1.state <= NC_LOAD_WAIT_ACK; when OP_STORE_HIT | OP_STORE_MISS => - if r0.dcbz = '0' then - r1.wb.sel <= r0.byte_sel; + if r0.req.dcbz = '0' then + r1.wb.sel <= r0.req.byte_sel; r1.wb.adr <= ra(r1.wb.adr'left downto 3) & "000"; - r1.wb.dat <= r0.data; + r1.wb.dat <= r0.req.data; if cancel_store = '0' then r1.wb.cyc <= '1'; r1.wb.stb <= '1'; diff --git a/dcache_tb.vhdl b/dcache_tb.vhdl index 66b938f..48c6877 100644 --- a/dcache_tb.vhdl +++ b/dcache_tb.vhdl @@ -15,6 +15,9 @@ architecture behave of dcache_tb is signal d_in : Loadstore1ToDcacheType; signal d_out : DcacheToLoadstore1Type; + signal m_in : MmuToDcacheType; + signal m_out : DcacheToMmuType; + signal wb_bram_in : wishbone_master_out; signal wb_bram_out : wishbone_slave_out; @@ -30,6 +33,8 @@ begin 
rst => rst, d_in => d_in, d_out => d_out, + m_in => m_in, + m_out => m_out, wishbone_out => wb_bram_in, wishbone_in => wb_bram_out ); @@ -68,10 +73,12 @@ begin -- Clear stuff d_in.valid <= '0'; d_in.load <= '0'; - d_in.tlbie <= '0'; d_in.nc <= '0'; d_in.addr <= (others => '0'); d_in.data <= (others => '0'); + m_in.valid <= '0'; + m_in.addr <= (others => '0'); + m_in.pte <= (others => '0'); wait for 4*clk_period; wait until rising_edge(clk); diff --git a/loadstore1.vhdl b/loadstore1.vhdl index c54e47b..d5dd010 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -5,7 +5,6 @@ use ieee.numeric_std.all; library work; use work.decode_types.all; use work.common.all; -use work.helpers.all; -- 2 cycle LSU -- We calculate the address in the first cycle @@ -22,6 +21,9 @@ entity loadstore1 is d_out : out Loadstore1ToDcacheType; d_in : in DcacheToLoadstore1Type; + m_out : out Loadstore1ToMmuType; + m_in : in MmuToLoadstore1Type; + dc_stall : in std_ulogic; stall_out : out std_ulogic ); @@ -38,7 +40,9 @@ architecture behave of loadstore1 is SECOND_REQ, -- send 2nd request of unaligned xfer FIRST_ACK_WAIT, -- waiting for 1st ack from dcache LAST_ACK_WAIT, -- waiting for last ack from dcache - LD_UPDATE -- writing rA with computed addr on load + LD_UPDATE, -- writing rA with computed addr on load + MMU_LOOKUP_1ST, -- waiting for MMU to look up translation + MMU_LOOKUP_LAST ); type reg_stage_t is record @@ -62,6 +66,7 @@ architecture behave of loadstore1 is virt_mode : std_ulogic; priv_mode : std_ulogic; state : state_t; + first_bytes : std_ulogic_vector(7 downto 0); second_bytes : std_ulogic_vector(7 downto 0); dar : std_ulogic_vector(63 downto 0); dsisr : std_ulogic_vector(31 downto 0); @@ -146,6 +151,7 @@ begin variable sprval : std_ulogic_vector(63 downto 0); variable exception : std_ulogic; variable next_addr : std_ulogic_vector(63 downto 0); + variable mmureq : std_ulogic; variable dsisr : std_ulogic_vector(31 downto 0); begin v := r; @@ -158,6 +164,7 @@ begin sprval := 
(others => '0'); -- avoid inferred latches exception := '0'; dsisr := (others => '0'); + mmureq := '0'; write_enable := '0'; do_update := '0'; @@ -230,7 +237,7 @@ begin req := '1'; v.dcbz := '1'; when OP_TLBIE => - req := '1'; + mmureq := '1'; v.tlbie := '1'; when OP_MFSPR => done := '1'; @@ -282,18 +289,14 @@ begin -- Do length_to_sel and work out if we are doing 2 dwords long_sel := xfer_data_sel(l_in.length, v.addr(2 downto 0)); byte_sel := long_sel(7 downto 0); + v.first_bytes := byte_sel; v.second_bytes := long_sel(15 downto 8); - v.addr := lsu_sum; - -- Do byte reversing and rotating for stores in the first cycle - byte_offset := "000"; + byte_offset := unsigned(lsu_sum(2 downto 0)); brev_lenm1 := "000"; - if v.tlbie = '0' then - byte_offset := unsigned(lsu_sum(2 downto 0)); - if l_in.byte_reverse = '1' then - brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1; - end if; + if l_in.byte_reverse = '1' then + brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1; end if; for i in 0 to 7 loop k := (to_unsigned(i, 3) xor brev_lenm1) + byte_offset; @@ -309,6 +312,10 @@ begin v.state := SECOND_REQ; end if; end if; + if mmureq = '1' then + stall := '1'; + v.state := LAST_ACK_WAIT; + end if; end if; when SECOND_REQ => @@ -323,12 +330,19 @@ begin if d_in.valid = '1' then if d_in.error = '1' then -- dcache will discard the second request - exception := '1'; - dsisr(30) := d_in.tlb_miss; - dsisr(63 - 36) := d_in.perm_error; - dsisr(63 - 38) := not r.load; - dsisr(63 - 45) := d_in.rc_error; - v.state := IDLE; + addr := r.addr; + if d_in.tlb_miss = '1' then + -- give it to the MMU to look up + mmureq := '1'; + v.state := MMU_LOOKUP_1ST; + else + -- signal an interrupt straight away + exception := '1'; + dsisr(63 - 36) := d_in.perm_error; + dsisr(63 - 38) := not r.load; + dsisr(63 - 45) := d_in.rc_error; + v.state := IDLE; + end if; else v.state := LAST_ACK_WAIT; if r.load = '1' then @@ -337,6 +351,32 @@ begin end if; end if; + when MMU_LOOKUP_1ST | MMU_LOOKUP_LAST => + 
stall := '1'; + if two_dwords = '1' and r.state = MMU_LOOKUP_LAST then + addr := next_addr; + byte_sel := r.second_bytes; + else + addr := r.addr; + byte_sel := r.first_bytes; + end if; + if m_in.done = '1' then + if m_in.error = '0' then + -- retry the request now that the MMU has installed a TLB entry + req := '1'; + if r.state = MMU_LOOKUP_1ST then + v.state := SECOND_REQ; + else + v.state := LAST_ACK_WAIT; + end if; + else + exception := '1'; + dsisr(63 - 33) := '1'; + dsisr(63 - 38) := not r.load; + v.state := IDLE; + end if; + end if; + when LAST_ACK_WAIT => stall := '1'; if d_in.valid = '1' then @@ -346,12 +386,18 @@ begin else addr := r.addr; end if; - exception := '1'; - dsisr(30) := d_in.tlb_miss; - dsisr(63 - 36) := d_in.perm_error; - dsisr(63 - 38) := not r.load; - dsisr(63 - 45) := d_in.rc_error; - v.state := IDLE; + if d_in.tlb_miss = '1' then + -- give it to the MMU to look up + mmureq := '1'; + v.state := MMU_LOOKUP_LAST; + else + -- signal an interrupt straight away + exception := '1'; + dsisr(63 - 36) := d_in.perm_error; + dsisr(63 - 38) := not r.load; + dsisr(63 - 45) := d_in.rc_error; + v.state := IDLE; + end if; else write_enable := r.load; if r.load = '1' and r.update = '1' then @@ -366,6 +412,12 @@ begin end if; end if; end if; + if m_in.done = '1' then + -- tlbie is finished + stall := '0'; + done := '1'; + v.state := IDLE; + end if; when LD_UPDATE => do_update := '1'; @@ -376,7 +428,6 @@ begin -- Update outputs to dcache d_out.valid <= req; d_out.load <= v.load; - d_out.tlbie <= v.tlbie; d_out.dcbz <= v.dcbz; d_out.nc <= v.nc; d_out.reserve <= v.reserve; @@ -386,6 +437,12 @@ begin d_out.virt_mode <= v.virt_mode; d_out.priv_mode <= v.priv_mode; + -- Update outputs to MMU + m_out.valid <= mmureq; + m_out.tlbie <= v.tlbie; + m_out.addr <= addr; + m_out.rs <= l_in.data; + -- Update outputs to writeback -- Multiplex either cache data to the destination GPR or -- the address for the rA update. 
diff --git a/microwatt.core b/microwatt.core
index a2d6ab5..180e0a5 100644
--- a/microwatt.core
+++ b/microwatt.core
@@ -25,6 +25,7 @@ filesets:
       - control.vhdl
       - execute1.vhdl
       - loadstore1.vhdl
+      - mmu.vhdl
       - dcache.vhdl
       - multiply.vhdl
       - divider.vhdl
diff --git a/mmu.vhdl b/mmu.vhdl
new file mode 100644
index 0000000..2e6d0fd
--- /dev/null
+++ b/mmu.vhdl
@@ -0,0 +1,129 @@
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.numeric_std.all;
+
+library work;
+use work.common.all;
+
+-- Radix MMU
+-- Supports 4-level trees as in arch 3.0B, but not the two-step translation for
+-- guests under a hypervisor (i.e. there is no gRA -> hRA translation).
+--
+-- In this revision only TLB maintenance is functional: a tlbie request from
+-- loadstore1 is forwarded to the dcache and completion is reported once the
+-- dcache signals done.  A TLB miss request is not walked yet; it completes
+-- with l_out.error = '1' in the cycle after it is received.
+
+entity mmu is
+    port (
+        clk   : in std_ulogic;
+        rst   : in std_ulogic;
+
+        -- request from / completion to loadstore1
+        l_in  : in Loadstore1ToMmuType;
+        l_out : out MmuToLoadstore1Type;
+
+        -- request to / status from the dcache
+        d_out : out MmuToDcacheType;
+        d_in  : in DcacheToMmuType
+        );
+end mmu;
+
+architecture behave of mmu is
+
+    type state_t is (IDLE,            -- waiting for a request from loadstore1
+                     TLBIE_WAIT,      -- tlbie sent to dcache, waiting for d_in.done
+                     RADIX_LOOKUP_0   -- placeholder for the tree walk; reports an error
+                     );
+
+    type reg_stage_t is record
+        -- latched request from loadstore1
+        valid : std_ulogic;
+        addr  : std_ulogic_vector(63 downto 0);
+        state : state_t;
+    end record;
+
+    signal r, rin : reg_stage_t;
+
+begin
+
+    -- Sequential part: registers the next state and emits simulation traces.
+    mmu_0: process(clk)
+    begin
+        if rising_edge(clk) then
+            if rst = '1' then
+                r.state <= IDLE;
+                r.valid <= '0';
+            else
+                if rin.valid = '1' then
+                    -- l_in.valid is raised for tlbie as well as for TLB misses,
+                    -- so name the request kind correctly in the trace.
+                    if l_in.tlbie = '1' then
+                        report "MMU got tlbie for " & to_hstring(rin.addr);
+                    else
+                        report "MMU got tlb miss for " & to_hstring(rin.addr);
+                    end if;
+                end if;
+                if l_out.done = '1' then
+                    -- done is also asserted when a tlbie completes
+                    report "MMU completing op with error=" & std_ulogic'image(l_out.error);
+                end if;
+                r <= rin;
+            end if;
+        end if;
+    end process;
+
+    -- Combinational part: next-state logic and output drivers.
+    mmu_1: process(all)
+        variable v : reg_stage_t;
+        variable dcreq : std_ulogic;
+        variable done : std_ulogic;
+        variable err : std_ulogic;
+    begin
+        v.valid := l_in.valid;
+        v.addr := l_in.addr;
+        v.state := r.state;
+        dcreq := '0';
+        done := '0';
+        err := '0';
+
+        case r.state is
+        when IDLE =>
+            if l_in.valid = '1' then
+                if l_in.tlbie = '1' then
+                    -- pass the tlbie straight through to the dcache this cycle
+                    dcreq := '1';
+                    v.state := TLBIE_WAIT;
+                else
+                    v.state := RADIX_LOOKUP_0;
+                end if;
+            end if;
+
+        when TLBIE_WAIT =>
+            if d_in.done = '1' then
+                done := '1';
+                v.state := IDLE;
+            end if;
+
+        when RADIX_LOOKUP_0 =>
+            -- no table walk yet: every miss is reported back as an error
+            done := '1';
+            err := '1';
+            v.state := IDLE;
+        end case;
+
+        -- update registers
+        rin <= v;
+
+        -- drive outputs
+        l_out.done <= done;
+        l_out.error <= err;
+
+        -- The dcache request fields come directly from loadstore1 and are
+        -- only qualified by d_out.valid.
+        d_out.valid <= dcreq;
+        d_out.tlbie <= l_in.tlbie;
+        d_out.addr <= l_in.addr;
+        d_out.pte <= l_in.rs;
+    end process;
+end;