Add framework for implementing an MMU

This adds a new module to implement an MMU.  At the moment it doesn't
do very much.  Tlbie instructions now get sent by loadstore1 to mmu,
which sends them to dcache, rather than loadstore1 sending them
directly to dcache.  TLB misses from dcache now get sent by loadstore1
to mmu, which currently just returns an error.  Loadstore1 then
generates a DSI in response to the error return from mmu.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
jtag-port
Paul Mackerras 5 years ago
parent d47fbf88d1
commit 8160f4f821

@ -31,7 +31,7 @@ common.o: decode_types.o
control.o: gpr_hazard.o cr_hazard.o common.o
sim_jtag.o: sim_jtag_socket.o
core_tb.o: common.o wishbone_types.o core.o soc.o sim_jtag.o
core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o loadstore1.o dcache.o writeback.o core_debug.o
core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o loadstore1.o mmu.o dcache.o writeback.o core_debug.o
core_debug.o: common.o
countzero.o:
countzero_tb.o: common.o glibc_random.o countzero.o
@ -58,10 +58,11 @@ icache_tb.o: common.o wishbone_types.o icache.o wishbone_bram_wrapper.o
dcache.o: utils.o common.o wishbone_types.o plru.o cache_ram.o utils.o
dcache_tb.o: common.o wishbone_types.o dcache.o wishbone_bram_wrapper.o
insn_helpers.o:
loadstore1.o: common.o helpers.o decode_types.o
loadstore1.o: common.o decode_types.o
logical.o: decode_types.o
multiply_tb.o: decode_types.o common.o glibc_random.o ppc_fx_insns.o multiply.o
multiply.o: common.o decode_types.o
mmu.o: common.o
divider_tb.o: decode_types.o common.o glibc_random.o ppc_fx_insns.o divider.o
divider.o: common.o decode_types.o
ppc_fx_insns.o: helpers.o

@ -246,7 +246,6 @@ package common is
type Loadstore1ToDcacheType is record
valid : std_ulogic;
load : std_ulogic; -- is this a load
tlbie : std_ulogic; -- is this a tlbie
dcbz : std_ulogic;
nc : std_ulogic;
reserve : std_ulogic;
@ -267,6 +266,30 @@ package common is
rc_error : std_ulogic;
end record;

-- Request sent from loadstore1 to the MMU.
type Loadstore1ToMmuType is record
-- Request strobe.  The MMU only starts a new operation on it while it is
-- in its IDLE state.
valid : std_ulogic;
-- '1' selects a tlbie operation, which the MMU forwards to the dcache;
-- otherwise the request is treated as a translation lookup.
tlbie : std_ulogic;
-- Address the request applies to.
addr : std_ulogic_vector(63 downto 0);
-- Driven by loadstore1 from its l_in.data input; the MMU hands it to the
-- dcache in the pte field of MmuToDcacheType.
rs : std_ulogic_vector(63 downto 0);
end record;

-- Completion status returned from the MMU to loadstore1.
type MmuToLoadstore1Type is record
-- Set by the MMU when the current operation (tlbie or lookup) has finished.
done : std_ulogic;
-- Set together with done when a lookup failed; loadstore1 then raises an
-- exception instead of retrying the access.
error : std_ulogic;
end record;

-- Request sent from the MMU to the dcache.
type MmuToDcacheType is record
-- Request strobe.  The dcache asserts that it never coincides with a valid
-- loadstore1 request.
valid : std_ulogic;
-- '1' for a tlbie operation.  This is the only MMU request the dcache
-- accepts at present (it asserts tlbie = '1' on every MMU request).
tlbie : std_ulogic;
-- Address of the operation; latched by the dcache as the request address.
addr : std_ulogic_vector(63 downto 0);
-- Latched by the dcache as the request data, which is what a TLB write
-- stores as the PTE.
pte : std_ulogic_vector(63 downto 0);
end record;

-- Status returned from the dcache to the MMU.
type DcacheToMmuType is record
-- Currently driven to a constant '0' by the dcache (collisions between
-- loadstore1 and MMU requests are not handled yet).
stall : std_ulogic;
-- Set by the dcache for a valid stage-0 request that has tlbie set; the MMU
-- waits for it in its TLBIE_WAIT state.
done : std_ulogic;
end record;

type Loadstore1ToWritebackType is record
valid : std_ulogic;
write_enable: std_ulogic;

@ -65,10 +65,14 @@ architecture behave of core is
signal execute1_to_loadstore1: Execute1ToLoadstore1Type;
signal loadstore1_to_execute1: Loadstore1ToExecute1Type;
signal loadstore1_to_writeback: Loadstore1ToWritebackType;
signal loadstore1_to_mmu: Loadstore1ToMmuType;
signal mmu_to_loadstore1: MmuToLoadstore1Type;

-- dcache signals
signal loadstore1_to_dcache: Loadstore1ToDcacheType;
signal dcache_to_loadstore1: DcacheToLoadstore1Type;
signal mmu_to_dcache: MmuToDcacheType;
signal dcache_to_mmu: DcacheToMmuType;

-- local signals
signal fetch1_stall_in : std_ulogic;
@ -124,6 +128,7 @@ architecture behave of core is
attribute keep_hierarchy of cr_file_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of execute1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of loadstore1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of mmu_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of dcache_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of writeback_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of debug_0 : label is keep_h(DISABLE_FLATTEN);
@ -270,10 +275,22 @@ begin
l_out => loadstore1_to_writeback,
d_out => loadstore1_to_dcache,
d_in => dcache_to_loadstore1,
m_out => loadstore1_to_mmu,
m_in => mmu_to_loadstore1,
dc_stall => dcache_stall_out,
stall_out => ls1_stall_out
);

mmu_0: entity work.mmu
port map (
clk => clk,
rst => core_rst,
l_in => loadstore1_to_mmu,
l_out => mmu_to_loadstore1,
d_out => mmu_to_dcache,
d_in => dcache_to_mmu
);

dcache_0: entity work.dcache
generic map(
LINE_SIZE => 64,
@ -285,6 +302,8 @@ begin
rst => core_rst,
d_in => loadstore1_to_dcache,
d_out => dcache_to_loadstore1,
m_in => mmu_to_dcache,
m_out => dcache_to_mmu,
stall_out => dcache_stall_out,
wishbone_in => wishbone_data_in,
wishbone_out => wishbone_data_out

@ -40,6 +40,9 @@ entity dcache is
d_in : in Loadstore1ToDcacheType;
d_out : out DcacheToLoadstore1Type;

m_in : in MmuToDcacheType;
m_out : out DcacheToMmuType;

stall_out : out std_ulogic;

wishbone_out : out wishbone_master_out;
@ -146,9 +149,6 @@ architecture rtl of dcache is
attribute ram_style of dtlb_tags : signal is "distributed";
attribute ram_style of dtlb_ptes : signal is "distributed";

signal r0 : Loadstore1ToDcacheType;
signal r0_valid : std_ulogic;

-- Record for storing permission, attribute, etc. bits from a PTE
type perm_attr_t is record
reference : std_ulogic;
@ -205,6 +205,15 @@ architecture rtl of dcache is
-- first stage emits a stall for a complex op.
--

-- Stage 0 register, basically contains just the latched request
type reg_stage_0_t is record
-- Request latched from loadstore1 (d_in), or built from an MMU request
-- (m_in) when m_in.valid is set.
req : Loadstore1ToDcacheType;
-- Copy of m_in.tlbie for a request that came from the MMU; '0' for a
-- request that came from loadstore1.
tlbie : std_ulogic;
end record;

signal r0 : reg_stage_0_t;
-- r0.req.valid qualified by "not stalling" and "no error on a previous
-- request".
signal r0_valid : std_ulogic;
-- First stage register, contains state for stage 1 of load hits
-- and for the state machine used by all other operations
--
@ -424,35 +433,61 @@ begin
assert (64 = wishbone_data_bits)
report "Can't yet handle a wishbone width that isn't 64-bits" severity FAILURE;

-- Latch the request in r0 as long as we're not stalling
-- Latch the request in r0.req as long as we're not stalling
stage_0 : process(clk)
begin
if rising_edge(clk) then
if rst = '1' then
r0.valid <= '0';
r0.req.valid <= '0';
elsif stall_out = '0' then
r0 <= d_in;
assert (d_in.valid and m_in.valid) = '0' report
"request collision loadstore vs MMU";
if m_in.valid = '1' then
r0.req.valid <= '1';
r0.req.load <= '0';
r0.req.dcbz <= '0';
r0.req.nc <= '0';
r0.req.reserve <= '0';
r0.req.virt_mode <= '0';
r0.req.priv_mode <= '1';
r0.req.addr <= m_in.addr;
r0.req.data <= m_in.pte;
r0.req.byte_sel <= (others => '1');
r0.tlbie <= m_in.tlbie;
assert m_in.tlbie = '1' report "unknown request from MMU";
else
r0.req <= d_in;
r0.tlbie <= '0';
end if;
end if;
end if;
end process;

-- we don't yet handle collisions between loadstore1 requests and MMU requests
m_out.stall <= '0';

-- Hold off the request in r0 when stalling,
-- and cancel it if we get an error in a previous request.
r0_valid <= r0.valid and not stall_out and not r1.error_done;
r0_valid <= r0.req.valid and not stall_out and not r1.error_done;

-- TLB
-- Operates in the second cycle on the request latched in r0.
-- Operates in the second cycle on the request latched in r0.req.
-- TLB updates write the entry at the end of the second cycle.
tlb_read : process(clk)
variable index : tlb_index_t;
variable addrbits : std_ulogic_vector(TLB_SET_BITS - 1 downto 0);
begin
if rising_edge(clk) then
if stall_out = '1' then
-- keep reading the same thing while stalled
index := tlb_req_index;
else
index := to_integer(unsigned(d_in.addr(TLB_LG_PGSZ + TLB_SET_BITS - 1
downto TLB_LG_PGSZ)));
if m_in.valid = '1' then
addrbits := m_in.addr(TLB_LG_PGSZ + TLB_SET_BITS - 1 downto TLB_LG_PGSZ);
else
addrbits := d_in.addr(TLB_LG_PGSZ + TLB_SET_BITS - 1 downto TLB_LG_PGSZ);
end if;
index := to_integer(unsigned(addrbits));
end if;
tlb_valid_way <= dtlb_valids(index);
tlb_tag_way <= dtlb_tags(index);
@ -500,11 +535,11 @@ begin
variable hit : std_ulogic;
variable eatag : tlb_tag_t;
begin
tlb_req_index <= to_integer(unsigned(r0.addr(TLB_LG_PGSZ + TLB_SET_BITS - 1
tlb_req_index <= to_integer(unsigned(r0.req.addr(TLB_LG_PGSZ + TLB_SET_BITS - 1
downto TLB_LG_PGSZ)));
hitway := 0;
hit := '0';
eatag := r0.addr(63 downto TLB_LG_PGSZ + TLB_SET_BITS);
eatag := r0.req.addr(63 downto TLB_LG_PGSZ + TLB_SET_BITS);
for i in tlb_way_t loop
if tlb_valid_way(i) = '1' and
read_tlb_tag(i, tlb_tag_way) = eatag then
@ -515,13 +550,13 @@ begin
tlb_hit <= hit and r0_valid;
tlb_hit_way <= hitway;
pte <= read_tlb_pte(hitway, tlb_pte_way);
valid_ra <= tlb_hit or not r0.virt_mode;
if r0.virt_mode = '1' then
valid_ra <= tlb_hit or not r0.req.virt_mode;
if r0.req.virt_mode = '1' then
ra <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
r0.addr(TLB_LG_PGSZ - 1 downto 0);
r0.req.addr(TLB_LG_PGSZ - 1 downto 0);
perm_attr <= extract_perm_attr(pte);
else
ra <= r0.addr(REAL_ADDR_BITS - 1 downto 0);
ra <= r0.req.addr(REAL_ADDR_BITS - 1 downto 0);
perm_attr <= real_mode_perm_attr;
end if;
end process;
@ -540,9 +575,9 @@ begin
tlbia := '0';
tlbwe := '0';
if r0_valid = '1' and r0.tlbie = '1' then
if r0.addr(11 downto 10) /= "00" then
if r0.req.addr(11 downto 10) /= "00" then
tlbia := '1';
elsif r0.addr(9) = '1' then
elsif r0.req.addr(9) = '1' then
tlbwe := '1';
else
tlbie := '1';
@ -563,15 +598,16 @@ begin
else
repl_way := to_integer(unsigned(tlb_plru_victim(tlb_req_index)));
end if;
eatag := r0.addr(63 downto TLB_LG_PGSZ + TLB_SET_BITS);
eatag := r0.req.addr(63 downto TLB_LG_PGSZ + TLB_SET_BITS);
tagset := tlb_tag_way;
write_tlb_tag(repl_way, tagset, eatag);
dtlb_tags(tlb_req_index) <= tagset;
pteset := tlb_pte_way;
write_tlb_pte(repl_way, pteset, r0.data);
write_tlb_pte(repl_way, pteset, r0.req.data);
dtlb_ptes(tlb_req_index) <= pteset;
dtlb_valids(tlb_req_index)(repl_way) <= '1';
end if;
m_out.done <= r0_valid and r0.tlbie;
end if;
end process;

@ -628,8 +664,8 @@ begin
variable hit_way_set : hit_way_set_t;
begin
-- Extract line, row and tag from request
req_index <= get_index(r0.addr);
req_row <= get_row(r0.addr);
req_index <= get_index(r0.req.addr);
req_row <= get_row(r0.req.addr);
req_tag <= get_tag(ra);

-- Only do anything if not being stalled by stage 1
@ -648,13 +684,13 @@ begin
-- the TLB, and then decide later which match to use.
hit_way := 0;
is_hit := '0';
if r0.virt_mode = '1' then
if r0.req.virt_mode = '1' then
for j in tlb_way_t loop
hit_way_set(j) := 0;
s_hit := '0';
s_pte := read_tlb_pte(j, tlb_pte_way);
s_ra := s_pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
r0.addr(TLB_LG_PGSZ - 1 downto 0);
r0.req.addr(TLB_LG_PGSZ - 1 downto 0);
s_tag := get_tag(s_ra);
for i in way_t loop
if go = '1' and cache_valids(req_index)(i) = '1' and
@ -671,7 +707,7 @@ begin
hit_way := hit_way_set(tlb_hit_way);
end if;
else
s_tag := get_tag(r0.addr(REAL_ADDR_BITS - 1 downto 0));
s_tag := get_tag(r0.req.addr(REAL_ADDR_BITS - 1 downto 0));
for i in way_t loop
if go = '1' and cache_valids(req_index)(i) = '1' and
read_tag(i, cache_tags(req_index)) = s_tag then
@ -689,18 +725,18 @@ begin

-- work out whether we have permission for this access
-- NB we don't yet implement AMR, thus no KUAP
rc_ok <= perm_attr.reference and (r0.load or perm_attr.changed);
perm_ok <= (r0.priv_mode or not perm_attr.priv) and
(perm_attr.wr_perm or (r0.load and perm_attr.rd_perm));
rc_ok <= perm_attr.reference and (r0.req.load or perm_attr.changed);
perm_ok <= (r0.req.priv_mode or not perm_attr.priv) and
(perm_attr.wr_perm or (r0.req.load and perm_attr.rd_perm));

-- Combine the request and cache hit status to decide what
-- operation needs to be done
--
nc := r0.nc or perm_attr.nocache;
nc := r0.req.nc or perm_attr.nocache;
op := OP_NONE;
if go = '1' then
if valid_ra = '1' and rc_ok = '1' and perm_ok = '1' then
opsel := r0.load & nc & is_hit;
opsel := r0.req.load & nc & is_hit;
case opsel is
when "101" => op := OP_LOAD_HIT;
when "100" => op := OP_LOAD_MISS;
@ -723,7 +759,11 @@ begin
-- If we're stalling then we need to keep reading the last
-- row requested.
if stall_out = '0' then
if m_in.valid = '1' then
early_req_row <= get_row(m_in.addr);
else
early_req_row <= get_row(d_in.addr);
end if;
else
early_req_row <= req_row;
end if;
@ -741,17 +781,17 @@ begin
cancel_store <= '0';
set_rsrv <= '0';
clear_rsrv <= '0';
if r0_valid = '1' and r0.reserve = '1' then
if r0_valid = '1' and r0.req.reserve = '1' then
-- XXX generate alignment interrupt if address is not aligned
-- XXX or if r0.nc = '1'
if r0.load = '1' then
-- XXX or if r0.req.nc = '1'
if r0.req.load = '1' then
-- load with reservation
set_rsrv <= '1';
else
-- store conditional
clear_rsrv <= '1';
if reservation.valid = '0' or
r0.addr(63 downto LINE_OFF_BITS) /= reservation.addr then
r0.req.addr(63 downto LINE_OFF_BITS) /= reservation.addr then
cancel_store <= '1';
end if;
end if;
@ -765,7 +805,7 @@ begin
reservation.valid <= '0';
elsif set_rsrv = '1' then
reservation.valid <= '1';
reservation.addr <= r0.addr(63 downto LINE_OFF_BITS);
reservation.addr <= r0.req.addr(63 downto LINE_OFF_BITS);
end if;
end if;
end process;
@ -818,12 +858,6 @@ begin
d_out.valid <= '1';
end if;

-- tlbie is handled above and doesn't go through the cache state machine
if r1.tlbie_done = '1' then
report "completing tlbie";
d_out.valid <= '1';
end if;

-- Slow ops (load miss, NC, stores)
if r1.slow_valid = '1' then
-- If it's a load, enable register writeback and switch
@ -900,8 +934,8 @@ begin
if r1.state = IDLE then
-- In IDLE state, the only write path is the store-hit update case
wr_addr <= std_ulogic_vector(to_unsigned(req_row, ROW_BITS));
wr_data <= r0.data;
wr_sel <= r0.byte_sel;
wr_data <= r0.req.data;
wr_sel <= r0.req.byte_sel;
else
-- Otherwise, we might be doing a reload or a DCBZ
if r1.req.dcbz = '1' then
@ -936,17 +970,17 @@ begin
dcache_fast_hit : process(clk)
begin
if rising_edge(clk) then
-- If we have a request incoming, we have to latch it as r0.valid
-- If we have a request incoming, we have to latch it as r0.req.valid
-- is only set for a single cycle. It's up to the control logic to
-- ensure we don't override an uncompleted request (for now we are
-- single issue on load/stores so we are fine, later, we can generate
-- a stall output if necessary).

if req_op /= OP_NONE and stall_out = '0' then
r1.req <= r0;
r1.req <= r0.req;
report "op:" & op_t'image(req_op) &
" addr:" & to_hstring(r0.addr) &
" nc:" & std_ulogic'image(r0.nc) &
" addr:" & to_hstring(r0.req.addr) &
" nc:" & std_ulogic'image(r0.req.nc) &
" idx:" & integer'image(req_index) &
" tag:" & to_hstring(req_tag) &
" way: " & integer'image(req_hit_way);
@ -1018,7 +1052,7 @@ begin
when OP_LOAD_MISS =>
-- Normal load cache miss, start the reload machine
--
report "cache miss addr:" & to_hstring(r0.addr) &
report "cache miss addr:" & to_hstring(r0.req.addr) &
" idx:" & integer'image(req_index) &
" way:" & integer'image(replace_way) &
" tag:" & to_hstring(req_tag);
@ -1053,7 +1087,7 @@ begin
r1.state <= RELOAD_WAIT_ACK;

when OP_LOAD_NC =>
r1.wb.sel <= r0.byte_sel;
r1.wb.sel <= r0.req.byte_sel;
r1.wb.adr <= ra(r1.wb.adr'left downto 3) & "000";
r1.wb.cyc <= '1';
r1.wb.stb <= '1';
@ -1061,10 +1095,10 @@ begin
r1.state <= NC_LOAD_WAIT_ACK;

when OP_STORE_HIT | OP_STORE_MISS =>
if r0.dcbz = '0' then
r1.wb.sel <= r0.byte_sel;
if r0.req.dcbz = '0' then
r1.wb.sel <= r0.req.byte_sel;
r1.wb.adr <= ra(r1.wb.adr'left downto 3) & "000";
r1.wb.dat <= r0.data;
r1.wb.dat <= r0.req.data;
if cancel_store = '0' then
r1.wb.cyc <= '1';
r1.wb.stb <= '1';

@ -15,6 +15,9 @@ architecture behave of dcache_tb is
signal d_in : Loadstore1ToDcacheType;
signal d_out : DcacheToLoadstore1Type;

signal m_in : MmuToDcacheType;
signal m_out : DcacheToMmuType;

signal wb_bram_in : wishbone_master_out;
signal wb_bram_out : wishbone_slave_out;

@ -30,6 +33,8 @@ begin
rst => rst,
d_in => d_in,
d_out => d_out,
m_in => m_in,
m_out => m_out,
wishbone_out => wb_bram_in,
wishbone_in => wb_bram_out
);
@ -68,10 +73,12 @@ begin
-- Clear stuff
d_in.valid <= '0';
d_in.load <= '0';
d_in.tlbie <= '0';
d_in.nc <= '0';
d_in.addr <= (others => '0');
d_in.data <= (others => '0');
m_in.valid <= '0';
m_in.addr <= (others => '0');
m_in.pte <= (others => '0');

wait for 4*clk_period;
wait until rising_edge(clk);

@ -5,7 +5,6 @@ use ieee.numeric_std.all;
library work;
use work.decode_types.all;
use work.common.all;
use work.helpers.all;

-- 2 cycle LSU
-- We calculate the address in the first cycle
@ -22,6 +21,9 @@ entity loadstore1 is
d_out : out Loadstore1ToDcacheType;
d_in : in DcacheToLoadstore1Type;

m_out : out Loadstore1ToMmuType;
m_in : in MmuToLoadstore1Type;

dc_stall : in std_ulogic;
stall_out : out std_ulogic
);
@ -38,7 +40,9 @@ architecture behave of loadstore1 is
SECOND_REQ, -- send 2nd request of unaligned xfer
FIRST_ACK_WAIT, -- waiting for 1st ack from dcache
LAST_ACK_WAIT, -- waiting for last ack from dcache
LD_UPDATE -- writing rA with computed addr on load
LD_UPDATE, -- writing rA with computed addr on load
MMU_LOOKUP_1ST, -- waiting for MMU to look up translation
MMU_LOOKUP_LAST
);

type reg_stage_t is record
@ -62,6 +66,7 @@ architecture behave of loadstore1 is
virt_mode : std_ulogic;
priv_mode : std_ulogic;
state : state_t;
first_bytes : std_ulogic_vector(7 downto 0);
second_bytes : std_ulogic_vector(7 downto 0);
dar : std_ulogic_vector(63 downto 0);
dsisr : std_ulogic_vector(31 downto 0);
@ -146,6 +151,7 @@ begin
variable sprval : std_ulogic_vector(63 downto 0);
variable exception : std_ulogic;
variable next_addr : std_ulogic_vector(63 downto 0);
variable mmureq : std_ulogic;
variable dsisr : std_ulogic_vector(31 downto 0);
begin
v := r;
@ -158,6 +164,7 @@ begin
sprval := (others => '0'); -- avoid inferred latches
exception := '0';
dsisr := (others => '0');
mmureq := '0';

write_enable := '0';
do_update := '0';
@ -230,7 +237,7 @@ begin
req := '1';
v.dcbz := '1';
when OP_TLBIE =>
req := '1';
mmureq := '1';
v.tlbie := '1';
when OP_MFSPR =>
done := '1';
@ -282,19 +289,15 @@ begin
-- Do length_to_sel and work out if we are doing 2 dwords
long_sel := xfer_data_sel(l_in.length, v.addr(2 downto 0));
byte_sel := long_sel(7 downto 0);
v.first_bytes := byte_sel;
v.second_bytes := long_sel(15 downto 8);

v.addr := lsu_sum;

-- Do byte reversing and rotating for stores in the first cycle
byte_offset := "000";
brev_lenm1 := "000";
if v.tlbie = '0' then
byte_offset := unsigned(lsu_sum(2 downto 0));
brev_lenm1 := "000";
if l_in.byte_reverse = '1' then
brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1;
end if;
end if;
for i in 0 to 7 loop
k := (to_unsigned(i, 3) xor brev_lenm1) + byte_offset;
j := to_integer(k) * 8;
@ -309,6 +312,10 @@ begin
v.state := SECOND_REQ;
end if;
end if;
if mmureq = '1' then
stall := '1';
v.state := LAST_ACK_WAIT;
end if;
end if;

when SECOND_REQ =>
@ -323,12 +330,19 @@ begin
if d_in.valid = '1' then
if d_in.error = '1' then
-- dcache will discard the second request
addr := r.addr;
if d_in.tlb_miss = '1' then
-- give it to the MMU to look up
mmureq := '1';
v.state := MMU_LOOKUP_1ST;
else
-- signal an interrupt straight away
exception := '1';
dsisr(30) := d_in.tlb_miss;
dsisr(63 - 36) := d_in.perm_error;
dsisr(63 - 38) := not r.load;
dsisr(63 - 45) := d_in.rc_error;
v.state := IDLE;
end if;
else
v.state := LAST_ACK_WAIT;
if r.load = '1' then
@ -337,6 +351,32 @@ begin
end if;
end if;

when MMU_LOOKUP_1ST | MMU_LOOKUP_LAST =>
stall := '1';
if two_dwords = '1' and r.state = MMU_LOOKUP_LAST then
addr := next_addr;
byte_sel := r.second_bytes;
else
addr := r.addr;
byte_sel := r.first_bytes;
end if;
if m_in.done = '1' then
if m_in.error = '0' then
-- retry the request now that the MMU has installed a TLB entry
req := '1';
if r.state = MMU_LOOKUP_1ST then
v.state := SECOND_REQ;
else
v.state := LAST_ACK_WAIT;
end if;
else
exception := '1';
dsisr(63 - 33) := '1';
dsisr(63 - 38) := not r.load;
v.state := IDLE;
end if;
end if;

when LAST_ACK_WAIT =>
stall := '1';
if d_in.valid = '1' then
@ -346,12 +386,18 @@ begin
else
addr := r.addr;
end if;
if d_in.tlb_miss = '1' then
-- give it to the MMU to look up
mmureq := '1';
v.state := MMU_LOOKUP_LAST;
else
-- signal an interrupt straight away
exception := '1';
dsisr(30) := d_in.tlb_miss;
dsisr(63 - 36) := d_in.perm_error;
dsisr(63 - 38) := not r.load;
dsisr(63 - 45) := d_in.rc_error;
v.state := IDLE;
end if;
else
write_enable := r.load;
if r.load = '1' and r.update = '1' then
@ -366,6 +412,12 @@ begin
end if;
end if;
end if;
if m_in.done = '1' then
-- tlbie is finished
stall := '0';
done := '1';
v.state := IDLE;
end if;

when LD_UPDATE =>
do_update := '1';
@ -376,7 +428,6 @@ begin
-- Update outputs to dcache
d_out.valid <= req;
d_out.load <= v.load;
d_out.tlbie <= v.tlbie;
d_out.dcbz <= v.dcbz;
d_out.nc <= v.nc;
d_out.reserve <= v.reserve;
@ -386,6 +437,12 @@ begin
d_out.virt_mode <= v.virt_mode;
d_out.priv_mode <= v.priv_mode;

-- Update outputs to MMU
m_out.valid <= mmureq;
m_out.tlbie <= v.tlbie;
m_out.addr <= addr;
m_out.rs <= l_in.data;

-- Update outputs to writeback
-- Multiplex either cache data to the destination GPR or
-- the address for the rA update.

@ -25,6 +25,7 @@ filesets:
- control.vhdl
- execute1.vhdl
- loadstore1.vhdl
- mmu.vhdl
- dcache.vhdl
- multiply.vhdl
- divider.vhdl

@ -0,0 +1,109 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.common.all;

-- Radix MMU
-- Supports 4-level trees as in arch 3.0B, but not the two-step translation for
-- guests under a hypervisor (i.e. there is no gRA -> hRA translation).
--
-- NOTE(review): the architecture in this file does not walk any tree yet.
-- A tlbie request is forwarded to the dcache; any other request is completed
-- one cycle later with l_out.error set.

entity mmu is
port (
-- Clock; all state is updated on its rising edge.
clk : in std_ulogic;
-- Synchronous reset, active high.
rst : in std_ulogic;

-- Request from loadstore1 (tlbie or translation lookup).
l_in : in Loadstore1ToMmuType;
-- Completion/error status back to loadstore1.
l_out : out MmuToLoadstore1Type;

-- Request to the dcache (used for tlbie).
d_out : out MmuToDcacheType;
-- Status from the dcache; only d_in.done is examined by this architecture.
d_in : in DcacheToMmuType
);
end mmu;

architecture behave of mmu is

    -- Control states:
    --   IDLE           waiting for a request from loadstore1
    --   TLBIE_WAIT     a tlbie was passed to the dcache, waiting for d_in.done
    --   RADIX_LOOKUP_0 a lookup was requested; completes with an error
    type state_t is (IDLE,
                     TLBIE_WAIT,
                     RADIX_LOOKUP_0
                     );

    -- Registered state of the MMU.
    type reg_stage_t is record
        -- latched request from loadstore1
        valid : std_ulogic;
        addr  : std_ulogic_vector(63 downto 0);
        -- current control state
        state : state_t;
    end record;

    signal r, rin : reg_stage_t;

begin

    -- Clocked process: loads r from rin every cycle, with a synchronous reset
    -- of the state and valid fields (addr is left untouched by reset).
    -- Also emits simulation trace messages.
    mmu_0: process(clk)
    begin
        if rising_edge(clk) then
            if rst = '1' then
                r.valid <= '0';
                r.state <= IDLE;
            else
                r <= rin;
                if rin.valid = '1' then
                    report "MMU got tlb miss for " & to_hstring(rin.addr);
                end if;
                if l_out.done = '1' then
                    report "MMU completing miss with error=" & std_ulogic'image(l_out.error);
                end if;
            end if;
        end if;
    end process;

    -- Combinational process: computes the next register value and all outputs
    -- from the current state and the inputs.
    mmu_1: process(all)
        variable nxt      : reg_stage_t;
        variable dc_valid : std_ulogic;
        variable finished : std_ulogic;
        variable failed   : std_ulogic;
    begin
        -- Defaults: latch the incoming request, stay in the current state,
        -- and leave every strobe inactive.
        nxt := (valid => l_in.valid,
                addr  => l_in.addr,
                state => r.state);
        dc_valid := '0';
        finished := '0';
        failed   := '0';

        case r.state is
            when RADIX_LOOKUP_0 =>
                -- No lookup is performed here: report completion with an error.
                finished  := '1';
                failed    := '1';
                nxt.state := IDLE;

            when TLBIE_WAIT =>
                -- Hold until the dcache reports the tlbie as done.
                if d_in.done = '1' then
                    finished  := '1';
                    nxt.state := IDLE;
                end if;

            when IDLE =>
                if l_in.valid = '1' and l_in.tlbie = '1' then
                    -- tlbie: hand it to the dcache in this same cycle
                    dc_valid  := '1';
                    nxt.state := TLBIE_WAIT;
                elsif l_in.valid = '1' then
                    -- anything else is a translation lookup
                    nxt.state := RADIX_LOOKUP_0;
                end if;
        end case;

        -- update registers
        rin <= nxt;

        -- drive outputs
        l_out <= (done  => finished,
                  error => failed);

        -- the dcache request is a pass-through of the loadstore1 request,
        -- qualified by dc_valid
        d_out <= (valid => dc_valid,
                  tlbie => l_in.tlbie,
                  addr  => l_in.addr,
                  pte   => l_in.rs);
    end process;
end;
Loading…
Cancel
Save