dcache: Implement data cache touch and flush instructions

This implements dcbf, dcbt and dcbtst in the dcache.  The dcbst (data
cache block store) instruction remains a no-op because our dcache is
write-through and therefore never has modified data that could need to
be written back.

Dcbt (data cache block touch) and dcbtst (data cache block touch for
store) behave similarly except that dcbtst is a no-op on a read-only
page.  Neither instruction ever causes an interrupt.  If they miss in
the cache and the page is cacheable, they are handled like a load miss
except that they complete as soon as the state machine starts handling
the load miss, rather than waiting for any data.

Dcbf (data cache block flush) can cause a data storage interrupt.  If
it hits in the cache, the state machine goes to a new FLUSH_CYCLE
state in which the cache line valid bit is cleared.

In order to avoid having more than 8 values in op_t, this combines
OP_STORE_MISS and OP_STORE_HIT into a single OP_STORE value.  A new
OP_NOP value is used for operations which can complete immediately
without changing any dcache state (currently dcbt/dcbtst that cause an
access exception or target a non-cacheable page, and dcbf that misses
the cache).

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/434/head
Paul Mackerras 4 years ago
parent b181d28df2
commit ba4614c5f4

@ -603,6 +603,8 @@ package common is
hold : std_ulogic; hold : std_ulogic;
load : std_ulogic; -- is this a load load : std_ulogic; -- is this a load
dcbz : std_ulogic; dcbz : std_ulogic;
flush : std_ulogic;
touch : std_ulogic;
nc : std_ulogic; nc : std_ulogic;
reserve : std_ulogic; reserve : std_ulogic;
atomic_qw : std_ulogic; -- part of a quadword atomic op atomic_qw : std_ulogic; -- part of a quadword atomic op
@ -614,6 +616,9 @@ package common is
data : std_ulogic_vector(63 downto 0); -- valid the cycle after .valid = 1 data : std_ulogic_vector(63 downto 0); -- valid the cycle after .valid = 1
byte_sel : std_ulogic_vector(7 downto 0); byte_sel : std_ulogic_vector(7 downto 0);
end record; end record;
-- Default value for a Loadstore1ToDcacheType record: every field zeroed
-- (addr, data and byte_sel explicitly, all remaining fields via "others").
constant Loadstore1ToDcacheInit : Loadstore1ToDcacheType :=
(addr => (others => '0'), data => (others => '0'), byte_sel => x"00",
others => '0');


type DcacheToLoadstore1Type is record type DcacheToLoadstore1Type is record
valid : std_ulogic; valid : std_ulogic;

@ -187,15 +187,17 @@ architecture rtl of dcache is
OP_LOAD_HIT, -- Cache hit on load OP_LOAD_HIT, -- Cache hit on load
OP_LOAD_MISS, -- Load missing cache OP_LOAD_MISS, -- Load missing cache
OP_LOAD_NC, -- Non-cachable load OP_LOAD_NC, -- Non-cachable load
OP_STORE_HIT, -- Store hitting cache OP_STORE, -- Store, whether hitting or missing cache
OP_STORE_MISS); -- Store missing cache OP_NOP, -- nothing to do, just complete the op
OP_MISC); -- Flush


-- Cache state machine -- Cache state machine
type state_t is (IDLE, -- Normal load hit processing type state_t is (IDLE, -- Normal load hit processing
RELOAD_WAIT_ACK, -- Cache reload wait ack RELOAD_WAIT_ACK, -- Cache reload wait ack
STORE_WAIT_ACK, -- Store wait ack STORE_WAIT_ACK, -- Store wait ack
NC_LOAD_WAIT_ACK, -- Non-cachable load wait ack NC_LOAD_WAIT_ACK, -- Non-cachable load wait ack
DO_STCX); -- Check for stcx. validity DO_STCX, -- Check for stcx. validity
FLUSH_CYCLE); -- Cycle for invalidating cache line


-- --
-- Dcache operations: -- Dcache operations:
@ -289,12 +291,15 @@ architecture rtl of dcache is
op : op_t; op : op_t;
valid : std_ulogic; valid : std_ulogic;
dcbz : std_ulogic; dcbz : std_ulogic;
flush : std_ulogic;
touch : std_ulogic;
reserve : std_ulogic; reserve : std_ulogic;
first_dw : std_ulogic; first_dw : std_ulogic;
last_dw : std_ulogic; last_dw : std_ulogic;
real_addr : real_addr_t; real_addr : real_addr_t;
data : std_ulogic_vector(63 downto 0); data : std_ulogic_vector(63 downto 0);
byte_sel : std_ulogic_vector(7 downto 0); byte_sel : std_ulogic_vector(7 downto 0);
is_hit : std_ulogic;
hit_way : way_t; hit_way : way_t;
same_tag : std_ulogic; same_tag : std_ulogic;
mmu_req : std_ulogic; mmu_req : std_ulogic;
@ -377,6 +382,7 @@ architecture rtl of dcache is
-- Async signals on incoming request -- Async signals on incoming request
signal req_index : index_t; signal req_index : index_t;
signal req_hit_way : way_t; signal req_hit_way : way_t;
signal req_is_hit : std_ulogic;
signal req_tag : cache_tag_t; signal req_tag : cache_tag_t;
signal req_op : op_t; signal req_op : op_t;
signal req_data : std_ulogic_vector(63 downto 0); signal req_data : std_ulogic_vector(63 downto 0);
@ -568,12 +574,9 @@ begin
assert (d_in.valid and m_in.valid) = '0' report assert (d_in.valid and m_in.valid) = '0' report
"request collision loadstore vs MMU"; "request collision loadstore vs MMU";
if m_in.valid = '1' then if m_in.valid = '1' then
r.req := Loadstore1ToDcacheInit;
r.req.valid := '1'; r.req.valid := '1';
r.req.load := not (m_in.tlbie or m_in.tlbld); r.req.load := not (m_in.tlbie or m_in.tlbld);
r.req.dcbz := '0';
r.req.nc := '0';
r.req.reserve := '0';
r.req.virt_mode := '0';
r.req.priv_mode := '1'; r.req.priv_mode := '1';
r.req.addr := m_in.addr; r.req.addr := m_in.addr;
r.req.data := m_in.pte; r.req.data := m_in.pte;
@ -1077,13 +1080,17 @@ begin
-- since it will be by the time we perform the store. -- since it will be by the time we perform the store.
-- For a load, check the appropriate row valid bit; but also, -- For a load, check the appropriate row valid bit; but also,
-- if use_forward_rl is 1 then we can consider this a hit. -- if use_forward_rl is 1 then we can consider this a hit.
is_hit := not r0.req.load or r1.rows_valid(to_integer(req_row(ROW_LINEBITS-1 downto 0))) or -- For a touch, since the line we want is being reloaded already,
-- consider this a hit.
is_hit := not r0.req.load or r0.req.touch or
r1.rows_valid(to_integer(req_row(ROW_LINEBITS-1 downto 0))) or
use_forward_rl; use_forward_rl;
hit_way := replace_way; hit_way := replace_way;
end if; end if;


-- The way that matched on a hit -- The way that matched on a hit
req_hit_way <= hit_way; req_hit_way <= hit_way;
req_is_hit <= is_hit;


-- work out whether we have permission for this access -- work out whether we have permission for this access
-- NB we don't yet implement AMR, thus no KUAP -- NB we don't yet implement AMR, thus no KUAP
@ -1098,17 +1105,32 @@ begin
nc := r0.req.nc or perm_attr.nocache; nc := r0.req.nc or perm_attr.nocache;
op := OP_NONE; op := OP_NONE;
if go = '1' then if go = '1' then
if access_ok = '0' then if r0.req.touch = '1' then
if access_ok = '1' and is_hit = '0' and nc = '0' then
op := OP_LOAD_MISS;
elsif access_ok = '1' and is_hit = '1' and nc = '0' then
-- Make this OP_LOAD_HIT so the PLRU gets updated
op := OP_LOAD_HIT;
else
op := OP_NOP;
end if;
elsif access_ok = '0' then
op := OP_BAD; op := OP_BAD;
elsif r0.req.flush = '1' then
if is_hit = '0' then
op := OP_NOP;
else
op := OP_MISC;
end if;
else else
opsel := r0.req.load & nc & is_hit; opsel := r0.req.load & nc & is_hit;
case opsel is case opsel is
when "101" => op := OP_LOAD_HIT; when "101" => op := OP_LOAD_HIT;
when "100" => op := OP_LOAD_MISS; when "100" => op := OP_LOAD_MISS;
when "110" => op := OP_LOAD_NC; when "110" => op := OP_LOAD_NC;
when "001" => op := OP_STORE_HIT; when "001" => op := OP_STORE;
when "000" => op := OP_STORE_MISS; when "000" => op := OP_STORE;
when "010" => op := OP_STORE_MISS; when "010" => op := OP_STORE;
when "011" => op := OP_BAD; when "011" => op := OP_BAD;
when "111" => op := OP_BAD; when "111" => op := OP_BAD;
when others => op := OP_NONE; when others => op := OP_NONE;
@ -1348,8 +1370,8 @@ begin
end if; end if;


-- The cache hit indication is used for PLRU updates -- The cache hit indication is used for PLRU updates
if req_op = OP_LOAD_HIT or req_op = OP_STORE_HIT then if req_op = OP_LOAD_HIT or req_op = OP_STORE then
r1.cache_hit <= '1'; r1.cache_hit <= req_is_hit;
else else
r1.cache_hit <= '0'; r1.cache_hit <= '0';
end if; end if;
@ -1430,7 +1452,7 @@ begin
r1.ls_valid <= '0'; r1.ls_valid <= '0';
-- complete tlbies and TLB loads in the third cycle -- complete tlbies and TLB loads in the third cycle
r1.mmu_done <= r0_valid and (r0.tlbie or r0.tlbld); r1.mmu_done <= r0_valid and (r0.tlbie or r0.tlbld);
if req_op = OP_LOAD_HIT then if req_op = OP_LOAD_HIT or req_op = OP_NOP then
if r0.mmu_req = '0' then if r0.mmu_req = '0' then
r1.ls_valid <= '1'; r1.ls_valid <= '1';
else else
@ -1446,7 +1468,7 @@ begin
if req_go = '1' and access_ok = '1' and r0.req.load = '1' and if req_go = '1' and access_ok = '1' and r0.req.load = '1' and
r0.req.reserve = '1' and r0.req.atomic_first = '1' then r0.req.reserve = '1' and r0.req.atomic_first = '1' then
reservation.addr <= ra(REAL_ADDR_BITS - 1 downto LINE_OFF_BITS); reservation.addr <= ra(REAL_ADDR_BITS - 1 downto LINE_OFF_BITS);
if req_op = OP_LOAD_HIT then if req_is_hit = '1' then
reservation.valid <= not req_snoop_hit; reservation.valid <= not req_snoop_hit;
end if; end if;
end if; end if;
@ -1485,6 +1507,8 @@ begin
req.valid := req_go; req.valid := req_go;
req.mmu_req := r0.mmu_req; req.mmu_req := r0.mmu_req;
req.dcbz := r0.req.dcbz; req.dcbz := r0.req.dcbz;
req.flush := r0.req.flush;
req.touch := r0.req.touch;
req.reserve := r0.req.reserve; req.reserve := r0.req.reserve;
req.first_dw := r0.req.atomic_first; req.first_dw := r0.req.atomic_first;
req.last_dw := r0.req.atomic_last; req.last_dw := r0.req.atomic_last;
@ -1504,12 +1528,13 @@ begin
req.byte_sel := r0.req.byte_sel; req.byte_sel := r0.req.byte_sel;
end if; end if;
req.hit_way := req_hit_way; req.hit_way := req_hit_way;
req.is_hit := req_is_hit;
req.same_tag := req_same_tag; req.same_tag := req_same_tag;


-- Store the incoming request from r0, if it is a slow request -- Store the incoming request from r0, if it is a slow request
-- Note that r1.full = 1 implies req_op = OP_NONE -- Note that r1.full = 1 implies req_op = OP_NONE
if req_op = OP_LOAD_MISS or req_op = OP_LOAD_NC or if req_op = OP_LOAD_MISS or req_op = OP_LOAD_NC or
req_op = OP_STORE_MISS or req_op = OP_STORE_HIT then req_op = OP_STORE or req_op = OP_MISC then
r1.req <= req; r1.req <= req;
r1.full <= '1'; r1.full <= '1';
end if; end if;
@ -1523,7 +1548,7 @@ begin
r1.victim_way <= plru_victim; r1.victim_way <= plru_victim;
report "victim way:" & to_hstring(plru_victim); report "victim way:" & to_hstring(plru_victim);
end if; end if;
if req_op = OP_LOAD_MISS or (req_op = OP_STORE_MISS and r0.req.dcbz = '1') then if req_op = OP_LOAD_MISS or (r0.req.dcbz = '1' and req_is_hit = '0') then
r1.choose_victim <= '1'; r1.choose_victim <= '1';
end if; end if;


@ -1555,7 +1580,7 @@ begin
r1.reload_tag <= get_tag(req.real_addr); r1.reload_tag <= get_tag(req.real_addr);
r1.req.same_tag <= '1'; r1.req.same_tag <= '1';


if req.op = OP_STORE_HIT then if req.is_hit = '1' then
r1.store_way <= req.hit_way; r1.store_way <= req.hit_way;
end if; end if;


@ -1585,13 +1610,20 @@ begin
r1.write_tag <= '1'; r1.write_tag <= '1';
ev.load_miss <= '1'; ev.load_miss <= '1';


-- If this is a touch, complete the instruction
if req.touch = '1' then
r1.full <= '0';
r1.slow_valid <= '1';
r1.ls_valid <= '1';
end if;

when OP_LOAD_NC => when OP_LOAD_NC =>
r1.wb.cyc <= '1'; r1.wb.cyc <= '1';
r1.wb.stb <= '1'; r1.wb.stb <= '1';
r1.wb.we <= '0'; r1.wb.we <= '0';
r1.state <= NC_LOAD_WAIT_ACK; r1.state <= NC_LOAD_WAIT_ACK;


when OP_STORE_HIT | OP_STORE_MISS => when OP_STORE =>
if req.reserve = '1' then if req.reserve = '1' then
-- stcx needs to wait until next cycle -- stcx needs to wait until next cycle
-- for the reservation address check -- for the reservation address check
@ -1605,9 +1637,7 @@ begin
else else
r1.mmu_done <= '1'; r1.mmu_done <= '1';
end if; end if;
if req.op = OP_STORE_HIT then r1.write_bram <= req.is_hit;
r1.write_bram <= '1';
end if;
r1.wb.we <= '1'; r1.wb.we <= '1';
r1.wb.cyc <= '1'; r1.wb.cyc <= '1';
r1.wb.stb <= '1'; r1.wb.stb <= '1';
@ -1615,21 +1645,24 @@ begin
-- dcbz is handled much like a load miss except -- dcbz is handled much like a load miss except
-- that we are writing to memory instead of reading -- that we are writing to memory instead of reading
r1.state <= RELOAD_WAIT_ACK; r1.state <= RELOAD_WAIT_ACK;
if req.op = OP_STORE_MISS then r1.write_tag <= not req.is_hit;
r1.write_tag <= '1';
end if;
r1.wb.we <= '1'; r1.wb.we <= '1';
r1.wb.cyc <= '1'; r1.wb.cyc <= '1';
r1.wb.stb <= '1'; r1.wb.stb <= '1';
end if; end if;
if req.op = OP_STORE_MISS then if req.op = OP_STORE then
ev.store_miss <= '1'; ev.store_miss <= not req.is_hit;
end if; end if;


when OP_MISC =>
r1.state <= FLUSH_CYCLE;

-- OP_NONE and OP_BAD do nothing -- OP_NONE and OP_BAD do nothing
-- OP_BAD was handled above already -- OP_BAD & OP_NOP were handled above already
when OP_NONE => when OP_NONE =>
when OP_BAD => when OP_BAD =>
when OP_NOP =>

end case; end case;


when RELOAD_WAIT_ACK => when RELOAD_WAIT_ACK =>
@ -1712,14 +1745,12 @@ begin
end if; end if;
assert not is_X(acks); assert not is_X(acks);
if acks < 7 and req.same_tag = '1' and req.dcbz = '0' and if acks < 7 and req.same_tag = '1' and req.dcbz = '0' and
(req.op = OP_STORE_MISS or req.op = OP_STORE_HIT) then req.op = OP_STORE then
r1.wb.stb <= '1'; r1.wb.stb <= '1';
stbs_done := false; stbs_done := false;
r1.store_way <= req.hit_way; r1.store_way <= req.hit_way;
r1.store_row <= get_row(req.real_addr); r1.store_row <= get_row(req.real_addr);
if req.op = OP_STORE_HIT then r1.write_bram <= req.is_hit;
r1.write_bram <= '1';
end if;
r1.full <= '0'; r1.full <= '0';
r1.slow_valid <= '1'; r1.slow_valid <= '1';
-- Store requests never come from the MMU -- Store requests never come from the MMU
@ -1783,9 +1814,7 @@ begin
if wishbone_in.stall = '0' then if wishbone_in.stall = '0' then
-- Store has been accepted, so now we can write the -- Store has been accepted, so now we can write the
-- cache data RAM -- cache data RAM
if r1.req.op = OP_STORE_HIT then r1.write_bram <= req.is_hit;
r1.write_bram <= '1';
end if;
r1.wb.stb <= '0'; r1.wb.stb <= '0';
end if; end if;
if wishbone_in.ack = '1' then if wishbone_in.ack = '1' then
@ -1802,6 +1831,12 @@ begin
end if; end if;
end if; end if;


when FLUSH_CYCLE =>
cache_valids(to_integer(r1.store_index))(to_integer(r1.store_way)) <= '0';
r1.full <= '0';
r1.slow_valid <= '1';
r1.ls_valid <= '1';
r1.state <= IDLE;
end case; end case;
end if; end if;
end if; end if;

@ -129,10 +129,10 @@ architecture behaviour of decode1 is
INSN_crorc => (ALU, NONE, OP_CROP, NONE, NONE, NONE, NONE, '1', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_crorc => (ALU, NONE, OP_CROP, NONE, NONE, NONE, NONE, '1', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_crxor => (ALU, NONE, OP_CROP, NONE, NONE, NONE, NONE, '1', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_crxor => (ALU, NONE, OP_CROP, NONE, NONE, NONE, NONE, '1', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_darn => (ALU, NONE, OP_DARN, NONE, NONE, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_darn => (ALU, NONE, OP_DARN, NONE, NONE, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_dcbf => (ALU, NONE, OP_DCBF, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_dcbf => (LDST, NONE, OP_DCBF, RA_OR_ZERO, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_dcbst => (ALU, NONE, OP_DCBST, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_dcbst => (ALU, NONE, OP_DCBST, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_dcbt => (ALU, NONE, OP_XCBT, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_dcbt => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_dcbtst => (ALU, NONE, OP_DCBTST, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_dcbtst => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_dcbz => (LDST, NONE, OP_DCBZ, RA_OR_ZERO, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_dcbz => (LDST, NONE, OP_DCBZ, RA_OR_ZERO, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_divd => (DVU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RCOE, '0', '0', NONE), INSN_divd => (DVU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RCOE, '0', '0', NONE),
INSN_divde => (DVU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RCOE, '0', '0', NONE), INSN_divde => (DVU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RCOE, '0', '0', NONE),
@ -200,7 +200,7 @@ architecture behaviour of decode1 is
INSN_ftdiv => (FPU, FPU, OP_FP_CMP, FRA, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_ftdiv => (FPU, FPU, OP_FP_CMP, FRA, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_ftsqrt => (FPU, FPU, OP_FP_CMP, NONE, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_ftsqrt => (FPU, FPU, OP_FP_CMP, NONE, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_icbi => (ALU, NONE, OP_ICBI, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), INSN_icbi => (ALU, NONE, OP_ICBI, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE),
INSN_icbt => (ALU, NONE, OP_XCBT, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_icbt => (ALU, NONE, OP_ICBT, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_isel => (ALU, NONE, OP_ISEL, RA_OR_ZERO, RB, NONE, RT, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_isel => (ALU, NONE, OP_ISEL, RA_OR_ZERO, RB, NONE, RT, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_isync => (ALU, NONE, OP_ISYNC, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_isync => (ALU, NONE, OP_ISYNC, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_lbarx => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '1', '0', '0', NONE, '0', '0', NONE), INSN_lbarx => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '1', '0', '0', NONE, '0', '0', NONE),

@ -7,8 +7,9 @@ package decode_types is
OP_BCD, OP_BPERM, OP_BREV, OP_BCD, OP_BPERM, OP_BREV,
OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB,
OP_COUNTB, OP_CROP, OP_COUNTB, OP_CROP,
OP_DARN, OP_DCBF, OP_DCBST, OP_XCBT, OP_DCBTST, OP_DARN, OP_DCBF, OP_DCBST, OP_DCBZ,
OP_DCBZ, OP_ICBI, OP_SPARE,
OP_ICBI, OP_ICBT,
OP_FP_CMP, OP_FP_ARITH, OP_FP_MOVE, OP_FP_MISC, OP_FP_CMP, OP_FP_ARITH, OP_FP_MOVE, OP_FP_MISC,
OP_DIV, OP_DIVE, OP_MOD, OP_DIV, OP_DIVE, OP_MOD,
OP_EXTS, OP_EXTSWSLI, OP_EXTS, OP_EXTSWSLI,

@ -1184,8 +1184,8 @@ begin
else else
illegal := '1'; illegal := '1';
end if; end if;
when OP_NOP | OP_DCBF | OP_DCBST | OP_XCBT | OP_DCBTST => when OP_NOP | OP_DCBST | OP_ICBT =>
-- Do nothing -- Do nothing
when OP_ADD => when OP_ADD =>
if e_in.output_carry = '1' then if e_in.output_carry = '1' then
if e_in.input_carry /= OV then if e_in.input_carry /= OV then
@ -1653,11 +1653,10 @@ begin
v.e.srr1 := (others => '0'); v.e.srr1 := (others => '0');
v.e.srr1(47 - 33) := '1'; v.e.srr1(47 - 33) := '1';
v.e.srr1(47 - 34) := ex1.prev_prefixed; v.e.srr1(47 - 34) := ex1.prev_prefixed;
if ex1.prev_op = OP_LOAD or ex1.prev_op = OP_ICBI or if ex1.prev_op = OP_LOAD or ex1.prev_op = OP_ICBI or ex1.prev_op = OP_ICBT or
ex1.prev_op = OP_XCBT or ex1.prev_op = OP_DCBST or ex1.prev_op = OP_DCBF then ex1.prev_op = OP_DCBF then
v.e.srr1(47 - 35) := '1'; v.e.srr1(47 - 35) := '1';
elsif ex1.prev_op = OP_STORE or ex1.prev_op = OP_DCBZ or elsif ex1.prev_op = OP_STORE or ex1.prev_op = OP_DCBZ then
ex1.prev_op = OP_DCBTST then
v.e.srr1(47 - 36) := '1'; v.e.srr1(47 - 36) := '1';
end if; end if;



@ -61,6 +61,8 @@ architecture behave of loadstore1 is
dc_req : std_ulogic; dc_req : std_ulogic;
load : std_ulogic; load : std_ulogic;
store : std_ulogic; store : std_ulogic;
flush : std_ulogic;
touch : std_ulogic;
tlbie : std_ulogic; tlbie : std_ulogic;
dcbz : std_ulogic; dcbz : std_ulogic;
read_spr : std_ulogic; read_spr : std_ulogic;
@ -100,7 +102,8 @@ architecture behave of loadstore1 is
two_dwords : std_ulogic; two_dwords : std_ulogic;
incomplete : std_ulogic; incomplete : std_ulogic;
end record; end record;
constant request_init : request_t := (valid => '0', dc_req => '0', load => '0', store => '0', tlbie => '0', constant request_init : request_t := (valid => '0', dc_req => '0', load => '0', store => '0',
flush => '0', touch => '0', tlbie => '0',
dcbz => '0', read_spr => '0', write_spr => '0', mmu_op => '0', dcbz => '0', read_spr => '0', write_spr => '0', mmu_op => '0',
instr_fault => '0', do_update => '0', instr_fault => '0', do_update => '0',
mode_32bit => '0', prefixed => '0', mode_32bit => '0', prefixed => '0',
@ -470,7 +473,7 @@ begin
addr_mask := std_ulogic_vector(unsigned(l_in.length(2 downto 0)) - 1); addr_mask := std_ulogic_vector(unsigned(l_in.length(2 downto 0)) - 1);


-- Do length_to_sel and work out if we are doing 2 dwords -- Do length_to_sel and work out if we are doing 2 dwords
long_sel := xfer_data_sel(v.length, addr(2 downto 0)); long_sel := xfer_data_sel(l_in.length, addr(2 downto 0));
v.byte_sel := long_sel(7 downto 0); v.byte_sel := long_sel(7 downto 0);
v.second_bytes := long_sel(15 downto 8); v.second_bytes := long_sel(15 downto 8);
if long_sel(15 downto 8) /= "00000000" then if long_sel(15 downto 8) /= "00000000" then
@ -505,6 +508,9 @@ begin
case l_in.op is case l_in.op is
when OP_STORE => when OP_STORE =>
v.store := '1'; v.store := '1';
if l_in.length = "0000" then
v.touch := '1';
end if;
when OP_LOAD => when OP_LOAD =>
if l_in.update = '0' or l_in.second = '0' then if l_in.update = '0' or l_in.second = '0' then
v.load := '1'; v.load := '1';
@ -512,10 +518,16 @@ begin
-- Allow an extra cycle for SP->DP precision conversion -- Allow an extra cycle for SP->DP precision conversion
v.load_sp := '1'; v.load_sp := '1';
end if; end if;
if l_in.length = "0000" then
v.touch := '1';
end if;
else else
-- write back address to RA -- write back address to RA
v.do_update := '1'; v.do_update := '1';
end if; end if;
when OP_DCBF =>
v.load := '1';
v.flush := '1';
when OP_DCBZ => when OP_DCBZ =>
v.dcbz := '1'; v.dcbz := '1';
v.align_intr := v.nc; v.align_intr := v.nc;
@ -541,7 +553,7 @@ begin
-- Work out controls for load and store formatting -- Work out controls for load and store formatting
brev_lenm1 := "000"; brev_lenm1 := "000";
if v.byte_reverse = '1' then if v.byte_reverse = '1' then
brev_lenm1 := unsigned(v.length(2 downto 0)) - 1; brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1;
end if; end if;
v.brev_mask := brev_lenm1; v.brev_mask := brev_lenm1;


@ -882,7 +894,8 @@ begin


if d_in.valid = '1' then if d_in.valid = '1' then
if r2.req.incomplete = '0' then if r2.req.incomplete = '0' then
write_enable := r2.req.load and not r2.req.load_sp; write_enable := r2.req.load and not r2.req.load_sp and
not r2.req.flush and not r2.req.touch;
-- stores write back rA update -- stores write back rA update
do_update := r2.req.update and r2.req.store; do_update := r2.req.update and r2.req.store;
end if; end if;
@ -977,6 +990,8 @@ begin
d_out.valid <= stage1_dcreq; d_out.valid <= stage1_dcreq;
d_out.load <= stage1_req.load; d_out.load <= stage1_req.load;
d_out.dcbz <= stage1_req.dcbz; d_out.dcbz <= stage1_req.dcbz;
d_out.flush <= stage1_req.flush;
d_out.touch <= stage1_req.touch;
d_out.nc <= stage1_req.nc; d_out.nc <= stage1_req.nc;
d_out.reserve <= stage1_req.reserve; d_out.reserve <= stage1_req.reserve;
d_out.atomic_qw <= stage1_req.atomic_qw; d_out.atomic_qw <= stage1_req.atomic_qw;
@ -990,6 +1005,8 @@ begin
d_out.valid <= req; d_out.valid <= req;
d_out.load <= r2.req.load; d_out.load <= r2.req.load;
d_out.dcbz <= r2.req.dcbz; d_out.dcbz <= r2.req.dcbz;
d_out.flush <= r2.req.flush;
d_out.touch <= r2.req.touch;
d_out.nc <= r2.req.nc; d_out.nc <= r2.req.nc;
d_out.reserve <= r2.req.reserve; d_out.reserve <= r2.req.reserve;
d_out.atomic_qw <= r2.req.atomic_qw; d_out.atomic_qw <= r2.req.atomic_qw;

Loading…
Cancel
Save