loadstore1: Generate busy signal earlier

This makes the calculation of busy as simple as possible and dependent
only on register outputs.  The timing of busy is critical, as it gates
the valid signal for the next instruction, and therefore any delays
in dropping busy at the end of a load or store directly impact the
timing of a host of other paths.

This also separates the 'done without error' and 'done with error'
cases from the MMU into separate signals that are both driven directly
from registers.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/233/head
Paul Mackerras 4 years ago
parent c180ed0af0
commit 91cbeee77c

@ -315,6 +315,7 @@ package common is

type MmuToLoadstore1Type is record
done : std_ulogic;
err : std_ulogic;
invalid : std_ulogic;
badtree : std_ulogic;
segerr : std_ulogic;

@ -80,6 +80,9 @@ architecture behave of loadstore1 is
dsisr : std_ulogic_vector(31 downto 0);
instr_fault : std_ulogic;
sprval : std_ulogic_vector(63 downto 0);
busy : std_ulogic;
wait_dcache : std_ulogic;
wait_mmu : std_ulogic;
end record;

type byte_sel_t is array(0 to 7) of std_ulogic;
@ -128,6 +131,9 @@ begin
if rising_edge(clk) then
if rst = '1' then
r.state <= IDLE;
r.busy <= '0';
r.wait_dcache <= '0';
r.wait_mmu <= '0';
else
r <= rin;
end if;
@ -228,8 +234,17 @@ begin
-- compute (addr + 8) & ~7 for the second doubleword when unaligned
next_addr := std_ulogic_vector(unsigned(r.addr(63 downto 3)) + 1) & "000";

-- Busy calculation.
-- We need to minimize the delay from clock to busy valid because it
-- gates the start of execution of the next instruction.
busy := r.busy or (r.wait_dcache and not d_in.valid) or (r.wait_mmu and not m_in.done);

done := '0';
if r.state /= IDLE and busy = '0' then
done := '1';
end if;
exception := '0';

case r.state is
when IDLE =>

@ -255,7 +270,6 @@ begin
dsisr(63 - 38) := not r.load;
-- XXX there is no architected bit for this
dsisr(63 - 35) := d_in.cache_paradox;
v.state := IDLE;
else
-- Look up the translation for TLB miss
-- and also for permission error and RC error
@ -279,8 +293,6 @@ begin
else
-- stores write back rA update in this cycle
do_update := r.update;
done := '1';
v.state := IDLE;
end if;
end if;
end if;
@ -294,8 +306,6 @@ begin
byte_sel := r.first_bytes;
end if;
if m_in.done = '1' then
if m_in.invalid = '0' and m_in.perm_error = '0' and m_in.rc_error = '0' and
m_in.badtree = '0' and m_in.segerr = '0' then
if r.instr_fault = '0' then
-- retry the request now that the MMU has installed a TLB entry
req := '1';
@ -304,43 +314,28 @@ begin
else
v.state := ACK_WAIT;
end if;
else
-- nothing to do, the icache retries automatically
done := '1';
v.state := IDLE;
end if;
else
end if;
if m_in.err = '1' then
exception := '1';
dsisr(63 - 33) := m_in.invalid;
dsisr(63 - 36) := m_in.perm_error;
dsisr(63 - 38) := not r.load;
dsisr(63 - 44) := m_in.badtree;
dsisr(63 - 45) := m_in.rc_error;
v.state := IDLE;
end if;
end if;

when TLBIE_WAIT =>
if m_in.done = '1' then
-- tlbie is finished
done := '1';
v.state := IDLE;
end if;

when LD_UPDATE =>
do_update := '1';
v.state := IDLE;
done := '1';

when SPR_CMPLT =>
done := '1';
v.state := IDLE;

end case;

busy := '1';
if r.state = IDLE or done = '1' then
busy := '0';
if done = '1' or exception = '1' then
v.state := IDLE;
end if;

-- Note that l_in.valid is gated with busy inside execute1
@ -450,6 +445,31 @@ begin
end if;
end if;

-- Work out whether we'll be busy next cycle
v.busy := '0';
v.wait_dcache := '0';
v.wait_mmu := '0';
case v.state is
when SECOND_REQ =>
v.busy := '1';
when ACK_WAIT =>
if v.last_dword = '0' or (v.load = '1' and v.update = '1') then
v.busy := '1';
else
v.wait_dcache := '1';
end if;
when MMU_LOOKUP =>
if v.instr_fault = '0' then
v.busy := '1';
else
v.wait_mmu := '1';
end if;
when TLBIE_WAIT =>
v.wait_mmu := '1';
when others =>
-- not busy next cycle
end case;

-- Update outputs to dcache
d_out.valid <= req;
d_out.load <= v.load;

@ -52,6 +52,7 @@ architecture behave of mmu is
-- internal state
state : state_t;
done : std_ulogic;
err : std_ulogic;
pgtbl0 : std_ulogic_vector(63 downto 0);
pt0_valid : std_ulogic;
pgtbl3 : std_ulogic_vector(63 downto 0);
@ -92,7 +93,10 @@ begin
report "MMU got tlb miss for " & to_hstring(rin.addr);
end if;
if l_out.done = '1' then
report "MMU completing op with invalid=" & std_ulogic'image(l_out.invalid) &
report "MMU completing op without error";
end if;
if l_out.err = '1' then
report "MMU completing op with err invalid=" & std_ulogic'image(l_out.invalid) &
" badtree=" & std_ulogic'image(l_out.badtree);
end if;
if rin.state = RADIX_LOOKUP then
@ -200,6 +204,7 @@ begin
v.valid := '0';
dcreq := '0';
v.done := '0';
v.err := '0';
v.invalid := '0';
v.badtree := '0';
v.segerror := '0';
@ -412,7 +417,8 @@ begin
end case;

if v.state = RADIX_FINISH or (v.state = RADIX_LOAD_TLB and r.iside = '1') then
v.done := '1';
v.err := v.invalid or v.badtree or v.segerror or v.perm_err or v.rc_error;
v.done := not v.err;
end if;

if r.addr(63) = '1' then
@ -451,6 +457,7 @@ begin
end if;

l_out.done <= r.done;
l_out.err <= r.err;
l_out.invalid <= r.invalid;
l_out.badtree <= r.badtree;
l_out.segerr <= r.segerror;

Loading…
Cancel
Save