loadstore1: Generate busy signal earlier

This makes the calculation of busy as simple as possible and dependent
only on register outputs.  The timing of busy is critical, as it gates
the valid signal for the next instruction, and therefore any delays
in dropping busy at the end of a load or store directly impact the
timing of a host of other paths.

This also splits the MMU's 'done without error' and 'done with error'
cases into two distinct signals, both of which are driven directly
from registers.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
jtag-port
Paul Mackerras 4 years ago
parent c180ed0af0
commit 91cbeee77c

@ -315,6 +315,7 @@ package common is


type MmuToLoadstore1Type is record type MmuToLoadstore1Type is record
done : std_ulogic; done : std_ulogic;
err : std_ulogic;
invalid : std_ulogic; invalid : std_ulogic;
badtree : std_ulogic; badtree : std_ulogic;
segerr : std_ulogic; segerr : std_ulogic;

@ -80,6 +80,9 @@ architecture behave of loadstore1 is
dsisr : std_ulogic_vector(31 downto 0); dsisr : std_ulogic_vector(31 downto 0);
instr_fault : std_ulogic; instr_fault : std_ulogic;
sprval : std_ulogic_vector(63 downto 0); sprval : std_ulogic_vector(63 downto 0);
busy : std_ulogic;
wait_dcache : std_ulogic;
wait_mmu : std_ulogic;
end record; end record;


type byte_sel_t is array(0 to 7) of std_ulogic; type byte_sel_t is array(0 to 7) of std_ulogic;
@ -128,6 +131,9 @@ begin
if rising_edge(clk) then if rising_edge(clk) then
if rst = '1' then if rst = '1' then
r.state <= IDLE; r.state <= IDLE;
r.busy <= '0';
r.wait_dcache <= '0';
r.wait_mmu <= '0';
else else
r <= rin; r <= rin;
end if; end if;
@ -228,8 +234,17 @@ begin
-- compute (addr + 8) & ~7 for the second doubleword when unaligned -- compute (addr + 8) & ~7 for the second doubleword when unaligned
next_addr := std_ulogic_vector(unsigned(r.addr(63 downto 3)) + 1) & "000"; next_addr := std_ulogic_vector(unsigned(r.addr(63 downto 3)) + 1) & "000";


-- Busy calculation.
-- We need to minimize the delay from clock to busy valid because it
-- gates the start of execution of the next instruction.
busy := r.busy or (r.wait_dcache and not d_in.valid) or (r.wait_mmu and not m_in.done);

done := '0'; done := '0';
if r.state /= IDLE and busy = '0' then
done := '1';
end if;
exception := '0'; exception := '0';

case r.state is case r.state is
when IDLE => when IDLE =>


@ -255,7 +270,6 @@ begin
dsisr(63 - 38) := not r.load; dsisr(63 - 38) := not r.load;
-- XXX there is no architected bit for this -- XXX there is no architected bit for this
dsisr(63 - 35) := d_in.cache_paradox; dsisr(63 - 35) := d_in.cache_paradox;
v.state := IDLE;
else else
-- Look up the translation for TLB miss -- Look up the translation for TLB miss
-- and also for permission error and RC error -- and also for permission error and RC error
@ -279,8 +293,6 @@ begin
else else
-- stores write back rA update in this cycle -- stores write back rA update in this cycle
do_update := r.update; do_update := r.update;
done := '1';
v.state := IDLE;
end if; end if;
end if; end if;
end if; end if;
@ -294,53 +306,36 @@ begin
byte_sel := r.first_bytes; byte_sel := r.first_bytes;
end if; end if;
if m_in.done = '1' then if m_in.done = '1' then
if m_in.invalid = '0' and m_in.perm_error = '0' and m_in.rc_error = '0' and if r.instr_fault = '0' then
m_in.badtree = '0' and m_in.segerr = '0' then -- retry the request now that the MMU has installed a TLB entry
if r.instr_fault = '0' then req := '1';
-- retry the request now that the MMU has installed a TLB entry if r.last_dword = '0' then
req := '1'; v.state := SECOND_REQ;
if r.last_dword = '0' then
v.state := SECOND_REQ;
else
v.state := ACK_WAIT;
end if;
else else
-- nothing to do, the icache retries automatically v.state := ACK_WAIT;
done := '1';
v.state := IDLE;
end if; end if;
else
exception := '1';
dsisr(63 - 33) := m_in.invalid;
dsisr(63 - 36) := m_in.perm_error;
dsisr(63 - 38) := not r.load;
dsisr(63 - 44) := m_in.badtree;
dsisr(63 - 45) := m_in.rc_error;
v.state := IDLE;
end if; end if;
end if; end if;
if m_in.err = '1' then
exception := '1';
dsisr(63 - 33) := m_in.invalid;
dsisr(63 - 36) := m_in.perm_error;
dsisr(63 - 38) := not r.load;
dsisr(63 - 44) := m_in.badtree;
dsisr(63 - 45) := m_in.rc_error;
end if;


when TLBIE_WAIT => when TLBIE_WAIT =>
if m_in.done = '1' then
-- tlbie is finished
done := '1';
v.state := IDLE;
end if;


when LD_UPDATE => when LD_UPDATE =>
do_update := '1'; do_update := '1';
v.state := IDLE;
done := '1';


when SPR_CMPLT => when SPR_CMPLT =>
done := '1';
v.state := IDLE;


end case; end case;


busy := '1'; if done = '1' or exception = '1' then
if r.state = IDLE or done = '1' then v.state := IDLE;
busy := '0';
end if; end if;


-- Note that l_in.valid is gated with busy inside execute1 -- Note that l_in.valid is gated with busy inside execute1
@ -450,6 +445,31 @@ begin
end if; end if;
end if; end if;


-- Work out whether we'll be busy next cycle
v.busy := '0';
v.wait_dcache := '0';
v.wait_mmu := '0';
case v.state is
when SECOND_REQ =>
v.busy := '1';
when ACK_WAIT =>
if v.last_dword = '0' or (v.load = '1' and v.update = '1') then
v.busy := '1';
else
v.wait_dcache := '1';
end if;
when MMU_LOOKUP =>
if v.instr_fault = '0' then
v.busy := '1';
else
v.wait_mmu := '1';
end if;
when TLBIE_WAIT =>
v.wait_mmu := '1';
when others =>
-- not busy next cycle
end case;

-- Update outputs to dcache -- Update outputs to dcache
d_out.valid <= req; d_out.valid <= req;
d_out.load <= v.load; d_out.load <= v.load;

@ -52,6 +52,7 @@ architecture behave of mmu is
-- internal state -- internal state
state : state_t; state : state_t;
done : std_ulogic; done : std_ulogic;
err : std_ulogic;
pgtbl0 : std_ulogic_vector(63 downto 0); pgtbl0 : std_ulogic_vector(63 downto 0);
pt0_valid : std_ulogic; pt0_valid : std_ulogic;
pgtbl3 : std_ulogic_vector(63 downto 0); pgtbl3 : std_ulogic_vector(63 downto 0);
@ -92,7 +93,10 @@ begin
report "MMU got tlb miss for " & to_hstring(rin.addr); report "MMU got tlb miss for " & to_hstring(rin.addr);
end if; end if;
if l_out.done = '1' then if l_out.done = '1' then
report "MMU completing op with invalid=" & std_ulogic'image(l_out.invalid) & report "MMU completing op without error";
end if;
if l_out.err = '1' then
report "MMU completing op with err invalid=" & std_ulogic'image(l_out.invalid) &
" badtree=" & std_ulogic'image(l_out.badtree); " badtree=" & std_ulogic'image(l_out.badtree);
end if; end if;
if rin.state = RADIX_LOOKUP then if rin.state = RADIX_LOOKUP then
@ -200,6 +204,7 @@ begin
v.valid := '0'; v.valid := '0';
dcreq := '0'; dcreq := '0';
v.done := '0'; v.done := '0';
v.err := '0';
v.invalid := '0'; v.invalid := '0';
v.badtree := '0'; v.badtree := '0';
v.segerror := '0'; v.segerror := '0';
@ -412,7 +417,8 @@ begin
end case; end case;


if v.state = RADIX_FINISH or (v.state = RADIX_LOAD_TLB and r.iside = '1') then if v.state = RADIX_FINISH or (v.state = RADIX_LOAD_TLB and r.iside = '1') then
v.done := '1'; v.err := v.invalid or v.badtree or v.segerror or v.perm_err or v.rc_error;
v.done := not v.err;
end if; end if;


if r.addr(63) = '1' then if r.addr(63) = '1' then
@ -451,6 +457,7 @@ begin
end if; end if;


l_out.done <= r.done; l_out.done <= r.done;
l_out.err <= r.err;
l_out.invalid <= r.invalid; l_out.invalid <= r.invalid;
l_out.badtree <= r.badtree; l_out.badtree <= r.badtree;
l_out.segerr <= r.segerror; l_out.segerr <= r.segerror;

Loading…
Cancel
Save