dcache: Improve timing of valid/done outputs

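This makes d_out.valid and m_out.done come directly from registers
(the new r1.ls_valid and r1.mmu_done respectively) in order to improve
timing.  The inputs to these registers are set by the same conditions
that cause r1.hit_load_valid, r1.slow_valid, r1.error_done and
r1.stcx_fail to be set.  r1.mmu_done also carries the tlbie/TLB-load
completion that was previously signalled by r1.tlbie_done, which is
removed.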

Note that the STORE_WAIT_ACK state doesn't test r1.mmu_req but assumes
that the request came from loadstore1, since store requests never come
from the MMU.  It cannot test r1.mmu_req because we normally have
r1.full = 0 in this state, which means that r1.mmu_req can change at
any time.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/233/head
Paul Mackerras 5 years ago
parent 893d2bc6a2
commit dc8980d5a5

@ -235,6 +235,7 @@ architecture rtl of dcache is
byte_sel : std_ulogic_vector(7 downto 0); byte_sel : std_ulogic_vector(7 downto 0);
hit_way : way_t; hit_way : way_t;
same_tag : std_ulogic; same_tag : std_ulogic;
mmu_req : std_ulogic;
end record; end record;


-- First stage register, contains state for stage 1 of load hits -- First stage register, contains state for stage 1 of load hits
@ -282,15 +283,14 @@ architecture rtl of dcache is
rows_valid : row_per_line_valid_t; rows_valid : row_per_line_valid_t;
acks_pending : unsigned(2 downto 0); acks_pending : unsigned(2 downto 0);


-- Signals to complete with error -- Signals to complete (possibly with error)
ls_valid : std_ulogic;
mmu_done : std_ulogic;
error_done : std_ulogic; error_done : std_ulogic;
cache_paradox : std_ulogic; cache_paradox : std_ulogic;


-- Signal to complete a failed stcx. -- Signal to complete a failed stcx.
stcx_fail : std_ulogic; stcx_fail : std_ulogic;

-- completion signal for tlbie
tlbie_done : std_ulogic;
end record; end record;


signal r1 : reg_stage_1_t; signal r1 : reg_stage_1_t;
@ -940,15 +940,15 @@ begin
end if; end if;
end loop; end loop;


d_out.valid <= '0'; d_out.valid <= r1.ls_valid;
d_out.data <= data_out; d_out.data <= data_out;
d_out.store_done <= '0'; d_out.store_done <= not r1.stcx_fail;
d_out.error <= '0'; d_out.error <= r1.error_done;
d_out.cache_paradox <= '0'; d_out.cache_paradox <= r1.cache_paradox;


-- Outputs to MMU -- Outputs to MMU
m_out.done <= r1.tlbie_done; m_out.done <= r1.mmu_done;
m_out.err <= '0'; m_out.err <= r1.error_done;
m_out.data <= data_out; m_out.data <= data_out;


-- We have a valid load or store hit or we just completed a slow -- We have a valid load or store hit or we just completed a slow
@ -974,47 +974,32 @@ begin
-- Load hit case is the standard path -- Load hit case is the standard path
if r1.hit_load_valid = '1' then if r1.hit_load_valid = '1' then
report "completing load hit data=" & to_hstring(data_out); report "completing load hit data=" & to_hstring(data_out);
d_out.valid <= '1';
end if; end if;


-- error cases complete without stalling -- error cases complete without stalling
if r1.error_done = '1' then if r1.error_done = '1' then
report "completing ld/st with error"; report "completing ld/st with error";
d_out.error <= '1';
d_out.cache_paradox <= r1.cache_paradox;
d_out.valid <= '1';
end if; end if;


-- Slow ops (load miss, NC, stores) -- Slow ops (load miss, NC, stores)
if r1.slow_valid = '1' then if r1.slow_valid = '1' then
d_out.store_done <= '1';
report "completing store or load miss data=" & to_hstring(data_out); report "completing store or load miss data=" & to_hstring(data_out);
d_out.valid <= '1';
end if;

if r1.stcx_fail = '1' then
d_out.store_done <= '0';
d_out.valid <= '1';
end if; end if;


else else
-- Request came from MMU -- Request came from MMU
if r1.hit_load_valid = '1' then if r1.hit_load_valid = '1' then
report "completing load hit to MMU, data=" & to_hstring(m_out.data); report "completing load hit to MMU, data=" & to_hstring(m_out.data);
m_out.done <= '1';
end if; end if;


-- error cases complete without stalling -- error cases complete without stalling
if r1.error_done = '1' then if r1.error_done = '1' then
report "completing MMU ld with error"; report "completing MMU ld with error";
m_out.err <= '1';
m_out.done <= '1';
end if; end if;


-- Slow ops (i.e. load miss) -- Slow ops (i.e. load miss)
if r1.slow_valid = '1' then if r1.slow_valid = '1' then
report "completing MMU load miss, data=" & to_hstring(m_out.data); report "completing MMU load miss, data=" & to_hstring(m_out.data);
m_out.done <= '1';
end if; end if;
end if; end if;


@ -1159,8 +1144,6 @@ begin
r1.tlb_hit_way <= tlb_hit_way; r1.tlb_hit_way <= tlb_hit_way;
r1.tlb_hit_index <= tlb_req_index; r1.tlb_hit_index <= tlb_req_index;


-- complete tlbies and TLB loads in the third cycle
r1.tlbie_done <= r0_valid and (r0.tlbie or r0.tlbld);
end if; end if;
end process; end process;


@ -1217,6 +1200,8 @@ begin
r1.slow_valid <= '0'; r1.slow_valid <= '0';
r1.wb.cyc <= '0'; r1.wb.cyc <= '0';
r1.wb.stb <= '0'; r1.wb.stb <= '0';
r1.ls_valid <= '0';
r1.mmu_done <= '0';


-- Not useful normally but helps avoiding tons of sim warnings -- Not useful normally but helps avoiding tons of sim warnings
r1.wb.adr <= (others => '0'); r1.wb.adr <= (others => '0');
@ -1225,6 +1210,17 @@ begin
r1.slow_valid <= '0'; r1.slow_valid <= '0';
r1.write_bram <= '0'; r1.write_bram <= '0';


r1.ls_valid <= '0';
-- complete tlbies and TLB loads in the third cycle
r1.mmu_done <= r0_valid and (r0.tlbie or r0.tlbld);
if req_op = OP_LOAD_HIT or req_op = OP_BAD or req_op = OP_STCX_FAIL then
if r0.mmu_req = '0' then
r1.ls_valid <= '1';
else
r1.mmu_done <= '1';
end if;
end if;

if r1.write_tag = '1' then if r1.write_tag = '1' then
-- Store new tag in selected way -- Store new tag in selected way
for i in 0 to NUM_WAYS-1 loop for i in 0 to NUM_WAYS-1 loop
@ -1244,6 +1240,7 @@ begin
else else
req.op := req_op; req.op := req_op;
req.valid := req_go; req.valid := req_go;
req.mmu_req := r0.mmu_req;
req.dcbz := r0.req.dcbz; req.dcbz := r0.req.dcbz;
req.real_addr := ra; req.real_addr := ra;
req.data := r0.req.data; req.data := r0.req.data;
@ -1318,6 +1315,11 @@ begin
r1.acks_pending <= to_unsigned(1, 3); r1.acks_pending <= to_unsigned(1, 3);
r1.full <= '0'; r1.full <= '0';
r1.slow_valid <= '1'; r1.slow_valid <= '1';
if req.mmu_req = '0' then
r1.ls_valid <= '1';
else
r1.mmu_done <= '1';
end if;
if req.op = OP_STORE_HIT then if req.op = OP_STORE_HIT then
r1.write_bram <= '1'; r1.write_bram <= '1';
end if; end if;
@ -1380,6 +1382,11 @@ begin
r1.store_row = get_row(r1.req.real_addr) then r1.store_row = get_row(r1.req.real_addr) then
r1.full <= '0'; r1.full <= '0';
r1.slow_valid <= '1'; r1.slow_valid <= '1';
if r1.mmu_req = '0' then
r1.ls_valid <= '1';
else
r1.mmu_done <= '1';
end if;
r1.forward_sel <= (others => '1'); r1.forward_sel <= (others => '1');
r1.use_forward1 <= '1'; r1.use_forward1 <= '1';
end if; end if;
@ -1421,6 +1428,8 @@ begin
end if; end if;
r1.full <= '0'; r1.full <= '0';
r1.slow_valid <= '1'; r1.slow_valid <= '1';
-- Store requests never come from the MMU
r1.ls_valid <= '1';
acks := acks + 1; acks := acks + 1;
else else
r1.wb.stb <= '0'; r1.wb.stb <= '0';
@ -1450,6 +1459,11 @@ begin
r1.state <= IDLE; r1.state <= IDLE;
r1.full <= '0'; r1.full <= '0';
r1.slow_valid <= '1'; r1.slow_valid <= '1';
if r1.mmu_req = '0' then
r1.ls_valid <= '1';
else
r1.mmu_done <= '1';
end if;
r1.forward_sel <= (others => '1'); r1.forward_sel <= (others => '1');
r1.use_forward1 <= '1'; r1.use_forward1 <= '1';
r1.wb.cyc <= '0'; r1.wb.cyc <= '0';

Loading…
Cancel
Save