Allow integer instructions and load/store instructions to execute together

Execute1 and loadstore1 now send each other stall signals that
indicate that a valid instruction in stage 2 can't complete in this
cycle, and hence any valid instruction in stage 1 in the other unit
can't move to stage 2.  With this in place, an ALU instruction can
move into stage 1 while an LSU instruction is in stage 2.

Since the FPU doesn't yet have a way to stall completion, we can't yet
start FPU instructions while any LSU or ALU instruction is in
progress.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/379/head
Paul Mackerras 2 years ago
parent 4b6148ada6
commit e030a500e8

@ -461,6 +461,7 @@ package common is
is_32bit : std_ulogic; is_32bit : std_ulogic;
repeat : std_ulogic; repeat : std_ulogic;
second : std_ulogic; second : std_ulogic;
e2stall : std_ulogic;
msr : std_ulogic_vector(63 downto 0); msr : std_ulogic_vector(63 downto 0);
end record; end record;
constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type :=
@ -473,13 +474,13 @@ package common is
write_reg => (others => '0'), write_reg => (others => '0'),
length => (others => '0'), length => (others => '0'),
mode_32bit => '0', is_32bit => '0', mode_32bit => '0', is_32bit => '0',
repeat => '0', second => '0', repeat => '0', second => '0', e2stall => '0',
msr => (others => '0')); msr => (others => '0'));


type Loadstore1ToExecute1Type is record type Loadstore1ToExecute1Type is record
busy : std_ulogic; busy : std_ulogic;
l2stall : std_ulogic;
in_progress : std_ulogic; in_progress : std_ulogic;
interrupt : std_ulogic;
end record; end record;


type Loadstore1ToDcacheType is record type Loadstore1ToDcacheType is record

@ -9,6 +9,7 @@ entity bit_counter is
port ( port (
clk : in std_logic; clk : in std_logic;
rs : in std_ulogic_vector(63 downto 0); rs : in std_ulogic_vector(63 downto 0);
stall : in std_ulogic;
count_right : in std_ulogic; count_right : in std_ulogic;
do_popcnt : in std_ulogic; do_popcnt : in std_ulogic;
is_32bit : in std_ulogic; is_32bit : in std_ulogic;
@ -49,7 +50,7 @@ architecture behaviour of bit_counter is
begin begin
countzero_r: process(clk) countzero_r: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) and stall = '0' then
inp_r <= inp; inp_r <= inp;
sum_r <= sum; sum_r <= sum;
end if; end if;
@ -88,7 +89,7 @@ begin


popcnt_r: process(clk) popcnt_r: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) and stall = '0' then
for i in 0 to 7 loop for i in 0 to 7 loop
pc8_r(i) <= pc8(i); pc8_r(i) <= pc8(i);
end loop; end loop;

@ -204,6 +204,8 @@ architecture behaviour of execute1 is
signal exception_log : std_ulogic; signal exception_log : std_ulogic;
signal irq_valid_log : std_ulogic; signal irq_valid_log : std_ulogic;


signal stage2_stall : std_ulogic;

type privilege_level is (USER, SUPER); type privilege_level is (USER, SUPER);
type op_privilege_array is array(insn_type_t) of privilege_level; type op_privilege_array is array(insn_type_t) of privilege_level;
constant op_privilege: op_privilege_array := ( constant op_privilege: op_privilege_array := (
@ -351,6 +353,7 @@ begin
port map ( port map (
clk => clk, clk => clk,
rs => c_in, rs => c_in,
stall => stage2_stall,
count_right => e_in.insn(10), count_right => e_in.insn(10),
is_32bit => e_in.is_32bit, is_32bit => e_in.is_32bit,
do_popcnt => do_popcnt, do_popcnt => do_popcnt,
@ -436,14 +439,13 @@ begin
-- XER forwarding. To avoid having to track XER hazards, we use -- XER forwarding. To avoid having to track XER hazards, we use
-- the previously latched value. Since the XER common bits -- the previously latched value. Since the XER common bits
-- (SO, OV[32] and CA[32]) are only modified by instructions that are -- (SO, OV[32] and CA[32]) are only modified by instructions that are
-- handled here, we can just forward the result being sent to -- handled here, we can just use the result most recently sent to
-- writeback. -- writeback, unless a pipeline flush has happened in the meantime.
xerc_in <= ex1.xerc when ex1.xerc_valid = '1' else e_in.xerc; xerc_in <= ex1.xerc when ex1.xerc_valid = '1' else e_in.xerc;


with e_in.unit select busy_out <= with e_in.unit select busy_out <=
l_in.busy or ex1.e.valid or ex1.busy or fp_in.busy when LDST,
l_in.busy or l_in.in_progress or ex1.e.valid or ex1.busy or fp_in.busy when FPU, l_in.busy or l_in.in_progress or ex1.e.valid or ex1.busy or fp_in.busy when FPU,
l_in.busy or l_in.in_progress or ex1.busy or fp_in.busy when others; l_in.busy or ex1.busy or fp_in.busy when others;


valid_in <= e_in.valid and not (busy_out or flush_in or ex1.e.redirect or ex1.e.interrupt); valid_in <= e_in.valid and not (busy_out or flush_in or ex1.e.redirect or ex1.e.interrupt);


@ -479,8 +481,7 @@ begin
-- We mustn't get stalled on a cycle where execute2 is -- We mustn't get stalled on a cycle where execute2 is
-- completing an instruction or generating an interrupt -- completing an instruction or generating an interrupt
if ex2.e.valid = '1' or ex2.e.interrupt = '1' then if ex2.e.valid = '1' or ex2.e.interrupt = '1' then
assert (l_in.busy or fp_in.busy) = '0' assert stage2_stall = '0' severity failure;
severity failure;
end if; end if;
end if; end if;
end if; end if;
@ -1434,6 +1435,7 @@ begin
lv.is_32bit := e_in.is_32bit; lv.is_32bit := e_in.is_32bit;
lv.repeat := e_in.repeat; lv.repeat := e_in.repeat;
lv.second := e_in.second; lv.second := e_in.second;
lv.e2stall := '0';


-- Outputs to FPU -- Outputs to FPU
fv.op := e_in.insn_type; fv.op := e_in.insn_type;
@ -1476,6 +1478,8 @@ begin
pmu_to_x.spr_val when "11", pmu_to_x.spr_val when "11",
ex1.e.write_data when others; ex1.e.write_data when others;


stage2_stall <= l_in.l2stall or fp_in.busy;

-- Second execute stage control -- Second execute stage control
execute2_1: process(all) execute2_1: process(all)
variable v : reg_stage2_type; variable v : reg_stage2_type;
@ -1487,7 +1491,7 @@ begin
variable bypass_valid : std_ulogic; variable bypass_valid : std_ulogic;
begin begin
v := ex2; v := ex2;
if (l_in.busy or fp_in.busy) = '0' then if stage2_stall = '0' then
v.e := ex1.e; v.e := ex1.e;
v.se := ex1.se; v.se := ex1.se;
v.e.write_data := ex_result; v.e.write_data := ex_result;
@ -1526,7 +1530,7 @@ begin
v.ext_interrupt := '0'; v.ext_interrupt := '0';
end if; end if;


if (l_in.busy or fp_in.busy) = '0' then if stage2_stall = '0' then
if ex1.se.write_msr = '1' then if ex1.se.write_msr = '1' then
ctrl_tmp.msr <= ex1.msr; ctrl_tmp.msr <= ex1.msr;
end if; end if;
@ -1563,7 +1567,7 @@ begin
end if; end if;


bypass_valid := ex1.e.valid; bypass_valid := ex1.e.valid;
if (ex2.busy or l_in.busy or fp_in.busy) = '1' and ex1.res2_sel(1) = '1' then if stage2_stall = '1' and ex1.res2_sel(1) = '1' then
bypass_valid := '0'; bypass_valid := '0';
end if; end if;



@ -624,7 +624,7 @@ begin
store_data(i * 8 + 7 downto i * 8) <= r1.req.store_data(j + 7 downto j); store_data(i * 8 + 7 downto i * 8) <= r1.req.store_data(j + 7 downto j);
end loop; end loop;


if (dc_stall or d_in.error or r2.busy) = '0' then if (dc_stall or d_in.error or r2.busy or l_in.e2stall) = '0' then
if r1.req.valid = '0' or r1.issued = '1' or r1.req.dc_req = '0' then if r1.req.valid = '0' or r1.issued = '1' or r1.req.dc_req = '0' then
v.req := r1.req; v.req := r1.req;
v.addr0 := r1.addr0; v.addr0 := r1.addr0;
@ -950,7 +950,7 @@ begin
else else
d_out.data <= r2.req.store_data; d_out.data <= r2.req.store_data;
end if; end if;
d_out.hold <= '0'; d_out.hold <= l_in.e2stall;


-- Update outputs to MMU -- Update outputs to MMU
m_out.valid <= mmureq; m_out.valid <= mmureq;
@ -980,8 +980,8 @@ begin


-- update busy signal back to execute1 -- update busy signal back to execute1
e_out.busy <= busy; e_out.busy <= busy;
e_out.l2stall <= dc_stall or d_in.error or r2.busy;
e_out.in_progress <= in_progress; e_out.in_progress <= in_progress;
e_out.interrupt <= r3.interrupt;


events <= r3.events; events <= r3.events;



Loading…
Cancel
Save