Allow integer instructions and load/store instructions to execute together

Execute1 and loadstore1 now send each other stall signals that
indicate that a valid instruction in stage 2 can't complete in this
cycle, and hence any valid instruction in stage 1 in the other unit
can't move to stage 2.  With this in place, an ALU instruction can
move into stage 1 while an LSU instruction is in stage 2.

Since the FPU doesn't yet have a way to stall completion, we can't yet
start FPU instructions while any LSU or ALU instruction is in
progress.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/379/head
Paul Mackerras 2 years ago
parent 4b6148ada6
commit e030a500e8

@ -461,6 +461,7 @@ package common is
is_32bit : std_ulogic;
repeat : std_ulogic;
second : std_ulogic;
e2stall : std_ulogic;
msr : std_ulogic_vector(63 downto 0);
end record;
constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type :=
@ -473,13 +474,13 @@ package common is
write_reg => (others => '0'),
length => (others => '0'),
mode_32bit => '0', is_32bit => '0',
repeat => '0', second => '0',
repeat => '0', second => '0', e2stall => '0',
msr => (others => '0'));

-- Status signals returned from loadstore1 to execute1.
-- NOTE(review): the field notes below assume loadstore1 drives this record
-- as e_out and execute1 reads it as l_in; confirm against the port
-- declarations, which are not visible here.
type Loadstore1ToExecute1Type is record
-- ORed into execute1's busy_out for every instruction unit.
busy : std_ulogic;
-- Stall indication for the second stage: loadstore1 drives it with
-- (dc_stall or d_in.error or r2.busy), and execute1 ORs it with
-- fp_in.busy to form stage2_stall.
l2stall : std_ulogic;
-- Used by execute1 when computing busy_out for FPU instructions.
in_progress : std_ulogic;
-- Driven from r3.interrupt in loadstore1.
interrupt : std_ulogic;
end record;

type Loadstore1ToDcacheType is record

@ -9,6 +9,7 @@ entity bit_counter is
port (
clk : in std_logic;
rs : in std_ulogic_vector(63 downto 0);
stall : in std_ulogic;
count_right : in std_ulogic;
do_popcnt : in std_ulogic;
is_32bit : in std_ulogic;
@ -49,7 +50,7 @@ architecture behaviour of bit_counter is
begin
countzero_r: process(clk)
begin
if rising_edge(clk) then
if rising_edge(clk) and stall = '0' then
inp_r <= inp;
sum_r <= sum;
end if;
@ -88,7 +89,7 @@ begin

popcnt_r: process(clk)
begin
if rising_edge(clk) then
if rising_edge(clk) and stall = '0' then
for i in 0 to 7 loop
pc8_r(i) <= pc8(i);
end loop;

@ -204,6 +204,8 @@ architecture behaviour of execute1 is
signal exception_log : std_ulogic;
signal irq_valid_log : std_ulogic;

signal stage2_stall : std_ulogic;

type privilege_level is (USER, SUPER);
type op_privilege_array is array(insn_type_t) of privilege_level;
constant op_privilege: op_privilege_array := (
@ -351,6 +353,7 @@ begin
port map (
clk => clk,
rs => c_in,
stall => stage2_stall,
count_right => e_in.insn(10),
is_32bit => e_in.is_32bit,
do_popcnt => do_popcnt,
@ -436,14 +439,13 @@ begin
-- XER forwarding. To avoid having to track XER hazards, we use
-- the previously latched value. Since the XER common bits
-- (SO, OV[32] and CA[32]) are only modified by instructions that are
-- handled here, we can just forward the result being sent to
-- writeback.
-- handled here, we can just use the result most recently sent to
-- writeback, unless a pipeline flush has happened in the meantime.
xerc_in <= ex1.xerc when ex1.xerc_valid = '1' else e_in.xerc;

with e_in.unit select busy_out <=
l_in.busy or ex1.e.valid or ex1.busy or fp_in.busy when LDST,
l_in.busy or l_in.in_progress or ex1.e.valid or ex1.busy or fp_in.busy when FPU,
l_in.busy or l_in.in_progress or ex1.busy or fp_in.busy when others;
l_in.busy or ex1.busy or fp_in.busy when others;

valid_in <= e_in.valid and not (busy_out or flush_in or ex1.e.redirect or ex1.e.interrupt);

@ -479,8 +481,7 @@ begin
-- We mustn't get stalled on a cycle where execute2 is
-- completing an instruction or generating an interrupt
if ex2.e.valid = '1' or ex2.e.interrupt = '1' then
assert (l_in.busy or fp_in.busy) = '0'
severity failure;
assert stage2_stall = '0' severity failure;
end if;
end if;
end if;
@ -1434,6 +1435,7 @@ begin
lv.is_32bit := e_in.is_32bit;
lv.repeat := e_in.repeat;
lv.second := e_in.second;
lv.e2stall := '0';

-- Outputs to FPU
fv.op := e_in.insn_type;
@ -1476,6 +1478,8 @@ begin
pmu_to_x.spr_val when "11",
ex1.e.write_data when others;

stage2_stall <= l_in.l2stall or fp_in.busy;

-- Second execute stage control
execute2_1: process(all)
variable v : reg_stage2_type;
@ -1487,7 +1491,7 @@ begin
variable bypass_valid : std_ulogic;
begin
v := ex2;
if (l_in.busy or fp_in.busy) = '0' then
if stage2_stall = '0' then
v.e := ex1.e;
v.se := ex1.se;
v.e.write_data := ex_result;
@ -1526,7 +1530,7 @@ begin
v.ext_interrupt := '0';
end if;

if (l_in.busy or fp_in.busy) = '0' then
if stage2_stall = '0' then
if ex1.se.write_msr = '1' then
ctrl_tmp.msr <= ex1.msr;
end if;
@ -1563,7 +1567,7 @@ begin
end if;

bypass_valid := ex1.e.valid;
if (ex2.busy or l_in.busy or fp_in.busy) = '1' and ex1.res2_sel(1) = '1' then
if stage2_stall = '1' and ex1.res2_sel(1) = '1' then
bypass_valid := '0';
end if;


@ -624,7 +624,7 @@ begin
store_data(i * 8 + 7 downto i * 8) <= r1.req.store_data(j + 7 downto j);
end loop;

if (dc_stall or d_in.error or r2.busy) = '0' then
if (dc_stall or d_in.error or r2.busy or l_in.e2stall) = '0' then
if r1.req.valid = '0' or r1.issued = '1' or r1.req.dc_req = '0' then
v.req := r1.req;
v.addr0 := r1.addr0;
@ -950,7 +950,7 @@ begin
else
d_out.data <= r2.req.store_data;
end if;
d_out.hold <= '0';
d_out.hold <= l_in.e2stall;

-- Update outputs to MMU
m_out.valid <= mmureq;
@ -980,8 +980,8 @@ begin

-- update busy signal back to execute1
e_out.busy <= busy;
e_out.l2stall <= dc_stall or d_in.error or r2.busy;
e_out.in_progress <= in_progress;
e_out.interrupt <= r3.interrupt;

events <= r3.events;


Loading…
Cancel
Save