From e030a500e85ad0e22e47dfb7af087e7fef9df20d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 27 Jun 2022 18:53:04 +1000 Subject: [PATCH] Allow integer instructions and load/store instructions to execute together Execute1 and loadstore1 now send each other stall signals that indicate that a valid instruction in stage 2 can't complete in this cycle, and hence any valid instruction in stage 1 in the other unit can't move to stage 2. With this in place, an ALU instruction can move into stage 1 while an LSU instruction is in stage 2. Since the FPU doesn't yet have a way to stall completion, we can't yet start FPU instructions while any LSU or ALU instruction is in progress. Signed-off-by: Paul Mackerras --- common.vhdl | 5 +++-- countbits.vhdl | 5 +++-- execute1.vhdl | 22 +++++++++++++--------- loadstore1.vhdl | 6 +++--- 4 files changed, 22 insertions(+), 16 deletions(-) diff --git a/common.vhdl b/common.vhdl index 6cbf181..ac733db 100644 --- a/common.vhdl +++ b/common.vhdl @@ -461,6 +461,7 @@ package common is is_32bit : std_ulogic; repeat : std_ulogic; second : std_ulogic; + e2stall : std_ulogic; msr : std_ulogic_vector(63 downto 0); end record; constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := @@ -473,13 +474,13 @@ package common is write_reg => (others => '0'), length => (others => '0'), mode_32bit => '0', is_32bit => '0', - repeat => '0', second => '0', + repeat => '0', second => '0', e2stall => '0', msr => (others => '0')); type Loadstore1ToExecute1Type is record busy : std_ulogic; + l2stall : std_ulogic; in_progress : std_ulogic; - interrupt : std_ulogic; end record; type Loadstore1ToDcacheType is record diff --git a/countbits.vhdl b/countbits.vhdl index b16baa0..87417a9 100644 --- a/countbits.vhdl +++ b/countbits.vhdl @@ -9,6 +9,7 @@ entity bit_counter is port ( clk : in std_logic; rs : in std_ulogic_vector(63 downto 0); + stall : in std_ulogic; count_right : in std_ulogic; do_popcnt : in std_ulogic; is_32bit : in std_ulogic; @@ -49,7 
+50,7 @@ architecture behaviour of bit_counter is begin countzero_r: process(clk) begin - if rising_edge(clk) then + if rising_edge(clk) and stall = '0' then inp_r <= inp; sum_r <= sum; end if; @@ -88,7 +89,7 @@ begin popcnt_r: process(clk) begin - if rising_edge(clk) then + if rising_edge(clk) and stall = '0' then for i in 0 to 7 loop pc8_r(i) <= pc8(i); end loop; diff --git a/execute1.vhdl b/execute1.vhdl index ebc24c5..e4db56f 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -204,6 +204,8 @@ architecture behaviour of execute1 is signal exception_log : std_ulogic; signal irq_valid_log : std_ulogic; + signal stage2_stall : std_ulogic; + type privilege_level is (USER, SUPER); type op_privilege_array is array(insn_type_t) of privilege_level; constant op_privilege: op_privilege_array := ( @@ -351,6 +353,7 @@ begin port map ( clk => clk, rs => c_in, + stall => stage2_stall, count_right => e_in.insn(10), is_32bit => e_in.is_32bit, do_popcnt => do_popcnt, @@ -436,14 +439,13 @@ begin -- XER forwarding. To avoid having to track XER hazards, we use -- the previously latched value. Since the XER common bits -- (SO, OV[32] and CA[32]) are only modified by instructions that are - -- handled here, we can just forward the result being sent to - -- writeback. + -- handled here, we can just use the result most recently sent to + -- writeback, unless a pipeline flush has happened in the meantime. 
xerc_in <= ex1.xerc when ex1.xerc_valid = '1' else e_in.xerc; with e_in.unit select busy_out <= - l_in.busy or ex1.e.valid or ex1.busy or fp_in.busy when LDST, l_in.busy or l_in.in_progress or ex1.e.valid or ex1.busy or fp_in.busy when FPU, - l_in.busy or l_in.in_progress or ex1.busy or fp_in.busy when others; + l_in.busy or ex1.busy or fp_in.busy when others; valid_in <= e_in.valid and not (busy_out or flush_in or ex1.e.redirect or ex1.e.interrupt); @@ -479,8 +481,7 @@ begin -- We mustn't get stalled on a cycle where execute2 is -- completing an instruction or generating an interrupt if ex2.e.valid = '1' or ex2.e.interrupt = '1' then - assert (l_in.busy or fp_in.busy) = '0' - severity failure; + assert stage2_stall = '0' severity failure; end if; end if; end if; @@ -1434,6 +1435,7 @@ begin lv.is_32bit := e_in.is_32bit; lv.repeat := e_in.repeat; lv.second := e_in.second; + lv.e2stall := '0'; -- Outputs to FPU fv.op := e_in.insn_type; @@ -1476,6 +1478,8 @@ begin pmu_to_x.spr_val when "11", ex1.e.write_data when others; + stage2_stall <= l_in.l2stall or fp_in.busy; + -- Second execute stage control execute2_1: process(all) variable v : reg_stage2_type; @@ -1487,7 +1491,7 @@ begin variable bypass_valid : std_ulogic; begin v := ex2; - if (l_in.busy or fp_in.busy) = '0' then + if stage2_stall = '0' then v.e := ex1.e; v.se := ex1.se; v.e.write_data := ex_result; @@ -1526,7 +1530,7 @@ begin v.ext_interrupt := '0'; end if; - if (l_in.busy or fp_in.busy) = '0' then + if stage2_stall = '0' then if ex1.se.write_msr = '1' then ctrl_tmp.msr <= ex1.msr; end if; @@ -1563,7 +1567,7 @@ begin end if; bypass_valid := ex1.e.valid; - if (ex2.busy or l_in.busy or fp_in.busy) = '1' and ex1.res2_sel(1) = '1' then + if stage2_stall = '1' and ex1.res2_sel(1) = '1' then bypass_valid := '0'; end if; diff --git a/loadstore1.vhdl b/loadstore1.vhdl index ea7baec..bd62f0b 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -624,7 +624,7 @@ begin store_data(i * 8 + 7 downto i * 8) <= 
r1.req.store_data(j + 7 downto j); end loop; - if (dc_stall or d_in.error or r2.busy) = '0' then + if (dc_stall or d_in.error or r2.busy or l_in.e2stall) = '0' then if r1.req.valid = '0' or r1.issued = '1' or r1.req.dc_req = '0' then v.req := r1.req; v.addr0 := r1.addr0; @@ -950,7 +950,7 @@ begin else d_out.data <= r2.req.store_data; end if; - d_out.hold <= '0'; + d_out.hold <= l_in.e2stall; -- Update outputs to MMU m_out.valid <= mmureq; @@ -980,8 +980,8 @@ begin -- update busy signal back to execute1 e_out.busy <= busy; + e_out.l2stall <= dc_stall or d_in.error or r2.busy; e_out.in_progress <= in_progress; - e_out.interrupt <= r3.interrupt; events <= r3.events;