execute: Implement bypass from output of execute1 to input

This enables back-to-back execution of integer instructions where
the first instruction writes a GPR and the second reads the same
GPR.  This is done with a set of multiplexers at the start of
execute1 which enable any of the three input operands to be taken
from the output of execute1 (i.e. r.e.write_data) rather than the
input from decode2 (i.e. e_in.read_data1, e_in.read_data2 or
e_in.read_data3).

This also requires changes to the hazard detection and handling.
Decode2 generates a signal indicating that the GPR being written
is available for bypass, which is true for instructions that are
executed in execute1 (rather than loadstore1/dcache).  The
gpr_hazard module stores this "bypassable" bit, and if the same
GPR needs to be read by a subsequent instruction, it outputs a
"use_bypass" signal rather than generating a stall.  The
use_bypass signal is then latched at the output of decode2 and
passed down to execute1 to control the input multiplexer.

At the moment there is no bypass on the inputs to loadstore1, but that
is OK because all load and store instructions are marked as
single-issue.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/134/head
Paul Mackerras 5 years ago
parent 0c714f1be6
commit b14d982011

@ -109,6 +109,9 @@ package common is
read_data1: std_ulogic_vector(63 downto 0); read_data1: std_ulogic_vector(63 downto 0);
read_data2: std_ulogic_vector(63 downto 0); read_data2: std_ulogic_vector(63 downto 0);
read_data3: std_ulogic_vector(63 downto 0); read_data3: std_ulogic_vector(63 downto 0);
bypass_data1: std_ulogic;
bypass_data2: std_ulogic;
bypass_data3: std_ulogic;
cr: std_ulogic_vector(31 downto 0); cr: std_ulogic_vector(31 downto 0);
xerc: xer_common_t; xerc: xer_common_t;
lr: std_ulogic; lr: std_ulogic;
@ -126,7 +129,8 @@ package common is
data_len: std_ulogic_vector(3 downto 0); data_len: std_ulogic_vector(3 downto 0);
end record; end record;
constant Decode2ToExecute1Init : Decode2ToExecute1Type := constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
(valid => '0', insn_type => OP_ILLEGAL, lr => '0', rc => '0', oe => '0', invert_a => '0', (valid => '0', insn_type => OP_ILLEGAL, bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0',
lr => '0', rc => '0', oe => '0', invert_a => '0',
invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0', invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0',
is_32bit => '0', is_signed => '0', xerc => xerc_init, others => (others => '0')); is_32bit => '0', is_signed => '0', xerc => xerc_init, others => (others => '0'));



@ -21,6 +21,7 @@ entity control is


gpr_write_valid_in : in std_ulogic; gpr_write_valid_in : in std_ulogic;
gpr_write_in : in gspr_index_t; gpr_write_in : in gspr_index_t;
gpr_bypassable : in std_ulogic;


gpr_a_read_valid_in : in std_ulogic; gpr_a_read_valid_in : in std_ulogic;
gpr_a_read_in : in gspr_index_t; gpr_a_read_in : in gspr_index_t;
@ -36,7 +37,11 @@ entity control is


valid_out : out std_ulogic; valid_out : out std_ulogic;
stall_out : out std_ulogic; stall_out : out std_ulogic;
stopped_out : out std_ulogic stopped_out : out std_ulogic;

gpr_bypass_a : out std_ulogic;
gpr_bypass_b : out std_ulogic;
gpr_bypass_c : out std_ulogic
); );
end entity control; end entity control;


@ -71,10 +76,12 @@ begin


gpr_write_valid_in => gpr_write_valid, gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in, gpr_write_in => gpr_write_in,
bypass_avail => gpr_bypassable,
gpr_read_valid_in => gpr_a_read_valid_in, gpr_read_valid_in => gpr_a_read_valid_in,
gpr_read_in => gpr_a_read_in, gpr_read_in => gpr_a_read_in,


stall_out => stall_a_out stall_out => stall_a_out,
use_bypass => gpr_bypass_a
); );


gpr_hazard1: entity work.gpr_hazard gpr_hazard1: entity work.gpr_hazard
@ -87,10 +94,12 @@ begin


gpr_write_valid_in => gpr_write_valid, gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in, gpr_write_in => gpr_write_in,
bypass_avail => gpr_bypassable,
gpr_read_valid_in => gpr_b_read_valid_in, gpr_read_valid_in => gpr_b_read_valid_in,
gpr_read_in => gpr_b_read_in, gpr_read_in => gpr_b_read_in,


stall_out => stall_b_out stall_out => stall_b_out,
use_bypass => gpr_bypass_b
); );


gpr_c_read_in_fmt <= "0" & gpr_c_read_in; gpr_c_read_in_fmt <= "0" & gpr_c_read_in;
@ -105,10 +114,12 @@ begin


gpr_write_valid_in => gpr_write_valid, gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in, gpr_write_in => gpr_write_in,
bypass_avail => gpr_bypassable,
gpr_read_valid_in => gpr_c_read_valid_in, gpr_read_valid_in => gpr_c_read_valid_in,
gpr_read_in => gpr_c_read_in_fmt, gpr_read_in => gpr_c_read_in_fmt,


stall_out => stall_c_out stall_out => stall_c_out,
use_bypass => gpr_bypass_c
); );


cr_hazard0: entity work.cr_hazard cr_hazard0: entity work.cr_hazard

@ -9,7 +9,8 @@ use work.wishbone_types.all;
entity core is entity core is
generic ( generic (
SIM : boolean := false; SIM : boolean := false;
DISABLE_FLATTEN : boolean := false DISABLE_FLATTEN : boolean := false;
EX1_BYPASS : boolean := true
); );
port ( port (
clk : in std_logic; clk : in std_logic;
@ -176,6 +177,9 @@ begin
decode1_stall_in <= decode2_stall_out; decode1_stall_in <= decode2_stall_out;


decode2_0: entity work.decode2 decode2_0: entity work.decode2
generic map (
EX1_BYPASS => EX1_BYPASS
)
port map ( port map (
clk => clk, clk => clk,
rst => core_rst, rst => core_rst,
@ -220,6 +224,9 @@ begin
); );


execute1_0: entity work.execute1 execute1_0: entity work.execute1
generic map (
EX1_BYPASS => EX1_BYPASS
)
port map ( port map (
clk => clk, clk => clk,
rst => core_rst, rst => core_rst,

@ -9,6 +9,9 @@ use work.helpers.all;
use work.insn_helpers.all; use work.insn_helpers.all;


entity decode2 is entity decode2 is
generic (
EX1_BYPASS : boolean := true
);
port ( port (
clk : in std_ulogic; clk : in std_ulogic;
rst : in std_ulogic; rst : in std_ulogic;
@ -184,15 +187,19 @@ architecture behaviour of decode2 is


signal gpr_write_valid : std_ulogic; signal gpr_write_valid : std_ulogic;
signal gpr_write : gspr_index_t; signal gpr_write : gspr_index_t;
signal gpr_bypassable : std_ulogic;


signal gpr_a_read_valid : std_ulogic; signal gpr_a_read_valid : std_ulogic;
signal gpr_a_read :gspr_index_t; signal gpr_a_read :gspr_index_t;
signal gpr_a_bypass : std_ulogic;


signal gpr_b_read_valid : std_ulogic; signal gpr_b_read_valid : std_ulogic;
signal gpr_b_read : gspr_index_t; signal gpr_b_read : gspr_index_t;
signal gpr_b_bypass : std_ulogic;


signal gpr_c_read_valid : std_ulogic; signal gpr_c_read_valid : std_ulogic;
signal gpr_c_read : gpr_index_t; signal gpr_c_read : gpr_index_t;
signal gpr_c_bypass : std_ulogic;


signal cr_write_valid : std_ulogic; signal cr_write_valid : std_ulogic;
begin begin
@ -213,6 +220,7 @@ begin


gpr_write_valid_in => gpr_write_valid, gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write, gpr_write_in => gpr_write,
gpr_bypassable => gpr_bypassable,


gpr_a_read_valid_in => gpr_a_read_valid, gpr_a_read_valid_in => gpr_a_read_valid,
gpr_a_read_in => gpr_a_read, gpr_a_read_in => gpr_a_read,
@ -228,7 +236,11 @@ begin


valid_out => control_valid_out, valid_out => control_valid_out,
stall_out => stall_out, stall_out => stall_out,
stopped_out => stopped_out stopped_out => stopped_out,

gpr_bypass_a => gpr_a_bypass,
gpr_bypass_b => gpr_b_bypass,
gpr_bypass_c => gpr_c_bypass
); );


decode2_0: process(clk) decode2_0: process(clk)
@ -295,9 +307,12 @@ begin
v.e.insn_type := d_in.decode.insn_type; v.e.insn_type := d_in.decode.insn_type;
v.e.read_reg1 := decoded_reg_a.reg; v.e.read_reg1 := decoded_reg_a.reg;
v.e.read_data1 := decoded_reg_a.data; v.e.read_data1 := decoded_reg_a.data;
v.e.bypass_data1 := gpr_a_bypass;
v.e.read_reg2 := decoded_reg_b.reg; v.e.read_reg2 := decoded_reg_b.reg;
v.e.read_data2 := decoded_reg_b.data; v.e.read_data2 := decoded_reg_b.data;
v.e.bypass_data2 := gpr_b_bypass;
v.e.read_data3 := decoded_reg_c.data; v.e.read_data3 := decoded_reg_c.data;
v.e.bypass_data3 := gpr_c_bypass;
v.e.write_reg := decoded_reg_o.reg; v.e.write_reg := decoded_reg_o.reg;
v.e.rc := decode_rc(d_in.decode.rc, d_in.insn); v.e.rc := decode_rc(d_in.decode.rc, d_in.insn);
if not (d_in.decode.insn_type = OP_MUL_H32 or d_in.decode.insn_type = OP_MUL_H64) then if not (d_in.decode.insn_type = OP_MUL_H32 or d_in.decode.insn_type = OP_MUL_H64) then
@ -342,6 +357,10 @@ begin


gpr_write_valid <= decoded_reg_o.reg_valid; gpr_write_valid <= decoded_reg_o.reg_valid;
gpr_write <= decoded_reg_o.reg; gpr_write <= decoded_reg_o.reg;
gpr_bypassable <= '0';
if EX1_BYPASS and d_in.decode.unit = ALU then
gpr_bypassable <= '1';
end if;


gpr_a_read_valid <= decoded_reg_a.reg_valid; gpr_a_read_valid <= decoded_reg_a.reg_valid;
gpr_a_read <= decoded_reg_a.reg; gpr_a_read <= decoded_reg_a.reg;

@ -11,6 +11,9 @@ use work.insn_helpers.all;
use work.ppc_fx_insns.all; use work.ppc_fx_insns.all;


entity execute1 is entity execute1 is
generic (
EX1_BYPASS : boolean := true
);
port ( port (
clk : in std_ulogic; clk : in std_ulogic;
rst : in std_ulogic; rst : in std_ulogic;
@ -46,6 +49,8 @@ architecture behaviour of execute1 is


signal r, rin : reg_type; signal r, rin : reg_type;


signal a_in, b_in, c_in : std_ulogic_vector(63 downto 0);

signal ctrl: ctrl_t := (others => (others => '0')); signal ctrl: ctrl_t := (others => (others => '0'));
signal ctrl_tmp: ctrl_t := (others => (others => '0')); signal ctrl_tmp: ctrl_t := (others => (others => '0'));


@ -109,9 +114,9 @@ begin


rotator_0: entity work.rotator rotator_0: entity work.rotator
port map ( port map (
rs => e_in.read_data3, rs => c_in,
ra => e_in.read_data1, ra => a_in,
shift => e_in.read_data2(6 downto 0), shift => b_in(6 downto 0),
insn => e_in.insn, insn => e_in.insn,
is_32bit => e_in.is_32bit, is_32bit => e_in.is_32bit,
right_shift => right_shift, right_shift => right_shift,
@ -124,8 +129,8 @@ begin


logical_0: entity work.logical logical_0: entity work.logical
port map ( port map (
rs => e_in.read_data3, rs => c_in,
rb => e_in.read_data2, rb => b_in,
op => e_in.insn_type, op => e_in.insn_type,
invert_in => e_in.invert_a, invert_in => e_in.invert_a,
invert_out => e_in.invert_out, invert_out => e_in.invert_out,
@ -137,7 +142,7 @@ begin


countzero_0: entity work.zero_counter countzero_0: entity work.zero_counter
port map ( port map (
rs => e_in.read_data3, rs => c_in,
count_right => e_in.insn(10), count_right => e_in.insn(10),
is_32bit => e_in.is_32bit, is_32bit => e_in.is_32bit,
result => countzero_result result => countzero_result
@ -158,6 +163,10 @@ begin
d_out => divider_to_x d_out => divider_to_x
); );


a_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data1 = '1' else e_in.read_data1;
b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2;
c_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data3 = '1' else e_in.read_data3;

execute1_0: process(clk) execute1_0: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
@ -256,21 +265,21 @@ begin


if e_in.is_32bit = '1' then if e_in.is_32bit = '1' then
if e_in.is_signed = '1' then if e_in.is_signed = '1' then
x_to_multiply.data1 <= (others => e_in.read_data1(31)); x_to_multiply.data1 <= (others => a_in(31));
x_to_multiply.data1(31 downto 0) <= e_in.read_data1(31 downto 0); x_to_multiply.data1(31 downto 0) <= a_in(31 downto 0);
x_to_multiply.data2 <= (others => e_in.read_data2(31)); x_to_multiply.data2 <= (others => b_in(31));
x_to_multiply.data2(31 downto 0) <= e_in.read_data2(31 downto 0); x_to_multiply.data2(31 downto 0) <= b_in(31 downto 0);
else else
x_to_multiply.data1 <= '0' & x"00000000" & e_in.read_data1(31 downto 0); x_to_multiply.data1 <= '0' & x"00000000" & a_in(31 downto 0);
x_to_multiply.data2 <= '0' & x"00000000" & e_in.read_data2(31 downto 0); x_to_multiply.data2 <= '0' & x"00000000" & b_in(31 downto 0);
end if; end if;
else else
if e_in.is_signed = '1' then if e_in.is_signed = '1' then
x_to_multiply.data1 <= e_in.read_data1(63) & e_in.read_data1; x_to_multiply.data1 <= a_in(63) & a_in;
x_to_multiply.data2 <= e_in.read_data2(63) & e_in.read_data2; x_to_multiply.data2 <= b_in(63) & b_in;
else else
x_to_multiply.data1 <= '0' & e_in.read_data1; x_to_multiply.data1 <= '0' & a_in;
x_to_multiply.data2 <= '0' & e_in.read_data2; x_to_multiply.data2 <= '0' & b_in;
end if; end if;
end if; end if;


@ -279,23 +288,23 @@ begin
sign2 := '0'; sign2 := '0';
if e_in.is_signed = '1' then if e_in.is_signed = '1' then
if e_in.is_32bit = '1' then if e_in.is_32bit = '1' then
sign1 := e_in.read_data1(31); sign1 := a_in(31);
sign2 := e_in.read_data2(31); sign2 := b_in(31);
else else
sign1 := e_in.read_data1(63); sign1 := a_in(63);
sign2 := e_in.read_data2(63); sign2 := b_in(63);
end if; end if;
end if; end if;
-- take absolute values -- take absolute values
if sign1 = '0' then if sign1 = '0' then
abs1 := signed(e_in.read_data1); abs1 := signed(a_in);
else else
abs1 := - signed(e_in.read_data1); abs1 := - signed(a_in);
end if; end if;
if sign2 = '0' then if sign2 = '0' then
abs2 := signed(e_in.read_data2); abs2 := signed(b_in);
else else
abs2 := - signed(e_in.read_data2); abs2 := - signed(b_in);
end if; end if;


x_to_divider <= Execute1ToDividerInit; x_to_divider <= Execute1ToDividerInit;
@ -358,14 +367,14 @@ begin
-- Do nothing -- Do nothing
when OP_ADD | OP_CMP => when OP_ADD | OP_CMP =>
if e_in.invert_a = '0' then if e_in.invert_a = '0' then
a_inv := e_in.read_data1; a_inv := a_in;
else else
a_inv := not e_in.read_data1; a_inv := not a_in;
end if; end if;
result_with_carry := ppc_adde(a_inv, e_in.read_data2, result_with_carry := ppc_adde(a_inv, b_in,
decode_input_carry(e_in.input_carry, v.e.xerc)); decode_input_carry(e_in.input_carry, v.e.xerc));
result := result_with_carry(63 downto 0); result := result_with_carry(63 downto 0);
carry_32 := result(32) xor a_inv(32) xor e_in.read_data2(32); carry_32 := result(32) xor a_inv(32) xor b_in(32);
carry_64 := result_with_carry(64); carry_64 := result_with_carry(64);
if e_in.insn_type = OP_ADD then if e_in.insn_type = OP_ADD then
if e_in.output_carry = '1' then if e_in.output_carry = '1' then
@ -373,8 +382,8 @@ begin
end if; end if;
if e_in.oe = '1' then if e_in.oe = '1' then
set_ov(v.e, set_ov(v.e,
calc_ov(a_inv(63), e_in.read_data2(63), carry_64, result_with_carry(63)), calc_ov(a_inv(63), b_in(63), carry_64, result_with_carry(63)),
calc_ov(a_inv(31), e_in.read_data2(31), carry_32, result_with_carry(31))); calc_ov(a_inv(31), b_in(31), carry_32, result_with_carry(31)));
end if; end if;
result_en := '1'; result_en := '1';
else else
@ -385,20 +394,20 @@ begin
v.e.write_cr_enable := '1'; v.e.write_cr_enable := '1';
crnum := to_integer(unsigned(bf)); crnum := to_integer(unsigned(bf));
v.e.write_cr_mask := num_to_fxm(crnum); v.e.write_cr_mask := num_to_fxm(crnum);
zerolo := not (or (e_in.read_data1(31 downto 0) xor e_in.read_data2(31 downto 0))); zerolo := not (or (a_in(31 downto 0) xor b_in(31 downto 0)));
zerohi := not (or (e_in.read_data1(63 downto 32) xor e_in.read_data2(63 downto 32))); zerohi := not (or (a_in(63 downto 32) xor b_in(63 downto 32)));
if zerolo = '1' and (l = '0' or zerohi = '1') then if zerolo = '1' and (l = '0' or zerohi = '1') then
-- values are equal -- values are equal
newcrf := "001" & v.e.xerc.so; newcrf := "001" & v.e.xerc.so;
else else
if l = '1' then if l = '1' then
-- 64-bit comparison -- 64-bit comparison
msb_a := e_in.read_data1(63); msb_a := a_in(63);
msb_b := e_in.read_data2(63); msb_b := b_in(63);
else else
-- 32-bit comparison -- 32-bit comparison
msb_a := e_in.read_data1(31); msb_a := a_in(31);
msb_b := e_in.read_data2(31); msb_b := b_in(31);
end if; end if;
if msb_a /= msb_b then if msb_a /= msb_b then
-- Subtraction might overflow, but -- Subtraction might overflow, but
@ -424,25 +433,25 @@ begin
when OP_B => when OP_B =>
f_out.redirect <= '1'; f_out.redirect <= '1';
if (insn_aa(e_in.insn)) then if (insn_aa(e_in.insn)) then
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.read_data2)); f_out.redirect_nia <= std_ulogic_vector(signed(b_in));
else else
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(e_in.read_data2)); f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in));
end if; end if;
when OP_BC => when OP_BC =>
-- read_data1 is CTR -- read_data1 is CTR
bo := insn_bo(e_in.insn); bo := insn_bo(e_in.insn);
bi := insn_bi(e_in.insn); bi := insn_bi(e_in.insn);
if bo(4-2) = '0' then if bo(4-2) = '0' then
result := std_ulogic_vector(unsigned(e_in.read_data1) - 1); result := std_ulogic_vector(unsigned(a_in) - 1);
result_en := '1'; result_en := '1';
v.e.write_reg := fast_spr_num(SPR_CTR); v.e.write_reg := fast_spr_num(SPR_CTR);
end if; end if;
if ppc_bc_taken(bo, bi, e_in.cr, e_in.read_data1) = 1 then if ppc_bc_taken(bo, bi, e_in.cr, a_in) = 1 then
f_out.redirect <= '1'; f_out.redirect <= '1';
if (insn_aa(e_in.insn)) then if (insn_aa(e_in.insn)) then
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.read_data2)); f_out.redirect_nia <= std_ulogic_vector(signed(b_in));
else else
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(e_in.read_data2)); f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in));
end if; end if;
end if; end if;
when OP_BCREG => when OP_BCREG =>
@ -451,40 +460,40 @@ begin
bo := insn_bo(e_in.insn); bo := insn_bo(e_in.insn);
bi := insn_bi(e_in.insn); bi := insn_bi(e_in.insn);
if bo(4-2) = '0' and e_in.insn(10) = '0' then if bo(4-2) = '0' and e_in.insn(10) = '0' then
result := std_ulogic_vector(unsigned(e_in.read_data1) - 1); result := std_ulogic_vector(unsigned(a_in) - 1);
result_en := '1'; result_en := '1';
v.e.write_reg := fast_spr_num(SPR_CTR); v.e.write_reg := fast_spr_num(SPR_CTR);
end if; end if;
if ppc_bc_taken(bo, bi, e_in.cr, e_in.read_data1) = 1 then if ppc_bc_taken(bo, bi, e_in.cr, a_in) = 1 then
f_out.redirect <= '1'; f_out.redirect <= '1';
f_out.redirect_nia <= e_in.read_data2(63 downto 2) & "00"; f_out.redirect_nia <= b_in(63 downto 2) & "00";
end if; end if;
when OP_CMPB => when OP_CMPB =>
result := ppc_cmpb(e_in.read_data3, e_in.read_data2); result := ppc_cmpb(c_in, b_in);
result_en := '1'; result_en := '1';
when OP_CNTZ => when OP_CNTZ =>
result := countzero_result; result := countzero_result;
result_en := '1'; result_en := '1';
when OP_EXTS => when OP_EXTS =>
-- note data_len is a 1-hot encoding -- note data_len is a 1-hot encoding
negative := (e_in.data_len(0) and e_in.read_data3(7)) or negative := (e_in.data_len(0) and c_in(7)) or
(e_in.data_len(1) and e_in.read_data3(15)) or (e_in.data_len(1) and c_in(15)) or
(e_in.data_len(2) and e_in.read_data3(31)); (e_in.data_len(2) and c_in(31));
result := (others => negative); result := (others => negative);
if e_in.data_len(2) = '1' then if e_in.data_len(2) = '1' then
result(31 downto 16) := e_in.read_data3(31 downto 16); result(31 downto 16) := c_in(31 downto 16);
end if; end if;
if e_in.data_len(2) = '1' or e_in.data_len(1) = '1' then if e_in.data_len(2) = '1' or e_in.data_len(1) = '1' then
result(15 downto 8) := e_in.read_data3(15 downto 8); result(15 downto 8) := c_in(15 downto 8);
end if; end if;
result(7 downto 0) := e_in.read_data3(7 downto 0); result(7 downto 0) := c_in(7 downto 0);
result_en := '1'; result_en := '1';
when OP_ISEL => when OP_ISEL =>
crbit := to_integer(unsigned(insn_bc(e_in.insn))); crbit := to_integer(unsigned(insn_bc(e_in.insn)));
if e_in.cr(31-crbit) = '1' then if e_in.cr(31-crbit) = '1' then
result := e_in.read_data1; result := a_in;
else else
result := e_in.read_data2; result := b_in;
end if; end if;
result_en := '1'; result_en := '1';
when OP_MCRF => when OP_MCRF =>
@ -549,7 +558,7 @@ begin
end if; end if;
when OP_MFSPR => when OP_MFSPR =>
if is_fast_spr(e_in.read_reg1) then if is_fast_spr(e_in.read_reg1) then
result := e_in.read_data1; result := a_in;
if decode_spr_num(e_in.insn) = SPR_XER then if decode_spr_num(e_in.insn) = SPR_XER then
-- bits 0:31 and 35:43 are treated as reserved and return 0s when read using mfxer -- bits 0:31 and 35:43 are treated as reserved and return 0s when read using mfxer
result(63 downto 32) := (others => '0'); result(63 downto 32) := (others => '0');
@ -596,19 +605,19 @@ begin
crnum := fxm_to_num(insn_fxm(e_in.insn)); crnum := fxm_to_num(insn_fxm(e_in.insn));
v.e.write_cr_mask := num_to_fxm(crnum); v.e.write_cr_mask := num_to_fxm(crnum);
end if; end if;
v.e.write_cr_data := e_in.read_data3(31 downto 0); v.e.write_cr_data := c_in(31 downto 0);
when OP_MTSPR => when OP_MTSPR =>
report "MTSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) & report "MTSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) &
"=" & to_hstring(e_in.read_data3); "=" & to_hstring(c_in);
if is_fast_spr(e_in.write_reg) then if is_fast_spr(e_in.write_reg) then
result := e_in.read_data3; result := c_in;
result_en := '1'; result_en := '1';
if decode_spr_num(e_in.insn) = SPR_XER then if decode_spr_num(e_in.insn) = SPR_XER then
v.e.xerc.so := e_in.read_data3(63-32); v.e.xerc.so := c_in(63-32);
v.e.xerc.ov := e_in.read_data3(63-33); v.e.xerc.ov := c_in(63-33);
v.e.xerc.ca := e_in.read_data3(63-34); v.e.xerc.ca := c_in(63-34);
v.e.xerc.ov32 := e_in.read_data3(63-44); v.e.xerc.ov32 := c_in(63-44);
v.e.xerc.ca32 := e_in.read_data3(63-45); v.e.xerc.ca32 := c_in(63-45);
v.e.write_xerc_enable := '1'; v.e.write_xerc_enable := '1';
end if; end if;
else else

@ -12,18 +12,21 @@ entity gpr_hazard is


gpr_write_valid_in : in std_ulogic; gpr_write_valid_in : in std_ulogic;
gpr_write_in : in std_ulogic_vector(5 downto 0); gpr_write_in : in std_ulogic_vector(5 downto 0);
bypass_avail : in std_ulogic;
gpr_read_valid_in : in std_ulogic; gpr_read_valid_in : in std_ulogic;
gpr_read_in : in std_ulogic_vector(5 downto 0); gpr_read_in : in std_ulogic_vector(5 downto 0);


stall_out : out std_ulogic stall_out : out std_ulogic;
use_bypass : out std_ulogic
); );
end entity gpr_hazard; end entity gpr_hazard;
architecture behaviour of gpr_hazard is architecture behaviour of gpr_hazard is
type pipeline_entry_type is record type pipeline_entry_type is record
valid : std_ulogic; valid : std_ulogic;
gpr : std_ulogic_vector(5 downto 0); bypass : std_ulogic;
gpr : std_ulogic_vector(5 downto 0);
end record; end record;
constant pipeline_entry_init : pipeline_entry_type := (valid => '0', gpr => (others => '0')); constant pipeline_entry_init : pipeline_entry_type := (valid => '0', bypass => '0', gpr => (others => '0'));


type pipeline_t is array(0 to PIPELINE_DEPTH-1) of pipeline_entry_type; type pipeline_t is array(0 to PIPELINE_DEPTH-1) of pipeline_entry_type;
constant pipeline_t_init : pipeline_t := (others => pipeline_entry_init); constant pipeline_t_init : pipeline_t := (others => pipeline_entry_init);
@ -33,9 +36,7 @@ begin
gpr_hazard0: process(clk) gpr_hazard0: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
if stall_in = '0' then r <= rin;
r <= rin;
end if;
end if; end if;
end process; end process;


@ -45,22 +46,49 @@ begin
v := r; v := r;


stall_out <= '0'; stall_out <= '0';
loop_0: for i in 0 to PIPELINE_DEPTH-1 loop use_bypass <= '0';
if ((r(i).valid = gpr_read_valid_in) and r(i).gpr = gpr_read_in) then if gpr_read_valid_in = '1' then
stall_out <= '1'; if r(0).valid = '1' and r(0).gpr = gpr_read_in then
if r(0).bypass = '1' and stall_in = '0' then
use_bypass <= '1';
else
stall_out <= '1';
end if;
end if; end if;
end loop; loop_0: for i in 1 to PIPELINE_DEPTH-1 loop
if r(i).valid = '1' and r(i).gpr = gpr_read_in then
if r(i).bypass = '1' then
use_bypass <= '1';
else
stall_out <= '1';
end if;
end if;
end loop;
end if;


v(0).valid := gpr_write_valid_in; if stall_in = '0' then
v(0).gpr := gpr_write_in; v(0).valid := gpr_write_valid_in;
loop_1: for i in 0 to PIPELINE_DEPTH-2 loop v(0).bypass := bypass_avail;
-- propagate to next slot v(0).gpr := gpr_write_in;
v(i+1) := r(i); loop_1: for i in 1 to PIPELINE_DEPTH-1 loop
end loop; -- propagate to next slot
v(i).valid := r(i-1).valid;
v(i).bypass := r(i-1).bypass;
v(i).gpr := r(i-1).gpr;
end loop;


-- asynchronous output else
if gpr_read_valid_in = '0' then -- stage 0 stalled, so stage 1 becomes empty
stall_out <= '0'; loop_1b: for i in 1 to PIPELINE_DEPTH-1 loop
-- propagate to next slot
if i = 1 then
v(i).valid := '0';
else
v(i).valid := r(i-1).valid;
v(i).bypass := r(i-1).bypass;
v(i).gpr := r(i-1).gpr;
end if;
end loop;
end if; end if;


-- update registers -- update registers

Loading…
Cancel
Save