@ -41,6 +41,7 @@ entity decode2 is
execute_cr_bypass : in cr_bypass_data_t;
execute2_bypass : in bypass_data_t;
execute2_cr_bypass : in cr_bypass_data_t;
writeback_bypass : in bypass_data_t;
log_out : out std_ulogic_vector(9 downto 0)
);
@ -49,8 +50,16 @@ end entity decode2;
architecture behaviour of decode2 is
type reg_type is record
e : Decode2ToExecute1Type;
repeat : std_ulogic;
repeat : repeat_t;
busy : std_ulogic;
sgl_pipe : std_ulogic;
reg_a_valid : std_ulogic;
reg_b_valid : std_ulogic;
reg_c_valid : std_ulogic;
reg_o_valid : std_ulogic;
end record;
constant reg_type_init : reg_type :=
(e => Decode2ToExecute1Init, repeat => NONE, others => '0');
signal dc2, dc2in : reg_type;
@ -61,20 +70,21 @@ architecture behaviour of decode2 is
reg : gspr_index_t;
data : std_ulogic_vector(63 downto 0);
end record;
constant decode_input_reg_init : decode_input_reg_t := ('0', (others => '0'), (others => '0'));
type decode_output_reg_t is record
reg_valid : std_ulogic;
reg : gspr_index_t;
end record;
constant decode_output_reg_init : decode_output_reg_t := ('0', (others => '0'));
function decode_input_reg_a (t : input_reg_a_t; insn_in : std_ulogic_vector(31 downto 0);
reg_data : std_ulogic_vector(63 downto 0);
ispr : gspr_index_t;
instr_addr : std_ulogic_vector(63 downto 0))
return decode_input_reg_t is
begin
if t = RA or (t = RA_OR_ZERO and insn_ra(insn_in) /= "00000") then
return ('1', gpr_to_gspr(insn_ra(insn_in)), reg_data);
return ('1', gpr_to_gspr(insn_ra(insn_in)), (others => '0'));
elsif t = SPR then
-- ISPR must be either a valid fast SPR number or all 0 for a slow SPR.
-- If it's all 0, we don't treat it as a dependency as slow SPRs
@ -83,27 +93,26 @@ architecture behaviour of decode2 is
assert is_fast_spr(ispr) = '1' or ispr = "0000000"
report "Decode A says SPR but ISPR is invalid:" &
to_hstring(ispr) severity failure;
return (is_fast_spr(ispr), ispr, reg_data);
return (is_fast_spr(ispr), ispr, (others => '0'));
elsif t = CIA then
return ('0', (others => '0'), instr_addr);
elsif HAS_FPU and t = FRA then
return ('1', fpr_to_gspr(insn_fra(insn_in)), reg_data);
return ('1', fpr_to_gspr(insn_fra(insn_in)), (others => '0'));
else
return ('0', (others => '0'), (others => '0'));
end if;
end;
function decode_input_reg_b (t : input_reg_b_t; insn_in : std_ulogic_vector(31 downto 0);
reg_data : std_ulogic_vector(63 downto 0);
ispr : gspr_index_t) return decode_input_reg_t is
variable ret : decode_input_reg_t;
begin
case t is
when RB =>
ret := ('1', gpr_to_gspr(insn_rb(insn_in)), reg_data);
ret := ('1', gpr_to_gspr(insn_rb(insn_in)), (others => '0'));
when FRB =>
if HAS_FPU then
ret := ('1', fpr_to_gspr(insn_frb(insn_in)), reg_data);
ret := ('1', fpr_to_gspr(insn_frb(insn_in)), (others => '0'));
else
ret := ('0', (others => '0'), (others => '0'));
end if;
@ -138,7 +147,7 @@ architecture behaviour of decode2 is
assert is_fast_spr(ispr) = '1' or ispr = "0000000"
report "Decode B says SPR but ISPR is invalid:" &
to_hstring(ispr) severity failure;
ret := (is_fast_spr(ispr), ispr, reg_data);
ret := (is_fast_spr(ispr), ispr, (others => '0'));
when NONE =>
ret := ('0', (others => '0'), (others => '0'));
end case;
@ -146,23 +155,23 @@ architecture behaviour of decode2 is
return ret;
end;
function decode_input_reg_c (t : input_reg_c_t; insn_in : std_ulogic_vector(31 downto 0);
reg_data : std_ulogic_vector(63 downto 0)) return decode_input_reg_t is
function decode_input_reg_c (t : input_reg_c_t; insn_in : std_ulogic_vector(31 downto 0))
return decode_input_reg_t is
begin
case t is
when RS =>
return ('1', gpr_to_gspr(insn_rs(insn_in)), reg_data);
return ('1', gpr_to_gspr(insn_rs(insn_in)), (others => '0'));
when RCR =>
return ('1', gpr_to_gspr(insn_rcreg(insn_in)), reg_data);
return ('1', gpr_to_gspr(insn_rcreg(insn_in)), (others => '0'));
when FRS =>
if HAS_FPU then
return ('1', fpr_to_gspr(insn_frt(insn_in)), reg_data);
return ('1', fpr_to_gspr(insn_frt(insn_in)), (others => '0'));
else
return ('0', (others => '0'), (others => '0'));
end if;
when FRC =>
if HAS_FPU then
return ('1', fpr_to_gspr(insn_frc(insn_in)), reg_data);
return ('1', fpr_to_gspr(insn_frc(insn_in)), (others => '0'));
else
return ('0', (others => '0'), (others => '0'));
end if;
@ -264,10 +273,14 @@ architecture behaviour of decode2 is
others => "000"
);
signal decoded_reg_a : decode_input_reg_t;
signal decoded_reg_b : decode_input_reg_t;
signal decoded_reg_c : decode_input_reg_t;
signal decoded_reg_o : decode_output_reg_t;
-- issue control signals
signal control_valid_in : std_ulogic;
signal control_valid_out : std_ulogic;
signal control_stall_out : std_ulogic;
signal control_sgl_pipe : std_logic;
signal gpr_write_valid : std_ulogic;
@ -302,8 +315,6 @@ begin
complete_in => complete_in,
valid_in => control_valid_in,
repeated => dc2.repeat,
busy_in => busy_in,
deferred => deferred,
flush_in => flush_in,
sgl_pipe_in => control_sgl_pipe,
@ -331,7 +342,6 @@ begin
cr_bypass => cr_bypass,
valid_out => control_valid_out,
stall_out => control_stall_out,
stopped_out => stopped_out,
gpr_bypass_a => gpr_a_bypass,
@ -346,9 +356,12 @@ begin
decode2_0: process(clk)
begin
if rising_edge(clk) then
if rst = '1' or flush_in = '1' or deferred = '0' then
if rst = '1' or flush_in = '1' then
dc2 <= reg_type_init;
elsif deferred = '0' then
if dc2in.e.valid = '1' then
report "execute " & to_hstring(dc2in.e.nia);
report "execute " & to_hstring(dc2in.e.nia) &
" tag=" & integer'image(dc2in.e.instr_tag.tag) & std_ulogic'image(dc2in.e.instr_tag.valid);
end if;
dc2 <= dc2in;
end if;
@ -357,20 +370,44 @@ begin
c_out.read <= d_in.decode.input_cr;
decode2_addrs: process(all)
begin
decoded_reg_a <= decode_input_reg_init;
decoded_reg_b <= decode_input_reg_init;
decoded_reg_c <= decode_input_reg_init;
decoded_reg_o <= decode_output_reg_init;
if d_in.valid = '1' then
decoded_reg_a <= decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, d_in.ispr1, d_in.nia);
decoded_reg_b <= decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn, d_in.ispr2);
decoded_reg_c <= decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn);
decoded_reg_o <= decode_output_reg (d_in.decode.output_reg_a, d_in.insn, d_in.ispro);
end if;
r_out.read1_enable <= decoded_reg_a.reg_valid;
r_out.read1_reg <= decoded_reg_a.reg;
r_out.read2_enable <= decoded_reg_b.reg_valid;
r_out.read2_reg <= decoded_reg_b.reg;
r_out.read3_enable <= decoded_reg_c.reg_valid;
r_out.read3_reg <= decoded_reg_c.reg;
end process;
decode2_1: process(all)
variable v : reg_type;
variable decoded_reg_a : decode_input_reg_t;
variable decoded_reg_b : decode_input_reg_t;
variable decoded_reg_c : decode_input_reg_t;
variable decoded_reg_o : decode_output_reg_t;
variable length : std_ulogic_vector(3 downto 0);
variable op : insn_type_t;
variable valid_in : std_ulogic;
begin
v := dc2;
valid_in := d_in.valid or dc2.busy;
if dc2.busy = '0' then
v.e := Decode2ToExecute1Init;
--v.e.input_cr := d_in.decode.input_cr;
v.sgl_pipe := d_in.decode.sgl_pipe;
v.e.input_cr := d_in.decode.input_cr;
v.e.output_cr := d_in.decode.output_cr;
-- Work out whether XER common bits are set
@ -389,11 +426,10 @@ begin
when others =>
end case;
decoded_reg_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, r_in.read1_data, d_in.ispr1,
d_in.nia);
decoded_reg_b := decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn, r_in.read2_data, d_in.ispr2);
decoded_reg_c := decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn, r_in.read3_data);
decoded_reg_o := decode_output_reg (d_in.decode.output_reg_a, d_in.insn, d_in.ispro);
v.reg_a_valid := decoded_reg_a.reg_valid;
v.reg_b_valid := decoded_reg_b.reg_valid;
v.reg_c_valid := decoded_reg_c.reg_valid;
v.reg_o_valid := decoded_reg_o.reg_valid;
if d_in.decode.lr = '1' then
v.e.lr := insn_lk(d_in.insn);
@ -402,34 +438,16 @@ begin
end if;
op := d_in.decode.insn_type;
v.repeat := d_in.decode.repeat;
if d_in.decode.repeat /= NONE then
v.e.repeat := '1';
v.e.second := dc2.repeat;
case d_in.decode.repeat is
when DUPD =>
-- update-form loads, 2nd instruction writes RA
if dc2.repeat = '1' then
decoded_reg_o.reg := decoded_reg_a.reg;
end if;
when others =>
end case;
elsif v.e.lr = '1' and decoded_reg_a.reg_valid = '1' then
-- bcl/bclrl/bctarl that needs to write both CTR and LR has to be doubled
v.e.repeat := '1';
v.e.second := dc2.repeat;
-- first one does CTR, second does LR
decoded_reg_o.reg(0) := not dc2.repeat;
end if;
v.e.spr_select := d_in.spr_info;
r_out.read1_enable <= decoded_reg_a.reg_valid and d_in.valid;
r_out.read1_reg <= decoded_reg_a.reg;
r_out.read2_enable <= decoded_reg_b.reg_valid and d_in.valid;
r_out.read2_reg <= decoded_reg_b.reg;
r_out.read3_enable <= decoded_reg_c.reg_valid and d_in.valid;
r_out.read3_reg <= decoded_reg_c.reg;
case d_in.decode.length is
when is1B =>
length := "0001";
@ -447,9 +465,9 @@ begin
v.e.nia := d_in.nia;
v.e.unit := d_in.decode.unit;
v.e.fac := d_in.decode.facility;
v.e.instr_tag := instr_tag;
v.e.read_reg1 := decoded_reg_a.reg;
v.e.read_reg2 := decoded_reg_b.reg;
v.e.read_reg3 := decoded_reg_c.reg;
v.e.write_reg := decoded_reg_o.reg;
v.e.write_reg_enable := decoded_reg_o.reg_valid;
v.e.rc := decode_rc(d_in.decode.rc, d_in.insn);
@ -472,7 +490,7 @@ begin
v.e.result_sel := result_select(op);
v.e.sub_select := subresult_select(op);
if op = OP_BC or op = OP_BCREG then
if d_in.insn(23) = '0' and dc2.repeat = '0' and
if d_in.insn(23) = '0' and
not (d_in.decode.insn_type = OP_BCREG and d_in.insn(10) = '0') then
-- decrement CTR if BO(2) = 0 and not bcctr
v.e.addm1 := '1';
@ -491,71 +509,107 @@ begin
end if;
end if;
elsif dc2.e.valid = '1' then
-- dc2.busy = 1 and dc2.e.valid = 1, thus this must be a repeated instruction.
-- Set up for the second iteration (if deferred = 1 this will all be ignored)
v.e.second := '1';
case dc2.repeat is
when DUPD =>
-- update-form loads, 2nd instruction writes RA
v.e.write_reg := dc2.e.read_reg1;
when NONE =>
-- bcl/bclrl/bctarl that needs to write both CTR and LR
v.e.write_reg(0) := '0'; -- point to LR
v.e.result_sel := "110"; -- select NIA (to go to LR)
when others =>
end case;
end if;
-- issue control
control_valid_in <= valid_in;
control_sgl_pipe <= v.sgl_pipe;
gpr_write_valid <= v.reg_o_valid;
gpr_write <= v.e.write_reg;
gpr_a_read_valid <= v.reg_a_valid;
gpr_a_read <= v.e.read_reg1;
gpr_b_read_valid <= v.reg_b_valid;
gpr_b_read <= v.e.read_reg2;
gpr_c_read_valid <= v.reg_c_valid;
gpr_c_read <= v.e.read_reg3;
cr_write_valid <= v.e.output_cr or v.e.rc;
-- Since ops that write CR only write some of the fields,
-- any op that writes CR effectively also reads it.
cr_read_valid <= cr_write_valid or v.e.input_cr;
-- See if any of the operands can get their value via the bypass path.
if dc2.busy = '0' or gpr_a_bypass /= "00" then
case gpr_a_bypass is
when "10" =>
when "01" =>
v.e.read_data1 := execute_bypass.data;
when "11" =>
when "10" =>
v.e.read_data1 := execute2_bypass.data;
when "11" =>
v.e.read_data1 := writeback_bypass.data;
when others =>
if decoded_reg_a.reg_valid = '1' then
v.e.read_data1 := r_in.read1_data;
else
v.e.read_data1 := decoded_reg_a.data;
end if;
end case;
end if;
if dc2.busy = '0' or gpr_b_bypass /= "00" then
case gpr_b_bypass is
when "10" =>
when "01" =>
v.e.read_data2 := execute_bypass.data;
when "11" =>
when "10" =>
v.e.read_data2 := execute2_bypass.data;
when "11" =>
v.e.read_data2 := writeback_bypass.data;
when others =>
if decoded_reg_b.reg_valid = '1' then
v.e.read_data2 := r_in.read2_data;
else
v.e.read_data2 := decoded_reg_b.data;
end if;
end case;
end if;
if dc2.busy = '0' or gpr_c_bypass /= "00" then
case gpr_c_bypass is
when "10" =>
when "01" =>
v.e.read_data3 := execute_bypass.data;
when "11" =>
when "10" =>
v.e.read_data3 := execute2_bypass.data;
when "11" =>
v.e.read_data3 := writeback_bypass.data;
when others =>
if decoded_reg_c.reg_valid = '1' then
v.e.read_data3 := r_in.read3_data;
else
v.e.read_data3 := decoded_reg_c.data;
end if;
end case;
end if;
v.e.cr := c_in.read_cr_data;
if cr_bypass = "10" then
case cr_bypass is
when "10" =>
v.e.cr := execute_cr_bypass.data;
elsif cr_bypass = "11" then
when "11" =>
v.e.cr := execute2_cr_bypass.data;
end if;
-- issue control
control_valid_in <= d_in.valid;
control_sgl_pipe <= d_in.decode.sgl_pipe;
gpr_write_valid <= v.e.write_reg_enable;
gpr_write <= decoded_reg_o.reg;
gpr_a_read_valid <= decoded_reg_a.reg_valid;
gpr_a_read <= decoded_reg_a.reg;
gpr_b_read_valid <= decoded_reg_b.reg_valid;
gpr_b_read <= decoded_reg_b.reg;
gpr_c_read_valid <= decoded_reg_c.reg_valid;
gpr_c_read <= decoded_reg_c.reg;
cr_write_valid <= d_in.decode.output_cr or decode_rc(d_in.decode.rc, d_in.insn);
-- Since ops that write CR only write some of the fields,
-- any op that writes CR effectively also reads it.
cr_read_valid <= cr_write_valid or d_in.decode.input_cr;
when others =>
v.e.cr := c_in.read_cr_data;
end case;
v.e.valid := control_valid_out;
if control_valid_out = '1' then
v.repeat := v.e.repeat and not dc2.repeat;
end if;
stall_out <= control_stall_out or v.repeat;
v.e.instr_tag := instr_tag;
v.busy := valid_in and (not control_valid_out or (v.e.repeat and not v.e.second));
if rst = '1' or flush_in = '1' then
v.e := Decode2ToExecute1Init;
v.repeat := '0';
end if;
stall_out <= dc2.busy or deferred;
-- Update registers
dc2in <= v;
@ -574,9 +628,9 @@ begin
dc2.e.valid &
stopped_out &
stall_out &
(gpr_a_bypass(1) or gpr_a_bypass(0)) &
(gpr_b_bypass(1) or gpr_b_bypass(0)) &
(gpr_c_bypass(1) or gpr_c_bypass(0));
(gpr_a_bypass(1) xor gpr_a_bypass(0)) &
(gpr_b_bypass(1) xor gpr_b_bypass(0)) &
(gpr_c_bypass(1) xor gpr_c_bypass(0));
end if;
end process;
log_out <= log_data;