core: Make result multiplexing explicit

This adds an explicit multiplexer feeding v.e.write_data in execute1,
with the select lines determined in the previous cycle based on the
insn_type.  Similarly, for multiply and divide instructions, there is
now an explicit multiplexer.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/268/head
Paul Mackerras 4 years ago
parent 9ea1ab0215
commit 658feabfd4

@ -210,6 +210,7 @@ package common is
rc: std_ulogic;
oe: std_ulogic;
invert_a: std_ulogic;
addm1 : std_ulogic;
invert_out: std_ulogic;
input_carry: carry_in_t;
output_carry: std_ulogic;
@ -224,18 +225,21 @@ package common is
update : std_ulogic; -- is this an update instruction?
reserve : std_ulogic; -- set for larx/stcx
br_pred : std_ulogic;
result_sel : std_ulogic_vector(2 downto 0); -- select source of result
sub_select : std_ulogic_vector(2 downto 0); -- sub-result selection
repeat : std_ulogic; -- set if instruction is cracked into two ops
second : std_ulogic; -- set if this is the second op
end record;
constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
(valid => '0', unit => NONE, fac => NONE, insn_type => OP_ILLEGAL,
bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0',
bypass_cr => '0', lr => '0', rc => '0', oe => '0', invert_a => '0',
bypass_cr => '0', lr => '0', rc => '0', oe => '0', invert_a => '0', addm1 => '0',
invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0',
is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', br_pred => '0',
byte_reverse => '0', sign_extend => '0', update => '0', nia => (others => '0'),
read_data1 => (others => '0'), read_data2 => (others => '0'), read_data3 => (others => '0'),
cr => (others => '0'), insn => (others => '0'), data_len => (others => '0'),
result_sel => "000", sub_select => "000",
repeat => '0', second => '0', others => (others => '0'));

type MultiplyInputType is record

@ -221,6 +221,52 @@ architecture behaviour of decode2 is
end case;
end;

-- control signals that are derived from insn_type
type mux_select_array_t is array(insn_type_t) of std_ulogic_vector(2 downto 0);

constant result_select : mux_select_array_t := (
OP_AND => "001", -- logical_result
OP_OR => "001",
OP_XOR => "001",
OP_POPCNT => "001",
OP_PRTY => "001",
OP_CMPB => "001",
OP_EXTS => "001",
OP_BPERM => "001",
OP_BCD => "001",
OP_MTSPR => "001",
OP_RLC => "010", -- rotator_result
OP_RLCL => "010",
OP_RLCR => "010",
OP_SHL => "010",
OP_SHR => "010",
OP_EXTSWSLI => "010",
OP_MUL_L64 => "011", -- muldiv_result
OP_MUL_H64 => "011",
OP_MUL_H32 => "011",
OP_DIV => "011",
OP_DIVE => "011",
OP_MOD => "011",
OP_CNTZ => "100", -- countzero_result
OP_MFSPR => "101", -- spr_result
OP_ISEL => "111", -- misc_result
OP_DARN => "111",
OP_MFMSR => "111",
OP_MFCR => "111",
OP_SETB => "111",
others => "000" -- default to adder_result
);

constant subresult_select : mux_select_array_t := (
OP_MUL_L64 => "000", -- muldiv_result
OP_MUL_H64 => "001",
OP_MUL_H32 => "010",
OP_DIV => "011",
OP_DIVE => "011",
OP_MOD => "011",
others => "000"
);

-- issue control signals
signal control_valid_in : std_ulogic;
signal control_valid_out : std_ulogic;
@ -400,6 +446,10 @@ begin
v.e.bypass_cr := cr_bypass;
v.e.xerc := c_in.read_xerc_data;
v.e.invert_a := d_in.decode.invert_a;
v.e.addm1 := '0';
if d_in.decode.insn_type = OP_BC or d_in.decode.insn_type = OP_BCREG then
v.e.addm1 := '1';
end if;
v.e.invert_out := d_in.decode.invert_out;
v.e.input_carry := d_in.decode.input_carry;
v.e.output_carry := d_in.decode.output_carry;
@ -415,6 +465,8 @@ begin
v.e.update := d_in.decode.update;
v.e.reserve := d_in.decode.reserve;
v.e.br_pred := d_in.br_pred;
v.e.result_sel := result_select(d_in.decode.insn_type);
v.e.sub_select := subresult_select(d_in.decode.insn_type);

-- issue control
control_valid_in <= d_in.valid;

@ -60,6 +60,8 @@ architecture behaviour of execute1 is
prev_op : insn_type_t;
lr_update : std_ulogic;
next_lr : std_ulogic_vector(63 downto 0);
resmux : std_ulogic_vector(2 downto 0);
submux : std_ulogic_vector(2 downto 0);
mul_in_progress : std_ulogic;
mul_finish : std_ulogic;
div_in_progress : std_ulogic;
@ -103,6 +105,13 @@ architecture behaviour of execute1 is
signal rotator_carry: std_ulogic;
signal logical_result: std_ulogic_vector(63 downto 0);
signal countzero_result: std_ulogic_vector(63 downto 0);
signal alu_result: std_ulogic_vector(63 downto 0);
signal adder_result: std_ulogic_vector(63 downto 0);
signal misc_result: std_ulogic_vector(63 downto 0);
signal muldiv_result: std_ulogic_vector(63 downto 0);
signal spr_result: std_ulogic_vector(63 downto 0);
signal result_mux_sel: std_ulogic_vector(2 downto 0);
signal sub_mux_sel: std_ulogic_vector(2 downto 0);

-- multiply signals
signal x_to_multiply: MultiplyInputType;
@ -285,6 +294,18 @@ begin

terminate_out <= r.terminate;

-- Result mux
result_mux_sel <= e_in.result_sel when r.busy = '0' else r.resmux;
sub_mux_sel <= e_in.sub_select when r.busy = '0' else r.submux;
with result_mux_sel select alu_result <=
adder_result when "000",
logical_result when "001",
rotator_result when "010",
muldiv_result when "011",
countzero_result when "100",
spr_result when "101",
misc_result when others;

execute1_0: process(clk)
begin
if rising_edge(clk) then
@ -310,7 +331,8 @@ begin
execute1_1: process(all)
variable v : reg_type;
variable a_inv : std_ulogic_vector(63 downto 0);
variable result : std_ulogic_vector(63 downto 0);
variable b_or_m1 : std_ulogic_vector(63 downto 0);
variable addg6s : std_ulogic_vector(63 downto 0);
variable newcrf : std_ulogic_vector(3 downto 0);
variable sum_with_carry : std_ulogic_vector(64 downto 0);
variable result_en : std_ulogic;
@ -348,16 +370,17 @@ begin
variable spr_val : std_ulogic_vector(63 downto 0);
variable addend : std_ulogic_vector(127 downto 0);
variable do_trace : std_ulogic;
variable hold_wr_data : std_ulogic;
variable f : Execute1ToFetch1Type;
variable fv : Execute1ToFPUType;
begin
result := (others => '0');
sum_with_carry := (others => '0');
result_en := '0';
newcrf := (others => '0');
is_branch := '0';
taken_branch := '0';
abs_branch := '0';
hold_wr_data := '0';

v := r;
v.e := Execute1ToWritebackInit;
@ -399,14 +422,24 @@ begin
v.cntz_in_progress := '0';
v.mul_finish := '0';

misc_result <= (others => '0');
spr_result <= (others => '0');
spr_val := (others => '0');

-- Main adder
if e_in.invert_a = '0' then
a_inv := a_in;
else
a_inv := not a_in;
end if;
sum_with_carry := ppc_adde(a_inv, b_in,
if e_in.addm1 = '0' then
b_or_m1 := b_in;
else
b_or_m1 := (others => '1');
end if;
sum_with_carry := ppc_adde(a_inv, b_or_m1,
decode_input_carry(e_in.input_carry, v.e.xerc));
adder_result <= sum_with_carry(63 downto 0);

-- signals to multiply and divide units
sign1 := '0';
@ -432,6 +465,7 @@ begin
abs2 := - signed(b_in);
end if;

-- Interface to multiply and divide units
x_to_multiply <= MultiplyInputInit;
x_to_multiply.is_32bit <= e_in.is_32bit;

@ -479,6 +513,18 @@ begin
x_to_divider.divisor <= x"00000000" & std_ulogic_vector(abs2(31 downto 0));
end if;

case sub_mux_sel(1 downto 0) is
when "00" =>
muldiv_result <= multiply_to_x.result(63 downto 0);
when "01" =>
muldiv_result <= multiply_to_x.result(127 downto 64);
when "10" =>
muldiv_result <= multiply_to_x.result(63 downto 32) &
multiply_to_x.result(63 downto 32);
when others =>
muldiv_result <= divider_to_x.write_reg_data;
end case;

ctrl_tmp <= ctrl;
-- FIXME: run at 512MHz not core freq
ctrl_tmp.tb <= std_ulogic_vector(unsigned(ctrl.tb) + 1);
@ -611,6 +657,8 @@ begin
v.slow_op_rc := e_in.rc;
v.slow_op_oe := e_in.oe;
v.slow_op_xerc := v.e.xerc;
v.resmux := e_in.result_sel;
v.submux := e_in.sub_select;

case_0: case e_in.insn_type is

@ -642,8 +690,7 @@ begin
when OP_NOP | OP_DCBF | OP_DCBST | OP_DCBT | OP_DCBTST | OP_ICBT =>
-- Do nothing
when OP_ADD | OP_CMP | OP_TRAP =>
result := sum_with_carry(63 downto 0);
carry_32 := result(32) xor a_inv(32) xor b_in(32);
carry_32 := sum_with_carry(32) xor a_inv(32) xor b_in(32);
carry_64 := sum_with_carry(64);
if e_in.insn_type = OP_ADD then
if e_in.output_carry = '1' then
@ -724,17 +771,18 @@ begin
end if;
end if;
when OP_ADDG6S =>
result := (others => '0');
addg6s := (others => '0');
for i in 0 to 14 loop
lo := i * 4;
hi := (i + 1) * 4;
if (a_in(hi) xor b_in(hi) xor sum_with_carry(hi)) = '0' then
result(lo + 3 downto lo) := "0110";
addg6s(lo + 3 downto lo) := "0110";
end if;
end loop;
if sum_with_carry(64) = '0' then
result(63 downto 60) := "0110";
addg6s(63 downto 60) := "0110";
end if;
misc_result <= addg6s;
result_en := '1';
when OP_CMPRB =>
newcrf := ppc_cmprb(a_in, b_in, insn_l(e_in.insn));
@ -754,7 +802,6 @@ begin
newcrf & newcrf & newcrf & newcrf;
when OP_AND | OP_OR | OP_XOR | OP_POPCNT | OP_PRTY | OP_CMPB | OP_EXTS |
OP_BPERM | OP_BCD =>
result := logical_result;
result_en := '1';
when OP_B =>
is_branch := '1';
@ -765,12 +812,11 @@ begin
end if;
when OP_BC =>
-- read_data1 is CTR
v.e.write_reg := fast_spr_num(SPR_CTR);
bo := insn_bo(e_in.insn);
bi := insn_bi(e_in.insn);
if bo(4-2) = '0' then
result := std_ulogic_vector(unsigned(a_in) - 1);
result_en := '1';
v.e.write_reg := fast_spr_num(SPR_CTR);
end if;
is_branch := '1';
taken_branch := ppc_bc_taken(bo, bi, cr_in, a_in);
@ -781,12 +827,11 @@ begin
when OP_BCREG =>
-- read_data1 is CTR
-- read_data2 is target register (CTR, LR or TAR)
v.e.write_reg := fast_spr_num(SPR_CTR);
bo := insn_bo(e_in.insn);
bi := insn_bi(e_in.insn);
if bo(4-2) = '0' and e_in.insn(10) = '0' then
result := std_ulogic_vector(unsigned(a_in) - 1);
result_en := '1';
v.e.write_reg := fast_spr_num(SPR_CTR);
end if;
is_branch := '1';
taken_branch := ppc_bc_taken(bo, bi, cr_in, a_in);
@ -825,9 +870,9 @@ begin
when OP_ISEL =>
crbit := to_integer(unsigned(insn_bc(e_in.insn)));
if cr_in(31-crbit) = '1' then
result := a_in;
misc_result <= a_in;
else
result := b_in;
misc_result <= b_in;
end if;
result_en := '1';
when OP_CROP =>
@ -885,38 +930,38 @@ begin
if random_err = '0' then
case e_in.insn(17 downto 16) is
when "00" =>
result := x"00000000" & random_cond(31 downto 0);
misc_result <= x"00000000" & random_cond(31 downto 0);
when "10" =>
result := random_raw;
misc_result <= random_raw;
when others =>
result := random_cond;
misc_result <= random_cond;
end case;
else
result := (others => '1');
misc_result <= (others => '1');
end if;
result_en := '1';
when OP_MFMSR =>
result := ctrl.msr;
misc_result <= ctrl.msr;
result_en := '1';
when OP_MFSPR =>
report "MFSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) &
"=" & to_hstring(a_in);
result_en := '1';
if is_fast_spr(e_in.read_reg1) then
result := a_in;
if decode_spr_num(e_in.insn) = SPR_XER then
spr_val := a_in;
if decode_spr_num(e_in.insn) = SPR_XER then
-- bits 0:31 and 35:43 are treated as reserved and return 0s when read using mfxer
result(63 downto 32) := (others => '0');
result(63-32) := v.e.xerc.so;
result(63-33) := v.e.xerc.ov;
result(63-34) := v.e.xerc.ca;
result(63-35 downto 63-43) := "000000000";
result(63-44) := v.e.xerc.ov32;
result(63-45) := v.e.xerc.ca32;
end if;
spr_val(63 downto 32) := (others => '0');
spr_val(63-32) := v.e.xerc.so;
spr_val(63-33) := v.e.xerc.ov;
spr_val(63-34) := v.e.xerc.ca;
spr_val(63-35 downto 63-43) := "000000000";
spr_val(63-44) := v.e.xerc.ov32;
spr_val(63-45) := v.e.xerc.ca32;
end if;
else
spr_val := c_in;
case decode_spr_num(e_in.insn) is
case decode_spr_num(e_in.insn) is
when SPR_TB =>
spr_val := ctrl.tb;
when SPR_TBU =>
@ -940,22 +985,23 @@ begin
if ctrl.msr(MSR_PR) = '1' then
illegal := '1';
end if;
end case;
result := spr_val;
end if;
end case;
end if;
spr_result <= spr_val;

when OP_MFCR =>
if e_in.insn(20) = '0' then
-- mfcr
result := x"00000000" & cr_in;
misc_result <= x"00000000" & cr_in;
else
-- mfocrf
crnum := fxm_to_num(insn_fxm(e_in.insn));
result := (others => '0');
misc_result <= (others => '0');
for i in 0 to 7 loop
lo := (7-i)*4;
hi := lo + 3;
if crnum = i then
result(hi downto lo) := cr_in(hi downto lo);
misc_result(hi downto lo) <= cr_in(hi downto lo);
end if;
end loop;
end if;
@ -999,7 +1045,6 @@ begin
report "MTSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) &
"=" & to_hstring(c_in);
if is_fast_spr(e_in.write_reg) then
result := c_in;
result_en := '1';
if decode_spr_num(e_in.insn) = SPR_XER then
v.e.xerc.so := c_in(63-32);
@ -1025,7 +1070,6 @@ begin
end case;
end if;
when OP_RLC | OP_RLCL | OP_RLCR | OP_SHL | OP_SHR | OP_EXTSWSLI =>
result := rotator_result;
if e_in.output_carry = '1' then
set_carry(v.e, rotator_carry, rotator_carry);
end if;
@ -1033,11 +1077,11 @@ begin
when OP_SETB =>
bfa := insn_bfa(e_in.insn);
crbit := to_integer(unsigned(bfa)) * 4;
result := (others => '0');
misc_result <= (others => '0');
if cr_in(31 - crbit) = '1' then
result := (others => '1');
misc_result <= (others => '1');
elsif cr_in(30 - crbit) = '1' then
result(0) := '1';
misc_result(0) <= '1';
end if;

when OP_ISYNC =>
@ -1130,10 +1174,9 @@ begin
v.e.valid := '1';
-- Keep r.e.write_data unchanged next cycle in case it is needed
-- for a forwarded result (e.g. for CTR).
result := r.e.write_data;
hold_wr_data := '1';
elsif r.cntz_in_progress = '1' then
-- cnt[lt]z always takes two cycles
result := countzero_result;
result_en := '1';
v.e.write_reg := gpr_to_gspr(r.slow_op_dest);
v.e.rc := r.slow_op_rc;
@ -1144,18 +1187,7 @@ begin
(r.div_in_progress = '1' and divider_to_x.valid = '1') then
if r.mul_in_progress = '1' then
overflow := '0';
case r.slow_op_insn is
when OP_MUL_H32 =>
result := multiply_to_x.result(63 downto 32) &
multiply_to_x.result(63 downto 32);
when OP_MUL_H64 =>
result := multiply_to_x.result(127 downto 64);
when others =>
-- i.e. OP_MUL_L64
result := multiply_to_x.result(63 downto 0);
end case;
else
result := divider_to_x.write_reg_data;
overflow := divider_to_x.overflow;
end if;
if r.mul_in_progress = '1' and r.slow_op_oe = '1' then
@ -1184,7 +1216,7 @@ begin
v.div_in_progress := r.div_in_progress;
end if;
elsif r.mul_finish = '1' then
result := r.e.write_data;
hold_wr_data := '1';
result_en := '1';
v.e.write_reg := gpr_to_gspr(r.slow_op_dest);
v.e.rc := r.slow_op_rc;
@ -1225,7 +1257,11 @@ begin
v.trace_next := '1';
end if;

v.e.write_data := result;
if hold_wr_data = '0' then
v.e.write_data := alu_result;
else
v.e.write_data := r.e.write_data;
end if;
v.e.write_enable := result_en and not exception;

-- generate DSI or DSegI for load/store exceptions

@ -197,8 +197,7 @@ begin
tmp := x"00" & dpd_to_bcd(rs(51 downto 42)) & dpd_to_bcd(rs(41 downto 32)) &
x"00" & dpd_to_bcd(rs(19 downto 10)) & dpd_to_bcd(rs(9 downto 0));
end if;
when others =>
-- EXTS
when OP_EXTS =>
-- note datalen is a 1-hot encoding
negative := (datalen(0) and rs(7)) or
(datalen(1) and rs(15)) or
@ -211,6 +210,9 @@ begin
tmp(15 downto 8) := rs(15 downto 8);
end if;
tmp(7 downto 0) := rs(7 downto 0);
when others =>
-- e.g. OP_MTSPR
tmp := rs;
end case;

result <= tmp;

Loading…
Cancel
Save