Add basic XER support

The carry is currently internal to execute1. We don't handle any of
the other XER fields.

This creates a type called "xer_common_t" that contains the commonly
used XER bits (CA, CA32, SO, OV, OV32).

The value is stored in the CR file (though it could be a separate
module). The rest of the bits will be implemented as a separate
SPR and the two parts reconciled in mfspr/mtspr in later commits.

We always read XER in decode2 (there is little point not to)
and send it down all pipeline branches as it will be needed in
writeback for all types of instructions when CR0:SO needs to be
updated (such forms exist for all pipeline branches even if we don't
yet implement them).

To avoid having to track XER hazards, we forward it back in EX1. This
assumes that other pipeline branches that can modify it (mult and div)
are running single issue for now.

One additional hazard to beware of is an XER:SO modifying instruction
in EX1 followed immediately by a store conditional. Due to our writeback
latency, the store will go down the LSU with the previous XER value,
thus the stcx. will set CR0:SO using an obsolete SO value.

I doubt there exists any code relying on this behaviour being correct
but we should account for it regardless, possibly by ensuring that
stcx. remains single issue initially, or later by adding some minimal
tracking or moving the LSU into the same pipeline as execute.

Still missing: some obscure XER-affecting instructions such as addex or mcrxrx.

[paulus@ozlabs.org - fix CA32 and OV32 for OP_ADD, fix order of
 arguments to set_ov]

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/122/head
Benjamin Herrenschmidt 5 years ago committed by Paul Mackerras
parent f291efa266
commit 501b6daf9b

@ -12,15 +12,28 @@ package common is


function decode_spr_num(insn: std_ulogic_vector(31 downto 0)) return spr_num_t; function decode_spr_num(insn: std_ulogic_vector(31 downto 0)) return spr_num_t;


constant SPR_XER : spr_num_t := 1;
constant SPR_LR : spr_num_t := 8; constant SPR_LR : spr_num_t := 8;
constant SPR_CTR : spr_num_t := 9; constant SPR_CTR : spr_num_t := 9;
constant SPR_TB : spr_num_t := 268; constant SPR_TB : spr_num_t := 268;


-- The XER is split: the common bits (CA, OV, SO, OV32 and CA32) are
-- in the CR file as a kind of CR extension (with a separate write
-- control). The rest is stored as a fast SPR.
--
-- This record is the bundle that is read from the CR file in decode2,
-- carried down every pipeline branch and handed back to the CR file by
-- writeback when a unit raises its write_xerc_enable.
--
-- Bit positions quoted below are the ones the mfspr/mtspr XER code in
-- execute1 uses (index into a 63 downto 0 vector).
type xer_common_t is record
-- CA: carry, result bit 63-34 of mfspr XER.
ca : std_ulogic;
-- CA32: 32-bit carry, result bit 63-45 of mfspr XER.
ca32 : std_ulogic;
-- OV: overflow, result bit 63-33 of mfspr XER.
ov : std_ulogic;
-- OV32: 32-bit overflow, result bit 63-44 of mfspr XER.
ov32 : std_ulogic;
-- SO: summary overflow, result bit 63-32 of mfspr XER; writeback
-- copies it into CR0 for RC-form instructions.
so : std_ulogic;
end record;
-- Reset/idle value: every common XER bit cleared.
constant xerc_init : xer_common_t := (others => '0');

-- This needs to die...
type ctrl_t is record type ctrl_t is record
lr: std_ulogic_vector(63 downto 0); lr: std_ulogic_vector(63 downto 0);
ctr: std_ulogic_vector(63 downto 0); ctr: std_ulogic_vector(63 downto 0);
tb: std_ulogic_vector(63 downto 0); tb: std_ulogic_vector(63 downto 0);
carry: std_ulogic;
end record; end record;


type Fetch1ToIcacheType is record type Fetch1ToIcacheType is record
@ -64,8 +77,10 @@ package common is
read_data2: std_ulogic_vector(63 downto 0); read_data2: std_ulogic_vector(63 downto 0);
read_data3: std_ulogic_vector(63 downto 0); read_data3: std_ulogic_vector(63 downto 0);
cr: std_ulogic_vector(31 downto 0); cr: std_ulogic_vector(31 downto 0);
xerc: xer_common_t;
lr: std_ulogic; lr: std_ulogic;
rc: std_ulogic; rc: std_ulogic;
oe: std_ulogic;
invert_a: std_ulogic; invert_a: std_ulogic;
invert_out: std_ulogic; invert_out: std_ulogic;
input_carry: carry_in_t; input_carry: carry_in_t;
@ -78,9 +93,9 @@ package common is
data_len: std_ulogic_vector(3 downto 0); data_len: std_ulogic_vector(3 downto 0);
end record; end record;
constant Decode2ToExecute1Init : Decode2ToExecute1Type := constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
(valid => '0', insn_type => OP_ILLEGAL, lr => '0', rc => '0', invert_a => '0', (valid => '0', insn_type => OP_ILLEGAL, lr => '0', rc => '0', oe => '0', invert_a => '0',
invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0', invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0',
is_32bit => '0', is_signed => '0', others => (others => '0')); is_32bit => '0', is_signed => '0', xerc => xerc_init, others => (others => '0'));


type Decode2ToMultiplyType is record type Decode2ToMultiplyType is record
valid: std_ulogic; valid: std_ulogic;
@ -89,8 +104,13 @@ package common is
data1: std_ulogic_vector(64 downto 0); data1: std_ulogic_vector(64 downto 0);
data2: std_ulogic_vector(64 downto 0); data2: std_ulogic_vector(64 downto 0);
rc: std_ulogic; rc: std_ulogic;
oe: std_ulogic;
is_32bit: std_ulogic;
xerc: xer_common_t;
end record; end record;
constant Decode2ToMultiplyInit : Decode2ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL, rc => '0', others => (others => '0')); constant Decode2ToMultiplyInit : Decode2ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL, rc => '0',
oe => '0', is_32bit => '0', xerc => xerc_init,
others => (others => '0'));


type Decode2ToDividerType is record type Decode2ToDividerType is record
valid: std_ulogic; valid: std_ulogic;
@ -102,8 +122,13 @@ package common is
is_extended: std_ulogic; is_extended: std_ulogic;
is_modulus: std_ulogic; is_modulus: std_ulogic;
rc: std_ulogic; rc: std_ulogic;
oe: std_ulogic;
xerc: xer_common_t;
end record; end record;
constant Decode2ToDividerInit: Decode2ToDividerType := (valid => '0', is_signed => '0', is_32bit => '0', is_extended => '0', is_modulus => '0', rc => '0', others => (others => '0')); constant Decode2ToDividerInit: Decode2ToDividerType := (valid => '0', is_signed => '0', is_32bit => '0',
is_extended => '0', is_modulus => '0',
rc => '0', oe => '0', xerc => xerc_init,
others => (others => '0'));


type Decode2ToRegisterFileType is record type Decode2ToRegisterFileType is record
read1_enable : std_ulogic; read1_enable : std_ulogic;
@ -126,6 +151,7 @@ package common is


type CrFileToDecode2Type is record type CrFileToDecode2Type is record
read_cr_data : std_ulogic_vector(31 downto 0); read_cr_data : std_ulogic_vector(31 downto 0);
read_xerc_data : xer_common_t;
end record; end record;


type Execute1ToFetch1Type is record type Execute1ToFetch1Type is record
@ -146,8 +172,11 @@ package common is
sign_extend : std_ulogic; -- do we need to sign extend? sign_extend : std_ulogic; -- do we need to sign extend?
update : std_ulogic; -- is this an update instruction? update : std_ulogic; -- is this an update instruction?
update_reg : std_ulogic_vector(4 downto 0); -- if so, the register to update update_reg : std_ulogic_vector(4 downto 0); -- if so, the register to update
xerc : xer_common_t;
end record; end record;
constant Decode2ToLoadstore1Init : Decode2ToLoadstore1Type := (valid => '0', load => '0', byte_reverse => '0', sign_extend => '0', update => '0', others => (others => '0')); constant Decode2ToLoadstore1Init : Decode2ToLoadstore1Type := (valid => '0', load => '0', byte_reverse => '0',
sign_extend => '0', update => '0', xerc => xerc_init,
others => (others => '0'));


type Loadstore1ToDcacheType is record type Loadstore1ToDcacheType is record
valid : std_ulogic; valid : std_ulogic;
@ -161,6 +190,7 @@ package common is
sign_extend : std_ulogic; sign_extend : std_ulogic;
update : std_ulogic; update : std_ulogic;
update_reg : std_ulogic_vector(4 downto 0); update_reg : std_ulogic_vector(4 downto 0);
xerc : xer_common_t;
end record; end record;


type DcacheToWritebackType is record type DcacheToWritebackType is record
@ -173,8 +203,11 @@ package common is
sign_extend : std_ulogic; sign_extend : std_ulogic;
byte_reverse : std_ulogic; byte_reverse : std_ulogic;
second_word : std_ulogic; second_word : std_ulogic;
xerc : xer_common_t;
end record; end record;
constant DcacheToWritebackInit : DcacheToWritebackType := (valid => '0', write_enable => '0', sign_extend => '0', byte_reverse => '0', second_word => '0', others => (others => '0')); constant DcacheToWritebackInit : DcacheToWritebackType := (valid => '0', write_enable => '0', sign_extend => '0',
byte_reverse => '0', second_word => '0', xerc => xerc_init,
others => (others => '0'));


type Execute1ToWritebackType is record type Execute1ToWritebackType is record
valid: std_ulogic; valid: std_ulogic;
@ -186,9 +219,14 @@ package common is
write_cr_enable : std_ulogic; write_cr_enable : std_ulogic;
write_cr_mask : std_ulogic_vector(7 downto 0); write_cr_mask : std_ulogic_vector(7 downto 0);
write_cr_data : std_ulogic_vector(31 downto 0); write_cr_data : std_ulogic_vector(31 downto 0);
write_xerc_enable : std_ulogic;
xerc : xer_common_t;
sign_extend: std_ulogic; sign_extend: std_ulogic;
end record; end record;
constant Execute1ToWritebackInit : Execute1ToWritebackType := (valid => '0', rc => '0', write_enable => '0', write_cr_enable => '0', sign_extend => '0', others => (others => '0')); constant Execute1ToWritebackInit : Execute1ToWritebackType := (valid => '0', rc => '0', write_enable => '0',
write_cr_enable => '0', sign_extend => '0',
write_xerc_enable => '0', xerc => xerc_init,
others => (others => '0'));


type MultiplyToWritebackType is record type MultiplyToWritebackType is record
valid: std_ulogic; valid: std_ulogic;
@ -196,9 +234,14 @@ package common is
write_reg_enable : std_ulogic; write_reg_enable : std_ulogic;
write_reg_nr: std_ulogic_vector(4 downto 0); write_reg_nr: std_ulogic_vector(4 downto 0);
write_reg_data: std_ulogic_vector(63 downto 0); write_reg_data: std_ulogic_vector(63 downto 0);
write_xerc_enable : std_ulogic;
xerc : xer_common_t;
rc: std_ulogic; rc: std_ulogic;
end record; end record;
constant MultiplyToWritebackInit : MultiplyToWritebackType := (valid => '0', write_reg_enable => '0', rc => '0', others => (others => '0')); constant MultiplyToWritebackInit : MultiplyToWritebackType := (valid => '0', write_reg_enable => '0',
rc => '0', write_xerc_enable => '0',
xerc => xerc_init,
others => (others => '0'));


type DividerToWritebackType is record type DividerToWritebackType is record
valid: std_ulogic; valid: std_ulogic;
@ -206,9 +249,14 @@ package common is
write_reg_enable : std_ulogic; write_reg_enable : std_ulogic;
write_reg_nr: std_ulogic_vector(4 downto 0); write_reg_nr: std_ulogic_vector(4 downto 0);
write_reg_data: std_ulogic_vector(63 downto 0); write_reg_data: std_ulogic_vector(63 downto 0);
write_xerc_enable : std_ulogic;
xerc : xer_common_t;
rc: std_ulogic; rc: std_ulogic;
end record; end record;
constant DividerToWritebackInit : DividerToWritebackType := (valid => '0', write_reg_enable => '0', rc => '0', others => (others => '0')); constant DividerToWritebackInit : DividerToWritebackType := (valid => '0', write_reg_enable => '0',
rc => '0', write_xerc_enable => '0',
xerc => xerc_init,
others => (others => '0'));


type WritebackToRegisterFileType is record type WritebackToRegisterFileType is record
write_reg : std_ulogic_vector(4 downto 0); write_reg : std_ulogic_vector(4 downto 0);
@ -221,9 +269,12 @@ package common is
write_cr_enable : std_ulogic; write_cr_enable : std_ulogic;
write_cr_mask : std_ulogic_vector(7 downto 0); write_cr_mask : std_ulogic_vector(7 downto 0);
write_cr_data : std_ulogic_vector(31 downto 0); write_cr_data : std_ulogic_vector(31 downto 0);
write_xerc_enable : std_ulogic;
write_xerc_data : xer_common_t;
end record; end record;
constant WritebackToCrFileInit : WritebackToCrFileType := (write_cr_enable => '0', others => (others => '0')); constant WritebackToCrFileInit : WritebackToCrFileType := (write_cr_enable => '0', write_xerc_enable => '0',

write_xerc_data => xerc_init,
others => (others => '0'));
end common; end common;


package body common is package body common is

@ -18,7 +18,9 @@ end entity cr_file;


architecture behaviour of cr_file is architecture behaviour of cr_file is
signal crs : std_ulogic_vector(31 downto 0) := (others => '0'); signal crs : std_ulogic_vector(31 downto 0) := (others => '0');
signal crs_updated : std_ulogic_vector(31 downto 0) := (others => '0'); signal crs_updated : std_ulogic_vector(31 downto 0);
signal xerc : xer_common_t := xerc_init;
signal xerc_updated : xer_common_t;
begin begin
cr_create_0: process(all) cr_create_0: process(all)
variable hi, lo : integer := 0; variable hi, lo : integer := 0;
@ -35,6 +37,13 @@ begin
end loop; end loop;


crs_updated <= cr_tmp; crs_updated <= cr_tmp;

if w_in.write_xerc_enable = '1' then
xerc_updated <= w_in.write_xerc_data;
else
xerc_updated <= xerc;
end if;

end process; end process;


-- synchronous writes -- synchronous writes
@ -45,6 +54,10 @@ begin
report "Writing " & to_hstring(w_in.write_cr_data) & " to CR mask " & to_hstring(w_in.write_cr_mask); report "Writing " & to_hstring(w_in.write_cr_data) & " to CR mask " & to_hstring(w_in.write_cr_mask);
crs <= crs_updated; crs <= crs_updated;
end if; end if;
if w_in.write_xerc_enable = '1' then
report "Writing XERC";
xerc <= xerc_updated;
end if;
end if; end if;
end process; end process;


@ -56,5 +69,6 @@ begin
report "Reading CR " & to_hstring(crs_updated); report "Reading CR " & to_hstring(crs_updated);
end if; end if;
d_out.read_cr_data <= crs_updated; d_out.read_cr_data <= crs_updated;
d_out.read_xerc_data <= xerc_updated;
end process; end process;
end architecture behaviour; end architecture behaviour;

@ -185,6 +185,7 @@ architecture rtl of dcache is
length : std_ulogic_vector(3 downto 0); length : std_ulogic_vector(3 downto 0);
sign_extend : std_ulogic; sign_extend : std_ulogic;
byte_reverse : std_ulogic; byte_reverse : std_ulogic;
xerc : xer_common_t;
end record; end record;


signal r2 : reg_stage_2_t; signal r2 : reg_stage_2_t;
@ -469,6 +470,7 @@ begin
d_out.sign_extend <= r2.sign_extend; d_out.sign_extend <= r2.sign_extend;
d_out.byte_reverse <= r2.byte_reverse; d_out.byte_reverse <= r2.byte_reverse;
d_out.second_word <= '0'; d_out.second_word <= '0';
d_out.xerc <= r2.xerc;


-- We have a valid load or store hit or we just completed a slow -- We have a valid load or store hit or we just completed a slow
-- op such as a load miss, a NC load or a store -- op such as a load miss, a NC load or a store
@ -518,6 +520,7 @@ begin
d_out.sign_extend <= r1.req.sign_extend; d_out.sign_extend <= r1.req.sign_extend;
d_out.byte_reverse <= r1.req.byte_reverse; d_out.byte_reverse <= r1.req.byte_reverse;
d_out.write_len <= r1.req.length; d_out.write_len <= r1.req.length;
d_out.xerc <= r1.req.xerc;
end if; end if;


-- If it's a store or a non-update load form, complete now -- If it's a store or a non-update load form, complete now
@ -539,6 +542,7 @@ begin
d_out.write_len <= "1000"; d_out.write_len <= "1000";
d_out.sign_extend <= '0'; d_out.sign_extend <= '0';
d_out.byte_reverse <= '0'; d_out.byte_reverse <= '0';
d_out.xerc <= r1.req.xerc;


-- If it was a load, this completes the operation (load with -- If it was a load, this completes the operation (load with
-- update case). -- update case).

@ -131,6 +131,22 @@ architecture behaviour of decode2 is
end case; end case;
end; end;


-- Work out the OE (overflow enable) bit for the instruction being decoded.
--
-- The decode table has no dedicated OE column yet, so its "rc" column is
-- reused: only rows marked RC take the OE bit from the instruction word,
-- every other row yields '0'.
--
-- This is not entirely correct architecturally: for mulhd and mulhdu the
-- OE field is reserved. It remains to be seen what an actual POWER9 does
-- if it is set on those instructions; for now that case is filtered
-- further down, where the multiplier oe input is assigned.
--
function decode_oe (t : rc_t; insn_in : std_ulogic_vector(31 downto 0)) return std_ulogic is
begin
    if t = RC then
        -- Instruction form carries an OE field: take it from the opcode.
        return insn_oe(insn_in);
    else
        return '0';
    end if;
end;

-- issue control signals -- issue control signals
signal control_valid_in : std_ulogic; signal control_valid_in : std_ulogic;
signal control_valid_out : std_ulogic; signal control_valid_out : std_ulogic;
@ -255,7 +271,9 @@ begin
v.e.read_data3 := decoded_reg_c.data; v.e.read_data3 := decoded_reg_c.data;
v.e.write_reg := decode_output_reg(d_in.decode.output_reg_a, d_in.insn); v.e.write_reg := decode_output_reg(d_in.decode.output_reg_a, d_in.insn);
v.e.rc := decode_rc(d_in.decode.rc, d_in.insn); v.e.rc := decode_rc(d_in.decode.rc, d_in.insn);
v.e.oe := decode_oe(d_in.decode.rc, d_in.insn);
v.e.cr := c_in.read_cr_data; v.e.cr := c_in.read_cr_data;
v.e.xerc := c_in.read_xerc_data;
v.e.invert_a := d_in.decode.invert_a; v.e.invert_a := d_in.decode.invert_a;
v.e.invert_out := d_in.decode.invert_out; v.e.invert_out := d_in.decode.invert_out;
v.e.input_carry := d_in.decode.input_carry; v.e.input_carry := d_in.decode.input_carry;
@ -274,6 +292,11 @@ begin
mul_b := decoded_reg_b.data; mul_b := decoded_reg_b.data;
v.m.write_reg := decode_output_reg(d_in.decode.output_reg_a, d_in.insn); v.m.write_reg := decode_output_reg(d_in.decode.output_reg_a, d_in.insn);
v.m.rc := decode_rc(d_in.decode.rc, d_in.insn); v.m.rc := decode_rc(d_in.decode.rc, d_in.insn);
v.m.xerc := c_in.read_xerc_data;
if v.m.insn_type = OP_MUL_L64 then
v.m.oe := decode_oe(d_in.decode.rc, d_in.insn);
end if;
v.m.is_32bit := d_in.decode.is_32bit;


if d_in.decode.is_32bit = '1' then if d_in.decode.is_32bit = '1' then
if d_in.decode.is_signed = '1' then if d_in.decode.is_signed = '1' then
@ -337,6 +360,8 @@ begin
end if; end if;
end if; end if;
v.d.rc := decode_rc(d_in.decode.rc, d_in.insn); v.d.rc := decode_rc(d_in.decode.rc, d_in.insn);
v.d.xerc := c_in.read_xerc_data;
v.d.oe := decode_oe(d_in.decode.rc, d_in.insn);


-- load/store unit -- load/store unit
v.l.update_reg := decoded_reg_a.reg; v.l.update_reg := decoded_reg_a.reg;
@ -355,6 +380,7 @@ begin
v.l.byte_reverse := d_in.decode.byte_reverse; v.l.byte_reverse := d_in.decode.byte_reverse;
v.l.sign_extend := d_in.decode.sign_extend; v.l.sign_extend := d_in.decode.sign_extend;
v.l.update := d_in.decode.update; v.l.update := d_in.decode.update;
v.l.xerc := c_in.read_xerc_data;


-- issue control -- issue control
control_valid_in <= d_in.valid; control_valid_in <= d_in.valid;

@ -36,7 +36,8 @@ architecture behaviour of divider is
signal overflow : std_ulogic; signal overflow : std_ulogic;
signal ovf32 : std_ulogic; signal ovf32 : std_ulogic;
signal did_ovf : std_ulogic; signal did_ovf : std_ulogic;

signal oe : std_ulogic;
signal xerc : xer_common_t;
begin begin
divider_0: process(clk) divider_0: process(clk)
begin begin
@ -62,6 +63,8 @@ begin
is_32bit <= d_in.is_32bit; is_32bit <= d_in.is_32bit;
is_signed <= d_in.is_signed; is_signed <= d_in.is_signed;
rc <= d_in.rc; rc <= d_in.rc;
oe <= d_in.oe;
xerc <= d_in.xerc;
count <= "1111111"; count <= "1111111";
running <= '1'; running <= '1';
overflow <= '0'; overflow <= '0';
@ -147,13 +150,25 @@ begin
divider_out: process(clk) divider_out: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
d_out.valid <= '0';
d_out.write_reg_data <= oresult; d_out.write_reg_data <= oresult;
d_out.write_reg_enable <= '0';
d_out.write_xerc_enable <= '0';
d_out.xerc <= xerc;
if count = "1000000" then if count = "1000000" then
d_out.valid <= '1'; d_out.valid <= '1';
d_out.write_reg_enable <= '1'; d_out.write_reg_enable <= '1';
else d_out.write_xerc_enable <= oe;
d_out.valid <= '0';
d_out.write_reg_enable <= '0'; -- We must test oe because the RC update code in writeback
-- will use the xerc value to set CR0:SO so we must not clobber
-- xerc if OE wasn't set.
--
if oe = '1' then
d_out.xerc.ov <= did_ovf;
d_out.xerc.ov32 <= did_ovf;
d_out.xerc.so <= xerc.so or did_ovf;
end if;
end if; end if;
end if; end if;
end process; end process;

@ -31,14 +31,13 @@ end entity execute1;


architecture behaviour of execute1 is architecture behaviour of execute1 is
type reg_type is record type reg_type is record
--f : Execute1ToFetch1Type;
e : Execute1ToWritebackType; e : Execute1ToWritebackType;
end record; end record;


signal r, rin : reg_type; signal r, rin : reg_type;


signal ctrl: ctrl_t := (carry => '0', others => (others => '0')); signal ctrl: ctrl_t := (others => (others => '0'));
signal ctrl_tmp: ctrl_t := (carry => '0', others => (others => '0')); signal ctrl_tmp: ctrl_t := (others => (others => '0'));


signal right_shift, rot_clear_left, rot_clear_right: std_ulogic; signal right_shift, rot_clear_left, rot_clear_right: std_ulogic;
signal rotator_result: std_ulogic_vector(63 downto 0); signal rotator_result: std_ulogic_vector(63 downto 0);
@ -46,17 +45,46 @@ architecture behaviour of execute1 is
signal logical_result: std_ulogic_vector(63 downto 0); signal logical_result: std_ulogic_vector(63 downto 0);
signal countzero_result: std_ulogic_vector(63 downto 0); signal countzero_result: std_ulogic_vector(63 downto 0);


function decode_input_carry (carry_sel : carry_in_t; ca_in : std_ulogic) return std_ulogic is procedure set_carry(e: inout Execute1ToWritebackType;
carry32 : in std_ulogic;
carry : in std_ulogic) is
begin begin
case carry_sel is e.xerc.ca32 := carry32;
e.xerc.ca := carry;
e.write_xerc_enable := '1';
end;

-- Record an overflow result in the execute1 -> writeback bundle.
--
--   e    : bundle being built for writeback (updated in place)
--   ov   : new value for XER:OV
--   ov32 : new value for XER:OV32
--
-- XER:SO is sticky here: it is raised when ov is '1' and is otherwise
-- left at whatever value the bundle already carries.
procedure set_ov(e: inout Execute1ToWritebackType;
                 ov : in std_ulogic;
                 ov32 : in std_ulogic) is
begin
    -- Flag the bundle so that the common XER bits get written back.
    e.write_xerc_enable := '1';

    e.xerc.ov := ov;
    e.xerc.ov32 := ov32;

    case ov is
        when '1' =>
            e.xerc.so := '1';
        when others =>
            null;
    end case;
end;

-- Signed-overflow detection for an addition, from the top bits only.
--
--   msb_a, msb_b : sign bits of the two addends
--   ca           : carry out of the sign bit position
--   msb_r        : sign bit of the sum
--
-- Returns '1' when both addends have the same sign bit and the carry
-- out differs from the sign bit of the result.
function calc_ov(msb_a : std_ulogic; msb_b: std_ulogic;
                 ca: std_ulogic; msb_r: std_ulogic) return std_ulogic is
    variable same_sign   : std_ulogic;
    variable carry_differs : std_ulogic;
begin
    same_sign     := msb_a xnor msb_b;
    carry_differs := ca xor msb_r;
    return carry_differs and same_sign;
end;

function decode_input_carry(ic : carry_in_t;
xerc : xer_common_t) return std_ulogic is
begin
case ic is
when ZERO => when ZERO =>
return '0'; return '0';
when CA => when CA =>
return ca_in; return xerc.ca;
when ONE => when ONE =>
return '1'; return '1';
end case; end case;
end; end;

begin begin


rotator_0: entity work.rotator rotator_0: entity work.rotator
@ -117,6 +145,7 @@ begin
variable bf, bfa : std_ulogic_vector(2 downto 0); variable bf, bfa : std_ulogic_vector(2 downto 0);
variable l : std_ulogic; variable l : std_ulogic;
variable next_nia : std_ulogic_vector(63 downto 0); variable next_nia : std_ulogic_vector(63 downto 0);
variable carry_32, carry_64 : std_ulogic;
begin begin
result := (others => '0'); result := (others => '0');
result_with_carry := (others => '0'); result_with_carry := (others => '0');
@ -125,7 +154,41 @@ begin


v := r; v := r;
v.e := Execute1ToWritebackInit; v.e := Execute1ToWritebackInit;
--v.f := Execute1ToFetch1TypeInit;
-- XER forwarding. To avoid having to track XER hazards, we
-- use the previously latched value.
--
-- If the XER was modified by a multiply or a divide, those are
-- single issue, we'll get the up to date value from decode2 from
-- the register file.
--
-- If it was modified by an instruction older than the previous
-- one in EX1, it will have also hit writeback and will be up
-- to date in decode2.
--
-- That leaves us with the case where it was updated by the previous
-- instruction in EX1. In that case, we can forward it back here.
--
-- This will break if we allow pipelining of multiply and divide,
-- but ideally, those should go via EX1 anyway and run as a state
-- machine from here.
--
-- One additional hazard to beware of is an XER:SO modifying instruction
-- in EX1 followed immediately by a store conditional. Due to our
-- writeback latency, the store will go down the LSU with the previous
-- XER value, thus the stcx. will set CR0:SO using an obsolete SO value.
--
-- We will need to handle that if we ever make stcx. not single issue
--
-- We always pass a valid XER value down to writeback even when
-- we aren't updating it, in order for XER:SO -> CR0:SO transfer
-- to work for RC instructions.
--
if r.e.write_xerc_enable = '1' then
v.e.xerc := r.e.xerc;
else
v.e.xerc := e_in.xerc;
end if;


ctrl_tmp <= ctrl; ctrl_tmp <= ctrl;
-- FIXME: run at 512MHz not core freq -- FIXME: run at 512MHz not core freq
@ -163,10 +226,18 @@ begin
else else
a_inv := not e_in.read_data1; a_inv := not e_in.read_data1;
end if; end if;
result_with_carry := ppc_adde(a_inv, e_in.read_data2, decode_input_carry(e_in.input_carry, ctrl.carry)); result_with_carry := ppc_adde(a_inv, e_in.read_data2,
decode_input_carry(e_in.input_carry, v.e.xerc));
result := result_with_carry(63 downto 0); result := result_with_carry(63 downto 0);
if e_in.output_carry then carry_32 := result(32) xor a_inv(32) xor e_in.read_data2(32);
ctrl_tmp.carry <= result_with_carry(64); carry_64 := result_with_carry(64);
if e_in.output_carry = '1' then
set_carry(v.e, carry_32, carry_64);
end if;
if e_in.oe = '1' then
set_ov(v.e,
calc_ov(a_inv(63), e_in.read_data2(63), carry_64, result_with_carry(63)),
calc_ov(a_inv(31), e_in.read_data2(31), carry_32, result_with_carry(31)));
end if; end if;
result_en := '1'; result_en := '1';
when OP_AND | OP_OR | OP_XOR => when OP_AND | OP_OR | OP_XOR =>
@ -270,6 +341,13 @@ begin
end loop; end loop;
when OP_MFSPR => when OP_MFSPR =>
case decode_spr_num(e_in.insn) is case decode_spr_num(e_in.insn) is
when SPR_XER =>
result := ( 63-32 => v.e.xerc.so,
63-33 => v.e.xerc.ov,
63-34 => v.e.xerc.ca,
63-44 => v.e.xerc.ov32,
63-45 => v.e.xerc.ca32,
others => '0');
when SPR_CTR => when SPR_CTR =>
result := ctrl.ctr; result := ctrl.ctr;
when SPR_LR => when SPR_LR =>
@ -310,6 +388,13 @@ begin
v.e.write_cr_data := e_in.read_data3(31 downto 0); v.e.write_cr_data := e_in.read_data3(31 downto 0);
when OP_MTSPR => when OP_MTSPR =>
case decode_spr_num(e_in.insn) is case decode_spr_num(e_in.insn) is
when SPR_XER =>
v.e.xerc.so := e_in.read_data3(63-32);
v.e.xerc.ov := e_in.read_data3(63-33);
v.e.xerc.ca := e_in.read_data3(63-34);
v.e.xerc.ov32 := e_in.read_data3(63-44);
v.e.xerc.ca32 := e_in.read_data3(63-45);
v.e.write_xerc_enable := '1';
when SPR_CTR => when SPR_CTR =>
ctrl_tmp.ctr <= e_in.read_data3; ctrl_tmp.ctr <= e_in.read_data3;
when SPR_LR => when SPR_LR =>
@ -334,7 +419,7 @@ begin
when OP_RLC | OP_RLCL | OP_RLCR | OP_SHL | OP_SHR => when OP_RLC | OP_RLCL | OP_RLCR | OP_SHL | OP_SHR =>
result := rotator_result; result := rotator_result;
if e_in.output_carry = '1' then if e_in.output_carry = '1' then
ctrl_tmp.carry <= rotator_carry; set_carry(v.e, rotator_carry, rotator_carry);
end if; end if;
result_en := '1'; result_en := '1';
when OP_SIM_CONFIG => when OP_SIM_CONFIG =>

@ -16,6 +16,7 @@ package insn_helpers is
function insn_lk (insn_in : std_ulogic_vector) return std_ulogic; function insn_lk (insn_in : std_ulogic_vector) return std_ulogic;
function insn_aa (insn_in : std_ulogic_vector) return std_ulogic; function insn_aa (insn_in : std_ulogic_vector) return std_ulogic;
function insn_rc (insn_in : std_ulogic_vector) return std_ulogic; function insn_rc (insn_in : std_ulogic_vector) return std_ulogic;
function insn_oe (insn_in : std_ulogic_vector) return std_ulogic;
function insn_bd (insn_in : std_ulogic_vector) return std_ulogic_vector; function insn_bd (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_bf (insn_in : std_ulogic_vector) return std_ulogic_vector; function insn_bf (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_bfa (insn_in : std_ulogic_vector) return std_ulogic_vector; function insn_bfa (insn_in : std_ulogic_vector) return std_ulogic_vector;
@ -103,6 +104,11 @@ package body insn_helpers is
return insn_in(0); return insn_in(0);
end; end;


-- Extract the OE field of an instruction word: the element at index 10
-- of the vector supplied by the caller (decode2 passes a 31 downto 0
-- instruction vector).
function insn_oe (insn_in : std_ulogic_vector) return std_ulogic is
    constant OE_BIT : natural := 10;
begin
    return insn_in(OE_BIT);
end;

function insn_bd (insn_in : std_ulogic_vector) return std_ulogic_vector is function insn_bd (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin begin
return insn_in(15 downto 2); return insn_in(15 downto 2);

@ -47,6 +47,7 @@ begin
v.sign_extend := l_in.sign_extend; v.sign_extend := l_in.sign_extend;
v.update := l_in.update; v.update := l_in.update;
v.update_reg := l_in.update_reg; v.update_reg := l_in.update_reg;
v.xerc := l_in.xerc;


-- XXX Temporary hack. Mark the op as non-cachable if the address -- XXX Temporary hack. Mark the op as non-cachable if the address
-- is the form 0xc------- -- is the form 0xc-------

@ -27,8 +27,17 @@ architecture behaviour of multiply is
data : signed(129 downto 0); data : signed(129 downto 0);
write_reg : std_ulogic_vector(4 downto 0); write_reg : std_ulogic_vector(4 downto 0);
rc : std_ulogic; rc : std_ulogic;
oe : std_ulogic;
is_32bit : std_ulogic;
xerc : xer_common_t;
end record; end record;
constant MultiplyPipelineStageInit : multiply_pipeline_stage := (valid => '0', insn_type => OP_ILLEGAL, rc => '0', data => (others => '0'), others => (others => '0')); constant MultiplyPipelineStageInit : multiply_pipeline_stage := (valid => '0',
insn_type => OP_ILLEGAL,
rc => '0', oe => '0',
is_32bit => '0',
xerc => xerc_init,
data => (others => '0'),
others => (others => '0'));


type multiply_pipeline_type is array(0 to PIPELINE_DEPTH-1) of multiply_pipeline_stage; type multiply_pipeline_type is array(0 to PIPELINE_DEPTH-1) of multiply_pipeline_stage;
constant MultiplyPipelineInit : multiply_pipeline_type := (others => MultiplyPipelineStageInit); constant MultiplyPipelineInit : multiply_pipeline_type := (others => MultiplyPipelineStageInit);
@ -51,6 +60,7 @@ begin
variable v : reg_type; variable v : reg_type;
variable d : std_ulogic_vector(129 downto 0); variable d : std_ulogic_vector(129 downto 0);
variable d2 : std_ulogic_vector(63 downto 0); variable d2 : std_ulogic_vector(63 downto 0);
variable ov : std_ulogic;
begin begin
v := r; v := r;


@ -61,16 +71,26 @@ begin
v.multiply_pipeline(0).data := signed(m.data1) * signed(m.data2); v.multiply_pipeline(0).data := signed(m.data1) * signed(m.data2);
v.multiply_pipeline(0).write_reg := m.write_reg; v.multiply_pipeline(0).write_reg := m.write_reg;
v.multiply_pipeline(0).rc := m.rc; v.multiply_pipeline(0).rc := m.rc;
v.multiply_pipeline(0).oe := m.oe;
v.multiply_pipeline(0).is_32bit := m.is_32bit;
v.multiply_pipeline(0).xerc := m.xerc;


loop_0: for i in 1 to PIPELINE_DEPTH-1 loop loop_0: for i in 1 to PIPELINE_DEPTH-1 loop
v.multiply_pipeline(i) := r.multiply_pipeline(i-1); v.multiply_pipeline(i) := r.multiply_pipeline(i-1);
end loop; end loop;


d := std_ulogic_vector(v.multiply_pipeline(PIPELINE_DEPTH-1).data); d := std_ulogic_vector(v.multiply_pipeline(PIPELINE_DEPTH-1).data);
ov := '0';


-- TODO: Handle overflows
case_0: case v.multiply_pipeline(PIPELINE_DEPTH-1).insn_type is case_0: case v.multiply_pipeline(PIPELINE_DEPTH-1).insn_type is
when OP_MUL_L64 => when OP_MUL_L64 =>
d2 := d(63 downto 0); d2 := d(63 downto 0);
if v.multiply_pipeline(PIPELINE_DEPTH-1).is_32bit = '1' then
ov := (or d(63 downto 31)) and not (and d(63 downto 31));
else
ov := (or d(127 downto 63)) and not (and d(127 downto 63));
end if;
when OP_MUL_H32 => when OP_MUL_H32 =>
d2 := d(63 downto 32) & d(63 downto 32); d2 := d(63 downto 32) & d(63 downto 32);
when OP_MUL_H64 => when OP_MUL_H64 =>
@ -82,11 +102,24 @@ begin


m_out.write_reg_data <= d2; m_out.write_reg_data <= d2;
m_out.write_reg_nr <= v.multiply_pipeline(PIPELINE_DEPTH-1).write_reg; m_out.write_reg_nr <= v.multiply_pipeline(PIPELINE_DEPTH-1).write_reg;
m_out.xerc <= v.multiply_pipeline(PIPELINE_DEPTH-1).xerc;


-- Generate OV/OV32/SO when OE=1
if v.multiply_pipeline(PIPELINE_DEPTH-1).valid = '1' then if v.multiply_pipeline(PIPELINE_DEPTH-1).valid = '1' then
m_out.valid <= '1'; m_out.valid <= '1';
m_out.write_reg_enable <= '1'; m_out.write_reg_enable <= '1';
m_out.rc <= v.multiply_pipeline(PIPELINE_DEPTH-1).rc; m_out.rc <= v.multiply_pipeline(PIPELINE_DEPTH-1).rc;
m_out.write_xerc_enable <= v.multiply_pipeline(PIPELINE_DEPTH-1).oe;

-- We must test oe because the RC update code in writeback
-- will use the xerc value to set CR0:SO so we must not clobber
-- xerc if OE wasn't set.
--
if v.multiply_pipeline(PIPELINE_DEPTH-1).oe = '1' then
m_out.xerc.ov <= ov;
m_out.xerc.ov32 <= ov;
m_out.xerc.so <= v.multiply_pipeline(PIPELINE_DEPTH-1).xerc.so or ov;
end if;
end if; end if;


rin <= v; rin <= v;

@ -62,6 +62,8 @@ begin
variable w : std_ulogic_vector(0 downto 0); variable w : std_ulogic_vector(0 downto 0);
variable j : integer; variable j : integer;
variable k : unsigned(3 downto 0); variable k : unsigned(3 downto 0);
variable cf: std_ulogic_vector(3 downto 0);
variable xe: xer_common_t;
begin begin
x := "" & e_in.valid; x := "" & e_in.valid;
y := "" & l_in.valid; y := "" & l_in.valid;
@ -81,6 +83,11 @@ begin
z := "" & (d_in.valid and d_in.rc); z := "" & (d_in.valid and d_in.rc);
assert (to_integer(unsigned(w)) + to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z))) <= 1 severity failure; assert (to_integer(unsigned(w)) + to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z))) <= 1 severity failure;


x := "" & e_in.write_xerc_enable;
y := "" & m_in.write_xerc_enable;
z := "" & D_in.write_xerc_enable;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z))) <= 1 severity failure;

w_out <= WritebackToRegisterFileInit; w_out <= WritebackToRegisterFileInit;
c_out <= WritebackToCrFileInit; c_out <= WritebackToCrFileInit;


@ -96,12 +103,12 @@ begin
partial_write <= '0'; partial_write <= '0';
sign_extend <= '0'; sign_extend <= '0';
second_word <= '0'; second_word <= '0';
data_in <= e_in.write_data; xe := e_in.xerc;


if e_in.write_enable = '1' then if e_in.write_enable = '1' then
w_out.write_reg <= e_in.write_reg; w_out.write_reg <= e_in.write_reg;
data_in <= e_in.write_data;
w_out.write_enable <= '1'; w_out.write_enable <= '1';
data_in <= e_in.write_data;
data_len <= unsigned(e_in.write_len); data_len <= unsigned(e_in.write_len);
sign_extend <= e_in.sign_extend; sign_extend <= e_in.sign_extend;
rc <= e_in.rc; rc <= e_in.rc;
@ -113,6 +120,11 @@ begin
c_out.write_cr_data <= e_in.write_cr_data; c_out.write_cr_data <= e_in.write_cr_data;
end if; end if;


if e_in.write_xerc_enable = '1' then
c_out.write_xerc_enable <= '1';
c_out.write_xerc_data <= e_in.xerc;
end if;

if l_in.write_enable = '1' then if l_in.write_enable = '1' then
w_out.write_reg <= l_in.write_reg; w_out.write_reg <= l_in.write_reg;
data_in <= l_in.write_data; data_in <= l_in.write_data;
@ -127,6 +139,7 @@ begin
if l_in.valid = '0' and (data_len + byte_offset > 8) then if l_in.valid = '0' and (data_len + byte_offset > 8) then
partial_write <= '1'; partial_write <= '1';
end if; end if;
xe := l_in.xerc;
end if; end if;


if m_in.write_reg_enable = '1' then if m_in.write_reg_enable = '1' then
@ -134,6 +147,12 @@ begin
w_out.write_reg <= m_in.write_reg_nr; w_out.write_reg <= m_in.write_reg_nr;
data_in <= m_in.write_reg_data; data_in <= m_in.write_reg_data;
rc <= m_in.rc; rc <= m_in.rc;
xe := m_in.xerc;
end if;

if m_in.write_xerc_enable = '1' then
c_out.write_xerc_enable <= '1';
c_out.write_xerc_data <= m_in.xerc;
end if; end if;


if d_in.write_reg_enable = '1' then if d_in.write_reg_enable = '1' then
@ -141,6 +160,12 @@ begin
w_out.write_reg <= d_in.write_reg_nr; w_out.write_reg <= d_in.write_reg_nr;
data_in <= d_in.write_reg_data; data_in <= d_in.write_reg_data;
rc <= d_in.rc; rc <= d_in.rc;
xe := d_in.xerc;
end if;

if d_in.write_xerc_enable = '1' then
c_out.write_xerc_enable <= '1';
c_out.write_xerc_data <= d_in.xerc;
end if; end if;


-- shift and byte-reverse data bytes -- shift and byte-reverse data bytes
@ -193,17 +218,15 @@ begin
-- deliver to regfile -- deliver to regfile
w_out.write_data <= data_trimmed; w_out.write_data <= data_trimmed;


-- test value against 0 and set CR0 if requested -- Perform CR0 update for RC forms
if rc = '1' then if rc = '1' then
c_out.write_cr_enable <= '1'; c_out.write_cr_enable <= '1';
c_out.write_cr_mask <= num_to_fxm(0); c_out.write_cr_mask <= num_to_fxm(0);
if negative = '1' then cf(3) := negative;
c_out.write_cr_data <= x"80000000"; cf(2) := not negative and not zero;
elsif zero = '0' then cf(1) := zero;
c_out.write_cr_data <= x"40000000"; cf(0) := xe.so;
else c_out.write_cr_data(31 downto 28) <= cf;
c_out.write_cr_data <= x"20000000";
end if;
end if; end if;
end process; end process;
end; end;

Loading…
Cancel
Save