Merge pull request #122 from paulusmack/benh-sprs

Benh sprs
pull/130/head
Anton Blanchard 5 years ago committed by GitHub
commit 1a826f077b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -14,7 +14,7 @@ all: $(all)
$(GHDL) -a $(GHDLFLAGS) $<

common.o: decode_types.o
control.o: gpr_hazard.o cr_hazard.o
control.o: gpr_hazard.o cr_hazard.o common.o
sim_jtag.o: sim_jtag_socket.o
core_tb.o: common.o wishbone_types.o core.o soc.o sim_jtag.o
core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o loadstore1.o dcache.o multiply.o writeback.o core_debug.o divider.o

@ -12,15 +12,59 @@ package common is

function decode_spr_num(insn: std_ulogic_vector(31 downto 0)) return spr_num_t;

constant SPR_XER : spr_num_t := 1;
constant SPR_LR : spr_num_t := 8;
constant SPR_CTR : spr_num_t := 9;
constant SPR_TB : spr_num_t := 268;
constant SPR_SRR0 : spr_num_t := 26;
constant SPR_SRR1 : spr_num_t := 27;
constant SPR_HSRR0 : spr_num_t := 314;
constant SPR_HSRR1 : spr_num_t := 315;
constant SPR_SPRG0 : spr_num_t := 272;
constant SPR_SPRG1 : spr_num_t := 273;
constant SPR_SPRG2 : spr_num_t := 274;
constant SPR_SPRG3 : spr_num_t := 275;
constant SPR_SPRG3U : spr_num_t := 259;
constant SPR_HSPRG0 : spr_num_t := 304;
constant SPR_HSPRG1 : spr_num_t := 305;

-- GPR indices in the register file (GPR only)
subtype gpr_index_t is std_ulogic_vector(4 downto 0);

-- Extended GPR indice (can hold an SPR)
subtype gspr_index_t is std_ulogic_vector(5 downto 0);

-- Some SPRs are stored in the register file, they use the magic
-- GPR numbers above 31.
--
-- The function fast_spr_num() returns the corresponding fast
-- pseudo-GPR number for a given SPR number. The result MSB
-- indicates if this is indeed a fast SPR. If clear, then
-- the SPR is not stored in the GPR file.
--
function fast_spr_num(spr: spr_num_t) return gspr_index_t;

-- Indices conversion functions
function gspr_to_gpr(i: gspr_index_t) return gpr_index_t;
function gpr_to_gspr(i: gpr_index_t) return gspr_index_t;
function gpr_or_spr_to_gspr(g: gpr_index_t; s: gspr_index_t) return gspr_index_t;
function is_fast_spr(s: gspr_index_t) return std_ulogic;

-- The XER is split: the common bits (CA, OV, SO, OV32 and CA32) are
-- in the CR file as a kind of CR extension (with a separate write
-- control). The rest is stored as a fast SPR.
type xer_common_t is record
ca : std_ulogic;
ca32 : std_ulogic;
ov : std_ulogic;
ov32 : std_ulogic;
so : std_ulogic;
end record;
constant xerc_init : xer_common_t := (others => '0');

-- This needs to die...
type ctrl_t is record
lr: std_ulogic_vector(63 downto 0);
ctr: std_ulogic_vector(63 downto 0);
tb: std_ulogic_vector(63 downto 0);
carry: std_ulogic;
end record;

type Fetch1ToIcacheType is record
@ -49,6 +93,8 @@ package common is
stop_mark : std_ulogic;
nia: std_ulogic_vector(63 downto 0);
insn: std_ulogic_vector(31 downto 0);
ispr1: gspr_index_t; -- (G)SPR used for branch condition (CTR) or mfspr
ispr2: gspr_index_t; -- (G)SPR used for branch target (CTR, LR, TAR)
decode: decode_rom_t;
end record;
constant Decode1ToDecode2Init : Decode1ToDecode2Type := (valid => '0', stop_mark => '0', decode => decode_rom_init, others => (others => '0'));
@ -57,15 +103,17 @@ package common is
valid: std_ulogic;
insn_type: insn_type_t;
nia: std_ulogic_vector(63 downto 0);
write_reg: std_ulogic_vector(4 downto 0);
read_reg1: std_ulogic_vector(4 downto 0);
read_reg2: std_ulogic_vector(4 downto 0);
write_reg: gspr_index_t;
read_reg1: gspr_index_t;
read_reg2: gspr_index_t;
read_data1: std_ulogic_vector(63 downto 0);
read_data2: std_ulogic_vector(63 downto 0);
read_data3: std_ulogic_vector(63 downto 0);
cr: std_ulogic_vector(31 downto 0);
xerc: xer_common_t;
lr: std_ulogic;
rc: std_ulogic;
oe: std_ulogic;
invert_a: std_ulogic;
invert_out: std_ulogic;
input_carry: carry_in_t;
@ -78,23 +126,28 @@ package common is
data_len: std_ulogic_vector(3 downto 0);
end record;
constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
(valid => '0', insn_type => OP_ILLEGAL, lr => '0', rc => '0', invert_a => '0',
(valid => '0', insn_type => OP_ILLEGAL, lr => '0', rc => '0', oe => '0', invert_a => '0',
invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0',
is_32bit => '0', is_signed => '0', others => (others => '0'));
is_32bit => '0', is_signed => '0', xerc => xerc_init, others => (others => '0'));

type Decode2ToMultiplyType is record
valid: std_ulogic;
insn_type: insn_type_t;
write_reg: std_ulogic_vector(4 downto 0);
write_reg: gpr_index_t;
data1: std_ulogic_vector(64 downto 0);
data2: std_ulogic_vector(64 downto 0);
rc: std_ulogic;
oe: std_ulogic;
is_32bit: std_ulogic;
xerc: xer_common_t;
end record;
constant Decode2ToMultiplyInit : Decode2ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL, rc => '0', others => (others => '0'));
constant Decode2ToMultiplyInit : Decode2ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL, rc => '0',
oe => '0', is_32bit => '0', xerc => xerc_init,
others => (others => '0'));

type Decode2ToDividerType is record
valid: std_ulogic;
write_reg: std_ulogic_vector(4 downto 0);
write_reg: gpr_index_t;
dividend: std_ulogic_vector(63 downto 0);
divisor: std_ulogic_vector(63 downto 0);
is_signed: std_ulogic;
@ -102,16 +155,21 @@ package common is
is_extended: std_ulogic;
is_modulus: std_ulogic;
rc: std_ulogic;
oe: std_ulogic;
xerc: xer_common_t;
end record;
constant Decode2ToDividerInit: Decode2ToDividerType := (valid => '0', is_signed => '0', is_32bit => '0', is_extended => '0', is_modulus => '0', rc => '0', others => (others => '0'));
constant Decode2ToDividerInit: Decode2ToDividerType := (valid => '0', is_signed => '0', is_32bit => '0',
is_extended => '0', is_modulus => '0',
rc => '0', oe => '0', xerc => xerc_init,
others => (others => '0'));

type Decode2ToRegisterFileType is record
read1_enable : std_ulogic;
read1_reg : std_ulogic_vector(4 downto 0);
read1_reg : gspr_index_t;
read2_enable : std_ulogic;
read2_reg : std_ulogic_vector(4 downto 0);
read2_reg : gspr_index_t;
read3_enable : std_ulogic;
read3_reg : std_ulogic_vector(4 downto 0);
read3_reg : gpr_index_t;
end record;

type RegisterFileToDecode2Type is record
@ -126,6 +184,7 @@ package common is

type CrFileToDecode2Type is record
read_cr_data : std_ulogic_vector(31 downto 0);
read_xerc_data : xer_common_t;
end record;

type Execute1ToFetch1Type is record
@ -140,14 +199,17 @@ package common is
addr1 : std_ulogic_vector(63 downto 0);
addr2 : std_ulogic_vector(63 downto 0);
data : std_ulogic_vector(63 downto 0); -- data to write, unused for read
write_reg : std_ulogic_vector(4 downto 0); -- read data goes to this register
write_reg : gpr_index_t;
length : std_ulogic_vector(3 downto 0);
byte_reverse : std_ulogic;
sign_extend : std_ulogic; -- do we need to sign extend?
update : std_ulogic; -- is this an update instruction?
update_reg : std_ulogic_vector(4 downto 0); -- if so, the register to update
update_reg : gpr_index_t; -- if so, the register to update
xerc : xer_common_t;
end record;
constant Decode2ToLoadstore1Init : Decode2ToLoadstore1Type := (valid => '0', load => '0', byte_reverse => '0', sign_extend => '0', update => '0', others => (others => '0'));
constant Decode2ToLoadstore1Init : Decode2ToLoadstore1Type := (valid => '0', load => '0', byte_reverse => '0',
sign_extend => '0', update => '0', xerc => xerc_init,
others => (others => '0'));

type Loadstore1ToDcacheType is record
valid : std_ulogic;
@ -155,63 +217,82 @@ package common is
nc : std_ulogic;
addr : std_ulogic_vector(63 downto 0);
data : std_ulogic_vector(63 downto 0);
write_reg : std_ulogic_vector(4 downto 0);
write_reg : gpr_index_t;
length : std_ulogic_vector(3 downto 0);
byte_reverse : std_ulogic;
sign_extend : std_ulogic;
update : std_ulogic;
update_reg : std_ulogic_vector(4 downto 0);
update_reg : gpr_index_t;
xerc : xer_common_t;
end record;

type DcacheToWritebackType is record
valid : std_ulogic;
write_enable: std_ulogic;
write_reg : std_ulogic_vector(4 downto 0);
write_reg : gpr_index_t;
write_data : std_ulogic_vector(63 downto 0);
write_len : std_ulogic_vector(3 downto 0);
write_shift : std_ulogic_vector(2 downto 0);
sign_extend : std_ulogic;
byte_reverse : std_ulogic;
second_word : std_ulogic;
xerc : xer_common_t;
end record;
constant DcacheToWritebackInit : DcacheToWritebackType := (valid => '0', write_enable => '0', sign_extend => '0', byte_reverse => '0', second_word => '0', others => (others => '0'));
constant DcacheToWritebackInit : DcacheToWritebackType := (valid => '0', write_enable => '0', sign_extend => '0',
byte_reverse => '0', second_word => '0', xerc => xerc_init,
others => (others => '0'));

type Execute1ToWritebackType is record
valid: std_ulogic;
rc : std_ulogic;
write_enable : std_ulogic;
write_reg: std_ulogic_vector(4 downto 0);
write_reg: gspr_index_t;
write_data: std_ulogic_vector(63 downto 0);
write_len : std_ulogic_vector(3 downto 0);
write_cr_enable : std_ulogic;
write_cr_mask : std_ulogic_vector(7 downto 0);
write_cr_data : std_ulogic_vector(31 downto 0);
write_xerc_enable : std_ulogic;
xerc : xer_common_t;
sign_extend: std_ulogic;
end record;
constant Execute1ToWritebackInit : Execute1ToWritebackType := (valid => '0', rc => '0', write_enable => '0', write_cr_enable => '0', sign_extend => '0', others => (others => '0'));
constant Execute1ToWritebackInit : Execute1ToWritebackType := (valid => '0', rc => '0', write_enable => '0',
write_cr_enable => '0', sign_extend => '0',
write_xerc_enable => '0', xerc => xerc_init,
others => (others => '0'));

type MultiplyToWritebackType is record
valid: std_ulogic;

write_reg_enable : std_ulogic;
write_reg_nr: std_ulogic_vector(4 downto 0);
write_reg_nr: gpr_index_t;
write_reg_data: std_ulogic_vector(63 downto 0);
write_xerc_enable : std_ulogic;
xerc : xer_common_t;
rc: std_ulogic;
end record;
constant MultiplyToWritebackInit : MultiplyToWritebackType := (valid => '0', write_reg_enable => '0', rc => '0', others => (others => '0'));
constant MultiplyToWritebackInit : MultiplyToWritebackType := (valid => '0', write_reg_enable => '0',
rc => '0', write_xerc_enable => '0',
xerc => xerc_init,
others => (others => '0'));

type DividerToWritebackType is record
valid: std_ulogic;

write_reg_enable : std_ulogic;
write_reg_nr: std_ulogic_vector(4 downto 0);
write_reg_nr: gpr_index_t;
write_reg_data: std_ulogic_vector(63 downto 0);
write_xerc_enable : std_ulogic;
xerc : xer_common_t;
rc: std_ulogic;
end record;
constant DividerToWritebackInit : DividerToWritebackType := (valid => '0', write_reg_enable => '0', rc => '0', others => (others => '0'));
constant DividerToWritebackInit : DividerToWritebackType := (valid => '0', write_reg_enable => '0',
rc => '0', write_xerc_enable => '0',
xerc => xerc_init,
others => (others => '0'));

type WritebackToRegisterFileType is record
write_reg : std_ulogic_vector(4 downto 0);
write_reg : gspr_index_t;
write_data : std_ulogic_vector(63 downto 0);
write_enable : std_ulogic;
end record;
@ -221,9 +302,12 @@ package common is
write_cr_enable : std_ulogic;
write_cr_mask : std_ulogic_vector(7 downto 0);
write_cr_data : std_ulogic_vector(31 downto 0);
write_xerc_enable : std_ulogic;
write_xerc_data : xer_common_t;
end record;
constant WritebackToCrFileInit : WritebackToCrFileType := (write_cr_enable => '0', others => (others => '0'));

constant WritebackToCrFileInit : WritebackToCrFileType := (write_cr_enable => '0', write_xerc_enable => '0',
write_xerc_data => xerc_init,
others => (others => '0'));
end common;

package body common is
@ -231,4 +315,63 @@ package body common is
begin
return to_integer(unsigned(insn(15 downto 11) & insn(20 downto 16)));
end;
function fast_spr_num(spr: spr_num_t) return gspr_index_t is
variable n : integer range 0 to 31;
begin
case spr is
when SPR_LR =>
n := 0;
when SPR_CTR =>
n:= 1;
when SPR_SRR0 =>
n := 2;
when SPR_SRR1 =>
n := 3;
when SPR_HSRR0 =>
n := 4;
when SPR_HSRR1 =>
n := 5;
when SPR_SPRG0 =>
n := 6;
when SPR_SPRG1 =>
n := 7;
when SPR_SPRG2 =>
n := 8;
when SPR_SPRG3 | SPR_SPRG3U =>
n := 9;
when SPR_HSPRG0 =>
n := 10;
when SPR_HSPRG1 =>
n := 11;
when SPR_XER =>
n := 12;
when others =>
return "000000";
end case;
return "1" & std_ulogic_vector(to_unsigned(n, 5));
end;

function gspr_to_gpr(i: gspr_index_t) return gpr_index_t is
begin
return i(4 downto 0);
end;

function gpr_to_gspr(i: gpr_index_t) return gspr_index_t is
begin
return "0" & i;
end;

function gpr_or_spr_to_gspr(g: gpr_index_t; s: gspr_index_t) return gspr_index_t is
begin
if s(5) = '1' then
return s;
else
return gpr_to_gspr(g);
end if;
end;

function is_fast_spr(s: gspr_index_t) return std_ulogic is
begin
return s(5);
end;
end common;

@ -1,6 +1,9 @@
library ieee;
use ieee.std_logic_1164.all;

library work;
use work.common.all;

entity control is
generic (
PIPELINE_DEPTH : natural := 2
@ -12,20 +15,21 @@ entity control is
complete_in : in std_ulogic;
valid_in : in std_ulogic;
flush_in : in std_ulogic;
stall_in : in std_ulogic;
sgl_pipe_in : in std_ulogic;
stop_mark_in : in std_ulogic;

gpr_write_valid_in : in std_ulogic;
gpr_write_in : in std_ulogic_vector(4 downto 0);
gpr_write_in : in gspr_index_t;

gpr_a_read_valid_in : in std_ulogic;
gpr_a_read_in : in std_ulogic_vector(4 downto 0);
gpr_a_read_in : in gspr_index_t;

gpr_b_read_valid_in : in std_ulogic;
gpr_b_read_in : in std_ulogic_vector(4 downto 0);
gpr_b_read_in : in gspr_index_t;

gpr_c_read_valid_in : in std_ulogic;
gpr_c_read_in : in std_ulogic_vector(4 downto 0);
gpr_c_read_in : in gpr_index_t;

cr_read_in : in std_ulogic;
cr_write_in : in std_ulogic;
@ -61,6 +65,7 @@ begin
)
port map (
clk => clk,
stall_in => stall_in,

gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in,
@ -76,6 +81,7 @@ begin
)
port map (
clk => clk,
stall_in => stall_in,

gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in,
@ -91,11 +97,12 @@ begin
)
port map (
clk => clk,
stall_in => stall_in,

gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in,
gpr_read_valid_in => gpr_c_read_valid_in,
gpr_read_in => gpr_c_read_in,
gpr_read_in => "0" & gpr_c_read_in,

stall_out => stall_c_out
);
@ -106,6 +113,7 @@ begin
)
port map (
clk => clk,
stall_in => stall_in,

cr_read_in => cr_read_in,
cr_write_in => cr_write_valid,
@ -129,8 +137,8 @@ begin
v_int := r_int;

-- asynchronous
valid_tmp := valid_in and not flush_in;
stall_tmp := '0';
valid_tmp := valid_in and not flush_in and not stall_in;
stall_tmp := stall_in;

if complete_in = '1' then
v_int.outstanding := r_int.outstanding - 1;

@ -76,8 +76,10 @@ architecture behave of core is
signal icache_stall_out : std_ulogic;
signal fetch2_stall_in : std_ulogic;
signal decode1_stall_in : std_ulogic;
signal decode2_stall_in : std_ulogic;
signal decode2_stall_out : std_ulogic;
signal ex1_icache_inval: std_ulogic;
signal ex1_stall_out: std_ulogic;

signal flush: std_ulogic;

@ -184,6 +186,7 @@ begin
port map (
clk => clk,
rst => core_rst,
stall_in => decode2_stall_in,
stall_out => decode2_stall_out,
flush_in => flush,
complete_in => complete,
@ -198,6 +201,7 @@ begin
c_in => cr_file_to_decode2,
c_out => decode2_to_cr_file
);
decode2_stall_in <= ex1_stall_out;

register_file_0: entity work.register_file
generic map (
@ -223,6 +227,7 @@ begin
port map (
clk => clk,
flush_out => flush,
stall_out => ex1_stall_out,
e_in => decode2_to_execute1,
f_out => execute1_to_fetch1,
e_out => execute1_to_writeback,

@ -18,7 +18,9 @@ end entity cr_file;

architecture behaviour of cr_file is
signal crs : std_ulogic_vector(31 downto 0) := (others => '0');
signal crs_updated : std_ulogic_vector(31 downto 0) := (others => '0');
signal crs_updated : std_ulogic_vector(31 downto 0);
signal xerc : xer_common_t := xerc_init;
signal xerc_updated : xer_common_t;
begin
cr_create_0: process(all)
variable hi, lo : integer := 0;
@ -35,6 +37,13 @@ begin
end loop;

crs_updated <= cr_tmp;

if w_in.write_xerc_enable = '1' then
xerc_updated <= w_in.write_xerc_data;
else
xerc_updated <= xerc;
end if;

end process;

-- synchronous writes
@ -43,9 +52,13 @@ begin
if rising_edge(clk) then
if w_in.write_cr_enable = '1' then
report "Writing " & to_hstring(w_in.write_cr_data) & " to CR mask " & to_hstring(w_in.write_cr_mask);
end if;
crs <= crs_updated;
end if;
if w_in.write_xerc_enable = '1' then
report "Writing XERC";
xerc <= xerc_updated;
end if;
end if;
end process;

-- asynchronous reads
@ -56,5 +69,6 @@ begin
report "Reading CR " & to_hstring(crs_updated);
end if;
d_out.read_cr_data <= crs_updated;
d_out.read_xerc_data <= xerc_updated;
end process;
end architecture behaviour;

@ -7,7 +7,8 @@ entity cr_hazard is
PIPELINE_DEPTH : natural := 2
);
port(
clk : in std_logic;
clk : in std_ulogic;
stall_in : in std_ulogic;

cr_read_in : in std_ulogic;
cr_write_in : in std_ulogic;
@ -29,8 +30,10 @@ begin
cr_hazard0: process(clk)
begin
if rising_edge(clk) then
if stall_in = '0' then
r <= rin;
end if;
end if;
end process;

cr_hazard1: process(all)

@ -185,6 +185,7 @@ architecture rtl of dcache is
length : std_ulogic_vector(3 downto 0);
sign_extend : std_ulogic;
byte_reverse : std_ulogic;
xerc : xer_common_t;
end record;

signal r2 : reg_stage_2_t;
@ -469,6 +470,7 @@ begin
d_out.sign_extend <= r2.sign_extend;
d_out.byte_reverse <= r2.byte_reverse;
d_out.second_word <= '0';
d_out.xerc <= r2.xerc;

-- We have a valid load or store hit or we just completed a slow
-- op such as a load miss, a NC load or a store
@ -518,6 +520,7 @@ begin
d_out.sign_extend <= r1.req.sign_extend;
d_out.byte_reverse <= r1.req.byte_reverse;
d_out.write_len <= r1.req.length;
d_out.xerc <= r1.req.xerc;
end if;

-- If it's a store or a non-update load form, complete now
@ -539,6 +542,7 @@ begin
d_out.write_len <= "1000";
d_out.sign_extend <= '0';
d_out.byte_reverse <= '0';
d_out.xerc <= r1.req.xerc;

-- If it was a load, this completes the operation (load with
-- update case).

@ -36,14 +36,14 @@ architecture behaviour of decode1 is
constant major_decode_rom_array : major_rom_array_t := (
-- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl
-- op in out A out in out len ext pipe
12 => (ALU, OP_ADD, RA, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '1', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- addic
13 => (ALU, OP_ADD, RA, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '1', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '1'), -- addic.
12 => (ALU, OP_ADD, RA, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '1', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- addic
13 => (ALU, OP_ADD, RA, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '1', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0'), -- addic.
14 => (ALU, OP_ADD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- addi
15 => (ALU, OP_ADD, RA_OR_ZERO, CONST_SI_HI, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- addis
28 => (ALU, OP_AND, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0'), -- andi.
29 => (ALU, OP_AND, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0'), -- andis.
18 => (ALU, OP_B, NONE, CONST_LI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0'), -- b
16 => (ALU, OP_BC, NONE, CONST_BD, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0'), -- bc
16 => (ALU, OP_BC, SPR, CONST_BD, NONE, SPR , '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0'), -- bc
11 => (ALU, OP_CMP, RA, CONST_SI, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpi
10 => (ALU, OP_CMPL, RA, CONST_UI, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpli
34 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- lbz
@ -106,7 +106,7 @@ architecture behaviour of decode1 is
-- addpcis not implemented yet
2#001# => (ALU, OP_ILLEGAL, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'),
-- bclr, bcctr, bctar
2#100# => (ALU, OP_BCREG, NONE, NONE, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '1'),
2#100# => (ALU, OP_BCREG, SPR, SPR, NONE, SPR, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0'),
-- isync
2#111# => (ALU, OP_ISYNC, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'),
others => illegal_inst
@ -133,10 +133,15 @@ architecture behaviour of decode1 is
-- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl
-- op in out A out in out len ext pipe
2#0100001010# => (ALU, OP_ADD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- add
2#0000001010# => (ALU, OP_ADD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '1', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- addc
2#0010001010# => (ALU, OP_ADD, RA, RB, NONE, RT, '0', '0', '0', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- adde
2#0011101010# => (ALU, OP_ADD, RA, CONST_M1, NONE, RT, '0', '0', '0', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- addme
2#0011001010# => (ALU, OP_ADD, RA, NONE, NONE, RT, '0', '0', '0', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- addze
2#1100001010# => (ALU, OP_ADD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- addo
2#0000001010# => (ALU, OP_ADD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '1', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- addc
2#1000001010# => (ALU, OP_ADD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '1', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- addco
2#0010001010# => (ALU, OP_ADD, RA, RB, NONE, RT, '0', '0', '0', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- adde
2#1010001010# => (ALU, OP_ADD, RA, RB, NONE, RT, '0', '0', '0', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- addeo
2#0011101010# => (ALU, OP_ADD, RA, CONST_M1, NONE, RT, '0', '0', '0', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- addme
2#1011101010# => (ALU, OP_ADD, RA, CONST_M1, NONE, RT, '0', '0', '0', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- addmeo
2#0011001010# => (ALU, OP_ADD, RA, NONE, NONE, RT, '0', '0', '0', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- addze
2#1011001010# => (ALU, OP_ADD, RA, NONE, NONE, RT, '0', '0', '0', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- addzeo
2#0000011100# => (ALU, OP_AND, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- and
2#0000111100# => (ALU, OP_AND, NONE, RB, RS, RA, '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- andc
-- 2#0011111100# bperm
@ -156,13 +161,21 @@ architecture behaviour of decode1 is
2#0011110110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbtst
-- 2#1111110110# dcbz
2#0110001001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdeu
2#1110001001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdeuo
2#0110001011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divweu
2#1110001011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divweuo
2#0110101001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divde
2#1110101001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdeo
2#0110101011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divwe
2#1110101011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divweo
2#0111001001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdu
2#1111001001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divduo
2#0111001011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divwu
2#1111001011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divwuo
2#0111101001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divd
2#1111101001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdo
2#0111101011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divw
2#1111101011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divwo
2#0100011100# => (ALU, OP_XOR, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- eqv
2#1110111010# => (ALU, OP_EXTS, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- extsb
2#1110011010# => (ALU, OP_EXTS, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- extsh
@ -224,13 +237,13 @@ architecture behaviour of decode1 is
-- 2#1000000000# mcrxr
-- 2#1001000000# mcrxrx
2#0000010011# => (ALU, OP_MFCR, NONE, NONE, NONE, RT, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mfcr/mfocrf
2#0101010011# => (ALU, OP_MFSPR, NONE, NONE, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- mfspr
2#0101010011# => (ALU, OP_MFSPR, SPR, NONE, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mfspr
2#0100001001# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- modud
2#0100001011# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- moduw
2#1100001001# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- modsd
2#1100001011# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- modsw
2#0010010000# => (ALU, OP_MTCRF, NONE, NONE, RS, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mtcrf/mtocrf
2#0111010011# => (ALU, OP_MTSPR, NONE, NONE, RS, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- mtspr
2#0111010011# => (ALU, OP_MTSPR, NONE, NONE, RS, SPR, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mtspr
2#0001001001# => (MUL, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- mulhd
2#0000001001# => (MUL, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- mulhdu
2#0001001011# => (MUL, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- mulhw
@ -241,9 +254,12 @@ architecture behaviour of decode1 is
2#1001001011# => (MUL, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- mulhw
2#1000001011# => (MUL, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '1'), -- mulhwu
2#0011101001# => (MUL, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- mulld
2#1011101001# => (MUL, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- mulldo
2#0011101011# => (MUL, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- mullw
2#1011101011# => (MUL, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- mullwo
2#0111011100# => (ALU, OP_AND, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- nand
2#0001101000# => (ALU, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- neg
2#1001101000# => (ALU, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- nego
2#0001111100# => (ALU, OP_OR, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- nor
2#0110111100# => (ALU, OP_OR, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- or
2#0110011100# => (ALU, OP_OR, NONE, RB, RS, RA, '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- orc
@ -278,10 +294,15 @@ architecture behaviour of decode1 is
2#0010110111# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '1'), -- stwux
2#0010010111# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- stwx
2#0000101000# => (ALU, OP_ADD, RA, RB, NONE, RT, '0', '0', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- subf
2#0000001000# => (ALU, OP_ADD, RA, RB, NONE, RT, '0', '0', '1', '0', ONE, '1', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- subfc
2#0010001000# => (ALU, OP_ADD, RA, RB, NONE, RT, '0', '0', '1', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- subfe
2#0011101000# => (ALU, OP_ADD, RA, CONST_M1, NONE, RT, '0', '0', '1', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- subfme
2#0011001000# => (ALU, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- subfze
2#1000101000# => (ALU, OP_ADD, RA, RB, NONE, RT, '0', '0', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- subfo
2#0000001000# => (ALU, OP_ADD, RA, RB, NONE, RT, '0', '0', '1', '0', ONE, '1', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- subfc
2#1000001000# => (ALU, OP_ADD, RA, RB, NONE, RT, '0', '0', '1', '0', ONE, '1', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- subfco
2#0010001000# => (ALU, OP_ADD, RA, RB, NONE, RT, '0', '0', '1', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- subfe
2#1010001000# => (ALU, OP_ADD, RA, RB, NONE, RT, '0', '0', '1', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- subfeo
2#0011101000# => (ALU, OP_ADD, RA, CONST_M1, NONE, RT, '0', '0', '1', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- subfme
2#1011101000# => (ALU, OP_ADD, RA, CONST_M1, NONE, RT, '0', '0', '1', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- subfmeo
2#0011001000# => (ALU, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- subfze
2#1011001000# => (ALU, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- subfzeo
2#1001010110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- sync
-- 2#0001000100# td
2#0000000100# => (ALU, OP_TW, RA, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- tw
@ -334,6 +355,8 @@ begin
v.nia := f_in.nia;
v.insn := f_in.insn;
v.stop_mark := f_in.stop_mark;
v.ispr1 := (others => '0');
v.ispr2 := (others => '0');

if f_in.valid = '1' then
report "Decode insn " & to_hstring(f_in.insn) & " at " & to_hstring(f_in.nia);
@ -377,6 +400,33 @@ begin
v.decode := major_decode_rom_array(to_integer(majorop));
end if;

-- Set ISPR1/ISPR2 when needed
if v.decode.insn_type = OP_BC or v.decode.insn_type = OP_BCREG then
-- Branch uses CTR as condition when BO(2) is 0. This is
-- also used to indicate that CTR is modified (they go
-- together).
--
if f_in.insn(23) = '0' then
v.ispr1 := fast_spr_num(SPR_CTR);
end if;

-- Branch source register is an SPR
if v.decode.insn_type = OP_BCREG then
-- TODO: Add TAR
if f_in.insn(10) = '0' then
v.ispr2 := fast_spr_num(SPR_LR);
else
v.ispr2 := fast_spr_num(SPR_CTR);
end if;
end if;
elsif v.decode.insn_type = OP_MFSPR or v.decode.insn_type = OP_MTSPR then
v.ispr1 := fast_spr_num(decode_spr_num(f_in.insn));
-- Make slow SPRs single issue
if is_fast_spr(v.ispr1) = '0' then
v.decode.sgl_pipe := '1';
end if;
end if;

if flush_in = '1' then
v.valid := '0';
end if;

@ -14,6 +14,7 @@ entity decode2 is
rst : in std_ulogic;

complete_in : in std_ulogic;
stall_in : in std_ulogic;
stall_out : out std_ulogic;

stopped_out : out std_ulogic;
@ -47,30 +48,46 @@ architecture behaviour of decode2 is

type decode_input_reg_t is record
reg_valid : std_ulogic;
reg : std_ulogic_vector(4 downto 0);
reg : gspr_index_t;
data : std_ulogic_vector(63 downto 0);
end record;

type decode_output_reg_t is record
reg_valid : std_ulogic;
reg : gspr_index_t;
end record;

function decode_input_reg_a (t : input_reg_a_t; insn_in : std_ulogic_vector(31 downto 0);
reg_data : std_ulogic_vector(63 downto 0)) return decode_input_reg_t is
variable is_reg : std_ulogic;
reg_data : std_ulogic_vector(63 downto 0);
ispr : gspr_index_t) return decode_input_reg_t is
begin
is_reg := '0' when insn_ra(insn_in) = "00000" else '1';

if t = RA or (t = RA_OR_ZERO and insn_ra(insn_in) /= "00000") then
--return (is_reg, insn_ra(insn_in), reg_data);
return ('1', insn_ra(insn_in), reg_data);
assert is_fast_spr(ispr) = '0' report "Decode A says GPR but ISPR says SPR:" &
to_hstring(ispr) severity failure;
return ('1', gpr_to_gspr(insn_ra(insn_in)), reg_data);
elsif t = SPR then
-- ISPR must be either a valid fast SPR number or all 0 for a slow SPR.
-- If it's all 0, we don't treat it as a dependency as slow SPRs
-- operations are single issue.
--
assert is_fast_spr(ispr) = '1' or ispr = "000000"
report "Decode A says SPR but ISPR is invalid:" &
to_hstring(ispr) severity failure;
return (is_fast_spr(ispr), ispr, reg_data);
else
return ('0', (others => '0'), (others => '0'));
end if;
end;

function decode_input_reg_b (t : input_reg_b_t; insn_in : std_ulogic_vector(31 downto 0);
reg_data : std_ulogic_vector(63 downto 0)) return decode_input_reg_t is
reg_data : std_ulogic_vector(63 downto 0);
ispr : gspr_index_t) return decode_input_reg_t is
begin
case t is
when RB =>
return ('1', insn_rb(insn_in), reg_data);
assert is_fast_spr(ispr) = '0' report "Decode B says GPR but ISPR says SPR:" &
to_hstring(ispr) severity failure;
return ('1', gpr_to_gspr(insn_rb(insn_in)), reg_data);
when CONST_UI =>
return ('0', (others => '0'), std_ulogic_vector(resize(unsigned(insn_ui(insn_in)), 64)));
when CONST_SI =>
@ -91,6 +108,14 @@ architecture behaviour of decode2 is
return ('0', (others => '0'), x"00000000000000" & "00" & insn_in(1) & insn_in(15 downto 11));
when CONST_SH32 =>
return ('0', (others => '0'), x"00000000000000" & "000" & insn_in(15 downto 11));
when SPR =>
-- ISPR must be either a valid fast SPR number or all 0 for a slow SPR.
-- If it's all 0, we don't treat it as a dependency as slow SPRs
-- operations are single issue.
assert is_fast_spr(ispr) = '1' or ispr = "000000"
report "Decode B says SPR but ISPR is invalid:" &
to_hstring(ispr) severity failure;
return (is_fast_spr(ispr), ispr, reg_data);
when NONE =>
return ('0', (others => '0'), (others => '0'));
end case;
@ -101,21 +126,30 @@ architecture behaviour of decode2 is
begin
case t is
when RS =>
return ('1', insn_rs(insn_in), reg_data);
return ('1', gpr_to_gspr(insn_rs(insn_in)), reg_data);
when NONE =>
return ('0', (others => '0'), (others => '0'));
end case;
end;

function decode_output_reg (t : output_reg_a_t; insn_in : std_ulogic_vector(31 downto 0)) return std_ulogic_vector is
function decode_output_reg (t : output_reg_a_t; insn_in : std_ulogic_vector(31 downto 0);
ispr : gspr_index_t) return decode_output_reg_t is
begin
case t is
when RT =>
return insn_rt(insn_in);
return ('1', gpr_to_gspr(insn_rt(insn_in)));
when RA =>
return insn_ra(insn_in);
return ('1', gpr_to_gspr(insn_ra(insn_in)));
when SPR =>
-- ISPR must be either a valid fast SPR number or all 0 for a slow SPR.
-- If it's all 0, we don't treat it as a dependency as slow SPRs
-- operations are single issue.
assert is_fast_spr(ispr) = '1' or ispr = "000000"
report "Decode B says SPR but ISPR is invalid:" &
to_hstring(ispr) severity failure;
return (is_fast_spr(ispr), ispr);
when NONE =>
return "00000";
return ('0', "000000");
end case;
end;

@ -131,22 +165,38 @@ architecture behaviour of decode2 is
end case;
end;

-- For now, use "rc" in the decode table to decide whether oe exists.
-- This is not entirely correct architecturally: For mulhd and
-- mulhdu, the OE field is reserved. It remains to be seen what an
-- actual POWER9 does if we set it on those instructions, for now we
-- test that further down when assigning to the multiplier oe input.
--
function decode_oe (t : rc_t; insn_in : std_ulogic_vector(31 downto 0)) return std_ulogic is
begin
case t is
when RC =>
return insn_oe(insn_in);
when OTHERS =>
return '0';
end case;
end;

-- issue control signals
signal control_valid_in : std_ulogic;
signal control_valid_out : std_ulogic;
signal control_sgl_pipe : std_logic;

signal gpr_write_valid : std_ulogic;
signal gpr_write : std_ulogic_vector(4 downto 0);
signal gpr_write : gspr_index_t;

signal gpr_a_read_valid : std_ulogic;
signal gpr_a_read : std_ulogic_vector(4 downto 0);
signal gpr_a_read :gspr_index_t;

signal gpr_b_read_valid : std_ulogic;
signal gpr_b_read : std_ulogic_vector(4 downto 0);
signal gpr_b_read : gspr_index_t;

signal gpr_c_read_valid : std_ulogic;
signal gpr_c_read : std_ulogic_vector(4 downto 0);
signal gpr_c_read : gpr_index_t;

signal cr_write_valid : std_ulogic;
begin
@ -160,6 +210,7 @@ begin

complete_in => complete_in,
valid_in => control_valid_in,
stall_in => stall_in,
flush_in => flush_in,
sgl_pipe_in => control_sgl_pipe,
stop_mark_in => d_in.stop_mark,
@ -194,8 +245,8 @@ begin
end if;
end process;

r_out.read1_reg <= insn_ra(d_in.insn);
r_out.read2_reg <= insn_rb(d_in.insn);
r_out.read1_reg <= gpr_or_spr_to_gspr(insn_ra(d_in.insn), d_in.ispr1);
r_out.read2_reg <= gpr_or_spr_to_gspr(insn_rb(d_in.insn), d_in.ispr2);
r_out.read3_reg <= insn_rs(d_in.insn);

c_out.read <= d_in.decode.input_cr;
@ -207,6 +258,7 @@ begin
variable decoded_reg_a : decode_input_reg_t;
variable decoded_reg_b : decode_input_reg_t;
variable decoded_reg_c : decode_input_reg_t;
variable decoded_reg_o : decode_output_reg_t;
variable signed_division: std_ulogic;
variable length : std_ulogic_vector(3 downto 0);
begin
@ -224,9 +276,10 @@ begin
--v.m.input_cr := d_in.decode.input_cr;
--v.e.output_cr := d_in.decode.output_cr;
decoded_reg_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, r_in.read1_data);
decoded_reg_b := decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn, r_in.read2_data);
decoded_reg_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, r_in.read1_data, d_in.ispr1);
decoded_reg_b := decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn, r_in.read2_data, d_in.ispr2);
decoded_reg_c := decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn, r_in.read3_data);
decoded_reg_o := decode_output_reg (d_in.decode.output_reg_a, d_in.insn, d_in.ispr1);

r_out.read1_enable <= decoded_reg_a.reg_valid;
r_out.read2_enable <= decoded_reg_b.reg_valid;
@ -253,9 +306,11 @@ begin
v.e.read_reg2 := decoded_reg_b.reg;
v.e.read_data2 := decoded_reg_b.data;
v.e.read_data3 := decoded_reg_c.data;
v.e.write_reg := decode_output_reg(d_in.decode.output_reg_a, d_in.insn);
v.e.write_reg := decoded_reg_o.reg;
v.e.rc := decode_rc(d_in.decode.rc, d_in.insn);
v.e.oe := decode_oe(d_in.decode.rc, d_in.insn);
v.e.cr := c_in.read_cr_data;
v.e.xerc := c_in.read_xerc_data;
v.e.invert_a := d_in.decode.invert_a;
v.e.invert_out := d_in.decode.invert_out;
v.e.input_carry := d_in.decode.input_carry;
@ -272,8 +327,13 @@ begin
v.m.insn_type := d_in.decode.insn_type;
mul_a := decoded_reg_a.data;
mul_b := decoded_reg_b.data;
v.m.write_reg := decode_output_reg(d_in.decode.output_reg_a, d_in.insn);
v.m.write_reg := gspr_to_gpr(decoded_reg_o.reg);
v.m.rc := decode_rc(d_in.decode.rc, d_in.insn);
v.m.xerc := c_in.read_xerc_data;
if v.m.insn_type = OP_MUL_L64 then
v.m.oe := decode_oe(d_in.decode.rc, d_in.insn);
end if;
v.m.is_32bit := d_in.decode.is_32bit;

if d_in.decode.is_32bit = '1' then
if d_in.decode.is_signed = '1' then
@ -304,7 +364,7 @@ begin
-- s = 1 for signed, 0 for unsigned (for div*)
-- t = 1 for 32-bit, 0 for 64-bit
-- r = RC bit (record condition code)
v.d.write_reg := decode_output_reg(d_in.decode.output_reg_a, d_in.insn);
v.d.write_reg := gspr_to_gpr(decoded_reg_o.reg);
v.d.is_modulus := not d_in.insn(8);
v.d.is_32bit := d_in.insn(2);
if d_in.insn(8) = '1' then
@ -337,13 +397,15 @@ begin
end if;
end if;
v.d.rc := decode_rc(d_in.decode.rc, d_in.insn);
v.d.xerc := c_in.read_xerc_data;
v.d.oe := decode_oe(d_in.decode.rc, d_in.insn);

-- load/store unit
v.l.update_reg := decoded_reg_a.reg;
v.l.update_reg := gspr_to_gpr(decoded_reg_a.reg);
v.l.addr1 := decoded_reg_a.data;
v.l.addr2 := decoded_reg_b.data;
v.l.data := decoded_reg_c.data;
v.l.write_reg := decode_output_reg(d_in.decode.output_reg_a, d_in.insn);
v.l.write_reg := gspr_to_gpr(decoded_reg_o.reg);

if d_in.decode.insn_type = OP_LOAD then
v.l.load := '1';
@ -355,13 +417,14 @@ begin
v.l.byte_reverse := d_in.decode.byte_reverse;
v.l.sign_extend := d_in.decode.sign_extend;
v.l.update := d_in.decode.update;
v.l.xerc := c_in.read_xerc_data;

-- issue control
control_valid_in <= d_in.valid;
control_sgl_pipe <= d_in.decode.sgl_pipe;

gpr_write_valid <= '1' when d_in.decode.output_reg_a /= NONE else '0';
gpr_write <= decode_output_reg(d_in.decode.output_reg_a, d_in.insn);
gpr_write_valid <= decoded_reg_o.reg_valid;
gpr_write <= decoded_reg_o.reg;

gpr_a_read_valid <= decoded_reg_a.reg_valid;
gpr_a_read <= decoded_reg_a.reg;
@ -370,7 +433,7 @@ begin
gpr_b_read <= decoded_reg_b.reg;

gpr_c_read_valid <= decoded_reg_c.reg_valid;
gpr_c_read <= decoded_reg_c.reg;
gpr_c_read <= gspr_to_gpr(decoded_reg_c.reg);

cr_write_valid <= d_in.decode.output_cr or decode_rc(d_in.decode.rc, d_in.insn);


@ -21,10 +21,10 @@ package decode_types is
OP_TWI, OP_XOR, OP_SIM_CONFIG
);

type input_reg_a_t is (NONE, RA, RA_OR_ZERO);
type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD, CONST_DS, CONST_M1, CONST_SH, CONST_SH32);
type input_reg_a_t is (NONE, RA, RA_OR_ZERO, SPR);
type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD, CONST_DS, CONST_M1, CONST_SH, CONST_SH32, SPR);
type input_reg_c_t is (NONE, RS);
type output_reg_a_t is (NONE, RT, RA);
type output_reg_a_t is (NONE, RT, RA, SPR);
type rc_t is (NONE, ONE, RC);
type carry_in_t is (ZERO, CA, ONE);


@ -20,7 +20,7 @@ architecture behaviour of divider is
signal div : unsigned(63 downto 0);
signal quot : std_ulogic_vector(63 downto 0);
signal result : std_ulogic_vector(63 downto 0);
signal sresult : std_ulogic_vector(63 downto 0);
signal sresult : std_ulogic_vector(64 downto 0);
signal oresult : std_ulogic_vector(63 downto 0);
signal qbit : std_ulogic;
signal running : std_ulogic;
@ -36,7 +36,8 @@ architecture behaviour of divider is
signal overflow : std_ulogic;
signal ovf32 : std_ulogic;
signal did_ovf : std_ulogic;

signal oe : std_ulogic;
signal xerc : xer_common_t;
begin
divider_0: process(clk)
begin
@ -62,6 +63,8 @@ begin
is_32bit <= d_in.is_32bit;
is_signed <= d_in.is_signed;
rc <= d_in.rc;
oe <= d_in.oe;
xerc <= d_in.xerc;
count <= "1111111";
running <= '1';
overflow <= '0';
@ -120,13 +123,13 @@ begin
result <= quot;
end if;
if neg_result = '1' then
sresult <= std_ulogic_vector(- signed(result));
sresult <= std_ulogic_vector(- signed('0' & result));
else
sresult <= result;
sresult <= '0' & result;
end if;
did_ovf <= '0';
if is_32bit = '0' then
did_ovf <= overflow or (is_signed and (sresult(63) xor neg_result));
did_ovf <= overflow or (is_signed and (sresult(64) xor sresult(63)));
elsif is_signed = '1' then
if ovf32 = '1' or sresult(32) /= sresult(31) then
did_ovf <= '1';
@ -140,20 +143,32 @@ begin
-- 32-bit divisions set the top 32 bits of the result to 0
oresult <= x"00000000" & sresult(31 downto 0);
else
oresult <= sresult;
oresult <= sresult(63 downto 0);
end if;
end process;

divider_out: process(clk)
begin
if rising_edge(clk) then
d_out.valid <= '0';
d_out.write_reg_data <= oresult;
d_out.write_reg_enable <= '0';
d_out.write_xerc_enable <= '0';
d_out.xerc <= xerc;
if count = "1000000" then
d_out.valid <= '1';
d_out.write_reg_enable <= '1';
else
d_out.valid <= '0';
d_out.write_reg_enable <= '0';
d_out.write_xerc_enable <= oe;

-- We must test oe because the RC update code in writeback
-- will use the xerc value to set CR0:SO so we must not clobber
-- xerc if OE wasn't set.
--
if oe = '1' then
d_out.xerc.ov <= did_ovf;
d_out.xerc.ov32 <= did_ovf;
d_out.xerc.so <= xerc.so or did_ovf;
end if;
end if;
end if;
end process;

@ -12,10 +12,11 @@ use work.ppc_fx_insns.all;

entity execute1 is
port (
clk : in std_logic;
clk : in std_ulogic;

-- asynchronous
flush_out : out std_ulogic;
stall_out : out std_ulogic;

e_in : in Decode2ToExecute1Type;

@ -31,14 +32,15 @@ end entity execute1;

architecture behaviour of execute1 is
type reg_type is record
--f : Execute1ToFetch1Type;
e : Execute1ToWritebackType;
lr_update : std_ulogic;
next_lr : std_ulogic_vector(63 downto 0);
end record;

signal r, rin : reg_type;

signal ctrl: ctrl_t := (carry => '0', others => (others => '0'));
signal ctrl_tmp: ctrl_t := (carry => '0', others => (others => '0'));
signal ctrl: ctrl_t := (others => (others => '0'));
signal ctrl_tmp: ctrl_t := (others => (others => '0'));

signal right_shift, rot_clear_left, rot_clear_right: std_ulogic;
signal rotator_result: std_ulogic_vector(63 downto 0);
@ -46,17 +48,46 @@ architecture behaviour of execute1 is
signal logical_result: std_ulogic_vector(63 downto 0);
signal countzero_result: std_ulogic_vector(63 downto 0);

function decode_input_carry (carry_sel : carry_in_t; ca_in : std_ulogic) return std_ulogic is
procedure set_carry(e: inout Execute1ToWritebackType;
carry32 : in std_ulogic;
carry : in std_ulogic) is
begin
case carry_sel is
e.xerc.ca32 := carry32;
e.xerc.ca := carry;
e.write_xerc_enable := '1';
end;

procedure set_ov(e: inout Execute1ToWritebackType;
ov : in std_ulogic;
ov32 : in std_ulogic) is
begin
e.xerc.ov32 := ov32;
e.xerc.ov := ov;
if ov = '1' then
e.xerc.so := '1';
end if;
e.write_xerc_enable := '1';
end;

function calc_ov(msb_a : std_ulogic; msb_b: std_ulogic;
ca: std_ulogic; msb_r: std_ulogic) return std_ulogic is
begin
return (ca xor msb_r) and not (msb_a xor msb_b);
end;

function decode_input_carry(ic : carry_in_t;
xerc : xer_common_t) return std_ulogic is
begin
case ic is
when ZERO =>
return '0';
when CA =>
return ca_in;
return xerc.ca;
when ONE =>
return '1';
end case;
end;

begin

rotator_0: entity work.rotator
@ -97,6 +128,12 @@ begin
if rising_edge(clk) then
r <= rin;
ctrl <= ctrl_tmp;
assert not (r.lr_update = '1' and e_in.valid = '1')
report "LR update collision with valid in EX1"
severity failure;
if r.lr_update = '1' then
report "LR update to " & to_hstring(r.next_lr);
end if;
end if;
end process;

@ -117,6 +154,7 @@ begin
variable bf, bfa : std_ulogic_vector(2 downto 0);
variable l : std_ulogic;
variable next_nia : std_ulogic_vector(63 downto 0);
variable carry_32, carry_64 : std_ulogic;
begin
result := (others => '0');
result_with_carry := (others => '0');
@ -125,7 +163,43 @@ begin

v := r;
v.e := Execute1ToWritebackInit;
--v.f := Execute1ToFetch1TypeInit;

-- XER forwarding. To avoid having to track XER hazards, we
-- use the previously latched value.
--
-- If the XER was modified by a multiply or a divide, those are
-- single issue, we'll get the up to date value from decode2 from
-- the register file.
--
-- If it was modified by an instruction older than the previous
-- one in EX1, it will have also hit writeback and will be up
-- to date in decode2.
--
-- That leaves us with the case where it was updated by the previous
-- instruction in EX1. In that case, we can forward it back here.
--
-- This will break if we allow pipelining of multiply and divide,
-- but ideally, those should go via EX1 anyway and run as a state
-- machine from here.
--
-- One additional hazard to beware of is an XER:SO modifying instruction
-- in EX1 followed immediately by a store conditional. Due to our
-- writeback latency, the store will go down the LSU with the previous
-- XER value, thus the stcx. will set CR0:SO using an obsolete SO value.
--
-- We will need to handle that if we ever make stcx. not single issue
--
-- We always pass a valid XER value downto writeback even when
-- we aren't updating it, in order for XER:SO -> CR0:SO transfer
-- to work for RC instructions.
--
if r.e.write_xerc_enable = '1' then
v.e.xerc := r.e.xerc;
else
v.e.xerc := e_in.xerc;
end if;

v.lr_update := '0';

ctrl_tmp <= ctrl;
-- FIXME: run at 512MHz not core freq
@ -133,6 +207,7 @@ begin

terminate_out <= '0';
icache_inval <= '0';
stall_out <= '0';
f_out <= Execute1ToFetch1TypeInit;

-- Next insn adder used in a couple of places
@ -163,10 +238,18 @@ begin
else
a_inv := not e_in.read_data1;
end if;
result_with_carry := ppc_adde(a_inv, e_in.read_data2, decode_input_carry(e_in.input_carry, ctrl.carry));
result_with_carry := ppc_adde(a_inv, e_in.read_data2,
decode_input_carry(e_in.input_carry, v.e.xerc));
result := result_with_carry(63 downto 0);
if e_in.output_carry then
ctrl_tmp.carry <= result_with_carry(64);
carry_32 := result(32) xor a_inv(32) xor e_in.read_data2(32);
carry_64 := result_with_carry(64);
if e_in.output_carry = '1' then
set_carry(v.e, carry_32, carry_64);
end if;
if e_in.oe = '1' then
set_ov(v.e,
calc_ov(a_inv(63), e_in.read_data2(63), carry_64, result_with_carry(63)),
calc_ov(a_inv(31), e_in.read_data2(31), carry_32, result_with_carry(31)));
end if;
result_en := '1';
when OP_AND | OP_OR | OP_XOR =>
@ -180,12 +263,15 @@ begin
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(e_in.read_data2));
end if;
when OP_BC =>
-- read_data1 is CTR
bo := insn_bo(e_in.insn);
bi := insn_bi(e_in.insn);
if bo(4-2) = '0' then
ctrl_tmp.ctr <= std_ulogic_vector(unsigned(ctrl.ctr) - 1);
result := std_ulogic_vector(unsigned(e_in.read_data1) - 1);
result_en := '1';
v.e.write_reg := fast_spr_num(SPR_CTR);
end if;
if ppc_bc_taken(bo, bi, e_in.cr, ctrl.ctr) = 1 then
if ppc_bc_taken(bo, bi, e_in.cr, e_in.read_data1) = 1 then
f_out.redirect <= '1';
if (insn_aa(e_in.insn)) then
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.read_data2));
@ -194,19 +280,18 @@ begin
end if;
end if;
when OP_BCREG =>
-- bits 10 and 6 distinguish between bclr, bcctr and bctar
-- read_data1 is CTR
-- read_data2 is target register (CTR, LR or TAR)
bo := insn_bo(e_in.insn);
bi := insn_bi(e_in.insn);
if bo(4-2) = '0' and e_in.insn(10) = '0' then
ctrl_tmp.ctr <= std_ulogic_vector(unsigned(ctrl.ctr) - 1);
result := std_ulogic_vector(unsigned(e_in.read_data1) - 1);
result_en := '1';
v.e.write_reg := fast_spr_num(SPR_CTR);
end if;
if ppc_bc_taken(bo, bi, e_in.cr, ctrl.ctr) = 1 then
if ppc_bc_taken(bo, bi, e_in.cr, e_in.read_data1) = 1 then
f_out.redirect <= '1';
if e_in.insn(10) = '0' then
f_out.redirect_nia <= ctrl.lr(63 downto 2) & "00";
else
f_out.redirect_nia <= ctrl.ctr(63 downto 2) & "00";
end if;
f_out.redirect_nia <= e_in.read_data2(63 downto 2) & "00";
end if;
when OP_CMPB =>
result := ppc_cmpb(e_in.read_data3, e_in.read_data2);
@ -220,7 +305,7 @@ begin
for i in 0 to 7 loop
lo := i*4;
hi := lo + 3;
v.e.write_cr_data(hi downto lo) := ppc_cmp(l, e_in.read_data1, e_in.read_data2);
v.e.write_cr_data(hi downto lo) := ppc_cmp(l, e_in.read_data1, e_in.read_data2, v.e.xerc.so);
end loop;
when OP_CMPL =>
bf := insn_bf(e_in.insn);
@ -231,7 +316,7 @@ begin
for i in 0 to 7 loop
lo := i*4;
hi := lo + 3;
v.e.write_cr_data(hi downto lo) := ppc_cmpl(l, e_in.read_data1, e_in.read_data2);
v.e.write_cr_data(hi downto lo) := ppc_cmpl(l, e_in.read_data1, e_in.read_data2, v.e.xerc.so);
end loop;
when OP_CNTZ =>
result := countzero_result;
@ -269,16 +354,24 @@ begin
v.e.write_cr_data(hi downto lo) := newcrf;
end loop;
when OP_MFSPR =>
if is_fast_spr(e_in.read_reg1) then
result := e_in.read_data1;
if decode_spr_num(e_in.insn) = SPR_XER then
result(63-32) := v.e.xerc.so;
result(63-33) := v.e.xerc.ov;
result(63-34) := v.e.xerc.ca;
result(63-35 downto 63-43) := "000000000";
result(63-44) := v.e.xerc.ov32;
result(63-45) := v.e.xerc.ca32;
end if;
else
case decode_spr_num(e_in.insn) is
when SPR_CTR =>
result := ctrl.ctr;
when SPR_LR =>
result := ctrl.lr;
when SPR_TB =>
result := ctrl.tb;
when others =>
result := (others => '0');
end case;
end if;
result_en := '1';
when OP_MFCR =>
if e_in.insn(20) = '0' then
@ -309,13 +402,25 @@ begin
end if;
v.e.write_cr_data := e_in.read_data3(31 downto 0);
when OP_MTSPR =>
case decode_spr_num(e_in.insn) is
when SPR_CTR =>
ctrl_tmp.ctr <= e_in.read_data3;
when SPR_LR =>
ctrl_tmp.lr <= e_in.read_data3;
when others =>
end case;
report "MTSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) &
"=" & to_hstring(e_in.read_data3);
if is_fast_spr(e_in.write_reg) then
result := e_in.read_data3;
result_en := '1';
if decode_spr_num(e_in.insn) = SPR_XER then
v.e.xerc.so := e_in.read_data3(63-32);
v.e.xerc.ov := e_in.read_data3(63-33);
v.e.xerc.ca := e_in.read_data3(63-34);
v.e.xerc.ov32 := e_in.read_data3(63-44);
v.e.xerc.ca32 := e_in.read_data3(63-45);
v.e.write_xerc_enable := '1';
end if;
else
-- TODO: Implement slow SPRs
-- case decode_spr_num(e_in.insn) is
-- when others =>
-- end case;
end if;
when OP_POPCNTB =>
result := ppc_popcntb(e_in.read_data3);
result_en := '1';
@ -334,7 +439,7 @@ begin
when OP_RLC | OP_RLCL | OP_RLCR | OP_SHL | OP_SHR =>
result := rotator_result;
if e_in.output_carry = '1' then
ctrl_tmp.carry <= rotator_carry;
set_carry(v.e, rotator_carry, rotator_carry);
end if;
result_en := '1';
when OP_SIM_CONFIG =>
@ -359,15 +464,36 @@ begin
report "illegal";
end case;

-- Update LR on the next cycle after a branch link
--
-- WARNING: The LR update isn't tracked by our hazard tracker. This
-- will work (well I hope) because it only happens on branches
-- which will flush all decoded instructions. By the time
-- fetch catches up, we'll have the new LR. This will
-- *not* work properly however if we have a branch predictor,
-- in which case the solution would probably be to keep a
-- local cache of the updated LR in execute1 (flushed on
-- exceptions) that is used instead of the value from
-- decode when its content is valid.
if e_in.lr = '1' then
ctrl_tmp.lr <= next_nia;
v.lr_update := '1';
v.next_lr := next_nia;
v.e.valid := '0';
report "Delayed LR update to " & to_hstring(next_nia);
stall_out <= '1';
end if;

elsif r.lr_update = '1' then
result_en := '1';
result := r.next_lr;
v.e.write_reg := fast_spr_num(SPR_LR);
v.e.write_len := x"8";
v.e.sign_extend := '0';
v.e.valid := '1';
end if;

v.e.write_data := result;
v.e.write_enable := result_en;
v.e.rc := e_in.rc;
v.e.rc := e_in.rc and e_in.valid;

-- Update registers
rin <= v;

@ -7,12 +7,13 @@ entity gpr_hazard is
PIPELINE_DEPTH : natural := 2
);
port(
clk : in std_logic;
clk : in std_ulogic;
stall_in : in std_ulogic;

gpr_write_valid_in : in std_ulogic;
gpr_write_in : in std_ulogic_vector(4 downto 0);
gpr_write_in : in std_ulogic_vector(5 downto 0);
gpr_read_valid_in : in std_ulogic;
gpr_read_in : in std_ulogic_vector(4 downto 0);
gpr_read_in : in std_ulogic_vector(5 downto 0);

stall_out : out std_ulogic
);
@ -20,7 +21,7 @@ end entity gpr_hazard;
architecture behaviour of gpr_hazard is
type pipeline_entry_type is record
valid : std_ulogic;
gpr : std_ulogic_vector(4 downto 0);
gpr : std_ulogic_vector(5 downto 0);
end record;
constant pipeline_entry_init : pipeline_entry_type := (valid => '0', gpr => (others => '0'));

@ -32,8 +33,10 @@ begin
gpr_hazard0: process(clk)
begin
if rising_edge(clk) then
if stall_in = '0' then
r <= rin;
end if;
end if;
end process;

gpr_hazard1: process(all)

@ -17,8 +17,8 @@ package helpers is

function cmp_one_byte(a, b: std_ulogic_vector(7 downto 0)) return std_ulogic_vector;

function ppc_signed_compare(a, b: signed(63 downto 0)) return std_ulogic_vector;
function ppc_unsigned_compare(a, b: unsigned(63 downto 0)) return std_ulogic_vector;
function ppc_signed_compare(a, b: signed(63 downto 0); so: std_ulogic) return std_ulogic_vector;
function ppc_unsigned_compare(a, b: unsigned(63 downto 0); so: std_ulogic) return std_ulogic_vector;

function ra_or_zero(ra: std_ulogic_vector(63 downto 0); reg: std_ulogic_vector(4 downto 0)) return std_ulogic_vector;

@ -126,32 +126,32 @@ package body helpers is
return ret;
end;

function ppc_signed_compare(a, b: signed(63 downto 0)) return std_ulogic_vector is
variable ret: std_ulogic_vector(3 downto 0);
function ppc_signed_compare(a, b: signed(63 downto 0); so: std_ulogic) return std_ulogic_vector is
variable ret: std_ulogic_vector(2 downto 0);
begin
if a < b then
ret := "1000";
ret := "100";
elsif a > b then
ret := "0100";
ret := "010";
else
ret := "0010";
ret := "001";
end if;

return ret;
return ret & so;
end;

function ppc_unsigned_compare(a, b: unsigned(63 downto 0)) return std_ulogic_vector is
variable ret: std_ulogic_vector(3 downto 0);
function ppc_unsigned_compare(a, b: unsigned(63 downto 0); so: std_ulogic) return std_ulogic_vector is
variable ret: std_ulogic_vector(2 downto 0);
begin
if a < b then
ret := "1000";
ret := "100";
elsif a > b then
ret := "0100";
ret := "010";
else
ret := "0010";
ret := "001";
end if;

return ret;
return ret & so;
end;

function ra_or_zero(ra: std_ulogic_vector(63 downto 0); reg: std_ulogic_vector(4 downto 0)) return std_ulogic_vector is

@ -16,6 +16,7 @@ package insn_helpers is
function insn_lk (insn_in : std_ulogic_vector) return std_ulogic;
function insn_aa (insn_in : std_ulogic_vector) return std_ulogic;
function insn_rc (insn_in : std_ulogic_vector) return std_ulogic;
function insn_oe (insn_in : std_ulogic_vector) return std_ulogic;
function insn_bd (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_bf (insn_in : std_ulogic_vector) return std_ulogic_vector;
function insn_bfa (insn_in : std_ulogic_vector) return std_ulogic_vector;
@ -103,6 +104,11 @@ package body insn_helpers is
return insn_in(0);
end;

function insn_oe (insn_in : std_ulogic_vector) return std_ulogic is
begin
return insn_in(10);
end;

function insn_bd (insn_in : std_ulogic_vector) return std_ulogic_vector is
begin
return insn_in(15 downto 2);

@ -47,6 +47,7 @@ begin
v.sign_extend := l_in.sign_extend;
v.update := l_in.update;
v.update_reg := l_in.update_reg;
v.xerc := l_in.xerc;

-- XXX Temporary hack. Mark the op as non-cachable if the address
-- is the form 0xc-------

@ -27,8 +27,17 @@ architecture behaviour of multiply is
data : signed(129 downto 0);
write_reg : std_ulogic_vector(4 downto 0);
rc : std_ulogic;
oe : std_ulogic;
is_32bit : std_ulogic;
xerc : xer_common_t;
end record;
constant MultiplyPipelineStageInit : multiply_pipeline_stage := (valid => '0', insn_type => OP_ILLEGAL, rc => '0', data => (others => '0'), others => (others => '0'));
constant MultiplyPipelineStageInit : multiply_pipeline_stage := (valid => '0',
insn_type => OP_ILLEGAL,
rc => '0', oe => '0',
is_32bit => '0',
xerc => xerc_init,
data => (others => '0'),
others => (others => '0'));

type multiply_pipeline_type is array(0 to PIPELINE_DEPTH-1) of multiply_pipeline_stage;
constant MultiplyPipelineInit : multiply_pipeline_type := (others => MultiplyPipelineStageInit);
@ -51,6 +60,7 @@ begin
variable v : reg_type;
variable d : std_ulogic_vector(129 downto 0);
variable d2 : std_ulogic_vector(63 downto 0);
variable ov : std_ulogic;
begin
v := r;

@ -61,16 +71,26 @@ begin
v.multiply_pipeline(0).data := signed(m.data1) * signed(m.data2);
v.multiply_pipeline(0).write_reg := m.write_reg;
v.multiply_pipeline(0).rc := m.rc;
v.multiply_pipeline(0).oe := m.oe;
v.multiply_pipeline(0).is_32bit := m.is_32bit;
v.multiply_pipeline(0).xerc := m.xerc;

loop_0: for i in 1 to PIPELINE_DEPTH-1 loop
v.multiply_pipeline(i) := r.multiply_pipeline(i-1);
end loop;

d := std_ulogic_vector(v.multiply_pipeline(PIPELINE_DEPTH-1).data);
ov := '0';

-- TODO: Handle overflows
case_0: case v.multiply_pipeline(PIPELINE_DEPTH-1).insn_type is
when OP_MUL_L64 =>
d2 := d(63 downto 0);
if v.multiply_pipeline(PIPELINE_DEPTH-1).is_32bit = '1' then
ov := (or d(63 downto 31)) and not (and d(63 downto 31));
else
ov := (or d(127 downto 63)) and not (and d(127 downto 63));
end if;
when OP_MUL_H32 =>
d2 := d(63 downto 32) & d(63 downto 32);
when OP_MUL_H64 =>
@ -82,11 +102,24 @@ begin

m_out.write_reg_data <= d2;
m_out.write_reg_nr <= v.multiply_pipeline(PIPELINE_DEPTH-1).write_reg;
m_out.xerc <= v.multiply_pipeline(PIPELINE_DEPTH-1).xerc;

-- Generate OV/OV32/SO when OE=1
if v.multiply_pipeline(PIPELINE_DEPTH-1).valid = '1' then
m_out.valid <= '1';
m_out.write_reg_enable <= '1';
m_out.rc <= v.multiply_pipeline(PIPELINE_DEPTH-1).rc;
m_out.write_xerc_enable <= v.multiply_pipeline(PIPELINE_DEPTH-1).oe;

-- We must test oe because the RC update code in writeback
-- will use the xerc value to set CR0:SO so we must not clobber
-- xerc if OE wasn't set.
--
if v.multiply_pipeline(PIPELINE_DEPTH-1).oe = '1' then
m_out.xerc.ov <= ov;
m_out.xerc.ov32 <= ov;
m_out.xerc.so <= v.multiply_pipeline(PIPELINE_DEPTH-1).xerc.so or ov;
end if;
end if;

rin <= v;

@ -77,10 +77,14 @@ package ppc_fx_insns is
function ppc_mulhw (ra, rb: std_ulogic_vector(63 downto 0)) return std_ulogic_vector;
function ppc_mulhwu (ra, rb: std_ulogic_vector(63 downto 0)) return std_ulogic_vector;

function ppc_cmpi (l: std_ulogic; ra: std_ulogic_vector(63 downto 0); si: std_ulogic_vector(15 downto 0)) return std_ulogic_vector;
function ppc_cmp (l: std_ulogic; ra, rb: std_ulogic_vector(63 downto 0)) return std_ulogic_vector;
function ppc_cmpli (l: std_ulogic; ra: std_ulogic_vector(63 downto 0); si: std_ulogic_vector(15 downto 0)) return std_ulogic_vector;
function ppc_cmpl (l: std_ulogic; ra, rb: std_ulogic_vector(63 downto 0)) return std_ulogic_vector;
function ppc_cmpi (l: std_ulogic; ra: std_ulogic_vector(63 downto 0); si: std_ulogic_vector(15 downto 0);
so: std_ulogic) return std_ulogic_vector;
function ppc_cmp (l: std_ulogic; ra, rb: std_ulogic_vector(63 downto 0);
so: std_ulogic) return std_ulogic_vector;
function ppc_cmpli (l: std_ulogic; ra: std_ulogic_vector(63 downto 0); si: std_ulogic_vector(15 downto 0);
so: std_ulogic) return std_ulogic_vector;
function ppc_cmpl (l: std_ulogic; ra, rb: std_ulogic_vector(63 downto 0);
so: std_ulogic) return std_ulogic_vector;

function ppc_cmpb (rs, rb: std_ulogic_vector(63 downto 0)) return std_ulogic_vector;

@ -90,7 +94,6 @@ package ppc_fx_insns is
function ppc_divwu (ra, rb: std_ulogic_vector(63 downto 0)) return std_ulogic_vector;

function ppc_bc_taken(bo, bi: std_ulogic_vector(4 downto 0); cr: std_ulogic_vector(31 downto 0); ctr: std_ulogic_vector(63 downto 0)) return integer;
function ppc_bcctr_taken(bo, bi: std_ulogic_vector(4 downto 0); cr: std_ulogic_vector(31 downto 0)) return integer;
end package ppc_fx_insns;

package body ppc_fx_insns is
@ -677,7 +680,8 @@ package body ppc_fx_insns is
return std_ulogic_vector(tmp(63 downto 32)) & std_ulogic_vector(tmp(63 downto 32));
end;

function ppc_cmpi (l: std_ulogic; ra: std_ulogic_vector(63 downto 0); si: std_ulogic_vector(15 downto 0)) return std_ulogic_vector is
function ppc_cmpi (l: std_ulogic; ra: std_ulogic_vector(63 downto 0); si: std_ulogic_vector(15 downto 0);
so: std_ulogic) return std_ulogic_vector is
variable tmp: signed(ra'range);
begin
tmp := signed(ra);
@ -685,10 +689,11 @@ package body ppc_fx_insns is
tmp := resize(signed(ra(31 downto 0)), tmp'length);
end if;

return ppc_signed_compare(tmp, resize(signed(si), tmp'length));
return ppc_signed_compare(tmp, resize(signed(si), tmp'length), so);
end;

function ppc_cmp (l: std_ulogic; ra, rb: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
function ppc_cmp (l: std_ulogic; ra, rb: std_ulogic_vector(63 downto 0);
so: std_ulogic) return std_ulogic_vector is
variable tmpa, tmpb: signed(ra'range);
begin
tmpa := signed(ra);
@ -698,10 +703,11 @@ package body ppc_fx_insns is
tmpb := resize(signed(rb(31 downto 0)), ra'length);
end if;

return ppc_signed_compare(tmpa, tmpb);
return ppc_signed_compare(tmpa, tmpb, so);
end;

function ppc_cmpli (l: std_ulogic; ra: std_ulogic_vector(63 downto 0); si: std_ulogic_vector(15 downto 0)) return std_ulogic_vector is
function ppc_cmpli (l: std_ulogic; ra: std_ulogic_vector(63 downto 0); si: std_ulogic_vector(15 downto 0);
so: std_ulogic) return std_ulogic_vector is
variable tmp: unsigned(ra'range);
begin
tmp := unsigned(ra);
@ -709,10 +715,11 @@ package body ppc_fx_insns is
tmp := resize(unsigned(ra(31 downto 0)), tmp'length);
end if;

return ppc_unsigned_compare(tmp, resize(unsigned(si), tmp'length));
return ppc_unsigned_compare(tmp, resize(unsigned(si), tmp'length), so);
end;

function ppc_cmpl (l: std_ulogic; ra, rb: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
function ppc_cmpl (l: std_ulogic; ra, rb: std_ulogic_vector(63 downto 0);
so: std_ulogic) return std_ulogic_vector is
variable tmpa, tmpb: unsigned(ra'range);
begin
tmpa := unsigned(ra);
@ -722,7 +729,7 @@ package body ppc_fx_insns is
tmpb := resize(unsigned(rb(31 downto 0)), ra'length);
end if;

return ppc_unsigned_compare(tmpa, tmpb);
return ppc_unsigned_compare(tmpa, tmpb, so);
end;

function ppc_cmpb (rs, rb: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
@ -801,21 +808,4 @@ package body ppc_fx_insns is
return ret;
end;

function ppc_bcctr_taken(bo, bi: std_ulogic_vector(4 downto 0); cr: std_ulogic_vector(31 downto 0)) return integer is
variable crfield: integer;
variable crbit_match: std_ulogic;
variable cond_ok: std_ulogic;
variable ret: integer;
begin
crfield := to_integer(unsigned(bi));
-- BE bit numbering
crbit_match := '1' when cr(31-crfield) = bo(4-1) else '0';
cond_ok := bo(4-0) or crbit_match;
if cond_ok = '1' then
ret := 1;
else
ret := 0;
end if;
return ret;
end;
end package body ppc_fx_insns;

@ -23,7 +23,7 @@ entity register_file is
end entity register_file;

architecture behaviour of register_file is
type regfile is array(0 to 31) of std_ulogic_vector(63 downto 0);
type regfile is array(0 to 63) of std_ulogic_vector(63 downto 0);
signal registers : regfile := (others => (others => '0'));
begin
-- synchronous writes
@ -32,7 +32,11 @@ begin
if rising_edge(clk) then
if w_in.write_enable = '1' then
assert not(is_x(w_in.write_data)) and not(is_x(w_in.write_reg)) severity failure;
if w_in.write_reg(5) = '0' then
report "Writing GPR " & to_hstring(w_in.write_reg) & " " & to_hstring(w_in.write_data);
else
report "Writing GSPR " & to_hstring(w_in.write_reg) & " " & to_hstring(w_in.write_data);
end if;
registers(to_integer(unsigned(w_in.write_reg))) <= w_in.write_data;
end if;
end if;
@ -52,7 +56,7 @@ begin
end if;
d_out.read1_data <= registers(to_integer(unsigned(d_in.read1_reg)));
d_out.read2_data <= registers(to_integer(unsigned(d_in.read2_reg)));
d_out.read3_data <= registers(to_integer(unsigned(d_in.read3_reg)));
d_out.read3_data <= registers(to_integer(unsigned(gpr_to_gspr(d_in.read3_reg))));

-- Forward any written data
if w_in.write_enable = '1' then
@ -62,7 +66,7 @@ begin
if d_in.read2_reg = w_in.write_reg then
d_out.read2_data <= w_in.write_data;
end if;
if d_in.read3_reg = w_in.write_reg then
if gpr_to_gspr(d_in.read3_reg) = w_in.write_reg then
d_out.read3_data <= w_in.write_data;
end if;
end if;

@ -62,6 +62,8 @@ begin
variable w : std_ulogic_vector(0 downto 0);
variable j : integer;
variable k : unsigned(3 downto 0);
variable cf: std_ulogic_vector(3 downto 0);
variable xe: xer_common_t;
begin
x := "" & e_in.valid;
y := "" & l_in.valid;
@ -81,6 +83,11 @@ begin
z := "" & (d_in.valid and d_in.rc);
assert (to_integer(unsigned(w)) + to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z))) <= 1 severity failure;

x := "" & e_in.write_xerc_enable;
y := "" & m_in.write_xerc_enable;
z := "" & D_in.write_xerc_enable;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z))) <= 1 severity failure;

w_out <= WritebackToRegisterFileInit;
c_out <= WritebackToCrFileInit;

@ -96,12 +103,12 @@ begin
partial_write <= '0';
sign_extend <= '0';
second_word <= '0';
data_in <= e_in.write_data;
xe := e_in.xerc;

if e_in.write_enable = '1' then
w_out.write_reg <= e_in.write_reg;
data_in <= e_in.write_data;
w_out.write_enable <= '1';
data_in <= e_in.write_data;
data_len <= unsigned(e_in.write_len);
sign_extend <= e_in.sign_extend;
rc <= e_in.rc;
@ -113,8 +120,13 @@ begin
c_out.write_cr_data <= e_in.write_cr_data;
end if;

if e_in.write_xerc_enable = '1' then
c_out.write_xerc_enable <= '1';
c_out.write_xerc_data <= e_in.xerc;
end if;

if l_in.write_enable = '1' then
w_out.write_reg <= l_in.write_reg;
w_out.write_reg <= gpr_to_gspr(l_in.write_reg);
data_in <= l_in.write_data;
data_len <= unsigned(l_in.write_len);
byte_offset <= unsigned(l_in.write_shift);
@ -127,20 +139,33 @@ begin
if l_in.valid = '0' and (data_len + byte_offset > 8) then
partial_write <= '1';
end if;
xe := l_in.xerc;
end if;

if m_in.write_reg_enable = '1' then
w_out.write_enable <= '1';
w_out.write_reg <= m_in.write_reg_nr;
w_out.write_reg <= gpr_to_gspr(m_in.write_reg_nr);
data_in <= m_in.write_reg_data;
rc <= m_in.rc;
xe := m_in.xerc;
end if;

if m_in.write_xerc_enable = '1' then
c_out.write_xerc_enable <= '1';
c_out.write_xerc_data <= m_in.xerc;
end if;

if d_in.write_reg_enable = '1' then
w_out.write_enable <= '1';
w_out.write_reg <= d_in.write_reg_nr;
w_out.write_reg <= gpr_to_gspr(d_in.write_reg_nr);
data_in <= d_in.write_reg_data;
rc <= d_in.rc;
xe := d_in.xerc;
end if;

if d_in.write_xerc_enable = '1' then
c_out.write_xerc_enable <= '1';
c_out.write_xerc_data <= d_in.xerc;
end if;

-- shift and byte-reverse data bytes
@ -193,17 +218,15 @@ begin
-- deliver to regfile
w_out.write_data <= data_trimmed;

-- test value against 0 and set CR0 if requested
-- Perform CR0 update for RC forms
if rc = '1' then
c_out.write_cr_enable <= '1';
c_out.write_cr_mask <= num_to_fxm(0);
if negative = '1' then
c_out.write_cr_data <= x"80000000";
elsif zero = '0' then
c_out.write_cr_data <= x"40000000";
else
c_out.write_cr_data <= x"20000000";
end if;
cf(3) := negative;
cf(2) := not negative and not zero;
cf(1) := zero;
cf(0) := xe.so;
c_out.write_cr_data(31 downto 28) <= cf;
end if;
end process;
end;

Loading…
Cancel
Save