sprs: Store common SPRs in register file

This stores the most common SPRs in the register file.

This includes CTR and LR and a not yet final list of others.

The register file is set to 64 entries for now. Specific types
are defined that can represent a GPR index (gpr_index_t) or
a GPR/SPR index (gspr_index_t) along with conversion functions
between the two.

In order to deal with some forms of branch updating both LR and
CTR, we introduced a delayed update of LR after a branch link.

Note: We currently stall the pipeline on such a delayed branch,
but we could avoid stalling fetch in that specific case as we
know we have a branch delay. We could also limit that to the
specific case where we need to update both CTR and LR.

This allows us to make bcreg, mtspr and mfspr pipelined. decode1
will automatically force the single issue flag on mfspr/mtspr to
a "slow" SPR.

[paulus@ozlabs.org - fix direction of decode2.stall_in]

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
jtag-port
Benjamin Herrenschmidt 5 years ago committed by Paul Mackerras
parent afdd593502
commit e4f475e17f

@ -14,7 +14,7 @@ all: $(all)
$(GHDL) -a $(GHDLFLAGS) $< $(GHDL) -a $(GHDLFLAGS) $<


common.o: decode_types.o common.o: decode_types.o
control.o: gpr_hazard.o cr_hazard.o control.o: gpr_hazard.o cr_hazard.o common.o
sim_jtag.o: sim_jtag_socket.o sim_jtag.o: sim_jtag_socket.o
core_tb.o: common.o wishbone_types.o core.o soc.o sim_jtag.o core_tb.o: common.o wishbone_types.o core.o soc.o sim_jtag.o
core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o loadstore1.o dcache.o multiply.o writeback.o core_debug.o divider.o core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o loadstore1.o dcache.o multiply.o writeback.o core_debug.o divider.o

@ -28,6 +28,12 @@ package common is
constant SPR_HSPRG0 : spr_num_t := 304; constant SPR_HSPRG0 : spr_num_t := 304;
constant SPR_HSPRG1 : spr_num_t := 305; constant SPR_HSPRG1 : spr_num_t := 305;


-- GPR indices in the register file (GPR only)
subtype gpr_index_t is std_ulogic_vector(4 downto 0);

-- Extended GPR index (can hold an SPR)
subtype gspr_index_t is std_ulogic_vector(5 downto 0);

-- Some SPRs are stored in the register file, they use the magic -- Some SPRs are stored in the register file, they use the magic
-- GPR numbers above 31. -- GPR numbers above 31.
-- --
@ -36,7 +42,13 @@ package common is
-- indicates if this is indeed a fast SPR. If clear, then -- indicates if this is indeed a fast SPR. If clear, then
-- the SPR is not stored in the GPR file. -- the SPR is not stored in the GPR file.
-- --
function fast_spr_num(spr: spr_num_t) return std_ulogic_vector; function fast_spr_num(spr: spr_num_t) return gspr_index_t;

-- Index conversion functions
function gspr_to_gpr(i: gspr_index_t) return gpr_index_t;
function gpr_to_gspr(i: gpr_index_t) return gspr_index_t;
function gpr_or_spr_to_gspr(g: gpr_index_t; s: gspr_index_t) return gspr_index_t;
function is_fast_spr(s: gspr_index_t) return std_ulogic;


-- The XER is split: the common bits (CA, OV, SO, OV32 and CA32) are -- The XER is split: the common bits (CA, OV, SO, OV32 and CA32) are
-- in the CR file as a kind of CR extension (with a separate write -- in the CR file as a kind of CR extension (with a separate write
@ -52,8 +64,6 @@ package common is


-- This needs to die... -- This needs to die...
type ctrl_t is record type ctrl_t is record
lr: std_ulogic_vector(63 downto 0);
ctr: std_ulogic_vector(63 downto 0);
tb: std_ulogic_vector(63 downto 0); tb: std_ulogic_vector(63 downto 0);
end record; end record;


@ -83,6 +93,8 @@ package common is
stop_mark : std_ulogic; stop_mark : std_ulogic;
nia: std_ulogic_vector(63 downto 0); nia: std_ulogic_vector(63 downto 0);
insn: std_ulogic_vector(31 downto 0); insn: std_ulogic_vector(31 downto 0);
ispr1: gspr_index_t; -- (G)SPR used for branch condition (CTR) or mfspr
ispr2: gspr_index_t; -- (G)SPR used for branch target (CTR, LR, TAR)
decode: decode_rom_t; decode: decode_rom_t;
end record; end record;
constant Decode1ToDecode2Init : Decode1ToDecode2Type := (valid => '0', stop_mark => '0', decode => decode_rom_init, others => (others => '0')); constant Decode1ToDecode2Init : Decode1ToDecode2Type := (valid => '0', stop_mark => '0', decode => decode_rom_init, others => (others => '0'));
@ -91,9 +103,9 @@ package common is
valid: std_ulogic; valid: std_ulogic;
insn_type: insn_type_t; insn_type: insn_type_t;
nia: std_ulogic_vector(63 downto 0); nia: std_ulogic_vector(63 downto 0);
write_reg: std_ulogic_vector(4 downto 0); write_reg: gspr_index_t;
read_reg1: std_ulogic_vector(4 downto 0); read_reg1: gspr_index_t;
read_reg2: std_ulogic_vector(4 downto 0); read_reg2: gspr_index_t;
read_data1: std_ulogic_vector(63 downto 0); read_data1: std_ulogic_vector(63 downto 0);
read_data2: std_ulogic_vector(63 downto 0); read_data2: std_ulogic_vector(63 downto 0);
read_data3: std_ulogic_vector(63 downto 0); read_data3: std_ulogic_vector(63 downto 0);
@ -121,7 +133,7 @@ package common is
type Decode2ToMultiplyType is record type Decode2ToMultiplyType is record
valid: std_ulogic; valid: std_ulogic;
insn_type: insn_type_t; insn_type: insn_type_t;
write_reg: std_ulogic_vector(4 downto 0); write_reg: gpr_index_t;
data1: std_ulogic_vector(64 downto 0); data1: std_ulogic_vector(64 downto 0);
data2: std_ulogic_vector(64 downto 0); data2: std_ulogic_vector(64 downto 0);
rc: std_ulogic; rc: std_ulogic;
@ -135,7 +147,7 @@ package common is


type Decode2ToDividerType is record type Decode2ToDividerType is record
valid: std_ulogic; valid: std_ulogic;
write_reg: std_ulogic_vector(4 downto 0); write_reg: gpr_index_t;
dividend: std_ulogic_vector(63 downto 0); dividend: std_ulogic_vector(63 downto 0);
divisor: std_ulogic_vector(63 downto 0); divisor: std_ulogic_vector(63 downto 0);
is_signed: std_ulogic; is_signed: std_ulogic;
@ -153,11 +165,11 @@ package common is


type Decode2ToRegisterFileType is record type Decode2ToRegisterFileType is record
read1_enable : std_ulogic; read1_enable : std_ulogic;
read1_reg : std_ulogic_vector(4 downto 0); read1_reg : gspr_index_t;
read2_enable : std_ulogic; read2_enable : std_ulogic;
read2_reg : std_ulogic_vector(4 downto 0); read2_reg : gspr_index_t;
read3_enable : std_ulogic; read3_enable : std_ulogic;
read3_reg : std_ulogic_vector(4 downto 0); read3_reg : gpr_index_t;
end record; end record;


type RegisterFileToDecode2Type is record type RegisterFileToDecode2Type is record
@ -187,12 +199,12 @@ package common is
addr1 : std_ulogic_vector(63 downto 0); addr1 : std_ulogic_vector(63 downto 0);
addr2 : std_ulogic_vector(63 downto 0); addr2 : std_ulogic_vector(63 downto 0);
data : std_ulogic_vector(63 downto 0); -- data to write, unused for read data : std_ulogic_vector(63 downto 0); -- data to write, unused for read
write_reg : std_ulogic_vector(4 downto 0); -- read data goes to this register write_reg : gpr_index_t;
length : std_ulogic_vector(3 downto 0); length : std_ulogic_vector(3 downto 0);
byte_reverse : std_ulogic; byte_reverse : std_ulogic;
sign_extend : std_ulogic; -- do we need to sign extend? sign_extend : std_ulogic; -- do we need to sign extend?
update : std_ulogic; -- is this an update instruction? update : std_ulogic; -- is this an update instruction?
update_reg : std_ulogic_vector(4 downto 0); -- if so, the register to update update_reg : gpr_index_t; -- if so, the register to update
xerc : xer_common_t; xerc : xer_common_t;
end record; end record;
constant Decode2ToLoadstore1Init : Decode2ToLoadstore1Type := (valid => '0', load => '0', byte_reverse => '0', constant Decode2ToLoadstore1Init : Decode2ToLoadstore1Type := (valid => '0', load => '0', byte_reverse => '0',
@ -205,19 +217,19 @@ package common is
nc : std_ulogic; nc : std_ulogic;
addr : std_ulogic_vector(63 downto 0); addr : std_ulogic_vector(63 downto 0);
data : std_ulogic_vector(63 downto 0); data : std_ulogic_vector(63 downto 0);
write_reg : std_ulogic_vector(4 downto 0); write_reg : gpr_index_t;
length : std_ulogic_vector(3 downto 0); length : std_ulogic_vector(3 downto 0);
byte_reverse : std_ulogic; byte_reverse : std_ulogic;
sign_extend : std_ulogic; sign_extend : std_ulogic;
update : std_ulogic; update : std_ulogic;
update_reg : std_ulogic_vector(4 downto 0); update_reg : gpr_index_t;
xerc : xer_common_t; xerc : xer_common_t;
end record; end record;


type DcacheToWritebackType is record type DcacheToWritebackType is record
valid : std_ulogic; valid : std_ulogic;
write_enable: std_ulogic; write_enable: std_ulogic;
write_reg : std_ulogic_vector(4 downto 0); write_reg : gpr_index_t;
write_data : std_ulogic_vector(63 downto 0); write_data : std_ulogic_vector(63 downto 0);
write_len : std_ulogic_vector(3 downto 0); write_len : std_ulogic_vector(3 downto 0);
write_shift : std_ulogic_vector(2 downto 0); write_shift : std_ulogic_vector(2 downto 0);
@ -234,7 +246,7 @@ package common is
valid: std_ulogic; valid: std_ulogic;
rc : std_ulogic; rc : std_ulogic;
write_enable : std_ulogic; write_enable : std_ulogic;
write_reg: std_ulogic_vector(4 downto 0); write_reg: gspr_index_t;
write_data: std_ulogic_vector(63 downto 0); write_data: std_ulogic_vector(63 downto 0);
write_len : std_ulogic_vector(3 downto 0); write_len : std_ulogic_vector(3 downto 0);
write_cr_enable : std_ulogic; write_cr_enable : std_ulogic;
@ -253,7 +265,7 @@ package common is
valid: std_ulogic; valid: std_ulogic;


write_reg_enable : std_ulogic; write_reg_enable : std_ulogic;
write_reg_nr: std_ulogic_vector(4 downto 0); write_reg_nr: gpr_index_t;
write_reg_data: std_ulogic_vector(63 downto 0); write_reg_data: std_ulogic_vector(63 downto 0);
write_xerc_enable : std_ulogic; write_xerc_enable : std_ulogic;
xerc : xer_common_t; xerc : xer_common_t;
@ -268,7 +280,7 @@ package common is
valid: std_ulogic; valid: std_ulogic;


write_reg_enable : std_ulogic; write_reg_enable : std_ulogic;
write_reg_nr: std_ulogic_vector(4 downto 0); write_reg_nr: gpr_index_t;
write_reg_data: std_ulogic_vector(63 downto 0); write_reg_data: std_ulogic_vector(63 downto 0);
write_xerc_enable : std_ulogic; write_xerc_enable : std_ulogic;
xerc : xer_common_t; xerc : xer_common_t;
@ -280,7 +292,7 @@ package common is
others => (others => '0')); others => (others => '0'));


type WritebackToRegisterFileType is record type WritebackToRegisterFileType is record
write_reg : std_ulogic_vector(4 downto 0); write_reg : gspr_index_t;
write_data : std_ulogic_vector(63 downto 0); write_data : std_ulogic_vector(63 downto 0);
write_enable : std_ulogic; write_enable : std_ulogic;
end record; end record;
@ -303,7 +315,7 @@ package body common is
begin begin
return to_integer(unsigned(insn(15 downto 11) & insn(20 downto 16))); return to_integer(unsigned(insn(15 downto 11) & insn(20 downto 16)));
end; end;
function fast_spr_num(spr: spr_num_t) return std_ulogic_vector is function fast_spr_num(spr: spr_num_t) return gspr_index_t is
variable n : integer range 0 to 31; variable n : integer range 0 to 31;
begin begin
case spr is case spr is
@ -338,4 +350,28 @@ package body common is
end case; end case;
return "1" & std_ulogic_vector(to_unsigned(n, 5)); return "1" & std_ulogic_vector(to_unsigned(n, 5));
end; end;

-- Narrow a 6-bit GPR/SPR register-file index to a 5-bit GPR index.
-- Bit 5 of the input is discarded; only bits 4..0 are returned.
function gspr_to_gpr(i: gspr_index_t) return gpr_index_t is
    variable gpr_part : gpr_index_t;
begin
    gpr_part := i(4 downto 0);
    return gpr_part;
end;

-- Widen a 5-bit GPR index to a 6-bit GPR/SPR register-file index.
-- The added top bit (bit 5) is cleared, so the result never reads as
-- a fast SPR (see is_fast_spr).
function gpr_to_gspr(i: gpr_index_t) return gspr_index_t is
    variable widened : gspr_index_t;
begin
    widened(5)          := '0';
    widened(4 downto 0) := i;
    return widened;
end;

-- Choose between an SPR index and a GPR index.
--   g : GPR index, used when s is not flagged as an SPR
--   s : candidate GPR/SPR index; bit 5 set means it is returned unchanged
-- Any value of s(5) other than '1' selects the widened GPR index.
function gpr_or_spr_to_gspr(g: gpr_index_t; s: gspr_index_t) return gspr_index_t is
begin
    case s(5) is
        when '1' =>
            return s;
        when others =>
            return gpr_to_gspr(g);
    end case;
end;

-- Return bit 5 of a GPR/SPR index. fast_spr_num sets this bit in the
-- values it returns, so it serves as the "held in the register file as
-- an SPR" flag.
function is_fast_spr(s: gspr_index_t) return std_ulogic is
    variable spr_flag : std_ulogic;
begin
    spr_flag := s(5);
    return spr_flag;
end;
end common; end common;

@ -1,6 +1,9 @@
library ieee; library ieee;
use ieee.std_logic_1164.all; use ieee.std_logic_1164.all;


library work;
use work.common.all;

entity control is entity control is
generic ( generic (
PIPELINE_DEPTH : natural := 2 PIPELINE_DEPTH : natural := 2
@ -12,20 +15,21 @@ entity control is
complete_in : in std_ulogic; complete_in : in std_ulogic;
valid_in : in std_ulogic; valid_in : in std_ulogic;
flush_in : in std_ulogic; flush_in : in std_ulogic;
stall_in : in std_ulogic;
sgl_pipe_in : in std_ulogic; sgl_pipe_in : in std_ulogic;
stop_mark_in : in std_ulogic; stop_mark_in : in std_ulogic;


gpr_write_valid_in : in std_ulogic; gpr_write_valid_in : in std_ulogic;
gpr_write_in : in std_ulogic_vector(4 downto 0); gpr_write_in : in gspr_index_t;


gpr_a_read_valid_in : in std_ulogic; gpr_a_read_valid_in : in std_ulogic;
gpr_a_read_in : in std_ulogic_vector(4 downto 0); gpr_a_read_in : in gspr_index_t;


gpr_b_read_valid_in : in std_ulogic; gpr_b_read_valid_in : in std_ulogic;
gpr_b_read_in : in std_ulogic_vector(4 downto 0); gpr_b_read_in : in gspr_index_t;


gpr_c_read_valid_in : in std_ulogic; gpr_c_read_valid_in : in std_ulogic;
gpr_c_read_in : in std_ulogic_vector(4 downto 0); gpr_c_read_in : in gpr_index_t;


cr_read_in : in std_ulogic; cr_read_in : in std_ulogic;
cr_write_in : in std_ulogic; cr_write_in : in std_ulogic;
@ -61,6 +65,7 @@ begin
) )
port map ( port map (
clk => clk, clk => clk,
stall_in => stall_in,


gpr_write_valid_in => gpr_write_valid, gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in, gpr_write_in => gpr_write_in,
@ -76,6 +81,7 @@ begin
) )
port map ( port map (
clk => clk, clk => clk,
stall_in => stall_in,


gpr_write_valid_in => gpr_write_valid, gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in, gpr_write_in => gpr_write_in,
@ -91,11 +97,12 @@ begin
) )
port map ( port map (
clk => clk, clk => clk,
stall_in => stall_in,


gpr_write_valid_in => gpr_write_valid, gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write_in, gpr_write_in => gpr_write_in,
gpr_read_valid_in => gpr_c_read_valid_in, gpr_read_valid_in => gpr_c_read_valid_in,
gpr_read_in => gpr_c_read_in, gpr_read_in => "0" & gpr_c_read_in,


stall_out => stall_c_out stall_out => stall_c_out
); );
@ -106,6 +113,7 @@ begin
) )
port map ( port map (
clk => clk, clk => clk,
stall_in => stall_in,


cr_read_in => cr_read_in, cr_read_in => cr_read_in,
cr_write_in => cr_write_valid, cr_write_in => cr_write_valid,
@ -129,8 +137,8 @@ begin
v_int := r_int; v_int := r_int;


-- asynchronous -- asynchronous
valid_tmp := valid_in and not flush_in; valid_tmp := valid_in and not flush_in and not stall_in;
stall_tmp := '0'; stall_tmp := stall_in;


if complete_in = '1' then if complete_in = '1' then
v_int.outstanding := r_int.outstanding - 1; v_int.outstanding := r_int.outstanding - 1;

@ -76,8 +76,10 @@ architecture behave of core is
signal icache_stall_out : std_ulogic; signal icache_stall_out : std_ulogic;
signal fetch2_stall_in : std_ulogic; signal fetch2_stall_in : std_ulogic;
signal decode1_stall_in : std_ulogic; signal decode1_stall_in : std_ulogic;
signal decode2_stall_in : std_ulogic;
signal decode2_stall_out : std_ulogic; signal decode2_stall_out : std_ulogic;
signal ex1_icache_inval: std_ulogic; signal ex1_icache_inval: std_ulogic;
signal ex1_stall_out: std_ulogic;


signal flush: std_ulogic; signal flush: std_ulogic;


@ -184,6 +186,7 @@ begin
port map ( port map (
clk => clk, clk => clk,
rst => core_rst, rst => core_rst,
stall_in => decode2_stall_in,
stall_out => decode2_stall_out, stall_out => decode2_stall_out,
flush_in => flush, flush_in => flush,
complete_in => complete, complete_in => complete,
@ -198,6 +201,7 @@ begin
c_in => cr_file_to_decode2, c_in => cr_file_to_decode2,
c_out => decode2_to_cr_file c_out => decode2_to_cr_file
); );
decode2_stall_in <= ex1_stall_out;


register_file_0: entity work.register_file register_file_0: entity work.register_file
generic map ( generic map (
@ -223,6 +227,7 @@ begin
port map ( port map (
clk => clk, clk => clk,
flush_out => flush, flush_out => flush,
stall_out => ex1_stall_out,
e_in => decode2_to_execute1, e_in => decode2_to_execute1,
f_out => execute1_to_fetch1, f_out => execute1_to_fetch1,
e_out => execute1_to_writeback, e_out => execute1_to_writeback,

@ -7,7 +7,8 @@ entity cr_hazard is
PIPELINE_DEPTH : natural := 2 PIPELINE_DEPTH : natural := 2
); );
port( port(
clk : in std_logic; clk : in std_ulogic;
stall_in : in std_ulogic;


cr_read_in : in std_ulogic; cr_read_in : in std_ulogic;
cr_write_in : in std_ulogic; cr_write_in : in std_ulogic;
@ -29,7 +30,9 @@ begin
cr_hazard0: process(clk) cr_hazard0: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
r <= rin; if stall_in = '0' then
r <= rin;
end if;
end if; end if;
end process; end process;



@ -43,7 +43,7 @@ architecture behaviour of decode1 is
28 => (ALU, OP_AND, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0'), -- andi. 28 => (ALU, OP_AND, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0'), -- andi.
29 => (ALU, OP_AND, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0'), -- andis. 29 => (ALU, OP_AND, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0'), -- andis.
18 => (ALU, OP_B, NONE, CONST_LI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0'), -- b 18 => (ALU, OP_B, NONE, CONST_LI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0'), -- b
16 => (ALU, OP_BC, NONE, CONST_BD, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0'), -- bc 16 => (ALU, OP_BC, SPR, CONST_BD, NONE, SPR , '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0'), -- bc
11 => (ALU, OP_CMP, RA, CONST_SI, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpi 11 => (ALU, OP_CMP, RA, CONST_SI, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpi
10 => (ALU, OP_CMPL, RA, CONST_UI, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpli 10 => (ALU, OP_CMPL, RA, CONST_UI, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpli
34 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- lbz 34 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- lbz
@ -106,7 +106,7 @@ architecture behaviour of decode1 is
-- addpcis not implemented yet -- addpcis not implemented yet
2#001# => (ALU, OP_ILLEGAL, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), 2#001# => (ALU, OP_ILLEGAL, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'),
-- bclr, bcctr, bctar -- bclr, bcctr, bctar
2#100# => (ALU, OP_BCREG, NONE, NONE, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '1'), 2#100# => (ALU, OP_BCREG, SPR, SPR, NONE, SPR, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0'),
-- isync -- isync
2#111# => (ALU, OP_ISYNC, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), 2#111# => (ALU, OP_ISYNC, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'),
others => illegal_inst others => illegal_inst
@ -237,13 +237,13 @@ architecture behaviour of decode1 is
-- 2#1000000000# mcrxr -- 2#1000000000# mcrxr
-- 2#1001000000# mcrxrx -- 2#1001000000# mcrxrx
2#0000010011# => (ALU, OP_MFCR, NONE, NONE, NONE, RT, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mfcr/mfocrf 2#0000010011# => (ALU, OP_MFCR, NONE, NONE, NONE, RT, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mfcr/mfocrf
2#0101010011# => (ALU, OP_MFSPR, NONE, NONE, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- mfspr 2#0101010011# => (ALU, OP_MFSPR, SPR, NONE, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mfspr
2#0100001001# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- modud 2#0100001001# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- modud
2#0100001011# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- moduw 2#0100001011# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- moduw
2#1100001001# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- modsd 2#1100001001# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- modsd
2#1100001011# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- modsw 2#1100001011# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- modsw
2#0010010000# => (ALU, OP_MTCRF, NONE, NONE, RS, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mtcrf/mtocrf 2#0010010000# => (ALU, OP_MTCRF, NONE, NONE, RS, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mtcrf/mtocrf
2#0111010011# => (ALU, OP_MTSPR, NONE, NONE, RS, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- mtspr 2#0111010011# => (ALU, OP_MTSPR, NONE, NONE, RS, SPR, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mtspr
2#0001001001# => (MUL, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- mulhd 2#0001001001# => (MUL, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- mulhd
2#0000001001# => (MUL, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- mulhdu 2#0000001001# => (MUL, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- mulhdu
2#0001001011# => (MUL, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- mulhw 2#0001001011# => (MUL, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- mulhw
@ -355,6 +355,8 @@ begin
v.nia := f_in.nia; v.nia := f_in.nia;
v.insn := f_in.insn; v.insn := f_in.insn;
v.stop_mark := f_in.stop_mark; v.stop_mark := f_in.stop_mark;
v.ispr1 := (others => '0');
v.ispr2 := (others => '0');


if f_in.valid = '1' then if f_in.valid = '1' then
report "Decode insn " & to_hstring(f_in.insn) & " at " & to_hstring(f_in.nia); report "Decode insn " & to_hstring(f_in.insn) & " at " & to_hstring(f_in.nia);
@ -398,6 +400,33 @@ begin
v.decode := major_decode_rom_array(to_integer(majorop)); v.decode := major_decode_rom_array(to_integer(majorop));
end if; end if;


-- Set ISPR1/ISPR2 when needed
if v.decode.insn_type = OP_BC or v.decode.insn_type = OP_BCREG then
-- Branch uses CTR as condition when BO(2) is 0. This is
-- also used to indicate that CTR is modified (they go
-- together).
--
if f_in.insn(23) = '0' then
v.ispr1 := fast_spr_num(SPR_CTR);
end if;

-- Branch source register is an SPR
if v.decode.insn_type = OP_BCREG then
-- TODO: Add TAR
if f_in.insn(10) = '0' then
v.ispr2 := fast_spr_num(SPR_LR);
else
v.ispr2 := fast_spr_num(SPR_CTR);
end if;
end if;
elsif v.decode.insn_type = OP_MFSPR or v.decode.insn_type = OP_MTSPR then
v.ispr1 := fast_spr_num(decode_spr_num(f_in.insn));
-- Make slow SPRs single issue
if is_fast_spr(v.ispr1) = '0' then
v.decode.sgl_pipe := '1';
end if;
end if;

if flush_in = '1' then if flush_in = '1' then
v.valid := '0'; v.valid := '0';
end if; end if;

@ -14,6 +14,7 @@ entity decode2 is
rst : in std_ulogic; rst : in std_ulogic;


complete_in : in std_ulogic; complete_in : in std_ulogic;
stall_in : in std_ulogic;
stall_out : out std_ulogic; stall_out : out std_ulogic;


stopped_out : out std_ulogic; stopped_out : out std_ulogic;
@ -47,30 +48,49 @@ architecture behaviour of decode2 is


type decode_input_reg_t is record type decode_input_reg_t is record
reg_valid : std_ulogic; reg_valid : std_ulogic;
reg : std_ulogic_vector(4 downto 0); reg : gspr_index_t;
data : std_ulogic_vector(63 downto 0); data : std_ulogic_vector(63 downto 0);
end record; end record;


type decode_output_reg_t is record
reg_valid : std_ulogic;
reg : gspr_index_t;
end record;

function decode_input_reg_a (t : input_reg_a_t; insn_in : std_ulogic_vector(31 downto 0); function decode_input_reg_a (t : input_reg_a_t; insn_in : std_ulogic_vector(31 downto 0);
reg_data : std_ulogic_vector(63 downto 0)) return decode_input_reg_t is reg_data : std_ulogic_vector(63 downto 0);
ispr : gspr_index_t) return decode_input_reg_t is
variable is_reg : std_ulogic; variable is_reg : std_ulogic;
begin begin
is_reg := '0' when insn_ra(insn_in) = "00000" else '1'; is_reg := '0' when insn_ra(insn_in) = "00000" else '1';


if t = RA or (t = RA_OR_ZERO and insn_ra(insn_in) /= "00000") then if t = RA or (t = RA_OR_ZERO and insn_ra(insn_in) /= "00000") then
--return (is_reg, insn_ra(insn_in), reg_data); assert is_fast_spr(ispr) = '0' report "Decode A says GPR but ISPR says SPR:" &
return ('1', insn_ra(insn_in), reg_data); to_hstring(ispr) severity failure;
return ('1', gpr_to_gspr(insn_ra(insn_in)), reg_data);
elsif t = SPR then
-- ISPR must be either a valid fast SPR number or all 0 for a slow SPR.
-- If it's all 0, we don't treat it as a dependency as slow SPRs
-- operations are single issue.
--
assert is_fast_spr(ispr) = '1' or ispr = "000000"
report "Decode A says SPR but ISPR is invalid:" &
to_hstring(ispr) severity failure;
return (is_fast_spr(ispr), ispr, reg_data);
else else
return ('0', (others => '0'), (others => '0')); return ('0', (others => '0'), (others => '0'));
end if; end if;
end; end;


function decode_input_reg_b (t : input_reg_b_t; insn_in : std_ulogic_vector(31 downto 0); function decode_input_reg_b (t : input_reg_b_t; insn_in : std_ulogic_vector(31 downto 0);
reg_data : std_ulogic_vector(63 downto 0)) return decode_input_reg_t is reg_data : std_ulogic_vector(63 downto 0);
ispr : gspr_index_t) return decode_input_reg_t is
begin begin
case t is case t is
when RB => when RB =>
return ('1', insn_rb(insn_in), reg_data); assert is_fast_spr(ispr) = '0' report "Decode B says GPR but ISPR says SPR:" &
to_hstring(ispr) severity failure;
return ('1', gpr_to_gspr(insn_rb(insn_in)), reg_data);
when CONST_UI => when CONST_UI =>
return ('0', (others => '0'), std_ulogic_vector(resize(unsigned(insn_ui(insn_in)), 64))); return ('0', (others => '0'), std_ulogic_vector(resize(unsigned(insn_ui(insn_in)), 64)));
when CONST_SI => when CONST_SI =>
@ -91,6 +111,14 @@ architecture behaviour of decode2 is
return ('0', (others => '0'), x"00000000000000" & "00" & insn_in(1) & insn_in(15 downto 11)); return ('0', (others => '0'), x"00000000000000" & "00" & insn_in(1) & insn_in(15 downto 11));
when CONST_SH32 => when CONST_SH32 =>
return ('0', (others => '0'), x"00000000000000" & "000" & insn_in(15 downto 11)); return ('0', (others => '0'), x"00000000000000" & "000" & insn_in(15 downto 11));
when SPR =>
-- ISPR must be either a valid fast SPR number or all 0 for a slow SPR.
-- If it's all 0, we don't treat it as a dependency as slow SPRs
-- operations are single issue.
assert is_fast_spr(ispr) = '1' or ispr = "000000"
report "Decode B says SPR but ISPR is invalid:" &
to_hstring(ispr) severity failure;
return (is_fast_spr(ispr), ispr, reg_data);
when NONE => when NONE =>
return ('0', (others => '0'), (others => '0')); return ('0', (others => '0'), (others => '0'));
end case; end case;
@ -101,21 +129,30 @@ architecture behaviour of decode2 is
begin begin
case t is case t is
when RS => when RS =>
return ('1', insn_rs(insn_in), reg_data); return ('1', gpr_to_gspr(insn_rs(insn_in)), reg_data);
when NONE => when NONE =>
return ('0', (others => '0'), (others => '0')); return ('0', (others => '0'), (others => '0'));
end case; end case;
end; end;


function decode_output_reg (t : output_reg_a_t; insn_in : std_ulogic_vector(31 downto 0)) return std_ulogic_vector is function decode_output_reg (t : output_reg_a_t; insn_in : std_ulogic_vector(31 downto 0);
ispr : gspr_index_t) return decode_output_reg_t is
begin begin
case t is case t is
when RT => when RT =>
return insn_rt(insn_in); return ('1', gpr_to_gspr(insn_rt(insn_in)));
when RA => when RA =>
return insn_ra(insn_in); return ('1', gpr_to_gspr(insn_ra(insn_in)));
when SPR =>
-- ISPR must be either a valid fast SPR number or all 0 for a slow SPR.
-- If it's all 0, we don't treat it as a dependency as slow SPRs
-- operations are single issue.
assert is_fast_spr(ispr) = '1' or ispr = "000000"
report "Decode B says SPR but ISPR is invalid:" &
to_hstring(ispr) severity failure;
return (is_fast_spr(ispr), ispr);
when NONE => when NONE =>
return "00000"; return ('0', "000000");
end case; end case;
end; end;


@ -153,16 +190,16 @@ architecture behaviour of decode2 is
signal control_sgl_pipe : std_logic; signal control_sgl_pipe : std_logic;


signal gpr_write_valid : std_ulogic; signal gpr_write_valid : std_ulogic;
signal gpr_write : std_ulogic_vector(4 downto 0); signal gpr_write : gspr_index_t;


signal gpr_a_read_valid : std_ulogic; signal gpr_a_read_valid : std_ulogic;
signal gpr_a_read : std_ulogic_vector(4 downto 0); signal gpr_a_read :gspr_index_t;


signal gpr_b_read_valid : std_ulogic; signal gpr_b_read_valid : std_ulogic;
signal gpr_b_read : std_ulogic_vector(4 downto 0); signal gpr_b_read : gspr_index_t;


signal gpr_c_read_valid : std_ulogic; signal gpr_c_read_valid : std_ulogic;
signal gpr_c_read : std_ulogic_vector(4 downto 0); signal gpr_c_read : gpr_index_t;


signal cr_write_valid : std_ulogic; signal cr_write_valid : std_ulogic;
begin begin
@ -176,6 +213,7 @@ begin


complete_in => complete_in, complete_in => complete_in,
valid_in => control_valid_in, valid_in => control_valid_in,
stall_in => stall_in,
flush_in => flush_in, flush_in => flush_in,
sgl_pipe_in => control_sgl_pipe, sgl_pipe_in => control_sgl_pipe,
stop_mark_in => d_in.stop_mark, stop_mark_in => d_in.stop_mark,
@ -210,8 +248,8 @@ begin
end if; end if;
end process; end process;


r_out.read1_reg <= insn_ra(d_in.insn); r_out.read1_reg <= gpr_or_spr_to_gspr(insn_ra(d_in.insn), d_in.ispr1);
r_out.read2_reg <= insn_rb(d_in.insn); r_out.read2_reg <= gpr_or_spr_to_gspr(insn_rb(d_in.insn), d_in.ispr2);
r_out.read3_reg <= insn_rs(d_in.insn); r_out.read3_reg <= insn_rs(d_in.insn);


c_out.read <= d_in.decode.input_cr; c_out.read <= d_in.decode.input_cr;
@ -223,6 +261,7 @@ begin
variable decoded_reg_a : decode_input_reg_t; variable decoded_reg_a : decode_input_reg_t;
variable decoded_reg_b : decode_input_reg_t; variable decoded_reg_b : decode_input_reg_t;
variable decoded_reg_c : decode_input_reg_t; variable decoded_reg_c : decode_input_reg_t;
variable decoded_reg_o : decode_output_reg_t;
variable signed_division: std_ulogic; variable signed_division: std_ulogic;
variable length : std_ulogic_vector(3 downto 0); variable length : std_ulogic_vector(3 downto 0);
begin begin
@ -239,10 +278,11 @@ begin
--v.e.input_cr := d_in.decode.input_cr; --v.e.input_cr := d_in.decode.input_cr;
--v.m.input_cr := d_in.decode.input_cr; --v.m.input_cr := d_in.decode.input_cr;
--v.e.output_cr := d_in.decode.output_cr; --v.e.output_cr := d_in.decode.output_cr;

decoded_reg_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, r_in.read1_data); decoded_reg_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, r_in.read1_data, d_in.ispr1);
decoded_reg_b := decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn, r_in.read2_data); decoded_reg_b := decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn, r_in.read2_data, d_in.ispr2);
decoded_reg_c := decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn, r_in.read3_data); decoded_reg_c := decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn, r_in.read3_data);
decoded_reg_o := decode_output_reg (d_in.decode.output_reg_a, d_in.insn, d_in.ispr1);


r_out.read1_enable <= decoded_reg_a.reg_valid; r_out.read1_enable <= decoded_reg_a.reg_valid;
r_out.read2_enable <= decoded_reg_b.reg_valid; r_out.read2_enable <= decoded_reg_b.reg_valid;
@ -269,7 +309,7 @@ begin
v.e.read_reg2 := decoded_reg_b.reg; v.e.read_reg2 := decoded_reg_b.reg;
v.e.read_data2 := decoded_reg_b.data; v.e.read_data2 := decoded_reg_b.data;
v.e.read_data3 := decoded_reg_c.data; v.e.read_data3 := decoded_reg_c.data;
v.e.write_reg := decode_output_reg(d_in.decode.output_reg_a, d_in.insn); v.e.write_reg := decoded_reg_o.reg;
v.e.rc := decode_rc(d_in.decode.rc, d_in.insn); v.e.rc := decode_rc(d_in.decode.rc, d_in.insn);
v.e.oe := decode_oe(d_in.decode.rc, d_in.insn); v.e.oe := decode_oe(d_in.decode.rc, d_in.insn);
v.e.cr := c_in.read_cr_data; v.e.cr := c_in.read_cr_data;
@ -290,7 +330,7 @@ begin
v.m.insn_type := d_in.decode.insn_type; v.m.insn_type := d_in.decode.insn_type;
mul_a := decoded_reg_a.data; mul_a := decoded_reg_a.data;
mul_b := decoded_reg_b.data; mul_b := decoded_reg_b.data;
v.m.write_reg := decode_output_reg(d_in.decode.output_reg_a, d_in.insn); v.m.write_reg := gspr_to_gpr(decoded_reg_o.reg);
v.m.rc := decode_rc(d_in.decode.rc, d_in.insn); v.m.rc := decode_rc(d_in.decode.rc, d_in.insn);
v.m.xerc := c_in.read_xerc_data; v.m.xerc := c_in.read_xerc_data;
if v.m.insn_type = OP_MUL_L64 then if v.m.insn_type = OP_MUL_L64 then
@ -327,7 +367,7 @@ begin
-- s = 1 for signed, 0 for unsigned (for div*) -- s = 1 for signed, 0 for unsigned (for div*)
-- t = 1 for 32-bit, 0 for 64-bit -- t = 1 for 32-bit, 0 for 64-bit
-- r = RC bit (record condition code) -- r = RC bit (record condition code)
v.d.write_reg := decode_output_reg(d_in.decode.output_reg_a, d_in.insn); v.d.write_reg := gspr_to_gpr(decoded_reg_o.reg);
v.d.is_modulus := not d_in.insn(8); v.d.is_modulus := not d_in.insn(8);
v.d.is_32bit := d_in.insn(2); v.d.is_32bit := d_in.insn(2);
if d_in.insn(8) = '1' then if d_in.insn(8) = '1' then
@ -364,11 +404,11 @@ begin
v.d.oe := decode_oe(d_in.decode.rc, d_in.insn); v.d.oe := decode_oe(d_in.decode.rc, d_in.insn);


-- load/store unit -- load/store unit
v.l.update_reg := decoded_reg_a.reg; v.l.update_reg := gspr_to_gpr(decoded_reg_a.reg);
v.l.addr1 := decoded_reg_a.data; v.l.addr1 := decoded_reg_a.data;
v.l.addr2 := decoded_reg_b.data; v.l.addr2 := decoded_reg_b.data;
v.l.data := decoded_reg_c.data; v.l.data := decoded_reg_c.data;
v.l.write_reg := decode_output_reg(d_in.decode.output_reg_a, d_in.insn); v.l.write_reg := gspr_to_gpr(decoded_reg_o.reg);


if d_in.decode.insn_type = OP_LOAD then if d_in.decode.insn_type = OP_LOAD then
v.l.load := '1'; v.l.load := '1';
@ -386,8 +426,8 @@ begin
control_valid_in <= d_in.valid; control_valid_in <= d_in.valid;
control_sgl_pipe <= d_in.decode.sgl_pipe; control_sgl_pipe <= d_in.decode.sgl_pipe;


gpr_write_valid <= '1' when d_in.decode.output_reg_a /= NONE else '0'; gpr_write_valid <= decoded_reg_o.reg_valid;
gpr_write <= decode_output_reg(d_in.decode.output_reg_a, d_in.insn); gpr_write <= decoded_reg_o.reg;


gpr_a_read_valid <= decoded_reg_a.reg_valid; gpr_a_read_valid <= decoded_reg_a.reg_valid;
gpr_a_read <= decoded_reg_a.reg; gpr_a_read <= decoded_reg_a.reg;
@ -396,7 +436,7 @@ begin
gpr_b_read <= decoded_reg_b.reg; gpr_b_read <= decoded_reg_b.reg;


gpr_c_read_valid <= decoded_reg_c.reg_valid; gpr_c_read_valid <= decoded_reg_c.reg_valid;
gpr_c_read <= decoded_reg_c.reg; gpr_c_read <= gspr_to_gpr(decoded_reg_c.reg);


cr_write_valid <= d_in.decode.output_cr or decode_rc(d_in.decode.rc, d_in.insn); cr_write_valid <= d_in.decode.output_cr or decode_rc(d_in.decode.rc, d_in.insn);



@ -21,10 +21,10 @@ package decode_types is
OP_TWI, OP_XOR, OP_SIM_CONFIG OP_TWI, OP_XOR, OP_SIM_CONFIG
); );


type input_reg_a_t is (NONE, RA, RA_OR_ZERO); type input_reg_a_t is (NONE, RA, RA_OR_ZERO, SPR);
type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD, CONST_DS, CONST_M1, CONST_SH, CONST_SH32); type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD, CONST_DS, CONST_M1, CONST_SH, CONST_SH32, SPR);
type input_reg_c_t is (NONE, RS); type input_reg_c_t is (NONE, RS);
type output_reg_a_t is (NONE, RT, RA); type output_reg_a_t is (NONE, RT, RA, SPR);
type rc_t is (NONE, ONE, RC); type rc_t is (NONE, ONE, RC);
type carry_in_t is (ZERO, CA, ONE); type carry_in_t is (ZERO, CA, ONE);



@ -12,10 +12,11 @@ use work.ppc_fx_insns.all;


entity execute1 is entity execute1 is
port ( port (
clk : in std_logic; clk : in std_ulogic;


-- asynchronous -- asynchronous
flush_out : out std_ulogic; flush_out : out std_ulogic;
stall_out : out std_ulogic;


e_in : in Decode2ToExecute1Type; e_in : in Decode2ToExecute1Type;


@ -32,6 +33,8 @@ end entity execute1;
architecture behaviour of execute1 is architecture behaviour of execute1 is
type reg_type is record type reg_type is record
e : Execute1ToWritebackType; e : Execute1ToWritebackType;
lr_update : std_ulogic;
next_lr : std_ulogic_vector(63 downto 0);
end record; end record;


signal r, rin : reg_type; signal r, rin : reg_type;
@ -125,6 +128,12 @@ begin
if rising_edge(clk) then if rising_edge(clk) then
r <= rin; r <= rin;
ctrl <= ctrl_tmp; ctrl <= ctrl_tmp;
assert not (r.lr_update = '1' and e_in.valid = '1')
report "LR update collision with valid in EX1"
severity failure;
if r.lr_update = '1' then
report "LR update to " & to_hstring(r.next_lr);
end if;
end if; end if;
end process; end process;


@ -190,12 +199,15 @@ begin
v.e.xerc := e_in.xerc; v.e.xerc := e_in.xerc;
end if; end if;


v.lr_update := '0';

ctrl_tmp <= ctrl; ctrl_tmp <= ctrl;
-- FIXME: run at 512MHz not core freq -- FIXME: run at 512MHz not core freq
ctrl_tmp.tb <= std_ulogic_vector(unsigned(ctrl.tb) + 1); ctrl_tmp.tb <= std_ulogic_vector(unsigned(ctrl.tb) + 1);


terminate_out <= '0'; terminate_out <= '0';
icache_inval <= '0'; icache_inval <= '0';
stall_out <= '0';
f_out <= Execute1ToFetch1TypeInit; f_out <= Execute1ToFetch1TypeInit;


-- Next insn adder used in a couple of places -- Next insn adder used in a couple of places
@ -251,12 +263,15 @@ begin
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(e_in.read_data2)); f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(e_in.read_data2));
end if; end if;
when OP_BC => when OP_BC =>
-- read_data1 is CTR
bo := insn_bo(e_in.insn); bo := insn_bo(e_in.insn);
bi := insn_bi(e_in.insn); bi := insn_bi(e_in.insn);
if bo(4-2) = '0' then if bo(4-2) = '0' then
ctrl_tmp.ctr <= std_ulogic_vector(unsigned(ctrl.ctr) - 1); result := std_ulogic_vector(unsigned(e_in.read_data1) - 1);
result_en := '1';
v.e.write_reg := fast_spr_num(SPR_CTR);
end if; end if;
if ppc_bc_taken(bo, bi, e_in.cr, ctrl.ctr) = 1 then if ppc_bc_taken(bo, bi, e_in.cr, e_in.read_data1) = 1 then
f_out.redirect <= '1'; f_out.redirect <= '1';
if (insn_aa(e_in.insn)) then if (insn_aa(e_in.insn)) then
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.read_data2)); f_out.redirect_nia <= std_ulogic_vector(signed(e_in.read_data2));
@ -265,19 +280,18 @@ begin
end if; end if;
end if; end if;
when OP_BCREG => when OP_BCREG =>
-- bits 10 and 6 distinguish between bclr, bcctr and bctar -- read_data1 is CTR
-- read_data2 is target register (CTR, LR or TAR)
bo := insn_bo(e_in.insn); bo := insn_bo(e_in.insn);
bi := insn_bi(e_in.insn); bi := insn_bi(e_in.insn);
if bo(4-2) = '0' and e_in.insn(10) = '0' then if bo(4-2) = '0' and e_in.insn(10) = '0' then
ctrl_tmp.ctr <= std_ulogic_vector(unsigned(ctrl.ctr) - 1); result := std_ulogic_vector(unsigned(e_in.read_data1) - 1);
result_en := '1';
v.e.write_reg := fast_spr_num(SPR_CTR);
end if; end if;
if ppc_bc_taken(bo, bi, e_in.cr, ctrl.ctr) = 1 then if ppc_bc_taken(bo, bi, e_in.cr, e_in.read_data1) = 1 then
f_out.redirect <= '1'; f_out.redirect <= '1';
if e_in.insn(10) = '0' then f_out.redirect_nia <= e_in.read_data2(63 downto 2) & "00";
f_out.redirect_nia <= ctrl.lr(63 downto 2) & "00";
else
f_out.redirect_nia <= ctrl.ctr(63 downto 2) & "00";
end if;
end if; end if;
when OP_CMPB => when OP_CMPB =>
result := ppc_cmpb(e_in.read_data3, e_in.read_data2); result := ppc_cmpb(e_in.read_data3, e_in.read_data2);
@ -340,23 +354,24 @@ begin
v.e.write_cr_data(hi downto lo) := newcrf; v.e.write_cr_data(hi downto lo) := newcrf;
end loop; end loop;
when OP_MFSPR => when OP_MFSPR =>
case decode_spr_num(e_in.insn) is if is_fast_spr(e_in.read_reg1) then
when SPR_XER => result := e_in.read_data1;
result := ( 63-32 => v.e.xerc.so, if decode_spr_num(e_in.insn) = SPR_XER then
63-33 => v.e.xerc.ov, result(63-32) := v.e.xerc.so;
63-34 => v.e.xerc.ca, result(63-33) := v.e.xerc.ov;
63-44 => v.e.xerc.ov32, result(63-34) := v.e.xerc.ca;
63-45 => v.e.xerc.ca32, result(63-35 downto 63-43) := "000000000";
others => '0'); result(63-44) := v.e.xerc.ov32;
when SPR_CTR => result(63-45) := v.e.xerc.ca32;
result := ctrl.ctr; end if;
when SPR_LR => else
result := ctrl.lr; case decode_spr_num(e_in.insn) is
when SPR_TB => when SPR_TB =>
result := ctrl.tb; result := ctrl.tb;
when others => when others =>
result := (others => '0'); result := (others => '0');
end case; end case;
end if;
result_en := '1'; result_en := '1';
when OP_MFCR => when OP_MFCR =>
if e_in.insn(20) = '0' then if e_in.insn(20) = '0' then
@ -387,20 +402,25 @@ begin
end if; end if;
v.e.write_cr_data := e_in.read_data3(31 downto 0); v.e.write_cr_data := e_in.read_data3(31 downto 0);
when OP_MTSPR => when OP_MTSPR =>
case decode_spr_num(e_in.insn) is report "MTSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) &
when SPR_XER => "=" & to_hstring(e_in.read_data3);
v.e.xerc.so := e_in.read_data3(63-32); if is_fast_spr(e_in.write_reg) then
v.e.xerc.ov := e_in.read_data3(63-33); result := e_in.read_data3;
v.e.xerc.ca := e_in.read_data3(63-34); result_en := '1';
v.e.xerc.ov32 := e_in.read_data3(63-44); if decode_spr_num(e_in.insn) = SPR_XER then
v.e.xerc.ca32 := e_in.read_data3(63-45); v.e.xerc.so := e_in.read_data3(63-32);
v.e.write_xerc_enable := '1'; v.e.xerc.ov := e_in.read_data3(63-33);
when SPR_CTR => v.e.xerc.ca := e_in.read_data3(63-34);
ctrl_tmp.ctr <= e_in.read_data3; v.e.xerc.ov32 := e_in.read_data3(63-44);
when SPR_LR => v.e.xerc.ca32 := e_in.read_data3(63-45);
ctrl_tmp.lr <= e_in.read_data3; v.e.write_xerc_enable := '1';
when others => end if;
end case; else
-- TODO: Implement slow SPRs
-- case decode_spr_num(e_in.insn) is
-- when others =>
-- end case;
end if;
when OP_POPCNTB => when OP_POPCNTB =>
result := ppc_popcntb(e_in.read_data3); result := ppc_popcntb(e_in.read_data3);
result_en := '1'; result_en := '1';
@ -444,15 +464,36 @@ begin
report "illegal"; report "illegal";
end case; end case;


-- Update LR on the next cycle after a branch link
--
-- WARNING: The LR update isn't tracked by our hazard tracker. This
-- will work (well I hope) because it only happens on branches
-- which will flush all decoded instructions. By the time
-- fetch catches up, we'll have the new LR. This will
-- *not* work properly however if we have a branch predictor,
-- in which case the solution would probably be to keep a
-- local cache of the updated LR in execute1 (flushed on
-- exceptions) that is used instead of the value from
-- decode when its content is valid.
if e_in.lr = '1' then if e_in.lr = '1' then
ctrl_tmp.lr <= next_nia; v.lr_update := '1';
v.next_lr := next_nia;
v.e.valid := '0';
report "Delayed LR update to " & to_hstring(next_nia);
stall_out <= '1';
end if; end if;

elsif r.lr_update = '1' then
result_en := '1';
result := r.next_lr;
v.e.write_reg := fast_spr_num(SPR_LR);
v.e.write_len := x"8";
v.e.sign_extend := '0';
v.e.valid := '1';
end if; end if;


v.e.write_data := result; v.e.write_data := result;
v.e.write_enable := result_en; v.e.write_enable := result_en;
v.e.rc := e_in.rc; v.e.rc := e_in.rc and e_in.valid;


-- Update registers -- Update registers
rin <= v; rin <= v;

@ -7,12 +7,13 @@ entity gpr_hazard is
PIPELINE_DEPTH : natural := 2 PIPELINE_DEPTH : natural := 2
); );
port( port(
clk : in std_logic; clk : in std_ulogic;
stall_in : in std_ulogic;


gpr_write_valid_in : in std_ulogic; gpr_write_valid_in : in std_ulogic;
gpr_write_in : in std_ulogic_vector(4 downto 0); gpr_write_in : in std_ulogic_vector(5 downto 0);
gpr_read_valid_in : in std_ulogic; gpr_read_valid_in : in std_ulogic;
gpr_read_in : in std_ulogic_vector(4 downto 0); gpr_read_in : in std_ulogic_vector(5 downto 0);


stall_out : out std_ulogic stall_out : out std_ulogic
); );
@ -20,7 +21,7 @@ end entity gpr_hazard;
architecture behaviour of gpr_hazard is architecture behaviour of gpr_hazard is
type pipeline_entry_type is record type pipeline_entry_type is record
valid : std_ulogic; valid : std_ulogic;
gpr : std_ulogic_vector(4 downto 0); gpr : std_ulogic_vector(5 downto 0);
end record; end record;
constant pipeline_entry_init : pipeline_entry_type := (valid => '0', gpr => (others => '0')); constant pipeline_entry_init : pipeline_entry_type := (valid => '0', gpr => (others => '0'));


@ -32,7 +33,9 @@ begin
gpr_hazard0: process(clk) gpr_hazard0: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
r <= rin; if stall_in = '0' then
r <= rin;
end if;
end if; end if;
end process; end process;



@ -94,7 +94,6 @@ package ppc_fx_insns is
function ppc_divwu (ra, rb: std_ulogic_vector(63 downto 0)) return std_ulogic_vector; function ppc_divwu (ra, rb: std_ulogic_vector(63 downto 0)) return std_ulogic_vector;


function ppc_bc_taken(bo, bi: std_ulogic_vector(4 downto 0); cr: std_ulogic_vector(31 downto 0); ctr: std_ulogic_vector(63 downto 0)) return integer; function ppc_bc_taken(bo, bi: std_ulogic_vector(4 downto 0); cr: std_ulogic_vector(31 downto 0); ctr: std_ulogic_vector(63 downto 0)) return integer;
function ppc_bcctr_taken(bo, bi: std_ulogic_vector(4 downto 0); cr: std_ulogic_vector(31 downto 0)) return integer;
end package ppc_fx_insns; end package ppc_fx_insns;


package body ppc_fx_insns is package body ppc_fx_insns is
@ -809,21 +808,4 @@ package body ppc_fx_insns is
return ret; return ret;
end; end;


-- Evaluate the condition part of a conditional branch, without any CTR
-- test (unlike ppc_bc_taken, this function takes no ctr argument).
--
--   bo : 5-bit BO field; bo(4-0) set means the CR condition is ignored,
--        bo(4-1) is the value the selected CR bit must have.
--   bi : 5-bit BI field, index of the CR bit to test (BE bit numbering).
--   cr : 32-bit condition register image.
--
-- Returns 1 when the branch condition is satisfied, 0 otherwise.
function ppc_bcctr_taken(bo, bi: std_ulogic_vector(4 downto 0); cr: std_ulogic_vector(31 downto 0)) return integer is
    -- BI counts CR bits from the left (BE bit numbering), so mirror it
    -- into the (31 downto 0) index space of cr.
    constant cr_index : integer := 31 - to_integer(unsigned(bi));
    variable bit_matches : std_ulogic := '0';
begin
    if cr(cr_index) = bo(4-1) then
        bit_matches := '1';
    end if;

    -- Condition holds when it is ignored (bo(4-0)) or the CR bit matched.
    if (bo(4-0) or bit_matches) = '1' then
        return 1;
    end if;
    return 0;
end;
end package body ppc_fx_insns; end package body ppc_fx_insns;

@ -23,7 +23,7 @@ entity register_file is
end entity register_file; end entity register_file;


architecture behaviour of register_file is architecture behaviour of register_file is
type regfile is array(0 to 31) of std_ulogic_vector(63 downto 0); type regfile is array(0 to 63) of std_ulogic_vector(63 downto 0);
signal registers : regfile := (others => (others => '0')); signal registers : regfile := (others => (others => '0'));
begin begin
-- synchronous writes -- synchronous writes
@ -32,7 +32,11 @@ begin
if rising_edge(clk) then if rising_edge(clk) then
if w_in.write_enable = '1' then if w_in.write_enable = '1' then
assert not(is_x(w_in.write_data)) and not(is_x(w_in.write_reg)) severity failure; assert not(is_x(w_in.write_data)) and not(is_x(w_in.write_reg)) severity failure;
report "Writing GPR " & to_hstring(w_in.write_reg) & " " & to_hstring(w_in.write_data); if w_in.write_reg(5) = '0' then
report "Writing GPR " & to_hstring(w_in.write_reg) & " " & to_hstring(w_in.write_data);
else
report "Writing GSPR " & to_hstring(w_in.write_reg) & " " & to_hstring(w_in.write_data);
end if;
registers(to_integer(unsigned(w_in.write_reg))) <= w_in.write_data; registers(to_integer(unsigned(w_in.write_reg))) <= w_in.write_data;
end if; end if;
end if; end if;
@ -52,7 +56,7 @@ begin
end if; end if;
d_out.read1_data <= registers(to_integer(unsigned(d_in.read1_reg))); d_out.read1_data <= registers(to_integer(unsigned(d_in.read1_reg)));
d_out.read2_data <= registers(to_integer(unsigned(d_in.read2_reg))); d_out.read2_data <= registers(to_integer(unsigned(d_in.read2_reg)));
d_out.read3_data <= registers(to_integer(unsigned(d_in.read3_reg))); d_out.read3_data <= registers(to_integer(unsigned(gpr_to_gspr(d_in.read3_reg))));


-- Forward any written data -- Forward any written data
if w_in.write_enable = '1' then if w_in.write_enable = '1' then
@ -62,7 +66,7 @@ begin
if d_in.read2_reg = w_in.write_reg then if d_in.read2_reg = w_in.write_reg then
d_out.read2_data <= w_in.write_data; d_out.read2_data <= w_in.write_data;
end if; end if;
if d_in.read3_reg = w_in.write_reg then if gpr_to_gspr(d_in.read3_reg) = w_in.write_reg then
d_out.read3_data <= w_in.write_data; d_out.read3_data <= w_in.write_data;
end if; end if;
end if; end if;

@ -126,7 +126,7 @@ begin
end if; end if;


if l_in.write_enable = '1' then if l_in.write_enable = '1' then
w_out.write_reg <= l_in.write_reg; w_out.write_reg <= gpr_to_gspr(l_in.write_reg);
data_in <= l_in.write_data; data_in <= l_in.write_data;
data_len <= unsigned(l_in.write_len); data_len <= unsigned(l_in.write_len);
byte_offset <= unsigned(l_in.write_shift); byte_offset <= unsigned(l_in.write_shift);
@ -144,7 +144,7 @@ begin


if m_in.write_reg_enable = '1' then if m_in.write_reg_enable = '1' then
w_out.write_enable <= '1'; w_out.write_enable <= '1';
w_out.write_reg <= m_in.write_reg_nr; w_out.write_reg <= gpr_to_gspr(m_in.write_reg_nr);
data_in <= m_in.write_reg_data; data_in <= m_in.write_reg_data;
rc <= m_in.rc; rc <= m_in.rc;
xe := m_in.xerc; xe := m_in.xerc;
@ -157,7 +157,7 @@ begin


if d_in.write_reg_enable = '1' then if d_in.write_reg_enable = '1' then
w_out.write_enable <= '1'; w_out.write_enable <= '1';
w_out.write_reg <= d_in.write_reg_nr; w_out.write_reg <= gpr_to_gspr(d_in.write_reg_nr);
data_in <= d_in.write_reg_data; data_in <= d_in.write_reg_data;
rc <= d_in.rc; rc <= d_in.rc;
xe := d_in.xerc; xe := d_in.xerc;

Loading…
Cancel
Save