Merge pull request #434 from paulusmack/compliance

Improve architecture compliance
master
Paul Mackerras 5 days ago committed by GitHub
commit f4ec0c2043
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -74,7 +74,7 @@ core_files = decode_types.vhdl common.vhdl wishbone_types.vhdl fetch1.vhdl \
cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \ cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \
logical.vhdl countbits.vhdl multiply.vhdl multiply-32s.vhdl divider.vhdl \ logical.vhdl countbits.vhdl multiply.vhdl multiply-32s.vhdl divider.vhdl \
execute1.vhdl loadstore1.vhdl mmu.vhdl dcache.vhdl writeback.vhdl \ execute1.vhdl loadstore1.vhdl mmu.vhdl dcache.vhdl writeback.vhdl \
core_debug.vhdl core.vhdl fpu.vhdl pmu.vhdl core_debug.vhdl core.vhdl fpu.vhdl pmu.vhdl bitsort.vhdl


soc_files = wishbone_arbiter.vhdl wishbone_bram_wrapper.vhdl sync_fifo.vhdl \ soc_files = wishbone_arbiter.vhdl wishbone_bram_wrapper.vhdl sync_fifo.vhdl \
wishbone_debug_master.vhdl xics.vhdl syscon.vhdl gpio.vhdl soc.vhdl \ wishbone_debug_master.vhdl xics.vhdl syscon.vhdl gpio.vhdl soc.vhdl \

@ -0,0 +1,102 @@
-- Implements instructions that involve sorting bits,
-- that is, cfuged, pextd and pdepd.
--
-- cfuged: Sort the bits in the mask in RB into 0s at the left, 1s at the right
-- and move the bits in RS in the same fashion to give the result
-- pextd: Like cfuged, but only use the bits of RS where the
-- corresponding bit in RB is 1
-- pdepd: Inverse of pextd; take the low-order bits of RS and spread them out
-- to the bit positions which have a 1 in RB

-- NB opc is bits 7-6 of the instruction:
-- 00 = pdepd, 01 = pextd, 10 = cfuged

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.helpers.all;

-- Multi-cycle unit implementing the bit-sorting instructions cfuged, pextd
-- and pdepd.  An operation is started by asserting 'go'; the unit then
-- processes one mask bit per clock and pulses 'done' for one cycle once
-- all 64 mask bits have been handled.
entity bit_sorter is
port (
-- All state is updated on the rising edge of clk
clk : in std_ulogic;
-- Synchronous reset: stops any operation in progress and clears the result
rst : in std_ulogic;
-- Source operand (RS); sampled when go = '1'
rs : in std_ulogic_vector(63 downto 0);
-- Bit mask (RB); sampled when go = '1'
rb : in std_ulogic_vector(63 downto 0);
-- Start a new operation; takes priority over an operation in progress
go : in std_ulogic;
-- Operation select, sampled when go = '1':
-- 00 = pdepd, 01 = pextd, 10 = cfuged
opc : in std_ulogic_vector(1 downto 0);
-- High for one clock cycle when 'result' is complete
done : out std_ulogic;
-- Result value; cleared to zero by rst and by go, final when done = '1'
result : out std_ulogic_vector(63 downto 0)
);
end entity bit_sorter;

architecture behaviour of bit_sorter is

    -- Latched opcode value that selects pdepd; any other value with bit 1
    -- clear behaves as pextd, and any value with bit 1 set as cfuged.
    constant OPC_PDEPD : std_ulogic_vector(1 downto 0) := "00";

    subtype dword_t  is std_ulogic_vector(63 downto 0);
    subtype bitpos_t is unsigned(5 downto 0);

    -- Step number at which the last mask bit is processed.
    constant LAST_STEP : bitpos_t := to_unsigned(63, 6);

    signal acc       : dword_t;                        -- result under construction
    signal busy      : std_ulogic;                     -- an operation is in progress
    signal finished  : std_ulogic;                     -- registered 'done' pulse
    signal op_q      : std_ulogic_vector(1 downto 0);  -- opc latched at start
    signal step      : bitpos_t;                       -- number of mask bits consumed
    signal left_pos  : bitpos_t;                       -- next result bit filled from the left
    signal right_pos : bitpos_t;                       -- next result bit filled from the right
    signal mask_msb  : dword_t;                        -- copy of RB, consumed MSB first
    signal mask_lsb  : dword_t;                        -- copy of RB, consumed LSB first
    signal data_msb  : dword_t;                        -- copy of RS, consumed MSB first
    signal data_lsb  : dword_t;                        -- copy of RS, consumed LSB first

begin
    -- Sequential engine: one mask bit is examined at each end of RB per clock.
    --  * Left side (cfuged only): RS bits whose mask bit is 0 are packed
    --    downwards from result bit 63.
    --  * Right side: RS bits whose mask bit is 1 are packed upwards from
    --    result bit 0 (cfuged, pextd), or, for pdepd, successive low-order
    --    RS bits are deposited at the positions where the mask bit is 1.
    bsort_r: process(clk)
        variable take_left  : boolean;
        variable take_right : boolean;
        variable dest       : bitpos_t;
    begin
        if rising_edge(clk) then
            -- 'finished' is a single-cycle pulse; it is only raised below.
            finished <= '0';

            if rst = '1' then
                busy <= '0';
                op_q <= "00";
                acc  <= (others => '0');

            elsif go = '1' then
                -- Latch the operands and restart all counters.
                busy      <= '1';
                op_q      <= opc;
                acc       <= (others => '0');
                mask_msb  <= rb;
                mask_lsb  <= rb;
                data_msb  <= rs;
                data_lsb  <= rs;
                step      <= (others => '0');
                left_pos  <= (others => '1');
                right_pos <= (others => '0');

            elsif busy = '1' then
                -- The final bit is written on this same edge, so the result
                -- is complete in the cycle where 'finished' is seen high.
                if step = LAST_STEP then
                    busy     <= '0';
                    finished <= '1';
                end if;
                step <= step + 1;

                take_left  := (mask_msb(63) = '0') and (op_q(1) = '1');
                take_right := (mask_lsb(0) = '1');

                -- cfuged: mask-0 bits go to the left end of the result.
                if take_left then
                    acc(to_integer(left_pos)) <= data_msb(63);
                    left_pos <= left_pos - 1;
                end if;

                -- Mask-1 bits: pdepd writes at the current mask position,
                -- cfuged/pextd write at the next free right-hand position.
                if take_right then
                    if op_q = OPC_PDEPD then
                        dest := step;
                    else
                        dest := right_pos;
                    end if;
                    acc(to_integer(dest)) <= data_lsb(0);
                    right_pos <= right_pos + 1;
                end if;

                -- Advance both mask copies and the MSB-first data copy.
                mask_msb <= mask_msb(62 downto 0) & '0';
                data_msb <= data_msb(62 downto 0) & '0';
                mask_lsb <= '0' & mask_lsb(63 downto 1);

                -- For pdepd a source bit is consumed only when it has been
                -- deposited; the other operations consume one bit per step.
                if take_right or (op_q /= OPC_PDEPD) then
                    data_lsb <= '0' & data_lsb(63 downto 1);
                end if;
            end if;
        end if;
    end process;

    done   <= finished;
    result <= acc;

end behaviour;

@ -12,6 +12,7 @@ package common is


-- MSR bit numbers -- MSR bit numbers
constant MSR_SF : integer := (63 - 0); -- Sixty-Four bit mode constant MSR_SF : integer := (63 - 0); -- Sixty-Four bit mode
constant MSR_HV : integer := (63 - 3); -- Hypervisor mode (always 1)
constant MSR_EE : integer := (63 - 48); -- External interrupt Enable constant MSR_EE : integer := (63 - 48); -- External interrupt Enable
constant MSR_PR : integer := (63 - 49); -- PRoblem state constant MSR_PR : integer := (63 - 49); -- PRoblem state
constant MSR_FP : integer := (63 - 50); -- Floating Point available constant MSR_FP : integer := (63 - 50); -- Floating Point available
@ -54,6 +55,15 @@ package common is
constant SPR_PID : spr_num_t := 48; constant SPR_PID : spr_num_t := 48;
constant SPR_PTCR : spr_num_t := 464; constant SPR_PTCR : spr_num_t := 464;
constant SPR_PVR : spr_num_t := 287; constant SPR_PVR : spr_num_t := 287;
constant SPR_FSCR : spr_num_t := 153;
constant SPR_HFSCR : spr_num_t := 190;
constant SPR_HEIR : spr_num_t := 339;
constant SPR_CTRL : spr_num_t := 136;
constant SPR_CTRLW : spr_num_t := 152;
constant SPR_UDSCR : spr_num_t := 3;
constant SPR_DSCR : spr_num_t := 17;
constant SPR_VRSAVE : spr_num_t := 256;
constant SPR_PIR : spr_num_t := 1023;


-- PMU registers -- PMU registers
constant SPR_UPMC1 : spr_num_t := 771; constant SPR_UPMC1 : spr_num_t := 771;
@ -131,30 +141,52 @@ package common is
constant RAMSPR_SPRG3 : ramspr_index := to_unsigned(3,3); constant RAMSPR_SPRG3 : ramspr_index := to_unsigned(3,3);
constant RAMSPR_HSPRG1 : ramspr_index := to_unsigned(4,3); constant RAMSPR_HSPRG1 : ramspr_index := to_unsigned(4,3);
constant RAMSPR_CTR : ramspr_index := to_unsigned(5,3); -- must equal RAMSPR_LR constant RAMSPR_CTR : ramspr_index := to_unsigned(5,3); -- must equal RAMSPR_LR
constant RAMSPR_VRSAVE : ramspr_index := to_unsigned(6,3);


type ram_spr_info is record type ram_spr_info is record
index : ramspr_index; index : ramspr_index;
isodd : std_ulogic; isodd : std_ulogic;
is32b : std_ulogic;
valid : std_ulogic; valid : std_ulogic;
end record; end record;
constant ram_spr_info_init: ram_spr_info := (index => to_unsigned(0,3), others => '0'); constant ram_spr_info_init: ram_spr_info := (index => to_unsigned(0,3), others => '0');


subtype spr_selector is std_ulogic_vector(2 downto 0); subtype spr_selector is std_ulogic_vector(3 downto 0);
type spr_id is record type spr_id is record
sel : spr_selector; sel : spr_selector;
valid : std_ulogic; valid : std_ulogic;
ispmu : std_ulogic; ispmu : std_ulogic;
end record; ronly : std_ulogic;
constant spr_id_init : spr_id := (sel => "000", others => '0'); wonly : std_ulogic;

end record;
constant SPRSEL_TB : spr_selector := 3x"0"; constant spr_id_init : spr_id := (sel => "0000", others => '0');
constant SPRSEL_TBU : spr_selector := 3x"1";
constant SPRSEL_DEC : spr_selector := 3x"2"; constant SPRSEL_TB : spr_selector := 4x"0";
constant SPRSEL_PVR : spr_selector := 3x"3"; constant SPRSEL_TBU : spr_selector := 4x"1";
constant SPRSEL_LOGA : spr_selector := 3x"4"; constant SPRSEL_DEC : spr_selector := 4x"2";
constant SPRSEL_LOGD : spr_selector := 3x"5"; constant SPRSEL_PVR : spr_selector := 4x"3";
constant SPRSEL_CFAR : spr_selector := 3x"6"; constant SPRSEL_LOGA : spr_selector := 4x"4";
constant SPRSEL_XER : spr_selector := 3x"7"; constant SPRSEL_LOGD : spr_selector := 4x"5";
constant SPRSEL_CFAR : spr_selector := 4x"6";
constant SPRSEL_FSCR : spr_selector := 4x"7";
constant SPRSEL_HFSCR : spr_selector := 4x"8";
constant SPRSEL_HEIR : spr_selector := 4x"9";
constant SPRSEL_CTRL : spr_selector := 4x"a";
constant SPRSEL_DSCR : spr_selector := 4x"b";
constant SPRSEL_PIR : spr_selector := 4x"c";
constant SPRSEL_XER : spr_selector := 4x"f";

-- FSCR and HFSCR bit numbers
constant FSCR_PREFIX : integer := 63 - 50;
constant FSCR_SCV : integer := 63 - 51;
constant FSCR_TAR : integer := 63 - 55;
constant FSCR_DSCR : integer := 63 - 61;
constant HFSCR_PREFIX : integer := 63 - 50;
constant HFSCR_MSG : integer := 63 - 53;
constant HFSCR_TAR : integer := 63 - 55;
constant HFSCR_PMUSPR : integer := 63 - 60;
constant HFSCR_DSCR : integer := 63 - 61;
constant HFSCR_FP : integer := 63 - 63;


-- FPSCR bit numbers -- FPSCR bit numbers
constant FPSCR_FX : integer := 63 - 32; constant FPSCR_FX : integer := 63 - 32;
@ -224,14 +256,32 @@ package common is


-- This needs to die... -- This needs to die...
type ctrl_t is record type ctrl_t is record
wait_state: std_ulogic;
run: std_ulogic;
tb: std_ulogic_vector(63 downto 0); tb: std_ulogic_vector(63 downto 0);
dec: std_ulogic_vector(63 downto 0); dec: std_ulogic_vector(63 downto 0);
msr: std_ulogic_vector(63 downto 0); msr: std_ulogic_vector(63 downto 0);
cfar: std_ulogic_vector(63 downto 0); cfar: std_ulogic_vector(63 downto 0);
xer_low: std_ulogic_vector(17 downto 0); xer_low: std_ulogic_vector(17 downto 0);
fscr_ic: std_ulogic_vector(3 downto 0);
fscr_pref: std_ulogic;
fscr_scv: std_ulogic;
fscr_tar: std_ulogic;
fscr_dscr: std_ulogic;
hfscr_ic: std_ulogic_vector(3 downto 0);
hfscr_pref: std_ulogic;
hfscr_tar: std_ulogic;
hfscr_dscr: std_ulogic;
hfscr_fp: std_ulogic;
heir: std_ulogic_vector(63 downto 0);
dscr: std_ulogic_vector(24 downto 0);
end record; end record;
constant ctrl_t_init : ctrl_t := constant ctrl_t_init : ctrl_t :=
(xer_low => 18x"0", others => (others => '0')); (wait_state => '0', run => '1', xer_low => 18x"0",
fscr_ic => x"0", fscr_pref => '1', fscr_scv => '1', fscr_tar => '1', fscr_dscr => '1',
hfscr_ic => x"0", hfscr_pref => '1', hfscr_tar => '1', hfscr_dscr => '1', hfscr_fp => '1',
dscr => (others => '0'),
others => (others => '0'));


type Fetch1ToIcacheType is record type Fetch1ToIcacheType is record
req: std_ulogic; req: std_ulogic;
@ -270,6 +320,7 @@ package common is
type Decode1ToDecode2Type is record type Decode1ToDecode2Type is record
valid: std_ulogic; valid: std_ulogic;
stop_mark : std_ulogic; stop_mark : std_ulogic;
second : std_ulogic;
nia: std_ulogic_vector(63 downto 0); nia: std_ulogic_vector(63 downto 0);
prefixed: std_ulogic; prefixed: std_ulogic;
prefix: std_ulogic_vector(25 downto 0); prefix: std_ulogic_vector(25 downto 0);
@ -286,7 +337,7 @@ package common is
reg_c : gspr_index_t; reg_c : gspr_index_t;
end record; end record;
constant Decode1ToDecode2Init : Decode1ToDecode2Type := constant Decode1ToDecode2Init : Decode1ToDecode2Type :=
(valid => '0', stop_mark => '0', nia => (others => '0'), (valid => '0', stop_mark => '0', second => '0', nia => (others => '0'),
prefixed => '0', prefix => (others => '0'), insn => (others => '0'), prefixed => '0', prefix => (others => '0'), insn => (others => '0'),
illegal_suffix => '0', misaligned_prefix => '0', illegal_suffix => '0', misaligned_prefix => '0',
decode => decode_rom_init, br_pred => '0', big_endian => '0', decode => decode_rom_init, br_pred => '0', big_endian => '0',
@ -371,11 +422,16 @@ package common is
ramspr_wraddr : ramspr_index; ramspr_wraddr : ramspr_index;
ramspr_write_even : std_ulogic; ramspr_write_even : std_ulogic;
ramspr_write_odd : std_ulogic; ramspr_write_odd : std_ulogic;
ramspr_32bit : std_ulogic;
dbg_spr_access : std_ulogic; dbg_spr_access : std_ulogic;
dec_ctr : std_ulogic; dec_ctr : std_ulogic;
prefixed : std_ulogic; prefixed : std_ulogic;
prefix : std_ulogic_vector(25 downto 0);
illegal_suffix : std_ulogic; illegal_suffix : std_ulogic;
misaligned_prefix : std_ulogic; misaligned_prefix : std_ulogic;
illegal_form : std_ulogic;
uses_tar : std_ulogic;
uses_dscr : std_ulogic;
end record; end record;
constant Decode2ToExecute1Init : Decode2ToExecute1Type := constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
(valid => '0', unit => ALU, fac => NONE, insn_type => OP_ILLEGAL, instr_tag => instr_tag_init, (valid => '0', unit => ALU, fac => NONE, insn_type => OP_ILLEGAL, instr_tag => instr_tag_init,
@ -393,9 +449,11 @@ package common is
spr_is_ram => '0', spr_is_ram => '0',
ramspr_even_rdaddr => (others => '0'), ramspr_odd_rdaddr => (others => '0'), ramspr_rd_odd => '0', ramspr_even_rdaddr => (others => '0'), ramspr_odd_rdaddr => (others => '0'), ramspr_rd_odd => '0',
ramspr_wraddr => (others => '0'), ramspr_write_even => '0', ramspr_write_odd => '0', ramspr_wraddr => (others => '0'), ramspr_write_even => '0', ramspr_write_odd => '0',
ramspr_32bit => '0',
dbg_spr_access => '0', dbg_spr_access => '0',
dec_ctr => '0', dec_ctr => '0',
prefixed => '0', illegal_suffix => '0', misaligned_prefix => '0', prefixed => '0', prefix => (others => '0'), illegal_suffix => '0',
misaligned_prefix => '0', illegal_form => '0', uses_tar => '0', uses_dscr => '0',
others => (others => '0')); others => (others => '0'));


type MultiplyInputType is record type MultiplyInputType is record
@ -547,14 +605,23 @@ package common is
hold : std_ulogic; hold : std_ulogic;
load : std_ulogic; -- is this a load load : std_ulogic; -- is this a load
dcbz : std_ulogic; dcbz : std_ulogic;
flush : std_ulogic;
touch : std_ulogic;
sync : std_ulogic;
nc : std_ulogic; nc : std_ulogic;
reserve : std_ulogic; reserve : std_ulogic;
atomic_qw : std_ulogic; -- part of a quadword atomic op
atomic_first : std_ulogic;
atomic_last : std_ulogic;
virt_mode : std_ulogic; virt_mode : std_ulogic;
priv_mode : std_ulogic; priv_mode : std_ulogic;
addr : std_ulogic_vector(63 downto 0); addr : std_ulogic_vector(63 downto 0);
data : std_ulogic_vector(63 downto 0); -- valid the cycle after .valid = 1 data : std_ulogic_vector(63 downto 0); -- valid the cycle after .valid = 1
byte_sel : std_ulogic_vector(7 downto 0); byte_sel : std_ulogic_vector(7 downto 0);
end record; end record;
constant Loadstore1ToDcacheInit : Loadstore1ToDcacheType :=
(addr => (others => '0'), data => (others => '0'), byte_sel => x"00",
others => '0');


type DcacheToLoadstore1Type is record type DcacheToLoadstore1Type is record
valid : std_ulogic; valid : std_ulogic;
@ -562,6 +629,7 @@ package common is
store_done : std_ulogic; store_done : std_ulogic;
error : std_ulogic; error : std_ulogic;
cache_paradox : std_ulogic; cache_paradox : std_ulogic;
reserve_nc : std_ulogic;
end record; end record;


type DcacheEventType is record type DcacheEventType is record
@ -662,6 +730,8 @@ package common is
write_xerc_enable : std_ulogic; write_xerc_enable : std_ulogic;
xerc : xer_common_t; xerc : xer_common_t;
interrupt : std_ulogic; interrupt : std_ulogic;
hv_intr : std_ulogic;
is_scv : std_ulogic;
intr_vec : intr_vector_t; intr_vec : intr_vector_t;
redirect: std_ulogic; redirect: std_ulogic;
redir_mode: std_ulogic_vector(3 downto 0); redir_mode: std_ulogic_vector(3 downto 0);
@ -678,7 +748,8 @@ package common is
write_xerc_enable => '0', xerc => xerc_init, write_xerc_enable => '0', xerc => xerc_init,
write_data => (others => '0'), write_cr_mask => (others => '0'), write_data => (others => '0'), write_cr_mask => (others => '0'),
write_cr_data => (others => '0'), write_reg => (others => '0'), write_cr_data => (others => '0'), write_reg => (others => '0'),
interrupt => '0', intr_vec => 0, redirect => '0', redir_mode => "0000", interrupt => '0', hv_intr => '0', is_scv => '0', intr_vec => 0,
redirect => '0', redir_mode => "0000",
last_nia => (others => '0'), last_nia => (others => '0'),
br_last => '0', br_taken => '0', abs_br => '0', br_last => '0', br_taken => '0', abs_br => '0',
srr1 => (others => '0'), msr => (others => '0')); srr1 => (others => '0'), msr => (others => '0'));
@ -766,13 +837,13 @@ package common is
br_last : std_ulogic; br_last : std_ulogic;
br_taken : std_ulogic; br_taken : std_ulogic;
interrupt : std_ulogic; interrupt : std_ulogic;
intr_vec : std_ulogic_vector(11 downto 0); intr_vec : std_ulogic_vector(16 downto 0);
end record; end record;
constant WritebackToFetch1Init : WritebackToFetch1Type := constant WritebackToFetch1Init : WritebackToFetch1Type :=
(redirect => '0', virt_mode => '0', priv_mode => '0', big_endian => '0', (redirect => '0', virt_mode => '0', priv_mode => '0', big_endian => '0',
mode_32bit => '0', redirect_nia => (others => '0'), mode_32bit => '0', redirect_nia => (others => '0'),
br_last => '0', br_taken => '0', br_nia => (others => '0'), br_last => '0', br_taken => '0', br_nia => (others => '0'),
interrupt => '0', intr_vec => x"000"); interrupt => '0', intr_vec => 17x"0");


type WritebackToRegisterFileType is record type WritebackToRegisterFileType is record
write_reg : gspr_index_t; write_reg : gspr_index_t;
@ -796,6 +867,8 @@ package common is


type WritebackToExecute1Type is record type WritebackToExecute1Type is record
intr : std_ulogic; intr : std_ulogic;
hv_intr : std_ulogic;
scv_int : std_ulogic;
srr1 : std_ulogic_vector(15 downto 0); srr1 : std_ulogic_vector(15 downto 0);
end record; end record;



@ -9,6 +9,7 @@ use work.wishbone_types.all;
entity core is entity core is
generic ( generic (
SIM : boolean := false; SIM : boolean := false;
CPU_INDEX : natural := 0;
DISABLE_FLATTEN : boolean := false; DISABLE_FLATTEN : boolean := false;
EX1_BYPASS : boolean := true; EX1_BYPASS : boolean := true;
HAS_FPU : boolean := true; HAS_FPU : boolean := true;
@ -48,6 +49,7 @@ entity core is


ext_irq : in std_ulogic; ext_irq : in std_ulogic;


run_out : out std_ulogic;
terminated_out : out std_logic terminated_out : out std_logic
); );
end core; end core;
@ -363,6 +365,7 @@ begin
execute1_0: entity work.execute1 execute1_0: entity work.execute1
generic map ( generic map (
SIM => SIM, SIM => SIM,
CPU_INDEX => CPU_INDEX,
EX1_BYPASS => EX1_BYPASS, EX1_BYPASS => EX1_BYPASS,
HAS_FPU => HAS_FPU, HAS_FPU => HAS_FPU,
LOG_LENGTH => LOG_LENGTH LOG_LENGTH => LOG_LENGTH
@ -390,6 +393,7 @@ begin
ls_events => loadstore_events, ls_events => loadstore_events,
dc_events => dcache_events, dc_events => dcache_events,
ic_events => icache_events, ic_events => icache_events,
run_out => run_out,
terminate_out => terminate, terminate_out => terminate,
dbg_spr_req => dbg_spr_req, dbg_spr_req => dbg_spr_req,
dbg_spr_ack => dbg_spr_ack, dbg_spr_ack => dbg_spr_ack,

@ -294,7 +294,7 @@ begin


-- For SPRs, use the same mapping as when the fast SPRs were in the GPR file -- For SPRs, use the same mapping as when the fast SPRs were in the GPR file
valid := '1'; valid := '1';
sel := "000"; sel := "0000";
isram := '1'; isram := '1';
raddr := (others => '0'); raddr := (others => '0');
odd := '0'; odd := '0';
@ -324,10 +324,26 @@ begin
sel := SPRSEL_XER; sel := SPRSEL_XER;
when 5x"0d" => when 5x"0d" =>
raddr := RAMSPR_TAR; raddr := RAMSPR_TAR;
when 5x"0e" =>
isram := '0';
sel := SPRSEL_FSCR;
when 5x"0f" =>
isram := '0';
sel := SPRSEL_HFSCR;
when 5x"10" =>
isram := '0';
sel := SPRSEL_HEIR;
when 5x"11" =>
isram := '0';
sel := SPRSEL_CFAR;
when others => when others =>
valid := '0'; valid := '0';
end case; end case;
dbg_spr_addr <= isram & sel & std_ulogic_vector(raddr) & odd; if isram = '1' then
dbg_spr_addr <= "1000" & std_ulogic_vector(raddr) & odd;
else
dbg_spr_addr <= "0000" & sel;
end if;
spr_index_valid <= valid; spr_index_valid <= valid;
end if; end if;
end process; end process;

@ -181,22 +181,13 @@ architecture rtl of dcache is


constant real_mode_perm_attr : perm_attr_t := (nocache => '0', others => '1'); constant real_mode_perm_attr : perm_attr_t := (nocache => '0', others => '1');


-- Type of operation on a "valid" input
type op_t is (OP_NONE,
OP_BAD, -- NC cache hit, TLB miss, prot/RC failure
OP_STCX_FAIL, -- conditional store w/o reservation
OP_LOAD_HIT, -- Cache hit on load
OP_LOAD_MISS, -- Load missing cache
OP_LOAD_NC, -- Non-cachable load
OP_STORE_HIT, -- Store hitting cache
OP_STORE_MISS); -- Store missing cache
-- Cache state machine -- Cache state machine
type state_t is (IDLE, -- Normal load hit processing type state_t is (IDLE, -- Normal load hit processing
RELOAD_WAIT_ACK, -- Cache reload wait ack RELOAD_WAIT_ACK, -- Cache reload wait ack
STORE_WAIT_ACK, -- Store wait ack STORE_WAIT_ACK, -- Store wait ack
NC_LOAD_WAIT_ACK);-- Non-cachable load wait ack NC_LOAD_WAIT_ACK, -- Non-cachable load wait ack

DO_STCX, -- Check for stcx. validity
FLUSH_CYCLE); -- Cycle for invalidating cache line


-- --
-- Dcache operations: -- Dcache operations:
@ -230,8 +221,9 @@ architecture rtl of dcache is
-- Clock edge between cycle 1 and cycle 2: -- Clock edge between cycle 1 and cycle 2:
-- Request is stored in r1 (assuming r1.full was 0) -- Request is stored in r1 (assuming r1.full was 0)
-- The state machine transitions out of IDLE state for a load miss, -- The state machine transitions out of IDLE state for a load miss,
-- a store, a dcbz, or a non-cacheable load. r1.full is set to 1 -- a store, a dcbz, a flush (dcbf) or a non-cacheable load.
-- for a load miss, dcbz or non-cacheable load but not a store. -- r1.full is set to 1 for a load miss, dcbz, flush or
-- non-cacheable load but not a store.
-- --
-- Cycle 2: Completion signals are asserted for a load hit, -- Cycle 2: Completion signals are asserted for a load hit,
-- a store (excluding dcbz), a TLB operation, a conditional -- a store (excluding dcbz), a TLB operation, a conditional
@ -272,6 +264,23 @@ architecture rtl of dcache is
-- subsequent load requests to the same line can be completed as -- subsequent load requests to the same line can be completed as
-- soon as the necessary data comes in from memory, without -- soon as the necessary data comes in from memory, without
-- waiting for the whole line to be read. -- waiting for the whole line to be read.
--
-- Aligned loads and stores of a doubleword or less are atomic
-- because they are done in a single wishbone operation.
-- For quadword atomic loads and stores we rely on the wishbone
-- arbiter not interrupting access to a target once it has first
-- given access; i.e. once we have the main wishbone, no other
-- master gets access until we drop cyc.
--
-- Note on loads potentially hitting the victim line that is
-- currently being replaced: the new tag is available starting
-- with the 3rd cycle of RELOAD_WAIT_ACK state. As long as the
-- first read on the wishbone takes at least one cycle (i.e. the
-- ack doesn't arrive in the same cycle as stb was asserted),
-- r1.full will be true at least until that 3rd cycle and so a load
-- following a load miss can't hit on the old tag of the victim
-- line. As long as ack is not generated combinationally from
-- stb, this will be fine.


-- Stage 0 register, basically contains just the latched request -- Stage 0 register, basically contains just the latched request
type reg_stage_0_t is record type reg_stage_0_t is record
@ -287,12 +296,23 @@ architecture rtl of dcache is
signal r0_full : std_ulogic; signal r0_full : std_ulogic;


type mem_access_request_t is record type mem_access_request_t is record
op : op_t; op_lmiss : std_ulogic;
op_store : std_ulogic;
op_flush : std_ulogic;
op_sync : std_ulogic;
nc : std_ulogic;
valid : std_ulogic; valid : std_ulogic;
dcbz : std_ulogic; dcbz : std_ulogic;
flush : std_ulogic;
touch : std_ulogic;
sync : std_ulogic;
reserve : std_ulogic;
first_dw : std_ulogic;
last_dw : std_ulogic;
real_addr : real_addr_t; real_addr : real_addr_t;
data : std_ulogic_vector(63 downto 0); data : std_ulogic_vector(63 downto 0);
byte_sel : std_ulogic_vector(7 downto 0); byte_sel : std_ulogic_vector(7 downto 0);
is_hit : std_ulogic;
hit_way : way_t; hit_way : way_t;
same_tag : std_ulogic; same_tag : std_ulogic;
mmu_req : std_ulogic; mmu_req : std_ulogic;
@ -306,12 +326,16 @@ architecture rtl of dcache is
full : std_ulogic; -- have uncompleted request full : std_ulogic; -- have uncompleted request
mmu_req : std_ulogic; -- request is from MMU mmu_req : std_ulogic; -- request is from MMU
req : mem_access_request_t; req : mem_access_request_t;
atomic_more : std_ulogic; -- atomic request isn't finished


-- Cache hit state -- Cache hit state
hit_way : way_t; hit_way : way_t;
hit_load_valid : std_ulogic; hit_load_valid : std_ulogic;
hit_index : index_t; hit_index : index_t;
cache_hit : std_ulogic; cache_hit : std_ulogic;
prev_hit : std_ulogic;
prev_way : way_t;
prev_hit_reload : std_ulogic;


-- TLB hit state -- TLB hit state
tlb_hit : std_ulogic; tlb_hit : std_ulogic;
@ -352,6 +376,7 @@ architecture rtl of dcache is
mmu_done : std_ulogic; mmu_done : std_ulogic;
mmu_error : std_ulogic; mmu_error : std_ulogic;
cache_paradox : std_ulogic; cache_paradox : std_ulogic;
reserve_nc : std_ulogic;


-- Signal to complete a failed stcx. -- Signal to complete a failed stcx.
stcx_fail : std_ulogic; stcx_fail : std_ulogic;
@ -365,27 +390,34 @@ architecture rtl of dcache is
-- --
type reservation_t is record type reservation_t is record
valid : std_ulogic; valid : std_ulogic;
addr : std_ulogic_vector(63 downto LINE_OFF_BITS); addr : std_ulogic_vector(REAL_ADDR_BITS - 1 downto LINE_OFF_BITS);
end record; end record;


signal reservation : reservation_t; signal reservation : reservation_t;
signal kill_rsrv : std_ulogic;
signal kill_rsrv2 : std_ulogic;


-- Async signals on incoming request -- Async signals on incoming request
signal req_index : index_t; signal req_index : index_t;
signal req_hit_way : way_t; signal req_hit_way : way_t;
signal req_is_hit : std_ulogic;
signal req_tag : cache_tag_t; signal req_tag : cache_tag_t;
signal req_op : op_t; signal req_op_load_hit : std_ulogic;
signal req_op_load_miss : std_ulogic;
signal req_op_store : std_ulogic;
signal req_op_flush : std_ulogic;
signal req_op_sync : std_ulogic;
signal req_op_bad : std_ulogic;
signal req_op_nop : std_ulogic;
signal req_data : std_ulogic_vector(63 downto 0); signal req_data : std_ulogic_vector(63 downto 0);
signal req_same_tag : std_ulogic; signal req_same_tag : std_ulogic;
signal req_go : std_ulogic; signal req_go : std_ulogic;
signal req_nc : std_ulogic;
signal req_hit_reload : std_ulogic;


signal early_req_row : row_t; signal early_req_row : row_t;
signal early_rd_valid : std_ulogic; signal early_rd_valid : std_ulogic;


signal cancel_store : std_ulogic;
signal set_rsrv : std_ulogic;
signal clear_rsrv : std_ulogic;

signal r0_valid : std_ulogic; signal r0_valid : std_ulogic;
signal r0_stall : std_ulogic; signal r0_stall : std_ulogic;


@ -427,10 +459,13 @@ architecture rtl of dcache is
-- TLB PLRU output interface -- TLB PLRU output interface
signal tlb_plru_victim : std_ulogic_vector(TLB_WAY_BITS-1 downto 0); signal tlb_plru_victim : std_ulogic_vector(TLB_WAY_BITS-1 downto 0);


signal snoop_active : std_ulogic;
signal snoop_tag_set : cache_tags_set_t; signal snoop_tag_set : cache_tags_set_t;
signal snoop_valid : std_ulogic; signal snoop_valid : std_ulogic;
signal snoop_wrtag : cache_tag_t; signal snoop_paddr : real_addr_t;
signal snoop_index : index_t; signal snoop_addr : real_addr_t;
signal snoop_hits : cache_way_valids_t;
signal req_snoop_hit : std_ulogic;


-- --
-- Helper functions to decode incoming requests -- Helper functions to decode incoming requests
@ -565,12 +600,9 @@ begin
assert (d_in.valid and m_in.valid) = '0' report assert (d_in.valid and m_in.valid) = '0' report
"request collision loadstore vs MMU"; "request collision loadstore vs MMU";
if m_in.valid = '1' then if m_in.valid = '1' then
r.req := Loadstore1ToDcacheInit;
r.req.valid := '1'; r.req.valid := '1';
r.req.load := not (m_in.tlbie or m_in.tlbld); r.req.load := not (m_in.tlbie or m_in.tlbld);
r.req.dcbz := '0';
r.req.nc := '0';
r.req.reserve := '0';
r.req.virt_mode := '0';
r.req.priv_mode := '1'; r.req.priv_mode := '1';
r.req.addr := m_in.addr; r.req.addr := m_in.addr;
r.req.data := m_in.pte; r.req.data := m_in.pte;
@ -861,24 +893,43 @@ begin
end if; end if;
end process; end process;


-- Snoop logic
-- Don't snoop our own cycles
snoop_addr <= addr_to_real(wb_to_addr(snoop_in.adr));
snoop_active <= snoop_in.cyc and snoop_in.stb and snoop_in.we and
not (r1.wb.cyc and not wishbone_in.stall);
kill_rsrv <= '1' when (snoop_active = '1' and reservation.valid = '1' and
snoop_addr(REAL_ADDR_BITS - 1 downto LINE_OFF_BITS) = reservation.addr)
else '0';

-- Cache tag RAM second read port, for snooping -- Cache tag RAM second read port, for snooping
cache_tag_read_2 : process(clk) cache_tag_read_2 : process(clk)
variable addr : real_addr_t;
begin begin
if rising_edge(clk) then if rising_edge(clk) then
-- Don't snoop our own cycles if is_X(snoop_addr) then
snoop_valid <= '0'; snoop_tag_set <= (others => 'X');
if not (r1.wb.cyc = '1' and wishbone_in.stall = '0') then else
if (snoop_in.cyc and snoop_in.stb and snoop_in.we) = '1' then snoop_tag_set <= cache_tags(to_integer(get_index(snoop_addr)));
snoop_valid <= '1';
addr := addr_to_real(wb_to_addr(snoop_in.adr));
assert not is_X(addr);
snoop_tag_set <= cache_tags(to_integer(get_index(addr)));
snoop_wrtag <= get_tag(addr);
snoop_index <= get_index(addr);
end if; end if;
snoop_paddr <= snoop_addr;
snoop_valid <= snoop_active;
end if; end if;
end process;

-- Compare the previous cycle's snooped store address to the reservation,
-- to catch the case where a write happens on cycle 1 of a cached larx
kill_rsrv2 <= '1' when (snoop_valid = '1' and reservation.valid = '1' and
snoop_paddr(REAL_ADDR_BITS - 1 downto LINE_OFF_BITS) = reservation.addr)
else '0';

snoop_tag_match : process(all)
begin
snoop_hits <= (others => '0');
for i in 0 to NUM_WAYS-1 loop
if snoop_valid = '1' and read_tag(i, snoop_tag_set) = get_tag(snoop_paddr) then
snoop_hits(i) <= '1';
end if; end if;
end loop;
end process; end process;


-- Cache request parsing and hit detection -- Cache request parsing and hit detection
@ -887,8 +938,6 @@ begin
variable rindex : index_t; variable rindex : index_t;
variable is_hit : std_ulogic; variable is_hit : std_ulogic;
variable hit_way : way_t; variable hit_way : way_t;
variable op : op_t;
variable opsel : std_ulogic_vector(2 downto 0);
variable go : std_ulogic; variable go : std_ulogic;
variable nc : std_ulogic; variable nc : std_ulogic;
variable s_hit : std_ulogic; variable s_hit : std_ulogic;
@ -901,6 +950,9 @@ begin
variable rel_match : std_ulogic; variable rel_match : std_ulogic;
variable fwd_matches : std_ulogic_vector(TLB_NUM_WAYS - 1 downto 0); variable fwd_matches : std_ulogic_vector(TLB_NUM_WAYS - 1 downto 0);
variable fwd_match : std_ulogic; variable fwd_match : std_ulogic;
variable snp_matches : std_ulogic_vector(TLB_NUM_WAYS - 1 downto 0);
variable snoop_match : std_ulogic;
variable hit_reload : std_ulogic;
begin begin
-- Extract line, row and tag from request -- Extract line, row and tag from request
rindex := get_index(r0.req.addr); rindex := get_index(r0.req.addr);
@ -924,9 +976,11 @@ begin
is_hit := '0'; is_hit := '0';
rel_match := '0'; rel_match := '0';
fwd_match := '0'; fwd_match := '0';
snoop_match := '0';
if r0.req.virt_mode = '1' then if r0.req.virt_mode = '1' then
rel_matches := (others => '0'); rel_matches := (others => '0');
fwd_matches := (others => '0'); fwd_matches := (others => '0');
snp_matches := (others => '0');
for j in tlb_way_t loop for j in tlb_way_t loop
hit_way_set(j) := to_unsigned(0, WAY_BITS); hit_way_set(j) := to_unsigned(0, WAY_BITS);
s_hit := '0'; s_hit := '0';
@ -943,6 +997,9 @@ begin
tlb_valid_way(j) = '1' then tlb_valid_way(j) = '1' then
hit_way_set(j) := to_unsigned(i, WAY_BITS); hit_way_set(j) := to_unsigned(i, WAY_BITS);
s_hit := '1'; s_hit := '1';
if snoop_hits(i) = '1' then
snp_matches(j) := '1';
end if;
end if; end if;
end loop; end loop;
hit_set(j) := s_hit; hit_set(j) := s_hit;
@ -959,6 +1016,7 @@ begin
hit_way := hit_way_set(to_integer(tlb_hit_way)); hit_way := hit_way_set(to_integer(tlb_hit_way));
rel_match := rel_matches(to_integer(tlb_hit_way)); rel_match := rel_matches(to_integer(tlb_hit_way));
fwd_match := fwd_matches(to_integer(tlb_hit_way)); fwd_match := fwd_matches(to_integer(tlb_hit_way));
snoop_match := snp_matches(to_integer(tlb_hit_way));
end if; end if;
else else
s_tag := get_tag(r0.req.addr); s_tag := get_tag(r0.req.addr);
@ -970,6 +1028,9 @@ begin
read_tag(i, cache_tag_set) = s_tag then read_tag(i, cache_tag_set) = s_tag then
hit_way := to_unsigned(i, WAY_BITS); hit_way := to_unsigned(i, WAY_BITS);
is_hit := '1'; is_hit := '1';
if snoop_hits(i) = '1' then
snoop_match := '1';
end if;
end if; end if;
end loop; end loop;
if go = '1' and not is_X(r1.reload_tag) and s_tag = r1.reload_tag then if go = '1' and not is_X(r1.reload_tag) and s_tag = r1.reload_tag then
@ -982,6 +1043,13 @@ begin
req_same_tag <= rel_match; req_same_tag <= rel_match;
fwd_same_tag <= fwd_match; fwd_same_tag <= fwd_match;


-- This is 1 if the snooped write from the previous cycle hits the same
-- cache line that is being accessed in this cycle.
req_snoop_hit <= '0';
if go = '1' and snoop_match = '1' and get_index(snoop_paddr) = rindex then
req_snoop_hit <= '1';
end if;

-- Whether to use forwarded data for a load or not -- Whether to use forwarded data for a load or not
use_forward_st <= '0'; use_forward_st <= '0';
use_forward_rl <= '0'; use_forward_rl <= '0';
@ -1029,6 +1097,7 @@ begin
assert not is_X(rindex); assert not is_X(rindex);
assert not is_X(r1.store_index); assert not is_X(r1.store_index);
end if; end if;
hit_reload := '0';
if r1.state = RELOAD_WAIT_ACK and rel_match = '1' and if r1.state = RELOAD_WAIT_ACK and rel_match = '1' and
rindex = r1.store_index then rindex = r1.store_index then
-- Ignore is_hit from above, because a load miss writes the new tag -- Ignore is_hit from above, because a load miss writes the new tag
@ -1037,13 +1106,29 @@ begin
-- since it will be by the time we perform the store. -- since it will be by the time we perform the store.
-- For a load, check the appropriate row valid bit; but also, -- For a load, check the appropriate row valid bit; but also,
-- if use_forward_rl is 1 then we can consider this a hit. -- if use_forward_rl is 1 then we can consider this a hit.
is_hit := not r0.req.load or r1.rows_valid(to_integer(req_row(ROW_LINEBITS-1 downto 0))) or -- For a touch, since the line we want is being reloaded already,
-- consider this a hit.
is_hit := not r0.req.load or r0.req.touch or
r1.rows_valid(to_integer(req_row(ROW_LINEBITS-1 downto 0))) or
use_forward_rl; use_forward_rl;
hit_way := replace_way; hit_way := replace_way;
hit_reload := is_hit;
elsif r0.req.load = '1' and r0.req.atomic_qw = '1' and r0.req.atomic_first = '0' and
r0.req.nc = '0' and perm_attr.nocache = '0' and r1.prev_hit = '1' then
-- For the second half of an atomic quadword load, just use the
-- same way as the first half, without considering whether the line
-- is valid; it is as if we had read the second dword at the same
-- time as the first dword, and the line was valid back then.
-- (Cases where the line is currently being reloaded are handled above.)
-- NB lq to noncacheable isn't required to be atomic per the ISA.
is_hit := '1';
hit_way := r1.prev_way;
end if; end if;


-- The way that matched on a hit -- The way that matched on a hit
req_hit_way <= hit_way; req_hit_way <= hit_way;
req_is_hit <= is_hit;
req_hit_reload <= hit_reload;


-- work out whether we have permission for this access -- work out whether we have permission for this access
-- NB we don't yet implement AMR, thus no KUAP -- NB we don't yet implement AMR, thus no KUAP
@ -1056,29 +1141,44 @@ begin
-- operation needs to be done -- operation needs to be done
-- --
nc := r0.req.nc or perm_attr.nocache; nc := r0.req.nc or perm_attr.nocache;
op := OP_NONE; req_op_bad <= '0';
req_op_load_hit <= '0';
req_op_load_miss <= '0';
req_op_store <= '0';
req_op_nop <= '0';
req_op_flush <= '0';
req_op_sync <= '0';
if go = '1' then if go = '1' then
if access_ok = '0' then if r0.req.sync = '1' then
op := OP_BAD; req_op_sync <= '1';
elsif cancel_store = '1' then elsif r0.req.touch = '1' then
op := OP_STCX_FAIL; if access_ok = '1' and is_hit = '0' and nc = '0' then
req_op_load_miss <= '1';
elsif access_ok = '1' and is_hit = '1' and nc = '0' then
-- Make this OP_LOAD_HIT so the PLRU gets updated
req_op_load_hit <= '1';
else else
opsel := r0.req.load & nc & is_hit; req_op_nop <= '1';
case opsel is end if;
when "101" => op := OP_LOAD_HIT; elsif access_ok = '0' then
when "100" => op := OP_LOAD_MISS; req_op_bad <= '1';
when "110" => op := OP_LOAD_NC; elsif r0.req.flush = '1' then
when "001" => op := OP_STORE_HIT; if is_hit = '0' then
when "000" => op := OP_STORE_MISS; req_op_nop <= '1';
when "010" => op := OP_STORE_MISS; else
when "011" => op := OP_BAD; req_op_flush <= '1';
when "111" => op := OP_BAD; end if;
when others => op := OP_NONE; elsif nc = '1' and (is_hit = '1' or r0.req.reserve = '1') then
end case; req_op_bad <= '1';
elsif r0.req.load = '0' then
req_op_store <= '1'; -- includes dcbz
else
req_op_load_hit <= is_hit;
req_op_load_miss <= not is_hit; -- includes non-cacheable loads
end if; end if;
end if; end if;
req_op <= op;
req_go <= go; req_go <= go;
req_nc <= nc;


-- Version of the row number that is valid one cycle earlier -- Version of the row number that is valid one cycle earlier
-- in the cases where we need to read the cache data BRAM. -- in the cases where we need to read the cache data BRAM.
@ -1101,45 +1201,6 @@ begin
-- Wire up wishbone request latch out of stage 1 -- Wire up wishbone request latch out of stage 1
wishbone_out <= r1.wb; wishbone_out <= r1.wb;


-- Handle load-with-reservation and store-conditional instructions.
-- Combinational decode of the request held in r0 into three strobes,
-- each of which defaults to '0':
--   set_rsrv     : the request is a load with reservation
--   clear_rsrv   : the request is a store conditional
--   cancel_store : the store conditional has no valid reservation, or its
--                  address bits above LINE_OFF_BITS differ from reservation.addr
reservation_comb: process(all)
begin
    cancel_store <= '0';
    set_rsrv <= '0';
    clear_rsrv <= '0';
    -- XXX generate alignment interrupt if address is not aligned
    -- XXX or if r0.req.nc = '1'
    if r0_valid = '1' and r0.req.reserve = '1' and r0.req.load = '1' then
        -- load with reservation
        set_rsrv <= '1';
    elsif r0_valid = '1' and r0.req.reserve = '1' then
        -- store conditional
        clear_rsrv <= '1';
        if reservation.valid = '0' or
            r0.req.addr(63 downto LINE_OFF_BITS) /= reservation.addr then
            cancel_store <= '1';
        end if;
    end if;
end process;

-- Clocked update of the reservation register.
-- Reset clears reservation.valid only; reservation.addr is left untouched.
-- Otherwise the register changes only while r0_valid and access_ok are both
-- '1': clear_rsrv takes priority over set_rsrv, and set_rsrv also captures
-- the request address bits above LINE_OFF_BITS.
reservation_reg: process(clk)
begin
    if rising_edge(clk) then
        if rst = '1' or
            (r0_valid = '1' and access_ok = '1' and clear_rsrv = '1') then
            reservation.valid <= '0';
        elsif r0_valid = '1' and access_ok = '1' and set_rsrv = '1' then
            reservation.valid <= '1';
            reservation.addr <= r0.req.addr(63 downto LINE_OFF_BITS);
        end if;
    end if;
end process;

-- Return data for loads & completion control logic -- Return data for loads & completion control logic
-- --
writeback_control: process(all) writeback_control: process(all)
@ -1149,6 +1210,7 @@ begin
d_out.store_done <= not r1.stcx_fail; d_out.store_done <= not r1.stcx_fail;
d_out.error <= r1.ls_error; d_out.error <= r1.ls_error;
d_out.cache_paradox <= r1.cache_paradox; d_out.cache_paradox <= r1.cache_paradox;
d_out.reserve_nc <= r1.reserve_nc;


-- Outputs to MMU -- Outputs to MMU
m_out.done <= r1.mmu_done; m_out.done <= r1.mmu_done;
@ -1185,7 +1247,7 @@ begin
report "completing ld/st with error"; report "completing ld/st with error";
end if; end if;


-- Slow ops (load miss, NC, stores) -- Slow ops (load miss, NC, stores, sync)
if r1.slow_valid = '1' then if r1.slow_valid = '1' then
report "completing store or load miss data=" & to_hstring(r1.data_out); report "completing store or load miss data=" & to_hstring(r1.data_out);
end if; end if;
@ -1288,14 +1350,6 @@ begin
variable data_out : std_ulogic_vector(63 downto 0); variable data_out : std_ulogic_vector(63 downto 0);
begin begin
if rising_edge(clk) then if rising_edge(clk) then
if req_op /= OP_NONE then
report "op:" & op_t'image(req_op) &
" addr:" & to_hstring(r0.req.addr) &
" nc:" & std_ulogic'image(r0.req.nc) &
" idx:" & to_hstring(req_index) &
" tag:" & to_hstring(req_tag) &
" way: " & to_hstring(req_hit_way);
end if;
if r0_valid = '1' then if r0_valid = '1' then
r1.mmu_req <= r0.mmu_req; r1.mmu_req <= r0.mmu_req;
end if; end if;
@ -1341,36 +1395,19 @@ begin
r1.forward_valid <= '1'; r1.forward_valid <= '1';
end if; end if;


-- Fast path for load/store hits. Set signals for the writeback controls. r1.hit_load_valid <= req_op_load_hit;
if req_op = OP_LOAD_HIT then r1.cache_hit <= req_op_load_hit or (req_op_store and req_is_hit); -- causes PLRU update
r1.hit_load_valid <= '1';
else
r1.hit_load_valid <= '0';
end if;


-- The cache hit indication is used for PLRU updates r1.cache_paradox <= access_ok and req_nc and req_is_hit;
if req_op = OP_LOAD_HIT or req_op = OP_STORE_HIT then r1.reserve_nc <= access_ok and r0.req.reserve and req_nc;
r1.cache_hit <= '1'; if req_op_bad = '1' then
else
r1.cache_hit <= '0';
end if;

if req_op = OP_BAD then
report "Signalling ld/st error valid_ra=" & std_ulogic'image(valid_ra) & report "Signalling ld/st error valid_ra=" & std_ulogic'image(valid_ra) &
" rc_ok=" & std_ulogic'image(rc_ok) & " perm_ok=" & std_ulogic'image(perm_ok); " rc_ok=" & std_ulogic'image(rc_ok) & " perm_ok=" & std_ulogic'image(perm_ok);
r1.ls_error <= not r0.mmu_req; r1.ls_error <= not r0.mmu_req;
r1.mmu_error <= r0.mmu_req; r1.mmu_error <= r0.mmu_req;
r1.cache_paradox <= access_ok;
else else
r1.ls_error <= '0'; r1.ls_error <= '0';
r1.mmu_error <= '0'; r1.mmu_error <= '0';
r1.cache_paradox <= '0';
end if;

if req_op = OP_STCX_FAIL then
r1.stcx_fail <= '1';
else
r1.stcx_fail <= '0';
end if; end if;


-- Record TLB hit information for updating TLB PLRU -- Record TLB hit information for updating TLB PLRU
@ -1423,6 +1460,10 @@ begin
r1.acks_pending <= to_unsigned(0, 3); r1.acks_pending <= to_unsigned(0, 3);
r1.stalled <= '0'; r1.stalled <= '0';
r1.dec_acks <= '0'; r1.dec_acks <= '0';
r1.prev_hit <= '0';
r1.prev_hit_reload <= '0';
reservation.valid <= '0';
reservation.addr <= (others => '0');


-- Not useful normally but helps avoiding tons of sim warnings -- Not useful normally but helps avoiding tons of sim warnings
r1.wb.adr <= (others => '0'); r1.wb.adr <= (others => '0');
@ -1430,27 +1471,33 @@ begin
-- One cycle pulses reset -- One cycle pulses reset
r1.slow_valid <= '0'; r1.slow_valid <= '0';
r1.write_bram <= '0'; r1.write_bram <= '0';
r1.stcx_fail <= '0';


r1.ls_valid <= '0'; r1.ls_valid <= (req_op_load_hit or req_op_nop) and not r0.mmu_req;
-- complete tlbies and TLB loads in the third cycle -- complete tlbies and TLB loads in the third cycle
r1.mmu_done <= r0_valid and (r0.tlbie or r0.tlbld); r1.mmu_done <= (r0_valid and (r0.tlbie or r0.tlbld)) or
if req_op = OP_LOAD_HIT or req_op = OP_STCX_FAIL then (req_op_load_hit and r0.mmu_req);
if r0.mmu_req = '0' then
r1.ls_valid <= '1'; -- The kill_rsrv2 term covers the case where the reservation
else -- address was set at the beginning of this cycle, and a store
r1.mmu_done <= '1'; -- to that address happened in the previous cycle.
if kill_rsrv = '1' or kill_rsrv2 = '1' then
reservation.valid <= '0';
end if; end if;
if req_go = '1' and access_ok = '1' and r0.req.load = '1' and
r0.req.reserve = '1' and r0.req.atomic_first = '1' then
reservation.addr <= ra(REAL_ADDR_BITS - 1 downto LINE_OFF_BITS);
reservation.valid <= req_is_hit and not req_snoop_hit;
end if; end if;


-- Do invalidations from snooped stores to memory -- Do invalidations from snooped stores to memory
if snoop_valid = '1' then if snoop_valid = '1' then
assert not is_X(snoop_tag_set); assert not is_X(snoop_paddr);
assert not is_X(snoop_wrtag); assert not is_X(snoop_hits);
end if; end if;
for i in 0 to NUM_WAYS-1 loop for i in 0 to NUM_WAYS-1 loop
if snoop_valid = '1' and read_tag(i, snoop_tag_set) = snoop_wrtag then if snoop_hits(i) = '1' then
assert not is_X(snoop_index); cache_valids(to_integer(get_index(snoop_paddr)))(i) <= '0';
cache_valids(to_integer(snoop_index))(i) <= '0';
end if; end if;
end loop; end loop;


@ -1469,14 +1516,24 @@ begin
end if; end if;


-- Take request from r1.req if there is one there, -- Take request from r1.req if there is one there,
-- else from req_op, ra, etc. -- else from req_op_*, ra, etc.
if r1.full = '1' then if r1.full = '1' then
req := r1.req; req := r1.req;
else else
req.op := req_op; req.op_lmiss := req_op_load_miss;
req.op_store := req_op_store;
req.op_flush := req_op_flush;
req.op_sync := req_op_sync;
req.nc := req_nc;
req.valid := req_go; req.valid := req_go;
req.mmu_req := r0.mmu_req; req.mmu_req := r0.mmu_req;
req.dcbz := r0.req.dcbz; req.dcbz := r0.req.dcbz;
req.flush := r0.req.flush;
req.touch := r0.req.touch;
req.sync := r0.req.sync;
req.reserve := r0.req.reserve;
req.first_dw := not r0.req.atomic_qw or r0.req.atomic_first;
req.last_dw := not r0.req.atomic_qw or r0.req.atomic_last;
req.real_addr := ra; req.real_addr := ra;
-- Force data to 0 for dcbz -- Force data to 0 for dcbz
if r0.req.dcbz = '1' then if r0.req.dcbz = '1' then
@ -1493,14 +1550,16 @@ begin
req.byte_sel := r0.req.byte_sel; req.byte_sel := r0.req.byte_sel;
end if; end if;
req.hit_way := req_hit_way; req.hit_way := req_hit_way;
req.is_hit := req_is_hit;
req.same_tag := req_same_tag; req.same_tag := req_same_tag;


-- Store the incoming request from r0, if it is a slow request -- Store the incoming request from r0, if it is a slow request
-- Note that r1.full = 1 implies req_op = OP_NONE -- Note that r1.full = 1 implies none of the req_op_* are 1.
if req_op = OP_LOAD_MISS or req_op = OP_LOAD_NC or -- For the sake of timing we put any valid request in r1.req,
req_op = OP_STORE_MISS or req_op = OP_STORE_HIT then -- but only set r1.full if it is a slow request.
if req_go = '1' then
r1.req <= req; r1.req <= req;
r1.full <= '1'; r1.full <= req_op_load_miss or req_op_store or req_op_flush or req_op_sync;
end if; end if;
end if; end if;


@ -1512,9 +1571,14 @@ begin
r1.victim_way <= plru_victim; r1.victim_way <= plru_victim;
report "victim way:" & to_hstring(plru_victim); report "victim way:" & to_hstring(plru_victim);
end if; end if;
if req_op = OP_LOAD_MISS or (req_op = OP_STORE_MISS and r0.req.dcbz = '1') then if req_op_load_miss = '1' or (r0.req.dcbz = '1' and req_is_hit = '0') then
r1.choose_victim <= '1'; r1.choose_victim <= '1';
end if; end if;
if req_go = '1' then
r1.prev_hit <= req_is_hit;
r1.prev_way <= req_hit_way;
r1.prev_hit_reload <= req_hit_reload;
end if;


-- Update count of pending acks -- Update count of pending acks
acks := r1.acks_pending; acks := r1.acks_pending;
@ -1536,6 +1600,7 @@ begin
r1.wb.sel <= req.byte_sel; r1.wb.sel <= req.byte_sel;
r1.wb.dat <= req.data; r1.wb.dat <= req.data;
r1.dcbz <= req.dcbz; r1.dcbz <= req.dcbz;
r1.atomic_more <= not req.last_dw;


-- Keep track of our index and way for subsequent stores. -- Keep track of our index and way for subsequent stores.
r1.store_index <= get_index(req.real_addr); r1.store_index <= get_index(req.real_addr);
@ -1544,44 +1609,52 @@ begin
r1.reload_tag <= get_tag(req.real_addr); r1.reload_tag <= get_tag(req.real_addr);
r1.req.same_tag <= '1'; r1.req.same_tag <= '1';


if req.op = OP_STORE_HIT then if req.is_hit = '1' then
r1.store_way <= req.hit_way; r1.store_way <= req.hit_way;
end if; end if;


-- Reset per-row valid bits, ready for handling OP_LOAD_MISS -- Reset per-row valid bits, ready for handling the next load miss
for i in 0 to ROW_PER_LINE - 1 loop for i in 0 to ROW_PER_LINE - 1 loop
r1.rows_valid(i) <= '0'; r1.rows_valid(i) <= '0';
end loop; end loop;


case req.op is if req.op_lmiss = '1' then
when OP_LOAD_HIT =>
-- stay in IDLE state

when OP_LOAD_MISS =>
-- Normal load cache miss, start the reload machine -- Normal load cache miss, start the reload machine
-- -- Or non-cacheable load
if req.nc = '0' then
report "cache miss real addr:" & to_hstring(req.real_addr) & report "cache miss real addr:" & to_hstring(req.real_addr) &
" idx:" & to_hstring(get_index(req.real_addr)) & " idx:" & to_hstring(get_index(req.real_addr)) &
" tag:" & to_hstring(get_tag(req.real_addr)); " tag:" & to_hstring(get_tag(req.real_addr));
end if;


-- Start the wishbone cycle -- Start the wishbone cycle
r1.wb.we <= '0'; r1.wb.we <= '0';
r1.wb.cyc <= '1'; r1.wb.cyc <= '1';
r1.wb.stb <= '1'; r1.wb.stb <= '1';


if req.nc = '0' then
-- Track that we had one request sent -- Track that we had one request sent
r1.state <= RELOAD_WAIT_ACK; r1.state <= RELOAD_WAIT_ACK;
r1.write_tag <= '1'; r1.write_tag <= '1';
ev.load_miss <= '1'; ev.load_miss <= '1';


when OP_LOAD_NC => -- If this is a touch, complete the instruction
r1.wb.cyc <= '1'; if req.touch = '1' then
r1.wb.stb <= '1'; r1.full <= '0';
r1.wb.we <= '0'; r1.slow_valid <= '1';
r1.ls_valid <= '1';
end if;
else
r1.state <= NC_LOAD_WAIT_ACK; r1.state <= NC_LOAD_WAIT_ACK;
end if;
end if;


when OP_STORE_HIT | OP_STORE_MISS => if req.op_store = '1' then
if req.dcbz = '0' then if req.reserve = '1' then
-- stcx needs to wait until next cycle
-- for the reservation address check
r1.state <= DO_STCX;
elsif req.dcbz = '0' then
r1.state <= STORE_WAIT_ACK; r1.state <= STORE_WAIT_ACK;
r1.full <= '0'; r1.full <= '0';
r1.slow_valid <= '1'; r1.slow_valid <= '1';
@ -1590,30 +1663,33 @@ begin
else else
r1.mmu_done <= '1'; r1.mmu_done <= '1';
end if; end if;
if req.op = OP_STORE_HIT then r1.write_bram <= req.is_hit;
r1.write_bram <= '1'; r1.wb.we <= '1';
end if; r1.wb.cyc <= '1';
r1.wb.stb <= '1';
else else
-- dcbz is handled much like a load miss except -- dcbz is handled much like a load miss except
-- that we are writing to memory instead of reading -- that we are writing to memory instead of reading
r1.state <= RELOAD_WAIT_ACK; r1.state <= RELOAD_WAIT_ACK;
if req.op = OP_STORE_MISS then r1.write_tag <= not req.is_hit;
r1.write_tag <= '1';
end if;
end if;
r1.wb.we <= '1'; r1.wb.we <= '1';
r1.wb.cyc <= '1'; r1.wb.cyc <= '1';
r1.wb.stb <= '1'; r1.wb.stb <= '1';
if req.op = OP_STORE_MISS then end if;
ev.store_miss <= '1'; ev.store_miss <= not req.is_hit;
end if; end if;


-- OP_NONE and OP_BAD do nothing if req.op_flush = '1' then
-- OP_BAD & OP_STCX_FAIL were handled above already r1.state <= FLUSH_CYCLE;
when OP_NONE => end if;
when OP_BAD =>
when OP_STCX_FAIL => if req.op_sync = '1' then
end case; -- sync/lwsync can complete now that the state machine
-- is idle.
r1.full <= '0';
r1.slow_valid <= '1';
r1.ls_valid <= '1';
end if;


when RELOAD_WAIT_ACK => when RELOAD_WAIT_ACK =>
-- If we are still sending requests, was one accepted ? -- If we are still sending requests, was one accepted ?
@ -1643,7 +1719,7 @@ begin
assert not is_X(r1.req.real_addr); assert not is_X(r1.req.real_addr);
end if; end if;
if r1.full = '1' and r1.req.same_tag = '1' and if r1.full = '1' and r1.req.same_tag = '1' and
((r1.dcbz = '1' and req.dcbz = '1') or r1.req.op = OP_LOAD_MISS) and ((r1.dcbz = '1' and r1.req.dcbz = '1') or r1.req.op_lmiss = '1') and
r1.store_row = get_row(r1.req.real_addr) then r1.store_row = get_row(r1.req.real_addr) then
r1.full <= '0'; r1.full <= '0';
r1.slow_valid <= '1'; r1.slow_valid <= '1';
@ -1652,6 +1728,10 @@ begin
else else
r1.mmu_done <= '1'; r1.mmu_done <= '1';
end if; end if;
-- NB: for lqarx, set the reservation on the first dword
if r1.req.reserve = '1' and r1.req.first_dw = '1' then
reservation.valid <= '1';
end if;
end if; end if;


-- Check for completion -- Check for completion
@ -1667,6 +1747,10 @@ begin
cache_valids(to_integer(r1.store_index))(to_integer(r1.store_way)) <= '1'; cache_valids(to_integer(r1.store_index))(to_integer(r1.store_way)) <= '1';


ev.dcache_refill <= not r1.dcbz; ev.dcache_refill <= not r1.dcbz;
-- Second half of a lq/lqarx can assume a hit on this line now
-- if the first half hit this line.
r1.prev_hit <= r1.prev_hit_reload;
r1.prev_way <= r1.store_way;
r1.state <= IDLE; r1.state <= IDLE;
end if; end if;


@ -1680,6 +1764,10 @@ begin
if wishbone_in.stall = '0' then if wishbone_in.stall = '0' then
-- See if there is another store waiting to be done -- See if there is another store waiting to be done
-- which is in the same real page. -- which is in the same real page.
-- This could be either in r1.req or in r0.
-- Ignore store-conditionals, they have to go through
-- DO_STCX state, unless they are the second half of a
-- successful stqcx, which is handled here.
if req.valid = '1' then if req.valid = '1' then
r1.wb.adr(SET_SIZE_BITS - ROW_OFF_BITS - 1 downto 0) <= r1.wb.adr(SET_SIZE_BITS - ROW_OFF_BITS - 1 downto 0) <=
req.real_addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS); req.real_addr(SET_SIZE_BITS - 1 downto ROW_OFF_BITS);
@ -1687,30 +1775,33 @@ begin
r1.wb.sel <= req.byte_sel; r1.wb.sel <= req.byte_sel;
end if; end if;
assert not is_X(acks); assert not is_X(acks);
if acks < 7 and req.same_tag = '1' and req.dcbz = '0' and r1.wb.stb <= '0';
(req.op = OP_STORE_MISS or req.op = OP_STORE_HIT) then if req.op_store = '1' and req.same_tag = '1' and req.dcbz = '0' and
(req.reserve = '0' or r1.atomic_more = '1') then
if acks < 7 then
r1.wb.stb <= '1'; r1.wb.stb <= '1';
stbs_done := false; stbs_done := false;
r1.store_way <= req.hit_way; r1.store_way <= req.hit_way;
r1.store_row <= get_row(req.real_addr); r1.store_row <= get_row(req.real_addr);
if req.op = OP_STORE_HIT then r1.write_bram <= req.is_hit;
r1.write_bram <= '1'; r1.atomic_more <= not req.last_dw;
end if;
r1.full <= '0'; r1.full <= '0';
r1.slow_valid <= '1'; r1.slow_valid <= '1';
-- Store requests never come from the MMU -- Store requests never come from the MMU
r1.ls_valid <= '1'; r1.ls_valid <= '1';
stbs_done := false; end if;
else else
r1.wb.stb <= '0';
stbs_done := true; stbs_done := true;
if req.valid = '1' then
r1.atomic_more <= '0';
end if;
end if; end if;
end if; end if;


-- Got ack ? See if complete. -- Got ack ? See if complete.
if wishbone_in.ack = '1' then if stbs_done and r1.atomic_more = '0' then
assert not is_X(acks); assert not is_X(acks);
if stbs_done and acks = 1 then if acks = 0 or (wishbone_in.ack = '1' and acks = 1) then
r1.state <= IDLE; r1.state <= IDLE;
r1.wb.cyc <= '0'; r1.wb.cyc <= '0';
r1.wb.stb <= '0'; r1.wb.stb <= '0';
@ -1736,6 +1827,51 @@ begin
r1.wb.cyc <= '0'; r1.wb.cyc <= '0';
r1.wb.stb <= '0'; r1.wb.stb <= '0';
end if; end if;

when DO_STCX =>
    -- Store-conditional handling.  The IDLE state sends a store with
    -- req.reserve = '1' here so that the reservation can be checked
    -- before anything is written.  There are three outcomes, tested in
    -- priority order:
    --   1. reservation check fails -> fail the stcx and go back to IDLE
    --   2. wishbone cycle not yet started -> start the write cycle
    --   3. strobe accepted (no stall) -> complete the request and hand
    --      over to STORE_WAIT_ACK
    -- NOTE(review): kill_rsrv is driven outside this fragment; presumably
    -- it signals a snooped write hitting the reservation -- confirm.
    if reservation.valid = '0' or kill_rsrv = '1' or
        r1.req.real_addr(REAL_ADDR_BITS - 1 downto LINE_OFF_BITS) /= reservation.addr then
        -- Wrong address, didn't have reservation, or lost reservation
        -- Abandon the wishbone cycle if started and fail the stcx.
        r1.stcx_fail <= '1';
        r1.full <= '0';
        r1.ls_valid <= '1';
        r1.wb.cyc <= '0';
        r1.wb.stb <= '0';
        reservation.valid <= '0';
        -- If this is the first half of a stqcx., the second half
        -- will fail also because the reservation is not valid.
        r1.state <= IDLE;
    elsif r1.wb.cyc = '0' then
        -- Right address and have reservation, so start the
        -- wishbone cycle
        r1.wb.we <= '1';
        r1.wb.cyc <= '1';
        r1.wb.stb <= '1';
    elsif r1.wb.stb = '1' and wishbone_in.stall = '0' then
        -- Store has been accepted, so now we can write the
        -- cache data RAM and complete the request
        r1.write_bram <= r1.req.is_hit;
        r1.wb.stb <= '0';
        r1.full <= '0';
        r1.slow_valid <= '1';
        r1.ls_valid <= '1';
        reservation.valid <= '0';
        -- For a stqcx, STORE_WAIT_ACK will issue the second half
        -- without checking the reservation, which is what we want
        -- given that the first half has gone out.
        -- With r1.atomic_more set, STORE_WAIT_ACK won't exit to
        -- IDLE state until it sees the second half.
        r1.state <= STORE_WAIT_ACK;
    end if;


when FLUSH_CYCLE =>
    -- Entered from IDLE when req.op_flush = '1'.  This state
    -- unconditionally returns to IDLE, signalling completion of the
    -- request on the way out.
    r1.state <= IDLE;
    r1.full <= '0';
    r1.slow_valid <= '1';
    r1.ls_valid <= '1';
    -- Invalidate the cache entry selected by the recorded store
    -- index and way.
    cache_valids(to_integer(r1.store_index))(to_integer(r1.store_way)) <= '0';
end case; end case;
end if; end if;
end if; end if;
@ -1753,7 +1889,7 @@ begin
r1.wb.stb & r1.wb.cyc & r1.wb.stb & r1.wb.cyc &
d_out.error & d_out.error &
d_out.valid & d_out.valid &
std_ulogic_vector(to_unsigned(op_t'pos(req_op), 3)) & req_op_load_miss & req_op_store & req_op_bad &
stall_out & stall_out &
std_ulogic_vector(resize(tlb_hit_way, 3)) & std_ulogic_vector(resize(tlb_hit_way, 3)) &
valid_ra & valid_ra &

@ -44,6 +44,8 @@ architecture behaviour of decode1 is
signal decode_rom_addr : insn_code; signal decode_rom_addr : insn_code;
signal decode : decode_rom_t; signal decode : decode_rom_t;


signal double : std_ulogic;

type prefix_state_t is record type prefix_state_t is record
prefixed : std_ulogic; prefixed : std_ulogic;
prefix : std_ulogic_vector(25 downto 0); prefix : std_ulogic_vector(25 downto 0);
@ -106,6 +108,7 @@ architecture behaviour of decode1 is
INSN_brd => (ALU, NONE, OP_BREV, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_brd => (ALU, NONE, OP_BREV, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_cbcdtd => (ALU, NONE, OP_BCD, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_cbcdtd => (ALU, NONE, OP_BCD, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_cdtbcd => (ALU, NONE, OP_BCD, NONE, NONE, RS, RA, '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_cdtbcd => (ALU, NONE, OP_BCD, NONE, NONE, RS, RA, '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_cfuged => (ALU, NONE, OP_BSORT, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_cmp => (ALU, NONE, OP_CMP, RA, RB, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE), INSN_cmp => (ALU, NONE, OP_CMP, RA, RB, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE),
INSN_cmpb => (ALU, NONE, OP_CMPB, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_cmpb => (ALU, NONE, OP_CMPB, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_cmpeqb => (ALU, NONE, OP_CMPEQB, RA, RB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_cmpeqb => (ALU, NONE, OP_CMPEQB, RA, RB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
@ -113,10 +116,10 @@ architecture behaviour of decode1 is
INSN_cmpl => (ALU, NONE, OP_CMP, RA, RB, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_cmpl => (ALU, NONE, OP_CMP, RA, RB, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_cmpli => (ALU, NONE, OP_CMP, RA, CONST_UI, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_cmpli => (ALU, NONE, OP_CMP, RA, CONST_UI, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_cmprb => (ALU, NONE, OP_CMPRB, RA, RB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_cmprb => (ALU, NONE, OP_CMPRB, RA, RB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_cntlzd => (ALU, NONE, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), INSN_cntlzd => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE),
INSN_cntlzw => (ALU, NONE, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE), INSN_cntlzw => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE),
INSN_cnttzd => (ALU, NONE, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), INSN_cnttzd => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE),
INSN_cnttzw => (ALU, NONE, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE), INSN_cnttzw => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE),
INSN_crand => (ALU, NONE, OP_CROP, NONE, NONE, NONE, NONE, '1', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_crand => (ALU, NONE, OP_CROP, NONE, NONE, NONE, NONE, '1', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_crandc => (ALU, NONE, OP_CROP, NONE, NONE, NONE, NONE, '1', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_crandc => (ALU, NONE, OP_CROP, NONE, NONE, NONE, NONE, '1', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_creqv => (ALU, NONE, OP_CROP, NONE, NONE, NONE, NONE, '1', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_creqv => (ALU, NONE, OP_CROP, NONE, NONE, NONE, NONE, '1', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
@ -126,10 +129,10 @@ architecture behaviour of decode1 is
INSN_crorc => (ALU, NONE, OP_CROP, NONE, NONE, NONE, NONE, '1', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_crorc => (ALU, NONE, OP_CROP, NONE, NONE, NONE, NONE, '1', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_crxor => (ALU, NONE, OP_CROP, NONE, NONE, NONE, NONE, '1', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_crxor => (ALU, NONE, OP_CROP, NONE, NONE, NONE, NONE, '1', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_darn => (ALU, NONE, OP_DARN, NONE, NONE, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_darn => (ALU, NONE, OP_DARN, NONE, NONE, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_dcbf => (ALU, NONE, OP_DCBF, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_dcbf => (LDST, NONE, OP_DCBF, RA_OR_ZERO, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_dcbst => (ALU, NONE, OP_DCBST, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_dcbst => (ALU, NONE, OP_DCBST, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_dcbt => (ALU, NONE, OP_DCBT, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_dcbt => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_dcbtst => (ALU, NONE, OP_DCBTST, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_dcbtst => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_dcbz => (LDST, NONE, OP_DCBZ, RA_OR_ZERO, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_dcbz => (LDST, NONE, OP_DCBZ, RA_OR_ZERO, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_divd => (DVU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RCOE, '0', '0', NONE), INSN_divd => (DVU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RCOE, '0', '0', NONE),
INSN_divde => (DVU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RCOE, '0', '0', NONE), INSN_divde => (DVU, NONE, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RCOE, '0', '0', NONE),
@ -197,7 +200,7 @@ architecture behaviour of decode1 is
INSN_ftdiv => (FPU, FPU, OP_FP_CMP, FRA, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_ftdiv => (FPU, FPU, OP_FP_CMP, FRA, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_ftsqrt => (FPU, FPU, OP_FP_CMP, NONE, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_ftsqrt => (FPU, FPU, OP_FP_CMP, NONE, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_icbi => (ALU, NONE, OP_ICBI, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), INSN_icbi => (ALU, NONE, OP_ICBI, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE),
INSN_icbt => (ALU, NONE, OP_ICBT, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), INSN_icbt => (ALU, NONE, OP_ICBT, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_isel => (ALU, NONE, OP_ISEL, RA_OR_ZERO, RB, NONE, RT, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_isel => (ALU, NONE, OP_ISEL, RA_OR_ZERO, RB, NONE, RT, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_isync => (ALU, NONE, OP_ISYNC, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_isync => (ALU, NONE, OP_ISYNC, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_lbarx => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '1', '0', '0', NONE, '0', '0', NONE), INSN_lbarx => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '1', '0', '0', NONE, '0', '0', NONE),
@ -234,6 +237,8 @@ architecture behaviour of decode1 is
INSN_lhzu => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD), INSN_lhzu => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD),
INSN_lhzux => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD), INSN_lhzux => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD),
INSN_lhzx => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_lhzx => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_lq => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_DQ, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', DRTP),
INSN_lqarx => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '1', '0', '0', NONE, '0', '0', DRTP),
INSN_lwa => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_DS, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_lwa => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_DS, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_lwarx => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '1', '0', '0', NONE, '0', '0', NONE), INSN_lwarx => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '1', '0', '0', NONE, '0', '0', NONE),
INSN_lwaux => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '1', '0', '0', '0', NONE, '0', '0', DUPD), INSN_lwaux => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '1', '0', '0', '0', NONE, '0', '0', DUPD),
@ -281,12 +286,15 @@ architecture behaviour of decode1 is
INSN_ori => (ALU, NONE, OP_LOGIC, NONE, CONST_UI, RS, RA, '0', '0', '1', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE), INSN_ori => (ALU, NONE, OP_LOGIC, NONE, CONST_UI, RS, RA, '0', '0', '1', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE),
INSN_oris => (ALU, NONE, OP_LOGIC, NONE, CONST_UI_HI, RS, RA, '0', '0', '1', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE), INSN_oris => (ALU, NONE, OP_LOGIC, NONE, CONST_UI_HI, RS, RA, '0', '0', '1', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE),
INSN_paddi => (ALU, NONE, OP_ADD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_paddi => (ALU, NONE, OP_ADD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_pdepd => (ALU, NONE, OP_BSORT, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_pextd => (ALU, NONE, OP_BSORT, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_plbz => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_plbz => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_pld => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_pld => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_plfd => (LDST, FPU, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_plfd => (LDST, FPU, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_plfs => (LDST, FPU, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE), INSN_plfs => (LDST, FPU, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE),
INSN_plha => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_plha => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_plhz => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_plhz => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_plq => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', DRTP),
INSN_plwa => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_plwa => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_plwz => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_plwz => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_pnop => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_pnop => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
@ -295,13 +303,15 @@ architecture behaviour of decode1 is
INSN_pstfd => (LDST, FPU, OP_STORE, RA0_OR_CIA, CONST_PSI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_pstfd => (LDST, FPU, OP_STORE, RA0_OR_CIA, CONST_PSI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_pstfs => (LDST, FPU, OP_STORE, RA0_OR_CIA, CONST_PSI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE), INSN_pstfs => (LDST, FPU, OP_STORE, RA0_OR_CIA, CONST_PSI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE),
INSN_psth => (LDST, NONE, OP_STORE, RA0_OR_CIA, CONST_PSI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_psth => (LDST, NONE, OP_STORE, RA0_OR_CIA, CONST_PSI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_pstq => (LDST, NONE, OP_STORE, RA0_OR_CIA, CONST_PSI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', DRSP),
INSN_pstw => (LDST, NONE, OP_STORE, RA0_OR_CIA, CONST_PSI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_pstw => (LDST, NONE, OP_STORE, RA0_OR_CIA, CONST_PSI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_popcntb => (ALU, NONE, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_popcntb => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_popcntd => (ALU, NONE, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_popcntd => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_popcntw => (ALU, NONE, OP_POPCNT, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_popcntw => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_prtyd => (ALU, NONE, OP_PRTY, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_prtyd => (ALU, NONE, OP_PRTY, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_prtyw => (ALU, NONE, OP_PRTY, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_prtyw => (ALU, NONE, OP_PRTY, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_rfid => (ALU, NONE, OP_RFID, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_rfid => (ALU, NONE, OP_RFID, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_rfscv => (ALU, NONE, OP_RFID, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_rldcl => (ALU, NONE, OP_RLCL, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), INSN_rldcl => (ALU, NONE, OP_RLCL, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE),
INSN_rldcr => (ALU, NONE, OP_RLCR, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), INSN_rldcr => (ALU, NONE, OP_RLCR, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE),
INSN_rldic => (ALU, NONE, OP_RLC, NONE, CONST_SH, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), INSN_rldic => (ALU, NONE, OP_RLC, NONE, CONST_SH, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE),
@ -352,6 +362,8 @@ architecture behaviour of decode1 is
INSN_sthu => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_SI, RS, RA, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), INSN_sthu => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_SI, RS, RA, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE),
INSN_sthux => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), INSN_sthux => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE),
INSN_sthx => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_sthx => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_stq => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_DS, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', DRSP),
INSN_stqcx => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '1', '0', '0', ONE, '0', '0', DRSP),
INSN_stw => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_stw => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_stwbrx => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '1', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_stwbrx => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '1', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_stwcix => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '1', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_stwcix => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '1', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
@ -365,7 +377,7 @@ architecture behaviour of decode1 is
INSN_subfic => (ALU, NONE, OP_ADD, RA, CONST_SI, NONE, RT, '0', '0', '1', '0', ONE, '1', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_subfic => (ALU, NONE, OP_ADD, RA, CONST_SI, NONE, RT, '0', '0', '1', '0', ONE, '1', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_subfme => (ALU, NONE, OP_ADD, RA, CONST_M1, NONE, RT, '0', '0', '1', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RCOE, '0', '0', NONE), INSN_subfme => (ALU, NONE, OP_ADD, RA, CONST_M1, NONE, RT, '0', '0', '1', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RCOE, '0', '0', NONE),
INSN_subfze => (ALU, NONE, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RCOE, '0', '0', NONE), INSN_subfze => (ALU, NONE, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RCOE, '0', '0', NONE),
INSN_sync => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_sync => (LDST, NONE, OP_SYNC, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE),
INSN_td => (ALU, NONE, OP_TRAP, RA, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_td => (ALU, NONE, OP_TRAP, RA, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_tdi => (ALU, NONE, OP_TRAP, RA, CONST_SI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_tdi => (ALU, NONE, OP_TRAP, RA, CONST_SI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_tlbie => (LDST, NONE, OP_TLBIE, NONE, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_tlbie => (LDST, NONE, OP_TLBIE, NONE, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
@ -373,7 +385,7 @@ architecture behaviour of decode1 is
INSN_tlbsync => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_tlbsync => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_tw => (ALU, NONE, OP_TRAP, RA, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE), INSN_tw => (ALU, NONE, OP_TRAP, RA, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE),
INSN_twi => (ALU, NONE, OP_TRAP, RA, CONST_SI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE), INSN_twi => (ALU, NONE, OP_TRAP, RA, CONST_SI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE),
INSN_wait => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_wait => (ALU, NONE, OP_WAIT, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE),
INSN_xor => (ALU, NONE, OP_XOR, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), INSN_xor => (ALU, NONE, OP_XOR, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE),
INSN_xori => (ALU, NONE, OP_XOR, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_xori => (ALU, NONE, OP_XOR, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_xoris => (ALU, NONE, OP_XOR, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_xoris => (ALU, NONE, OP_XOR, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
@ -384,7 +396,7 @@ architecture behaviour of decode1 is
function decode_ram_spr(sprn : spr_num_t) return ram_spr_info is function decode_ram_spr(sprn : spr_num_t) return ram_spr_info is
variable ret : ram_spr_info; variable ret : ram_spr_info;
begin begin
ret := (index => (others => '0'), isodd => '0', valid => '1'); ret := (index => (others => '0'), isodd => '0', is32b => '0', valid => '1');
case sprn is case sprn is
when SPR_LR => when SPR_LR =>
ret.index := RAMSPR_LR; ret.index := RAMSPR_LR;
@ -418,6 +430,10 @@ architecture behaviour of decode1 is
when SPR_HSPRG1 => when SPR_HSPRG1 =>
ret.index := RAMSPR_HSPRG1; ret.index := RAMSPR_HSPRG1;
ret.isodd := '1'; ret.isodd := '1';
when SPR_VRSAVE =>
ret.index := RAMSPR_VRSAVE;
ret.isodd := '1';
ret.is32b := '1';
when others => when others =>
ret.valid := '0'; ret.valid := '0';
end case; end case;
@ -427,9 +443,11 @@ architecture behaviour of decode1 is
function map_spr(sprn : spr_num_t) return spr_id is function map_spr(sprn : spr_num_t) return spr_id is
variable i : spr_id; variable i : spr_id;
begin begin
i.sel := "000"; i.sel := "0000";
i.valid := '1'; i.valid := '1';
i.ispmu := '0'; i.ispmu := '0';
i.ronly := '0';
i.wonly := '0';
case sprn is case sprn is
when SPR_TB => when SPR_TB =>
i.sel := SPRSEL_TB; i.sel := SPRSEL_TB;
@ -452,6 +470,24 @@ architecture behaviour of decode1 is
i.sel := SPRSEL_CFAR; i.sel := SPRSEL_CFAR;
when SPR_XER => when SPR_XER =>
i.sel := SPRSEL_XER; i.sel := SPRSEL_XER;
when SPR_FSCR =>
i.sel := SPRSEL_FSCR;
when SPR_HFSCR =>
i.sel := SPRSEL_HFSCR;
when SPR_HEIR =>
i.sel := SPRSEL_HEIR;
when SPR_CTRL =>
i.sel := SPRSEL_CTRL;
i.ronly := '1';
when SPR_CTRLW =>
i.sel := SPRSEL_CTRL;
i.wonly := '1';
when SPR_UDSCR =>
i.sel := SPRSEL_DSCR;
when SPR_DSCR =>
i.sel := SPRSEL_DSCR;
when SPR_PIR =>
i.sel := SPRSEL_PIR;
when others => when others =>
i.valid := '0'; i.valid := '0';
end case; end case;
@ -459,6 +495,8 @@ architecture behaviour of decode1 is
end; end;


begin begin
double <= not r.second when (r.valid = '1' and decode.repeat /= NONE) else '0';

decode1_0: process(clk) decode1_0: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
@ -471,11 +509,16 @@ begin
fetch_failed <= '0'; fetch_failed <= '0';
pr <= prefix_state_init; pr <= prefix_state_init;
elsif stall_in = '0' then elsif stall_in = '0' then
if double = '0' then
r <= rin; r <= rin;
fetch_failed <= f_in.fetch_failed; fetch_failed <= f_in.fetch_failed;
if f_in.valid = '1' then if f_in.valid = '1' then
pr <= pr_in; pr <= pr_in;
end if; end if;
else
r.second <= '1';
r.reg_c <= rin.reg_c;
end if;
end if; end if;
if rst = '1' then if rst = '1' then
br.predict <= '0'; br.predict <= '0';
@ -485,12 +528,12 @@ begin
end if; end if;
end process; end process;


busy_out <= stall_in; busy_out <= stall_in or double;


decode1_rom: process(clk) decode1_rom: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
if stall_in = '0' then if stall_in = '0' and double = '0' then
decode <= decode_rom(decode_rom_addr); decode <= decode_rom(decode_rom_addr);
end if; end if;
end if; end if;
@ -521,7 +564,7 @@ begin
v.big_endian := f_in.big_endian; v.big_endian := f_in.big_endian;


if is_X(f_in.insn) then if is_X(f_in.insn) then
v.spr_info := (sel => "XXX", others => 'X'); v.spr_info := (sel => "XXXX", others => 'X');
v.ram_spr := (index => (others => 'X'), others => 'X'); v.ram_spr := (index => (others => 'X'), others => 'X');
else else
sprn := decode_spr_num(f_in.insn); sprn := decode_spr_num(f_in.insn);
@ -620,6 +663,7 @@ begin
-- Work out GPR/FPR read addresses -- Work out GPR/FPR read addresses
-- Note that for prefixed instructions we are working this out based -- Note that for prefixed instructions we are working this out based
-- only on the suffix. -- only on the suffix.
if double = '0' then
maybe_rb := '0'; maybe_rb := '0';
vr.reg_1_addr := '0' & insn_ra(f_in.insn); vr.reg_1_addr := '0' & insn_ra(f_in.insn);
vr.reg_2_addr := '0' & insn_rb(f_in.insn); vr.reg_2_addr := '0' & insn_rb(f_in.insn);
@ -644,9 +688,30 @@ begin
end if; end if;
end if; end if;
end if; end if;
-- See if this is an instruction where repeat_t = DRSP and we need
-- to read RS|1 followed by RS, i.e. stq or stqcx. in LE mode
-- (note we don't have access to the decode for the current instruction)
if (icode = INSN_stq or icode = INSN_stqcx) and f_in.big_endian = '0' then
vr.reg_3_addr(0) := '1';
end if;
vr.read_1_enable := f_in.valid; vr.read_1_enable := f_in.valid;
vr.read_2_enable := f_in.valid and maybe_rb; vr.read_2_enable := f_in.valid and maybe_rb;
vr.read_3_enable := f_in.valid; vr.read_3_enable := f_in.valid;
else
-- second instance of a doubled instruction
vr.reg_1_addr := r.reg_a;
vr.reg_2_addr := r.reg_b;
vr.reg_3_addr := r.reg_c;
vr.read_1_enable := '0'; -- (not actually used)
vr.read_2_enable := '0';
vr.read_3_enable := '1'; -- (not actually used)
-- For pstq, and for stq and stqcx in BE mode,
-- we need to read register RS|1 in the cycle after we read RS;
-- stq and stqcx in LE mode read RS.
if decode.repeat = DRSP then
vr.reg_3_addr(0) := r.prefixed or f_in.big_endian;
end if;
end if;


v.reg_a := vr.reg_1_addr; v.reg_a := vr.reg_1_addr;
v.reg_b := vr.reg_2_addr; v.reg_b := vr.reg_2_addr;

@ -232,12 +232,13 @@ architecture behaviour of decode2 is
); );


constant subresult_select : mux_select_array_t := ( constant subresult_select : mux_select_array_t := (
OP_MUL_L64 => "000", -- muldiv_result OP_MUL_L64 => "000", -- multicyc_result
OP_MUL_H64 => "001", OP_MUL_H64 => "010",
OP_MUL_H32 => "010", OP_MUL_H32 => "001",
OP_DIV => "011", OP_DIV => "101",
OP_DIVE => "011", OP_DIVE => "101",
OP_MOD => "011", OP_MOD => "101",
OP_BSORT => "100",
OP_ADDG6S => "001", -- misc_result OP_ADDG6S => "001", -- misc_result
OP_ISEL => "010", OP_ISEL => "010",
OP_DARN => "011", OP_DARN => "011",
@ -347,7 +348,8 @@ begin
elsif deferred = '0' then elsif deferred = '0' then
if dc2in.e.valid = '1' then if dc2in.e.valid = '1' then
report "execute " & to_hstring(dc2in.e.nia) & report "execute " & to_hstring(dc2in.e.nia) &
" tag=" & integer'image(dc2in.e.instr_tag.tag) & std_ulogic'image(dc2in.e.instr_tag.valid); " tag=" & integer'image(dc2in.e.instr_tag.tag) & std_ulogic'image(dc2in.e.instr_tag.valid) &
" rpt=" & std_ulogic'image(dc2in.e.repeat) & " 2nd=" & std_ulogic'image(dc2in.e.second) & " wr=" & to_hstring(dc2in.e.write_reg);
end if; end if;
dc2 <= dc2in; dc2 <= dc2in;
elsif dc2.read_rspr = '0' then elsif dc2.read_rspr = '0' then
@ -376,6 +378,31 @@ begin
dec_b := decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn, d_in.prefix); dec_b := decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn, d_in.prefix);
dec_c := decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn); dec_c := decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn);
dec_o := decode_output_reg (d_in.decode.output_reg_a, d_in.insn); dec_o := decode_output_reg (d_in.decode.output_reg_a, d_in.insn);
case d_in.decode.repeat is
when DUPD =>
if d_in.second = '1' then
-- update-form loads, 2nd instruction writes RA
dec_o.reg := dec_a.reg;
end if;
when DRSP =>
-- non-prefixed stq, stqcx do RS|1, RS in LE mode; others do RS, RS|1
if d_in.second = (d_in.big_endian or d_in.prefixed) then
dec_c.reg(0) := '1'; -- this instance uses RS|1
end if;
when DRTP =>
-- non-prefixed lq, lqarx do RT|1, RT in LE mode; others do RT, RT|1
if d_in.second = (d_in.big_endian or d_in.prefixed) then
dec_o.reg(0) := '1';
end if;
when others =>
end case;
-- For the second instance of a doubled instruction, we ignore the RA
-- and RB operands, in order to avoid false dependencies on the output
-- of the first instance.
if d_in.second = '1' then
dec_a.reg_valid := '0';
dec_b.reg_valid := '0';
end if;
if d_in.valid = '0' or d_in.illegal_suffix = '1' then if d_in.valid = '0' or d_in.illegal_suffix = '1' then
dec_a.reg_valid := '0'; dec_a.reg_valid := '0';
dec_b.reg_valid := '0'; dec_b.reg_valid := '0';
@ -420,6 +447,8 @@ begin
v.e.input_cr := d_in.decode.input_cr; v.e.input_cr := d_in.decode.input_cr;
v.e.output_cr := d_in.decode.output_cr; v.e.output_cr := d_in.decode.output_cr;


v.e.spr_select := d_in.spr_info;

-- Work out whether XER SO/OV/OV32 bits are set -- Work out whether XER SO/OV/OV32 bits are set
-- or used by this instruction -- or used by this instruction
v.e.rc := decode_rc(d_in.decode.rc, d_in.insn); v.e.rc := decode_rc(d_in.decode.rc, d_in.insn);
@ -450,8 +479,15 @@ begin
v.input_ov := '1'; v.input_ov := '1';
when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR => when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR =>
unit := LDST; unit := LDST;
when SPR_TAR =>
v.e.uses_tar := '1';
when SPR_UDSCR =>
v.e.uses_dscr := '1';
when others => when others =>
end case; end case;
if d_in.spr_info.wonly = '1' then
v.e.spr_select.valid := '0';
end if;
end if; end if;
when OP_MTSPR => when OP_MTSPR =>
if is_X(d_in.insn) then if is_X(d_in.insn) then
@ -468,9 +504,15 @@ begin
if d_in.valid = '1' then if d_in.valid = '1' then
v.sgl_pipe := '1'; v.sgl_pipe := '1';
end if; end if;
when SPR_TAR =>
v.e.uses_tar := '1';
when SPR_UDSCR =>
v.e.uses_dscr := '1';
when others => when others =>
end case; end case;
if d_in.spr_info.valid = '1' and d_in.valid = '1' then if d_in.spr_info.ronly = '1' then
v.e.spr_select.valid := '0';
elsif d_in.spr_info.valid = '1' and d_in.valid = '1' then
v.sgl_pipe := '1'; v.sgl_pipe := '1';
end if; end if;
end if; end if;
@ -496,12 +538,10 @@ begin
end if; end if;
v.e.dec_ctr := decctr; v.e.dec_ctr := decctr;


v.repeat := d_in.decode.repeat;
if d_in.decode.repeat /= NONE then if d_in.decode.repeat /= NONE then
v.e.repeat := '1'; v.e.repeat := '1';
end if; end if;

v.e.second := d_in.second;
v.e.spr_select := d_in.spr_info;


if decctr = '1' then if decctr = '1' then
-- read and write CTR -- read and write CTR
@ -525,12 +565,14 @@ begin
v.e.ramspr_rd_odd := '1'; v.e.ramspr_rd_odd := '1';
else else
v.e.ramspr_even_rdaddr := RAMSPR_TAR; v.e.ramspr_even_rdaddr := RAMSPR_TAR;
v.e.uses_tar := '1';
end if; end if;
sprs_busy := '1'; sprs_busy := '1';
when OP_MFSPR => when OP_MFSPR =>
v.e.ramspr_even_rdaddr := d_in.ram_spr.index; v.e.ramspr_even_rdaddr := d_in.ram_spr.index;
v.e.ramspr_odd_rdaddr := d_in.ram_spr.index; v.e.ramspr_odd_rdaddr := d_in.ram_spr.index;
v.e.ramspr_rd_odd := d_in.ram_spr.isodd; v.e.ramspr_rd_odd := d_in.ram_spr.isodd;
v.e.ramspr_32bit := d_in.ram_spr.is32b;
v.e.spr_is_ram := d_in.ram_spr.valid; v.e.spr_is_ram := d_in.ram_spr.valid;
sprs_busy := d_in.ram_spr.valid; sprs_busy := d_in.ram_spr.valid;
when OP_MTSPR => when OP_MTSPR =>
@ -539,8 +581,19 @@ begin
v.e.ramspr_write_odd := d_in.ram_spr.valid and d_in.ram_spr.isodd; v.e.ramspr_write_odd := d_in.ram_spr.valid and d_in.ram_spr.isodd;
v.e.spr_is_ram := d_in.ram_spr.valid; v.e.spr_is_ram := d_in.ram_spr.valid;
when OP_RFID => when OP_RFID =>
if d_in.insn(7) = '1' then
-- rfscv
v.e.ramspr_even_rdaddr := RAMSPR_LR;
v.e.ramspr_odd_rdaddr := RAMSPR_CTR;
elsif d_in.insn(9) = '0' then
-- rfid
v.e.ramspr_even_rdaddr := RAMSPR_SRR0; v.e.ramspr_even_rdaddr := RAMSPR_SRR0;
v.e.ramspr_odd_rdaddr := RAMSPR_SRR1; v.e.ramspr_odd_rdaddr := RAMSPR_SRR1;
else
-- hrfid
v.e.ramspr_even_rdaddr := RAMSPR_HSRR0;
v.e.ramspr_odd_rdaddr := RAMSPR_HSRR1;
end if;
sprs_busy := '1'; sprs_busy := '1';
when others => when others =>
end case; end case;
@ -590,23 +643,28 @@ begin
if op = OP_MFSPR then if op = OP_MFSPR then
if d_in.ram_spr.valid = '1' then if d_in.ram_spr.valid = '1' then
v.e.result_sel := "101"; -- ramspr_result v.e.result_sel := "101"; -- ramspr_result
elsif d_in.spr_info.valid = '0' then elsif d_in.spr_info.valid = '0' or d_in.spr_info.wonly = '1' then
-- Privileged mfspr to invalid/unimplemented SPR numbers -- Privileged mfspr to invalid/unimplemented SPR numbers
-- writes the contents of RT back to RT (i.e. it's a no-op) -- writes the contents of RT back to RT (i.e. it's a no-op)
v.e.result_sel := "001"; -- logical_result v.e.result_sel := "001"; -- logical_result
end if; end if;
end if; end if;
v.e.prefixed := d_in.prefixed; v.e.prefixed := d_in.prefixed;
v.e.prefix := d_in.prefix;
v.e.illegal_suffix := d_in.illegal_suffix; v.e.illegal_suffix := d_in.illegal_suffix;
v.e.misaligned_prefix := d_in.misaligned_prefix; v.e.misaligned_prefix := d_in.misaligned_prefix;


elsif dc2.e.valid = '1' then -- check for invalid forms that cause an illegal instruction interrupt
-- dc2.busy = 1 and dc2.e.valid = 1, thus this must be a repeated instruction. -- Does RA = RT for a load quadword instr, or RB = RT for lqarx?
-- Set up for the second iteration (if deferred = 1 this will all be ignored) if d_in.decode.repeat = DRTP and
v.e.second := '1'; (insn_ra(d_in.insn) = insn_rt(d_in.insn) or
-- DUPD is the only possibility here: (d_in.decode.reserve = '1' and insn_rb(d_in.insn) = insn_rt(d_in.insn))) then
-- update-form loads, 2nd instruction writes RA v.e.illegal_form := '1';
v.e.write_reg := dc2.e.read_reg1; end if;
-- Is RS/RT odd for a load/store quadword instruction?
if (d_in.decode.repeat = DRSP or d_in.decode.repeat = DRTP) and d_in.insn(21) = '1' then
v.e.illegal_form := '1';
end if;
end if; end if;


-- issue control -- issue control
@ -695,7 +753,7 @@ begin


v.e.valid := control_valid_out; v.e.valid := control_valid_out;
v.e.instr_tag := instr_tag; v.e.instr_tag := instr_tag;
v.busy := valid_in and (not control_valid_out or (v.e.repeat and not v.e.second)); v.busy := valid_in and not control_valid_out;


stall_out <= dc2.busy or deferred; stall_out <= dc2.busy or deferred;



@ -6,9 +6,10 @@ package decode_types is
OP_ATTN, OP_B, OP_BC, OP_BCREG, OP_ATTN, OP_B, OP_BC, OP_BCREG,
OP_BCD, OP_BPERM, OP_BREV, OP_BCD, OP_BPERM, OP_BREV,
OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB,
OP_CNTZ, OP_CROP, OP_COUNTB, OP_CROP,
OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST, OP_DARN, OP_DCBF, OP_DCBST, OP_DCBZ,
OP_DCBZ, OP_ICBI, OP_ICBT, OP_SPARE,
OP_ICBI, OP_ICBT,
OP_FP_CMP, OP_FP_ARITH, OP_FP_MOVE, OP_FP_MISC, OP_FP_CMP, OP_FP_ARITH, OP_FP_MOVE, OP_FP_MISC,
OP_DIV, OP_DIVE, OP_MOD, OP_DIV, OP_DIVE, OP_MOD,
OP_EXTS, OP_EXTSWSLI, OP_EXTS, OP_EXTSWSLI,
@ -18,12 +19,14 @@ package decode_types is
OP_MCRXRX, OP_MFCR, OP_MFMSR, OP_MFSPR, OP_MCRXRX, OP_MFCR, OP_MFMSR, OP_MFSPR,
OP_MTCRF, OP_MTMSRD, OP_MTSPR, OP_MUL_L64, OP_MTCRF, OP_MTMSRD, OP_MTSPR, OP_MUL_L64,
OP_MUL_H64, OP_MUL_H32, OP_MUL_H64, OP_MUL_H32,
OP_POPCNT, OP_PRTY, OP_RFID, OP_BSORT,
OP_PRTY, OP_RFID,
OP_RLC, OP_RLCL, OP_RLCR, OP_SC, OP_SETB, OP_RLC, OP_RLCL, OP_RLCR, OP_SC, OP_SETB,
OP_SHL, OP_SHR, OP_SHL, OP_SHR,
OP_SYNC, OP_TLBIE, OP_TRAP, OP_SYNC, OP_TLBIE, OP_TRAP,
OP_XOR, OP_XOR,
OP_ADDG6S, OP_ADDG6S,
OP_WAIT,
OP_FETCH_FAILED OP_FETCH_FAILED
); );


@ -106,6 +109,7 @@ package decode_types is
INSN_prtyw, INSN_prtyw,
INSN_prtyd, -- 70 INSN_prtyd, -- 70
INSN_rfid, INSN_rfid,
INSN_rfscv,
INSN_rldic, INSN_rldic,
INSN_rldicl, INSN_rldicl,
INSN_rldicr, INSN_rldicr,
@ -113,8 +117,8 @@ package decode_types is
INSN_rlwimi, INSN_rlwimi,
INSN_rlwinm, INSN_rlwinm,
INSN_rnop, INSN_rnop,
INSN_sc, INSN_sc, -- 80
INSN_setb, -- 80 INSN_setb,
INSN_slbia, INSN_slbia,
INSN_sradi, INSN_sradi,
INSN_srawi, INSN_srawi,
@ -122,9 +126,10 @@ package decode_types is
INSN_std, INSN_std,
INSN_stdu, INSN_stdu,
INSN_sthu, INSN_sthu,
INSN_stwu, INSN_stq,
INSN_stwu, -- 90
INSN_subfic, INSN_subfic,
INSN_subfme, -- 90 INSN_subfme,
INSN_subfze, INSN_subfze,
INSN_sync, INSN_sync,
INSN_tdi, INSN_tdi,
@ -132,23 +137,23 @@ package decode_types is
INSN_twi, INSN_twi,
INSN_wait, INSN_wait,
INSN_xori, INSN_xori,
INSN_xoris, INSN_xoris, -- 100
-- pad to 104 -- pad to 102
INSN_063, INSN_064, INSN_065, INSN_066, INSN_067, INSN_065,


-- Non-prefixed instructions that have a MLS:D prefixed form and -- Non-prefixed instructions that have a MLS:D prefixed form and
-- their corresponding prefixed instructions. -- their corresponding prefixed instructions.
-- The non-prefixed versions have even indexes so that we can -- The non-prefixed versions have even indexes so that we can
-- convert them to the prefixed version by setting bit 0 -- convert them to the prefixed version by setting bit 0
INSN_addi, -- 104 INSN_addi, -- 102
INSN_paddi, INSN_paddi,
INSN_lbz, INSN_lbz,
INSN_plbz, INSN_plbz,
INSN_lha, INSN_lha,
INSN_plha, INSN_plha,
INSN_lhz, -- 110 INSN_lhz,
INSN_plhz, INSN_plhz,
INSN_lwz, INSN_lwz, -- 110
INSN_plwz, INSN_plwz,
INSN_stb, INSN_stb,
INSN_pstb, INSN_pstb,
@ -158,15 +163,18 @@ package decode_types is
INSN_pstw, INSN_pstw,


-- Slots for non-prefixed opcodes that are 8LS:D when prefixed -- Slots for non-prefixed opcodes that are 8LS:D when prefixed
INSN_lhzu, -- 120 INSN_lhzu,
INSN_plwa, INSN_plwa,
INSN_lq, -- 120
INSN_plq,
INSN_op57, INSN_op57,
INSN_pld, INSN_pld,
INSN_op60,
INSN_pstq,
INSN_op61, INSN_op61,
INSN_pstd, INSN_pstd,


-- pad to 128 to simplify comparison logic -- pad to 128 to simplify comparison logic
INSN_07e, INSN_07f,


-- The following instructions have an RB operand but don't access FPRs -- The following instructions have an RB operand but don't access FPRs
INSN_add, INSN_add,
@ -177,11 +185,12 @@ package decode_types is
INSN_and, INSN_and,
INSN_andc, INSN_andc,
INSN_bperm, INSN_bperm,
INSN_cfuged,
INSN_cmp, INSN_cmp,
INSN_cmpb, INSN_cmpb,
INSN_cmpeqb, INSN_cmpeqb,
INSN_cmpl, INSN_cmpl, -- 140
INSN_cmprb, -- 140 INSN_cmprb,
INSN_dcbf, INSN_dcbf,
INSN_dcbst, INSN_dcbst,
INSN_dcbt, INSN_dcbt,
@ -190,8 +199,8 @@ package decode_types is
INSN_divd, INSN_divd,
INSN_divdu, INSN_divdu,
INSN_divde, INSN_divde,
INSN_divdeu, INSN_divdeu, -- 150
INSN_divw, -- 150 INSN_divw,
INSN_divwu, INSN_divwu,
INSN_divwe, INSN_divwe,
INSN_divweu, INSN_divweu,
@ -200,8 +209,8 @@ package decode_types is
INSN_icbt, INSN_icbt,
INSN_isel, INSN_isel,
INSN_lbarx, INSN_lbarx,
INSN_lbzcix, INSN_lbzcix, -- 160
INSN_lbzux, -- 160 INSN_lbzux,
INSN_lbzx, INSN_lbzx,
INSN_ldarx, INSN_ldarx,
INSN_ldbrx, INSN_ldbrx,
@ -210,18 +219,19 @@ package decode_types is
INSN_ldux, INSN_ldux,
INSN_lharx, INSN_lharx,
INSN_lhax, INSN_lhax,
INSN_lhaux, INSN_lhaux, -- 170
INSN_lhbrx, -- 170 INSN_lhbrx,
INSN_lhzcix, INSN_lhzcix,
INSN_lhzx, INSN_lhzx,
INSN_lhzux, INSN_lhzux,
INSN_lqarx,
INSN_lwarx, INSN_lwarx,
INSN_lwax, INSN_lwax,
INSN_lwaux, INSN_lwaux,
INSN_lwbrx, INSN_lwbrx,
INSN_lwzcix, INSN_lwzcix, -- 180
INSN_lwzx, INSN_lwzx,
INSN_lwzux, -- 180 INSN_lwzux,
INSN_modsd, INSN_modsd,
INSN_modsw, INSN_modsw,
INSN_moduw, INSN_moduw,
@ -229,52 +239,55 @@ package decode_types is
INSN_mulhw, INSN_mulhw,
INSN_mulhwu, INSN_mulhwu,
INSN_mulhd, INSN_mulhd,
INSN_mulhdu, INSN_mulhdu, -- 190
INSN_mullw, INSN_mullw,
INSN_mulld, -- 190 INSN_mulld,
INSN_nand, INSN_nand,
INSN_nor, INSN_nor,
INSN_or, INSN_or,
INSN_orc, INSN_orc,
INSN_pdepd,
INSN_pextd,
INSN_rldcl, INSN_rldcl,
INSN_rldcr, INSN_rldcr, -- 200
INSN_rlwnm, INSN_rlwnm,
INSN_slw, INSN_slw,
INSN_sld, INSN_sld,
INSN_sraw, -- 200 INSN_sraw,
INSN_srad, INSN_srad,
INSN_srw, INSN_srw,
INSN_srd, INSN_srd,
INSN_stbcix, INSN_stbcix,
INSN_stbcx, INSN_stbcx,
INSN_stbx, INSN_stbx, -- 210
INSN_stbux, INSN_stbux,
INSN_stdbrx, INSN_stdbrx,
INSN_stdcix, INSN_stdcix,
INSN_stdcx, -- 210 INSN_stdcx,
INSN_stdx, INSN_stdx,
INSN_stdux, INSN_stdux,
INSN_sthbrx, INSN_sthbrx,
INSN_sthcix, INSN_sthcix,
INSN_sthcx, INSN_sthcx,
INSN_sthx, INSN_sthx, -- 220
INSN_sthux, INSN_sthux,
INSN_stqcx,
INSN_stwbrx, INSN_stwbrx,
INSN_stwcix, INSN_stwcix,
INSN_stwcx, -- 220 INSN_stwcx,
INSN_stwx, INSN_stwx,
INSN_stwux, INSN_stwux,
INSN_subf, INSN_subf,
INSN_subfc, INSN_subfc,
INSN_subfe, INSN_subfe, -- 230
INSN_td, INSN_td,
INSN_tlbie, INSN_tlbie,
INSN_tlbiel, INSN_tlbiel,
INSN_tw, INSN_tw,
INSN_xor, -- 230 INSN_xor,


-- pad to 232 to simplify comparison logic -- pad to 240 to simplify comparison logic
INSN_231, INSN_236, INSN_237, INSN_238, INSN_239,


-- The following instructions have a third input addressed by RC -- The following instructions have a third input addressed by RC
INSN_maddld, INSN_maddld,
@ -282,9 +295,7 @@ package decode_types is
INSN_maddhdu, INSN_maddhdu,


-- pad to 256 to simplify comparison logic -- pad to 256 to simplify comparison logic
INSN_235, INSN_243,
INSN_236, INSN_237, INSN_238, INSN_239,
INSN_240, INSN_241, INSN_242, INSN_243,
INSN_244, INSN_245, INSN_246, INSN_247, INSN_244, INSN_245, INSN_246, INSN_247,
INSN_248, INSN_249, INSN_250, INSN_251, INSN_248, INSN_249, INSN_250, INSN_251,
INSN_252, INSN_253, INSN_254, INSN_255, INSN_252, INSN_253, INSN_254, INSN_255,
@ -434,7 +445,9 @@ package decode_types is
type length_t is (NONE, is1B, is2B, is4B, is8B); type length_t is (NONE, is1B, is2B, is4B, is8B);


type repeat_t is (NONE, -- instruction is not repeated type repeat_t is (NONE, -- instruction is not repeated
DUPD); -- update-form load DUPD, -- update-form load
DRSP, -- double RS (RS, RS+1)
DRTP); -- double RT (RT, RT+1, or RT+1, RT)


type decode_rom_t is record type decode_rom_t is record
unit : unit_t; unit : unit_t;
@ -518,6 +531,7 @@ package body decode_types is
when INSN_lhau => return "101011"; when INSN_lhau => return "101011";
when INSN_lhz => return "101000"; when INSN_lhz => return "101000";
when INSN_lhzu => return "101001"; when INSN_lhzu => return "101001";
when INSN_lq => return "111000";
when INSN_lwz => return "100000"; when INSN_lwz => return "100000";
when INSN_lwzu => return "100001"; when INSN_lwzu => return "100001";
when INSN_mulli => return "000111"; when INSN_mulli => return "000111";
@ -537,6 +551,7 @@ package body decode_types is
when INSN_sth => return "101100"; when INSN_sth => return "101100";
when INSN_sthu => return "101101"; when INSN_sthu => return "101101";
when INSN_stw => return "100100"; when INSN_stw => return "100100";
when INSN_stq => return "111110";
when INSN_stwu => return "100101"; when INSN_stwu => return "100101";
when INSN_subfic => return "001000"; when INSN_subfic => return "001000";
when INSN_tdi => return "000010"; when INSN_tdi => return "000010";
@ -582,6 +597,7 @@ package body decode_types is
when INSN_fnmadd => return "111111"; when INSN_fnmadd => return "111111";
when INSN_prefix => return "000001"; when INSN_prefix => return "000001";
when INSN_op57 => return "111001"; when INSN_op57 => return "111001";
when INSN_op60 => return "111100";
when INSN_op61 => return "111101"; when INSN_op61 => return "111101";
when INSN_add => return "011111"; when INSN_add => return "011111";
when INSN_addc => return "011111"; when INSN_addc => return "011111";
@ -649,6 +665,7 @@ package body decode_types is
when INSN_lhzcix => return "011111"; when INSN_lhzcix => return "011111";
when INSN_lhzux => return "011111"; when INSN_lhzux => return "011111";
when INSN_lhzx => return "011111"; when INSN_lhzx => return "011111";
when INSN_lqarx => return "011111";
when INSN_lwarx => return "011111"; when INSN_lwarx => return "011111";
when INSN_lwaux => return "011111"; when INSN_lwaux => return "011111";
when INSN_lwax => return "011111"; when INSN_lwax => return "011111";
@ -714,6 +731,7 @@ package body decode_types is
when INSN_sthcx => return "011111"; when INSN_sthcx => return "011111";
when INSN_sthux => return "011111"; when INSN_sthux => return "011111";
when INSN_sthx => return "011111"; when INSN_sthx => return "011111";
when INSN_stqcx => return "011111";
when INSN_stwbrx => return "011111"; when INSN_stwbrx => return "011111";
when INSN_stwcix => return "011111"; when INSN_stwcix => return "011111";
when INSN_stwcx => return "011111"; when INSN_stwcx => return "011111";

@ -15,6 +15,7 @@ entity execute1 is
SIM : boolean := false; SIM : boolean := false;
EX1_BYPASS : boolean := true; EX1_BYPASS : boolean := true;
HAS_FPU : boolean := true; HAS_FPU : boolean := true;
CPU_INDEX : natural;
-- Non-zero to enable log data collection -- Non-zero to enable log data collection
LOG_LENGTH : natural := 0 LOG_LENGTH : natural := 0
); );
@ -45,6 +46,7 @@ entity execute1 is


dbg_ctrl_out : out ctrl_t; dbg_ctrl_out : out ctrl_t;


run_out : out std_ulogic;
icache_inval : out std_ulogic; icache_inval : out std_ulogic;
terminate_out : out std_ulogic; terminate_out : out std_ulogic;


@ -79,12 +81,23 @@ architecture behaviour of execute1 is
write_xerlow : std_ulogic; write_xerlow : std_ulogic;
write_dec : std_ulogic; write_dec : std_ulogic;
write_cfar : std_ulogic; write_cfar : std_ulogic;
set_cfar : std_ulogic;
write_loga : std_ulogic; write_loga : std_ulogic;
inc_loga : std_ulogic; inc_loga : std_ulogic;
write_pmuspr : std_ulogic; write_pmuspr : std_ulogic;
ramspr_write_even : std_ulogic; ramspr_write_even : std_ulogic;
ramspr_write_odd : std_ulogic; ramspr_write_odd : std_ulogic;
mult_32s : std_ulogic; mult_32s : std_ulogic;
write_fscr : std_ulogic;
write_ic : std_ulogic;
write_hfscr : std_ulogic;
write_hic : std_ulogic;
write_heir : std_ulogic;
set_heir : std_ulogic;
write_ctrl : std_ulogic;
write_dscr : std_ulogic;
enter_wait : std_ulogic;
scv_trap : std_ulogic;
end record; end record;
constant side_effect_init : side_effect_type := (others => '0'); constant side_effect_init : side_effect_type := (others => '0');


@ -101,16 +114,18 @@ architecture behaviour of execute1 is
direct_branch : std_ulogic; direct_branch : std_ulogic;
start_mul : std_ulogic; start_mul : std_ulogic;
start_div : std_ulogic; start_div : std_ulogic;
start_bsort : std_ulogic;
do_trace : std_ulogic; do_trace : std_ulogic;
fp_intr : std_ulogic; fp_intr : std_ulogic;
res2_sel : std_ulogic_vector(1 downto 0); res2_sel : std_ulogic_vector(1 downto 0);
bypass_valid : std_ulogic; bypass_valid : std_ulogic;
ramspr_odd_data : std_ulogic_vector(63 downto 0); ramspr_odd_data : std_ulogic_vector(63 downto 0);
ic : std_ulogic_vector(3 downto 0);
end record; end record;
constant actions_type_init : actions_type := constant actions_type_init : actions_type :=
(e => Execute1ToWritebackInit, se => side_effect_init, (e => Execute1ToWritebackInit, se => side_effect_init,
new_msr => (others => '0'), res2_sel => "00", new_msr => (others => '0'), res2_sel => "00",
ramspr_odd_data => 64x"0", others => '0'); ramspr_odd_data => 64x"0", ic => x"0", others => '0');


type reg_stage1_type is record type reg_stage1_type is record
e : Execute1ToWritebackType; e : Execute1ToWritebackType;
@ -121,7 +136,7 @@ architecture behaviour of execute1 is
prev_op : insn_type_t; prev_op : insn_type_t;
prev_prefixed : std_ulogic; prev_prefixed : std_ulogic;
oe : std_ulogic; oe : std_ulogic;
mul_select : std_ulogic_vector(1 downto 0); mul_select : std_ulogic_vector(2 downto 0);
res2_sel : std_ulogic_vector(1 downto 0); res2_sel : std_ulogic_vector(1 downto 0);
spr_select : spr_id; spr_select : spr_id;
pmu_spr_num : std_ulogic_vector(4 downto 0); pmu_spr_num : std_ulogic_vector(4 downto 0);
@ -131,6 +146,7 @@ architecture behaviour of execute1 is
mul_in_progress : std_ulogic; mul_in_progress : std_ulogic;
mul_finish : std_ulogic; mul_finish : std_ulogic;
div_in_progress : std_ulogic; div_in_progress : std_ulogic;
bsort_in_progress : std_ulogic;
no_instr_avail : std_ulogic; no_instr_avail : std_ulogic;
instr_dispatch : std_ulogic; instr_dispatch : std_ulogic;
ext_interrupt : std_ulogic; ext_interrupt : std_ulogic;
@ -141,21 +157,28 @@ architecture behaviour of execute1 is
xerc_valid : std_ulogic; xerc_valid : std_ulogic;
ramspr_wraddr : ramspr_index; ramspr_wraddr : ramspr_index;
ramspr_odd_data : std_ulogic_vector(63 downto 0); ramspr_odd_data : std_ulogic_vector(63 downto 0);
ic : std_ulogic_vector(3 downto 0);
prefixed : std_ulogic;
insn : std_ulogic_vector(31 downto 0);
prefix : std_ulogic_vector(25 downto 0);
end record; end record;
constant reg_stage1_type_init : reg_stage1_type := constant reg_stage1_type_init : reg_stage1_type :=
(e => Execute1ToWritebackInit, se => side_effect_init, (e => Execute1ToWritebackInit, se => side_effect_init,
busy => '0', busy => '0',
fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL, fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL,
prev_prefixed => '0', prev_prefixed => '0',
oe => '0', mul_select => "00", res2_sel => "00", oe => '0', mul_select => "000", res2_sel => "00",
spr_select => spr_id_init, pmu_spr_num => 5x"0", spr_select => spr_id_init, pmu_spr_num => 5x"0",
redir_to_next => '0', advance_nia => '0', lr_from_next => '0', redir_to_next => '0', advance_nia => '0', lr_from_next => '0',
mul_in_progress => '0', mul_finish => '0', div_in_progress => '0', mul_in_progress => '0', mul_finish => '0', div_in_progress => '0',
bsort_in_progress => '0',
no_instr_avail => '0', instr_dispatch => '0', ext_interrupt => '0', no_instr_avail => '0', instr_dispatch => '0', ext_interrupt => '0',
taken_branch_event => '0', br_mispredict => '0', taken_branch_event => '0', br_mispredict => '0',
msr => 64x"0", msr => 64x"0",
xerc => xerc_init, xerc_valid => '0', xerc => xerc_init, xerc_valid => '0',
ramspr_wraddr => (others => '0'), ramspr_odd_data => 64x"0"); ramspr_wraddr => (others => '0'), ramspr_odd_data => 64x"0",
ic => x"0",
prefixed => '0', insn => 32x"0", prefix => 26x"0");


type reg_stage2_type is record type reg_stage2_type is record
e : Execute1ToWritebackType; e : Execute1ToWritebackType;
@ -190,7 +213,8 @@ architecture behaviour of execute1 is
signal alu_result: std_ulogic_vector(63 downto 0); signal alu_result: std_ulogic_vector(63 downto 0);
signal adder_result: std_ulogic_vector(63 downto 0); signal adder_result: std_ulogic_vector(63 downto 0);
signal misc_result: std_ulogic_vector(63 downto 0); signal misc_result: std_ulogic_vector(63 downto 0);
signal muldiv_result: std_ulogic_vector(63 downto 0); signal multicyc_result: std_ulogic_vector(63 downto 0);
signal bsort_result: std_ulogic_vector(63 downto 0);
signal spr_result: std_ulogic_vector(63 downto 0); signal spr_result: std_ulogic_vector(63 downto 0);
signal next_nia : std_ulogic_vector(63 downto 0); signal next_nia : std_ulogic_vector(63 downto 0);
signal s1_sel : std_ulogic_vector(2 downto 0); signal s1_sel : std_ulogic_vector(2 downto 0);
@ -215,6 +239,10 @@ architecture behaviour of execute1 is
signal x_to_divider: Execute1ToDividerType; signal x_to_divider: Execute1ToDividerType;
signal divider_to_x: DividerToExecute1Type := DividerToExecute1Init; signal divider_to_x: DividerToExecute1Type := DividerToExecute1Init;


-- bit-sort unit signals
signal bsort_start : std_ulogic;
signal bsort_done : std_ulogic;

-- random number generator signals -- random number generator signals
signal random_raw : std_ulogic_vector(63 downto 0); signal random_raw : std_ulogic_vector(63 downto 0);
signal random_cond : std_ulogic_vector(63 downto 0); signal random_cond : std_ulogic_vector(63 downto 0);
@ -322,6 +350,7 @@ architecture behaviour of execute1 is
-- 48:63, and partial function MSR bits lie in the range -- 48:63, and partial function MSR bits lie in the range
-- 33:36 and 42:47. (Note this is IBM bit numbering). -- 33:36 and 42:47. (Note this is IBM bit numbering).
msr_out := (others => '0'); msr_out := (others => '0');
msr_out(MSR_HV) := '1'; -- HV is always set
msr_out(63 downto 31) := msr(63 downto 31); msr_out(63 downto 31) := msr(63 downto 31);
msr_out(26 downto 22) := msr(26 downto 22); msr_out(26 downto 22) := msr(26 downto 22);
msr_out(15 downto 0) := msr(15 downto 0); msr_out(15 downto 0) := msr(15 downto 0);
@ -332,6 +361,9 @@ architecture behaviour of execute1 is
return std_ulogic_vector is return std_ulogic_vector is
variable srr1: std_ulogic_vector(63 downto 0); variable srr1: std_ulogic_vector(63 downto 0);
begin begin
srr1(63 downto 61) := msr(63 downto 61);
srr1(MSR_HV) := '1';
srr1(59 downto 31) := msr(59 downto 31);
srr1(63 downto 31) := msr(63 downto 31); srr1(63 downto 31) := msr(63 downto 31);
srr1(30 downto 27) := flags(14 downto 11); srr1(30 downto 27) := flags(14 downto 11);
srr1(26 downto 22) := msr(26 downto 22); srr1(26 downto 22) := msr(26 downto 22);
@ -365,6 +397,39 @@ architecture behaviour of execute1 is
xerc.ov32 & xerc.ca32 & xer_low(17 downto 0); xerc.ov32 & xerc.ca32 & xer_low(17 downto 0);
end; end;


-- Compose the 64-bit read value of the FSCR from the fields held in c.
-- Every bit that is not explicitly assigned below reads as '0'.
function assemble_fscr(c: ctrl_t) return std_ulogic_vector is
    variable fscr_img : std_ulogic_vector(63 downto 0) := (others => '0');
begin
    -- 4-bit fscr_ic value goes in bits 59..56
    -- (presumably the interrupt-cause code; confirm against ctrl_t)
    fscr_img(59 downto 56) := c.fscr_ic;
    -- Individual facility bits, at the positions given by the FSCR_* constants
    fscr_img(FSCR_PREFIX) := c.fscr_pref;
    fscr_img(FSCR_SCV)    := c.fscr_scv;
    fscr_img(FSCR_TAR)    := c.fscr_tar;
    fscr_img(FSCR_DSCR)   := c.fscr_dscr;
    return fscr_img;
end;

-- Compose the 64-bit read value of the HFSCR from the fields held in c.
-- Every bit that is not explicitly assigned below reads as '0'.
function assemble_hfscr(c: ctrl_t) return std_ulogic_vector is
    variable hfscr_img : std_ulogic_vector(63 downto 0) := (others => '0');
begin
    -- 4-bit hfscr_ic value goes in bits 59..56
    -- (presumably the interrupt-cause code; confirm against ctrl_t)
    hfscr_img(59 downto 56) := c.hfscr_ic;
    -- Individual facility bits, at the positions given by the HFSCR_* constants
    hfscr_img(HFSCR_PREFIX) := c.hfscr_pref;
    hfscr_img(HFSCR_TAR)    := c.hfscr_tar;
    hfscr_img(HFSCR_DSCR)   := c.hfscr_dscr;
    hfscr_img(HFSCR_FP)     := c.hfscr_fp;
    return hfscr_img;
end;

-- Compose the 64-bit read value of the CTRL register.
--   c     : control state; only c.run is used here
--   msrpr : value of MSR[PR] to apply (as passed by the caller)
-- All bits other than 0 and 15 read as '0'.
function assemble_ctrl(c: ctrl_t; msrpr: std_ulogic) return std_ulogic_vector is
    variable ctrl_img : std_ulogic_vector(63 downto 0) := (others => '0');
begin
    -- Bit 0 mirrors the run flag
    ctrl_img(0) := c.run;
    -- Bit 15 shows the run flag only when msrpr is '0'
    ctrl_img(15) := c.run and not msrpr;
    return ctrl_img;
end;

-- Tell vivado to keep the hierarchy for the random module so that the -- Tell vivado to keep the hierarchy for the random module so that the
-- net names in the xdc file match. -- net names in the xdc file match.
attribute keep_hierarchy : string; attribute keep_hierarchy : string;
@ -437,6 +502,18 @@ begin
); );
end generate; end generate;


-- Bit-sort unit: instance of work.bit_sorter, which (per its file header)
-- implements cfuged, pextd and pdepd.
bsort_0: entity work.bit_sorter
port map (
clk => clk,
rst => rst,
-- The unit's rs input is fed from c_in and its rb input from b_in.
rs => c_in,
rb => b_in,
-- Start strobe; NOTE(review): presumably derived from actions.start_bsort
-- elsewhere in this architecture -- confirm.
go => bsort_start,
-- Operation select is instruction bits 7..6:
-- 00 = pdepd, 01 = pextd, 10 = cfuged (per the bit_sorter header comment).
opc => e_in.insn(7 downto 6),
-- Completion flag and 64-bit result returned by the unit.
done => bsort_done,
result => bsort_result
);

random_0: entity work.random random_0: entity work.random
port map ( port map (
clk => clk, clk => clk,
@ -484,7 +561,7 @@ begin
x_to_pmu.addr_v <= '0'; x_to_pmu.addr_v <= '0';
x_to_pmu.spr_num <= ex1.pmu_spr_num; x_to_pmu.spr_num <= ex1.pmu_spr_num;
x_to_pmu.spr_val <= ex1.e.write_data; x_to_pmu.spr_val <= ex1.e.write_data;
x_to_pmu.run <= '1'; x_to_pmu.run <= ctrl.run;


-- XER forwarding. The CA and CA32 bits are only modified by instructions -- XER forwarding. The CA and CA32 bits are only modified by instructions
-- that are handled here, so for them we can just use the result most -- that are handled here, so for them we can just use the result most
@ -501,7 +578,7 @@ begin


-- N.B. the busy signal from each source includes the -- N.B. the busy signal from each source includes the
-- stage2 stall from that source in it. -- stage2 stall from that source in it.
busy_out <= l_in.busy or ex1.busy or fp_in.busy; busy_out <= l_in.busy or ex1.busy or fp_in.busy or ctrl.wait_state;


valid_in <= e_in.valid and not (busy_out or flush_in or ex1.e.redirect or ex1.e.interrupt); valid_in <= e_in.valid and not (busy_out or flush_in or ex1.e.redirect or ex1.e.interrupt);


@ -533,7 +610,13 @@ begin
even_wr_enab := (ex1.se.ramspr_write_even and doit) or interrupt_in.intr; even_wr_enab := (ex1.se.ramspr_write_even and doit) or interrupt_in.intr;
odd_wr_enab := (ex1.se.ramspr_write_odd and doit) or interrupt_in.intr; odd_wr_enab := (ex1.se.ramspr_write_odd and doit) or interrupt_in.intr;
if interrupt_in.intr = '1' then if interrupt_in.intr = '1' then
if interrupt_in.hv_intr = '1' then
wr_addr := RAMSPR_HSRR0;
elsif interrupt_in.scv_int = '1' then
wr_addr := RAMSPR_LR;
else
wr_addr := RAMSPR_SRR0; wr_addr := RAMSPR_SRR0;
end if;
else else
wr_addr := ex1.ramspr_wraddr; wr_addr := ex1.ramspr_wraddr;
end if; end if;
@ -573,6 +656,9 @@ begin
else else
ramspr_result <= ramspr_odd; ramspr_result <= ramspr_odd;
end if; end if;
if e_in.ramspr_32bit = '1' then
ramspr_result(63 downto 32) <= 32x"0";
end if;
end process; end process;


ramspr_write: process(clk) ramspr_write: process(clk)
@ -599,7 +685,7 @@ begin
adder_result when "000", adder_result when "000",
logical_result when "001", logical_result when "001",
rotator_result when "010", rotator_result when "010",
muldiv_result when "100", multicyc_result when "100",
ramspr_result when "101", ramspr_result when "101",
misc_result when others; misc_result when others;


@ -610,16 +696,18 @@ begin
ex1 <= reg_stage1_type_init; ex1 <= reg_stage1_type_init;
ex2 <= reg_stage2_type_init; ex2 <= reg_stage2_type_init;
ctrl <= ctrl_t_init; ctrl <= ctrl_t_init;
ctrl.msr <= (MSR_SF => '1', MSR_LE => '1', others => '0'); ctrl.msr <= (MSR_SF => '1', MSR_HV => '1', MSR_LE => '1', others => '0');
ex1.msr <= (MSR_SF => '1', MSR_LE => '1', others => '0'); ex1.msr <= (MSR_SF => '1', MSR_HV => '1', MSR_LE => '1', others => '0');
else else
ex1 <= ex1in; ex1 <= ex1in;
ex2 <= ex2in; ex2 <= ex2in;
ctrl <= ctrl_tmp; ctrl <= ctrl_tmp;
if valid_in = '1' then if valid_in = '1' then
report "execute " & to_hstring(e_in.nia) & " op=" & insn_type_t'image(e_in.insn_type) & report "CPU " & natural'image(CPU_INDEX) & " execute " & to_hstring(e_in.nia) &
" op=" & insn_type_t'image(e_in.insn_type) &
" wr=" & to_hstring(ex1in.e.write_reg) & " we=" & std_ulogic'image(ex1in.e.write_enable) & " wr=" & to_hstring(ex1in.e.write_reg) & " we=" & std_ulogic'image(ex1in.e.write_enable) &
" tag=" & integer'image(ex1in.e.instr_tag.tag) & std_ulogic'image(ex1in.e.instr_tag.valid); " tag=" & integer'image(ex1in.e.instr_tag.tag) & std_ulogic'image(ex1in.e.instr_tag.valid) &
" 2nd=" & std_ulogic'image(e_in.second);
end if; end if;
-- We mustn't get stalled on a cycle where execute2 is -- We mustn't get stalled on a cycle where execute2 is
-- completing an instruction or generating an interrupt -- completing an instruction or generating an interrupt
@ -638,7 +726,18 @@ begin
if dbg_spr_addr(7) = '1' then if dbg_spr_addr(7) = '1' then
dbg_spr_data <= ramspr_result; dbg_spr_data <= ramspr_result;
else else
case dbg_spr_addr(3 downto 0) is
when SPRSEL_FSCR =>
dbg_spr_data <= assemble_fscr(ctrl);
when SPRSEL_HFSCR =>
dbg_spr_data <= assemble_hfscr(ctrl);
when SPRSEL_HEIR =>
dbg_spr_data <= ctrl.heir;
when SPRSEL_CFAR =>
dbg_spr_data <= ctrl.cfar;
when others =>
dbg_spr_data <= assemble_xer(xerc_in, ctrl.xer_low); dbg_spr_data <= assemble_xer(xerc_in, ctrl.xer_low);
end case;
end if; end if;
dbg_spr_ack <= '1'; dbg_spr_ack <= '1';
end if; end if;
@ -769,17 +868,21 @@ begin
x_to_mult_32s.subtract <= '0'; x_to_mult_32s.subtract <= '0';
x_to_mult_32s.addend <= (others => '0'); x_to_mult_32s.addend <= (others => '0');


case ex1.mul_select is if ex1.mul_select(2) = '0' then
case ex1.mul_select(1 downto 0) is
when "00" => when "00" =>
muldiv_result <= multiply_to_x.result(63 downto 0); multicyc_result <= multiply_to_x.result(63 downto 0);
when "01" => when "01" =>
muldiv_result <= multiply_to_x.result(127 downto 64); multicyc_result <= multiply_to_x.result(63 downto 32) &
when "10" =>
muldiv_result <= multiply_to_x.result(63 downto 32) &
multiply_to_x.result(63 downto 32); multiply_to_x.result(63 downto 32);
when others => when others =>
muldiv_result <= divider_to_x.write_reg_data; multicyc_result <= multiply_to_x.result(127 downto 64);
end case; end case;
elsif ex1.mul_select(0) = '1' and not HAS_FPU then
multicyc_result <= divider_to_x.write_reg_data;
else
multicyc_result <= bsort_result;
end if;


-- Compute misc_result -- Compute misc_result
case e_in.sub_select is case e_in.sub_select is
@ -1047,7 +1150,7 @@ begin
slow_op := '0'; slow_op := '0';
owait := '0'; owait := '0';


if e_in.illegal_suffix = '1' then if e_in.illegal_suffix = '1' or e_in.illegal_form = '1' then
illegal := '1'; illegal := '1';
elsif ex1.msr(MSR_PR) = '1' and instr_is_privileged(e_in.insn_type, e_in.insn) then elsif ex1.msr(MSR_PR) = '1' and instr_is_privileged(e_in.insn_type, e_in.insn) then
privileged := '1'; privileged := '1';
@ -1058,18 +1161,20 @@ begin
when OP_ILLEGAL => when OP_ILLEGAL =>
illegal := '1'; illegal := '1';
when OP_SC => when OP_SC =>
-- check bit 1 of the instruction is 1 so we know this is sc; -- check bit 1 of the instruction to distinguish sc from scv
-- 0 would mean scv, so generate an illegal instruction interrupt
if e_in.insn(1) = '1' then if e_in.insn(1) = '1' then
v.trap := '1'; -- sc
v.advance_nia := '1';
v.e.intr_vec := 16#C00#; v.e.intr_vec := 16#C00#;
if e_in.valid = '1' then if e_in.valid = '1' then
report "sc"; report "sc";
end if; end if;
else else
illegal := '1'; -- scv
v.se.scv_trap := '1';
v.e.intr_vec := to_integer(unsigned(e_in.insn(11 downto 5))) * 32;
end if; end if;
v.trap := '1';
v.advance_nia := '1';
when OP_ATTN => when OP_ATTN =>
-- check bits 1-10 of the instruction to make sure it's attn -- check bits 1-10 of the instruction to make sure it's attn
-- if not then it is illegal -- if not then it is illegal
@ -1081,7 +1186,7 @@ begin
else else
illegal := '1'; illegal := '1';
end if; end if;
when OP_NOP | OP_DCBF | OP_DCBST | OP_DCBT | OP_DCBTST | OP_ICBT => when OP_NOP | OP_DCBST | OP_ICBT =>
-- Do nothing -- Do nothing
when OP_ADD => when OP_ADD =>
if e_in.output_carry = '1' then if e_in.output_carry = '1' then
@ -1126,7 +1231,7 @@ begin
if ex1.msr(MSR_BE) = '1' then if ex1.msr(MSR_BE) = '1' then
v.do_trace := '1'; v.do_trace := '1';
end if; end if;
v.se.write_cfar := '1'; v.se.set_cfar := '1';
when OP_BC => when OP_BC =>
-- If CTR is being decremented, it is in ramspr_odd. -- If CTR is being decremented, it is in ramspr_odd.
bo := insn_bo(e_in.insn); bo := insn_bo(e_in.insn);
@ -1145,7 +1250,7 @@ begin
if ex1.msr(MSR_BE) = '1' then if ex1.msr(MSR_BE) = '1' then
v.do_trace := '1'; v.do_trace := '1';
end if; end if;
v.se.write_cfar := v.take_branch; v.se.set_cfar := v.take_branch;
when OP_BCREG => when OP_BCREG =>
-- If CTR is being decremented, it is in ramspr_odd. -- If CTR is being decremented, it is in ramspr_odd.
-- The target address is in ramspr_result (LR, CTR or TAR). -- The target address is in ramspr_result (LR, CTR or TAR).
@ -1158,15 +1263,20 @@ begin
if ex1.msr(MSR_BE) = '1' then if ex1.msr(MSR_BE) = '1' then
v.do_trace := '1'; v.do_trace := '1';
end if; end if;
v.se.write_cfar := v.take_branch; v.se.set_cfar := v.take_branch;


when OP_RFID => when OP_RFID =>
-- rfid, hrfid and rfscv.
-- These all act the same given that we don't have
-- privileged non-hypervisor mode or ultravisor mode.
srr1 := ramspr_odd; srr1 := ramspr_odd;
v.e.redir_mode := (srr1(MSR_IR) or srr1(MSR_PR)) & not srr1(MSR_PR) & v.e.redir_mode := (srr1(MSR_IR) or srr1(MSR_PR)) & not srr1(MSR_PR) &
not srr1(MSR_LE) & not srr1(MSR_SF); not srr1(MSR_LE) & not srr1(MSR_SF);
-- Can't use msr_copy here because the partial function MSR -- Can't use msr_copy here because the partial function MSR
-- bits should be left unchanged, not zeroed. -- bits should be left unchanged, not zeroed.
v.new_msr(63 downto 31) := srr1(63 downto 31); v.new_msr(63 downto 61) := srr1(63 downto 61);
v.new_msr(MSR_HV) := '1';
v.new_msr(59 downto 31) := srr1(59 downto 31);
v.new_msr(26 downto 22) := srr1(26 downto 22); v.new_msr(26 downto 22) := srr1(26 downto 22);
v.new_msr(15 downto 0) := srr1(15 downto 0); v.new_msr(15 downto 0) := srr1(15 downto 0);
if srr1(MSR_PR) = '1' then if srr1(MSR_PR) = '1' then
@ -1176,14 +1286,14 @@ begin
end if; end if;
v.se.write_msr := '1'; v.se.write_msr := '1';
v.e.redirect := '1'; v.e.redirect := '1';
v.se.write_cfar := '1'; v.se.set_cfar := '1';
if HAS_FPU then if HAS_FPU then
v.fp_intr := fp_in.exception and v.fp_intr := fp_in.exception and
(srr1(MSR_FE0) or srr1(MSR_FE1)); (srr1(MSR_FE0) or srr1(MSR_FE1));
end if; end if;
v.do_trace := '0'; v.do_trace := '0';


when OP_CNTZ | OP_POPCNT => when OP_COUNTB =>
v.res2_sel := "01"; v.res2_sel := "01";
slow_op := '1'; slow_op := '1';
when OP_ISEL => when OP_ISEL =>
@ -1270,6 +1380,18 @@ begin
v.se.write_dec := '1'; v.se.write_dec := '1';
when SPRSEL_LOGA => when SPRSEL_LOGA =>
v.se.write_loga := '1'; v.se.write_loga := '1';
when SPRSEL_CFAR =>
v.se.write_cfar := '1';
when SPRSEL_FSCR =>
v.se.write_fscr := '1';
when SPRSEL_HFSCR =>
v.se.write_hfscr := '1';
when SPRSEL_HEIR =>
v.se.write_heir := '1';
when SPRSEL_CTRL =>
v.se.write_ctrl := '1';
when SPRSEL_DSCR =>
v.se.write_dscr := '1';
when others => when others =>
end case; end case;
end if; end if;
@ -1293,6 +1415,11 @@ begin
when OP_ICBI => when OP_ICBI =>
v.se.icache_inval := '1'; v.se.icache_inval := '1';


when OP_BSORT =>
v.start_bsort := '1';
slow_op := '1';
owait := '1';

when OP_MUL_L64 => when OP_MUL_L64 =>
if e_in.is_32bit = '1' then if e_in.is_32bit = '1' then
v.se.mult_32s := '1'; v.se.mult_32s := '1';
@ -1321,6 +1448,11 @@ begin
owait := '1'; owait := '1';
end if; end if;


when OP_WAIT =>
if e_in.insn(22 downto 21) = "00" then
v.se.enter_wait := '1';
end if;

when OP_FETCH_FAILED => when OP_FETCH_FAILED =>
-- Handling an ITLB miss doesn't count as having executed an instruction -- Handling an ITLB miss doesn't count as having executed an instruction
v.do_trace := '0'; v.do_trace := '0';
@ -1331,7 +1463,25 @@ begin
end if; end if;
end case; end case;


if misaligned = '1' then if ex1.msr(MSR_PR) = '1' and e_in.prefixed = '1' and
(ctrl.hfscr_pref = '0' or ctrl.fscr_pref = '0') then
-- [Hypervisor] facility unavailable for prefixed instructions,
-- which has higher priority than the alignment interrupt for
-- misaligned prefixed instructions, which has higher priority than
-- other [hypervisor] facility unavailable interrupts (e.g. for
-- plfs with HFSCR[FP] = 0).
v.exception := '1';
v.ic := x"b";
if ctrl.hfscr_pref = '0' then
v.e.hv_intr := '1';
v.e.intr_vec := 16#f80#;
v.se.write_hic := '1';
else
v.e.intr_vec := 16#f60#;
v.se.write_ic := '1';
end if;

elsif misaligned = '1' then
-- generate an alignment interrupt -- generate an alignment interrupt
-- This is higher priority than illegal because a misaligned -- This is higher priority than illegal because a misaligned
-- prefix will come down as an OP_ILLEGAL instruction. -- prefix will come down as an OP_ILLEGAL instruction.
@ -1354,15 +1504,62 @@ begin
end if; end if;


elsif illegal = '1' then elsif illegal = '1' then
-- generate hypervisor emulation assistance interrupt (HEAI)
-- and write the offending instruction into HEIR
v.exception := '1'; v.exception := '1';
v.e.srr1(47 - 34) := e_in.prefixed; v.e.srr1(47 - 34) := e_in.prefixed;
-- Since we aren't doing Hypervisor emulation assist (0xe40) we v.e.intr_vec := 16#e40#;
-- set bit 44 to indicate we have an illegal v.e.hv_intr := '1';
v.e.srr1(47 - 44) := '1'; v.se.set_heir := '1';
if e_in.valid = '1' then if e_in.valid = '1' then
report "illegal instruction"; report "illegal instruction";
end if; end if;


elsif ex1.msr(MSR_PR) = '1' and v.se.scv_trap = '1' and
ctrl.fscr_scv = '0' then
-- Facility unavailable for scv instruction
v.exception := '1';
v.ic := x"c";
v.e.intr_vec := 16#f60#;
v.se.write_ic := '1';

elsif ex1.msr(MSR_PR) = '1' and e_in.uses_tar = '1' and
(ctrl.hfscr_tar = '0' or ctrl.fscr_tar = '0') then
-- [Hypervisor] facility unavailable for TAR access
v.exception := '1';
v.ic := x"8";
if ctrl.hfscr_tar = '0' then
v.e.hv_intr := '1';
v.e.intr_vec := 16#f80#;
v.se.write_hic := '1';
else
v.e.intr_vec := 16#f60#;
v.se.write_ic := '1';
end if;

elsif ex1.msr(MSR_PR) = '1' and e_in.uses_dscr = '1' and
(ctrl.hfscr_dscr = '0' or ctrl.fscr_dscr = '0') then
-- [Hypervisor] facility unavailable for DSCR access
v.exception := '1';
v.ic := x"2";
if ctrl.hfscr_dscr = '0' then
v.e.hv_intr := '1';
v.e.intr_vec := 16#f80#;
v.se.write_hic := '1';
else
v.e.intr_vec := 16#f60#;
v.se.write_ic := '1';
end if;

elsif HAS_FPU and ex1.msr(MSR_PR) = '1' and e_in.fac = FPU and
ctrl.hfscr_fp = '0' then
-- Hypervisor facility unavailable for FP instructions
v.exception := '1';
v.ic := x"0";
v.e.hv_intr := '1';
v.e.intr_vec := 16#f80#;
v.se.write_hic := '1';

elsif HAS_FPU and ex1.msr(MSR_FP) = '0' and e_in.fac = FPU then elsif HAS_FPU and ex1.msr(MSR_FP) = '0' and e_in.fac = FPU then
-- generate a floating-point unavailable interrupt -- generate a floating-point unavailable interrupt
v.exception := '1'; v.exception := '1';
@ -1391,19 +1588,24 @@ begin
variable fv : Execute1ToFPUType; variable fv : Execute1ToFPUType;
variable go : std_ulogic; variable go : std_ulogic;
variable bypass_valid : std_ulogic; variable bypass_valid : std_ulogic;
variable is_scv : std_ulogic;
begin begin
v := ex1; v := ex1;
if (ex1.busy or l_in.busy or fp_in.busy) = '0' then if busy_out = '0' then
v.e := actions.e; v.e := actions.e;
v.e.valid := '0'; v.e.valid := '0';
v.oe := e_in.oe; v.oe := e_in.oe;
v.spr_select := e_in.spr_select; v.spr_select := e_in.spr_select;
v.pmu_spr_num := e_in.insn(20 downto 16); v.pmu_spr_num := e_in.insn(20 downto 16);
v.mul_select := e_in.sub_select(1 downto 0); v.mul_select := e_in.sub_select;
v.se := side_effect_init; v.se := side_effect_init;
v.ramspr_wraddr := e_in.ramspr_wraddr; v.ramspr_wraddr := e_in.ramspr_wraddr;
v.lr_from_next := e_in.lr; v.lr_from_next := e_in.lr;
v.ramspr_odd_data := actions.ramspr_odd_data; v.ramspr_odd_data := actions.ramspr_odd_data;
v.ic := actions.ic;
v.prefixed := e_in.prefixed;
v.insn := e_in.insn;
v.prefix := e_in.prefix;
end if; end if;


lv := Execute1ToLoadstore1Init; lv := Execute1ToLoadstore1Init;
@ -1426,7 +1628,7 @@ begin
rot_clear_right <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCR else '0'; rot_clear_right <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCR else '0';
rot_sign_ext <= '1' when e_in.insn_type = OP_EXTSWSLI else '0'; rot_sign_ext <= '1' when e_in.insn_type = OP_EXTSWSLI else '0';


do_popcnt <= '1' when e_in.insn_type = OP_POPCNT else '0'; do_popcnt <= '1' when e_in.insn_type = OP_COUNTB and e_in.insn(7 downto 6) = "11" else '0';


if valid_in = '1' then if valid_in = '1' then
v.prev_op := e_in.insn_type; v.prev_op := e_in.insn_type;
@ -1454,10 +1656,9 @@ begin
v.e.srr1(47 - 33) := '1'; v.e.srr1(47 - 33) := '1';
v.e.srr1(47 - 34) := ex1.prev_prefixed; v.e.srr1(47 - 34) := ex1.prev_prefixed;
if ex1.prev_op = OP_LOAD or ex1.prev_op = OP_ICBI or ex1.prev_op = OP_ICBT or if ex1.prev_op = OP_LOAD or ex1.prev_op = OP_ICBI or ex1.prev_op = OP_ICBT or
ex1.prev_op = OP_DCBT or ex1.prev_op = OP_DCBST or ex1.prev_op = OP_DCBF then ex1.prev_op = OP_DCBF then
v.e.srr1(47 - 35) := '1'; v.e.srr1(47 - 35) := '1';
elsif ex1.prev_op = OP_STORE or ex1.prev_op = OP_DCBZ or elsif ex1.prev_op = OP_STORE or ex1.prev_op = OP_DCBZ then
ex1.prev_op = OP_DCBTST then
v.e.srr1(47 - 36) := '1'; v.e.srr1(47 - 36) := '1';
end if; end if;


@ -1474,6 +1675,7 @@ begin
v.e.intr_vec := 16#500#; v.e.intr_vec := 16#500#;
report "IRQ valid: External"; report "IRQ valid: External";
v.ext_interrupt := '1'; v.ext_interrupt := '1';
v.e.hv_intr := '1';
end if; end if;
v.e.srr1 := (others => '0'); v.e.srr1 := (others => '0');
exception := '1'; exception := '1';
@ -1500,6 +1702,7 @@ begin
v.mul_in_progress := actions.start_mul; v.mul_in_progress := actions.start_mul;
x_to_divider.valid <= actions.start_div; x_to_divider.valid <= actions.start_div;
v.div_in_progress := actions.start_div; v.div_in_progress := actions.start_div;
v.bsort_in_progress := actions.start_bsort;
v.br_mispredict := v.e.redirect and actions.direct_branch; v.br_mispredict := v.e.redirect and actions.direct_branch;
v.advance_nia := actions.advance_nia; v.advance_nia := actions.advance_nia;
v.redir_to_next := actions.redir_to_next; v.redir_to_next := actions.redir_to_next;
@ -1510,7 +1713,7 @@ begin
-- multiply is happening in order to stop following -- multiply is happening in order to stop following
-- instructions from using the wrong XER value -- instructions from using the wrong XER value
-- (and for simplicity in the OE=0 case). -- (and for simplicity in the OE=0 case).
v.busy := actions.start_div or actions.start_mul; v.busy := actions.start_div or actions.start_mul or actions.start_bsort;


-- instruction for other units, i.e. LDST -- instruction for other units, i.e. LDST
if e_in.unit = LDST then if e_in.unit = LDST then
@ -1520,6 +1723,8 @@ begin
fv.valid := '1'; fv.valid := '1';
end if; end if;
end if; end if;
is_scv := go and actions.se.scv_trap;
bsort_start <= go and actions.start_bsort;


if not HAS_FPU and ex1.div_in_progress = '1' then if not HAS_FPU and ex1.div_in_progress = '1' then
v.div_in_progress := not divider_to_x.valid; v.div_in_progress := not divider_to_x.valid;
@ -1552,6 +1757,13 @@ begin
end if; end if;
v.e.valid := '1'; v.e.valid := '1';
end if; end if;
if ex1.bsort_in_progress = '1' then
v.bsort_in_progress := not bsort_done;
v.e.valid := bsort_done;
v.busy := not bsort_done;
v.e.write_data := alu_result;
bypass_valid := bsort_done;
end if;


if v.e.write_xerc_enable = '1' and v.e.valid = '1' then if v.e.write_xerc_enable = '1' and v.e.valid = '1' then
v.xerc := v.e.xerc; v.xerc := v.e.xerc;
@ -1560,6 +1772,7 @@ begin


if (ex1.busy or l_in.busy or fp_in.busy) = '0' then if (ex1.busy or l_in.busy or fp_in.busy) = '0' then
v.e.interrupt := exception; v.e.interrupt := exception;
v.e.is_scv := is_scv;
end if; end if;
if v.e.valid = '0' then if v.e.valid = '0' then
v.e.redirect := '0'; v.e.redirect := '0';
@ -1658,6 +1871,12 @@ begin
log_wr_addr & ex2.log_addr_spr when SPRSEL_LOGA, log_wr_addr & ex2.log_addr_spr when SPRSEL_LOGA,
log_rd_data when SPRSEL_LOGD, log_rd_data when SPRSEL_LOGD,
ctrl.cfar when SPRSEL_CFAR, ctrl.cfar when SPRSEL_CFAR,
assemble_fscr(ctrl) when SPRSEL_FSCR,
assemble_hfscr(ctrl) when SPRSEL_HFSCR,
ctrl.heir when SPRSEL_HEIR,
assemble_ctrl(ctrl, ex1.msr(MSR_PR)) when SPRSEL_CTRL,
39x"0" & ctrl.dscr when SPRSEL_DSCR,
56x"0" & std_ulogic_vector(to_unsigned(CPU_INDEX, 8)) when SPRSEL_PIR,
assemble_xer(ex1.e.xerc, ctrl.xer_low) when others; assemble_xer(ex1.e.xerc, ctrl.xer_low) when others;


stage2_stall <= l_in.l2stall or fp_in.f2stall; stage2_stall <= l_in.l2stall or fp_in.f2stall;
@ -1673,6 +1892,7 @@ begin
variable cr_mask : std_ulogic_vector(7 downto 0); variable cr_mask : std_ulogic_vector(7 downto 0);
variable sign, zero : std_ulogic; variable sign, zero : std_ulogic;
variable rcnz_hi, rcnz_lo : std_ulogic; variable rcnz_hi, rcnz_lo : std_ulogic;
variable irq_exc : std_ulogic;
begin begin
-- Next insn adder used in a couple of places -- Next insn adder used in a couple of places
next_nia <= std_ulogic_vector(unsigned(ex1.e.last_nia) + 4); next_nia <= std_ulogic_vector(unsigned(ex1.e.last_nia) + 4);
@ -1792,6 +2012,8 @@ begin
ctrl_tmp.dec <= ex1.e.write_data; ctrl_tmp.dec <= ex1.e.write_data;
end if; end if;
if ex1.se.write_cfar = '1' then if ex1.se.write_cfar = '1' then
ctrl_tmp.cfar <= ex1.e.write_data;
elsif ex1.se.set_cfar = '1' then
ctrl_tmp.cfar <= ex1.e.last_nia; ctrl_tmp.cfar <= ex1.e.last_nia;
end if; end if;
if ex1.se.write_loga = '1' then if ex1.se.write_loga = '1' then
@ -1800,11 +2022,56 @@ begin
v.log_addr_spr := std_ulogic_vector(unsigned(ex2.log_addr_spr) + 1); v.log_addr_spr := std_ulogic_vector(unsigned(ex2.log_addr_spr) + 1);
end if; end if;
x_to_pmu.mtspr <= ex1.se.write_pmuspr; x_to_pmu.mtspr <= ex1.se.write_pmuspr;
if ex1.se.write_hfscr = '1' then
ctrl_tmp.hfscr_ic <= ex1.e.write_data(59 downto 56);
ctrl_tmp.hfscr_pref <= ex1.e.write_data(HFSCR_PREFIX);
ctrl_tmp.hfscr_tar <= ex1.e.write_data(HFSCR_TAR);
ctrl_tmp.hfscr_dscr <= ex1.e.write_data(HFSCR_DSCR);
ctrl_tmp.hfscr_fp <= ex1.e.write_data(HFSCR_FP);
elsif ex1.se.write_hic = '1' then
ctrl_tmp.hfscr_ic <= ex1.ic;
end if;
if ex1.se.write_fscr = '1' then
ctrl_tmp.fscr_ic <= ex1.e.write_data(59 downto 56);
ctrl_tmp.fscr_pref <= ex1.e.write_data(FSCR_PREFIX);
ctrl_tmp.fscr_scv <= ex1.e.write_data(FSCR_SCV);
ctrl_tmp.fscr_tar <= ex1.e.write_data(FSCR_TAR);
ctrl_tmp.fscr_dscr <= ex1.e.write_data(FSCR_DSCR);
elsif ex1.se.write_ic = '1' then
ctrl_tmp.fscr_ic <= ex1.ic;
end if;
if ex1.se.write_heir = '1' then
ctrl_tmp.heir <= ex1.e.write_data;
elsif ex1.se.set_heir = '1' then
ctrl_tmp.heir(31 downto 0) <= ex1.insn;
if ex1.prefixed = '1' then
ctrl_tmp.heir(63 downto 58) <= 6x"01";
ctrl_tmp.heir(57 downto 32) <= ex1.prefix;
else
ctrl_tmp.heir(63 downto 32) <= (others => '0');
end if;
end if;
if ex1.se.write_ctrl = '1' then
ctrl_tmp.run <= ex1.e.write_data(0);
end if;
if ex1.se.write_dscr = '1' then
ctrl_tmp.dscr <= ex1.e.write_data(24 downto 0);
end if;
if ex1.se.enter_wait = '1' then
ctrl_tmp.wait_state <= '1';
end if;
end if;

-- pending exceptions clear any wait state
-- ex1.fp_exception_next is not tested because it is not possible to
-- get into wait state with a pending FP exception.
irq_exc := pmu_to_x.intr or ctrl.dec(63) or ext_irq_in;
if ex1.trace_next = '1' or irq_exc = '1' or interrupt_in.intr = '1' then
ctrl_tmp.wait_state <= '0';
end if; end if;


if interrupt_in.intr = '1' then if interrupt_in.intr = '1' then
ctrl_tmp.msr(MSR_SF) <= '1'; ctrl_tmp.msr(MSR_SF) <= '1';
ctrl_tmp.msr(MSR_EE) <= '0';
ctrl_tmp.msr(MSR_PR) <= '0'; ctrl_tmp.msr(MSR_PR) <= '0';
ctrl_tmp.msr(MSR_SE) <= '0'; ctrl_tmp.msr(MSR_SE) <= '0';
ctrl_tmp.msr(MSR_BE) <= '0'; ctrl_tmp.msr(MSR_BE) <= '0';
@ -1813,8 +2080,11 @@ begin
ctrl_tmp.msr(MSR_FE1) <= '0'; ctrl_tmp.msr(MSR_FE1) <= '0';
ctrl_tmp.msr(MSR_IR) <= '0'; ctrl_tmp.msr(MSR_IR) <= '0';
ctrl_tmp.msr(MSR_DR) <= '0'; ctrl_tmp.msr(MSR_DR) <= '0';
ctrl_tmp.msr(MSR_RI) <= '0';
ctrl_tmp.msr(MSR_LE) <= '1'; ctrl_tmp.msr(MSR_LE) <= '1';
if interrupt_in.scv_int = '0' then
ctrl_tmp.msr(MSR_EE) <= '0';
ctrl_tmp.msr(MSR_RI) <= '0';
end if;
end if; end if;


bypass_valid := ex1.e.valid; bypass_valid := ex1.e.valid;
@ -1838,6 +2108,7 @@ begin
e_out <= ex2.e; e_out <= ex2.e;
e_out.msr <= msr_copy(ctrl.msr); e_out.msr <= msr_copy(ctrl.msr);


run_out <= ctrl.run;
terminate_out <= ex2.se.terminate; terminate_out <= ex2.se.terminate;
icache_inval <= ex2.se.icache_inval; icache_inval <= ex2.se.icache_inval;



@ -391,7 +391,7 @@ begin
v_int.next_nia := RESET_ADDRESS; v_int.next_nia := RESET_ADDRESS;
end if; end if;
elsif w_in.interrupt = '1' then elsif w_in.interrupt = '1' then
v_int.next_nia := 52x"0" & w_in.intr_vec(11 downto 2) & "00"; v_int.next_nia := 47x"0" & w_in.intr_vec(16 downto 2) & "00";
end if; end if;
if rst /= '0' or w_in.interrupt = '1' then if rst /= '0' or w_in.interrupt = '1' then
v.req := '0'; v.req := '0';

@ -142,6 +142,9 @@ end entity toplevel;


architecture behaviour of toplevel is architecture behaviour of toplevel is


-- Status
signal run_out : std_ulogic;

-- Reset signals: -- Reset signals:
signal soc_rst : std_ulogic; signal soc_rst : std_ulogic;
signal pll_rst : std_ulogic; signal pll_rst : std_ulogic;
@ -263,6 +266,7 @@ begin
system_clk => system_clk, system_clk => system_clk,
rst => soc_rst, rst => soc_rst,
sw_soc_reset => sw_rst, sw_soc_reset => sw_rst,
run_out => run_out,


-- UART signals -- UART signals
uart0_txd => uart_main_tx, uart0_txd => uart_main_tx,
@ -742,6 +746,7 @@ begin
led4 <= system_clk_locked; led4 <= system_clk_locked;
led5 <= eth_clk_locked; led5 <= eth_clk_locked;
led6 <= not soc_rst; led6 <= not soc_rst;
led7 <= run_out;


-- GPIO -- GPIO
gpio_in(10) <= btn0; gpio_in(10) <= btn0;

@ -61,6 +61,9 @@ architecture behave of loadstore1 is
dc_req : std_ulogic; dc_req : std_ulogic;
load : std_ulogic; load : std_ulogic;
store : std_ulogic; store : std_ulogic;
flush : std_ulogic;
touch : std_ulogic;
sync : std_ulogic;
tlbie : std_ulogic; tlbie : std_ulogic;
dcbz : std_ulogic; dcbz : std_ulogic;
read_spr : std_ulogic; read_spr : std_ulogic;
@ -84,6 +87,9 @@ architecture behave of loadstore1 is
update : std_ulogic; update : std_ulogic;
xerc : xer_common_t; xerc : xer_common_t;
reserve : std_ulogic; reserve : std_ulogic;
atomic_qw : std_ulogic;
atomic_first : std_ulogic;
atomic_last : std_ulogic;
rc : std_ulogic; rc : std_ulogic;
nc : std_ulogic; -- non-cacheable access nc : std_ulogic; -- non-cacheable access
virt_mode : std_ulogic; virt_mode : std_ulogic;
@ -97,7 +103,8 @@ architecture behave of loadstore1 is
two_dwords : std_ulogic; two_dwords : std_ulogic;
incomplete : std_ulogic; incomplete : std_ulogic;
end record; end record;
constant request_init : request_t := (valid => '0', dc_req => '0', load => '0', store => '0', tlbie => '0', constant request_init : request_t := (valid => '0', dc_req => '0', load => '0', store => '0',
flush => '0', touch => '0', sync => '0', tlbie => '0',
dcbz => '0', read_spr => '0', write_spr => '0', mmu_op => '0', dcbz => '0', read_spr => '0', write_spr => '0', mmu_op => '0',
instr_fault => '0', do_update => '0', instr_fault => '0', do_update => '0',
mode_32bit => '0', prefixed => '0', mode_32bit => '0', prefixed => '0',
@ -108,6 +115,7 @@ architecture behave of loadstore1 is
elt_length => x"0", byte_reverse => '0', brev_mask => "000", elt_length => x"0", byte_reverse => '0', brev_mask => "000",
sign_extend => '0', update => '0', sign_extend => '0', update => '0',
xerc => xerc_init, reserve => '0', xerc => xerc_init, reserve => '0',
atomic_qw => '0', atomic_first => '0', atomic_last => '0',
rc => '0', nc => '0', rc => '0', nc => '0',
virt_mode => '0', priv_mode => '0', load_sp => '0', virt_mode => '0', priv_mode => '0', load_sp => '0',
sprsel => "00", ric => "00", is_slbia => '0', align_intr => '0', sprsel => "00", ric => "00", is_slbia => '0', align_intr => '0',
@ -447,7 +455,10 @@ begin
if l_in.second = '1' then if l_in.second = '1' then
-- for an update-form load, use the previous address -- for an update-form load, use the previous address
-- as the value to write back to RA. -- as the value to write back to RA.
addr := r1.addr0; -- for a quadword load or store, use with the previous
-- address + 8.
addr := std_ulogic_vector(unsigned(r1.addr0(63 downto 3)) + not l_in.update) &
r1.addr0(2 downto 0);
end if; end if;
if l_in.mode_32bit = '1' then if l_in.mode_32bit = '1' then
addr(63 downto 32) := (others => '0'); addr(63 downto 32) := (others => '0');
@ -463,7 +474,7 @@ begin
addr_mask := std_ulogic_vector(unsigned(l_in.length(2 downto 0)) - 1); addr_mask := std_ulogic_vector(unsigned(l_in.length(2 downto 0)) - 1);


-- Do length_to_sel and work out if we are doing 2 dwords -- Do length_to_sel and work out if we are doing 2 dwords
long_sel := xfer_data_sel(v.length, addr(2 downto 0)); long_sel := xfer_data_sel(l_in.length, addr(2 downto 0));
v.byte_sel := long_sel(7 downto 0); v.byte_sel := long_sel(7 downto 0);
v.second_bytes := long_sel(15 downto 8); v.second_bytes := long_sel(15 downto 8);
if long_sel(15 downto 8) /= "00000000" then if long_sel(15 downto 8) /= "00000000" then
@ -472,23 +483,54 @@ begin


-- check alignment for larx/stcx -- check alignment for larx/stcx
misaligned := or (addr_mask and addr(2 downto 0)); misaligned := or (addr_mask and addr(2 downto 0));
if l_in.repeat = '1' and l_in.update = '0' and addr(3) /= l_in.second then
misaligned := '1';
end if;
v.align_intr := l_in.reserve and misaligned; v.align_intr := l_in.reserve and misaligned;


v.atomic_first := not misaligned and not l_in.second;
v.atomic_last := not misaligned and (l_in.second or not l_in.repeat);

-- is this a quadword load or store? i.e. lq plq stq pstq lqarx stqcx.
if l_in.repeat = '1' and l_in.update = '0' then
if misaligned = '0' then
-- Since the access is aligned we have to do it atomically
v.atomic_qw := '1';
else
-- We require non-prefixed lq in LE mode to be aligned in order
-- to avoid the case where RA = RT+1 and the second access faults
-- after the first has overwritten RA.
if l_in.op = OP_LOAD and l_in.byte_reverse = '0' and l_in.prefixed = '0' then
v.align_intr := '1';
end if;
end if;
end if;

case l_in.op is case l_in.op is
when OP_SYNC =>
v.sync := '1';
when OP_STORE => when OP_STORE =>
v.store := '1'; v.store := '1';
if l_in.length = "0000" then
v.touch := '1';
end if;
when OP_LOAD => when OP_LOAD =>
-- Note: only RA updates have l_in.second = 1 if l_in.update = '0' or l_in.second = '0' then
if l_in.second = '0' then
v.load := '1'; v.load := '1';
if HAS_FPU and l_in.is_32bit = '1' then if HAS_FPU and l_in.is_32bit = '1' then
-- Allow an extra cycle for SP->DP precision conversion -- Allow an extra cycle for SP->DP precision conversion
v.load_sp := '1'; v.load_sp := '1';
end if; end if;
if l_in.length = "0000" then
v.touch := '1';
end if;
else else
-- write back address to RA -- write back address to RA
v.do_update := '1'; v.do_update := '1';
end if; end if;
when OP_DCBF =>
v.load := '1';
v.flush := '1';
when OP_DCBZ => when OP_DCBZ =>
v.dcbz := '1'; v.dcbz := '1';
v.align_intr := v.nc; v.align_intr := v.nc;
@ -508,13 +550,13 @@ begin
v.mmu_op := '1'; v.mmu_op := '1';
when others => when others =>
end case; end case;
v.dc_req := l_in.valid and (v.load or v.store or v.dcbz) and not v.align_intr; v.dc_req := l_in.valid and (v.load or v.store or v.sync or v.dcbz) and not v.align_intr;
v.incomplete := v.dc_req and v.two_dwords; v.incomplete := v.dc_req and v.two_dwords;


-- Work out controls for load and store formatting -- Work out controls for load and store formatting
brev_lenm1 := "000"; brev_lenm1 := "000";
if v.byte_reverse = '1' then if v.byte_reverse = '1' then
brev_lenm1 := unsigned(v.length(2 downto 0)) - 1; brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1;
end if; end if;
v.brev_mask := brev_lenm1; v.brev_mask := brev_lenm1;


@ -699,7 +741,8 @@ begin
end if; end if;


interrupt := (r2.req.valid and r2.req.align_intr) or interrupt := (r2.req.valid and r2.req.align_intr) or
(d_in.error and d_in.cache_paradox) or m_in.err; (d_in.error and (d_in.cache_paradox or d_in.reserve_nc)) or
m_in.err;
if interrupt = '1' then if interrupt = '1' then
v.req.valid := '0'; v.req.valid := '0';
v.busy := '0'; v.busy := '0';
@ -855,7 +898,8 @@ begin


if d_in.valid = '1' then if d_in.valid = '1' then
if r2.req.incomplete = '0' then if r2.req.incomplete = '0' then
write_enable := r2.req.load and not r2.req.load_sp; write_enable := r2.req.load and not r2.req.load_sp and
not r2.req.flush and not r2.req.touch;
-- stores write back rA update -- stores write back rA update
do_update := r2.req.update and r2.req.store; do_update := r2.req.update and r2.req.store;
end if; end if;
@ -865,6 +909,7 @@ begin
-- signal an interrupt straight away -- signal an interrupt straight away
exception := '1'; exception := '1';
dsisr(63 - 38) := not r2.req.load; dsisr(63 - 38) := not r2.req.load;
dsisr(63 - 37) := d_in.reserve_nc;
-- XXX there is no architected bit for this -- XXX there is no architected bit for this
-- (probably should be a machine check in fact) -- (probably should be a machine check in fact)
dsisr(63 - 35) := d_in.cache_paradox; dsisr(63 - 35) := d_in.cache_paradox;
@ -950,8 +995,14 @@ begin
d_out.valid <= stage1_dcreq; d_out.valid <= stage1_dcreq;
d_out.load <= stage1_req.load; d_out.load <= stage1_req.load;
d_out.dcbz <= stage1_req.dcbz; d_out.dcbz <= stage1_req.dcbz;
d_out.flush <= stage1_req.flush;
d_out.touch <= stage1_req.touch;
d_out.sync <= stage1_req.sync;
d_out.nc <= stage1_req.nc; d_out.nc <= stage1_req.nc;
d_out.reserve <= stage1_req.reserve; d_out.reserve <= stage1_req.reserve;
d_out.atomic_qw <= stage1_req.atomic_qw;
d_out.atomic_first <= stage1_req.atomic_first;
d_out.atomic_last <= stage1_req.atomic_last;
d_out.addr <= stage1_req.addr; d_out.addr <= stage1_req.addr;
d_out.byte_sel <= stage1_req.byte_sel; d_out.byte_sel <= stage1_req.byte_sel;
d_out.virt_mode <= stage1_req.virt_mode; d_out.virt_mode <= stage1_req.virt_mode;
@ -960,8 +1011,14 @@ begin
d_out.valid <= req; d_out.valid <= req;
d_out.load <= r2.req.load; d_out.load <= r2.req.load;
d_out.dcbz <= r2.req.dcbz; d_out.dcbz <= r2.req.dcbz;
d_out.flush <= r2.req.flush;
d_out.touch <= r2.req.touch;
d_out.sync <= r2.req.sync;
d_out.nc <= r2.req.nc; d_out.nc <= r2.req.nc;
d_out.reserve <= r2.req.reserve; d_out.reserve <= r2.req.reserve;
d_out.atomic_qw <= r2.req.atomic_qw;
d_out.atomic_first <= r2.req.atomic_first;
d_out.atomic_last <= r2.req.atomic_last;
d_out.addr <= r2.req.addr; d_out.addr <= r2.req.addr;
d_out.byte_sel <= r2.req.byte_sel; d_out.byte_sel <= r2.req.byte_sel;
d_out.virt_mode <= r2.req.virt_mode; d_out.virt_mode <= r2.req.virt_mode;

@ -20,6 +20,7 @@ filesets:
- sim_console.vhdl - sim_console.vhdl
- logical.vhdl - logical.vhdl
- countbits.vhdl - countbits.vhdl
- bitsort.vhdl
- control.vhdl - control.vhdl
- execute1.vhdl - execute1.vhdl
- fpu.vhdl - fpu.vhdl

@ -121,6 +121,8 @@ architecture behaviour of predecoder is
2#011110_01110# to 2#011110_01111# => INSN_rldimi, 2#011110_01110# to 2#011110_01111# => INSN_rldimi,
2#011110_10000# to 2#011110_10001# => INSN_rldcl, 2#011110_10000# to 2#011110_10001# => INSN_rldcl,
2#011110_10010# to 2#011110_10011# => INSN_rldcr, 2#011110_10010# to 2#011110_10011# => INSN_rldcr,
-- major opcode 56
2#111000_00000# to 2#111000_11111# => INSN_lq,
-- major opcode 58 -- major opcode 58
2#111010_00000# => INSN_ld, 2#111010_00000# => INSN_ld,
2#111010_00001# => INSN_ldu, 2#111010_00001# => INSN_ldu,
@ -161,20 +163,28 @@ architecture behaviour of predecoder is
-- major opcode 62 -- major opcode 62
2#111110_00000# => INSN_std, 2#111110_00000# => INSN_std,
2#111110_00001# => INSN_stdu, 2#111110_00001# => INSN_stdu,
2#111110_00010# => INSN_stq,
2#111110_00100# => INSN_std, 2#111110_00100# => INSN_std,
2#111110_00101# => INSN_stdu, 2#111110_00101# => INSN_stdu,
2#111110_00110# => INSN_stq,
2#111110_01000# => INSN_std, 2#111110_01000# => INSN_std,
2#111110_01001# => INSN_stdu, 2#111110_01001# => INSN_stdu,
2#111110_01010# => INSN_stq,
2#111110_01100# => INSN_std, 2#111110_01100# => INSN_std,
2#111110_01101# => INSN_stdu, 2#111110_01101# => INSN_stdu,
2#111110_01110# => INSN_stq,
2#111110_10000# => INSN_std, 2#111110_10000# => INSN_std,
2#111110_10001# => INSN_stdu, 2#111110_10001# => INSN_stdu,
2#111110_10010# => INSN_stq,
2#111110_10100# => INSN_std, 2#111110_10100# => INSN_std,
2#111110_10101# => INSN_stdu, 2#111110_10101# => INSN_stdu,
2#111110_10110# => INSN_stq,
2#111110_11000# => INSN_std, 2#111110_11000# => INSN_std,
2#111110_11001# => INSN_stdu, 2#111110_11001# => INSN_stdu,
2#111110_11010# => INSN_stq,
2#111110_11100# => INSN_std, 2#111110_11100# => INSN_std,
2#111110_11101# => INSN_stdu, 2#111110_11101# => INSN_stdu,
2#111110_11110# => INSN_stq,
-- major opcode 63 -- major opcode 63
2#111111_00100# to 2#111111_00101# => INSN_fdiv, 2#111111_00100# to 2#111111_00101# => INSN_fdiv,
2#111111_01000# to 2#111111_01001# => INSN_fsub, 2#111111_01000# to 2#111111_01001# => INSN_fsub,
@ -190,8 +200,9 @@ architecture behaviour of predecoder is
2#111111_11110# to 2#111111_11111# => INSN_fnmadd, 2#111111_11110# to 2#111111_11111# => INSN_fnmadd,
-- prefix word, PO1 -- prefix word, PO1
2#000001_00000# to 2#000001_11111# => INSN_prefix, 2#000001_00000# to 2#000001_11111# => INSN_prefix,
-- Major opcodes 57 and 61 are SFFS load/store instructions when prefixed -- Major opcodes 57, 60 and 61 are SFFS load/store instructions when prefixed
2#111001_00000# to 2#111001_11111# => INSN_op57, 2#111001_00000# to 2#111001_11111# => INSN_op57,
2#111100_00000# to 2#111100_11111# => INSN_op60,
2#111101_00000# to 2#111101_11111# => INSN_op61, 2#111101_00000# to 2#111101_11111# => INSN_op61,
others => INSN_illegal others => INSN_illegal
); );
@ -219,6 +230,7 @@ architecture behaviour of predecoder is
2#0_00101_11011# => INSN_brd, 2#0_00101_11011# => INSN_brd,
2#0_01001_11010# => INSN_cbcdtd, 2#0_01001_11010# => INSN_cbcdtd,
2#0_01000_11010# => INSN_cdtbcd, 2#0_01000_11010# => INSN_cdtbcd,
2#0_00110_11100# => INSN_cfuged,
2#0_00000_00000# => INSN_cmp, 2#0_00000_00000# => INSN_cmp,
2#0_01111_11100# => INSN_cmpb, 2#0_01111_11100# => INSN_cmpb,
2#0_00111_00000# => INSN_cmpeqb, 2#0_00111_00000# => INSN_cmpeqb,
@ -316,6 +328,7 @@ architecture behaviour of predecoder is
2#0_11001_10101# => INSN_lhzcix, 2#0_11001_10101# => INSN_lhzcix,
2#0_01001_10111# => INSN_lhzux, 2#0_01001_10111# => INSN_lhzux,
2#0_01000_10111# => INSN_lhzx, 2#0_01000_10111# => INSN_lhzx,
2#0_01000_10100# => INSN_lqarx,
2#0_00000_10100# => INSN_lwarx, 2#0_00000_10100# => INSN_lwarx,
2#0_01011_10101# => INSN_lwaux, 2#0_01011_10101# => INSN_lwaux,
2#0_01010_10101# => INSN_lwax, 2#0_01010_10101# => INSN_lwax,
@ -363,6 +376,8 @@ architecture behaviour of predecoder is
2#0_00011_11100# => INSN_nor, 2#0_00011_11100# => INSN_nor,
2#0_01101_11100# => INSN_or, 2#0_01101_11100# => INSN_or,
2#0_01100_11100# => INSN_orc, 2#0_01100_11100# => INSN_orc,
2#0_00100_11100# => INSN_pdepd,
2#0_00101_11100# => INSN_pextd,
2#0_00011_11010# => INSN_popcntb, 2#0_00011_11010# => INSN_popcntb,
2#0_01111_11010# => INSN_popcntd, 2#0_01111_11010# => INSN_popcntd,
2#0_01011_11010# => INSN_popcntw, 2#0_01011_11010# => INSN_popcntw,
@ -402,6 +417,7 @@ architecture behaviour of predecoder is
2#0_10110_10110# => INSN_sthcx, 2#0_10110_10110# => INSN_sthcx,
2#0_01101_10111# => INSN_sthux, 2#0_01101_10111# => INSN_sthux,
2#0_01100_10111# => INSN_sthx, 2#0_01100_10111# => INSN_sthx,
2#0_00101_10110# => INSN_stqcx,
2#0_10100_10110# => INSN_stwbrx, 2#0_10100_10110# => INSN_stwbrx,
2#0_11100_10101# => INSN_stwcix, 2#0_11100_10101# => INSN_stwcix,
2#0_00100_10110# => INSN_stwcx, 2#0_00100_10110# => INSN_stwcx,
@ -447,6 +463,8 @@ architecture behaviour of predecoder is
2#1_00100_11110# => INSN_isync, 2#1_00100_11110# => INSN_isync,
2#1_00000_10000# => INSN_mcrf, 2#1_00000_10000# => INSN_mcrf,
2#1_00000_11010# => INSN_rfid, 2#1_00000_11010# => INSN_rfid,
2#1_00010_11010# => INSN_rfscv,
2#1_01000_11010# => INSN_rfid, -- hrfid


-- Major opcode 59 -- Major opcode 59
-- Address bits are 1, insn(10..6), 1, 0, insn(3..1) -- Address bits are 1, insn(10..6), 1, 0, insn(3..1)

@ -87,13 +87,13 @@ const char *units[4] = { "al", "ls", "fp", "3?" };
const char *ops[64] = const char *ops[64] =
{ {
"illegal", "nop ", "add ", "attn ", "b ", "bc ", "bcreg ", "bcd ", "illegal", "nop ", "add ", "attn ", "b ", "bc ", "bcreg ", "bcd ",
"bperm ", "brev ", "cmp ", "cmpb ", "cmpeqb ", "cmprb ", "cntz ", "crop ", "bperm ", "brev ", "cmp ", "cmpb ", "cmpeqb ", "cmprb ", "countb ", "crop ",
"darn ", "dcbf ", "dcbst ", "dcbt ", "dcbtst ", "dcbz ", "icbi ", "icbt ", "darn ", "dcbf ", "dcbst ", "xcbt ", "dcbtst ", "dcbz ", "icbi ", "fpcmp ",
"fpcmp ", "fparith", "fpmove ", "fpmisc ", "div ", "dive ", "mod ", "exts ", "fparith", "fpmove ", "fpmisc ", "div ", "dive ", "mod ", "exts ", "extswsl",
"extswsl", "isel ", "isync ", "logic ", "ld ", "st ", "mcrxrx ", "mfcr ", "isel ", "isync ", "logic ", "ld ", "st ", "mcrxrx ", "mfcr ", "mfmsr ",
"mfmsr ", "mfspr ", "mtcrf ", "mtmsr ", "mtspr ", "mull64 ", "mulh64 ", "mulh32 ", "mfspr ", "mtcrf ", "mtmsr ", "mtspr ", "mull64 ", "mulh64 ", "mulh32 ", "bsort ",
"popcnt ", "prty ", "rfid ", "rlc ", "rlcl ", "rlcr ", "sc ", "setb ", "prty ", "rfid ", "rlc ", "rlcl ", "rlcr ", "sc ", "setb ", "shl ",
"shl ", "shr ", "sync ", "tlbie ", "trap ", "xor ", "addg6s ", "ffail ", "shr ", "sync ", "tlbie ", "trap ", "xor ", "addg6s ", "wait ", "ffail ",
}; };


const char *spr_names[13] = const char *spr_names[13] =

@ -550,6 +550,7 @@ static const char *fast_spr_names[] =
"lr", "ctr", "srr0", "srr1", "hsrr0", "hsrr1", "lr", "ctr", "srr0", "srr1", "hsrr0", "hsrr1",
"sprg0", "sprg1", "sprg2", "sprg3", "sprg0", "sprg1", "sprg2", "sprg3",
"hsprg0", "hsprg1", "xer", "tar", "hsprg0", "hsprg1", "xer", "tar",
"fscr", "hfscr", "heir", "cfar",
}; };


static const char *ldst_spr_names[] = { static const char *ldst_spr_names[] = {

@ -99,6 +99,8 @@ entity soc is
rst : in std_ulogic; rst : in std_ulogic;
system_clk : in std_ulogic; system_clk : in std_ulogic;


run_out : out std_ulogic;

-- "Large" (64-bit) DRAM wishbone -- "Large" (64-bit) DRAM wishbone
wb_dram_in : out wishbone_master_out; wb_dram_in : out wishbone_master_out;
wb_dram_out : in wishbone_slave_out := wishbone_slave_out_init; wb_dram_out : in wishbone_slave_out := wishbone_slave_out_init;
@ -349,6 +351,7 @@ begin
processor: entity work.core processor: entity work.core
generic map( generic map(
SIM => SIM, SIM => SIM,
CPU_INDEX => 0,
HAS_FPU => HAS_FPU, HAS_FPU => HAS_FPU,
HAS_BTC => HAS_BTC, HAS_BTC => HAS_BTC,
DISABLE_FLATTEN => DISABLE_FLATTEN_CORE, DISABLE_FLATTEN => DISABLE_FLATTEN_CORE,
@ -366,6 +369,7 @@ begin
clk => system_clk, clk => system_clk,
rst => rst_core, rst => rst_core,
alt_reset => alt_reset_d, alt_reset => alt_reset_d,
run_out => run_out,
wishbone_insn_in => wishbone_icore_in, wishbone_insn_in => wishbone_icore_in,
wishbone_insn_out => wishbone_icore_out, wishbone_insn_out => wishbone_icore_out,
wishbone_data_in => wishbone_dcore_in, wishbone_data_in => wishbone_dcore_in,

@ -74,25 +74,15 @@ ill_test_1:
EXCEPTION(0x500) EXCEPTION(0x500)
EXCEPTION(0x600) EXCEPTION(0x600)


// We shouldn't get a Program interrupt at 700, so fail
. = 0x700 . = 0x700
mtsprg0 %r3 mtsprg0 %r3
mtsprg1 %r4

// test for bit 44 being set for ILL
mfsrr1 %r3
li %r4, 1
sldi %r4, %r4, (63-44)
and. %r4, %r4, %r3
li %r4, 8 // PASS so skip 2 instructions
bne 1f
li %r4, 4 // FAIL so only skip 1 instruction. Return will catch
1:
mfsrr0 %r3 mfsrr0 %r3
add %r3, %r3, %r4 // skip some instructions addi %r3, %r3, 4 // skip one instruction, causing a fail
mtsrr0 %r3 mtsrr0 %r3


mfsprg0 %r3 mfsprg0 %r3
mfsprg1 %r4
rfid rfid


EXCEPTION(0x800) EXCEPTION(0x800)
@ -104,7 +94,18 @@ ill_test_1:
EXCEPTION(0xd00) EXCEPTION(0xd00)
EXCEPTION(0xe00) EXCEPTION(0xe00)
EXCEPTION(0xe20) EXCEPTION(0xe20)
EXCEPTION(0xe40)
// We now expect a HEAI at e40 for illegal instructions
. = 0xe40
mthsprg0 %r3

mfhsrr0 %r3
addi %r3, %r3, 8 // skip one instruction, causing success
mthsrr0 %r3

mfhsprg0 %r3
hrfid

EXCEPTION(0xe60) EXCEPTION(0xe60)
EXCEPTION(0xe80) EXCEPTION(0xe80)
EXCEPTION(0xf00) EXCEPTION(0xf00)

@ -7,6 +7,7 @@
#define MSR_LE 0x1 #define MSR_LE 0x1
#define MSR_DR 0x10 #define MSR_DR 0x10
#define MSR_IR 0x20 #define MSR_IR 0x20
#define MSR_HV 0x1000000000000000ul
#define MSR_SF 0x8000000000000000ul #define MSR_SF 0x8000000000000000ul


extern int test_read(long *addr, long *ret, long init); extern int test_read(long *addr, long *ret, long init);
@ -450,11 +451,11 @@ int mmu_test_11(void)
unsigned long ptr = 0x523000; unsigned long ptr = 0x523000;


/* this should fail */ /* this should fail */
if (test_exec(0, ptr, MSR_SF | MSR_IR | MSR_LE)) if (test_exec(0, ptr, MSR_SF | MSR_HV | MSR_IR | MSR_LE))
return 1; return 1;
/* SRR0 and SRR1 should be set correctly */ /* SRR0 and SRR1 should be set correctly */
if (mfspr(SRR0) != (long) ptr || if (mfspr(SRR0) != (long) ptr ||
mfspr(SRR1) != (MSR_SF | 0x40000000 | MSR_IR | MSR_LE)) mfspr(SRR1) != (MSR_SF | MSR_HV | 0x40000000 | MSR_IR | MSR_LE))
return 2; return 2;
return 0; return 0;
} }
@ -468,12 +469,12 @@ int mmu_test_12(void)
/* create PTE */ /* create PTE */
map((void *)ptr, (void *)mem, PERM_EX | REF); map((void *)ptr, (void *)mem, PERM_EX | REF);
/* this should succeed and be a cache miss */ /* this should succeed and be a cache miss */
if (!test_exec(0, ptr, MSR_SF | MSR_IR | MSR_LE)) if (!test_exec(0, ptr, MSR_SF | MSR_HV | MSR_IR | MSR_LE))
return 1; return 1;
/* create a second PTE */ /* create a second PTE */
map((void *)ptr2, (void *)mem, PERM_EX | REF); map((void *)ptr2, (void *)mem, PERM_EX | REF);
/* this should succeed and be a cache hit */ /* this should succeed and be a cache hit */
if (!test_exec(0, ptr2, MSR_SF | MSR_IR | MSR_LE)) if (!test_exec(0, ptr2, MSR_SF | MSR_HV | MSR_IR | MSR_LE))
return 2; return 2;
return 0; return 0;
} }
@ -487,18 +488,18 @@ int mmu_test_13(void)
/* create a PTE */ /* create a PTE */
map((void *)ptr, (void *)mem, PERM_EX | REF); map((void *)ptr, (void *)mem, PERM_EX | REF);
/* this should succeed */ /* this should succeed */
if (!test_exec(1, ptr, MSR_SF | MSR_IR | MSR_LE)) if (!test_exec(1, ptr, MSR_SF | MSR_HV | MSR_IR | MSR_LE))
return 1; return 1;
/* invalidate the PTE */ /* invalidate the PTE */
unmap((void *)ptr); unmap((void *)ptr);
/* install a second PTE */ /* install a second PTE */
map((void *)ptr2, (void *)mem, PERM_EX | REF); map((void *)ptr2, (void *)mem, PERM_EX | REF);
/* this should fail */ /* this should fail */
if (test_exec(1, ptr, MSR_SF | MSR_IR | MSR_LE)) if (test_exec(1, ptr, MSR_SF | MSR_HV | MSR_IR | MSR_LE))
return 2; return 2;
/* SRR0 and SRR1 should be set correctly */ /* SRR0 and SRR1 should be set correctly */
if (mfspr(SRR0) != (long) ptr || if (mfspr(SRR0) != (long) ptr ||
mfspr(SRR1) != (MSR_SF | 0x40000000 | MSR_IR | MSR_LE)) mfspr(SRR1) != (MSR_SF | MSR_HV | 0x40000000 | MSR_IR | MSR_LE))
return 3; return 3;
return 0; return 0;
} }
@ -513,16 +514,16 @@ int mmu_test_14(void)
/* create a PTE */ /* create a PTE */
map((void *)ptr, (void *)mem, PERM_EX | REF); map((void *)ptr, (void *)mem, PERM_EX | REF);
/* this should fail due to second page not being mapped */ /* this should fail due to second page not being mapped */
if (test_exec(2, ptr, MSR_SF | MSR_IR | MSR_LE)) if (test_exec(2, ptr, MSR_SF | MSR_HV | MSR_IR | MSR_LE))
return 1; return 1;
/* SRR0 and SRR1 should be set correctly */ /* SRR0 and SRR1 should be set correctly */
if (mfspr(SRR0) != ptr2 || if (mfspr(SRR0) != ptr2 ||
mfspr(SRR1) != (MSR_SF | 0x40000000 | MSR_IR | MSR_LE)) mfspr(SRR1) != (MSR_SF | MSR_HV | 0x40000000 | MSR_IR | MSR_LE))
return 2; return 2;
/* create a PTE for the second page */ /* create a PTE for the second page */
map((void *)ptr2, (void *)mem2, PERM_EX | REF); map((void *)ptr2, (void *)mem2, PERM_EX | REF);
/* this should succeed */ /* this should succeed */
if (!test_exec(2, ptr, MSR_SF | MSR_IR | MSR_LE)) if (!test_exec(2, ptr, MSR_SF | MSR_HV | MSR_IR | MSR_LE))
return 3; return 3;
return 0; return 0;
} }
@ -535,11 +536,11 @@ int mmu_test_15(void)
/* create a PTE without execute permission */ /* create a PTE without execute permission */
map((void *)ptr, (void *)mem, DFLT_PERM); map((void *)ptr, (void *)mem, DFLT_PERM);
/* this should fail */ /* this should fail */
if (test_exec(0, ptr, MSR_SF | MSR_IR | MSR_LE)) if (test_exec(0, ptr, MSR_SF | MSR_HV | MSR_IR | MSR_LE))
return 1; return 1;
/* SRR0 and SRR1 should be set correctly */ /* SRR0 and SRR1 should be set correctly */
if (mfspr(SRR0) != ptr || if (mfspr(SRR0) != ptr ||
mfspr(SRR1) != (MSR_SF | 0x10000000 | MSR_IR | MSR_LE)) mfspr(SRR1) != (MSR_SF | MSR_HV | 0x10000000 | MSR_IR | MSR_LE))
return 2; return 2;
return 0; return 0;
} }
@ -556,16 +557,16 @@ int mmu_test_16(void)
/* create a PTE for the second page without execute permission */ /* create a PTE for the second page without execute permission */
map((void *)ptr2, (void *)mem2, PERM_RD | REF); map((void *)ptr2, (void *)mem2, PERM_RD | REF);
/* this should fail due to second page being no-execute */ /* this should fail due to second page being no-execute */
if (test_exec(2, ptr, MSR_SF | MSR_IR | MSR_LE)) if (test_exec(2, ptr, MSR_SF | MSR_HV | MSR_IR | MSR_LE))
return 1; return 1;
/* SRR0 and SRR1 should be set correctly */ /* SRR0 and SRR1 should be set correctly */
if (mfspr(SRR0) != ptr2 || if (mfspr(SRR0) != ptr2 ||
mfspr(SRR1) != (MSR_SF | 0x10000000 | MSR_IR | MSR_LE)) mfspr(SRR1) != (MSR_SF | MSR_HV | 0x10000000 | MSR_IR | MSR_LE))
return 2; return 2;
/* create a PTE for the second page with execute permission */ /* create a PTE for the second page with execute permission */
map((void *)ptr2, (void *)mem2, PERM_RD | PERM_EX | REF); map((void *)ptr2, (void *)mem2, PERM_RD | PERM_EX | REF);
/* this should succeed */ /* this should succeed */
if (!test_exec(2, ptr, MSR_SF | MSR_IR | MSR_LE)) if (!test_exec(2, ptr, MSR_SF | MSR_HV | MSR_IR | MSR_LE))
return 3; return 3;
return 0; return 0;
} }
@ -578,22 +579,22 @@ int mmu_test_17(void)
/* create a PTE without the ref bit set */ /* create a PTE without the ref bit set */
map((void *)ptr, (void *)mem, PERM_EX); map((void *)ptr, (void *)mem, PERM_EX);
/* this should fail */ /* this should fail */
if (test_exec(2, ptr, MSR_SF | MSR_IR | MSR_LE)) if (test_exec(2, ptr, MSR_SF | MSR_HV | MSR_IR | MSR_LE))
return 1; return 1;
/* SRR0 and SRR1 should be set correctly */ /* SRR0 and SRR1 should be set correctly */
if (mfspr(SRR0) != (long) ptr || if (mfspr(SRR0) != (long) ptr ||
mfspr(SRR1) != (MSR_SF | 0x00040000 | MSR_IR | MSR_LE)) mfspr(SRR1) != (MSR_SF | MSR_HV | 0x00040000 | MSR_IR | MSR_LE))
return 2; return 2;
/* create a PTE without ref or execute permission */ /* create a PTE without ref or execute permission */
unmap((void *)ptr); unmap((void *)ptr);
map((void *)ptr, (void *)mem, 0); map((void *)ptr, (void *)mem, 0);
/* this should fail */ /* this should fail */
if (test_exec(2, ptr, MSR_SF | MSR_IR | MSR_LE)) if (test_exec(2, ptr, MSR_SF | MSR_HV | MSR_IR | MSR_LE))
return 1; return 1;
/* SRR0 and SRR1 should be set correctly */ /* SRR0 and SRR1 should be set correctly */
/* RC update fail bit should not be set */ /* RC update fail bit should not be set */
if (mfspr(SRR0) != (long) ptr || if (mfspr(SRR0) != (long) ptr ||
mfspr(SRR1) != (MSR_SF | 0x10000000 | MSR_IR | MSR_LE)) mfspr(SRR1) != (MSR_SF | MSR_HV | 0x10000000 | MSR_IR | MSR_LE))
return 2; return 2;
return 0; return 0;
} }

@ -230,3 +230,63 @@ restore:
ld %r0,16(%r1) ld %r0,16(%r1)
mtlr %r0 mtlr %r0
blr blr

/*
 * do_lq: load a quadword with lq and dump the target register pair.
 * In:  r3 = source address, r4 = pointer to a 2-doubleword result buffer
 * Out: r3 = 0; r6 is stored at 0(r4) and r7 at 8(r4)
 */
.global do_lq
do_lq:
lq %r6,0(%r3)	/* quadword load into the r6/r7 pair */
std %r6,0(%r4)
std %r7,8(%r4)
li %r3,0
blr

/*
 * do_lq_np: same as do_lq, but the base address is first moved into r7,
 * so the base register is also the second register of the r6/r7 target pair.
 * In:  r3 = source address, r4 = pointer to a 2-doubleword result buffer
 * Out: r3 = 0; r6 is stored at 0(r4) and r7 at 8(r4)
 */
.global do_lq_np /* "non-preferred" form of lq */
do_lq_np:
mr %r7,%r3	/* base address now lives in r7 */
lq %r6,0(%r7)
std %r6,0(%r4)
std %r7,8(%r4)
li %r3,0
blr

/*
 * do_lq_bad: execute lq with the base register equal to the first target
 * register (r6).  The instruction is emitted as a raw word; presumably the
 * assembler would refuse to encode it from the mnemonic -- TODO confirm.
 * In:  r3 = source address, r4 = pointer to a 2-doubleword result buffer
 * The remaining instructions mirror do_lq and only run if the lq completes.
 */
.global do_lq_bad /* illegal form of lq */
do_lq_bad:
mr %r6,%r3
.long 0xe0c60000 /* lq %r6,0(%r6) */
std %r6,0(%r4)
std %r7,8(%r4)
li %r3,0
blr

/*
 * do_stq: store a quadword with stq from the r8/r9 pair.
 * In:  r3 = destination address, r4 = pointer to two source doublewords
 * Out: r3 = 0
 */
.global do_stq
do_stq:
ld %r8,0(%r4)	/* r8 = first source doubleword */
ld %r9,8(%r4)	/* r9 = second source doubleword */
stq %r8,0(%r3)
li %r3,0
blr

/* big-endian versions of the above */
/*
 * Each .long below is the byte-reversed encoding of the instruction named
 * beside it; presumably this file is assembled little-endian so that the
 * bytes read as the intended instruction when fetched in big-endian mode
 * -- TODO confirm against the build flags.
 */
.global do_lq_be
do_lq_be:
.long 0x0000c3e0	/* 0xe0c30000: lq %r6,0(%r3) */
.long 0x0000c4f8	/* 0xf8c40000: std %r6,0(%r4) */
.long 0x0800e4f8	/* 0xf8e40008: std %r7,8(%r4) */
.long 0x00006038	/* 0x38600000: li %r3,0 */
.long 0x2000804e	/* 0x4e800020: blr */

/*
 * Byte-reversed encoding of do_lq_np: the base address is moved to r7,
 * which is also the second register of the lq target pair.
 */
.global do_lq_np_be /* "non-preferred" form of lq */
do_lq_np_be:
.long 0x781b677c	/* 0x7c671b78: mr %r7,%r3 */
.long 0x0000c7e0	/* 0xe0c70000: lq %r6,0(%r7) */
.long 0x0000c4f8	/* 0xf8c40000: std %r6,0(%r4) */
.long 0x0800e4f8	/* 0xf8e40008: std %r7,8(%r4) */
.long 0x00006038	/* 0x38600000: li %r3,0 */
.long 0x2000804e	/* 0x4e800020: blr */

/* Byte-reversed encoding of do_stq: load r8/r9 from 0(r4)/8(r4), stq to 0(r3). */
.global do_stq_be
do_stq_be:
.long 0x000004e9	/* 0xe9040000: ld %r8,0(%r4) */
.long 0x080024e9	/* 0xe9240008: ld %r9,8(%r4) */
.long 0x020003f9	/* 0xf9030002: stq %r8,0(%r3) */
.long 0x00006038	/* 0x38600000: li %r3,0 */
.long 0x2000804e	/* 0x4e800020: blr */

@ -12,6 +12,14 @@
extern unsigned long callit(unsigned long arg1, unsigned long arg2, extern unsigned long callit(unsigned long arg1, unsigned long arg2,
unsigned long fn, unsigned long msr); unsigned long fn, unsigned long msr);


extern void do_lq(void *src, unsigned long *regs);
extern void do_lq_np(void *src, unsigned long *regs);
extern void do_lq_bad(void *src, unsigned long *regs);
extern void do_stq(void *dst, unsigned long *regs);
extern void do_lq_be(void *src, unsigned long *regs);
extern void do_lq_np_be(void *src, unsigned long *regs);
extern void do_stq_be(void *dst, unsigned long *regs);

static inline void do_tlbie(unsigned long rb, unsigned long rs) static inline void do_tlbie(unsigned long rb, unsigned long rs)
{ {
__asm__ volatile("tlbie %0,%1" : : "r" (rb), "r" (rs) : "memory"); __asm__ volatile("tlbie %0,%1" : : "r" (rb), "r" (rs) : "memory");
@ -25,6 +33,8 @@ static inline void do_tlbie(unsigned long rb, unsigned long rs)
#define SPRG0 272 #define SPRG0 272
#define SPRG1 273 #define SPRG1 273
#define SPRG3 275 #define SPRG3 275
#define HSRR0 314
#define HSRR1 315
#define PTCR 464 #define PTCR 464


static inline unsigned long mfspr(int sprnum) static inline unsigned long mfspr(int sprnum)
@ -294,6 +304,166 @@ int mode_test_6(void)
return 0; return 0;
} }


/*
 * Exercise lq and stq with MSR = SF | LE by running the do_lq, do_stq,
 * do_lq_np and do_lq_bad helpers through callit().
 *
 * Returns 0 when every check passes.  Otherwise returns a small code
 * identifying the failing check; where callit() itself returned nonzero,
 * that value is ORed with the code.
 *
 * NOTE(review): the code "ret | 5" is produced by two different checks
 * (unexpected result of the unaligned lq, and a failing aligned stq), so
 * that failure value is ambiguous.
 */
int mode_test_7(void)
{
unsigned long quad[4] __attribute__((__aligned__(16)));
unsigned long regs[2];
unsigned long ret, msr;

/*
* Test lq/stq in LE mode
*/
msr = MSR_SF | MSR_LE;
quad[0] = 0x123456789abcdef0ul;
quad[1] = 0xfafa5959bcbc3434ul;
ret = callit((unsigned long)quad, (unsigned long)regs,
(unsigned long)&do_lq, msr);
if (ret)
return ret | 1;
/* the first register of the pair is expected to hold the second doubleword */
if (regs[0] != quad[1] || regs[1] != quad[0])
return 2;
/* unaligned may give alignment interrupt */
quad[2] = 0x0011223344556677ul;
/* &quad[1] is 8 bytes past the 16-byte-aligned start of quad */
ret = callit((unsigned long)&quad[1], (unsigned long)regs,
(unsigned long)&do_lq, msr);
if (ret == 0) {
if (regs[0] != quad[2] || regs[1] != quad[1])
return 3;
} else if (ret == 0x600) {
/* SPRG0 is expected to equal the address of the lq (first instruction of
 * do_lq) and DAR the accessed address; presumably callit's trap path
 * records the interrupted address in SPRG0 -- TODO confirm */
if (mfspr(SPRG0) != (unsigned long) &do_lq ||
mfspr(DAR) != (unsigned long) &quad[1])
return ret | 4;
} else
return ret | 5;

/* try stq */
regs[0] = 0x5238523852385238ul;
regs[1] = 0x5239523952395239ul;
ret = callit((unsigned long)quad, (unsigned long)regs,
(unsigned long)&do_stq, msr);
if (ret)
return ret | 5;
/* memory is expected to hold the pair in swapped order */
if (quad[0] != regs[1] || quad[1] != regs[0])
return 6;
regs[0] = 0x0172686966746564ul;
regs[1] = 0xfe8d0badd00dabcdul;
/* store to an address one byte past the aligned start; must not trap */
ret = callit((unsigned long)quad + 1, (unsigned long)regs,
(unsigned long)&do_stq, msr);
if (ret)
return ret | 7;
/* reassemble the 16 bytes starting at byte offset 1 from quad[0..2] */
if (((quad[0] >> 8) | (quad[1] << 56)) != regs[1] ||
((quad[1] >> 8) | (quad[2] << 56)) != regs[0])
return 8;

/* try lq non-preferred form */
quad[0] = 0x56789abcdef01234ul;
quad[1] = 0x5959bcbc3434fafaul;
ret = callit((unsigned long)quad, (unsigned long)regs,
(unsigned long)&do_lq_np, msr);
if (ret)
return ret | 9;
if (regs[0] != quad[1] || regs[1] != quad[0])
return 10;
/* unaligned should give alignment interrupt in uW implementation */
quad[2] = 0x6677001122334455ul;
ret = callit((unsigned long)&quad[1], (unsigned long)regs,
(unsigned long)&do_lq_np, msr);
if (ret == 0x600) {
/* the lq is the second instruction of do_lq_np, hence the + 4 */
if (mfspr(SPRG0) != (unsigned long) &do_lq_np + 4 ||
mfspr(DAR) != (unsigned long) &quad[1])
return ret | 11;
} else
return 12;

/* make sure lq with rt = ra causes a HEAI interrupt */
ret = callit((unsigned long)quad, (unsigned long)regs,
(unsigned long)&do_lq_bad, msr);
if (ret != 0xe40)
return 13;
/* HSRR0 must point at the raw-encoded lq, the second word of do_lq_bad */
if (mfspr(HSRR0) != (unsigned long)&do_lq_bad + 4)
return 14;
return 0;
}

/*
 * Exercise lq and stq with MSR = SF only (MSR_LE clear) by running the
 * byte-reversed helpers do_lq_be, do_stq_be and do_lq_np_be through callit().
 * Unlike mode_test_7, no access here is allowed to trap, and the register
 * pair is expected to match memory order (regs[0] with the lower address).
 *
 * Returns 0 when every check passes.  Otherwise returns a small code
 * identifying the failing check; where callit() itself returned nonzero,
 * that value is ORed with the code.  Some data mismatches also print the
 * observed values before returning.
 *
 * NOTE(review): the code "ret | 5" is produced by two different checks
 * (the unaligned lq trapping, and the aligned stq trapping), so that
 * failure value is ambiguous.
 */
int mode_test_8(void)
{
unsigned long quad[4] __attribute__((__aligned__(16)));
unsigned long regs[2];
unsigned long ret, msr;

/*
* Test lq/stq in BE mode
*/
msr = MSR_SF;
quad[0] = 0x123456789abcdef0ul;
quad[1] = 0xfafa5959bcbc3434ul;
ret = callit((unsigned long)quad, (unsigned long)regs,
(unsigned long)&do_lq_be, msr);
if (ret)
return ret | 1;
if (regs[0] != quad[0] || regs[1] != quad[1]) {
/* dump what was actually loaded to aid debugging */
print_hex(regs[0], 16);
print_string(" ");
print_hex(regs[1], 16);
print_string(" ");
return 2;
}
/* don't expect alignment interrupt */
quad[2] = 0x0011223344556677ul;
/* &quad[1] is 8 bytes past the 16-byte-aligned start of quad */
ret = callit((unsigned long)&quad[1], (unsigned long)regs,
(unsigned long)&do_lq_be, msr);
if (ret == 0) {
if (regs[0] != quad[1] || regs[1] != quad[2])
return 3;
} else
return ret | 5;

/* try stq */
regs[0] = 0x5238523852385238ul;
regs[1] = 0x5239523952395239ul;
ret = callit((unsigned long)quad, (unsigned long)regs,
(unsigned long)&do_stq_be, msr);
if (ret)
return ret | 5;
if (quad[0] != regs[0] || quad[1] != regs[1])
return 6;
regs[0] = 0x0172686966746564ul;
regs[1] = 0xfe8d0badd00dabcdul;
/* store to an address one byte past the aligned start; must not trap */
ret = callit((unsigned long)quad + 1, (unsigned long)regs,
(unsigned long)&do_stq_be, msr);
if (ret)
return ret | 7;
/* reassemble the 16 bytes starting at byte offset 1 from quad[0..2] */
if (((quad[0] >> 8) | (quad[1] << 56)) != regs[0] ||
((quad[1] >> 8) | (quad[2] << 56)) != regs[1]) {
/* dump the affected memory to aid debugging */
print_hex(quad[0], 16);
print_string(" ");
print_hex(quad[1], 16);
print_string(" ");
print_hex(quad[2], 16);
print_string(" ");
return 8;
}

/* try lq non-preferred form */
quad[0] = 0x56789abcdef01234ul;
quad[1] = 0x5959bcbc3434fafaul;
ret = callit((unsigned long)quad, (unsigned long)regs,
(unsigned long)&do_lq_np_be, msr);
if (ret)
return ret | 9;
if (regs[0] != quad[0] || regs[1] != quad[1])
return 10;
/* unaligned should not give alignment interrupt in uW implementation */
quad[2] = 0x6677001122334455ul;
ret = callit((unsigned long)&quad[1], (unsigned long)regs,
(unsigned long)&do_lq_np_be, msr);
if (ret)
return ret | 11;
if (regs[0] != quad[1] || regs[1] != quad[2])
return 12;
return 0;
}

int fail = 0; int fail = 0;


void do_test(int num, int (*test)(void)) void do_test(int num, int (*test)(void))
@ -338,6 +508,8 @@ int main(void)
do_test(4, mode_test_4); do_test(4, mode_test_4);
do_test(5, mode_test_5); do_test(5, mode_test_5);
do_test(6, mode_test_6); do_test(6, mode_test_6);
do_test(7, mode_test_7);
do_test(8, mode_test_8);


return fail; return fail;
} }

@ -245,3 +245,23 @@ test_pstw:
pstw %r3,wvar(0) pstw %r3,wvar(0)
li %r3,0 li %r3,0
blr blr

/*
 * test_plq: load the quadword at qvar with the prefixed plq instruction
 * and dump the r4/r5 target pair.
 * In:  r3 = pointer to a 2-doubleword result buffer
 * Out: r3 = 0; r4 is stored at 0(r3) and r5 at 8(r3)
 */
.globl test_plq
test_plq:
/* NOTE(review): the two nops presumably position the 8-byte prefixed
 * instruction relative to the function start -- confirm */
nop
nop
plq %r4,qvar(0)
std %r4,0(%r3)
std %r5,8(%r3)
li %r3,0
blr

/*
 * test_pstq: store the r4/r5 pair to qvar with the prefixed pstq instruction.
 * In:  r3 = pointer to two source doublewords
 * Out: r3 = 0
 */
.globl test_pstq
test_pstq:
/* NOTE(review): the two nops presumably position the 8-byte prefixed
 * instruction relative to the function start -- confirm */
nop
nop
ld %r4,0(%r3)
ld %r5,8(%r3)
pstq %r4,qvar(0)
li %r3,0
blr

@ -7,6 +7,7 @@
#define MSR_LE 0x1 #define MSR_LE 0x1
#define MSR_DR 0x10 #define MSR_DR 0x10
#define MSR_IR 0x20 #define MSR_IR 0x20
#define MSR_HV 0x1000000000000000ul
#define MSR_SF 0x8000000000000000ul #define MSR_SF 0x8000000000000000ul


#define DSISR 18 #define DSISR 18
@ -32,6 +33,8 @@ extern long test_pstd(long arg);
extern long test_psth(long arg); extern long test_psth(long arg);
extern long test_pstw(long arg); extern long test_pstw(long arg);
extern long test_plfd(long arg); extern long test_plfd(long arg);
extern long test_plq(long arg);
extern long test_pstq(long arg);


static inline unsigned long mfspr(int sprnum) static inline unsigned long mfspr(int sprnum)
{ {
@ -103,7 +106,7 @@ long int prefix_test_2(void)
return 1; return 1;
if (mfspr(SRR0) != (unsigned long)&test_paddi_mis + 8) if (mfspr(SRR0) != (unsigned long)&test_paddi_mis + 8)
return 2; return 2;
if (mfspr(SRR1) != (MSR_SF | MSR_LE | (1ul << (63 - 35)) | (1ul << (63 - 34)))) if (mfspr(SRR1) != (MSR_SF | MSR_HV | MSR_LE | (1ul << (63 - 35)) | (1ul << (63 - 34))))
return 3; return 3;


ret = trapit((long)&x, test_plfd); ret = trapit((long)&x, test_plfd);
@ -111,7 +114,7 @@ long int prefix_test_2(void)
return ret; return ret;
if (mfspr(SRR0) != (unsigned long)&test_plfd + 8) if (mfspr(SRR0) != (unsigned long)&test_plfd + 8)
return 6; return 6;
if (mfspr(SRR1) != (MSR_SF | MSR_LE | (1ul << (63 - 34)))) if (mfspr(SRR1) != (MSR_SF | MSR_HV | MSR_LE | (1ul << (63 - 34))))
return 7; return 7;
return 0; return 0;
} }
@ -182,6 +185,39 @@ long int prefix_test_3(void)
return 0; return 0;
} }


/* 16-byte-aligned quadword accessed by test_plq / test_pstq via "qvar(0)" */
unsigned long qvar[2] __attribute__((__aligned__(16)));
/* arbitrary distinct patterns for the two doublewords */
#define V1 0x678912345a5a2b2bull
#define V2 0xa0549922bbccddeeull

/* test plq and pstq */
/*
 * Runs test_plq and test_pstq through trapit() with x as the register
 * buffer.  Returns 0 on success; 2 or 4 on a data mismatch (after printing
 * the observed values); or the nonzero trapit() result ORed with 1 (plq)
 * or 3 (pstq).
 */
long int prefix_test_4(void)
{
long int ret;
unsigned long x[2];

qvar[0] = V1;
qvar[1] = V2;
ret = trapit((long)&x, test_plq);
if (ret)
return ret | 1;
/* the loaded pair is expected in memory order: x[0] == qvar[0] */
if (x[0] != V1 || x[1] != V2) {
print_hex(x[0], 16, " ");
print_hex(x[1], 16, " ");
return 2;
}
/* use complemented, swapped values so stale qvar contents cannot pass */
x[0] = ~V2;
x[1] = ~V1;
ret = trapit((long)&x, test_pstq);
if (ret)
return ret | 3;
if (qvar[0] != ~V2 || qvar[1] != ~V1) {
print_hex(qvar[0], 16, " ");
print_hex(qvar[1], 16, " ");
return 4;
}
return 0;
}

int fail = 0; int fail = 0;


void do_test(int num, long int (*test)(void)) void do_test(int num, long int (*test)(void))
@ -209,6 +245,7 @@ int main(void)
do_test(1, prefix_test_1); do_test(1, prefix_test_1);
do_test(2, prefix_test_2); do_test(2, prefix_test_2);
do_test(3, prefix_test_3); do_test(3, prefix_test_3);
do_test(4, prefix_test_4);


return fail; return fail;
} }

@ -155,3 +155,31 @@ call_ret:
ld %r31,248(%r1) ld %r31,248(%r1)
addi %r1,%r1,256 addi %r1,%r1,256
blr blr

/*
 * do_lqarx: load a quadword with lqarx and dump the r10/r11 target pair.
 * Out: r3 = 0; r10 is stored at 0(r4) and r11 at 8(r4)
 */
.global do_lqarx
do_lqarx:
/* r3 = src, r4 = regs */
lqarx %r10,0,%r3
std %r10,0(%r4)
std %r11,8(%r4)
li %r3,0
blr

/*
 * do_lqarx_bad: execute lqarx with the index register equal to the first
 * target register (r10).  The instruction is emitted as a raw word;
 * presumably the assembler would refuse to encode it from the mnemonic
 * -- TODO confirm.  The remaining instructions mirror do_lqarx and only
 * run if the lqarx completes.
 */
.global do_lqarx_bad
do_lqarx_bad:
/* r3 = src, r4 = regs */
.long 0x7d405228 /* lqarx %r10,0,%r10 */
std %r10,0(%r4)
std %r11,8(%r4)
li %r3,0
blr

/*
 * do_stqcx: attempt a conditional quadword store with stqcx. from the
 * r10/r11 pair and return the resulting condition register.
 * Out: r3 = CR contents with bit 0x10000 forced on, so the value is always
 *      >= 0x10000 when this routine returns normally.
 */
.global do_stqcx
do_stqcx:
/* r3 = dest, r4 = regs, return CR */
ld %r10,0(%r4)
ld %r11,8(%r4)
stqcx. %r10,0,%r3
mfcr %r3
oris %r3,%r3,1 /* to distinguish from trap number */
blr

@ -7,6 +7,10 @@
extern unsigned long callit(unsigned long arg1, unsigned long arg2, extern unsigned long callit(unsigned long arg1, unsigned long arg2,
unsigned long (*fn)(unsigned long, unsigned long)); unsigned long (*fn)(unsigned long, unsigned long));


extern unsigned long do_lqarx(unsigned long src, unsigned long regs);
extern unsigned long do_lqarx_bad(unsigned long src, unsigned long regs);
extern unsigned long do_stqcx(unsigned long dst, unsigned long regs);

#define DSISR 18 #define DSISR 18
#define DAR 19 #define DAR 19
#define SRR0 26 #define SRR0 26
@ -180,6 +184,63 @@ int resv_test_2(void)
return 0; return 0;
} }


/* test lqarx/stqcx */
/* test lqarx/stqcx */
/*
 * Runs do_lqarx, do_stqcx and do_lqarx_bad through callit().
 *
 * Values returned by do_stqcx are >= 0x10000 (it ORs 0x10000 into the CR
 * image); a smaller value from callit() is treated as a trap number.  Bit
 * 0x20000000 of the CR image is used as the "store succeeded" indication.
 *
 * Returns 0 when every check passes, otherwise a nonzero code; several
 * failure paths combine the callit() result with a small constant.
 */
int resv_test_3(void)
{
unsigned long x[4] __attribute__((__aligned__(16)));
unsigned long y[2], regs[2];
unsigned long ret, offset;
int count;

x[0] = 0x7766554433221100ul;
x[1] = 0xffeeddccbbaa9988ul;
y[0] = 0x0badcafef00dd00dul;
y[1] = 0xdeadbeef07070707ul;
/* retry the load-reserve / store-conditional pair until the store succeeds */
for (count = 0; count < 1000; ++count) {
ret = callit((unsigned long)x, (unsigned long)regs, do_lqarx);
if (ret)
return ret | 1;
ret = callit((unsigned long)x, (unsigned long)y, do_stqcx);
if (ret < 0x10000)
return ret | 2;
if (ret & 0x20000000)
break;
}
/* the store never succeeded in 1000 attempts */
if (count == 1000)
return 3;
/* memory is expected to hold y in swapped doubleword order */
if (x[0] != y[1] || x[1] != y[0])
return 4;
/* regs must hold the original contents of x, also in swapped order */
if (regs[1] != 0x7766554433221100ul || regs[0] != 0xffeeddccbbaa9988ul)
return 5;
/* a stqcx. with no preceding lqarx must not report success */
ret = callit((unsigned long)x, (unsigned long)regs, do_stqcx);
if (ret < 0x10000 || (ret & 0x20000000))
return ret | 12;
/* test alignment interrupts */
/* offset 0 must complete normally; offsets 1..15 must yield 0x600 */
for (offset = 0; offset < 16; ++offset) {
ret = callit((unsigned long)x + offset, (unsigned long)regs, do_lqarx);
if (ret == 0 && (offset & 15) != 0)
return 6;
if (ret == 0x600) {
if ((offset & 15) == 0)
return ret + 7;
} else if (ret)
return ret;
ret = callit((unsigned long)x + offset, (unsigned long)y, do_stqcx);
/* >= 0x10000 means do_stqcx ran to completion, i.e. no interrupt */
if (ret >= 0x10000 && (offset & 15) != 0)
return 8;
if (ret == 0x600) {
if ((offset & 15) == 0)
return ret + 9;
} else if (ret < 0x10000)
return ret;
}
/* test illegal interrupt for bad lqarx case */
ret = callit((unsigned long)x, (unsigned long)regs, do_lqarx_bad);
if (ret != 0xe40)
return ret + 10;
return 0;
}

int fail = 0; int fail = 0;


void do_test(int num, int (*test)(void)) void do_test(int num, int (*test)(void))
@ -204,6 +265,7 @@ int main(void)


do_test(1, resv_test_1); do_test(1, resv_test_1);
do_test(2, resv_test_2); do_test(2, resv_test_2);
do_test(3, resv_test_3);


return fail; return fail;
} }

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -4,3 +4,5 @@ test 03:PASS
test 04:PASS test 04:PASS
test 05:PASS test 05:PASS
test 06:PASS test 06:PASS
test 07:PASS
test 08:PASS

Binary file not shown.

@ -1,3 +1,4 @@
test 01:PASS test 01:PASS
test 02:PASS test 02:PASS
test 03:PASS test 03:PASS
test 04:PASS

Binary file not shown.

@ -1,2 +1,3 @@
test 01:PASS test 01:PASS
test 02:PASS test 02:PASS
test 03:PASS

Binary file not shown.

@ -115,7 +115,7 @@ __isr:
std %r29, 29*8(%r1) std %r29, 29*8(%r1)
std %r30, 30*8(%r1) std %r30, 30*8(%r1)
std %r31, 31*8(%r1) std %r31, 31*8(%r1)
mfsrr0 %r0 mfhsrr0 %r0
std %r0, SAVE_NIA*8(%r1) std %r0, SAVE_NIA*8(%r1)
mflr %r0 mflr %r0
std %r0, SAVE_LR*8(%r1) std %r0, SAVE_LR*8(%r1)
@ -123,7 +123,7 @@ __isr:
std %r0, SAVE_CTR*8(%r1) std %r0, SAVE_CTR*8(%r1)
mfcr %r0 mfcr %r0
std %r0, SAVE_CR*8(%r1) std %r0, SAVE_CR*8(%r1)
mfsrr1 %r0 mfhsrr1 %r0
std %r0, SAVE_SRR1*8(%r1) std %r0, SAVE_SRR1*8(%r1)


stdu %r1,-STACK_FRAME_C_MINIMAL(%r1) stdu %r1,-STACK_FRAME_C_MINIMAL(%r1)

@ -72,11 +72,15 @@ begin
variable vec : integer range 0 to 16#fff#; variable vec : integer range 0 to 16#fff#;
variable srr1 : std_ulogic_vector(15 downto 0); variable srr1 : std_ulogic_vector(15 downto 0);
variable intr : std_ulogic; variable intr : std_ulogic;
variable hvi : std_ulogic;
variable scv : std_ulogic;
variable intr_page : std_ulogic_vector(4 downto 0);
begin begin
w_out <= WritebackToRegisterFileInit; w_out <= WritebackToRegisterFileInit;
c_out <= WritebackToCrFileInit; c_out <= WritebackToCrFileInit;
f := WritebackToFetch1Init; f := WritebackToFetch1Init;
vec := 0; vec := 0;
hvi := '0';


complete_out <= instr_tag_init; complete_out <= instr_tag_init;
if e_in.valid = '1' then if e_in.valid = '1' then
@ -93,9 +97,16 @@ begin
interrupt_out.intr <= intr; interrupt_out.intr <= intr;


srr1 := (others => '0'); srr1 := (others => '0');
intr_page := 5x"0";
scv := '0';
if e_in.interrupt = '1' then if e_in.interrupt = '1' then
vec := e_in.intr_vec; vec := e_in.intr_vec;
srr1 := e_in.srr1; srr1 := e_in.srr1;
hvi := e_in.hv_intr;
scv := e_in.is_scv;
if e_in.is_scv = '1' then
intr_page := 5x"17";
end if;
elsif l_in.interrupt = '1' then elsif l_in.interrupt = '1' then
vec := l_in.intr_vec; vec := l_in.intr_vec;
srr1 := l_in.srr1; srr1 := l_in.srr1;
@ -103,7 +114,9 @@ begin
vec := fp_in.intr_vec; vec := fp_in.intr_vec;
srr1 := fp_in.srr1; srr1 := fp_in.srr1;
end if; end if;
interrupt_out.hv_intr <= hvi;
interrupt_out.srr1 <= srr1; interrupt_out.srr1 <= srr1;
interrupt_out.scv_int <= scv;


if intr = '0' then if intr = '0' then
if e_in.write_enable = '1' then if e_in.write_enable = '1' then
@ -161,7 +174,7 @@ begin


-- Outputs to fetch1 -- Outputs to fetch1
f.interrupt := intr; f.interrupt := intr;
f.intr_vec := std_ulogic_vector(to_unsigned(vec, 12)); f.intr_vec := intr_page & std_ulogic_vector(to_unsigned(vec, 12));
f.redirect := e_in.redirect; f.redirect := e_in.redirect;
f.redirect_nia := e_in.write_data; f.redirect_nia := e_in.write_data;
f.br_nia := e_in.last_nia; f.br_nia := e_in.last_nia;

Loading…
Cancel
Save