Merge pull request #422 from paulusmack/real-icache

Icache improvements - use synchronous RAMs and remove 4kB per set limit
pull/426/head
Paul Mackerras 10 months ago committed by GitHub
commit fdcb6ec449
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -166,10 +166,6 @@ RAM_INIT_FILE ?=hello_world/hello_world.hex

FPGA_TARGET ?= ORANGE-CRAB-0.21

# FIXME: icache RAMs aren't being inferrenced as block RAMs on ECP5
# with yosys, so make it smaller for now as a workaround.
ICACHE_NUM_LINES=4

clkgen=fpga/clk_gen_ecp5.vhd
toplevel=fpga/top-generic.vhdl
dmi_dtm=dmi_dtm_dummy.vhdl
@ -227,7 +223,7 @@ LITEDRAM_GHDL_ARG=-gUSE_LITEDRAM=true
endif

GHDL_IMAGE_GENERICS=-gMEMORY_SIZE=$(MEMORY_SIZE) -gRAM_INIT_FILE=$(RAM_INIT_FILE) \
-gRESET_LOW=$(RESET_LOW) -gCLK_INPUT=$(CLK_INPUT) -gCLK_FREQUENCY=$(CLK_FREQUENCY) -gICACHE_NUM_LINES=$(ICACHE_NUM_LINES) \
-gRESET_LOW=$(RESET_LOW) -gCLK_INPUT=$(CLK_INPUT) -gCLK_FREQUENCY=$(CLK_FREQUENCY) \
$(LITEDRAM_GHDL_ARG)



@ -194,6 +194,10 @@ package common is
subtype real_addr_t is std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0);
function addr_to_real(addr: std_ulogic_vector(63 downto 0)) return real_addr_t;

-- Minimum page size
constant MIN_LG_PGSZ : positive := 12;
constant MIN_PAGESZ : positive := 2 ** MIN_LG_PGSZ;

-- Used for tracking instruction completion and pending register writes
constant TAG_COUNT : positive := 4;
constant TAG_NUMBER_BITS : natural := log2(TAG_COUNT);
@ -231,6 +235,7 @@ package common is

type Fetch1ToIcacheType is record
req: std_ulogic;
fetch_fail : std_ulogic;
virt_mode : std_ulogic;
priv_mode : std_ulogic;
big_endian : std_ulogic;
@ -238,6 +243,9 @@ package common is
predicted : std_ulogic;
pred_ntaken : std_ulogic;
nia: std_ulogic_vector(63 downto 0);
next_nia: std_ulogic_vector(63 downto 0);
rpn: std_ulogic_vector(REAL_ADDR_BITS - MIN_LG_PGSZ - 1 downto 0);
next_rpn: std_ulogic_vector(REAL_ADDR_BITS - MIN_LG_PGSZ - 1 downto 0);
end record;

type IcacheToDecode1Type is record
@ -606,7 +614,7 @@ package common is
data : std_ulogic_vector(63 downto 0);
end record;

type MmuToIcacheType is record
type MmuToITLBType is record
tlbld : std_ulogic;
tlbie : std_ulogic;
doall : std_ulogic;
@ -658,7 +666,6 @@ package common is
redirect: std_ulogic;
redir_mode: std_ulogic_vector(3 downto 0);
last_nia: std_ulogic_vector(63 downto 0);
br_offset: std_ulogic_vector(63 downto 0);
br_last: std_ulogic;
br_taken: std_ulogic;
abs_br: std_ulogic;
@ -672,7 +679,7 @@ package common is
write_data => (others => '0'), write_cr_mask => (others => '0'),
write_cr_data => (others => '0'), write_reg => (others => '0'),
interrupt => '0', intr_vec => 0, redirect => '0', redir_mode => "0000",
last_nia => (others => '0'), br_offset => (others => '0'),
last_nia => (others => '0'),
br_last => '0', br_taken => '0', abs_br => '0',
srr1 => (others => '0'), msr => (others => '0'));

@ -758,11 +765,14 @@ package common is
br_nia : std_ulogic_vector(63 downto 0);
br_last : std_ulogic;
br_taken : std_ulogic;
interrupt : std_ulogic;
intr_vec : std_ulogic_vector(11 downto 0);
end record;
constant WritebackToFetch1Init : WritebackToFetch1Type :=
(redirect => '0', virt_mode => '0', priv_mode => '0', big_endian => '0',
mode_32bit => '0', redirect_nia => (others => '0'),
br_last => '0', br_taken => '0', br_nia => (others => '0'));
br_last => '0', br_taken => '0', br_nia => (others => '0'),
interrupt => '0', intr_vec => x"000");

type WritebackToRegisterFileType is record
write_reg : gspr_index_t;

@ -57,7 +57,7 @@ architecture behave of core is
signal fetch1_to_icache : Fetch1ToIcacheType;
signal writeback_to_fetch1: WritebackToFetch1Type;
signal icache_to_decode1 : IcacheToDecode1Type;
signal mmu_to_icache : MmuToIcacheType;
signal mmu_to_itlb : MmuToITLBType;

-- decode signals
signal decode1_to_decode2: Decode1ToDecode2Type;
@ -223,6 +223,7 @@ begin
generic map (
RESET_ADDRESS => (others => '0'),
ALT_RESET_ADDRESS => ALT_RESET_ADDRESS,
TLB_SIZE => ICACHE_TLB_SIZE,
HAS_BTC => HAS_BTC
)
port map (
@ -231,8 +232,9 @@ begin
alt_reset_in => alt_reset_d,
stall_in => fetch1_stall_in,
flush_in => fetch1_flush,
inval_btc => ex1_icache_inval or mmu_to_icache.tlbie,
inval_btc => ex1_icache_inval or mmu_to_itlb.tlbie,
stop_in => dbg_core_stop,
m_in => mmu_to_itlb,
d_in => decode1_to_fetch1,
w_in => writeback_to_fetch1,
i_out => fetch1_to_icache,
@ -249,7 +251,6 @@ begin
LINE_SIZE => 64,
NUM_LINES => ICACHE_NUM_LINES,
NUM_WAYS => ICACHE_NUM_WAYS,
TLB_SIZE => ICACHE_TLB_SIZE,
LOG_LENGTH => LOG_LENGTH
)
port map(
@ -257,7 +258,6 @@ begin
rst => rst_icache,
i_in => fetch1_to_icache,
i_out => icache_to_decode1,
m_in => mmu_to_icache,
flush_in => fetch1_flush,
inval_in => dbg_icache_rst or ex1_icache_inval,
stall_in => icache_stall_in,
@ -454,7 +454,7 @@ begin
l_out => mmu_to_loadstore1,
d_out => mmu_to_dcache,
d_in => dcache_to_mmu,
i_out => mmu_to_icache
i_out => mmu_to_itlb
);

dcache_0: entity work.dcache

@ -35,8 +35,7 @@ architecture behaviour of decode1 is
signal f, fin : Decode1ToFetch1Type;

type br_predictor_t is record
br_nia : std_ulogic_vector(61 downto 0);
br_offset : signed(23 downto 0);
br_target : signed(61 downto 0);
predict : std_ulogic;
end record;

@ -94,8 +93,10 @@ architecture behaviour of decode1 is
INSN_andi_dot => (ALU, NONE, OP_LOGIC, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0', NONE),
INSN_andis_dot => (ALU, NONE, OP_LOGIC, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0', NONE),
INSN_attn => (ALU, NONE, OP_ATTN, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE),
INSN_b => (ALU, NONE, OP_B, NONE, CONST_LI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
INSN_bc => (ALU, NONE, OP_BC, NONE, CONST_BD, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
INSN_brel => (ALU, NONE, OP_B, CIA, CONST_LI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
INSN_babs => (ALU, NONE, OP_B, NONE, CONST_LI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
INSN_bcrel => (ALU, NONE, OP_BC, CIA, CONST_BD, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
INSN_bcabs => (ALU, NONE, OP_BC, NONE, CONST_BD, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
INSN_bcctr => (ALU, NONE, OP_BCREG, NONE, NONE, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
INSN_bclr => (ALU, NONE, OP_BCREG, NONE, NONE, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
INSN_bctar => (ALU, NONE, OP_BCREG, NONE, NONE, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE),
@ -310,6 +311,7 @@ architecture behaviour of decode1 is
INSN_rlwimi => (ALU, NONE, OP_RLC, RA, CONST_SH32, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE),
INSN_rlwinm => (ALU, NONE, OP_RLC, NONE, CONST_SH32, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE),
INSN_rlwnm => (ALU, NONE, OP_RLC, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE),
INSN_rnop => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_sc => (ALU, NONE, OP_SC, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_setb => (ALU, NONE, OP_SETB, NONE, NONE, NONE, RT, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_slbia => (LDST, NONE, OP_TLBIE, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
@ -476,8 +478,6 @@ begin
end if;
end if;
if rst = '1' then
br.br_nia <= (others => '0');
br.br_offset <= (others => '0');
br.predict <= '0';
else
br <= br_in;
@ -499,8 +499,8 @@ begin
decode1_1: process(all)
variable v : Decode1ToDecode2Type;
variable vr : Decode1ToRegisterFileType;
variable br_target : std_ulogic_vector(61 downto 0);
variable br_offset : signed(23 downto 0);
variable br_nia : std_ulogic_vector(61 downto 0);
variable br_offset : std_ulogic_vector(23 downto 0);
variable bv : br_predictor_t;
variable icode : insn_code;
variable sprn : spr_num_t;
@ -594,31 +594,28 @@ begin
-- Branch predictor
-- Note bclr, bcctr and bctar not predicted as we have no
-- count cache or link stack.
br_offset := (others => '0');
br_offset := f_in.insn(25 downto 2);
case icode is
when INSN_b =>
when INSN_brel | INSN_babs =>
-- Unconditional branches are always taken
v.br_pred := '1';
br_offset := signed(f_in.insn(25 downto 2));
when INSN_bc =>
-- Predict backward branches as taken, forward as untaken
when INSN_bcrel =>
-- Predict backward relative branches as taken, others as untaken
v.br_pred := f_in.insn(15);
br_offset := resize(signed(f_in.insn(15 downto 2)), 24);
br_offset(23 downto 14) := (others => '1');
when others =>
end case;
bv.br_nia := f_in.nia(63 downto 2);
br_nia := f_in.nia(63 downto 2);
if f_in.insn(1) = '1' then
bv.br_nia := (others => '0');
br_nia := (others => '0');
end if;
bv.br_offset := br_offset;
bv.br_target := signed(br_nia) + signed(br_offset);
if f_in.next_predicted = '1' then
v.br_pred := '1';
elsif f_in.next_pred_ntaken = '1' then
v.br_pred := '0';
end if;
bv.predict := v.br_pred and f_in.valid and not flush_in and not busy_out and not f_in.next_predicted;
-- after a clock edge...
br_target := std_ulogic_vector(signed(br.br_nia) + br.br_offset);

-- Work out GPR/FPR read addresses
-- Note that for prefixed instructions we are working this out based
@ -665,7 +662,7 @@ begin
d_out.decode <= decode;
r_out <= vr;
f_out.redirect <= br.predict;
f_out.redirect_nia <= br_target & "00";
f_out.redirect_nia <= std_ulogic_vector(br.br_target) & "00";
flush_out <= bv.predict or br.predict;
end process;


@ -221,9 +221,8 @@ architecture behaviour of decode2 is
OP_SHR => "010",
OP_EXTSWSLI => "010",
OP_MUL_L64 => "011", -- muldiv_result
OP_B => "110", -- next_nia
OP_BC => "110",
OP_BCREG => "110",
OP_BCREG => "101", -- ramspr_result
OP_RFID => "101",
OP_ADDG6S => "111", -- misc_result
OP_ISEL => "111",
OP_DARN => "111",

@ -47,14 +47,16 @@ package decode_types is
INSN_andi_dot, -- 10
INSN_andis_dot,
INSN_attn,
INSN_b,
INSN_bc,
INSN_brel,
INSN_babs,
INSN_bcrel,
INSN_bcabs,
INSN_bcctr,
INSN_bclr,
INSN_bctar,
INSN_brh,
INSN_brh, -- 20
INSN_brw,
INSN_brd, -- 20
INSN_brd,
INSN_cbcdtd,
INSN_cdtbcd,
INSN_cmpi,
@ -62,9 +64,9 @@ package decode_types is
INSN_cntlzw,
INSN_cntlzd,
INSN_cnttzw,
INSN_cnttzd,
INSN_cnttzd, -- 30
INSN_crand,
INSN_crandc, -- 30
INSN_crandc,
INSN_creqv,
INSN_crnand,
INSN_crnor,
@ -72,9 +74,9 @@ package decode_types is
INSN_crorc,
INSN_crxor,
INSN_darn,
INSN_eieio,
INSN_eieio, -- 40
INSN_extsb,
INSN_extsh, -- 40
INSN_extsh,
INSN_extsw,
INSN_extswsli,
INSN_isync,
@ -82,9 +84,9 @@ package decode_types is
INSN_ld,
INSN_ldu,
INSN_lhau,
INSN_lwa,
INSN_lwa, -- 50
INSN_lwzu,
INSN_mcrf, -- 50
INSN_mcrf,
INSN_mcrxrx,
INSN_mfcr,
INSN_mfmsr,
@ -92,9 +94,9 @@ package decode_types is
INSN_mtcrf,
INSN_mtmsr,
INSN_mtmsrd,
INSN_mtspr,
INSN_mtspr, -- 60
INSN_mulli,
INSN_neg, -- 60
INSN_neg,
INSN_nop,
INSN_ori,
INSN_oris,
@ -102,46 +104,49 @@ package decode_types is
INSN_popcntw,
INSN_popcntd,
INSN_prtyw,
INSN_prtyd,
INSN_prtyd, -- 70
INSN_rfid,
INSN_rldic, -- 70
INSN_rldic,
INSN_rldicl,
INSN_rldicr,
INSN_rldimi,
INSN_rlwimi,
INSN_rlwinm,
INSN_rnop,
INSN_sc,
INSN_setb,
INSN_setb, -- 80
INSN_slbia,
INSN_sradi,
INSN_srawi, -- 80
INSN_srawi,
INSN_stbu,
INSN_std,
INSN_stdu,
INSN_sthu,
INSN_stwu,
INSN_subfic,
INSN_subfme,
INSN_subfme, -- 90
INSN_subfze,
INSN_sync,
INSN_tdi, -- 90
INSN_tdi,
INSN_tlbsync,
INSN_twi,
INSN_wait,
INSN_xori,
INSN_xoris,
-- pad to 104
INSN_063, INSN_064, INSN_065, INSN_066, INSN_067,

-- Non-prefixed instructions that have a MLS:D prefixed form and
-- their corresponding prefixed instructions.
-- The non-prefixed versions have even indexes so that we can
-- convert them to the prefixed version by setting bit 0
INSN_addi, -- 96
INSN_addi, -- 104
INSN_paddi,
INSN_lbz,
INSN_plbz,
INSN_lha, -- 100
INSN_lha,
INSN_plha,
INSN_lhz,
INSN_lhz, -- 110
INSN_plhz,
INSN_lwz,
INSN_plwz,
@ -149,11 +154,11 @@ package decode_types is
INSN_pstb,
INSN_sth,
INSN_psth,
INSN_stw, -- 110
INSN_stw,
INSN_pstw,

-- Slots for non-prefixed opcodes that are 8LS:D when prefixed
INSN_lhzu, -- 112
INSN_lhzu, -- 120
INSN_plwa,
INSN_op57,
INSN_pld,
@ -161,8 +166,7 @@ package decode_types is
INSN_pstd,

-- pad to 128 to simplify comparison logic
INSN_076, INSN_077,
INSN_078, INSN_079, INSN_07a, INSN_07b, INSN_07c, INSN_07d, INSN_07e, INSN_07f,
INSN_07e, INSN_07f,

-- The following instructions have an RB operand but don't access FPRs
INSN_add,
@ -475,7 +479,308 @@ package decode_types is
update => '0', reserve => '0', is_32bit => '0',
is_signed => '0', rc => NONE, lr => '0', sgl_pipe => '0', repeat => NONE);

-- This function maps from insn_code values to primary opcode.
-- With this, we don't have to store the primary opcode of each instruction
-- in the icache if we are storing its insn_code.
function recode_primary_opcode(icode: insn_code) return std_ulogic_vector;

end decode_types;

package body decode_types is

function recode_primary_opcode(icode: insn_code) return std_ulogic_vector is
begin
case icode is
when INSN_addic => return "001100";
when INSN_addic_dot => return "001101";
when INSN_addi => return "001110";
when INSN_addis => return "001111";
when INSN_addpcis => return "010011";
when INSN_andi_dot => return "011100";
when INSN_andis_dot => return "011101";
when INSN_attn => return "000000";
when INSN_brel => return "010010";
when INSN_babs => return "010010";
when INSN_bcrel => return "010000";
when INSN_bcabs => return "010000";
when INSN_brh => return "011111";
when INSN_brw => return "011111";
when INSN_brd => return "011111";
when INSN_cmpi => return "001011";
when INSN_cmpli => return "001010";
when INSN_lbz => return "100010";
when INSN_lbzu => return "100011";
when INSN_lfd => return "110010";
when INSN_lfdu => return "110011";
when INSN_lfs => return "110000";
when INSN_lfsu => return "110001";
when INSN_lha => return "101010";
when INSN_lhau => return "101011";
when INSN_lhz => return "101000";
when INSN_lhzu => return "101001";
when INSN_lwz => return "100000";
when INSN_lwzu => return "100001";
when INSN_mulli => return "000111";
when INSN_nop => return "011000";
when INSN_ori => return "011000";
when INSN_oris => return "011001";
when INSN_rlwimi => return "010100";
when INSN_rlwinm => return "010101";
when INSN_rlwnm => return "010111";
when INSN_sc => return "010001";
when INSN_stb => return "100110";
when INSN_stbu => return "100111";
when INSN_stfd => return "110110";
when INSN_stfdu => return "110111";
when INSN_stfs => return "110100";
when INSN_stfsu => return "110101";
when INSN_sth => return "101100";
when INSN_sthu => return "101101";
when INSN_stw => return "100100";
when INSN_stwu => return "100101";
when INSN_subfic => return "001000";
when INSN_tdi => return "000010";
when INSN_twi => return "000011";
when INSN_xori => return "011010";
when INSN_xoris => return "011011";
when INSN_maddhd => return "000100";
when INSN_maddhdu => return "000100";
when INSN_maddld => return "000100";
when INSN_rldic => return "011110";
when INSN_rldicl => return "011110";
when INSN_rldicr => return "011110";
when INSN_rldimi => return "011110";
when INSN_rldcl => return "011110";
when INSN_rldcr => return "011110";
when INSN_ld => return "111010";
when INSN_ldu => return "111010";
when INSN_lwa => return "111010";
when INSN_fdivs => return "111011";
when INSN_fsubs => return "111011";
when INSN_fadds => return "111011";
when INSN_fsqrts => return "111011";
when INSN_fres => return "111011";
when INSN_fmuls => return "111011";
when INSN_frsqrtes => return "111011";
when INSN_fmsubs => return "111011";
when INSN_fmadds => return "111011";
when INSN_fnmsubs => return "111011";
when INSN_fnmadds => return "111011";
when INSN_std => return "111110";
when INSN_stdu => return "111110";
when INSN_fdiv => return "111111";
when INSN_fsub => return "111111";
when INSN_fadd => return "111111";
when INSN_fsqrt => return "111111";
when INSN_fsel => return "111111";
when INSN_fre => return "111111";
when INSN_fmul => return "111111";
when INSN_frsqrte => return "111111";
when INSN_fmsub => return "111111";
when INSN_fmadd => return "111111";
when INSN_fnmsub => return "111111";
when INSN_fnmadd => return "111111";
when INSN_prefix => return "000001";
when INSN_op57 => return "111001";
when INSN_op61 => return "111101";
when INSN_add => return "011111";
when INSN_addc => return "011111";
when INSN_adde => return "011111";
when INSN_addex => return "011111";
when INSN_addg6s => return "011111";
when INSN_addme => return "011111";
when INSN_addze => return "011111";
when INSN_and => return "011111";
when INSN_andc => return "011111";
when INSN_bperm => return "011111";
when INSN_cbcdtd => return "011111";
when INSN_cdtbcd => return "011111";
when INSN_cmp => return "011111";
when INSN_cmpb => return "011111";
when INSN_cmpeqb => return "011111";
when INSN_cmpl => return "011111";
when INSN_cmprb => return "011111";
when INSN_cntlzd => return "011111";
when INSN_cntlzw => return "011111";
when INSN_cnttzd => return "011111";
when INSN_cnttzw => return "011111";
when INSN_darn => return "011111";
when INSN_dcbf => return "011111";
when INSN_dcbst => return "011111";
when INSN_dcbt => return "011111";
when INSN_dcbtst => return "011111";
when INSN_dcbz => return "011111";
when INSN_divdeu => return "011111";
when INSN_divweu => return "011111";
when INSN_divde => return "011111";
when INSN_divwe => return "011111";
when INSN_divdu => return "011111";
when INSN_divwu => return "011111";
when INSN_divd => return "011111";
when INSN_divw => return "011111";
when INSN_eieio => return "011111";
when INSN_eqv => return "011111";
when INSN_extsb => return "011111";
when INSN_extsh => return "011111";
when INSN_extsw => return "011111";
when INSN_extswsli => return "011111";
when INSN_icbi => return "011111";
when INSN_icbt => return "011111";
when INSN_isel => return "011111";
when INSN_lbarx => return "011111";
when INSN_lbzcix => return "011111";
when INSN_lbzux => return "011111";
when INSN_lbzx => return "011111";
when INSN_ldarx => return "011111";
when INSN_ldbrx => return "011111";
when INSN_ldcix => return "011111";
when INSN_ldux => return "011111";
when INSN_ldx => return "011111";
when INSN_lfdx => return "011111";
when INSN_lfdux => return "011111";
when INSN_lfiwax => return "011111";
when INSN_lfiwzx => return "011111";
when INSN_lfsx => return "011111";
when INSN_lfsux => return "011111";
when INSN_lharx => return "011111";
when INSN_lhaux => return "011111";
when INSN_lhax => return "011111";
when INSN_lhbrx => return "011111";
when INSN_lhzcix => return "011111";
when INSN_lhzux => return "011111";
when INSN_lhzx => return "011111";
when INSN_lwarx => return "011111";
when INSN_lwaux => return "011111";
when INSN_lwax => return "011111";
when INSN_lwbrx => return "011111";
when INSN_lwzcix => return "011111";
when INSN_lwzux => return "011111";
when INSN_lwzx => return "011111";
when INSN_mcrxrx => return "011111";
when INSN_mfcr => return "011111";
when INSN_mfmsr => return "011111";
when INSN_mfspr => return "011111";
when INSN_modud => return "011111";
when INSN_moduw => return "011111";
when INSN_modsd => return "011111";
when INSN_modsw => return "011111";
when INSN_mtcrf => return "011111";
when INSN_mtmsr => return "011111";
when INSN_mtmsrd => return "011111";
when INSN_mtspr => return "011111";
when INSN_mulhd => return "011111";
when INSN_mulhdu => return "011111";
when INSN_mulhw => return "011111";
when INSN_mulhwu => return "011111";
when INSN_mulld => return "011111";
when INSN_mullw => return "011111";
when INSN_nand => return "011111";
when INSN_neg => return "011111";
when INSN_rnop => return "011111";
when INSN_nor => return "011111";
when INSN_or => return "011111";
when INSN_orc => return "011111";
when INSN_popcntb => return "011111";
when INSN_popcntd => return "011111";
when INSN_popcntw => return "011111";
when INSN_prtyd => return "011111";
when INSN_prtyw => return "011111";
when INSN_setb => return "011111";
when INSN_slbia => return "011111";
when INSN_sld => return "011111";
when INSN_slw => return "011111";
when INSN_srad => return "011111";
when INSN_sradi => return "011111";
when INSN_sraw => return "011111";
when INSN_srawi => return "011111";
when INSN_srd => return "011111";
when INSN_srw => return "011111";
when INSN_stbcix => return "011111";
when INSN_stbcx => return "011111";
when INSN_stbux => return "011111";
when INSN_stbx => return "011111";
when INSN_stdbrx => return "011111";
when INSN_stdcix => return "011111";
when INSN_stdcx => return "011111";
when INSN_stdux => return "011111";
when INSN_stdx => return "011111";
when INSN_stfdx => return "011111";
when INSN_stfdux => return "011111";
when INSN_stfiwx => return "011111";
when INSN_stfsx => return "011111";
when INSN_stfsux => return "011111";
when INSN_sthbrx => return "011111";
when INSN_sthcix => return "011111";
when INSN_sthcx => return "011111";
when INSN_sthux => return "011111";
when INSN_sthx => return "011111";
when INSN_stwbrx => return "011111";
when INSN_stwcix => return "011111";
when INSN_stwcx => return "011111";
when INSN_stwux => return "011111";
when INSN_stwx => return "011111";
when INSN_subf => return "011111";
when INSN_subfc => return "011111";
when INSN_subfe => return "011111";
when INSN_subfme => return "011111";
when INSN_subfze => return "011111";
when INSN_sync => return "011111";
when INSN_td => return "011111";
when INSN_tw => return "011111";
when INSN_tlbie => return "011111";
when INSN_tlbiel => return "011111";
when INSN_tlbsync => return "011111";
when INSN_wait => return "011111";
when INSN_xor => return "011111";
when INSN_bcctr => return "010011";
when INSN_bclr => return "010011";
when INSN_bctar => return "010011";
when INSN_crand => return "010011";
when INSN_crandc => return "010011";
when INSN_creqv => return "010011";
when INSN_crnand => return "010011";
when INSN_crnor => return "010011";
when INSN_cror => return "010011";
when INSN_crorc => return "010011";
when INSN_crxor => return "010011";
when INSN_isync => return "010011";
when INSN_mcrf => return "010011";
when INSN_rfid => return "010011";
when INSN_fcfids => return "111011";
when INSN_fcfidus => return "111011";
when INSN_fcmpu => return "111111";
when INSN_fcmpo => return "111111";
when INSN_mcrfs => return "111111";
when INSN_ftdiv => return "111111";
when INSN_ftsqrt => return "111111";
when INSN_mtfsb => return "111111";
when INSN_mtfsfi => return "111111";
when INSN_fmrgow => return "111111";
when INSN_fmrgew => return "111111";
when INSN_mffs => return "111111";
when INSN_mtfsf => return "111111";
when INSN_fcpsgn => return "111111";
when INSN_fneg => return "111111";
when INSN_fmr => return "111111";
when INSN_fnabs => return "111111";
when INSN_fabs => return "111111";
when INSN_frin => return "111111";
when INSN_friz => return "111111";
when INSN_frip => return "111111";
when INSN_frim => return "111111";
when INSN_frsp => return "111111";
when INSN_fctiw => return "111111";
when INSN_fctiwu => return "111111";
when INSN_fctid => return "111111";
when INSN_fcfid => return "111111";
when INSN_fctidu => return "111111";
when INSN_fcfidu => return "111111";
when INSN_fctiwz => return "111111";
when INSN_fctiwuz => return "111111";
when INSN_fctidz => return "111111";
when INSN_fctiduz => return "111111";
when others => return "XXXXXX";
end case;
end;

end decode_types;

@ -95,6 +95,7 @@ architecture behaviour of execute1 is
exception : std_ulogic;
trap : std_ulogic;
advance_nia : std_ulogic;
redir_to_next : std_ulogic;
new_msr : std_ulogic_vector(63 downto 0);
take_branch : std_ulogic;
direct_branch : std_ulogic;
@ -124,6 +125,9 @@ architecture behaviour of execute1 is
res2_sel : std_ulogic_vector(1 downto 0);
spr_select : spr_id;
pmu_spr_num : std_ulogic_vector(4 downto 0);
redir_to_next : std_ulogic;
advance_nia : std_ulogic;
lr_from_next : std_ulogic;
mul_in_progress : std_ulogic;
mul_finish : std_ulogic;
div_in_progress : std_ulogic;
@ -145,6 +149,7 @@ architecture behaviour of execute1 is
prev_prefixed => '0',
oe => '0', mul_select => "00", res2_sel => "00",
spr_select => spr_id_init, pmu_spr_num => 5x"0",
redir_to_next => '0', advance_nia => '0', lr_from_next => '0',
mul_in_progress => '0', mul_finish => '0', div_in_progress => '0',
no_instr_avail => '0', instr_dispatch => '0', ext_interrupt => '0',
taken_branch_event => '0', br_mispredict => '0',
@ -510,6 +515,7 @@ begin
variable wr_addr : ramspr_index;
variable even_wr_enab, odd_wr_enab : std_ulogic;
variable even_wr_data, odd_wr_data : std_ulogic_vector(63 downto 0);
variable ramspr_even_data : std_ulogic_vector(63 downto 0);
variable doit : std_ulogic;
begin
-- Read address mux and async RAM reading
@ -533,11 +539,16 @@ begin
else
wr_addr := ex1.ramspr_wraddr;
end if;
if ex1.lr_from_next = '1' then
ramspr_even_data := next_nia;
else
ramspr_even_data := ex1.e.write_data;
end if;
if interrupt_in.intr = '1' then
even_wr_data := ex2.e.last_nia;
odd_wr_data := intr_srr1(ctrl.msr, interrupt_in.srr1);
else
even_wr_data := ex1.e.write_data;
even_wr_data := ramspr_even_data;
odd_wr_data := ex1.ramspr_odd_data;
end if;
ramspr_wr_addr <= wr_addr;
@ -550,7 +561,7 @@ begin
-- We assume no instruction executes in the cycle immediately following
-- an interrupt, so we don't need to bypass interrupt data
if ex1.se.ramspr_write_even = '1' and e_in.ramspr_even_rdaddr = ex1.ramspr_wraddr then
ramspr_even <= ex1.e.write_data;
ramspr_even <= ramspr_even_data;
else
ramspr_even <= even_rd_data;
end if;
@ -593,7 +604,6 @@ begin
shortmul_result when "011",
muldiv_result when "100",
ramspr_result when "101",
next_nia when "110",
misc_result when others;

execute1_0: process(clk)
@ -1016,7 +1026,6 @@ begin
v.e.mode_32bit := not ex1.msr(MSR_SF);
v.e.instr_tag := e_in.instr_tag;
v.e.last_nia := e_in.nia;
v.e.br_offset := 64x"4";

v.se.ramspr_write_even := e_in.ramspr_write_even;
v.se.ramspr_write_odd := e_in.ramspr_write_odd;
@ -1114,8 +1123,6 @@ begin
v.direct_branch := '1';
v.e.br_last := '1';
v.e.br_taken := '1';
v.e.br_offset := b_in;
v.e.abs_br := insn_aa(e_in.insn);
if e_in.br_pred = '0' then
-- should never happen
v.e.redirect := '1';
@ -1129,14 +1136,13 @@ begin
bo := insn_bo(e_in.insn);
bi := insn_bi(e_in.insn);
v.take_branch := ppc_bc_taken(bo, bi, cr_in, ramspr_odd);
if v.take_branch = '1' then
v.e.br_offset := b_in;
v.e.abs_br := insn_aa(e_in.insn);
end if;
-- Mispredicted branches cause a redirect
if v.take_branch /= e_in.br_pred then
v.e.redirect := '1';
end if;
if v.take_branch = '0' then
v.redir_to_next := '1';
end if;
v.direct_branch := '1';
v.e.br_last := '1';
v.e.br_taken := v.take_branch;
@ -1150,10 +1156,6 @@ begin
bo := insn_bo(e_in.insn);
bi := insn_bi(e_in.insn);
v.take_branch := ppc_bc_taken(bo, bi, cr_in, ramspr_odd);
if v.take_branch = '1' then
v.e.br_offset := ramspr_result;
v.e.abs_br := '1';
end if;
-- Indirect branches are never predicted taken
v.e.redirect := v.take_branch;
v.e.br_taken := v.take_branch;
@ -1177,8 +1179,6 @@ begin
v.new_msr(MSR_DR) := '1';
end if;
v.se.write_msr := '1';
v.e.br_offset := ramspr_result;
v.e.abs_br := '1';
v.e.redirect := '1';
v.se.write_cfar := '1';
if HAS_FPU then
@ -1292,6 +1292,7 @@ begin

when OP_ISYNC =>
v.e.redirect := '1';
v.redir_to_next := '1';

when OP_ICBI =>
v.se.icache_inval := '1';
@ -1406,6 +1407,7 @@ begin
v.mul_select := e_in.sub_select(1 downto 0);
v.se := side_effect_init;
v.ramspr_wraddr := e_in.ramspr_wraddr;
v.lr_from_next := e_in.lr;
v.ramspr_odd_data := actions.ramspr_odd_data;
end if;

@ -1423,9 +1425,6 @@ begin

irq_valid := ex1.msr(MSR_EE) and (pmu_to_x.intr or ctrl.dec(63) or ext_irq_in);

-- Next insn adder used in a couple of places
next_nia <= std_ulogic_vector(unsigned(e_in.nia) + 4);

-- rotator control signals
right_shift <= '1' when e_in.insn_type = OP_SHR else '0';
rot_clear_left <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCL else '0';
@ -1507,10 +1506,9 @@ begin
x_to_divider.valid <= actions.start_div;
v.div_in_progress := actions.start_div;
v.br_mispredict := v.e.redirect and actions.direct_branch;
v.advance_nia := actions.advance_nia;
v.redir_to_next := actions.redir_to_next;
exception := actions.trap;
if actions.advance_nia = '1' then
v.e.last_nia := next_nia;
end if;

-- Go busy while division is happening because the
-- divider is not pipelined. Also go busy while a
@ -1681,6 +1679,9 @@ begin
variable sign, zero : std_ulogic;
variable rcnz_hi, rcnz_lo : std_ulogic;
begin
-- Next insn adder used in a couple of places
next_nia <= std_ulogic_vector(unsigned(ex1.e.last_nia) + 4);

v := ex2;
if stage2_stall = '0' then
v.e := ex1.e;
@ -1688,6 +1689,9 @@ begin
v.ext_interrupt := ex1.ext_interrupt;
v.taken_branch_event := ex1.taken_branch_event;
v.br_mispredict := ex1.br_mispredict;
if ex1.advance_nia = '1' then
v.e.last_nia := next_nia;
end if;
end if;

if ex1.se.mult_32s = '1' and ex1.oe = '1' then
@ -1748,10 +1752,12 @@ begin
else
sprres := pmu_to_x.spr_val;
end if;
if ex1.res2_sel(1) = '0' then
ex_result := rcresult;
else
if ex1.res2_sel(1) = '1' then
ex_result := sprres;
elsif ex1.redir_to_next = '1' then
ex_result := next_nia;
else
ex_result := rcresult;
end if;

cr_res := ex1.e.write_cr_data;

@ -3,12 +3,14 @@ use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.utils.all;
use work.common.all;

entity fetch1 is
generic(
RESET_ADDRESS : std_logic_vector(63 downto 0) := (others => '0');
ALT_RESET_ADDRESS : std_logic_vector(63 downto 0) := (others => '0');
TLB_SIZE : positive := 64; -- L1 ITLB number of entries (direct mapped)
HAS_BTC : boolean := true
);
port(
@ -21,6 +23,7 @@ entity fetch1 is
inval_btc : in std_ulogic;
stop_in : in std_ulogic;
alt_reset_in : in std_ulogic;
m_in : in MmuToITLBType;

-- redirect from writeback unit
w_in : in WritebackToFetch1Type;
@ -40,14 +43,32 @@ architecture behaviour of fetch1 is
type reg_internal_t is record
mode_32bit: std_ulogic;
rd_is_niap4: std_ulogic;
predicted_taken: std_ulogic;
predicted_nia: std_ulogic_vector(63 downto 0);
tlbcheck: std_ulogic;
tlbstall: std_ulogic;
next_nia: std_ulogic_vector(63 downto 0);
end record;

-- Mini effective to real translation cache
type erat_t is record
epn0: std_ulogic_vector(63 - MIN_LG_PGSZ downto 0);
epn1: std_ulogic_vector(63 - MIN_LG_PGSZ downto 0);
rpn0: std_ulogic_vector(REAL_ADDR_BITS - MIN_LG_PGSZ - 1 downto 0);
rpn1: std_ulogic_vector(REAL_ADDR_BITS - MIN_LG_PGSZ - 1 downto 0);
priv0: std_ulogic;
priv1: std_ulogic;
valid: std_ulogic_vector(1 downto 0);
mru: std_ulogic; -- '1' => entry 1 most recently used
end record;

signal r, r_next : Fetch1ToIcacheType;
signal r_int, r_next_int : reg_internal_t;
signal advance_nia : std_ulogic;
signal log_nia : std_ulogic_vector(42 downto 0);

signal erat : erat_t;
signal erat_hit : std_ulogic;
signal erat_sel : std_ulogic;

constant BTC_ADDR_BITS : integer := 10;
constant BTC_TAG_BITS : integer := 62 - BTC_ADDR_BITS;
constant BTC_TARGET_BITS : integer := 62;
@ -55,43 +76,75 @@ architecture behaviour of fetch1 is
constant BTC_WIDTH : integer := BTC_TAG_BITS + BTC_TARGET_BITS + 2;
type btc_mem_type is array (0 to BTC_SIZE - 1) of std_ulogic_vector(BTC_WIDTH - 1 downto 0);

signal btc_rd_addr : unsigned(BTC_ADDR_BITS - 1 downto 0);
signal btc_rd_data : std_ulogic_vector(BTC_WIDTH - 1 downto 0) := (others => '0');
signal btc_rd_valid : std_ulogic := '0';

-- L1 ITLB.
constant TLB_BITS : natural := log2(TLB_SIZE);
constant TLB_EA_TAG_BITS : natural := 64 - (MIN_LG_PGSZ + TLB_BITS);
constant TLB_PTE_BITS : natural := 64;

subtype tlb_index_t is integer range 0 to TLB_SIZE - 1;
type tlb_valids_t is array(tlb_index_t) of std_ulogic;
subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
type tlb_tags_t is array(tlb_index_t) of tlb_tag_t;
subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t;

signal itlb_valids : tlb_valids_t;
signal itlb_tags : tlb_tags_t;
signal itlb_ptes : tlb_ptes_t;

-- Values read from above arrays on a clock edge
signal itlb_valid : std_ulogic;
signal itlb_ttag : tlb_tag_t;
signal itlb_pte : tlb_pte_t;
signal itlb_hit : std_ulogic;

-- Privilege bit from PTE EAA field
signal eaa_priv : std_ulogic;

-- Simple hash for direct-mapped TLB index
function hash_ea(addr: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
begin
hash := addr(MIN_LG_PGSZ + TLB_BITS - 1 downto MIN_LG_PGSZ)
xor addr(MIN_LG_PGSZ + 2 * TLB_BITS - 1 downto MIN_LG_PGSZ + TLB_BITS)
xor addr(MIN_LG_PGSZ + 3 * TLB_BITS - 1 downto MIN_LG_PGSZ + 2 * TLB_BITS);
return hash;
end;

begin

regs : process(clk)
begin
if rising_edge(clk) then
log_nia <= r.nia(63) & r.nia(43 downto 2);
if r /= r_next then
if r /= r_next and advance_nia = '1' then
report "fetch1 rst:" & std_ulogic'image(rst) &
" IR:" & std_ulogic'image(r_next.virt_mode) &
" P:" & std_ulogic'image(r_next.priv_mode) &
" E:" & std_ulogic'image(r_next.big_endian) &
" 32:" & std_ulogic'image(r_next_int.mode_32bit) &
" I:" & std_ulogic'image(w_in.interrupt) &
" R:" & std_ulogic'image(w_in.redirect) & std_ulogic'image(d_in.redirect) &
" S:" & std_ulogic'image(stall_in) &
" T:" & std_ulogic'image(stop_in) &
" nia:" & to_hstring(r_next.nia);
" nia:" & to_hstring(r_next.nia) &
" req:" & std_ulogic'image(r_next.req) &
" FF:" & std_ulogic'image(r_next.fetch_fail);
end if;
if rst = '1' or w_in.redirect = '1' or d_in.redirect = '1' or stall_in = '0' then
r.virt_mode <= r_next.virt_mode;
r.priv_mode <= r_next.priv_mode;
r.big_endian <= r_next.big_endian;
r_int.mode_32bit <= r_next_int.mode_32bit;
end if;
if advance_nia = '1' then
r.predicted <= r_next.predicted;
r.pred_ntaken <= r_next.pred_ntaken;
r.nia <= r_next.nia;
r_int.predicted_taken <= r_next_int.predicted_taken;
r_int.predicted_nia <= r_next_int.predicted_nia;
r_int.rd_is_niap4 <= r_next_int.rd_is_niap4;
r <= r_next;
r_int <= r_next_int;
end if;
-- always send the up-to-date stop mark and req
r.stop_mark <= stop_in;
r.req <= not rst and not stop_in;
r.req <= r_next.req;
r.fetch_fail <= r_next.fetch_fail;
r_int.tlbcheck <= r_next_int.tlbcheck;
r_int.tlbstall <= r_next_int.tlbstall;
end if;
end process;
log_out <= log_nia;
@ -119,15 +172,13 @@ begin
variable raddr : unsigned(BTC_ADDR_BITS - 1 downto 0);
begin
if rising_edge(clk) then
raddr := unsigned(r.nia(BTC_ADDR_BITS + 1 downto 2)) +
to_unsigned(2, BTC_ADDR_BITS);
if advance_nia = '1' then
if is_X(raddr) then
if is_X(btc_rd_addr) then
btc_rd_data <= (others => 'X');
btc_rd_valid <= 'X';
else
btc_rd_data <= btc_memory(to_integer(raddr));
btc_rd_valid <= btc_valids(to_integer(raddr));
btc_rd_data <= btc_memory(to_integer(btc_rd_addr));
btc_rd_valid <= btc_valids(to_integer(btc_rd_addr));
end if;
end if;
if btc_wr = '1' then
@ -144,70 +195,250 @@ begin
end process;
end generate;

erat_sync : process(clk)
begin
if rising_edge(clk) then
if rst /= '0' or m_in.tlbie = '1' then
erat.valid <= "00";
erat.mru <= '0';
else
if erat_hit = '1' then
erat.mru <= erat_sel;
end if;
if m_in.tlbld = '1' then
erat.epn0 <= m_in.addr(63 downto MIN_LG_PGSZ);
erat.rpn0 <= m_in.pte(REAL_ADDR_BITS-1 downto MIN_LG_PGSZ);
erat.priv0 <= m_in.pte(3);
erat.valid(0) <= '1';
erat.valid(1) <= '0';
erat.mru <= '0';
elsif r_int.tlbcheck = '1' and itlb_hit = '1' then
if erat.mru = '0' then
erat.epn1 <= r.nia(63 downto MIN_LG_PGSZ);
erat.rpn1 <= itlb_pte(REAL_ADDR_BITS-1 downto MIN_LG_PGSZ);
erat.priv1 <= itlb_pte(3);
erat.valid(1) <= '1';
else
erat.epn0 <= r.nia(63 downto MIN_LG_PGSZ);
erat.rpn0 <= itlb_pte(REAL_ADDR_BITS-1 downto MIN_LG_PGSZ);
erat.priv0 <= itlb_pte(3);
erat.valid(0) <= '1';
end if;
erat.mru <= not erat.mru;
end if;
end if;
end if;
end process;

-- Read TLB using the NIA for the next cycle
itlb_read : process(clk)
variable tlb_req_index : std_ulogic_vector(TLB_BITS - 1 downto 0);
begin
if rising_edge(clk) then
if advance_nia = '1' then
tlb_req_index := hash_ea(r_next.nia);
if is_X(tlb_req_index) then
itlb_pte <= (others => 'X');
itlb_ttag <= (others => 'X');
itlb_valid <= 'X';
else
itlb_pte <= itlb_ptes(to_integer(unsigned(tlb_req_index)));
itlb_ttag <= itlb_tags(to_integer(unsigned(tlb_req_index)));
itlb_valid <= itlb_valids(to_integer(unsigned(tlb_req_index)));
end if;
end if;
end if;
end process;

-- TLB hit detection
itlb_lookup : process(all)
begin
itlb_hit <= '0';
if itlb_ttag = r.nia(63 downto MIN_LG_PGSZ + TLB_BITS) then
itlb_hit <= itlb_valid;
end if;
end process;

-- iTLB update
itlb_update: process(clk)
variable wr_index : std_ulogic_vector(TLB_BITS - 1 downto 0);
begin
if rising_edge(clk) then
wr_index := hash_ea(m_in.addr);
if rst = '1' or (m_in.tlbie = '1' and m_in.doall = '1') then
-- clear all valid bits
for i in tlb_index_t loop
itlb_valids(i) <= '0';
end loop;
elsif m_in.tlbie = '1' then
assert not is_X(wr_index) report "icache index invalid on write" severity FAILURE;
-- clear entry regardless of hit or miss
itlb_valids(to_integer(unsigned(wr_index))) <= '0';
elsif m_in.tlbld = '1' then
assert not is_X(wr_index) report "icache index invalid on write" severity FAILURE;
itlb_tags(to_integer(unsigned(wr_index))) <= m_in.addr(63 downto MIN_LG_PGSZ + TLB_BITS);
itlb_ptes(to_integer(unsigned(wr_index))) <= m_in.pte;
itlb_valids(to_integer(unsigned(wr_index))) <= '1';
end if;
--ev.itlb_miss_resolved <= m_in.tlbld and not rst;
end if;
end process;

comb : process(all)
variable v : Fetch1ToIcacheType;
variable v_int : reg_internal_t;
variable next_nia : std_ulogic_vector(63 downto 0);
variable m32 : std_ulogic;
variable ehit, esel : std_ulogic;
variable eaa_priv : std_ulogic;
begin
v := r;
v_int := r_int;
v.predicted := '0';
v.pred_ntaken := '0';
v_int.predicted_taken := '0';
v_int.rd_is_niap4 := '0';
v.req := not stop_in;
v_int.tlbstall := r_int.tlbcheck;
v_int.tlbcheck := '0';

if r_int.tlbcheck = '1' and itlb_hit = '0' then
v.fetch_fail := '1';
end if;

if rst = '1' then
-- Combinatorial computation of the CIA for the next cycle.
-- Needs to be simple so the result can be used for RAM
-- and TLB access in the icache.
-- If we are stalled, this still advances, and the assumption
-- is that it will not be used.
m32 := r_int.mode_32bit;
if w_in.redirect = '1' then
next_nia := w_in.redirect_nia(63 downto 2) & "00";
m32 := w_in.mode_32bit;
v.virt_mode := w_in.virt_mode;
v.priv_mode := w_in.priv_mode;
v.big_endian := w_in.big_endian;
v_int.mode_32bit := w_in.mode_32bit;
v.fetch_fail := '0';
elsif d_in.redirect = '1' then
next_nia := d_in.redirect_nia(63 downto 2) & "00";
v.fetch_fail := '0';
elsif r_int.tlbstall = '1' then
-- this case is needed so that the correct icache tags are read
next_nia := r.nia;
else
next_nia := r_int.next_nia;
end if;
if m32 = '1' then
next_nia(63 downto 32) := (others => '0');
end if;
v.nia := next_nia;

v_int.next_nia := std_ulogic_vector(unsigned(next_nia) + 4);

-- Use v_int.next_nia as the BTC read address before it gets possibly
-- overridden with the reset or interrupt address or the predicted branch
-- target address, in order to improve timing. If it gets overridden then
-- rd_is_niap4 gets cleared to indicate that the BTC data doesn't apply.
btc_rd_addr <= unsigned(v_int.next_nia(BTC_ADDR_BITS + 1 downto 2));
v_int.rd_is_niap4 := '1';

-- If the last NIA value went down with a stop mark, it didn't get
-- executed, and hence we shouldn't increment NIA.
advance_nia <= rst or w_in.interrupt or w_in.redirect or d_in.redirect or
(not r.stop_mark and not (r.req and stall_in));
-- reduce metavalue warnings in sim
if is_X(rst) then
advance_nia <= '1';
end if;

-- Translate next_nia to real if possible, otherwise we have to stall
-- and look up the TLB.
ehit := '0';
esel := '0';
eaa_priv := '1';
if next_nia(63 downto MIN_LG_PGSZ) = erat.epn1 and erat.valid(1) = '1' then
ehit := '1';
esel := '1';
end if;
if next_nia(63 downto MIN_LG_PGSZ) = erat.epn0 and erat.valid(0) = '1' then
ehit := '1';
end if;
if v.virt_mode = '0' then
v.rpn := v.nia(REAL_ADDR_BITS - 1 downto MIN_LG_PGSZ);
eaa_priv := '1';
elsif esel = '1' then
v.rpn := erat.rpn1;
eaa_priv := erat.priv1;
else
v.rpn := erat.rpn0;
eaa_priv := erat.priv0;
end if;
if advance_nia = '1' and ehit = '0' and v.virt_mode = '1' and
r_int.tlbcheck = '0' and v.fetch_fail = '0' then
v_int.tlbstall := '1';
v_int.tlbcheck := '1';
end if;
if ehit = '1' or v.virt_mode = '0' then
if eaa_priv = '1' and v.priv_mode = '0' then
v.fetch_fail := '1';
else
v.fetch_fail := '0';
end if;
end if;
erat_hit <= ehit and advance_nia;
erat_sel <= esel;

if rst /= '0' then
if alt_reset_in = '1' then
v.nia := ALT_RESET_ADDRESS;
v_int.next_nia := ALT_RESET_ADDRESS;
else
v.nia := RESET_ADDRESS;
v_int.next_nia := RESET_ADDRESS;
end if;
elsif w_in.interrupt = '1' then
v_int.next_nia := 52x"0" & w_in.intr_vec(11 downto 2) & "00";
end if;
if rst /= '0' or w_in.interrupt = '1' then
v.req := '0';
v.virt_mode := '0';
v.priv_mode := '1';
v.big_endian := '0';
v_int.mode_32bit := '0';
v_int.predicted_nia := (others => '0');
elsif w_in.redirect = '1' then
v.nia := w_in.redirect_nia(63 downto 2) & "00";
if w_in.mode_32bit = '1' then
v.nia(63 downto 32) := (others => '0');
end if;
v.virt_mode := w_in.virt_mode;
v.priv_mode := w_in.priv_mode;
v.big_endian := w_in.big_endian;
v_int.mode_32bit := w_in.mode_32bit;
elsif d_in.redirect = '1' then
v.nia := d_in.redirect_nia(63 downto 2) & "00";
if r_int.mode_32bit = '1' then
v.nia(63 downto 32) := (others => '0');
end if;
elsif r_int.predicted_taken = '1' then
v.nia := r_int.predicted_nia;
elsif r.req = '1' then
v_int.rd_is_niap4 := '1';
v.nia := std_ulogic_vector(unsigned(r.nia) + 4);
if r_int.mode_32bit = '1' then
v.nia(63 downto 32) := x"00000000";
end if;
if btc_rd_valid = '1' and r_int.rd_is_niap4 = '1' and
v_int.rd_is_niap4 := '0';
v_int.tlbstall := '0';
v_int.tlbcheck := '0';
v.fetch_fail := '0';
end if;
if v.fetch_fail = '1' then
v_int.tlbstall := '1';
end if;
if v_int.tlbstall = '1' then
v.req := '0';
end if;

-- If there is a valid entry in the BTC which corresponds to the next instruction,
-- use that to predict the address of the instruction after that.
-- (w_in.redirect = '0' and d_in.redirect = '0' and r_int.tlbstall = '0')
-- implies v.nia = r_int.next_nia.
-- r_int.rd_is_niap4 implies r_int.next_nia is the address used to read the BTC.
if v.req = '1' and w_in.redirect = '0' and d_in.redirect = '0' and r_int.tlbstall = '0' and
btc_rd_valid = '1' and r_int.rd_is_niap4 = '1' and
btc_rd_data(BTC_WIDTH - 2) = r.virt_mode and
btc_rd_data(BTC_WIDTH - 3 downto BTC_TARGET_BITS)
= v.nia(BTC_TAG_BITS + BTC_ADDR_BITS + 1 downto BTC_ADDR_BITS + 2) then
v_int.predicted_taken := btc_rd_data(BTC_WIDTH - 1);
v.predicted := btc_rd_data(BTC_WIDTH - 1);
v.pred_ntaken := not btc_rd_data(BTC_WIDTH - 1);
= r_int.next_nia(BTC_TAG_BITS + BTC_ADDR_BITS + 1 downto BTC_ADDR_BITS + 2) then
v.predicted := btc_rd_data(BTC_WIDTH - 1);
v.pred_ntaken := not btc_rd_data(BTC_WIDTH - 1);
if btc_rd_data(BTC_WIDTH - 1) = '1' then
v_int.next_nia := btc_rd_data(BTC_TARGET_BITS - 1 downto 0) & "00";
v_int.rd_is_niap4 := '0';
end if;
end if;
v_int.predicted_nia := btc_rd_data(BTC_TARGET_BITS - 1 downto 0) & "00";

-- If the last NIA value went down with a stop mark, it didn't get
-- executed, and hence we shouldn't increment NIA.
advance_nia <= rst or w_in.redirect or d_in.redirect or (not r.stop_mark and not stall_in);

r_next <= v;
r_next_int <= v_int;

-- Update outputs to the icache
i_out <= r;
i_out.next_nia <= next_nia;
i_out.next_rpn <= v.rpn;

end process;


@ -41,10 +41,6 @@ entity icache is
NUM_LINES : positive := 32;
-- Number of ways
NUM_WAYS : positive := 4;
-- L1 ITLB number of entries (direct mapped)
TLB_SIZE : positive := 64;
-- L1 ITLB log_2(page_size)
TLB_LG_PGSZ : positive := 12;
-- Non-zero to enable log data collection
LOG_LENGTH : natural := 0
);
@ -55,8 +51,6 @@ entity icache is
i_in : in Fetch1ToIcacheType;
i_out : out IcacheToDecode1Type;

m_in : in MmuToIcacheType;

stall_in : in std_ulogic;
stall_out : out std_ulogic;
flush_in : in std_ulogic;
@ -139,49 +133,24 @@ architecture rtl of icache is
-- The cache data BRAM organized as described above for each way
subtype cache_row_t is std_ulogic_vector(ROW_WIDTH-1 downto 0);

-- The cache tags LUTRAM has a row per set. Vivado is a pain and will
-- not handle a clean (commented) definition of the cache tags as a 3d
-- memory. For now, work around it by putting all the tags
-- We define a cache tag RAM per way, accessed synchronously
subtype cache_tag_t is std_logic_vector(TAG_BITS-1 downto 0);
-- type cache_tags_set_t is array(way_t) of cache_tag_t;
-- type cache_tags_array_t is array(index_t) of cache_tags_set_t;
constant TAG_RAM_WIDTH : natural := TAG_BITS * NUM_WAYS;
subtype cache_tags_set_t is std_logic_vector(TAG_RAM_WIDTH-1 downto 0);
type cache_tags_array_t is array(index_t) of cache_tags_set_t;
type cache_tags_set_t is array(way_t) of cache_tag_t;
type cache_tags_array_t is array(index_t) of cache_tag_t;

-- Set of cache tags read on the last clock edge
signal cache_tags_set : cache_tags_set_t;
-- Set of cache tags for snooping writes to memory
signal snoop_tags_set : cache_tags_set_t;
-- Flags indicating write-hit-read on the cache tags
signal tag_overwrite : std_ulogic_vector(NUM_WAYS - 1 downto 0);

-- The cache valid bits
subtype cache_way_valids_t is std_ulogic_vector(NUM_WAYS-1 downto 0);
type cache_valids_t is array(index_t) of cache_way_valids_t;
type row_per_line_valid_t is array(0 to ROW_PER_LINE - 1) of std_ulogic;

-- Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
signal cache_tags : cache_tags_array_t;
signal cache_valids : cache_valids_t;

attribute ram_style : string;
attribute ram_style of cache_tags : signal is "distributed";

-- L1 ITLB.
constant TLB_BITS : natural := log2(TLB_SIZE);
constant TLB_EA_TAG_BITS : natural := 64 - (TLB_LG_PGSZ + TLB_BITS);
constant TLB_PTE_BITS : natural := 64;

subtype tlb_index_t is integer range 0 to TLB_SIZE - 1;
type tlb_valids_t is array(tlb_index_t) of std_ulogic;
subtype tlb_tag_t is std_ulogic_vector(TLB_EA_TAG_BITS - 1 downto 0);
type tlb_tags_t is array(tlb_index_t) of tlb_tag_t;
subtype tlb_pte_t is std_ulogic_vector(TLB_PTE_BITS - 1 downto 0);
type tlb_ptes_t is array(tlb_index_t) of tlb_pte_t;

signal itlb_valids : tlb_valids_t;
signal itlb_tags : tlb_tags_t;
signal itlb_ptes : tlb_ptes_t;
attribute ram_style of itlb_tags : signal is "distributed";
attribute ram_style of itlb_ptes : signal is "distributed";

-- Privilege bit from PTE EAA field
signal eaa_priv : std_ulogic;

-- Cache reload state machine
type state_t is (IDLE, STOP_RELOAD, CLR_TAG, WAIT_ACK);

@ -189,6 +158,7 @@ architecture rtl of icache is
-- Cache hit state (Latches for 1 cycle BRAM access)
hit_way : way_sig_t;
hit_nia : std_ulogic_vector(63 downto 0);
hit_ra : real_addr_t;
hit_smark : std_ulogic;
hit_valid : std_ulogic;
big_endian: std_ulogic;
@ -208,6 +178,9 @@ architecture rtl of icache is
end_row_ix : row_in_line_t;
rows_valid : row_per_line_valid_t;

stalled_hit : std_ulogic; -- remembers hit while stalled
stalled_way : way_sig_t;

-- TLB miss state
fetch_failed : std_ulogic;
end record;
@ -226,9 +199,6 @@ architecture rtl of icache is
signal req_raddr : real_addr_t;

signal real_addr : real_addr_t;
signal ra_valid : std_ulogic;
signal priv_fault : std_ulogic;
signal access_ok : std_ulogic;

-- Cache RAM interface
type cache_ram_out_t is array(way_t) of cache_row_t;
@ -240,14 +210,16 @@ architecture rtl of icache is
signal plru_victim : way_sig_t;

-- Memory write snoop signals
signal snoop_valid : std_ulogic;
signal snoop_index : index_sig_t;
signal snoop_hits : cache_way_valids_t;
signal snoop_valid : std_ulogic;
signal snoop_index : index_sig_t;
signal snoop_tag : cache_tag_t;
signal snoop_index2 : index_sig_t;
signal snoop_hits : cache_way_valids_t;

signal log_insn : std_ulogic_vector(35 downto 0);

-- Return the cache line index (tag index) for an address
function get_index(addr: std_ulogic_vector) return index_sig_t is
function get_index(addr: real_addr_t) return index_sig_t is
begin
return unsigned(addr(SET_SIZE_BITS - 1 downto LINE_OFF_BITS));
end;
@ -321,29 +293,6 @@ architecture rtl of icache is
return endian & addr(addr'left downto SET_SIZE_BITS);
end;

-- Read a tag from a tag memory row
function read_tag(way: way_t; tagset: cache_tags_set_t) return cache_tag_t is
begin
return tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS);
end;

-- Write a tag to tag memory row
procedure write_tag(way: in way_t; tagset: inout cache_tags_set_t;
tag: cache_tag_t) is
begin
tagset((way+1) * TAG_BITS - 1 downto way * TAG_BITS) := tag;
end;

-- Simple hash for direct-mapped TLB index
function hash_ea(addr: std_ulogic_vector(63 downto 0)) return std_ulogic_vector is
variable hash : std_ulogic_vector(TLB_BITS - 1 downto 0);
begin
hash := addr(TLB_LG_PGSZ + TLB_BITS - 1 downto TLB_LG_PGSZ)
xor addr(TLB_LG_PGSZ + 2 * TLB_BITS - 1 downto TLB_LG_PGSZ + TLB_BITS)
xor addr(TLB_LG_PGSZ + 3 * TLB_BITS - 1 downto TLB_LG_PGSZ + 2 * TLB_BITS);
return hash;
end;

begin

-- byte-swap read data if big endian
@ -415,7 +364,9 @@ begin
signal wr_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
signal dout : cache_row_t;
signal wr_sel : std_ulogic_vector(0 downto 0);
signal ic_tags : cache_tags_array_t;
begin
-- Cache data RAMs, one per way
way: entity work.cache_ram
generic map (
ROW_BITS => ROW_BITS,
@ -443,6 +394,49 @@ begin
wr_addr <= std_ulogic_vector(r.store_row);
wr_sel(0) <= do_write;
end process;

-- Cache tag RAMs, one per way, are read and written synchronously.
-- They are instantiated like this instead of trying to describe them as
-- a single array in order to avoid problems with writing a single way.
process(clk)
variable replace_way : way_sig_t;
variable snoop_addr : real_addr_t;
variable next_raddr : real_addr_t;
begin
replace_way := to_unsigned(0, WAY_BITS);
if NUM_WAYS > 1 then
-- Get victim way from plru
replace_way := plru_victim;
end if;
if rising_edge(clk) then
-- Read tags using NIA for next cycle
if flush_in = '1' or i_in.req = '0' or (stall_in = '0' and stall_out = '0') then
next_raddr := i_in.next_rpn & i_in.next_nia(MIN_LG_PGSZ - 1 downto 0);
cache_tags_set(i) <= ic_tags(to_integer(get_index(next_raddr)));
-- Check for simultaneous write to the same location
tag_overwrite(i) <= '0';
if r.state = CLR_TAG and r.store_index = get_index(next_raddr) and
to_unsigned(i, WAY_BITS) = replace_way then
tag_overwrite(i) <= '1';
end if;
end if;

-- Second read port for snooping writes to memory
if (wb_snoop_in.cyc and wb_snoop_in.stb and wb_snoop_in.we) = '1' then
snoop_addr := addr_to_real(wb_to_addr(wb_snoop_in.adr));
snoop_tags_set(i) <= ic_tags(to_integer(get_index(snoop_addr)));
end if;

-- Write one tag when in CLR_TAG state
if r.state = CLR_TAG and to_unsigned(i, WAY_BITS) = replace_way then
ic_tags(to_integer(r.store_index)) <= r.store_tag;
end if;

if rst = '1' then
tag_overwrite(i) <= '0';
end if;
end if;
end process;
end generate;
-- Generate PLRUs
@ -468,10 +462,10 @@ begin
process(all)
begin
-- Read PLRU bits from array
if is_X(r.hit_nia) then
if is_X(r.hit_ra) then
plru_cur <= (others => 'X');
else
plru_cur <= plru_ram(to_integer(get_index(r.hit_nia)));
plru_cur <= plru_ram(to_integer(get_index(r.hit_ra)));
end if;

-- PLRU interface
@ -484,92 +478,32 @@ begin
begin
if rising_edge(clk) then
if r.hit_valid = '1' then
assert not is_X(r.hit_nia) severity failure;
plru_ram(to_integer(get_index(r.hit_nia))) <= plru_upd;
assert not is_X(r.hit_ra) severity failure;
plru_ram(to_integer(get_index(r.hit_ra))) <= plru_upd;
end if;
end if;
end process;
end generate;

-- TLB hit detection and real address generation
itlb_lookup : process(all)
variable pte : tlb_pte_t;
variable ttag : tlb_tag_t;
variable tlb_req_index : std_ulogic_vector(TLB_BITS - 1 downto 0);
begin
tlb_req_index := hash_ea(i_in.nia);
if is_X(tlb_req_index) then
pte := (others => 'X');
ttag := (others => 'X');
else
pte := itlb_ptes(to_integer(unsigned(tlb_req_index)));
ttag := itlb_tags(to_integer(unsigned(tlb_req_index)));
end if;
if i_in.virt_mode = '1' then
real_addr <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
i_in.nia(TLB_LG_PGSZ - 1 downto 0);
if ttag = i_in.nia(63 downto TLB_LG_PGSZ + TLB_BITS) then
if is_X(tlb_req_index) then
ra_valid <= 'X';
else
ra_valid <= itlb_valids(to_integer(unsigned(tlb_req_index)));
end if;
else
ra_valid <= '0';
end if;
eaa_priv <= pte(3);
else
real_addr <= addr_to_real(i_in.nia);
ra_valid <= '1';
eaa_priv <= '1';
end if;

-- no IAMR, so no KUEP support for now
priv_fault <= eaa_priv and not i_in.priv_mode;
access_ok <= ra_valid and not priv_fault;
end process;

-- iTLB update
itlb_update: process(clk)
variable wr_index : std_ulogic_vector(TLB_BITS - 1 downto 0);
begin
if rising_edge(clk) then
wr_index := hash_ea(m_in.addr);
if rst = '1' or (m_in.tlbie = '1' and m_in.doall = '1') then
-- clear all valid bits
for i in tlb_index_t loop
itlb_valids(i) <= '0';
end loop;
elsif m_in.tlbie = '1' then
assert not is_X(wr_index) report "icache index invalid on write" severity FAILURE;
-- clear entry regardless of hit or miss
itlb_valids(to_integer(unsigned(wr_index))) <= '0';
elsif m_in.tlbld = '1' then
assert not is_X(wr_index) report "icache index invalid on write" severity FAILURE;
itlb_tags(to_integer(unsigned(wr_index))) <= m_in.addr(63 downto TLB_LG_PGSZ + TLB_BITS);
itlb_ptes(to_integer(unsigned(wr_index))) <= m_in.pte;
itlb_valids(to_integer(unsigned(wr_index))) <= '1';
end if;
ev.itlb_miss_resolved <= m_in.tlbld and not rst;
end if;
end process;

-- Cache hit detection, output to fetch2 and other misc logic
icache_comb : process(all)
variable is_hit : std_ulogic;
variable hit_way : way_sig_t;
variable insn : std_ulogic_vector(ICWORDLEN - 1 downto 0);
variable icode : insn_code;
variable ra : real_addr_t;
begin
-- Extract line, row and tag from request
req_index <= get_index(i_in.nia);
req_row <= get_row(i_in.nia);
req_tag <= get_tag(real_addr, i_in.big_endian);
ra := i_in.rpn & i_in.nia(MIN_LG_PGSZ - 1 downto 0);
real_addr <= ra;
req_index <= get_index(ra);
req_row <= get_row(ra);
req_tag <= get_tag(ra, i_in.big_endian);

-- Calculate address of beginning of cache row, will be
-- used for cache miss processing if needed
--
req_raddr <= real_addr(REAL_ADDR_BITS - 1 downto ROW_OFF_BITS) &
req_raddr <= ra(REAL_ADDR_BITS - 1 downto ROW_OFF_BITS) &
(ROW_OFF_BITS-1 downto 0 => '0');

-- Test if pending request is a hit on any way
@ -580,20 +514,27 @@ begin
end if;
for i in way_t loop
if i_in.req = '1' and
(cache_valids(to_integer(req_index))(i) = '1' or
(r.state = WAIT_ACK and
req_index = r.store_index and
to_unsigned(i, WAY_BITS) = r.store_way and
r.rows_valid(to_integer(req_row(ROW_LINEBITS-1 downto 0))) = '1')) then
if read_tag(i, cache_tags(to_integer(req_index))) = req_tag then
hit_way := to_unsigned(i, WAY_BITS);
is_hit := '1';
end if;
cache_valids(to_integer(req_index))(i) = '1' and
tag_overwrite(i) = '0' and
cache_tags_set(i) = req_tag then
hit_way := to_unsigned(i, WAY_BITS);
is_hit := '1';
end if;
end loop;
if r.state = WAIT_ACK and r.store_valid = '1' and
req_index = r.store_index and
req_tag = r.store_tag and
r.rows_valid(to_integer(req_row(ROW_LINEBITS-1 downto 0))) = '1' then
is_hit := '1';
hit_way := r.store_way;
end if;
if r.stalled_hit = '1' then
is_hit := '1';
hit_way := r.stalled_way;
end if;

-- Generate the "hit" and "miss" signals for the synchronous blocks
if i_in.req = '1' and access_ok = '1' and flush_in = '0' and rst = '0' then
if i_in.req = '1' and flush_in = '0' and rst = '0' then
req_is_hit <= is_hit;
req_is_miss <= not is_hit;
else
@ -610,19 +551,22 @@ begin
-- I prefer not to do just yet as it would force fetch2 to know about
-- some of the cache geometry information.
--
insn := (others => '0');
icode := INSN_illegal;
if r.hit_valid = '1' then
assert not is_X(r.hit_way) severity failure;
if is_X(r.hit_way) then
insn := (others => 'X');
else
insn := read_insn_word(r.hit_nia, cache_out(to_integer(r.hit_way)));
-- Currently we use only the top bit for indicating illegal
-- instructions because we know that insn_codes fit into 9 bits.
if is_X(insn) then
insn := (others => '0');
elsif insn(ICWORDLEN - 1) = '0' then
icode := insn_code'val(to_integer(unsigned(insn(ICWORDLEN-1 downto INSN_IMAGE_BITS))));
end if;
end if;
end if;
assert not (r.hit_valid = '1' and is_X(r.hit_way)) severity failure;
-- Currently we use only the top bit for indicating illegal
-- instructions because we know that insn_codes fit into 9 bits.
if is_X(insn) then
insn := (others => '0');
elsif insn(ICWORDLEN - 1) = '0' then
icode := insn_code'val(to_integer(unsigned(insn(ICWORDLEN-1 downto INSN_IMAGE_BITS))));
insn(31 downto 26) := recode_primary_opcode(icode);
end if;

i_out.insn <= insn(31 downto 0);
i_out.icode <= icode;
log_insn <= insn;
@ -634,8 +578,8 @@ begin
i_out.next_predicted <= r.predicted;
i_out.next_pred_ntaken <= r.pred_ntaken;

-- Stall fetch1 if we have a miss on cache or TLB or a protection fault
stall_out <= not (is_hit and access_ok);
-- Stall fetch1 if we have a cache miss
stall_out <= i_in.req and not is_hit and not flush_in;

-- Wishbone requests output (from the cache miss reload machine)
wishbone_out <= r.wb;
@ -647,9 +591,17 @@ begin
if rising_edge(clk) then
-- keep outputs to fetch2 unchanged on a stall
-- except that flush or reset sets valid to 0
if stall_in = '1' then
if rst = '1' or flush_in = '1' then
r.hit_valid <= '0';
if rst = '1' or flush_in = '1' then
r.hit_valid <= '0';
r.stalled_hit <= '0';
r.stalled_way <= to_unsigned(0, WAY_BITS);
elsif stall_in = '1' then
if r.state = CLR_TAG then
r.stalled_hit <= '0';
elsif req_is_hit = '1' then
-- if we have a hit while stalled, remember it
r.stalled_hit <= '1';
r.stalled_way <= req_hit_way;
end if;
else
-- On a hit, latch the request for the next cycle, when the BRAM data
@ -669,14 +621,17 @@ begin
" way:" & to_hstring(req_hit_way) &
" RA:" & to_hstring(real_addr);
end if;
r.stalled_hit <= '0';
end if;
if stall_in = '0' then
-- Send stop marks and NIA down regardless of validity
r.hit_smark <= i_in.stop_mark;
r.hit_nia <= i_in.nia;
r.hit_ra <= real_addr;
r.big_endian <= i_in.big_endian;
r.predicted <= i_in.predicted;
r.pred_ntaken <= i_in.pred_ntaken;
r.fetch_failed <= i_in.fetch_fail and not flush_in;
end if;
if i_out.valid = '1' then
assert not is_X(i_out.insn) severity failure;
@ -689,7 +644,6 @@ begin
variable tagset : cache_tags_set_t;
variable tag : cache_tag_t;
variable snoop_addr : real_addr_t;
variable snoop_tag : cache_tag_t;
variable snoop_cache_tags : cache_tags_set_t;
variable replace_way : way_sig_t;
begin
@ -722,15 +676,14 @@ begin
snoop_valid <= wb_snoop_in.cyc and wb_snoop_in.stb and wb_snoop_in.we;
snoop_addr := addr_to_real(wb_to_addr(wb_snoop_in.adr));
snoop_index <= get_index(snoop_addr);
snoop_tag := get_tag(snoop_addr, '0');
snoop_tag <= get_tag(snoop_addr, '0');
snoop_hits <= (others => '0');

-- On the next cycle, match up tags with the snooped address
-- to see if any ways need to be invalidated
if snoop_valid = '1' then
if is_X(snoop_addr) then
report "metavalue in snoop_addr" severity FAILURE;
end if;
snoop_cache_tags := cache_tags(to_integer(get_index(snoop_addr)));
for i in way_t loop
tag := read_tag(i, snoop_cache_tags);
tag := snoop_tags_set(i);
-- Ignore endian bit in comparison
tag(TAG_BITS - 1) := '0';
if tag = snoop_tag then
@ -738,6 +691,7 @@ begin
end if;
end loop;
end if;
snoop_index2 <= snoop_index;

-- Process cache invalidations
if inval_in = '1' then
@ -746,12 +700,12 @@ begin
end loop;
r.store_valid <= '0';
else
-- Do invalidations from snooped stores to memory, one
-- cycle after the address appears on wb_snoop_in.
-- Do invalidations from snooped stores to memory,
-- two cycles after the address appears on wb_snoop_in.
for i in way_t loop
if snoop_hits(i) = '1' then
assert not is_X(snoop_index) severity failure;
cache_valids(to_integer(snoop_index))(i) <= '0';
assert not is_X(snoop_index2) severity failure;
cache_valids(to_integer(snoop_index2))(i) <= '0';
end if;
end loop;
end if;
@ -809,15 +763,6 @@ begin
assert not is_X(replace_way) severity failure;
cache_valids(to_integer(r.store_index))(to_integer(replace_way)) <= '0';

-- Store new tag in selected way
for i in 0 to NUM_WAYS-1 loop
if to_unsigned(i, WAY_BITS) = replace_way then
tagset := cache_tags(to_integer(r.store_index));
write_tag(i, tagset, r.store_tag);
cache_tags(to_integer(r.store_index)) <= tagset;
end if;
end loop;

r.state <= WAIT_ACK;
end if;

@ -879,13 +824,6 @@ begin
end if;
end case;
end if;

-- TLB miss and protection fault processing
if rst = '1' or flush_in = '1' or m_in.tlbld = '1' then
r.fetch_failed <= '0';
elsif i_in.req = '1' and access_ok = '0' and stall_in = '0' then
r.fetch_failed <= '1';
end if;
end if;
end process;

@ -915,8 +853,8 @@ begin
wstate &
std_ulogic_vector(resize(lway, 3)) &
req_is_hit & req_is_miss &
access_ok &
ra_valid;
'1' & -- was access_ok
'1'; -- was ra_valid
end if;
end process;
log_out <= log_data;

@ -15,8 +15,6 @@ architecture behave of icache_tb is
signal i_out : Fetch1ToIcacheType;
signal i_in : IcacheToDecode1Type;

signal m_out : MmuToIcacheType;

signal wb_bram_in : wishbone_master_out;
signal wb_bram_out : wishbone_slave_out;

@ -32,7 +30,6 @@ begin
rst => rst,
i_in => i_out,
i_out => i_in,
m_in => m_out,
stall_in => '0',
flush_in => '0',
inval_in => '0',
@ -77,19 +74,21 @@ begin
i_out.priv_mode <= '1';
i_out.virt_mode <= '0';
i_out.big_endian <= '0';

m_out.tlbld <= '0';
m_out.tlbie <= '0';
m_out.addr <= (others => '0');
m_out.pte <= (others => '0');
i_out.fetch_fail <= '0';
i_out.predicted <= '0';
i_out.pred_ntaken <= '0';

wait until rising_edge(clk);
wait until rising_edge(clk);
wait until rising_edge(clk);

i_out.next_nia <= x"0000000000000004";
i_out.next_rpn <= (others => '0');
wait until rising_edge(clk);

i_out.req <= '1';
i_out.nia <= x"0000000000000004";
i_out.rpn <= (others => '0');

wait for 30*clk_period;
wait until rising_edge(clk);
@ -102,6 +101,7 @@ begin
severity failure;

i_out.req <= '0';
i_out.next_nia <= x"0000000000000008";

wait until rising_edge(clk);

@ -116,6 +116,8 @@ begin
"=" & to_hstring(i_in.insn) &
" expected 00000002"
severity failure;

i_out.next_nia <= x"0000000000000040";
wait until rising_edge(clk);

-- another miss
@ -133,6 +135,9 @@ begin
severity failure;

-- test something that aliases
i_out.next_nia <= x"0000000000000100";
wait until rising_edge(clk);

i_out.req <= '1';
i_out.nia <= x"0000000000000100";
wait until rising_edge(clk);

@ -20,7 +20,7 @@ entity mmu is
d_out : out MmuToDcacheType;
d_in : in DcacheToMmuType;

i_out : out MmuToIcacheType
i_out : out MmuToITLBType
);
end mmu;


@ -38,8 +38,38 @@ architecture behaviour of predecoder is
2#011100_00000# to 2#011100_11111# => INSN_andi_dot,
2#011101_00000# to 2#011101_11111# => INSN_andis_dot,
2#000000_00000# => INSN_attn,
2#010010_00000# to 2#010010_11111# => INSN_b,
2#010000_00000# to 2#010000_11111# => INSN_bc,
2#010010_00000# to 2#010010_00001# => INSN_brel,
2#010010_00010# to 2#010010_00011# => INSN_babs,
2#010010_00100# to 2#010010_00101# => INSN_brel,
2#010010_00110# to 2#010010_00111# => INSN_babs,
2#010010_01000# to 2#010010_01001# => INSN_brel,
2#010010_01010# to 2#010010_01011# => INSN_babs,
2#010010_01100# to 2#010010_01101# => INSN_brel,
2#010010_01110# to 2#010010_01111# => INSN_babs,
2#010010_10000# to 2#010010_10001# => INSN_brel,
2#010010_10010# to 2#010010_10011# => INSN_babs,
2#010010_10100# to 2#010010_10101# => INSN_brel,
2#010010_10110# to 2#010010_10111# => INSN_babs,
2#010010_11000# to 2#010010_11001# => INSN_brel,
2#010010_11010# to 2#010010_11011# => INSN_babs,
2#010010_11100# to 2#010010_11101# => INSN_brel,
2#010010_11110# to 2#010010_11111# => INSN_babs,
2#010000_00000# to 2#010000_00001# => INSN_bcrel,
2#010000_00010# to 2#010000_00011# => INSN_bcabs,
2#010000_00100# to 2#010000_00101# => INSN_bcrel,
2#010000_00110# to 2#010000_00111# => INSN_bcabs,
2#010000_01000# to 2#010000_01001# => INSN_bcrel,
2#010000_01010# to 2#010000_01011# => INSN_bcabs,
2#010000_01100# to 2#010000_01101# => INSN_bcrel,
2#010000_01110# to 2#010000_01111# => INSN_bcabs,
2#010000_10000# to 2#010000_10001# => INSN_bcrel,
2#010000_10010# to 2#010000_10011# => INSN_bcabs,
2#010000_10100# to 2#010000_10101# => INSN_bcrel,
2#010000_10110# to 2#010000_10111# => INSN_bcabs,
2#010000_11000# to 2#010000_11001# => INSN_bcrel,
2#010000_11010# to 2#010000_11011# => INSN_bcabs,
2#010000_11100# to 2#010000_11101# => INSN_bcrel,
2#010000_11110# to 2#010000_11111# => INSN_bcabs,
2#001011_00000# to 2#001011_11111# => INSN_cmpi,
2#001010_00000# to 2#001010_11111# => INSN_cmpli,
2#100010_00000# to 2#100010_11111# => INSN_lbz,
@ -220,9 +250,9 @@ architecture behaviour of predecoder is
2#0_11111_01001# => INSN_divd, -- divdo
2#0_01111_01011# => INSN_divw,
2#0_11111_01011# => INSN_divw, -- divwo
2#0_11001_10110# => INSN_nop, -- dss
2#0_01010_10110# => INSN_nop, -- dst
2#0_01011_10110# => INSN_nop, -- dstst
2#0_11001_10110# => INSN_rnop, -- dss
2#0_01010_10110# => INSN_rnop, -- dst
2#0_01011_10110# => INSN_rnop, -- dstst
2#0_11010_10110# => INSN_eieio,
2#0_01000_11100# => INSN_eqv,
2#0_11101_11010# => INSN_extsb,
@ -322,14 +352,14 @@ architecture behaviour of predecoder is
2#0_00011_01000# => INSN_neg,
2#0_10011_01000# => INSN_neg, -- nego
-- next 8 are reserved no-op instructions
2#0_10000_10010# => INSN_nop,
2#0_10001_10010# => INSN_nop,
2#0_10010_10010# => INSN_nop,
2#0_10011_10010# => INSN_nop,
2#0_10100_10010# => INSN_nop,
2#0_10101_10010# => INSN_nop,
2#0_10110_10010# => INSN_nop,
2#0_10111_10010# => INSN_nop,
2#0_10000_10010# => INSN_rnop,
2#0_10001_10010# => INSN_rnop,
2#0_10010_10010# => INSN_rnop,
2#0_10011_10010# => INSN_rnop,
2#0_10100_10010# => INSN_rnop,
2#0_10101_10010# => INSN_rnop,
2#0_10110_10010# => INSN_rnop,
2#0_10111_10010# => INSN_rnop,
2#0_00011_11100# => INSN_nor,
2#0_01101_11100# => INSN_or,
2#0_01100_11100# => INSN_orc,

@ -160,34 +160,21 @@ begin
end if;

-- Outputs to fetch1
f.interrupt := intr;
f.intr_vec := std_ulogic_vector(to_unsigned(vec, 12));
f.redirect := e_in.redirect;
f.redirect_nia := e_in.write_data;
f.br_nia := e_in.last_nia;
f.br_last := e_in.br_last;
f.br_last := e_in.br_last and not intr;
f.br_taken := e_in.br_taken;
if intr = '1' then
f.redirect := '1';
f.br_last := '0';
f.redirect_nia := std_ulogic_vector(to_unsigned(vec, 64));
f.virt_mode := '0';
f.priv_mode := '1';
-- XXX need an interrupt LE bit here, e.g. from LPCR
f.big_endian := '0';
f.mode_32bit := '0';
else
if e_in.abs_br = '1' then
f.redirect_nia := e_in.br_offset;
else
f.redirect_nia := std_ulogic_vector(unsigned(e_in.last_nia) + unsigned(e_in.br_offset));
end if;
-- send MSR[IR], ~MSR[PR], ~MSR[LE] and ~MSR[SF] up to fetch1
f.virt_mode := e_in.redir_mode(3);
f.priv_mode := e_in.redir_mode(2);
f.big_endian := e_in.redir_mode(1);
f.mode_32bit := e_in.redir_mode(0);
end if;
-- send MSR[IR], ~MSR[PR], ~MSR[LE] and ~MSR[SF] up to fetch1
f.virt_mode := e_in.redir_mode(3);
f.priv_mode := e_in.redir_mode(2);
f.big_endian := e_in.redir_mode(1);
f.mode_32bit := e_in.redir_mode(0);

f_out <= f;
flush_out <= f_out.redirect;
flush_out <= f_out.redirect or intr;

-- Register write data bypass to decode2
wb_bypass.tag.tag <= complete_out.tag;

Loading…
Cancel
Save