core: Implement hashst and hashchk instructions

These are done in loadstore1.  The HashDigest function is computed in
9 cycles; for 8 cycles, a state machine does 4 steps of key expansion
per cycle, and for each of 4 lanes of data, does 4 steps of ciphering;
then there is 1 cycle to combine the results into the final hash
value.

At present, hashchk does not overlap the computation of the hash with
fetching of data from memory (in the case of a cache miss).

The 'is_signed' field in the instruction decode table is used to
distinguish hashst and hashchk from ordinary loads and stores.  We
have a new 'RBC' value for input_reg_c_t which says that we are
reading RB but we want the value to come in via the C port; this is
because we want the 5-bit immediate offset on the B port.

Note that in the list of insn_code values, hashst/chk have been put in
the section for instructions with an RB operand, which is not strictly
correct given that the B port is used for the immediate D operand;
however, adding them to the section for instructions without an RB
operand would have made that section exceed 128 entries, causing
changes to the padding needed.  The only downside to having hashst/chk
where they are is that the debug logic can't use the RB port to read
GPR/FPRs when a hashst/chk instruction is being decoded.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/437/head
Paul Mackerras 1 week ago
parent e9b57ca5bf
commit 0a11e8455f

@ -69,6 +69,7 @@ package common is
constant SPR_DAWR1 : spr_num_t := 181; constant SPR_DAWR1 : spr_num_t := 181;
constant SPR_DAWRX0 : spr_num_t := 188; constant SPR_DAWRX0 : spr_num_t := 188;
constant SPR_DAWRX1 : spr_num_t := 189; constant SPR_DAWRX1 : spr_num_t := 189;
constant SPR_HASHKEYR : spr_num_t := 468;


-- PMU registers -- PMU registers
constant SPR_UPMC1 : spr_num_t := 771; constant SPR_UPMC1 : spr_num_t := 771;
@ -585,6 +586,7 @@ package common is
byte_reverse : std_ulogic; byte_reverse : std_ulogic;
sign_extend : std_ulogic; -- do we need to sign extend? sign_extend : std_ulogic; -- do we need to sign extend?
update : std_ulogic; -- is this an update instruction? update : std_ulogic; -- is this an update instruction?
hash : std_ulogic;
xerc : xer_common_t; xerc : xer_common_t;
reserve : std_ulogic; -- set for larx/stcx. reserve : std_ulogic; -- set for larx/stcx.
rc : std_ulogic; -- set for stcx. rc : std_ulogic; -- set for stcx.
@ -600,7 +602,7 @@ package common is
end record; end record;
constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type :=
(valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0', (valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0',
sign_extend => '0', update => '0', xerc => xerc_init, sign_extend => '0', update => '0', hash => '0', xerc => xerc_init,
reserve => '0', rc => '0', virt_mode => '0', priv_mode => '0', reserve => '0', rc => '0', virt_mode => '0', priv_mode => '0',
insn => (others => '0'), insn => (others => '0'),
instr_tag => instr_tag_init, instr_tag => instr_tag_init,

@ -199,6 +199,8 @@ architecture behaviour of decode1 is
INSN_fsubs => (FPU, FPU, OP_FP_ARITH, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE), INSN_fsubs => (FPU, FPU, OP_FP_ARITH, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE),
INSN_ftdiv => (FPU, FPU, OP_FP_CMP, FRA, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_ftdiv => (FPU, FPU, OP_FP_CMP, FRA, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_ftsqrt => (FPU, FPU, OP_FP_CMP, NONE, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_ftsqrt => (FPU, FPU, OP_FP_CMP, NONE, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_hashchk => (LDST, NONE, OP_LOAD, RA, DSX, RBC, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE),
INSN_hashst => (LDST, NONE, OP_STORE, RA, DSX, RBC, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE),
INSN_icbi => (ALU, NONE, OP_ICBI, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), INSN_icbi => (ALU, NONE, OP_ICBI, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE),
INSN_icbt => (ALU, NONE, OP_ICBT, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_icbt => (ALU, NONE, OP_ICBT, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
INSN_isel => (ALU, NONE, OP_ISEL, RA_OR_ZERO, RB, NONE, RT, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_isel => (ALU, NONE, OP_ISEL, RA_OR_ZERO, RB, NONE, RT, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE),
@ -696,6 +698,12 @@ begin
if (icode = INSN_stq or icode = INSN_stqcx) and f_in.big_endian = '0' then if (icode = INSN_stq or icode = INSN_stqcx) and f_in.big_endian = '0' then
vr.reg_3_addr(0) := '1'; vr.reg_3_addr(0) := '1';
end if; end if;
-- See if this is an instruction where we need to use the RS/RC
-- read port to read the RB operand, because we want to get an
-- immediate operand to execute1 via read_data2.
if (icode = INSN_hashst or icode = INSN_hashchk) then
vr.reg_3_addr := '0' & insn_rb(f_in.insn);
end if;
vr.read_1_enable := f_in.valid; vr.read_1_enable := f_in.valid;
vr.read_2_enable := f_in.valid and maybe_rb; vr.read_2_enable := f_in.valid and maybe_rb;
vr.read_3_enable := f_in.valid; vr.read_3_enable := f_in.valid;

@ -138,6 +138,8 @@ architecture behaviour of decode2 is
ret := ('0', (others => '0'), x"00000000000000" & "00" & insn_in(1) & insn_in(15 downto 11)); ret := ('0', (others => '0'), x"00000000000000" & "00" & insn_in(1) & insn_in(15 downto 11));
when CONST_SH32 => when CONST_SH32 =>
ret := ('0', (others => '0'), x"00000000000000" & "000" & insn_in(15 downto 11)); ret := ('0', (others => '0'), x"00000000000000" & "000" & insn_in(15 downto 11));
when DSX =>
ret := ('0', (others => '0'), 55x"7FFFFFFFFFFFFF" & insn_in(0) & insn_in(25 downto 21) & "000");
when NONE => when NONE =>
ret := ('0', (others => '0'), (others => '0')); ret := ('0', (others => '0'), (others => '0'));
end case; end case;
@ -165,6 +167,8 @@ architecture behaviour of decode2 is
else else
return ('0', (others => '0'), (others => '0')); return ('0', (others => '0'), (others => '0'));
end if; end if;
when RBC =>
return ('1', gpr_to_gspr(insn_rb(insn_in)), (others => '0'));
when NONE => when NONE =>
return ('0', (others => '0'), (others => '0')); return ('0', (others => '0'), (others => '0'));
end case; end case;
@ -495,7 +499,8 @@ begin
when SPR_XER => when SPR_XER =>
v.input_ov := '1'; v.input_ov := '1';
when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR | when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR |
SPR_DAWR0 | SPR_DAWR1 | SPR_DAWRX0 | SPR_DAWRX1 => SPR_DAWR0 | SPR_DAWR1 | SPR_DAWRX0 | SPR_DAWRX1 |
SPR_HASHKEYR =>
unit := LDST; unit := LDST;
when SPR_TAR => when SPR_TAR =>
v.e.uses_tar := '1'; v.e.uses_tar := '1';
@ -518,7 +523,8 @@ begin
v.e.output_xer := '1'; v.e.output_xer := '1';
v.output_ov := '1'; v.output_ov := '1';
when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR | when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR |
SPR_DAWR0 | SPR_DAWR1 | SPR_DAWRX0 | SPR_DAWRX1 => SPR_DAWR0 | SPR_DAWR1 | SPR_DAWRX0 | SPR_DAWRX1 |
SPR_HASHKEYR =>
unit := LDST; unit := LDST;
if d_in.valid = '1' then if d_in.valid = '1' then
v.sgl_pipe := '1'; v.sgl_pipe := '1';

@ -205,11 +205,13 @@ package decode_types is
INSN_divwe, INSN_divwe,
INSN_divweu, INSN_divweu,
INSN_eqv, INSN_eqv,
INSN_hashchk,
INSN_hashst,
INSN_icbi, INSN_icbi,
INSN_icbt, INSN_icbt,
INSN_isel, INSN_isel, -- 160
INSN_lbarx, INSN_lbarx,
INSN_lbzcix, -- 160 INSN_lbzcix,
INSN_lbzux, INSN_lbzux,
INSN_lbzx, INSN_lbzx,
INSN_ldarx, INSN_ldarx,
@ -217,9 +219,9 @@ package decode_types is
INSN_ldcix, INSN_ldcix,
INSN_ldx, INSN_ldx,
INSN_ldux, INSN_ldux,
INSN_lharx, INSN_lharx, -- 170
INSN_lhax, INSN_lhax,
INSN_lhaux, -- 170 INSN_lhaux,
INSN_lhbrx, INSN_lhbrx,
INSN_lhzcix, INSN_lhzcix,
INSN_lhzx, INSN_lhzx,
@ -227,9 +229,9 @@ package decode_types is
INSN_lqarx, INSN_lqarx,
INSN_lwarx, INSN_lwarx,
INSN_lwax, INSN_lwax,
INSN_lwaux, INSN_lwaux, -- 180
INSN_lwbrx, INSN_lwbrx,
INSN_lwzcix, -- 180 INSN_lwzcix,
INSN_lwzx, INSN_lwzx,
INSN_lwzux, INSN_lwzux,
INSN_modsd, INSN_modsd,
@ -237,9 +239,9 @@ package decode_types is
INSN_moduw, INSN_moduw,
INSN_modud, INSN_modud,
INSN_mulhw, INSN_mulhw,
INSN_mulhwu, INSN_mulhwu, -- 190
INSN_mulhd, INSN_mulhd,
INSN_mulhdu, -- 190 INSN_mulhdu,
INSN_mullw, INSN_mullw,
INSN_mulld, INSN_mulld,
INSN_nand, INSN_nand,
@ -247,9 +249,9 @@ package decode_types is
INSN_or, INSN_or,
INSN_orc, INSN_orc,
INSN_pdepd, INSN_pdepd,
INSN_pextd, INSN_pextd, -- 200
INSN_rldcl, INSN_rldcl,
INSN_rldcr, -- 200 INSN_rldcr,
INSN_rlwnm, INSN_rlwnm,
INSN_slw, INSN_slw,
INSN_sld, INSN_sld,
@ -257,9 +259,9 @@ package decode_types is
INSN_srad, INSN_srad,
INSN_srw, INSN_srw,
INSN_srd, INSN_srd,
INSN_stbcix, INSN_stbcix, -- 210
INSN_stbcx, INSN_stbcx,
INSN_stbx, -- 210 INSN_stbx,
INSN_stbux, INSN_stbux,
INSN_stdbrx, INSN_stdbrx,
INSN_stdcix, INSN_stdcix,
@ -267,9 +269,9 @@ package decode_types is
INSN_stdx, INSN_stdx,
INSN_stdux, INSN_stdux,
INSN_sthbrx, INSN_sthbrx,
INSN_sthcix, INSN_sthcix, -- 220
INSN_sthcx, INSN_sthcx,
INSN_sthx, -- 220 INSN_sthx,
INSN_sthux, INSN_sthux,
INSN_stqcx, INSN_stqcx,
INSN_stwbrx, INSN_stwbrx,
@ -277,9 +279,9 @@ package decode_types is
INSN_stwcx, INSN_stwcx,
INSN_stwx, INSN_stwx,
INSN_stwux, INSN_stwux,
INSN_subf, INSN_subf, -- 230
INSN_subfc, INSN_subfc,
INSN_subfe, -- 230 INSN_subfe,
INSN_td, INSN_td,
INSN_tlbie, INSN_tlbie,
INSN_tlbiel, INSN_tlbiel,
@ -287,7 +289,7 @@ package decode_types is
INSN_xor, INSN_xor,


-- pad to 240 to simplify comparison logic -- pad to 240 to simplify comparison logic
INSN_236, INSN_237, INSN_238, INSN_239, INSN_238, INSN_239,


-- The following instructions have a third input addressed by RC -- The following instructions have a third input addressed by RC
INSN_maddld, INSN_maddld,
@ -416,8 +418,9 @@ package decode_types is


type input_reg_a_t is (NONE, RA, RA_OR_ZERO, RA0_OR_CIA, CIA, FRA); type input_reg_a_t is (NONE, RA, RA_OR_ZERO, RA0_OR_CIA, CIA, FRA);
type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD, type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD,
CONST_DXHI4, CONST_DS, CONST_DQ, CONST_M1, CONST_SH, CONST_SH32, CONST_PSI, FRB); CONST_DXHI4, CONST_DS, CONST_DQ, CONST_M1, CONST_SH, CONST_SH32, CONST_PSI,
type input_reg_c_t is (NONE, RS, RCR, FRC, FRS); DSX, FRB);
type input_reg_c_t is (NONE, RS, RCR, RBC, FRC, FRS);
type output_reg_a_t is (NONE, RT, RA, FRT); type output_reg_a_t is (NONE, RT, RA, FRT);
type rc_t is (NONE, ONE, RC, RCOE); type rc_t is (NONE, ONE, RC, RCOE);
type carry_in_t is (ZERO, CA, OV, ONE); type carry_in_t is (ZERO, CA, OV, ONE);
@ -634,6 +637,8 @@ package body decode_types is
when INSN_divwu => return "011111"; when INSN_divwu => return "011111";
when INSN_divd => return "011111"; when INSN_divd => return "011111";
when INSN_divw => return "011111"; when INSN_divw => return "011111";
when INSN_hashchk => return "011111";
when INSN_hashst => return "011111";
when INSN_eieio => return "011111"; when INSN_eieio => return "011111";
when INSN_eqv => return "011111"; when INSN_eqv => return "011111";
when INSN_extsb => return "011111"; when INSN_extsb => return "011111";

@ -1844,6 +1844,8 @@ begin
lv.byte_reverse := e_in.byte_reverse xnor ex1.msr(MSR_LE); lv.byte_reverse := e_in.byte_reverse xnor ex1.msr(MSR_LE);
lv.sign_extend := e_in.sign_extend; lv.sign_extend := e_in.sign_extend;
lv.update := e_in.update; lv.update := e_in.update;
-- abuse e_in.is_signed to indicate hash store/check instructions
lv.hash := e_in.is_signed;
lv.xerc := xerc_in; lv.xerc := xerc_in;
lv.reserve := e_in.reserve; lv.reserve := e_in.reserve;
lv.rc := e_in.rc; lv.rc := e_in.rc;

@ -68,6 +68,8 @@ architecture behave of loadstore1 is
sync : std_ulogic; sync : std_ulogic;
tlbie : std_ulogic; tlbie : std_ulogic;
dcbz : std_ulogic; dcbz : std_ulogic;
hashst : std_ulogic;
hashcmp : std_ulogic;
read_spr : std_ulogic; read_spr : std_ulogic;
write_spr : std_ulogic; write_spr : std_ulogic;
mmu_op : std_ulogic; mmu_op : std_ulogic;
@ -97,7 +99,7 @@ architecture behave of loadstore1 is
virt_mode : std_ulogic; virt_mode : std_ulogic;
priv_mode : std_ulogic; priv_mode : std_ulogic;
load_sp : std_ulogic; load_sp : std_ulogic;
sprsel : std_ulogic_vector(2 downto 0); sprsel : std_ulogic_vector(3 downto 0);
ric : std_ulogic_vector(1 downto 0); ric : std_ulogic_vector(1 downto 0);
is_slbia : std_ulogic; is_slbia : std_ulogic;
align_intr : std_ulogic; align_intr : std_ulogic;
@ -107,25 +109,14 @@ architecture behave of loadstore1 is
incomplete : std_ulogic; incomplete : std_ulogic;
ea_valid : std_ulogic; ea_valid : std_ulogic;
end record; end record;
constant request_init : request_t := (valid => '0', dc_req => '0', load => '0', store => '0', constant request_init : request_t := (addr => (others => '0'),
flush => '0', touch => '0', sync => '0', tlbie => '0',
dcbz => '0', read_spr => '0', write_spr => '0', mmu_op => '0',
instr_fault => '0', do_update => '0',
mode_32bit => '0', prefixed => '0',
addr => (others => '0'),
byte_sel => x"00", second_bytes => x"00", byte_sel => x"00", second_bytes => x"00",
store_data => (others => '0'), instr_tag => instr_tag_init, store_data => (others => '0'), instr_tag => instr_tag_init,
write_reg => 6x"00", length => x"0", write_reg => 6x"00", length => x"0",
elt_length => x"0", byte_reverse => '0', brev_mask => "000", elt_length => x"0", brev_mask => "000",
sign_extend => '0', update => '0', xerc => xerc_init,
xerc => xerc_init, reserve => '0', sprsel => "0000", ric => "00",
atomic_qw => '0', atomic_first => '0', atomic_last => '0', others => '0');
rc => '0', nc => '0',
virt_mode => '0', priv_mode => '0', load_sp => '0',
sprsel => "000", ric => "00", is_slbia => '0', align_intr => '0',
dawr_intr => '0',
dword_index => '0', two_dwords => '0', incomplete => '0',
ea_valid => '0');


type reg_stage1_t is record type reg_stage1_t is record
req : request_t; req : request_t;
@ -147,7 +138,7 @@ architecture behave of loadstore1 is
one_cycle : std_ulogic; one_cycle : std_ulogic;
wr_sel : std_ulogic_vector(1 downto 0); wr_sel : std_ulogic_vector(1 downto 0);
addr0 : std_ulogic_vector(63 downto 0); addr0 : std_ulogic_vector(63 downto 0);
sprsel : std_ulogic_vector(2 downto 0); sprsel : std_ulogic_vector(3 downto 0);
dbg_spr : std_ulogic_vector(63 downto 0); dbg_spr : std_ulogic_vector(63 downto 0);
dbg_spr_ack: std_ulogic; dbg_spr_ack: std_ulogic;
end record; end record;
@ -180,6 +171,7 @@ architecture behave of loadstore1 is
dawrx : dawrx_array_t; dawrx : dawrx_array_t;
dawr_uplim : dawr_array_t; dawr_uplim : dawr_array_t;
dawr_upd : std_ulogic; dawr_upd : std_ulogic;
hashkeyr : std_ulogic_vector(63 downto 0);
end record; end record;


signal req_in : request_t; signal req_in : request_t;
@ -201,6 +193,28 @@ architecture behave of loadstore1 is
signal stage1_dreq : std_ulogic; signal stage1_dreq : std_ulogic;
signal stage1_dawr_match : std_ulogic; signal stage1_dawr_match : std_ulogic;


-- Arrays of 16-bit halfwords used by the hash logic: hw_array_4 holds
-- the registered key words and the per-lane cipher state, hw_array_8
-- is the working array used during key expansion (4 current key words
-- plus the 4 newly generated ones).
type hw_array_4 is array(0 to 3) of std_ulogic_vector(15 downto 0);
type hw_array_8 is array(0 to 7) of std_ulogic_vector(15 downto 0);

-- Registered state of the hash computation state machine.
--   active : set while the computation is in progress
--   done   : set for the cycle following the last computation step;
--            hash_result is only meaningful while this is 1
--   step   : counts the computation cycles, 0 to 7
--   z0     : constant bit sequence fed into key expansion; the top 4
--            bits are consumed each cycle and the value is then
--            shifted left by 4
--   key    : the 4 most recently generated 16-bit key words
--   xleft, xright : the two 16-bit halves of the cipher state for
--            each of the 4 lanes
type hash_reg_t is record
active : std_ulogic;
done : std_ulogic;
step : unsigned(2 downto 0);
z0 : std_ulogic_vector(30 downto 0);
key : hw_array_4;
xleft : hw_array_4;
xright : hw_array_4;
end record;
-- Reset value: state machine idle, all data fields zero
constant hash_reg_init : hash_reg_t := (
active => '0', done => '0', step => "000", z0 => (others => '0'),
key => (others => (others => '0')),
xleft => (others => (others => '0')), xright => (others => (others => '0')));

-- hash_r is the current registered state, hash_rin the next state
signal hash_r : hash_reg_t;
signal hash_rin : hash_reg_t;
-- Request to begin a new hash computation (ignored while hash_r.active = 1)
signal hash_start : std_ulogic;
-- Combined 64-bit hash value; only valid when hash_r.done = 1
signal hash_result : std_ulogic_vector(63 downto 0);

-- Generate byte enables from sizes -- Generate byte enables from sizes
function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is
begin begin
@ -336,7 +350,7 @@ begin
r1.req.instr_fault <= '0'; r1.req.instr_fault <= '0';
r1.req.load <= '0'; r1.req.load <= '0';
r1.req.priv_mode <= '0'; r1.req.priv_mode <= '0';
r1.req.sprsel <= "000"; r1.req.sprsel <= "0000";
r1.req.ric <= "00"; r1.req.ric <= "00";
r1.req.xerc <= xerc_init; r1.req.xerc <= xerc_init;
r1.dawr_ll <= (others => '0'); r1.dawr_ll <= (others => '0');
@ -350,7 +364,7 @@ begin
r2.req.instr_fault <= '0'; r2.req.instr_fault <= '0';
r2.req.load <= '0'; r2.req.load <= '0';
r2.req.priv_mode <= '0'; r2.req.priv_mode <= '0';
r2.req.sprsel <= "000"; r2.req.sprsel <= "0000";
r2.req.ric <= "00"; r2.req.ric <= "00";
r2.req.xerc <= xerc_init; r2.req.xerc <= xerc_init;


@ -448,6 +462,98 @@ begin
end process; end process;
end generate; end generate;


-- HashDigest computation (ISA Book I section 3.3.17), spread over
-- 8 cycles.  Every cycle performs 4 key expansion steps plus, for
-- each of the 4 lanes, 4 cipher rounds.
-- This process is the state register for that computation: it is
-- cleared by a synchronous reset and otherwise loads the next state
-- produced by the combinatorial process.
loadstore_hash_reg: process(clk)
begin
    if rising_edge(clk) then
        if rst /= '1' then
            hash_r <= hash_rin;
            -- Simulation aid: print the final hash value in the
            -- cycle where it is valid
            if hash_r.done = '1' then
                report "hash_result = " & to_hstring(hash_result);
            end if;
        else
            hash_r <= hash_reg_init;
        end if;
    end if;
end process;

-- Combinatorial next-state logic for the hash state machine.
-- While hash_r.active = 1, each evaluation advances the key schedule
-- by 4 words and the cipher state of every lane by 4 rounds.  When
-- idle, hash_start = 1 loads the initial state from l_in.addr1,
-- l_in.data and r3.hashkeyr.  Also drives hash_result from the
-- registered cipher state.
loadstore_hash_comb: process(all)
variable hv : hash_reg_t;
variable keys : hw_array_8;
variable xl, xr : std_ulogic_vector(15 downto 0);
variable z, t : std_ulogic_vector(15 downto 0);
variable fx : std_ulogic_vector(15 downto 0);
variable ra, rb : std_ulogic_vector(63 downto 0);
variable key : std_ulogic_vector(63 downto 0);
variable j, k : integer;
begin
hv := hash_r;
-- done defaults to 0 so that it is only set for the single cycle
-- following the last computation step
hv.done := '0';
if hash_r.active = '1' then
-- Initialize keys to avoid yosys/ghdl incorrectly inferring latches
for i in 0 to 7 loop
keys(i) := (others => '0');
end loop;
-- generate the next 4 key words
-- keys(0..3) are the current key words, keys(4..7) the new ones
for i in 0 to 3 loop
keys(i) := hash_r.key(i);
end loop;
for i in 4 to 7 loop
-- z is one bit of z0 (bits 30 down to 27 for i = 4 to 7),
-- zero-extended to 16 bits
z := 15x"0" & hash_r.z0(34 - i);
-- t = (previous key word rotated right by 3) xor keys(i-3)
t := (keys(i-1)(2 downto 0) & keys(i-1)(15 downto 3)) xor keys(i-3);
-- the last term is t rotated right by 1
keys(i) := x"fffc" xor z xor keys(i-4) xor t xor (t(0) & t(15 downto 1));
hv.key(i-4) := keys(i);
end loop;
-- discard the 4 bits of z0 that were just consumed
hv.z0 := hash_r.z0(26 downto 0) & "0000";
-- do 4 rounds for each of 4 lanes
for lane in 0 to 3 loop
xr := hash_r.xright(lane);
xl := hash_r.xleft(lane);
for i in 0 to 3 loop
-- fx = (xl rotated left 1 and xl rotated left 8) xor xl rotated left 2
fx := ((xl(14 downto 0) & xl(15)) and (xl(7 downto 0) & xl(15 downto 8))) xor
(xl(13 downto 0) & xl(15 downto 14));
-- the round key comes from the registered (pre-expansion) key
-- words, with the starting index offset by the lane number
t := xr xor fx xor hash_r.key((i + lane) mod 4);
xr := xl;
xl := t;
end loop;
hv.xright(lane) := xr;
hv.xleft(lane) := xl;
end loop;
hv.step := hash_r.step + 1;
-- step 7 is the last computation cycle; flag the result as
-- available in the next cycle
if hash_r.step = 3x"7" then
hv.active := '0';
hv.done := '1';
end if;
elsif hash_start = '1' then
-- start a new hash process
hv.z0 := 31x"7D12B0E6"; -- 0xFA2561CD >> 1
ra := l_in.addr1;
rb := l_in.data;
key := r3.hashkeyr;
-- Each lane's initial state takes its upper bytes from one halfword
-- of rb (counting up from the least significant halfword) and its
-- lower bytes from one halfword of ra (counting down from the most
-- significant halfword)
for lane in 0 to 3 loop
j := lane * 16;
k := (3 - lane) * 16;
hv.xright(lane)(15 downto 8) := rb(j + 7 downto j);
hv.xright(lane)(7 downto 0) := ra(k + 15 downto k + 8);
hv.xleft(lane)(15 downto 8) := rb(j + 15 downto j + 8);
hv.xleft(lane)(7 downto 0) := ra(k + 7 downto k);
end loop;
-- key(0) is the most significant halfword of the 64-bit key
for i in 0 to 3 loop
j := (3 - i) * 16;
hv.key(i) := key(j + 15 downto j);
end loop;
hv.step := "000";
hv.active := '1';
end if;
-- only valid when hash_r.done = 1
-- (lanes 0 and 1 concatenated, xored with lanes 2 and 3 concatenated)
hash_result <= (hash_r.xright(0) & hash_r.xleft(0) & hash_r.xright(1) & hash_r.xleft(1)) xor
(hash_r.xright(2) & hash_r.xleft(2) & hash_r.xright(3) & hash_r.xleft(3));

hash_rin <= hv;
end process;

-- Translate a load/store instruction into the internal request format -- Translate a load/store instruction into the internal request format
-- XXX this should only depend on l_in, but actually depends on -- XXX this should only depend on l_in, but actually depends on
-- r1.addr0 as well (in the l_in.second = 1 case). -- r1.addr0 as well (in the l_in.second = 1 case).
@ -483,13 +589,16 @@ begin
v.ric := l_in.insn(19 downto 18); v.ric := l_in.insn(19 downto 18);
if sprn(8 downto 7) = "01" then if sprn(8 downto 7) = "01" then
-- debug registers DAWR[X][01] -- debug registers DAWR[X][01]
v.sprsel := '1' & sprn(3) & sprn(0); v.sprsel := "01" & sprn(3) & sprn(0);
elsif sprn(2) = '1' then
-- HASH[P]KEYR
v.sprsel := "000" & sprn(0);
elsif sprn(1) = '1' then elsif sprn(1) = '1' then
-- DSISR and DAR -- DSISR and DAR
v.sprsel := "01" & sprn(0); v.sprsel := "001" & sprn(0);
else else
-- PID and PTCR -- PID and PTCR
v.sprsel := "00" & sprn(8); v.sprsel := "100" & sprn(8);
end if; end if;


lsu_sum := std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2)); lsu_sum := std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2));
@ -536,7 +645,7 @@ begin
if l_in.repeat = '1' and l_in.update = '0' and addr(3) /= l_in.second then if l_in.repeat = '1' and l_in.update = '0' and addr(3) /= l_in.second then
misaligned := '1'; misaligned := '1';
end if; end if;
v.align_intr := l_in.reserve and misaligned; v.align_intr := (l_in.reserve or l_in.hash) and misaligned;


v.atomic_first := not misaligned and not l_in.second; v.atomic_first := not misaligned and not l_in.second;
v.atomic_last := not misaligned and (l_in.second or not l_in.repeat); v.atomic_last := not misaligned and (l_in.second or not l_in.repeat);
@ -565,6 +674,7 @@ begin
if l_in.length = "0000" then if l_in.length = "0000" then
v.touch := '1'; v.touch := '1';
end if; end if;
v.hashst := l_in.hash;
when OP_LOAD => when OP_LOAD =>
if l_in.update = '0' or l_in.second = '0' then if l_in.update = '0' or l_in.second = '0' then
v.load := '1'; v.load := '1';
@ -579,6 +689,7 @@ begin
-- write back address to RA -- write back address to RA
v.do_update := '1'; v.do_update := '1';
end if; end if;
v.hashcmp := l_in.hash;
when OP_DCBF => when OP_DCBF =>
v.load := '1'; v.load := '1';
v.flush := '1'; v.flush := '1';
@ -631,6 +742,7 @@ begin
v := r1; v := r1;
issue := '0'; issue := '0';
dcreq := '0'; dcreq := '0';
hash_start <= '0';


if r1.busy = '0' then if r1.busy = '0' then
req := req_in; req := req_in;
@ -662,6 +774,7 @@ begin
else else
-- For the lfs conversion cycle, leave the request valid -- For the lfs conversion cycle, leave the request valid
-- for another cycle but with req.dc_req = 0. -- for another cycle but with req.dc_req = 0.
-- (In other words we insert an extra dummy request.)
-- For an MMU request last cycle, we have nothing -- For an MMU request last cycle, we have nothing
-- to do in this cycle, so make it invalid. -- to do in this cycle, so make it invalid.
if r1.req.load_sp = '0' then if r1.req.load_sp = '0' then
@ -695,9 +808,20 @@ begin
-- we can change what's in r1 next cycle because the current thing -- we can change what's in r1 next cycle because the current thing
-- in r1 will go into r2 -- in r1 will go into r2
v.req := req; v.req := req;
if issue = '1' and (req.hashst or req.hashcmp) = '1' then
-- need to initiate and then wait for the hash computation
hash_start <= not r1.busy;
v.busy := not hash_r.done;
if hash_r.done = '0' then
issue := '0';
else
v.req.store_data := hash_result;
end if;
else
v.busy := (issue and (req.incomplete or req.load_sp)) or (req.valid and req.mmu_op);
end if;
dcreq := issue; dcreq := issue;
v.issued := issue; v.issued := issue;
v.busy := (issue and (req.incomplete or req.load_sp)) or (req.valid and req.mmu_op);
else else
-- pipeline is stalled -- pipeline is stalled
if r1.issued = '1' and d_in.error = '1' then if r1.issued = '1' and d_in.error = '1' then
@ -723,7 +847,7 @@ begin
variable byte_offset : unsigned(2 downto 0); variable byte_offset : unsigned(2 downto 0);
variable interrupt : std_ulogic; variable interrupt : std_ulogic;
variable dbg_spr_rd : std_ulogic; variable dbg_spr_rd : std_ulogic;
variable sprsel : std_ulogic_vector(2 downto 0); variable sprsel : std_ulogic_vector(3 downto 0);
variable sprval : std_ulogic_vector(63 downto 0); variable sprval : std_ulogic_vector(63 downto 0);
variable dawr_match : std_ulogic; variable dawr_match : std_ulogic;
begin begin
@ -758,9 +882,12 @@ begin
if dbg_spr_rd = '0' then if dbg_spr_rd = '0' then
sprsel := r1.req.sprsel; sprsel := r1.req.sprsel;
else else
sprsel := '0' & dbg_spr_addr; sprsel := "00" & dbg_spr_addr;
end if; end if;
case sprsel is if sprsel(3) = '1' then
sprval := m_in.sprval; -- MMU regs
else
case sprsel(2 downto 0) is
when "100" => when "100" =>
sprval := r3.dawr(0) & "000"; sprval := r3.dawr(0) & "000";
when "101" => when "101" =>
@ -769,13 +896,16 @@ begin
sprval := 48x"0" & r3.dawrx(0); sprval := 48x"0" & r3.dawrx(0);
when "111" => when "111" =>
sprval := 48x"0" & r3.dawrx(1); sprval := 48x"0" & r3.dawrx(1);
when "000" =>
sprval := r3.hashkeyr;
when "010" => when "010" =>
sprval := x"00000000" & r3.dsisr; sprval := x"00000000" & r3.dsisr;
when "011" => when "011" =>
sprval := r3.dar; sprval := r3.dar;
when others => when others =>
sprval := m_in.sprval; -- MMU regs sprval := (others => '0');
end case; end case;
end if;
if dbg_spr_req = '0' then if dbg_spr_req = '0' then
v.dbg_spr_ack := '0'; v.dbg_spr_ack := '0';
elsif dbg_spr_rd = '1' and r2.dbg_spr_ack = '0' then elsif dbg_spr_rd = '1' and r2.dbg_spr_ack = '0' then
@ -790,9 +920,9 @@ begin
v.req.store_data := store_data; v.req.store_data := store_data;
v.req.dawr_intr := dawr_match; v.req.dawr_intr := dawr_match;
v.wait_dc := r1.req.valid and r1.req.dc_req and not r1.req.load_sp and v.wait_dc := r1.req.valid and r1.req.dc_req and not r1.req.load_sp and
not r1.req.incomplete; not r1.req.incomplete and not r1.req.hashcmp;
v.wait_mmu := r1.req.valid and r1.req.mmu_op; v.wait_mmu := r1.req.valid and r1.req.mmu_op;
if r1.req.valid = '1' and r1.req.align_intr = '1' then if r1.req.valid = '1' and (r1.req.align_intr or r1.req.hashcmp) = '1' then
v.busy := '1'; v.busy := '1';
v.one_cycle := '0'; v.one_cycle := '0';
else else
@ -832,8 +962,10 @@ begin
v.wait_mmu := '0'; v.wait_mmu := '0';
end if; end if;
if r2.busy = '1' and r2.wait_mmu = '0' then if r2.busy = '1' and r2.wait_mmu = '0' then
if r2.req.hashcmp = '0' or d_in.valid = '1' then
v.busy := '0'; v.busy := '0';
end if; end if;
end if;


interrupt := (r2.req.valid and r2.req.align_intr) or interrupt := (r2.req.valid and r2.req.align_intr) or
(d_in.error and (d_in.cache_paradox or d_in.reserve_nc or r2.req.dawr_intr)) or (d_in.error and (d_in.cache_paradox or d_in.reserve_nc or r2.req.dawr_intr)) or
@ -877,6 +1009,7 @@ begin
variable dsisr : std_ulogic_vector(31 downto 0); variable dsisr : std_ulogic_vector(31 downto 0);
variable itlb_fault : std_ulogic; variable itlb_fault : std_ulogic;
variable trim_ctl : trim_ctl_t; variable trim_ctl : trim_ctl_t;
variable hashchk_trap : std_ulogic;
begin begin
v := r3; v := r3;


@ -966,6 +1099,15 @@ begin
v.load_data := data_permuted; v.load_data := data_permuted;
end if; end if;


hashchk_trap := '0';
if d_in.valid = '1' and r2.req.hashcmp = '1' then
if d_in.data = r2.req.store_data then
v.complete := '1';
else
hashchk_trap := '1';
exception := '1';
end if;
end if;


if r2.req.valid = '1' then if r2.req.valid = '1' then
if r2.req.read_spr = '1' then if r2.req.read_spr = '1' then
@ -982,22 +1124,24 @@ begin
write_enable := '1'; write_enable := '1';
end if; end if;
if r2.req.write_spr = '1' then if r2.req.write_spr = '1' then
if r2.req.sprsel(2) = '1' then if r2.req.sprsel(3 downto 2) = "01" then
v.dawr_upd := '1'; v.dawr_upd := '1';
end if; end if;
case r2.req.sprsel is case r2.req.sprsel is
when "100" => when "0100" =>
v.dawr(0) := r2.req.store_data(63 downto 3); v.dawr(0) := r2.req.store_data(63 downto 3);
when "101" => when "0101" =>
v.dawr(1) := r2.req.store_data(63 downto 3); v.dawr(1) := r2.req.store_data(63 downto 3);
when "110" => when "0110" =>
v.dawrx(0) := r2.req.store_data(15 downto 0); v.dawrx(0) := r2.req.store_data(15 downto 0);
when "111" => when "0111" =>
v.dawrx(1) := r2.req.store_data(15 downto 0); v.dawrx(1) := r2.req.store_data(15 downto 0);
when "010" => when "0010" =>
v.dsisr := r2.req.store_data(31 downto 0); v.dsisr := r2.req.store_data(31 downto 0);
when "011" => when "0011" =>
v.dar := r2.req.store_data; v.dar := r2.req.store_data;
when "0000" =>
v.hashkeyr := r2.req.store_data;
when others => when others =>
end case; end case;
end if; end if;
@ -1016,7 +1160,7 @@ begin
if d_in.valid = '1' then if d_in.valid = '1' then
if r2.req.incomplete = '0' then if r2.req.incomplete = '0' then
write_enable := r2.req.load and not r2.req.load_sp and write_enable := r2.req.load and not r2.req.load_sp and
not r2.req.flush and not r2.req.touch; not r2.req.flush and not r2.req.touch and not r2.req.hashcmp;
-- stores write back rA update -- stores write back rA update
do_update := r2.req.update and r2.req.store; do_update := r2.req.update and r2.req.store;
end if; end if;
@ -1074,6 +1218,10 @@ begin
v.intr_vec := 16#600#; v.intr_vec := 16#600#;
v.srr1(47 - 34) := r2.req.prefixed; v.srr1(47 - 34) := r2.req.prefixed;
v.dar := r2.req.addr; v.dar := r2.req.addr;
elsif hashchk_trap = '1' then
v.intr_vec := 16#700#;
v.srr1(47 - 34) := r2.req.prefixed;
v.srr1(47 - 46) := '1';
elsif r2.req.instr_fault = '0' then elsif r2.req.instr_fault = '0' then
v.srr1(47 - 34) := r2.req.prefixed; v.srr1(47 - 34) := r2.req.prefixed;
v.dar := r2.req.addr; v.dar := r2.req.addr;

@ -364,15 +364,15 @@ architecture behaviour of predecoder is
2#0_01110_11100# => INSN_nand, 2#0_01110_11100# => INSN_nand,
2#0_00011_01000# => INSN_neg, 2#0_00011_01000# => INSN_neg,
2#0_10011_01000# => INSN_neg, -- nego 2#0_10011_01000# => INSN_neg, -- nego
-- next 8 are reserved no-op instructions -- next 6 are reserved no-op instructions
2#0_10000_10010# => INSN_rnop, 2#0_10000_10010# => INSN_rnop,
2#0_10001_10010# => INSN_rnop, 2#0_10001_10010# => INSN_rnop,
2#0_10010_10010# => INSN_rnop, 2#0_10010_10010# => INSN_rnop,
2#0_10011_10010# => INSN_rnop, 2#0_10011_10010# => INSN_rnop,
2#0_10100_10010# => INSN_rnop, 2#0_10100_10010# => INSN_rnop,
2#0_10101_10010# => INSN_rnop, 2#0_10101_10010# => INSN_rnop,
2#0_10110_10010# => INSN_rnop, 2#0_10110_10010# => INSN_hashst,
2#0_10111_10010# => INSN_rnop, 2#0_10111_10010# => INSN_hashchk,
2#0_00011_11100# => INSN_nor, 2#0_00011_11100# => INSN_nor,
2#0_01101_11100# => INSN_or, 2#0_01101_11100# => INSN_or,
2#0_01100_11100# => INSN_orc, 2#0_01100_11100# => INSN_orc,

Loading…
Cancel
Save