From 0a11e8455fcc5c2dae87d250c7234c426c20ecb7 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 23 Jan 2025 15:02:36 +1100 Subject: [PATCH] core: Implement hashst and hashchk instructions These are done in loadstore1. The HashDigest function is computed in 9 cycles; for 8 cycles, a state machine does 4 steps of key expansion per cycle, and for each of 4 lanes of data, does 4 steps of ciphering; then there is 1 cycle to combine the results into the final hash value. At present, hashchk does not overlap the computation of the hash with fetching of data from memory (in the case of a cache miss). The 'is_signed' field in the instruction decode table is used to distinguish hashst and hashchk from ordinary loads and stores. We have a new 'RBC' value for input_reg_c_t which says that we are reading RB but we want the value to come in via the C port; this is because we want the 5-bit immediate offset on the B port. Note that in the list of insn_code values, hashst/chk have been put in the section for instructions with an RB operand, which is not strictly correct given that the B port is used for the immediate D operand; however, adding them to the section for instructions without an RB operand would have made that section exceed 128 entries, causing changes to the padding needed. The only downside to having hashst/chk where they are is that the debug logic can't use the RB port to read GPR/FPRs when a hashst/chk instruction is being decoded. 
Signed-off-by: Paul Mackerras --- common.vhdl | 4 +- decode1.vhdl | 8 ++ decode2.vhdl | 10 +- decode_types.vhdl | 43 ++++---- execute1.vhdl | 2 + loadstore1.vhdl | 256 ++++++++++++++++++++++++++++++++++++---------- predecode.vhdl | 6 +- 7 files changed, 250 insertions(+), 79 deletions(-) diff --git a/common.vhdl b/common.vhdl index 7d31b67..867a8f5 100644 --- a/common.vhdl +++ b/common.vhdl @@ -69,6 +69,7 @@ package common is constant SPR_DAWR1 : spr_num_t := 181; constant SPR_DAWRX0 : spr_num_t := 188; constant SPR_DAWRX1 : spr_num_t := 189; + constant SPR_HASHKEYR : spr_num_t := 468; -- PMU registers constant SPR_UPMC1 : spr_num_t := 771; @@ -585,6 +586,7 @@ package common is byte_reverse : std_ulogic; sign_extend : std_ulogic; -- do we need to sign extend? update : std_ulogic; -- is this an update instruction? + hash : std_ulogic; xerc : xer_common_t; reserve : std_ulogic; -- set for larx/stcx. rc : std_ulogic; -- set for stcx. @@ -600,7 +602,7 @@ package common is end record; constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0', - sign_extend => '0', update => '0', xerc => xerc_init, + sign_extend => '0', update => '0', hash => '0', xerc => xerc_init, reserve => '0', rc => '0', virt_mode => '0', priv_mode => '0', insn => (others => '0'), instr_tag => instr_tag_init, diff --git a/decode1.vhdl b/decode1.vhdl index 8d2d2fb..154e91e 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -199,6 +199,8 @@ architecture behaviour of decode1 is INSN_fsubs => (FPU, FPU, OP_FP_ARITH, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE), INSN_ftdiv => (FPU, FPU, OP_FP_CMP, FRA, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_ftsqrt => (FPU, FPU, OP_FP_CMP, NONE, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), + INSN_hashchk => 
(LDST, NONE, OP_LOAD, RA, DSX, RBC, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE), + INSN_hashst => (LDST, NONE, OP_STORE, RA, DSX, RBC, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE), INSN_icbi => (ALU, NONE, OP_ICBI, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), INSN_icbt => (ALU, NONE, OP_ICBT, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_isel => (ALU, NONE, OP_ISEL, RA_OR_ZERO, RB, NONE, RT, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), @@ -696,6 +698,12 @@ begin if (icode = INSN_stq or icode = INSN_stqcx) and f_in.big_endian = '0' then vr.reg_3_addr(0) := '1'; end if; + -- See if this is an instruction where we need to use the RS/RC + -- read port to read the RB operand, because we want to get an + -- immediate operand to execute1 via read_data2. 
+ if (icode = INSN_hashst or icode = INSN_hashchk) then + vr.reg_3_addr := '0' & insn_rb(f_in.insn); + end if; vr.read_1_enable := f_in.valid; vr.read_2_enable := f_in.valid and maybe_rb; vr.read_3_enable := f_in.valid; diff --git a/decode2.vhdl b/decode2.vhdl index da2fbb3..40c66cc 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -138,6 +138,8 @@ architecture behaviour of decode2 is ret := ('0', (others => '0'), x"00000000000000" & "00" & insn_in(1) & insn_in(15 downto 11)); when CONST_SH32 => ret := ('0', (others => '0'), x"00000000000000" & "000" & insn_in(15 downto 11)); + when DSX => + ret := ('0', (others => '0'), 55x"7FFFFFFFFFFFFF" & insn_in(0) & insn_in(25 downto 21) & "000"); when NONE => ret := ('0', (others => '0'), (others => '0')); end case; @@ -165,6 +167,8 @@ architecture behaviour of decode2 is else return ('0', (others => '0'), (others => '0')); end if; + when RBC => + return ('1', gpr_to_gspr(insn_rb(insn_in)), (others => '0')); when NONE => return ('0', (others => '0'), (others => '0')); end case; @@ -495,7 +499,8 @@ begin when SPR_XER => v.input_ov := '1'; when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR | - SPR_DAWR0 | SPR_DAWR1 | SPR_DAWRX0 | SPR_DAWRX1 => + SPR_DAWR0 | SPR_DAWR1 | SPR_DAWRX0 | SPR_DAWRX1 | + SPR_HASHKEYR => unit := LDST; when SPR_TAR => v.e.uses_tar := '1'; @@ -518,7 +523,8 @@ begin v.e.output_xer := '1'; v.output_ov := '1'; when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR | - SPR_DAWR0 | SPR_DAWR1 | SPR_DAWRX0 | SPR_DAWRX1 => + SPR_DAWR0 | SPR_DAWR1 | SPR_DAWRX0 | SPR_DAWRX1 | + SPR_HASHKEYR => unit := LDST; if d_in.valid = '1' then v.sgl_pipe := '1'; diff --git a/decode_types.vhdl b/decode_types.vhdl index 5695643..def2c43 100644 --- a/decode_types.vhdl +++ b/decode_types.vhdl @@ -205,11 +205,13 @@ package decode_types is INSN_divwe, INSN_divweu, INSN_eqv, + INSN_hashchk, + INSN_hashst, INSN_icbi, INSN_icbt, - INSN_isel, + INSN_isel, -- 160 INSN_lbarx, - INSN_lbzcix, -- 160 + INSN_lbzcix, INSN_lbzux, INSN_lbzx, INSN_ldarx, @@ 
-217,9 +219,9 @@ package decode_types is INSN_ldcix, INSN_ldx, INSN_ldux, - INSN_lharx, + INSN_lharx, -- 170 INSN_lhax, - INSN_lhaux, -- 170 + INSN_lhaux, INSN_lhbrx, INSN_lhzcix, INSN_lhzx, @@ -227,9 +229,9 @@ package decode_types is INSN_lqarx, INSN_lwarx, INSN_lwax, - INSN_lwaux, + INSN_lwaux, -- 180 INSN_lwbrx, - INSN_lwzcix, -- 180 + INSN_lwzcix, INSN_lwzx, INSN_lwzux, INSN_modsd, @@ -237,9 +239,9 @@ package decode_types is INSN_moduw, INSN_modud, INSN_mulhw, - INSN_mulhwu, + INSN_mulhwu, -- 190 INSN_mulhd, - INSN_mulhdu, -- 190 + INSN_mulhdu, INSN_mullw, INSN_mulld, INSN_nand, @@ -247,9 +249,9 @@ package decode_types is INSN_or, INSN_orc, INSN_pdepd, - INSN_pextd, + INSN_pextd, -- 200 INSN_rldcl, - INSN_rldcr, -- 200 + INSN_rldcr, INSN_rlwnm, INSN_slw, INSN_sld, @@ -257,9 +259,9 @@ package decode_types is INSN_srad, INSN_srw, INSN_srd, - INSN_stbcix, + INSN_stbcix, -- 210 INSN_stbcx, - INSN_stbx, -- 210 + INSN_stbx, INSN_stbux, INSN_stdbrx, INSN_stdcix, @@ -267,9 +269,9 @@ package decode_types is INSN_stdx, INSN_stdux, INSN_sthbrx, - INSN_sthcix, + INSN_sthcix, -- 220 INSN_sthcx, - INSN_sthx, -- 220 + INSN_sthx, INSN_sthux, INSN_stqcx, INSN_stwbrx, @@ -277,9 +279,9 @@ package decode_types is INSN_stwcx, INSN_stwx, INSN_stwux, - INSN_subf, + INSN_subf, -- 230 INSN_subfc, - INSN_subfe, -- 230 + INSN_subfe, INSN_td, INSN_tlbie, INSN_tlbiel, @@ -287,7 +289,7 @@ package decode_types is INSN_xor, -- pad to 240 to simplify comparison logic - INSN_236, INSN_237, INSN_238, INSN_239, + INSN_238, INSN_239, -- The following instructions have a third input addressed by RC INSN_maddld, @@ -416,8 +418,9 @@ package decode_types is type input_reg_a_t is (NONE, RA, RA_OR_ZERO, RA0_OR_CIA, CIA, FRA); type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD, - CONST_DXHI4, CONST_DS, CONST_DQ, CONST_M1, CONST_SH, CONST_SH32, CONST_PSI, FRB); - type input_reg_c_t is (NONE, RS, RCR, FRC, FRS); + CONST_DXHI4, CONST_DS, CONST_DQ, CONST_M1, 
CONST_SH, CONST_SH32, CONST_PSI, + DSX, FRB); + type input_reg_c_t is (NONE, RS, RCR, RBC, FRC, FRS); type output_reg_a_t is (NONE, RT, RA, FRT); type rc_t is (NONE, ONE, RC, RCOE); type carry_in_t is (ZERO, CA, OV, ONE); @@ -634,6 +637,8 @@ package body decode_types is when INSN_divwu => return "011111"; when INSN_divd => return "011111"; when INSN_divw => return "011111"; + when INSN_hashchk => return "011111"; + when INSN_hashst => return "011111"; when INSN_eieio => return "011111"; when INSN_eqv => return "011111"; when INSN_extsb => return "011111"; diff --git a/execute1.vhdl b/execute1.vhdl index b9ad9ad..e14bcd2 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -1844,6 +1844,8 @@ begin lv.byte_reverse := e_in.byte_reverse xnor ex1.msr(MSR_LE); lv.sign_extend := e_in.sign_extend; lv.update := e_in.update; + -- abuse e_in.is_signed to indicate hash store/check instructions + lv.hash := e_in.is_signed; lv.xerc := xerc_in; lv.reserve := e_in.reserve; lv.rc := e_in.rc; diff --git a/loadstore1.vhdl b/loadstore1.vhdl index 6d59fb3..92724a9 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -68,6 +68,8 @@ architecture behave of loadstore1 is sync : std_ulogic; tlbie : std_ulogic; dcbz : std_ulogic; + hashst : std_ulogic; + hashcmp : std_ulogic; read_spr : std_ulogic; write_spr : std_ulogic; mmu_op : std_ulogic; @@ -97,7 +99,7 @@ architecture behave of loadstore1 is virt_mode : std_ulogic; priv_mode : std_ulogic; load_sp : std_ulogic; - sprsel : std_ulogic_vector(2 downto 0); + sprsel : std_ulogic_vector(3 downto 0); ric : std_ulogic_vector(1 downto 0); is_slbia : std_ulogic; align_intr : std_ulogic; @@ -107,25 +109,14 @@ architecture behave of loadstore1 is incomplete : std_ulogic; ea_valid : std_ulogic; end record; - constant request_init : request_t := (valid => '0', dc_req => '0', load => '0', store => '0', - flush => '0', touch => '0', sync => '0', tlbie => '0', - dcbz => '0', read_spr => '0', write_spr => '0', mmu_op => '0', - instr_fault => '0', do_update 
=> '0', - mode_32bit => '0', prefixed => '0', - addr => (others => '0'), + constant request_init : request_t := (addr => (others => '0'), byte_sel => x"00", second_bytes => x"00", store_data => (others => '0'), instr_tag => instr_tag_init, write_reg => 6x"00", length => x"0", - elt_length => x"0", byte_reverse => '0', brev_mask => "000", - sign_extend => '0', update => '0', - xerc => xerc_init, reserve => '0', - atomic_qw => '0', atomic_first => '0', atomic_last => '0', - rc => '0', nc => '0', - virt_mode => '0', priv_mode => '0', load_sp => '0', - sprsel => "000", ric => "00", is_slbia => '0', align_intr => '0', - dawr_intr => '0', - dword_index => '0', two_dwords => '0', incomplete => '0', - ea_valid => '0'); + elt_length => x"0", brev_mask => "000", + xerc => xerc_init, + sprsel => "0000", ric => "00", + others => '0'); type reg_stage1_t is record req : request_t; @@ -147,7 +138,7 @@ architecture behave of loadstore1 is one_cycle : std_ulogic; wr_sel : std_ulogic_vector(1 downto 0); addr0 : std_ulogic_vector(63 downto 0); - sprsel : std_ulogic_vector(2 downto 0); + sprsel : std_ulogic_vector(3 downto 0); dbg_spr : std_ulogic_vector(63 downto 0); dbg_spr_ack: std_ulogic; end record; @@ -180,6 +171,7 @@ architecture behave of loadstore1 is dawrx : dawrx_array_t; dawr_uplim : dawr_array_t; dawr_upd : std_ulogic; + hashkeyr : std_ulogic_vector(63 downto 0); end record; signal req_in : request_t; @@ -201,6 +193,28 @@ architecture behave of loadstore1 is signal stage1_dreq : std_ulogic; signal stage1_dawr_match : std_ulogic; + type hw_array_4 is array(0 to 3) of std_ulogic_vector(15 downto 0); + type hw_array_8 is array(0 to 7) of std_ulogic_vector(15 downto 0); + + type hash_reg_t is record + active : std_ulogic; + done : std_ulogic; + step : unsigned(2 downto 0); + z0 : std_ulogic_vector(30 downto 0); + key : hw_array_4; + xleft : hw_array_4; + xright : hw_array_4; + end record; + constant hash_reg_init : hash_reg_t := ( + active => '0', done => '0', step => "000", 
z0 => (others => '0'), + key => (others => (others => '0')), + xleft => (others => (others => '0')), xright => (others => (others => '0'))); + + signal hash_r : hash_reg_t; + signal hash_rin : hash_reg_t; + signal hash_start : std_ulogic; + signal hash_result : std_ulogic_vector(63 downto 0); + -- Generate byte enables from sizes function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is begin @@ -336,7 +350,7 @@ begin r1.req.instr_fault <= '0'; r1.req.load <= '0'; r1.req.priv_mode <= '0'; - r1.req.sprsel <= "000"; + r1.req.sprsel <= "0000"; r1.req.ric <= "00"; r1.req.xerc <= xerc_init; r1.dawr_ll <= (others => '0'); @@ -350,7 +364,7 @@ begin r2.req.instr_fault <= '0'; r2.req.load <= '0'; r2.req.priv_mode <= '0'; - r2.req.sprsel <= "000"; + r2.req.sprsel <= "0000"; r2.req.ric <= "00"; r2.req.xerc <= xerc_init; @@ -448,6 +462,98 @@ begin end process; end generate; + -- This does the HashDigest computation from ISA Book I section 3.3.17 + -- in 8 cycles. In each cycle it does 4 steps of key expansion, and + -- 4 rounds of cipher for each of 4 lanes. 
+ loadstore_hash_reg: process(clk) + begin + if rising_edge(clk) then + if rst = '1' then + hash_r <= hash_reg_init; + else + if hash_r.done = '1' then + report "hash_result = " & to_hstring(hash_result); + end if; + hash_r <= hash_rin; + end if; + end if; + end process; + + loadstore_hash_comb: process(all) + variable hv : hash_reg_t; + variable keys : hw_array_8; + variable xl, xr : std_ulogic_vector(15 downto 0); + variable z, t : std_ulogic_vector(15 downto 0); + variable fx : std_ulogic_vector(15 downto 0); + variable ra, rb : std_ulogic_vector(63 downto 0); + variable key : std_ulogic_vector(63 downto 0); + variable j, k : integer; + begin + hv := hash_r; + hv.done := '0'; + if hash_r.active = '1' then + -- Initialize keys to avoid yosys/ghdl incorrectly inferring latches + for i in 0 to 7 loop + keys(i) := (others => '0'); + end loop; + -- generate the next 4 key words + for i in 0 to 3 loop + keys(i) := hash_r.key(i); + end loop; + for i in 4 to 7 loop + z := 15x"0" & hash_r.z0(34 - i); + t := (keys(i-1)(2 downto 0) & keys(i-1)(15 downto 3)) xor keys(i-3); + keys(i) := x"fffc" xor z xor keys(i-4) xor t xor (t(0) & t(15 downto 1)); + hv.key(i-4) := keys(i); + end loop; + hv.z0 := hash_r.z0(26 downto 0) & "0000"; + -- do 4 rounds for each of 4 lanes + for lane in 0 to 3 loop + xr := hash_r.xright(lane); + xl := hash_r.xleft(lane); + for i in 0 to 3 loop + fx := ((xl(14 downto 0) & xl(15)) and (xl(7 downto 0) & xl(15 downto 8))) xor + (xl(13 downto 0) & xl(15 downto 14)); + t := xr xor fx xor hash_r.key((i + lane) mod 4); + xr := xl; + xl := t; + end loop; + hv.xright(lane) := xr; + hv.xleft(lane) := xl; + end loop; + hv.step := hash_r.step + 1; + if hash_r.step = 3x"7" then + hv.active := '0'; + hv.done := '1'; + end if; + elsif hash_start = '1' then + -- start a new hash process + hv.z0 := 31x"7D12B0E6"; -- 0xFA2561CD >> 1 + ra := l_in.addr1; + rb := l_in.data; + key := r3.hashkeyr; + for lane in 0 to 3 loop + j := lane * 16; + k := (3 - lane) * 16; + 
hv.xright(lane)(15 downto 8) := rb(j + 7 downto j); + hv.xright(lane)(7 downto 0) := ra(k + 15 downto k + 8); + hv.xleft(lane)(15 downto 8) := rb(j + 15 downto j + 8); + hv.xleft(lane)(7 downto 0) := ra(k + 7 downto k); + end loop; + for i in 0 to 3 loop + j := (3 - i) * 16; + hv.key(i) := key(j + 15 downto j); + end loop; + hv.step := "000"; + hv.active := '1'; + end if; + -- only valid when hash_r.done = 1 + hash_result <= (hash_r.xright(0) & hash_r.xleft(0) & hash_r.xright(1) & hash_r.xleft(1)) xor + (hash_r.xright(2) & hash_r.xleft(2) & hash_r.xright(3) & hash_r.xleft(3)); + + hash_rin <= hv; + end process; + -- Translate a load/store instruction into the internal request format -- XXX this should only depend on l_in, but actually depends on -- r1.addr0 as well (in the l_in.second = 1 case). @@ -483,13 +589,16 @@ begin v.ric := l_in.insn(19 downto 18); if sprn(8 downto 7) = "01" then -- debug registers DAWR[X][01] - v.sprsel := '1' & sprn(3) & sprn(0); + v.sprsel := "01" & sprn(3) & sprn(0); + elsif sprn(2) = '1' then + -- HASH[P]KEYR + v.sprsel := "000" & sprn(0); elsif sprn(1) = '1' then -- DSISR and DAR - v.sprsel := "01" & sprn(0); + v.sprsel := "001" & sprn(0); else -- PID and PTCR - v.sprsel := "00" & sprn(8); + v.sprsel := "100" & sprn(8); end if; lsu_sum := std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2)); @@ -536,7 +645,7 @@ begin if l_in.repeat = '1' and l_in.update = '0' and addr(3) /= l_in.second then misaligned := '1'; end if; - v.align_intr := l_in.reserve and misaligned; + v.align_intr := (l_in.reserve or l_in.hash) and misaligned; v.atomic_first := not misaligned and not l_in.second; v.atomic_last := not misaligned and (l_in.second or not l_in.repeat); @@ -565,6 +674,7 @@ begin if l_in.length = "0000" then v.touch := '1'; end if; + v.hashst := l_in.hash; when OP_LOAD => if l_in.update = '0' or l_in.second = '0' then v.load := '1'; @@ -579,6 +689,7 @@ begin -- write back address to RA v.do_update := '1'; end if; + v.hashcmp := 
l_in.hash; when OP_DCBF => v.load := '1'; v.flush := '1'; @@ -631,6 +742,7 @@ begin v := r1; issue := '0'; dcreq := '0'; + hash_start <= '0'; if r1.busy = '0' then req := req_in; @@ -662,6 +774,7 @@ begin else -- For the lfs conversion cycle, leave the request valid -- for another cycle but with req.dc_req = 0. + -- (In other words we insert an extra dummy request.) -- For an MMU request last cycle, we have nothing -- to do in this cycle, so make it invalid. if r1.req.load_sp = '0' then @@ -695,9 +808,20 @@ begin -- we can change what's in r1 next cycle because the current thing -- in r1 will go into r2 v.req := req; + if issue = '1' and (req.hashst or req.hashcmp) = '1' then + -- need to initiate and then wait for the hash computation + hash_start <= not r1.busy; + v.busy := not hash_r.done; + if hash_r.done = '0' then + issue := '0'; + else + v.req.store_data := hash_result; + end if; + else + v.busy := (issue and (req.incomplete or req.load_sp)) or (req.valid and req.mmu_op); + end if; dcreq := issue; v.issued := issue; - v.busy := (issue and (req.incomplete or req.load_sp)) or (req.valid and req.mmu_op); else -- pipeline is stalled if r1.issued = '1' and d_in.error = '1' then @@ -723,7 +847,7 @@ begin variable byte_offset : unsigned(2 downto 0); variable interrupt : std_ulogic; variable dbg_spr_rd : std_ulogic; - variable sprsel : std_ulogic_vector(2 downto 0); + variable sprsel : std_ulogic_vector(3 downto 0); variable sprval : std_ulogic_vector(63 downto 0); variable dawr_match : std_ulogic; begin @@ -758,24 +882,30 @@ begin if dbg_spr_rd = '0' then sprsel := r1.req.sprsel; else - sprsel := '0' & dbg_spr_addr; + sprsel := "00" & dbg_spr_addr; + end if; + if sprsel(3) = '1' then + sprval := m_in.sprval; -- MMU regs + else + case sprsel(2 downto 0) is + when "100" => + sprval := r3.dawr(0) & "000"; + when "101" => + sprval := r3.dawr(1) & "000"; + when "110" => + sprval := 48x"0" & r3.dawrx(0); + when "111" => + sprval := 48x"0" & r3.dawrx(1); + when "000" => + 
sprval := r3.hashkeyr; + when "010" => + sprval := x"00000000" & r3.dsisr; + when "011" => + sprval := r3.dar; + when others => + sprval := (others => '0'); + end case; end if; - case sprsel is - when "100" => - sprval := r3.dawr(0) & "000"; - when "101" => - sprval := r3.dawr(1) & "000"; - when "110" => - sprval := 48x"0" & r3.dawrx(0); - when "111" => - sprval := 48x"0" & r3.dawrx(1); - when "010" => - sprval := x"00000000" & r3.dsisr; - when "011" => - sprval := r3.dar; - when others => - sprval := m_in.sprval; -- MMU regs - end case; if dbg_spr_req = '0' then v.dbg_spr_ack := '0'; elsif dbg_spr_rd = '1' and r2.dbg_spr_ack = '0' then @@ -790,9 +920,9 @@ begin v.req.store_data := store_data; v.req.dawr_intr := dawr_match; v.wait_dc := r1.req.valid and r1.req.dc_req and not r1.req.load_sp and - not r1.req.incomplete; + not r1.req.incomplete and not r1.req.hashcmp; v.wait_mmu := r1.req.valid and r1.req.mmu_op; - if r1.req.valid = '1' and r1.req.align_intr = '1' then + if r1.req.valid = '1' and (r1.req.align_intr or r1.req.hashcmp) = '1' then v.busy := '1'; v.one_cycle := '0'; else @@ -832,7 +962,9 @@ begin v.wait_mmu := '0'; end if; if r2.busy = '1' and r2.wait_mmu = '0' then - v.busy := '0'; + if r2.req.hashcmp = '0' or d_in.valid = '1' then + v.busy := '0'; + end if; end if; interrupt := (r2.req.valid and r2.req.align_intr) or @@ -877,6 +1009,7 @@ begin variable dsisr : std_ulogic_vector(31 downto 0); variable itlb_fault : std_ulogic; variable trim_ctl : trim_ctl_t; + variable hashchk_trap : std_ulogic; begin v := r3; @@ -966,6 +1099,15 @@ begin v.load_data := data_permuted; end if; + hashchk_trap := '0'; + if d_in.valid = '1' and r2.req.hashcmp = '1' then + if d_in.data = r2.req.store_data then + v.complete := '1'; + else + hashchk_trap := '1'; + exception := '1'; + end if; + end if; if r2.req.valid = '1' then if r2.req.read_spr = '1' then @@ -982,22 +1124,24 @@ begin write_enable := '1'; end if; if r2.req.write_spr = '1' then - if r2.req.sprsel(2) = '1' then + 
if r2.req.sprsel(3 downto 2) = "01" then v.dawr_upd := '1'; end if; case r2.req.sprsel is - when "100" => + when "0100" => v.dawr(0) := r2.req.store_data(63 downto 3); - when "101" => + when "0101" => v.dawr(1) := r2.req.store_data(63 downto 3); - when "110" => + when "0110" => v.dawrx(0) := r2.req.store_data(15 downto 0); - when "111" => + when "0111" => v.dawrx(1) := r2.req.store_data(15 downto 0); - when "010" => + when "0010" => v.dsisr := r2.req.store_data(31 downto 0); - when "011" => + when "0011" => v.dar := r2.req.store_data; + when "0000" => + v.hashkeyr := r2.req.store_data; when others => end case; end if; @@ -1016,7 +1160,7 @@ begin if d_in.valid = '1' then if r2.req.incomplete = '0' then write_enable := r2.req.load and not r2.req.load_sp and - not r2.req.flush and not r2.req.touch; + not r2.req.flush and not r2.req.touch and not r2.req.hashcmp; -- stores write back rA update do_update := r2.req.update and r2.req.store; end if; @@ -1074,6 +1218,10 @@ begin v.intr_vec := 16#600#; v.srr1(47 - 34) := r2.req.prefixed; v.dar := r2.req.addr; + elsif hashchk_trap = '1' then + v.intr_vec := 16#700#; + v.srr1(47 - 34) := r2.req.prefixed; + v.srr1(47 - 46) := '1'; elsif r2.req.instr_fault = '0' then v.srr1(47 - 34) := r2.req.prefixed; v.dar := r2.req.addr; diff --git a/predecode.vhdl b/predecode.vhdl index e8689ef..7169864 100644 --- a/predecode.vhdl +++ b/predecode.vhdl @@ -364,15 +364,15 @@ architecture behaviour of predecoder is 2#0_01110_11100# => INSN_nand, 2#0_00011_01000# => INSN_neg, 2#0_10011_01000# => INSN_neg, -- nego - -- next 8 are reserved no-op instructions + -- next 6 are reserved no-op instructions 2#0_10000_10010# => INSN_rnop, 2#0_10001_10010# => INSN_rnop, 2#0_10010_10010# => INSN_rnop, 2#0_10011_10010# => INSN_rnop, 2#0_10100_10010# => INSN_rnop, 2#0_10101_10010# => INSN_rnop, - 2#0_10110_10010# => INSN_rnop, - 2#0_10111_10010# => INSN_rnop, + 2#0_10110_10010# => INSN_hashst, + 2#0_10111_10010# => INSN_hashchk, 2#0_00011_11100# => INSN_nor, 
2#0_01101_11100# => INSN_or, 2#0_01100_11100# => INSN_orc,