From c49c32b5fe76ebb2eba1f7429777852d2046013f Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 30 Jan 2025 21:16:05 +1100 Subject: [PATCH] core: Implement DEXCR and HDEXCR registers Of the defined aspect bits (which are all read-write), only the NPHIE and PHIE bits have any function at all, since Microwatt is an in-order single-issue machine and never does any branch speculation. Also, since there is no privileged non-hypervisor mode, the high 32 bits of DEXCR do nothing. Signed-off-by: Paul Mackerras --- common.vhdl | 24 +++++++++++++++++++++++- decode1.vhdl | 5 +++++ execute1.vhdl | 40 ++++++++++++++++++++++++++++++++++++++++ loadstore1.vhdl | 9 +++++++-- 4 files changed, 75 insertions(+), 3 deletions(-) diff --git a/common.vhdl b/common.vhdl index 6b60e49..3cb552d 100644 --- a/common.vhdl +++ b/common.vhdl @@ -71,6 +71,10 @@ package common is constant SPR_DAWRX1 : spr_num_t := 189; constant SPR_HASHKEYR : spr_num_t := 468; constant SPR_HASHPKEYR : spr_num_t := 469; + constant SPR_DEXCR : spr_num_t := 828; + constant SPR_DEXCRU : spr_num_t := 812; + constant SPR_HDEXCR : spr_num_t := 471; + constant SPR_HDEXCU : spr_num_t := 455; -- PMU registers constant SPR_UPMC1 : spr_num_t := 771; @@ -184,6 +188,7 @@ package common is constant SPRSEL_DSCR : spr_selector := 4x"b"; constant SPRSEL_PIR : spr_selector := 4x"c"; constant SPRSEL_CIABR : spr_selector := 4x"d"; + constant SPRSEL_DEXCR : spr_selector := 4x"e"; constant SPRSEL_XER : spr_selector := 4x"f"; -- FSCR and HFSCR bit numbers @@ -266,6 +271,16 @@ package common is pri : std_ulogic_vector(31 downto 0); -- 8 bits each for 4 cpus end record; + -- Bits in each half of DEXCR and HDEXCR + subtype aspect_bits_t is std_ulogic_vector(4 downto 0); + constant aspect_bits_init : aspect_bits_t := (others => '1'); + -- Bit numbers in aspect_bits_t + constant DEXCR_SBHE : integer := 4; -- speculative branch hint enable + constant DEXCR_IBRTPD : integer := 3; -- indirect branch recurrent target prediction disable + 
constant DEXCR_SRAPD : integer := 2; -- subroutine return address prediction disable + constant DEXCR_NPHIE : integer := 1; -- non-privileged hash instruction enable + constant DEXCR_PHIE : integer := 0; -- privileged hash instruction enable + -- This needs to die... type ctrl_t is record wait_state: std_ulogic; @@ -287,12 +302,18 @@ package common is heir: std_ulogic_vector(63 downto 0); dscr: std_ulogic_vector(24 downto 0); ciabr: std_ulogic_vector(63 downto 0); + dexcr_pnh: aspect_bits_t; + dexcr_pro: aspect_bits_t; + hdexcr_hyp: aspect_bits_t; + hdexcr_enf: aspect_bits_t; end record; constant ctrl_t_init : ctrl_t := (wait_state => '0', run => '1', xer_low => 18x"0", fscr_ic => x"0", fscr_pref => '1', fscr_scv => '1', fscr_tar => '1', fscr_dscr => '1', hfscr_ic => x"0", hfscr_pref => '1', hfscr_tar => '1', hfscr_dscr => '1', hfscr_fp => '1', dscr => (others => '0'), + dexcr_pnh => aspect_bits_init, dexcr_pro => aspect_bits_init, + hdexcr_hyp => aspect_bits_init, hdexcr_enf => aspect_bits_init, others => (others => '0')); type Fetch1ToIcacheType is record @@ -604,6 +625,7 @@ package common is e2stall : std_ulogic; msr : std_ulogic_vector(63 downto 0); hashkey : std_ulogic_vector(63 downto 0); + hash_enable : std_ulogic; end record; constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0', @@ -616,7 +638,7 @@ package common is length => (others => '0'), mode_32bit => '0', is_32bit => '0', prefixed => '0', repeat => '0', second => '0', e2stall => '0', - msr => (others => '0'), hashkey => (others => '0')); + msr => (others => '0'), hashkey => (others => '0'), hash_enable => '0'); type Loadstore1ToExecute1Type is record busy : std_ulogic; diff --git a/decode1.vhdl b/decode1.vhdl index 4be6413..c762b18 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -499,6 +499,11 @@ architecture behaviour of decode1 is i.sel := SPRSEL_PIR; when SPR_CIABR => i.sel := SPRSEL_CIABR; + when SPR_DEXCR | SPR_HDEXCR => + 
i.sel := SPRSEL_DEXCR; + when SPR_DEXCRU | SPR_HDEXCU => + i.sel := SPRSEL_DEXCR; + i.ronly := '1'; when others => i.valid := '0'; end case; diff --git a/execute1.vhdl b/execute1.vhdl index 2f6c6ff..26f8dae 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -425,6 +425,32 @@ architecture behaviour of execute1 is return ret; end; + -- return contents of DEXCR (SPR number bit 9 = 1) or HDEXCR (bit 9 = 0) + -- top 32 bits are zeroed for access via non-privileged number (SPR number bit 4 = 0) + function assemble_dexcr(c: ctrl_t; insn: std_ulogic_vector(31 downto 0)) return std_ulogic_vector is + variable ret : std_ulogic_vector(63 downto 0); + variable spr : std_ulogic_vector(9 downto 0); + variable dexh, dexl : aspect_bits_t; + begin + ret := (others => '0'); + spr := insn(15 downto 11) & insn(20 downto 16); + if spr(9) = '1' then + dexh := c.dexcr_pnh; + dexl := c.dexcr_pro; + else + dexh := c.hdexcr_hyp; + dexl := c.hdexcr_enf; + end if; + if spr(4) = '0' then + dexh := (others => '0'); + end if; + ret := dexh(DEXCR_SBHE) & "00" & dexh(DEXCR_IBRTPD) & dexh(DEXCR_SRAPD) & + dexh(DEXCR_NPHIE) & dexh(DEXCR_PHIE) & 25x"0" & + dexl(DEXCR_SBHE) & "00" & dexl(DEXCR_IBRTPD) & dexl(DEXCR_SRAPD) & + dexl(DEXCR_NPHIE) & dexl(DEXCR_PHIE) & 25x"0"; + return ret; + end; + -- Tell vivado to keep the hierarchy for the random module so that the -- net names in the xdc file match. 
attribute keep_hierarchy : string; @@ -1600,6 +1626,7 @@ begin variable go : std_ulogic; variable bypass_valid : std_ulogic; variable is_scv : std_ulogic; + variable dex : aspect_bits_t; begin v := ex1; if busy_out = '0' then @@ -1735,6 +1762,13 @@ begin bperm_start <= go and actions.start_bperm; pmu_trace <= go and actions.do_trace; + -- evaluate DEXCR/HDEXCR bits that apply at present + if ex1.msr(MSR_PR) = '0' then + dex := ctrl.hdexcr_hyp; + else + dex := ctrl.dexcr_pro or ctrl.hdexcr_enf; + end if; + if not HAS_FPU and ex1.div_in_progress = '1' then v.div_in_progress := not divider_to_x.valid; v.busy := not divider_to_x.valid; @@ -1850,6 +1884,11 @@ begin lv.second := e_in.second; lv.e2stall := fp_in.f2stall; lv.hashkey := ramspr_odd; + if e_in.insn(7) = '0' then + lv.hash_enable := dex(DEXCR_PHIE); + else + lv.hash_enable := dex(DEXCR_NPHIE); + end if; -- Outputs to FPU fv.op := e_in.insn_type; @@ -1897,6 +1936,7 @@ begin 39x"0" & ctrl.dscr when SPRSEL_DSCR, 56x"0" & std_ulogic_vector(to_unsigned(CPU_INDEX, 8)) when SPRSEL_PIR, ctrl.ciabr when SPRSEL_CIABR, + assemble_dexcr(ctrl, ex1.insn) when SPRSEL_DEXCR, assemble_xer(ex1.e.xerc, ctrl.xer_low) when others; stage2_stall <= l_in.l2stall or fp_in.f2stall; diff --git a/loadstore1.vhdl b/loadstore1.vhdl index 1785685..e3bd558 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -565,6 +565,7 @@ begin variable sprn : std_ulogic_vector(9 downto 0); variable misaligned : std_ulogic; variable addr_mask : std_ulogic_vector(2 downto 0); + variable hash_nop : std_ulogic; begin v := request_init; sprn := l_in.insn(15 downto 11) & l_in.insn(20 downto 16); @@ -641,7 +642,7 @@ begin if l_in.repeat = '1' and l_in.update = '0' and addr(3) /= l_in.second then misaligned := '1'; end if; - v.align_intr := (l_in.reserve or l_in.hash) and misaligned; + v.align_intr := (l_in.reserve or (l_in.hash and l_in.hash_enable)) and misaligned; v.atomic_first := not misaligned and not l_in.second; v.atomic_last := not misaligned and 
(l_in.second or not l_in.repeat); @@ -661,6 +662,7 @@ begin end if; end if; + hash_nop := '0'; case l_in.op is when OP_SYNC => v.sync := '1'; @@ -671,6 +673,7 @@ begin v.touch := '1'; end if; v.hashst := l_in.hash; + hash_nop := l_in.hash and not l_in.hash_enable; when OP_LOAD => if l_in.update = '0' or l_in.second = '0' then v.load := '1'; @@ -686,6 +689,7 @@ begin v.do_update := '1'; end if; v.hashcmp := l_in.hash; + hash_nop := l_in.hash and not l_in.hash_enable; when OP_DCBF => v.load := '1'; v.flush := '1'; @@ -709,7 +713,8 @@ begin v.mmu_op := '1'; when others => end case; - v.dc_req := l_in.valid and (v.load or v.store or v.sync or v.dcbz) and not v.align_intr; + v.dc_req := l_in.valid and (v.load or v.store or v.sync or v.dcbz) and not v.align_intr and + not hash_nop; v.incomplete := v.dc_req and v.two_dwords; -- Work out controls for load and store formatting