From 5ddd8884fafd881aa3639c031dbc4b0d68f8ef6d Mon Sep 17 00:00:00 2001
From: Paul Mackerras
Date: Wed, 15 Jan 2025 15:18:23 +1100
Subject: [PATCH] core: Implement two data watchpoints

This implements the DAWR0, DAWRX0, DAWR1, and DAWRX1 registers, which
provide the ability to set watchpoints on two ranges of data addresses
and take an interrupt when an access is made to either range.

The address comparisons are done in loadstore1 in the second cycle
(doing it in the first cycle turned out to have poor timing).  If a
match is detected, a signal is sent to the dcache which causes the
access to fail and generate an error signal back to loadstore1, in
much the same way that a protection violation would, whereupon a data
storage interrupt is generated.

Signed-off-by: Paul Mackerras
---
Review notes (ignored by git am; not part of the commit message):
 - Line breaks and indentation were reconstructed from a copy of this
   patch whose whitespace had been collapsed.  Hunk line counts were
   re-checked, but context-line whitespace is a best guess: verify it
   against the tree, or apply with --ignore-whitespace.
 - The DAWR match loop in loadstore1 stage 2 now iterates over
   "0 to num_dawr - 1" instead of a hard-coded "0 to 1", matching the
   reset loop and the upper-limit loop added by this same patch.
 - dcache.vhdl gains a dawr_m record field that nothing in this patch
   reads or writes.  TODO: confirm whether a follow-up patch uses it.

 common.vhdl     |   5 ++
 dcache.vhdl     |  11 +++-
 decode2.vhdl    |   6 +-
 loadstore1.vhdl | 149 +++++++++++++++++++++++++++++++++++++++---------
 4 files changed, 142 insertions(+), 29 deletions(-)

diff --git a/common.vhdl b/common.vhdl
index dc5348a..1c8642b 100644
--- a/common.vhdl
+++ b/common.vhdl
@@ -65,6 +65,10 @@ package common is
     constant SPR_VRSAVE : spr_num_t := 256;
     constant SPR_PIR    : spr_num_t := 1023;
     constant SPR_CIABR  : spr_num_t := 187;
+    constant SPR_DAWR0  : spr_num_t := 180;
+    constant SPR_DAWR1  : spr_num_t := 181;
+    constant SPR_DAWRX0 : spr_num_t := 188;
+    constant SPR_DAWRX1 : spr_num_t := 189;
 
     -- PMU registers
     constant SPR_UPMC1  : spr_num_t := 771;
@@ -624,6 +628,7 @@ package common is
         addr : std_ulogic_vector(63 downto 0);
         data : std_ulogic_vector(63 downto 0);          -- valid the cycle after .valid = 1
         byte_sel : std_ulogic_vector(7 downto 0);
+        dawr_match : std_ulogic;                        -- valid the cycle after .valid = 1
     end record;
     constant Loadstore1ToDcacheInit : Loadstore1ToDcacheType :=
         (addr => (others => '0'), data => (others => '0'), byte_sel => x"00",
diff --git a/dcache.vhdl b/dcache.vhdl
index ce7b351..ff7383c 100644
--- a/dcache.vhdl
+++ b/dcache.vhdl
@@ -316,6 +316,7 @@ architecture rtl of dcache is
         hit_way   : way_t;
         same_tag  : std_ulogic;
         mmu_req   : std_ulogic;
+        dawr_m    : std_ulogic;
     end record;
 
     -- First stage register, contains state for stage 1 of load hits
@@ -635,6 +636,8 @@ begin
                     -- put directly into req.data in the dcache_slow process below.
                     r0.req.data <= d_in.data;
                     r0.d_valid <= r0.req.valid;
+                    -- the dawr_match signal has the same timing as the data
+                    r0.req.dawr_match <= d_in.dawr_match;
                 end if;
             end if;
         end process;
@@ -953,12 +956,18 @@ begin
         variable snp_matches : std_ulogic_vector(TLB_NUM_WAYS - 1 downto 0);
         variable snoop_match : std_ulogic;
         variable hit_reload  : std_ulogic;
+        variable dawr_match  : std_ulogic;
     begin
         -- Extract line, row and tag from request
         rindex := get_index(r0.req.addr);
         req_index <= rindex;
         req_row := get_row(r0.req.addr);
         req_tag <= get_tag(ra);
+        if r0.d_valid = '0' then
+            dawr_match := d_in.dawr_match;
+        else
+            dawr_match := r0.req.dawr_match;
+        end if;
 
         go := r0_valid and not (r0.tlbie or r0.tlbld) and not r1.ls_error;
         if is_X(r0.req.addr) then
@@ -1135,7 +1144,7 @@ begin
         rc_ok <= perm_attr.reference and (r0.req.load or perm_attr.changed);
         perm_ok <= (r0.req.priv_mode or not perm_attr.priv) and
                    (perm_attr.wr_perm or (r0.req.load and perm_attr.rd_perm));
-        access_ok <= valid_ra and perm_ok and rc_ok;
+        access_ok <= valid_ra and perm_ok and rc_ok and not dawr_match;
 
         -- Combine the request and cache hit status to decide what
         -- operation needs to be done
diff --git a/decode2.vhdl b/decode2.vhdl
index 7e993d5..cc241a2 100644
--- a/decode2.vhdl
+++ b/decode2.vhdl
@@ -477,7 +477,8 @@ begin
                     case decode_spr_num(d_in.insn) is
                         when SPR_XER =>
                             v.input_ov := '1';
-                        when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR =>
+                        when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR |
+                            SPR_DAWR0 | SPR_DAWR1 | SPR_DAWRX0 | SPR_DAWRX1 =>
                             unit := LDST;
                         when SPR_TAR =>
                             v.e.uses_tar := '1';
@@ -499,7 +500,8 @@ begin
                         when SPR_XER =>
                             v.e.output_xer := '1';
                             v.output_ov := '1';
-                        when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR =>
+                        when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR |
+                            SPR_DAWR0 | SPR_DAWR1 | SPR_DAWRX0 | SPR_DAWRX1 =>
                             unit := LDST;
                             if d_in.valid = '1' then
                                 v.sgl_pipe := '1';
diff --git a/loadstore1.vhdl b/loadstore1.vhdl
index 85fb129..0816931 100644
--- a/loadstore1.vhdl
+++ b/loadstore1.vhdl
@@ -95,10 +95,11 @@ architecture behave of loadstore1 is
         virt_mode    : std_ulogic;
         priv_mode    : std_ulogic;
         load_sp      : std_ulogic;
-        sprsel       : std_ulogic_vector(1 downto 0);
+        sprsel       : std_ulogic_vector(2 downto 0);
         ric          : std_ulogic_vector(1 downto 0);
         is_slbia     : std_ulogic;
         align_intr   : std_ulogic;
+        dawr_intr    : std_ulogic;
         dword_index  : std_ulogic;
         two_dwords   : std_ulogic;
         incomplete   : std_ulogic;
@@ -119,7 +120,8 @@ architecture behave of loadstore1 is
                                      atomic_qw => '0', atomic_first => '0', atomic_last => '0',
                                      rc => '0', nc => '0',
                                      virt_mode => '0', priv_mode => '0', load_sp => '0',
-                                     sprsel => "00", ric => "00", is_slbia => '0', align_intr => '0',
+                                     sprsel => "000", ric => "00", is_slbia => '0', align_intr => '0',
+                                     dawr_intr => '0',
                                      dword_index => '0', two_dwords => '0', incomplete => '0',
                                      ea_valid => '0');
 
@@ -140,11 +142,15 @@ architecture behave of loadstore1 is
         one_cycle  : std_ulogic;
         wr_sel     : std_ulogic_vector(1 downto 0);
         addr0      : std_ulogic_vector(63 downto 0);
-        sprsel     : std_ulogic_vector(1 downto 0);
+        sprsel     : std_ulogic_vector(2 downto 0);
         dbg_spr    : std_ulogic_vector(63 downto 0);
         dbg_spr_ack: std_ulogic;
     end record;
 
+    constant num_dawr : positive := 2;
+    type dawr_array_t is array(0 to num_dawr - 1) of std_ulogic_vector(63 downto 3);
+    type dawrx_array_t is array(0 to num_dawr - 1) of std_ulogic_vector(15 downto 0);
+
     type reg_stage3_t is record
         state        : state_t;
         complete     : std_ulogic;
@@ -166,6 +172,10 @@ architecture behave of loadstore1 is
         intr_vec     : integer range 0 to 16#fff#;
         srr1         : std_ulogic_vector(15 downto 0);
         events       : Loadstore1EventType;
+        dawr         : dawr_array_t;
+        dawrx        : dawrx_array_t;
+        dawr_uplim   : dawr_array_t;
+        dawr_upd     : std_ulogic;
     end record;
 
     signal req_in   : request_t;
@@ -185,6 +195,7 @@ architecture behave of loadstore1 is
     signal stage1_req      : request_t;
     signal stage1_dcreq    : std_ulogic;
     signal stage1_dreq     : std_ulogic;
+    signal stage1_dawr_match : std_ulogic;
 
     -- Generate byte enables from sizes
     function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is
@@ -287,6 +298,25 @@ architecture behave of loadstore1 is
         return fs2;
     end;
 
+    function dawrx_match_enable(dawrx : std_ulogic_vector(15 downto 0); virt_mode : std_ulogic;
+                                priv_mode : std_ulogic; is_store : std_ulogic)
+        return boolean is
+    begin
+        -- check PRIVM field; note priv_mode = '1' implies hypervisor mode
+        if (priv_mode = '0' and dawrx(0) = '0') or (priv_mode = '1' and dawrx(2) = '0') then
+            return false;
+        end if;
+        -- check WT/WTI fields
+        if dawrx(3) = '0' and virt_mode /= dawrx(4) then
+            return false;
+        end if;
+        -- check DW/DR fields
+        if (is_store = '0' and dawrx(5) = '0') or (is_store = '1' and dawrx(6) = '0') then
+            return false;
+        end if;
+        return true;
+    end;
+
 begin
     loadstore1_reg: process(clk)
     begin
@@ -302,7 +332,7 @@ begin
                 r1.req.instr_fault <= '0';
                 r1.req.load <= '0';
                 r1.req.priv_mode <= '0';
-                r1.req.sprsel <= "00";
+                r1.req.sprsel <= "000";
                 r1.req.ric <= "00";
                 r1.req.xerc <= xerc_init;
 
@@ -313,7 +343,7 @@ begin
                 r2.req.instr_fault <= '0';
                 r2.req.load <= '0';
                 r2.req.priv_mode <= '0';
-                r2.req.sprsel <= "00";
+                r2.req.sprsel <= "000";
                 r2.req.ric <= "00";
                 r2.req.xerc <= xerc_init;
 
@@ -330,12 +360,19 @@ begin
                 r3.stage1_en <= '1';
                 r3.events.load_complete <= '0';
                 r3.events.store_complete <= '0';
+                for i in 0 to num_dawr - 1 loop
+                    r3.dawr(i) <= (others => '0');
+                    r3.dawrx(i) <= (others => '0');
+                    r3.dawr_uplim(i) <= (others => '0');
+                end loop;
+                r3.dawr_upd <= '0';
                 flushing <= '0';
             else
                 r1 <= r1in;
                 r2 <= r2in;
                 r3 <= r3in;
-                flushing <= (flushing or (r1in.req.valid and r1in.req.align_intr)) and
+                flushing <= (flushing or (r1in.req.valid and
+                                          (r1in.req.align_intr or r1in.req.dawr_intr))) and
                             not flush;
             end if;
             stage1_dreq <= stage1_dcreq;
@@ -437,12 +474,15 @@ begin
         v.virt_mode := l_in.virt_mode;
         v.priv_mode := l_in.priv_mode;
         v.ric := l_in.insn(19 downto 18);
-        if sprn(1) = '1' then
+        if sprn(8 downto 7) = "01" then
+            -- debug registers DAWR[X][01]
+            v.sprsel := '1' & sprn(3) & sprn(0);
+        elsif sprn(1) = '1' then
             -- DSISR and DAR
-            v.sprsel := '1' & sprn(0);
+            v.sprsel := "01" & sprn(0);
         else
             -- PID and PTCR
-            v.sprsel := '0' & sprn(8);
+            v.sprsel := "00" & sprn(8);
         end if;
 
         lsu_sum := std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2));
@@ -547,7 +587,7 @@ begin
                 v.ea_valid := '0';
             when OP_MTSPR =>
                 v.write_spr := '1';
-                v.mmu_op := not sprn(1);
+                v.mmu_op := not (sprn(1) or sprn(2));
                 v.ea_valid := '0';
             when OP_FETCH_FAILED =>
                 -- send it to the MMU to do the radix walk
@@ -659,8 +699,12 @@ begin
         variable byte_offset : unsigned(2 downto 0);
         variable interrupt : std_ulogic;
         variable dbg_spr_rd : std_ulogic;
-        variable sprsel : std_ulogic_vector(1 downto 0);
+        variable sprsel : std_ulogic_vector(2 downto 0);
         variable sprval : std_ulogic_vector(63 downto 0);
+        variable dawr_match : std_ulogic;
+        variable addr : std_ulogic_vector(63 downto 3);
+        variable addl : unsigned(64 downto 3);
+        variable addu : unsigned(64 downto 3);
     begin
         v := r2;
 
@@ -677,21 +721,47 @@ begin
             end if;
         end loop;
 
+        -- Test for DAWR0/1 matches
+        dawr_match := '0';
+        for i in 0 to num_dawr - 1 loop
+            addr := r1.req.addr(63 downto 3);
+            if r1.req.priv_mode = '1' and r3.dawrx(i)(7) = '1' then
+                -- HRAMMC=1 => trim top bit from address
+                addr(63) := '0';
+            end if;
+            addl := unsigned('0' & addr) - unsigned('0' & r3.dawr(i));  -- bit 64 = borrow: addr < DAWR
+            addu := unsigned('0' & r3.dawr_uplim(i)) - unsigned('0' & addr);  -- bit 64 = borrow: addr > limit
+            if addl(64) = '0' and addu(64) = '0' and
+                dawrx_match_enable(r3.dawrx(i), r1.req.virt_mode,
+                                   r1.req.priv_mode, r1.req.store) then
+                dawr_match := r1.req.valid and r1.req.dc_req and not r3.dawr_upd and
+                              not (r1.req.touch or r1.req.sync or r1.req.flush);
+            end if;
+        end loop;
+        stage1_dawr_match <= dawr_match;
+
         dbg_spr_rd := dbg_spr_req and not (r1.req.valid and r1.req.read_spr);
         if dbg_spr_rd = '0' then
             sprsel := r1.req.sprsel;
         else
-            sprsel := dbg_spr_addr;
+            sprsel := '0' & dbg_spr_addr;
         end if;
-        if sprsel(1) = '1' then
-            if sprsel(0) = '0' then
+        case sprsel is
+            when "100" =>
+                sprval := r3.dawr(0) & "000";
+            when "101" =>
+                sprval := r3.dawr(1) & "000";
+            when "110" =>
+                sprval := 48x"0" & r3.dawrx(0);
+            when "111" =>
+                sprval := 48x"0" & r3.dawrx(1);
+            when "010" =>
                 sprval := x"00000000" & r3.dsisr;
-            else
+            when "011" =>
                 sprval := r3.dar;
-            end if;
-        else
-            sprval := m_in.sprval;
-        end if;
+            when others =>
+                sprval := m_in.sprval; -- MMU regs
+        end case;
         if dbg_spr_req = '0' then
             v.dbg_spr_ack := '0';
         elsif dbg_spr_rd = '1' and r2.dbg_spr_ack = '0' then
@@ -704,6 +774,7 @@ begin
             v.req := r1.req;
             v.addr0 := r1.addr0;
             v.req.store_data := store_data;
+            v.req.dawr_intr := dawr_match;
             v.wait_dc := r1.req.valid and r1.req.dc_req and not r1.req.load_sp and
                          not r1.req.incomplete;
             v.wait_mmu := r1.req.valid and r1.req.mmu_op;
@@ -751,7 +822,7 @@ begin
         end if;
 
         interrupt := (r2.req.valid and r2.req.align_intr) or
-                     (d_in.error and (d_in.cache_paradox or d_in.reserve_nc)) or
+                     (d_in.error and (d_in.cache_paradox or d_in.reserve_nc or r2.req.dawr_intr)) or
                      m_in.err;
         if interrupt = '1' then
             v.req.valid := '0';
@@ -808,6 +879,15 @@ begin
         v.srr1 := (others => '0');
         v.events := (others => '0');
 
+        -- Evaluate DAWR upper limits after a clock edge
+        v.dawr_upd := '0';
+        if r3.dawr_upd = '1' then
+            for i in 0 to num_dawr - 1 loop
+                v.dawr_uplim(i) := std_ulogic_vector(unsigned(r3.dawr(i)) +
+                                                     unsigned(r3.dawrx(i)(15 downto 10)));
+            end loop;
+        end if;
+
         -- load data formatting
         -- shift and byte-reverse data bytes
         for i in 0 to 7 loop
@@ -887,12 +967,25 @@ begin
                 if r2.req.load_sp = '1' and r2.req.dc_req = '0' then
                     write_enable := '1';
                 end if;
-                if r2.req.write_spr = '1' and r2.req.mmu_op = '0' then
-                    if r2.req.sprsel(0) = '0' then
-                        v.dsisr := r2.req.store_data(31 downto 0);
-                    else
-                        v.dar := r2.req.store_data;
+                if r2.req.write_spr = '1' then
+                    if r2.req.sprsel(2) = '1' then
+                        v.dawr_upd := '1';
                     end if;
+                    case r2.req.sprsel is
+                        when "100" =>
+                            v.dawr(0) := r2.req.store_data(63 downto 3);
+                        when "101" =>
+                            v.dawr(1) := r2.req.store_data(63 downto 3);
+                        when "110" =>
+                            v.dawrx(0) := r2.req.store_data(15 downto 0);
+                        when "111" =>
+                            v.dawrx(1) := r2.req.store_data(15 downto 0);
+                        when "010" =>
+                            v.dsisr := r2.req.store_data(31 downto 0);
+                        when "011" =>
+                            v.dar := r2.req.store_data;
+                        when others =>
+                    end case;
                 end if;
             end if;
 
@@ -915,9 +1008,10 @@ begin
             end if;
         end if;
         if d_in.error = '1' then
-            if d_in.cache_paradox = '1' then
+            if d_in.cache_paradox = '1' or d_in.reserve_nc = '1' or r2.req.dawr_intr = '1' then
                 -- signal an interrupt straight away
                 exception := '1';
+                dsisr(63 - 41) := r2.req.dawr_intr;
                 dsisr(63 - 38) := not r2.req.load;
                 dsisr(63 - 37) := d_in.reserve_nc;
                 -- XXX there is no architected bit for this
@@ -970,6 +1064,7 @@ begin
             v.srr1(47 - 34) := r2.req.prefixed;
             v.dar := r2.req.addr;
             if m_in.segerr = '0' then
+                dsisr(63 - 38) := not r2.req.load;
                 v.intr_vec := 16#300#;
                 v.dsisr := dsisr;
             else
@@ -1036,8 +1131,10 @@ begin
         end if;
         if stage1_dreq = '1' then
             d_out.data <= store_data;
+            d_out.dawr_match <= stage1_dawr_match;
         else
             d_out.data <= r2.req.store_data;
+            d_out.dawr_match <= r2.req.dawr_intr;
         end if;
         d_out.hold <= l_in.e2stall;
 