NOTE(review): the line breaks and indentation of this patch were reconstructed
from a whitespace-collapsed copy.  Hunk line counts match the @@ headers, but
column alignment may differ from the target files, so applying it may require
whitespace-insensitive matching (e.g. git apply --ignore-whitespace).

diff --git a/common.vhdl b/common.vhdl
index dc5348a..1c8642b 100644
--- a/common.vhdl
+++ b/common.vhdl
@@ -65,6 +65,10 @@ package common is
     constant SPR_VRSAVE : spr_num_t := 256;
     constant SPR_PIR : spr_num_t := 1023;
     constant SPR_CIABR : spr_num_t := 187;
+    constant SPR_DAWR0 : spr_num_t := 180;
+    constant SPR_DAWR1 : spr_num_t := 181;
+    constant SPR_DAWRX0 : spr_num_t := 188;
+    constant SPR_DAWRX1 : spr_num_t := 189;
 
     -- PMU registers
     constant SPR_UPMC1 : spr_num_t := 771;
@@ -624,6 +628,7 @@ package common is
         addr : std_ulogic_vector(63 downto 0);
         data : std_ulogic_vector(63 downto 0); -- valid the cycle after .valid = 1
         byte_sel : std_ulogic_vector(7 downto 0);
+        dawr_match : std_ulogic; -- valid the cycle after .valid = 1
     end record;
     constant Loadstore1ToDcacheInit : Loadstore1ToDcacheType :=
         (addr => (others => '0'), data => (others => '0'), byte_sel => x"00",
diff --git a/dcache.vhdl b/dcache.vhdl
index ce7b351..ff7383c 100644
--- a/dcache.vhdl
+++ b/dcache.vhdl
@@ -316,6 +316,7 @@ architecture rtl of dcache is
         hit_way : way_t;
         same_tag : std_ulogic;
         mmu_req : std_ulogic;
+        dawr_m : std_ulogic; -- NOTE(review): no assignment or use of this field is visible in this patch
     end record;
 
     -- First stage register, contains state for stage 1 of load hits
@@ -635,6 +636,8 @@ begin
                 -- put directly into req.data in the dcache_slow process below.
                 r0.req.data <= d_in.data;
                 r0.d_valid <= r0.req.valid;
+                -- the dawr_match signal has the same timing as the data
+                r0.req.dawr_match <= d_in.dawr_match;
             end if;
         end if;
     end process;
@@ -953,12 +956,18 @@ begin
         variable snp_matches : std_ulogic_vector(TLB_NUM_WAYS - 1 downto 0);
         variable snoop_match : std_ulogic;
         variable hit_reload : std_ulogic;
+        variable dawr_match : std_ulogic; -- d_in.dawr_match until r0.d_valid is set, then the latched r0 copy
     begin
         -- Extract line, row and tag from request
         rindex := get_index(r0.req.addr);
         req_index <= rindex;
         req_row := get_row(r0.req.addr);
         req_tag <= get_tag(ra);
+        if r0.d_valid = '0' then
+            dawr_match := d_in.dawr_match;
+        else
+            dawr_match := r0.req.dawr_match;
+        end if;
 
         go := r0_valid and not (r0.tlbie or r0.tlbld) and not r1.ls_error;
         if is_X(r0.req.addr) then
@@ -1135,7 +1144,7 @@ begin
         rc_ok <= perm_attr.reference and (r0.req.load or perm_attr.changed);
         perm_ok <= (r0.req.priv_mode or not perm_attr.priv) and
                    (perm_attr.wr_perm or (r0.req.load and perm_attr.rd_perm));
-        access_ok <= valid_ra and perm_ok and rc_ok;
+        access_ok <= valid_ra and perm_ok and rc_ok and not dawr_match; -- a DAWR match denies the access
 
         -- Combine the request and cache hit status to decide what
         -- operation needs to be done
diff --git a/decode2.vhdl b/decode2.vhdl
index 7e993d5..cc241a2 100644
--- a/decode2.vhdl
+++ b/decode2.vhdl
@@ -477,7 +477,8 @@ begin
                 case decode_spr_num(d_in.insn) is
                     when SPR_XER =>
                         v.input_ov := '1';
-                    when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR =>
+                    when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR |
+                        SPR_DAWR0 | SPR_DAWR1 | SPR_DAWRX0 | SPR_DAWRX1 =>
                         unit := LDST;
                     when SPR_TAR =>
                         v.e.uses_tar := '1';
@@ -499,7 +500,8 @@ begin
                     when SPR_XER =>
                         v.e.output_xer := '1';
                         v.output_ov := '1';
-                    when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR =>
+                    when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR |
+                        SPR_DAWR0 | SPR_DAWR1 | SPR_DAWRX0 | SPR_DAWRX1 =>
                         unit := LDST;
                         if d_in.valid = '1' then
                             v.sgl_pipe := '1';
diff --git a/loadstore1.vhdl b/loadstore1.vhdl
index 85fb129..0816931 100644
--- a/loadstore1.vhdl
+++ b/loadstore1.vhdl
@@ -95,10 +95,11 @@ architecture behave of loadstore1 is
         virt_mode : std_ulogic;
         priv_mode : std_ulogic;
         load_sp : std_ulogic;
-        sprsel : std_ulogic_vector(1 downto 0);
+        sprsel : std_ulogic_vector(2 downto 0); -- widened: bit 2 selects the DAWR/DAWRX group
         ric : std_ulogic_vector(1 downto 0);
         is_slbia : std_ulogic;
         align_intr : std_ulogic;
+        dawr_intr : std_ulogic; -- loaded from dawr_match when r1.req is copied to the next stage
         dword_index : std_ulogic;
         two_dwords : std_ulogic;
         incomplete : std_ulogic;
@@ -119,7 +120,8 @@ architecture behave of loadstore1 is
                                        atomic_qw => '0', atomic_first => '0', atomic_last => '0',
                                        rc => '0', nc => '0',
                                        virt_mode => '0', priv_mode => '0', load_sp => '0',
-                                       sprsel => "00", ric => "00", is_slbia => '0', align_intr => '0',
+                                       sprsel => "000", ric => "00", is_slbia => '0', align_intr => '0',
+                                       dawr_intr => '0',
                                        dword_index => '0', two_dwords => '0', incomplete => '0',
                                        ea_valid => '0');
 
@@ -140,11 +142,15 @@ architecture behave of loadstore1 is
         one_cycle : std_ulogic;
         wr_sel : std_ulogic_vector(1 downto 0);
         addr0 : std_ulogic_vector(63 downto 0);
-        sprsel : std_ulogic_vector(1 downto 0);
+        sprsel : std_ulogic_vector(2 downto 0);
         dbg_spr : std_ulogic_vector(63 downto 0);
         dbg_spr_ack: std_ulogic;
     end record;
 
+    constant num_dawr : positive := 2;
+    type dawr_array_t is array(0 to num_dawr - 1) of std_ulogic_vector(63 downto 3); -- doubleword addresses
+    type dawrx_array_t is array(0 to num_dawr - 1) of std_ulogic_vector(15 downto 0);
+
     type reg_stage3_t is record
         state : state_t;
         complete : std_ulogic;
@@ -166,6 +172,10 @@ architecture behave of loadstore1 is
         intr_vec : integer range 0 to 16#fff#;
         srr1 : std_ulogic_vector(15 downto 0);
         events : Loadstore1EventType;
+        dawr : dawr_array_t;
+        dawrx : dawrx_array_t;
+        dawr_uplim : dawr_array_t; -- dawr(i) + dawrx(i)(15 downto 10), recomputed after a DAWR/DAWRX write
+        dawr_upd : std_ulogic; -- set by a DAWR/DAWRX write; suppresses matches while dawr_uplim is stale
     end record;
 
     signal req_in : request_t;
@@ -185,6 +195,7 @@ architecture behave of loadstore1 is
     signal stage1_req : request_t;
     signal stage1_dcreq : std_ulogic;
     signal stage1_dreq : std_ulogic;
+    signal stage1_dawr_match : std_ulogic;
 
     -- Generate byte enables from sizes
     function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is
@@ -287,6 +298,25 @@ architecture behave of loadstore1 is
         return fs2;
     end;
 
+    function dawrx_match_enable(dawrx : std_ulogic_vector(15 downto 0); virt_mode : std_ulogic;
+                                priv_mode : std_ulogic; is_store : std_ulogic)
+        return boolean is
+    begin
+        -- check PRIVM field; note priv_mode = '1' implies hypervisor mode
+        if (priv_mode = '0' and dawrx(0) = '0') or (priv_mode = '1' and dawrx(2) = '0') then
+            return false;
+        end if;
+        -- check WT/WTI fields
+        if dawrx(3) = '0' and virt_mode /= dawrx(4) then
+            return false;
+        end if;
+        -- check DW/DR fields
+        if (is_store = '0' and dawrx(5) = '0') or (is_store = '1' and dawrx(6) = '0') then
+            return false;
+        end if;
+        return true;
+    end;
+
 begin
     loadstore1_reg: process(clk)
     begin
@@ -302,7 +332,7 @@ begin
                 r1.req.instr_fault <= '0';
                 r1.req.load <= '0';
                 r1.req.priv_mode <= '0';
-                r1.req.sprsel <= "00";
+                r1.req.sprsel <= "000";
                 r1.req.ric <= "00";
                 r1.req.xerc <= xerc_init;
 
@@ -313,7 +343,7 @@ begin
                 r2.req.instr_fault <= '0';
                 r2.req.load <= '0';
                 r2.req.priv_mode <= '0';
-                r2.req.sprsel <= "00";
+                r2.req.sprsel <= "000";
                 r2.req.ric <= "00";
                 r2.req.xerc <= xerc_init;
 
@@ -330,12 +360,19 @@ begin
                 r3.stage1_en <= '1';
                 r3.events.load_complete <= '0';
                 r3.events.store_complete <= '0';
+                for i in 0 to num_dawr - 1 loop
+                    r3.dawr(i) <= (others => '0');
+                    r3.dawrx(i) <= (others => '0');
+                    r3.dawr_uplim(i) <= (others => '0');
+                end loop;
+                r3.dawr_upd <= '0';
                 flushing <= '0';
             else
                 r1 <= r1in;
                 r2 <= r2in;
                 r3 <= r3in;
-                flushing <= (flushing or (r1in.req.valid and r1in.req.align_intr)) and
+                flushing <= (flushing or (r1in.req.valid and
+                                          (r1in.req.align_intr or r1in.req.dawr_intr))) and
                             not flush;
             end if;
             stage1_dreq <= stage1_dcreq;
@@ -437,12 +474,15 @@ begin
         v.virt_mode := l_in.virt_mode;
         v.priv_mode := l_in.priv_mode;
         v.ric := l_in.insn(19 downto 18);
-        if sprn(1) = '1' then
+        if sprn(8 downto 7) = "01" then
+            -- debug registers DAWR[X][01]
+            v.sprsel := '1' & sprn(3) & sprn(0); -- "100"/"101" = DAWR0/1, "110"/"111" = DAWRX0/1
+        elsif sprn(1) = '1' then
             -- DSISR and DAR
-            v.sprsel := '1' & sprn(0);
+            v.sprsel := "01" & sprn(0);
         else
             -- PID and PTCR
-            v.sprsel := '0' & sprn(8);
+            v.sprsel := "00" & sprn(8);
         end if;
 
         lsu_sum := std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2));
@@ -547,7 +587,7 @@ begin
                 v.ea_valid := '0';
             when OP_MTSPR =>
                 v.write_spr := '1';
-                v.mmu_op := not sprn(1);
+                v.mmu_op := not (sprn(1) or sprn(2)); -- sprn(1): DSISR/DAR, sprn(2): DAWR*/DAWRX*; neither goes to the MMU
                 v.ea_valid := '0';
             when OP_FETCH_FAILED =>
                 -- send it to the MMU to do the radix walk
@@ -659,8 +699,12 @@ begin
         variable byte_offset : unsigned(2 downto 0);
         variable interrupt : std_ulogic;
         variable dbg_spr_rd : std_ulogic;
-        variable sprsel : std_ulogic_vector(1 downto 0);
+        variable sprsel : std_ulogic_vector(2 downto 0);
         variable sprval : std_ulogic_vector(63 downto 0);
+        variable dawr_match : std_ulogic;
+        variable addr : std_ulogic_vector(63 downto 3);
+        variable addl : unsigned(64 downto 3); -- addr - dawr(i); bit 64 set means addr is below the range
+        variable addu : unsigned(64 downto 3); -- dawr_uplim(i) - addr; bit 64 set means addr is above the range
     begin
         v := r2;
 
@@ -677,21 +721,47 @@ begin
             end if;
         end loop;
 
+        -- Test for DAWR0/1 matches
+        dawr_match := '0';
+        for i in 0 to num_dawr - 1 loop
+            addr := r1.req.addr(63 downto 3);
+            if r1.req.priv_mode = '1' and r3.dawrx(i)(7) = '1' then
+                -- HRAMMC=1 => trim top bit from address
+                addr(63) := '0';
+            end if;
+            addl := unsigned('0' & addr) - unsigned('0' & r3.dawr(i));
+            addu := unsigned('0' & r3.dawr_uplim(i)) - unsigned('0' & addr);
+            if addl(64) = '0' and addu(64) = '0' and
+                dawrx_match_enable(r3.dawrx(i), r1.req.virt_mode,
+                                   r1.req.priv_mode, r1.req.store) then
+                dawr_match := r1.req.valid and r1.req.dc_req and not r3.dawr_upd and
+                              not (r1.req.touch or r1.req.sync or r1.req.flush);
+            end if;
+        end loop;
+        stage1_dawr_match <= dawr_match;
+
         dbg_spr_rd := dbg_spr_req and not (r1.req.valid and r1.req.read_spr);
         if dbg_spr_rd = '0' then
             sprsel := r1.req.sprsel;
         else
-            sprsel := dbg_spr_addr;
+            sprsel := '0' & dbg_spr_addr;
         end if;
-        if sprsel(1) = '1' then
-            if sprsel(0) = '0' then
+        case sprsel is
+            when "100" =>
+                sprval := r3.dawr(0) & "000";
+            when "101" =>
+                sprval := r3.dawr(1) & "000";
+            when "110" =>
+                sprval := 48x"0" & r3.dawrx(0);
+            when "111" =>
+                sprval := 48x"0" & r3.dawrx(1);
+            when "010" =>
                 sprval := x"00000000" & r3.dsisr;
-            else
+            when "011" =>
                 sprval := r3.dar;
-            end if;
-        else
-            sprval := m_in.sprval;
-        end if;
+            when others =>
+                sprval := m_in.sprval; -- MMU regs
+        end case;
         if dbg_spr_req = '0' then
             v.dbg_spr_ack := '0';
         elsif dbg_spr_rd = '1' and r2.dbg_spr_ack = '0' then
@@ -704,6 +774,7 @@ begin
             v.req := r1.req;
             v.addr0 := r1.addr0;
             v.req.store_data := store_data;
+            v.req.dawr_intr := dawr_match;
             v.wait_dc := r1.req.valid and r1.req.dc_req and not r1.req.load_sp and
                          not r1.req.incomplete;
             v.wait_mmu := r1.req.valid and r1.req.mmu_op;
@@ -751,7 +822,7 @@ begin
         end if;
 
         interrupt := (r2.req.valid and r2.req.align_intr) or
-                     (d_in.error and (d_in.cache_paradox or d_in.reserve_nc)) or
+                     (d_in.error and (d_in.cache_paradox or d_in.reserve_nc or r2.req.dawr_intr)) or
                      m_in.err;
         if interrupt = '1' then
             v.req.valid := '0';
@@ -808,6 +879,15 @@ begin
         v.srr1 := (others => '0');
         v.events := (others => '0');
 
+        -- Evaluate DAWR upper limits after a clock edge
+        v.dawr_upd := '0';
+        if r3.dawr_upd = '1' then
+            for i in 0 to num_dawr - 1 loop
+                v.dawr_uplim(i) := std_ulogic_vector(unsigned(r3.dawr(i)) +
+                                                     unsigned(r3.dawrx(i)(15 downto 10)));
+            end loop;
+        end if;
+
         -- load data formatting
         -- shift and byte-reverse data bytes
         for i in 0 to 7 loop
@@ -887,12 +967,25 @@ begin
             if r2.req.load_sp = '1' and r2.req.dc_req = '0' then
                 write_enable := '1';
             end if;
-            if r2.req.write_spr = '1' and r2.req.mmu_op = '0' then
-                if r2.req.sprsel(0) = '0' then
-                    v.dsisr := r2.req.store_data(31 downto 0);
-                else
-                    v.dar := r2.req.store_data;
+            if r2.req.write_spr = '1' then
+                if r2.req.sprsel(2) = '1' then
+                    v.dawr_upd := '1'; -- any DAWR/DAWRX write schedules a dawr_uplim refresh
                 end if;
+                case r2.req.sprsel is
+                    when "100" =>
+                        v.dawr(0) := r2.req.store_data(63 downto 3);
+                    when "101" =>
+                        v.dawr(1) := r2.req.store_data(63 downto 3);
+                    when "110" =>
+                        v.dawrx(0) := r2.req.store_data(15 downto 0);
+                    when "111" =>
+                        v.dawrx(1) := r2.req.store_data(15 downto 0);
+                    when "010" =>
+                        v.dsisr := r2.req.store_data(31 downto 0);
+                    when "011" =>
+                        v.dar := r2.req.store_data;
+                    when others =>
+                end case;
             end if;
         end if;
 
@@ -915,9 +1008,10 @@ begin
             end if;
         end if;
         if d_in.error = '1' then
-            if d_in.cache_paradox = '1' then
+            if d_in.cache_paradox = '1' or d_in.reserve_nc = '1' or r2.req.dawr_intr = '1' then
                 -- signal an interrupt straight away
                 exception := '1';
+                dsisr(63 - 41) := r2.req.dawr_intr; -- report the DAWR match in DSISR
                 dsisr(63 - 38) := not r2.req.load;
                 dsisr(63 - 37) := d_in.reserve_nc;
                 -- XXX there is no architected bit for this
@@ -970,6 +1064,7 @@ begin
             v.srr1(47 - 34) := r2.req.prefixed;
             v.dar := r2.req.addr;
             if m_in.segerr = '0' then
+                dsisr(63 - 38) := not r2.req.load;
                 v.intr_vec := 16#300#;
                 v.dsisr := dsisr;
             else
@@ -1036,8 +1131,10 @@ begin
         end if;
         if stage1_dreq = '1' then
             d_out.data <= store_data;
+            d_out.dawr_match <= stage1_dawr_match;
         else
             d_out.data <= r2.req.store_data;
+            d_out.dawr_match <= r2.req.dawr_intr;
         end if;
         d_out.hold <= l_in.e2stall;
 