diff --git a/common.vhdl b/common.vhdl index 467f2a8..a6562e0 100644 --- a/common.vhdl +++ b/common.vhdl @@ -209,6 +209,11 @@ package common is next_predicted: std_ulogic; end record; + type IcacheEventType is record + icache_miss : std_ulogic; + itlb_miss_resolved : std_ulogic; + end record; + type Decode1ToDecode2Type is record valid: std_ulogic; stop_mark : std_ulogic; @@ -347,8 +352,10 @@ package common is itlb_miss_resolved : std_ulogic; icache_miss : std_ulogic; dc_miss_resolved : std_ulogic; + dc_load_miss : std_ulogic; dc_ld_miss_resolved : std_ulogic; dc_store_miss : std_ulogic; + dtlb_miss : std_ulogic; dtlb_miss_resolved : std_ulogic; ld_miss_nocache : std_ulogic; ld_fill_nocache : std_ulogic; @@ -468,6 +475,14 @@ package common is cache_paradox : std_ulogic; end record; + type DcacheEventType is record + load_miss : std_ulogic; + store_miss : std_ulogic; + dcache_refill : std_ulogic; + dtlb_miss : std_ulogic; + dtlb_miss_resolved : std_ulogic; + end record; + type Loadstore1ToMmuType is record valid : std_ulogic; tlbie : std_ulogic; @@ -537,6 +552,12 @@ package common is interrupt => '0', intr_vec => 0, srr0 => (others => '0'), srr1 => (others => '0')); + type Loadstore1EventType is record + load_complete : std_ulogic; + store_complete : std_ulogic; + itlb_miss : std_ulogic; + end record; + type Execute1ToWritebackType is record valid: std_ulogic; instr_tag : instr_tag_t; @@ -668,7 +689,8 @@ package common is write_cr_data => (others => '0')); type WritebackEventType is record - instr_complete : std_ulogic; + instr_complete : std_ulogic; + fp_complete : std_ulogic; end record; end common; diff --git a/core.vhdl b/core.vhdl index 5d8a822..32bfe88 100644 --- a/core.vhdl +++ b/core.vhdl @@ -148,6 +148,9 @@ architecture behave of core is signal msr : std_ulogic_vector(63 downto 0); -- PMU event bus + signal icache_events : IcacheEventType; + signal loadstore_events : Loadstore1EventType; + signal dcache_events : DcacheEventType; signal writeback_events : WritebackEventType; -- Debug status @@ -247,6 +250,7 @@ begin wishbone_out => wishbone_insn_out, wishbone_in => wishbone_insn_in, wb_snoop_in => wb_snoop_in, + events => icache_events, log_out => log_data(96 downto 43) ); @@ -356,6 +360,9 @@ begin icache_inval => ex1_icache_inval, dbg_msr_out => msr, wb_events => writeback_events, + ls_events => loadstore_events, + dc_events => dcache_events, + ic_events => icache_events, terminate_out => terminate, log_out => log_data(134 downto 120), log_rd_addr => log_rd_addr, @@ -397,6 +404,7 @@ begin m_out => loadstore1_to_mmu, m_in => mmu_to_loadstore1, dc_stall => dcache_stall_out, + events => loadstore_events, log_out => log_data(149 downto 140) ); @@ -431,6 +439,7 @@ begin wishbone_in => wishbone_data_in, wishbone_out => wishbone_data_out, snoop_in => wb_snoop_in, + events => dcache_events, log_out => log_data(170 downto 151) ); diff --git a/dcache.vhdl b/dcache.vhdl index 50060b2..90771f5 100644 --- a/dcache.vhdl +++ b/dcache.vhdl @@ -46,6 +46,8 @@ entity dcache is wishbone_out : out wishbone_master_out; wishbone_in : in wishbone_slave_out; + events : out DcacheEventType; + log_out : out std_ulogic_vector(19 downto 0) ); end entity dcache; @@ -355,6 +357,8 @@ architecture rtl of dcache is signal r1 : reg_stage_1_t; + signal ev : DcacheEventType; + -- Reservation information -- type reservation_t is record @@ -412,6 +416,7 @@ architecture rtl of dcache is signal rc_ok : std_ulogic; signal perm_ok : std_ulogic; signal access_ok : std_ulogic; + signal tlb_miss : std_ulogic; -- TLB PLRU output interface type tlb_plru_out_t is array(tlb_index_t) of std_ulogic_vector(TLB_WAY_BITS-1 downto 0); @@ -605,6 +610,8 @@ begin r0_valid <= r0_full and not r1.full and not d_in.hold; stall_out <= r0_stall; + events <= ev; + -- TLB -- Operates in the second cycle on the request latched in r0.req. -- TLB updates write the entry at the end of the second cycle. @@ -689,6 +696,7 @@ begin pte <= (others => '0'); end if; valid_ra <= tlb_hit or not r0.req.virt_mode; + tlb_miss <= r0_valid and r0.req.virt_mode and not tlb_hit; if r0.req.virt_mode = '1' then ra <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) & r0.req.addr(TLB_LG_PGSZ - 1 downto ROW_OFF_BITS) & @@ -712,6 +720,7 @@ begin if rising_edge(clk) then tlbie := r0_valid and r0.tlbie; tlbwe := r0_valid and r0.tlbld; + ev.dtlb_miss_resolved <= tlbwe; if rst = '1' or (tlbie = '1' and r0.doall = '1') then -- clear all valid bits at once for i in tlb_index_t loop @@ -1286,6 +1295,11 @@ begin r1.forward_valid1 <= '0'; end if; + ev.dcache_refill <= '0'; + ev.load_miss <= '0'; + ev.store_miss <= '0'; + ev.dtlb_miss <= tlb_miss; + -- On reset, clear all valid bits to force misses if rst = '1' then for i in index_t loop @@ -1417,6 +1431,7 @@ begin -- Track that we had one request sent r1.state <= RELOAD_WAIT_ACK; r1.write_tag <= '1'; + ev.load_miss <= '1'; when OP_LOAD_NC => r1.wb.cyc <= '1'; @@ -1449,6 +1464,9 @@ begin r1.wb.we <= '1'; r1.wb.cyc <= '1'; r1.wb.stb <= '1'; + if req.op = OP_STORE_MISS then + ev.store_miss <= '1'; + end if; -- OP_NONE and OP_BAD do nothing -- OP_BAD & OP_STCX_FAIL were handled above already @@ -1500,6 +1518,7 @@ begin -- Cache line is now valid cache_valids(r1.store_index)(r1.store_way) <= '1'; + ev.dcache_refill <= not r1.dcbz; r1.state <= IDLE; end if; diff --git a/execute1.vhdl b/execute1.vhdl index 5d2fa79..3f21757 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -47,6 +47,9 @@ entity execute1 is -- PMU event buses wb_events : in WritebackEventType; + ls_events : in Loadstore1EventType; + dc_events : in DcacheEventType; + ic_events : in IcacheEventType; log_out : out std_ulogic_vector(14 downto 0); log_rd_addr : out std_ulogic_vector(31 downto 0); @@ -70,6 +73,11 @@ architecture behaviour of execute1 is mul_finish : std_ulogic; div_in_progress : std_ulogic; cntz_in_progress : std_ulogic; + no_instr_avail : std_ulogic; + instr_dispatch : std_ulogic; + ext_interrupt : std_ulogic; + taken_branch_event : std_ulogic; + br_mispredict : std_ulogic; log_addr_spr : std_ulogic_vector(31 downto 0); end record; constant reg_type_init : reg_type := @@ -78,6 +86,8 @@ architecture behaviour of execute1 is busy => '0', terminate => '0', intr_pending => '0', fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL, br_taken => '0', mul_in_progress => '0', mul_finish => '0', div_in_progress => '0', cntz_in_progress => '0', + no_instr_avail => '0', instr_dispatch => '0', ext_interrupt => '0', + taken_branch_event => '0', br_mispredict => '0', others => (others => '0')); signal r, rin : reg_type; @@ -302,7 +312,24 @@ begin c_in <= e_in.read_data3; cr_in <= e_in.cr; - x_to_pmu.occur <= (instr_complete => wb_events.instr_complete, others => '0'); + x_to_pmu.occur <= (instr_complete => wb_events.instr_complete, + fp_complete => wb_events.fp_complete, + ld_complete => ls_events.load_complete, + st_complete => ls_events.store_complete, + itlb_miss => ls_events.itlb_miss, + dc_load_miss => dc_events.load_miss, + dc_ld_miss_resolved => dc_events.dcache_refill, + dc_store_miss => dc_events.store_miss, + dtlb_miss => dc_events.dtlb_miss, + dtlb_miss_resolved => dc_events.dtlb_miss_resolved, + icache_miss => ic_events.icache_miss, + itlb_miss_resolved => ic_events.itlb_miss_resolved, + no_instr_avail => r.no_instr_avail, + dispatch => r.instr_dispatch, + ext_interrupt => r.ext_interrupt, + br_taken_complete => r.taken_branch_event, + br_mispredict => r.br_mispredict, + others => '0'); x_to_pmu.nia <= current.nia; x_to_pmu.addr <= (others => '0'); x_to_pmu.addr_v <= '0'; @@ -715,6 +742,9 @@ begin v.div_in_progress := '0'; v.cntz_in_progress := '0'; v.mul_finish := '0'; + v.ext_interrupt := '0'; + v.taken_branch_event := '0'; + v.br_mispredict := '0'; x_to_pmu.mfspr <= '0'; x_to_pmu.mtspr <= '0'; @@ -804,6 +834,7 @@ begin elsif ext_irq_in = '1' then v.e.intr_vec := 16#500#; report "IRQ valid: External"; + v.ext_interrupt := '1'; end if; exception := '1'; @@ -836,6 +867,9 @@ begin v.intr_pending := '0'; end if; + v.no_instr_avail := not (e_in.valid or l_in.busy or l_in.in_progress or r.busy or fp_in.busy); + v.instr_dispatch := valid_in and not exception and not illegal; + if valid_in = '1' and exception = '0' and illegal = '0' and e_in.unit = ALU then v.e.valid := '1'; @@ -905,6 +939,7 @@ begin if ctrl.msr(MSR_BE) = '1' then do_trace := '1'; end if; + v.taken_branch_event := '1'; when OP_BC | OP_BCREG => -- read_data1 is CTR -- for OP_BCREG, read_data2 is target register (CTR, LR or TAR) @@ -920,6 +955,7 @@ begin taken_branch := r.br_taken; end if; v.br_taken := taken_branch; + v.taken_branch_event := taken_branch; abs_branch := e_in.br_abs; if e_in.repeat = '0' or e_in.second = '1' then is_branch := '1'; @@ -1114,6 +1150,7 @@ begin end if; if taken_branch /= e_in.br_pred then v.e.redirect := '1'; + v.br_mispredict := is_direct_branch; end if; v.e.br_last := is_direct_branch; v.e.br_taken := taken_branch; diff --git a/icache.vhdl b/icache.vhdl index 4d3b12e..57d3437 100644 --- a/icache.vhdl +++ b/icache.vhdl @@ -70,6 +70,7 @@ entity icache is wb_snoop_in : in wishbone_master_out := wishbone_master_out_init; + events : out IcacheEventType; log_out : out std_ulogic_vector(53 downto 0) ); end entity icache; @@ -197,6 +198,8 @@ architecture rtl of icache is signal r : reg_internal_t; + signal ev : IcacheEventType; + -- Async signals on incoming request signal req_index : index_t; signal req_row : row_t; @@ -494,6 +497,7 @@ begin itlb_ptes(wr_index) <= m_in.pte; itlb_valids(wr_index) <= '1'; end if; + ev.itlb_miss_resolved <= m_in.tlbld and not rst; end if; end process; @@ -627,6 +631,7 @@ begin variable snoop_cache_tags : cache_tags_set_t; begin if rising_edge(clk) then + ev.icache_miss <= '0'; -- On reset, clear all valid bits to force misses if rst = '1' then for i in index_t loop @@ -699,6 +704,7 @@ begin " way:" & integer'image(replace_way) & " tag:" & to_hstring(req_tag) & " RA:" & to_hstring(real_addr); + ev.icache_miss <= '1'; -- Keep track of our index and way for subsequent stores r.store_index <= req_index; diff --git a/loadstore1.vhdl b/loadstore1.vhdl index 243b99d..21ed836 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -33,6 +33,8 @@ entity loadstore1 is dc_stall : in std_ulogic; + events : out Loadstore1EventType; + log_out : out std_ulogic_vector(9 downto 0) ); end loadstore1; @@ -146,6 +148,7 @@ architecture behave of loadstore1 is intr_vec : integer range 0 to 16#fff#; nia : std_ulogic_vector(63 downto 0); srr1 : std_ulogic_vector(15 downto 0); + events : Loadstore1EventType; end record; signal req_in : request_t; @@ -668,6 +671,7 @@ begin do_update := '0'; v.convert_lfs := '0'; v.srr1 := (others => '0'); + v.events := (others => '0'); -- load data formatting -- shift and byte-reverse data bytes @@ -796,6 +800,7 @@ begin mmu_mtspr := r2.req.write_spr; if r2.req.instr_fault = '1' then v.state := MMU_LOOKUP; + v.events.itlb_miss := '1'; else v.state := TLBIE_WAIT; end if; @@ -838,6 +843,9 @@ begin v.state := IDLE; end if; + v.events.load_complete := r2.req.load and complete; + v.events.store_complete := (r2.req.store or r2.req.dcbz) and complete; + -- generate DSI or DSegI for load/store exceptions -- or ISI or ISegI for instruction fetch exceptions v.interrupt := exception; @@ -946,6 +954,8 @@ begin e_out.in_progress <= in_progress; e_out.interrupt <= r3.interrupt; + events <= r3.events; + -- Busy calculation. stage3_busy_next <= r2.req.valid and not (complete or part_done or exception); diff --git a/pmu.vhdl b/pmu.vhdl index ccb33e7..2967f4e 100644 --- a/pmu.vhdl +++ b/pmu.vhdl @@ -227,7 +227,12 @@ begin event := '1'; end if; if mmcr0(MMCR0_PMCjCE) = '1' and - (pmcs(2)(31) or pmcs(3)(31) or pmcs(4)(31) or pmcs(5)(31) or pmcs(6)(31)) = '1' then + (pmcs(2)(31) or pmcs(3)(31) or pmcs(4)(31)) = '1' then + event := '1'; + end if; + if mmcr0(MMCR0_PMCjCE) = '1' and + mmcr0(MMCR0_PMCC + 1 downto MMCR0_PMCC) /= "11" and + (pmcs(5)(31) or pmcs(6)(31)) = '1' then event := '1'; end if; @@ -285,13 +290,13 @@ begin when x"f8" => inc(3) := tbbit; when x"fe" => - inc(3) := p_in.occur.ld_fill_nocache; + inc(3) := p_in.occur.dtlb_miss; when others => end case; case mmcr1(7 downto 0) is when x"f0" => - inc(4) := p_in.occur.dc_store_miss; + inc(4) := p_in.occur.dc_load_miss; when x"f2" => inc(4) := p_in.occur.dispatch; when x"f4" => @@ -309,10 +314,8 @@ begin when others => end case; - if mmcr0(MMCR0_PMCC + 1 downto MMCR0_PMCC) /= "11" then - inc(5) := (mmcr0(MMCR0_CC56RUN) or p_in.run) and p_in.occur.instr_complete; - inc(6) := mmcr0(MMCR0_CC56RUN) or p_in.run; - end if; + inc(5) := (mmcr0(MMCR0_CC56RUN) or p_in.run) and p_in.occur.instr_complete; + inc(6) := mmcr0(MMCR0_CC56RUN) or p_in.run; -- Evaluate freeze conditions freeze := mmcr0(MMCR0_FC) or @@ -346,6 +349,14 @@ begin end if; end loop; + -- When MMCR0[PMCC] = "11", PMC5 and PMC6 are not controlled by the + -- MMCRs and don't generate events, but do continue to count run + -- instructions and run cycles. + if mmcr0(MMCR0_PMCC + 1 downto MMCR0_PMCC) = "11" then + inc(5) := p_in.run and p_in.occur.instr_complete; + inc(6) := p_in.run; + end if; + doinc <= inc; doevent <= event; doalert <= event and mmcr0(MMCR0_PMAE); diff --git a/writeback.vhdl b/writeback.vhdl index b056ee1..a99d4d2 100644 --- a/writeback.vhdl +++ b/writeback.vhdl @@ -104,6 +104,7 @@ begin complete_out <= fp_in.instr_tag; end if; events.instr_complete <= complete_out.valid; + events.fp_complete <= fp_in.valid; intr := e_in.interrupt or l_in.interrupt or fp_in.interrupt;