PMU: Add several more events

This implements most of the architected PMU events.  The ones missing
are mostly the ones that depend on which level of the cache hierarchy
data is fetched from.  The events implemented here, and their raw
event codes, are:

    Floating-point operation completed (100f4)
    Load completed (100fc)
    Store completed (200f0)
    Icache miss (200fc)
    ITLB miss (100f6)
    ITLB miss resolved (400fc)
    Dcache load miss (400f0)
    Dcache load miss resolved (300f8)
    Dcache store miss (300f0)
    DTLB miss (300fc)
    DTLB miss resolved (200f6)
    No instruction available and none being executed (100f8)
    Instruction dispatched (200f2, 300f2, 400f2)
    Taken branch instruction completed (200fa)
    Branch mispredicted (400f6)
    External interrupt taken (200f8)

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
dcache-nc-fix
Paul Mackerras 3 years ago
parent e33fb26e7a
commit 65c43b488b

@ -209,6 +209,11 @@ package common is
next_predicted: std_ulogic; next_predicted: std_ulogic;
end record; end record;


-- PMU event signals produced by the icache.
-- The icache entity exposes a port of this type (`events`), and execute1
-- receives it as `ic_events` and copies each field into x_to_pmu.occur.
type IcacheEventType is record
-- Defaults to '0' on every rising clock edge in the icache's clocked
-- process and is set to '1' on the path that emits the way/tag/RA report
-- (presumably the start of miss handling -- confirm against icache.vhdl).
icache_miss : std_ulogic;
-- Registered as `m_in.tlbld and not rst`, i.e. a TLB load request arriving
-- from the MMU interface, suppressed while reset is asserted.
itlb_miss_resolved : std_ulogic;
end record;

type Decode1ToDecode2Type is record type Decode1ToDecode2Type is record
valid: std_ulogic; valid: std_ulogic;
stop_mark : std_ulogic; stop_mark : std_ulogic;
@ -347,8 +352,10 @@ package common is
itlb_miss_resolved : std_ulogic; itlb_miss_resolved : std_ulogic;
icache_miss : std_ulogic; icache_miss : std_ulogic;
dc_miss_resolved : std_ulogic; dc_miss_resolved : std_ulogic;
dc_load_miss : std_ulogic;
dc_ld_miss_resolved : std_ulogic; dc_ld_miss_resolved : std_ulogic;
dc_store_miss : std_ulogic; dc_store_miss : std_ulogic;
dtlb_miss : std_ulogic;
dtlb_miss_resolved : std_ulogic; dtlb_miss_resolved : std_ulogic;
ld_miss_nocache : std_ulogic; ld_miss_nocache : std_ulogic;
ld_fill_nocache : std_ulogic; ld_fill_nocache : std_ulogic;
@ -468,6 +475,14 @@ package common is
cache_paradox : std_ulogic; cache_paradox : std_ulogic;
end record; end record;


-- PMU event signals produced by the dcache.
-- The dcache drives its `events` output port from an internal signal of this
-- type, and execute1 receives it as `dc_events` and copies each field into
-- x_to_pmu.occur.
type DcacheEventType is record
-- Defaults to '0' each clock; set to '1' where the state machine moves to
-- RELOAD_WAIT_ACK with r1.write_tag set (a cacheable load miss starting a
-- line reload, as far as the visible hunk shows).
load_miss : std_ulogic;
-- Defaults to '0' each clock; set to '1' on the store path only when
-- req.op = OP_STORE_MISS.
store_miss : std_ulogic;
-- Defaults to '0' each clock; set to `not r1.dcbz` at the point where the
-- reloaded cache line is marked valid, so dcbz-initiated fills are excluded.
-- execute1 maps this field onto the PMU's dc_ld_miss_resolved event.
dcache_refill : std_ulogic;
-- Registered copy of tlb_miss, which is computed as
-- `r0_valid and r0.req.virt_mode and not tlb_hit`.
dtlb_miss : std_ulogic;
-- Registered copy of tlbwe (`r0_valid and r0.tlbld`), i.e. a TLB entry
-- write being performed.
dtlb_miss_resolved : std_ulogic;
end record;

type Loadstore1ToMmuType is record type Loadstore1ToMmuType is record
valid : std_ulogic; valid : std_ulogic;
tlbie : std_ulogic; tlbie : std_ulogic;
@ -537,6 +552,12 @@ package common is
interrupt => '0', intr_vec => 0, interrupt => '0', intr_vec => 0,
srr0 => (others => '0'), srr1 => (others => '0')); srr0 => (others => '0'), srr1 => (others => '0'));


-- PMU event signals produced by loadstore1.
-- loadstore1 keeps a field of this type in a registered record, clears it by
-- default each cycle (`v.events := (others => '0')`) and drives its `events`
-- output port from r3.events. execute1 receives it as `ls_events` and copies
-- each field into x_to_pmu.occur.
type Loadstore1EventType is record
-- Computed as `r2.req.load and complete`.
load_complete : std_ulogic;
-- Computed as `(r2.req.store or r2.req.dcbz) and complete`, so dcbz
-- operations are counted as stores.
store_complete : std_ulogic;
-- Set to '1' when a request with r2.req.instr_fault = '1' sends the state
-- machine to MMU_LOOKUP. Note that the ITLB miss event is therefore
-- reported by loadstore1 rather than by the icache.
itlb_miss : std_ulogic;
end record;

type Execute1ToWritebackType is record type Execute1ToWritebackType is record
valid: std_ulogic; valid: std_ulogic;
instr_tag : instr_tag_t; instr_tag : instr_tag_t;
@ -668,7 +689,8 @@ package common is
write_cr_data => (others => '0')); write_cr_data => (others => '0'));


type WritebackEventType is record type WritebackEventType is record
instr_complete : std_ulogic; instr_complete : std_ulogic;
fp_complete : std_ulogic;
end record; end record;


end common; end common;

@ -148,6 +148,9 @@ architecture behave of core is
signal msr : std_ulogic_vector(63 downto 0); signal msr : std_ulogic_vector(63 downto 0);


-- PMU event bus -- PMU event bus
signal icache_events : IcacheEventType;
signal loadstore_events : Loadstore1EventType;
signal dcache_events : DcacheEventType;
signal writeback_events : WritebackEventType; signal writeback_events : WritebackEventType;


-- Debug status -- Debug status
@ -247,6 +250,7 @@ begin
wishbone_out => wishbone_insn_out, wishbone_out => wishbone_insn_out,
wishbone_in => wishbone_insn_in, wishbone_in => wishbone_insn_in,
wb_snoop_in => wb_snoop_in, wb_snoop_in => wb_snoop_in,
events => icache_events,
log_out => log_data(96 downto 43) log_out => log_data(96 downto 43)
); );


@ -356,6 +360,9 @@ begin
icache_inval => ex1_icache_inval, icache_inval => ex1_icache_inval,
dbg_msr_out => msr, dbg_msr_out => msr,
wb_events => writeback_events, wb_events => writeback_events,
ls_events => loadstore_events,
dc_events => dcache_events,
ic_events => icache_events,
terminate_out => terminate, terminate_out => terminate,
log_out => log_data(134 downto 120), log_out => log_data(134 downto 120),
log_rd_addr => log_rd_addr, log_rd_addr => log_rd_addr,
@ -397,6 +404,7 @@ begin
m_out => loadstore1_to_mmu, m_out => loadstore1_to_mmu,
m_in => mmu_to_loadstore1, m_in => mmu_to_loadstore1,
dc_stall => dcache_stall_out, dc_stall => dcache_stall_out,
events => loadstore_events,
log_out => log_data(149 downto 140) log_out => log_data(149 downto 140)
); );


@ -431,6 +439,7 @@ begin
wishbone_in => wishbone_data_in, wishbone_in => wishbone_data_in,
wishbone_out => wishbone_data_out, wishbone_out => wishbone_data_out,
snoop_in => wb_snoop_in, snoop_in => wb_snoop_in,
events => dcache_events,
log_out => log_data(170 downto 151) log_out => log_data(170 downto 151)
); );



@ -46,6 +46,8 @@ entity dcache is
wishbone_out : out wishbone_master_out; wishbone_out : out wishbone_master_out;
wishbone_in : in wishbone_slave_out; wishbone_in : in wishbone_slave_out;


events : out DcacheEventType;

log_out : out std_ulogic_vector(19 downto 0) log_out : out std_ulogic_vector(19 downto 0)
); );
end entity dcache; end entity dcache;
@ -355,6 +357,8 @@ architecture rtl of dcache is


signal r1 : reg_stage_1_t; signal r1 : reg_stage_1_t;


signal ev : DcacheEventType;

-- Reservation information -- Reservation information
-- --
type reservation_t is record type reservation_t is record
@ -412,6 +416,7 @@ architecture rtl of dcache is
signal rc_ok : std_ulogic; signal rc_ok : std_ulogic;
signal perm_ok : std_ulogic; signal perm_ok : std_ulogic;
signal access_ok : std_ulogic; signal access_ok : std_ulogic;
signal tlb_miss : std_ulogic;


-- TLB PLRU output interface -- TLB PLRU output interface
type tlb_plru_out_t is array(tlb_index_t) of std_ulogic_vector(TLB_WAY_BITS-1 downto 0); type tlb_plru_out_t is array(tlb_index_t) of std_ulogic_vector(TLB_WAY_BITS-1 downto 0);
@ -605,6 +610,8 @@ begin
r0_valid <= r0_full and not r1.full and not d_in.hold; r0_valid <= r0_full and not r1.full and not d_in.hold;
stall_out <= r0_stall; stall_out <= r0_stall;


events <= ev;

-- TLB -- TLB
-- Operates in the second cycle on the request latched in r0.req. -- Operates in the second cycle on the request latched in r0.req.
-- TLB updates write the entry at the end of the second cycle. -- TLB updates write the entry at the end of the second cycle.
@ -689,6 +696,7 @@ begin
pte <= (others => '0'); pte <= (others => '0');
end if; end if;
valid_ra <= tlb_hit or not r0.req.virt_mode; valid_ra <= tlb_hit or not r0.req.virt_mode;
tlb_miss <= r0_valid and r0.req.virt_mode and not tlb_hit;
if r0.req.virt_mode = '1' then if r0.req.virt_mode = '1' then
ra <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) & ra <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
r0.req.addr(TLB_LG_PGSZ - 1 downto ROW_OFF_BITS) & r0.req.addr(TLB_LG_PGSZ - 1 downto ROW_OFF_BITS) &
@ -712,6 +720,7 @@ begin
if rising_edge(clk) then if rising_edge(clk) then
tlbie := r0_valid and r0.tlbie; tlbie := r0_valid and r0.tlbie;
tlbwe := r0_valid and r0.tlbld; tlbwe := r0_valid and r0.tlbld;
ev.dtlb_miss_resolved <= tlbwe;
if rst = '1' or (tlbie = '1' and r0.doall = '1') then if rst = '1' or (tlbie = '1' and r0.doall = '1') then
-- clear all valid bits at once -- clear all valid bits at once
for i in tlb_index_t loop for i in tlb_index_t loop
@ -1286,6 +1295,11 @@ begin
r1.forward_valid1 <= '0'; r1.forward_valid1 <= '0';
end if; end if;


ev.dcache_refill <= '0';
ev.load_miss <= '0';
ev.store_miss <= '0';
ev.dtlb_miss <= tlb_miss;

-- On reset, clear all valid bits to force misses -- On reset, clear all valid bits to force misses
if rst = '1' then if rst = '1' then
for i in index_t loop for i in index_t loop
@ -1417,6 +1431,7 @@ begin
-- Track that we had one request sent -- Track that we had one request sent
r1.state <= RELOAD_WAIT_ACK; r1.state <= RELOAD_WAIT_ACK;
r1.write_tag <= '1'; r1.write_tag <= '1';
ev.load_miss <= '1';


when OP_LOAD_NC => when OP_LOAD_NC =>
r1.wb.cyc <= '1'; r1.wb.cyc <= '1';
@ -1449,6 +1464,9 @@ begin
r1.wb.we <= '1'; r1.wb.we <= '1';
r1.wb.cyc <= '1'; r1.wb.cyc <= '1';
r1.wb.stb <= '1'; r1.wb.stb <= '1';
if req.op = OP_STORE_MISS then
ev.store_miss <= '1';
end if;


-- OP_NONE and OP_BAD do nothing -- OP_NONE and OP_BAD do nothing
-- OP_BAD & OP_STCX_FAIL were handled above already -- OP_BAD & OP_STCX_FAIL were handled above already
@ -1500,6 +1518,7 @@ begin
-- Cache line is now valid -- Cache line is now valid
cache_valids(r1.store_index)(r1.store_way) <= '1'; cache_valids(r1.store_index)(r1.store_way) <= '1';


ev.dcache_refill <= not r1.dcbz;
r1.state <= IDLE; r1.state <= IDLE;
end if; end if;



@ -47,6 +47,9 @@ entity execute1 is


-- PMU event buses -- PMU event buses
wb_events : in WritebackEventType; wb_events : in WritebackEventType;
ls_events : in Loadstore1EventType;
dc_events : in DcacheEventType;
ic_events : in IcacheEventType;


log_out : out std_ulogic_vector(14 downto 0); log_out : out std_ulogic_vector(14 downto 0);
log_rd_addr : out std_ulogic_vector(31 downto 0); log_rd_addr : out std_ulogic_vector(31 downto 0);
@ -70,6 +73,11 @@ architecture behaviour of execute1 is
mul_finish : std_ulogic; mul_finish : std_ulogic;
div_in_progress : std_ulogic; div_in_progress : std_ulogic;
cntz_in_progress : std_ulogic; cntz_in_progress : std_ulogic;
no_instr_avail : std_ulogic;
instr_dispatch : std_ulogic;
ext_interrupt : std_ulogic;
taken_branch_event : std_ulogic;
br_mispredict : std_ulogic;
log_addr_spr : std_ulogic_vector(31 downto 0); log_addr_spr : std_ulogic_vector(31 downto 0);
end record; end record;
constant reg_type_init : reg_type := constant reg_type_init : reg_type :=
@ -78,6 +86,8 @@ architecture behaviour of execute1 is
busy => '0', terminate => '0', intr_pending => '0', busy => '0', terminate => '0', intr_pending => '0',
fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL, br_taken => '0', fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL, br_taken => '0',
mul_in_progress => '0', mul_finish => '0', div_in_progress => '0', cntz_in_progress => '0', mul_in_progress => '0', mul_finish => '0', div_in_progress => '0', cntz_in_progress => '0',
no_instr_avail => '0', instr_dispatch => '0', ext_interrupt => '0',
taken_branch_event => '0', br_mispredict => '0',
others => (others => '0')); others => (others => '0'));


signal r, rin : reg_type; signal r, rin : reg_type;
@ -302,7 +312,24 @@ begin
c_in <= e_in.read_data3; c_in <= e_in.read_data3;
cr_in <= e_in.cr; cr_in <= e_in.cr;


x_to_pmu.occur <= (instr_complete => wb_events.instr_complete, others => '0'); x_to_pmu.occur <= (instr_complete => wb_events.instr_complete,
fp_complete => wb_events.fp_complete,
ld_complete => ls_events.load_complete,
st_complete => ls_events.store_complete,
itlb_miss => ls_events.itlb_miss,
dc_load_miss => dc_events.load_miss,
dc_ld_miss_resolved => dc_events.dcache_refill,
dc_store_miss => dc_events.store_miss,
dtlb_miss => dc_events.dtlb_miss,
dtlb_miss_resolved => dc_events.dtlb_miss_resolved,
icache_miss => ic_events.icache_miss,
itlb_miss_resolved => ic_events.itlb_miss_resolved,
no_instr_avail => r.no_instr_avail,
dispatch => r.instr_dispatch,
ext_interrupt => r.ext_interrupt,
br_taken_complete => r.taken_branch_event,
br_mispredict => r.br_mispredict,
others => '0');
x_to_pmu.nia <= current.nia; x_to_pmu.nia <= current.nia;
x_to_pmu.addr <= (others => '0'); x_to_pmu.addr <= (others => '0');
x_to_pmu.addr_v <= '0'; x_to_pmu.addr_v <= '0';
@ -715,6 +742,9 @@ begin
v.div_in_progress := '0'; v.div_in_progress := '0';
v.cntz_in_progress := '0'; v.cntz_in_progress := '0';
v.mul_finish := '0'; v.mul_finish := '0';
v.ext_interrupt := '0';
v.taken_branch_event := '0';
v.br_mispredict := '0';


x_to_pmu.mfspr <= '0'; x_to_pmu.mfspr <= '0';
x_to_pmu.mtspr <= '0'; x_to_pmu.mtspr <= '0';
@ -804,6 +834,7 @@ begin
elsif ext_irq_in = '1' then elsif ext_irq_in = '1' then
v.e.intr_vec := 16#500#; v.e.intr_vec := 16#500#;
report "IRQ valid: External"; report "IRQ valid: External";
v.ext_interrupt := '1';
end if; end if;
exception := '1'; exception := '1';


@ -836,6 +867,9 @@ begin
v.intr_pending := '0'; v.intr_pending := '0';
end if; end if;


v.no_instr_avail := not (e_in.valid or l_in.busy or l_in.in_progress or r.busy or fp_in.busy);
v.instr_dispatch := valid_in and not exception and not illegal;

if valid_in = '1' and exception = '0' and illegal = '0' and e_in.unit = ALU then if valid_in = '1' and exception = '0' and illegal = '0' and e_in.unit = ALU then
v.e.valid := '1'; v.e.valid := '1';


@ -905,6 +939,7 @@ begin
if ctrl.msr(MSR_BE) = '1' then if ctrl.msr(MSR_BE) = '1' then
do_trace := '1'; do_trace := '1';
end if; end if;
v.taken_branch_event := '1';
when OP_BC | OP_BCREG => when OP_BC | OP_BCREG =>
-- read_data1 is CTR -- read_data1 is CTR
-- for OP_BCREG, read_data2 is target register (CTR, LR or TAR) -- for OP_BCREG, read_data2 is target register (CTR, LR or TAR)
@ -920,6 +955,7 @@ begin
taken_branch := r.br_taken; taken_branch := r.br_taken;
end if; end if;
v.br_taken := taken_branch; v.br_taken := taken_branch;
v.taken_branch_event := taken_branch;
abs_branch := e_in.br_abs; abs_branch := e_in.br_abs;
if e_in.repeat = '0' or e_in.second = '1' then if e_in.repeat = '0' or e_in.second = '1' then
is_branch := '1'; is_branch := '1';
@ -1114,6 +1150,7 @@ begin
end if; end if;
if taken_branch /= e_in.br_pred then if taken_branch /= e_in.br_pred then
v.e.redirect := '1'; v.e.redirect := '1';
v.br_mispredict := is_direct_branch;
end if; end if;
v.e.br_last := is_direct_branch; v.e.br_last := is_direct_branch;
v.e.br_taken := taken_branch; v.e.br_taken := taken_branch;

@ -70,6 +70,7 @@ entity icache is


wb_snoop_in : in wishbone_master_out := wishbone_master_out_init; wb_snoop_in : in wishbone_master_out := wishbone_master_out_init;


events : out IcacheEventType;
log_out : out std_ulogic_vector(53 downto 0) log_out : out std_ulogic_vector(53 downto 0)
); );
end entity icache; end entity icache;
@ -197,6 +198,8 @@ architecture rtl of icache is


signal r : reg_internal_t; signal r : reg_internal_t;


signal ev : IcacheEventType;

-- Async signals on incoming request -- Async signals on incoming request
signal req_index : index_t; signal req_index : index_t;
signal req_row : row_t; signal req_row : row_t;
@ -494,6 +497,7 @@ begin
itlb_ptes(wr_index) <= m_in.pte; itlb_ptes(wr_index) <= m_in.pte;
itlb_valids(wr_index) <= '1'; itlb_valids(wr_index) <= '1';
end if; end if;
ev.itlb_miss_resolved <= m_in.tlbld and not rst;
end if; end if;
end process; end process;


@ -627,6 +631,7 @@ begin
variable snoop_cache_tags : cache_tags_set_t; variable snoop_cache_tags : cache_tags_set_t;
begin begin
if rising_edge(clk) then if rising_edge(clk) then
ev.icache_miss <= '0';
-- On reset, clear all valid bits to force misses -- On reset, clear all valid bits to force misses
if rst = '1' then if rst = '1' then
for i in index_t loop for i in index_t loop
@ -699,6 +704,7 @@ begin
" way:" & integer'image(replace_way) & " way:" & integer'image(replace_way) &
" tag:" & to_hstring(req_tag) & " tag:" & to_hstring(req_tag) &
" RA:" & to_hstring(real_addr); " RA:" & to_hstring(real_addr);
ev.icache_miss <= '1';


-- Keep track of our index and way for subsequent stores -- Keep track of our index and way for subsequent stores
r.store_index <= req_index; r.store_index <= req_index;

@ -33,6 +33,8 @@ entity loadstore1 is


dc_stall : in std_ulogic; dc_stall : in std_ulogic;


events : out Loadstore1EventType;

log_out : out std_ulogic_vector(9 downto 0) log_out : out std_ulogic_vector(9 downto 0)
); );
end loadstore1; end loadstore1;
@ -146,6 +148,7 @@ architecture behave of loadstore1 is
intr_vec : integer range 0 to 16#fff#; intr_vec : integer range 0 to 16#fff#;
nia : std_ulogic_vector(63 downto 0); nia : std_ulogic_vector(63 downto 0);
srr1 : std_ulogic_vector(15 downto 0); srr1 : std_ulogic_vector(15 downto 0);
events : Loadstore1EventType;
end record; end record;


signal req_in : request_t; signal req_in : request_t;
@ -668,6 +671,7 @@ begin
do_update := '0'; do_update := '0';
v.convert_lfs := '0'; v.convert_lfs := '0';
v.srr1 := (others => '0'); v.srr1 := (others => '0');
v.events := (others => '0');


-- load data formatting -- load data formatting
-- shift and byte-reverse data bytes -- shift and byte-reverse data bytes
@ -796,6 +800,7 @@ begin
mmu_mtspr := r2.req.write_spr; mmu_mtspr := r2.req.write_spr;
if r2.req.instr_fault = '1' then if r2.req.instr_fault = '1' then
v.state := MMU_LOOKUP; v.state := MMU_LOOKUP;
v.events.itlb_miss := '1';
else else
v.state := TLBIE_WAIT; v.state := TLBIE_WAIT;
end if; end if;
@ -838,6 +843,9 @@ begin
v.state := IDLE; v.state := IDLE;
end if; end if;


v.events.load_complete := r2.req.load and complete;
v.events.store_complete := (r2.req.store or r2.req.dcbz) and complete;

-- generate DSI or DSegI for load/store exceptions -- generate DSI or DSegI for load/store exceptions
-- or ISI or ISegI for instruction fetch exceptions -- or ISI or ISegI for instruction fetch exceptions
v.interrupt := exception; v.interrupt := exception;
@ -946,6 +954,8 @@ begin
e_out.in_progress <= in_progress; e_out.in_progress <= in_progress;
e_out.interrupt <= r3.interrupt; e_out.interrupt <= r3.interrupt;


events <= r3.events;

-- Busy calculation. -- Busy calculation.
stage3_busy_next <= r2.req.valid and not (complete or part_done or exception); stage3_busy_next <= r2.req.valid and not (complete or part_done or exception);



@ -290,13 +290,13 @@ begin
when x"f8" => when x"f8" =>
inc(3) := tbbit; inc(3) := tbbit;
when x"fe" => when x"fe" =>
inc(3) := p_in.occur.ld_fill_nocache; inc(3) := p_in.occur.dtlb_miss;
when others => when others =>
end case; end case;


case mmcr1(7 downto 0) is case mmcr1(7 downto 0) is
when x"f0" => when x"f0" =>
inc(4) := p_in.occur.dc_store_miss; inc(4) := p_in.occur.dc_load_miss;
when x"f2" => when x"f2" =>
inc(4) := p_in.occur.dispatch; inc(4) := p_in.occur.dispatch;
when x"f4" => when x"f4" =>

@ -104,6 +104,7 @@ begin
complete_out <= fp_in.instr_tag; complete_out <= fp_in.instr_tag;
end if; end if;
events.instr_complete <= complete_out.valid; events.instr_complete <= complete_out.valid;
events.fp_complete <= fp_in.valid;


intr := e_in.interrupt or l_in.interrupt or fp_in.interrupt; intr := e_in.interrupt or l_in.interrupt or fp_in.interrupt;



Loading…
Cancel
Save