Merge pull request #318 from paulusmack/pmu

PMU enhancements
dcache-nc-fix
Michael Neuling 3 years ago committed by GitHub
commit 8bbb0018b4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -209,6 +209,11 @@ package common is
next_predicted: std_ulogic; next_predicted: std_ulogic;
end record; end record;


-- PMU event bundle produced by the instruction cache.
-- In this change it is driven from the icache `events` port and wired to
-- the `ic_events` input of execute1, which forwards both fields to the PMU
-- occurrence record.
type IcacheEventType is record
-- Cleared at every clock edge in icache and set to '1' on the path that
-- starts servicing a cache miss (next to the miss report statement).
icache_miss : std_ulogic;
-- Registered copy of `m_in.tlbld and not rst` in icache, i.e. asserted
-- when the MMU loads an iTLB entry outside of reset.
itlb_miss_resolved : std_ulogic;
end record;

type Decode1ToDecode2Type is record type Decode1ToDecode2Type is record
valid: std_ulogic; valid: std_ulogic;
stop_mark : std_ulogic; stop_mark : std_ulogic;
@ -347,8 +352,10 @@ package common is
itlb_miss_resolved : std_ulogic; itlb_miss_resolved : std_ulogic;
icache_miss : std_ulogic; icache_miss : std_ulogic;
dc_miss_resolved : std_ulogic; dc_miss_resolved : std_ulogic;
dc_load_miss : std_ulogic;
dc_ld_miss_resolved : std_ulogic; dc_ld_miss_resolved : std_ulogic;
dc_store_miss : std_ulogic; dc_store_miss : std_ulogic;
dtlb_miss : std_ulogic;
dtlb_miss_resolved : std_ulogic; dtlb_miss_resolved : std_ulogic;
ld_miss_nocache : std_ulogic; ld_miss_nocache : std_ulogic;
ld_fill_nocache : std_ulogic; ld_fill_nocache : std_ulogic;
@ -468,6 +475,14 @@ package common is
cache_paradox : std_ulogic; cache_paradox : std_ulogic;
end record; end record;


-- PMU event bundle produced by the data cache.
-- In this change it is driven from the dcache `events` port (via the
-- internal `ev` signal) and wired to the `dc_events` input of execute1.
type DcacheEventType is record
-- Defaults to '0' each clock; set to '1' in the dcache state machine
-- branch that moves to RELOAD_WAIT_ACK with write_tag set
-- (presumably the OP_LOAD_MISS case -- the `when` label is not visible
-- in this hunk; confirm against dcache.vhdl).
load_miss : std_ulogic;
-- Defaults to '0' each clock; set to '1' when the store path is taken
-- with req.op = OP_STORE_MISS.
store_miss : std_ulogic;
-- Defaults to '0' each clock; set to `not r1.dcbz` at the point where
-- the cache line is marked valid and the state returns to IDLE.
-- execute1 maps this onto the PMU's dc_ld_miss_resolved occurrence.
dcache_refill : std_ulogic;
-- Registered copy of tlb_miss, which is
-- `r0_valid and r0.req.virt_mode and not tlb_hit`.
dtlb_miss : std_ulogic;
-- Registered copy of tlbwe (`r0_valid and r0.tlbld`), i.e. asserted when
-- a dTLB entry is being written.
dtlb_miss_resolved : std_ulogic;
end record;

type Loadstore1ToMmuType is record type Loadstore1ToMmuType is record
valid : std_ulogic; valid : std_ulogic;
tlbie : std_ulogic; tlbie : std_ulogic;
@ -537,6 +552,12 @@ package common is
interrupt => '0', intr_vec => 0, interrupt => '0', intr_vec => 0,
srr0 => (others => '0'), srr1 => (others => '0')); srr0 => (others => '0'), srr1 => (others => '0'));


-- PMU event bundle produced by loadstore1.
-- The fields are computed into the stage-3 register (`v.events`, cleared to
-- all '0' by default each cycle) and output from `r3.events` on the
-- `events` port, which core wires to the `ls_events` input of execute1.
type Loadstore1EventType is record
-- `r2.req.load and complete`.
load_complete : std_ulogic;
-- `(r2.req.store or r2.req.dcbz) and complete`; dcbz is counted as a
-- store.
store_complete : std_ulogic;
-- Set to '1' when a request with r2.req.instr_fault = '1' sends the
-- state machine to MMU_LOOKUP; execute1 maps this onto the PMU's
-- itlb_miss occurrence.
itlb_miss : std_ulogic;
end record;

type Execute1ToWritebackType is record type Execute1ToWritebackType is record
valid: std_ulogic; valid: std_ulogic;
instr_tag : instr_tag_t; instr_tag : instr_tag_t;
@ -668,7 +689,8 @@ package common is
write_cr_data => (others => '0')); write_cr_data => (others => '0'));


type WritebackEventType is record type WritebackEventType is record
instr_complete : std_ulogic; instr_complete : std_ulogic;
fp_complete : std_ulogic;
end record; end record;


end common; end common;

@ -148,6 +148,9 @@ architecture behave of core is
signal msr : std_ulogic_vector(63 downto 0); signal msr : std_ulogic_vector(63 downto 0);


-- PMU event bus -- PMU event bus
signal icache_events : IcacheEventType;
signal loadstore_events : Loadstore1EventType;
signal dcache_events : DcacheEventType;
signal writeback_events : WritebackEventType; signal writeback_events : WritebackEventType;


-- Debug status -- Debug status
@ -247,6 +250,7 @@ begin
wishbone_out => wishbone_insn_out, wishbone_out => wishbone_insn_out,
wishbone_in => wishbone_insn_in, wishbone_in => wishbone_insn_in,
wb_snoop_in => wb_snoop_in, wb_snoop_in => wb_snoop_in,
events => icache_events,
log_out => log_data(96 downto 43) log_out => log_data(96 downto 43)
); );


@ -356,6 +360,9 @@ begin
icache_inval => ex1_icache_inval, icache_inval => ex1_icache_inval,
dbg_msr_out => msr, dbg_msr_out => msr,
wb_events => writeback_events, wb_events => writeback_events,
ls_events => loadstore_events,
dc_events => dcache_events,
ic_events => icache_events,
terminate_out => terminate, terminate_out => terminate,
log_out => log_data(134 downto 120), log_out => log_data(134 downto 120),
log_rd_addr => log_rd_addr, log_rd_addr => log_rd_addr,
@ -397,6 +404,7 @@ begin
m_out => loadstore1_to_mmu, m_out => loadstore1_to_mmu,
m_in => mmu_to_loadstore1, m_in => mmu_to_loadstore1,
dc_stall => dcache_stall_out, dc_stall => dcache_stall_out,
events => loadstore_events,
log_out => log_data(149 downto 140) log_out => log_data(149 downto 140)
); );


@ -431,6 +439,7 @@ begin
wishbone_in => wishbone_data_in, wishbone_in => wishbone_data_in,
wishbone_out => wishbone_data_out, wishbone_out => wishbone_data_out,
snoop_in => wb_snoop_in, snoop_in => wb_snoop_in,
events => dcache_events,
log_out => log_data(170 downto 151) log_out => log_data(170 downto 151)
); );



@ -46,6 +46,8 @@ entity dcache is
wishbone_out : out wishbone_master_out; wishbone_out : out wishbone_master_out;
wishbone_in : in wishbone_slave_out; wishbone_in : in wishbone_slave_out;


events : out DcacheEventType;

log_out : out std_ulogic_vector(19 downto 0) log_out : out std_ulogic_vector(19 downto 0)
); );
end entity dcache; end entity dcache;
@ -355,6 +357,8 @@ architecture rtl of dcache is


signal r1 : reg_stage_1_t; signal r1 : reg_stage_1_t;


signal ev : DcacheEventType;

-- Reservation information -- Reservation information
-- --
type reservation_t is record type reservation_t is record
@ -412,6 +416,7 @@ architecture rtl of dcache is
signal rc_ok : std_ulogic; signal rc_ok : std_ulogic;
signal perm_ok : std_ulogic; signal perm_ok : std_ulogic;
signal access_ok : std_ulogic; signal access_ok : std_ulogic;
signal tlb_miss : std_ulogic;


-- TLB PLRU output interface -- TLB PLRU output interface
type tlb_plru_out_t is array(tlb_index_t) of std_ulogic_vector(TLB_WAY_BITS-1 downto 0); type tlb_plru_out_t is array(tlb_index_t) of std_ulogic_vector(TLB_WAY_BITS-1 downto 0);
@ -605,6 +610,8 @@ begin
r0_valid <= r0_full and not r1.full and not d_in.hold; r0_valid <= r0_full and not r1.full and not d_in.hold;
stall_out <= r0_stall; stall_out <= r0_stall;


events <= ev;

-- TLB -- TLB
-- Operates in the second cycle on the request latched in r0.req. -- Operates in the second cycle on the request latched in r0.req.
-- TLB updates write the entry at the end of the second cycle. -- TLB updates write the entry at the end of the second cycle.
@ -689,6 +696,7 @@ begin
pte <= (others => '0'); pte <= (others => '0');
end if; end if;
valid_ra <= tlb_hit or not r0.req.virt_mode; valid_ra <= tlb_hit or not r0.req.virt_mode;
tlb_miss <= r0_valid and r0.req.virt_mode and not tlb_hit;
if r0.req.virt_mode = '1' then if r0.req.virt_mode = '1' then
ra <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) & ra <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
r0.req.addr(TLB_LG_PGSZ - 1 downto ROW_OFF_BITS) & r0.req.addr(TLB_LG_PGSZ - 1 downto ROW_OFF_BITS) &
@ -712,6 +720,7 @@ begin
if rising_edge(clk) then if rising_edge(clk) then
tlbie := r0_valid and r0.tlbie; tlbie := r0_valid and r0.tlbie;
tlbwe := r0_valid and r0.tlbld; tlbwe := r0_valid and r0.tlbld;
ev.dtlb_miss_resolved <= tlbwe;
if rst = '1' or (tlbie = '1' and r0.doall = '1') then if rst = '1' or (tlbie = '1' and r0.doall = '1') then
-- clear all valid bits at once -- clear all valid bits at once
for i in tlb_index_t loop for i in tlb_index_t loop
@ -1286,6 +1295,11 @@ begin
r1.forward_valid1 <= '0'; r1.forward_valid1 <= '0';
end if; end if;


ev.dcache_refill <= '0';
ev.load_miss <= '0';
ev.store_miss <= '0';
ev.dtlb_miss <= tlb_miss;

-- On reset, clear all valid bits to force misses -- On reset, clear all valid bits to force misses
if rst = '1' then if rst = '1' then
for i in index_t loop for i in index_t loop
@ -1417,6 +1431,7 @@ begin
-- Track that we had one request sent -- Track that we had one request sent
r1.state <= RELOAD_WAIT_ACK; r1.state <= RELOAD_WAIT_ACK;
r1.write_tag <= '1'; r1.write_tag <= '1';
ev.load_miss <= '1';


when OP_LOAD_NC => when OP_LOAD_NC =>
r1.wb.cyc <= '1'; r1.wb.cyc <= '1';
@ -1449,6 +1464,9 @@ begin
r1.wb.we <= '1'; r1.wb.we <= '1';
r1.wb.cyc <= '1'; r1.wb.cyc <= '1';
r1.wb.stb <= '1'; r1.wb.stb <= '1';
if req.op = OP_STORE_MISS then
ev.store_miss <= '1';
end if;


-- OP_NONE and OP_BAD do nothing -- OP_NONE and OP_BAD do nothing
-- OP_BAD & OP_STCX_FAIL were handled above already -- OP_BAD & OP_STCX_FAIL were handled above already
@ -1500,6 +1518,7 @@ begin
-- Cache line is now valid -- Cache line is now valid
cache_valids(r1.store_index)(r1.store_way) <= '1'; cache_valids(r1.store_index)(r1.store_way) <= '1';


ev.dcache_refill <= not r1.dcbz;
r1.state <= IDLE; r1.state <= IDLE;
end if; end if;



@ -47,6 +47,9 @@ entity execute1 is


-- PMU event buses -- PMU event buses
wb_events : in WritebackEventType; wb_events : in WritebackEventType;
ls_events : in Loadstore1EventType;
dc_events : in DcacheEventType;
ic_events : in IcacheEventType;


log_out : out std_ulogic_vector(14 downto 0); log_out : out std_ulogic_vector(14 downto 0);
log_rd_addr : out std_ulogic_vector(31 downto 0); log_rd_addr : out std_ulogic_vector(31 downto 0);
@ -70,6 +73,11 @@ architecture behaviour of execute1 is
mul_finish : std_ulogic; mul_finish : std_ulogic;
div_in_progress : std_ulogic; div_in_progress : std_ulogic;
cntz_in_progress : std_ulogic; cntz_in_progress : std_ulogic;
no_instr_avail : std_ulogic;
instr_dispatch : std_ulogic;
ext_interrupt : std_ulogic;
taken_branch_event : std_ulogic;
br_mispredict : std_ulogic;
log_addr_spr : std_ulogic_vector(31 downto 0); log_addr_spr : std_ulogic_vector(31 downto 0);
end record; end record;
constant reg_type_init : reg_type := constant reg_type_init : reg_type :=
@ -78,6 +86,8 @@ architecture behaviour of execute1 is
busy => '0', terminate => '0', intr_pending => '0', busy => '0', terminate => '0', intr_pending => '0',
fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL, br_taken => '0', fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL, br_taken => '0',
mul_in_progress => '0', mul_finish => '0', div_in_progress => '0', cntz_in_progress => '0', mul_in_progress => '0', mul_finish => '0', div_in_progress => '0', cntz_in_progress => '0',
no_instr_avail => '0', instr_dispatch => '0', ext_interrupt => '0',
taken_branch_event => '0', br_mispredict => '0',
others => (others => '0')); others => (others => '0'));


signal r, rin : reg_type; signal r, rin : reg_type;
@ -302,7 +312,24 @@ begin
c_in <= e_in.read_data3; c_in <= e_in.read_data3;
cr_in <= e_in.cr; cr_in <= e_in.cr;


x_to_pmu.occur <= (instr_complete => wb_events.instr_complete, others => '0'); x_to_pmu.occur <= (instr_complete => wb_events.instr_complete,
fp_complete => wb_events.fp_complete,
ld_complete => ls_events.load_complete,
st_complete => ls_events.store_complete,
itlb_miss => ls_events.itlb_miss,
dc_load_miss => dc_events.load_miss,
dc_ld_miss_resolved => dc_events.dcache_refill,
dc_store_miss => dc_events.store_miss,
dtlb_miss => dc_events.dtlb_miss,
dtlb_miss_resolved => dc_events.dtlb_miss_resolved,
icache_miss => ic_events.icache_miss,
itlb_miss_resolved => ic_events.itlb_miss_resolved,
no_instr_avail => r.no_instr_avail,
dispatch => r.instr_dispatch,
ext_interrupt => r.ext_interrupt,
br_taken_complete => r.taken_branch_event,
br_mispredict => r.br_mispredict,
others => '0');
x_to_pmu.nia <= current.nia; x_to_pmu.nia <= current.nia;
x_to_pmu.addr <= (others => '0'); x_to_pmu.addr <= (others => '0');
x_to_pmu.addr_v <= '0'; x_to_pmu.addr_v <= '0';
@ -715,6 +742,9 @@ begin
v.div_in_progress := '0'; v.div_in_progress := '0';
v.cntz_in_progress := '0'; v.cntz_in_progress := '0';
v.mul_finish := '0'; v.mul_finish := '0';
v.ext_interrupt := '0';
v.taken_branch_event := '0';
v.br_mispredict := '0';


x_to_pmu.mfspr <= '0'; x_to_pmu.mfspr <= '0';
x_to_pmu.mtspr <= '0'; x_to_pmu.mtspr <= '0';
@ -804,6 +834,7 @@ begin
elsif ext_irq_in = '1' then elsif ext_irq_in = '1' then
v.e.intr_vec := 16#500#; v.e.intr_vec := 16#500#;
report "IRQ valid: External"; report "IRQ valid: External";
v.ext_interrupt := '1';
end if; end if;
exception := '1'; exception := '1';


@ -836,6 +867,9 @@ begin
v.intr_pending := '0'; v.intr_pending := '0';
end if; end if;


v.no_instr_avail := not (e_in.valid or l_in.busy or l_in.in_progress or r.busy or fp_in.busy);
v.instr_dispatch := valid_in and not exception and not illegal;

if valid_in = '1' and exception = '0' and illegal = '0' and e_in.unit = ALU then if valid_in = '1' and exception = '0' and illegal = '0' and e_in.unit = ALU then
v.e.valid := '1'; v.e.valid := '1';


@ -905,6 +939,7 @@ begin
if ctrl.msr(MSR_BE) = '1' then if ctrl.msr(MSR_BE) = '1' then
do_trace := '1'; do_trace := '1';
end if; end if;
v.taken_branch_event := '1';
when OP_BC | OP_BCREG => when OP_BC | OP_BCREG =>
-- read_data1 is CTR -- read_data1 is CTR
-- for OP_BCREG, read_data2 is target register (CTR, LR or TAR) -- for OP_BCREG, read_data2 is target register (CTR, LR or TAR)
@ -920,6 +955,7 @@ begin
taken_branch := r.br_taken; taken_branch := r.br_taken;
end if; end if;
v.br_taken := taken_branch; v.br_taken := taken_branch;
v.taken_branch_event := taken_branch;
abs_branch := e_in.br_abs; abs_branch := e_in.br_abs;
if e_in.repeat = '0' or e_in.second = '1' then if e_in.repeat = '0' or e_in.second = '1' then
is_branch := '1'; is_branch := '1';
@ -1114,6 +1150,7 @@ begin
end if; end if;
if taken_branch /= e_in.br_pred then if taken_branch /= e_in.br_pred then
v.e.redirect := '1'; v.e.redirect := '1';
v.br_mispredict := is_direct_branch;
end if; end if;
v.e.br_last := is_direct_branch; v.e.br_last := is_direct_branch;
v.e.br_taken := taken_branch; v.e.br_taken := taken_branch;

@ -70,6 +70,7 @@ entity icache is


wb_snoop_in : in wishbone_master_out := wishbone_master_out_init; wb_snoop_in : in wishbone_master_out := wishbone_master_out_init;


events : out IcacheEventType;
log_out : out std_ulogic_vector(53 downto 0) log_out : out std_ulogic_vector(53 downto 0)
); );
end entity icache; end entity icache;
@ -197,6 +198,8 @@ architecture rtl of icache is


signal r : reg_internal_t; signal r : reg_internal_t;


signal ev : IcacheEventType;

-- Async signals on incoming request -- Async signals on incoming request
signal req_index : index_t; signal req_index : index_t;
signal req_row : row_t; signal req_row : row_t;
@ -494,6 +497,7 @@ begin
itlb_ptes(wr_index) <= m_in.pte; itlb_ptes(wr_index) <= m_in.pte;
itlb_valids(wr_index) <= '1'; itlb_valids(wr_index) <= '1';
end if; end if;
ev.itlb_miss_resolved <= m_in.tlbld and not rst;
end if; end if;
end process; end process;


@ -627,6 +631,7 @@ begin
variable snoop_cache_tags : cache_tags_set_t; variable snoop_cache_tags : cache_tags_set_t;
begin begin
if rising_edge(clk) then if rising_edge(clk) then
ev.icache_miss <= '0';
-- On reset, clear all valid bits to force misses -- On reset, clear all valid bits to force misses
if rst = '1' then if rst = '1' then
for i in index_t loop for i in index_t loop
@ -699,6 +704,7 @@ begin
" way:" & integer'image(replace_way) & " way:" & integer'image(replace_way) &
" tag:" & to_hstring(req_tag) & " tag:" & to_hstring(req_tag) &
" RA:" & to_hstring(real_addr); " RA:" & to_hstring(real_addr);
ev.icache_miss <= '1';


-- Keep track of our index and way for subsequent stores -- Keep track of our index and way for subsequent stores
r.store_index <= req_index; r.store_index <= req_index;

@ -33,6 +33,8 @@ entity loadstore1 is


dc_stall : in std_ulogic; dc_stall : in std_ulogic;


events : out Loadstore1EventType;

log_out : out std_ulogic_vector(9 downto 0) log_out : out std_ulogic_vector(9 downto 0)
); );
end loadstore1; end loadstore1;
@ -146,6 +148,7 @@ architecture behave of loadstore1 is
intr_vec : integer range 0 to 16#fff#; intr_vec : integer range 0 to 16#fff#;
nia : std_ulogic_vector(63 downto 0); nia : std_ulogic_vector(63 downto 0);
srr1 : std_ulogic_vector(15 downto 0); srr1 : std_ulogic_vector(15 downto 0);
events : Loadstore1EventType;
end record; end record;


signal req_in : request_t; signal req_in : request_t;
@ -668,6 +671,7 @@ begin
do_update := '0'; do_update := '0';
v.convert_lfs := '0'; v.convert_lfs := '0';
v.srr1 := (others => '0'); v.srr1 := (others => '0');
v.events := (others => '0');


-- load data formatting -- load data formatting
-- shift and byte-reverse data bytes -- shift and byte-reverse data bytes
@ -796,6 +800,7 @@ begin
mmu_mtspr := r2.req.write_spr; mmu_mtspr := r2.req.write_spr;
if r2.req.instr_fault = '1' then if r2.req.instr_fault = '1' then
v.state := MMU_LOOKUP; v.state := MMU_LOOKUP;
v.events.itlb_miss := '1';
else else
v.state := TLBIE_WAIT; v.state := TLBIE_WAIT;
end if; end if;
@ -838,6 +843,9 @@ begin
v.state := IDLE; v.state := IDLE;
end if; end if;


v.events.load_complete := r2.req.load and complete;
v.events.store_complete := (r2.req.store or r2.req.dcbz) and complete;

-- generate DSI or DSegI for load/store exceptions -- generate DSI or DSegI for load/store exceptions
-- or ISI or ISegI for instruction fetch exceptions -- or ISI or ISegI for instruction fetch exceptions
v.interrupt := exception; v.interrupt := exception;
@ -946,6 +954,8 @@ begin
e_out.in_progress <= in_progress; e_out.in_progress <= in_progress;
e_out.interrupt <= r3.interrupt; e_out.interrupt <= r3.interrupt;


events <= r3.events;

-- Busy calculation. -- Busy calculation.
stage3_busy_next <= r2.req.valid and not (complete or part_done or exception); stage3_busy_next <= r2.req.valid and not (complete or part_done or exception);



@ -227,7 +227,12 @@ begin
event := '1'; event := '1';
end if; end if;
if mmcr0(MMCR0_PMCjCE) = '1' and if mmcr0(MMCR0_PMCjCE) = '1' and
(pmcs(2)(31) or pmcs(3)(31) or pmcs(4)(31) or pmcs(5)(31) or pmcs(6)(31)) = '1' then (pmcs(2)(31) or pmcs(3)(31) or pmcs(4)(31)) = '1' then
event := '1';
end if;
if mmcr0(MMCR0_PMCjCE) = '1' and
mmcr0(MMCR0_PMCC + 1 downto MMCR0_PMCC) /= "11" and
(pmcs(5)(31) or pmcs(6)(31)) = '1' then
event := '1'; event := '1';
end if; end if;


@ -285,13 +290,13 @@ begin
when x"f8" => when x"f8" =>
inc(3) := tbbit; inc(3) := tbbit;
when x"fe" => when x"fe" =>
inc(3) := p_in.occur.ld_fill_nocache; inc(3) := p_in.occur.dtlb_miss;
when others => when others =>
end case; end case;


case mmcr1(7 downto 0) is case mmcr1(7 downto 0) is
when x"f0" => when x"f0" =>
inc(4) := p_in.occur.dc_store_miss; inc(4) := p_in.occur.dc_load_miss;
when x"f2" => when x"f2" =>
inc(4) := p_in.occur.dispatch; inc(4) := p_in.occur.dispatch;
when x"f4" => when x"f4" =>
@ -309,10 +314,8 @@ begin
when others => when others =>
end case; end case;


if mmcr0(MMCR0_PMCC + 1 downto MMCR0_PMCC) /= "11" then inc(5) := (mmcr0(MMCR0_CC56RUN) or p_in.run) and p_in.occur.instr_complete;
inc(5) := (mmcr0(MMCR0_CC56RUN) or p_in.run) and p_in.occur.instr_complete; inc(6) := mmcr0(MMCR0_CC56RUN) or p_in.run;
inc(6) := mmcr0(MMCR0_CC56RUN) or p_in.run;
end if;


-- Evaluate freeze conditions -- Evaluate freeze conditions
freeze := mmcr0(MMCR0_FC) or freeze := mmcr0(MMCR0_FC) or
@ -346,6 +349,14 @@ begin
end if; end if;
end loop; end loop;


-- When MMCR0[PMCC] = "11", PMC5 and PMC6 are not controlled by the
-- MMCRs and don't generate events, but do continue to count run
-- instructions and run cycles.
if mmcr0(MMCR0_PMCC + 1 downto MMCR0_PMCC) = "11" then
inc(5) := p_in.run and p_in.occur.instr_complete;
inc(6) := p_in.run;
end if;

doinc <= inc; doinc <= inc;
doevent <= event; doevent <= event;
doalert <= event and mmcr0(MMCR0_PMAE); doalert <= event and mmcr0(MMCR0_PMAE);

@ -104,6 +104,7 @@ begin
complete_out <= fp_in.instr_tag; complete_out <= fp_in.instr_tag;
end if; end if;
events.instr_complete <= complete_out.valid; events.instr_complete <= complete_out.valid;
events.fp_complete <= fp_in.valid;


intr := e_in.interrupt or l_in.interrupt or fp_in.interrupt; intr := e_in.interrupt or l_in.interrupt or fp_in.interrupt;



Loading…
Cancel
Save