Add core logging

This logs 256 bits of data per cycle to a ring buffer in BRAM.  The
data collected can be read out through 2 new SPRs or through the
debug interface.

The new SPRs are LOG_ADDR (724) and LOG_DATA (725).  LOG_ADDR contains
the buffer write pointer in the upper 32 bits (in units of entries,
i.e. 32 bytes) and the read pointer in the lower 32 bits (in units of
doublewords, i.e. 8 bytes).  Reading LOG_DATA gives the doubleword
from the buffer at the read pointer and increments the read pointer.
Setting bit 31 of LOG_ADDR inhibits the trace log system from writing
to the log buffer, so the contents are stable and can be read.

There are two new debug addresses which function similarly to the
LOG_ADDR and LOG_DATA SPRs.  The log is frozen while either or both of
the LOG_ADDR SPR bit 31 or the debug LOG_ADDR register bit 31 are set.

The buffer defaults to 2048 entries, i.e. 64kB.  The size is set by
the LOG_LENGTH generic on the core_debug module.  Software can
determine the length of the buffer because the length is ORed into the
buffer write pointer in the upper 32 bits of LOG_ADDR.  Hence the
length of the buffer can be calculated as 1 << (31 - clz(LOG_ADDR)).

There is a program to format the log entries in a somewhat readable
fashion in scripts/fmt_log/fmt_log.c.  The log_entry struct in that
file describes the layout of the bits in the log entries.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/208/head
Paul Mackerras 5 years ago
parent aab84acda8
commit 49a4d9f67a

@ -128,6 +128,12 @@ architecture behave of core is
-- Debug status -- Debug status
signal dbg_core_is_stopped: std_ulogic; signal dbg_core_is_stopped: std_ulogic;


-- Logging signals
signal log_data : std_ulogic_vector(255 downto 0);
signal log_rd_addr : std_ulogic_vector(31 downto 0);
signal log_wr_addr : std_ulogic_vector(31 downto 0);
signal log_rd_data : std_ulogic_vector(63 downto 0);

function keep_h(disable : boolean) return string is function keep_h(disable : boolean) return string is
begin begin
if disable then if disable then
@ -183,7 +189,8 @@ begin
flush_in => flush, flush_in => flush,
stop_in => dbg_core_stop, stop_in => dbg_core_stop,
e_in => execute1_to_fetch1, e_in => execute1_to_fetch1,
i_out => fetch1_to_icache i_out => fetch1_to_icache,
log_out => log_data(42 downto 0)
); );


fetch1_stall_in <= icache_stall_out or decode2_stall_out; fetch1_stall_in <= icache_stall_out or decode2_stall_out;
@ -205,7 +212,8 @@ begin
inval_in => dbg_icache_rst or ex1_icache_inval, inval_in => dbg_icache_rst or ex1_icache_inval,
stall_out => icache_stall_out, stall_out => icache_stall_out,
wishbone_out => wishbone_insn_out, wishbone_out => wishbone_insn_out,
wishbone_in => wishbone_insn_in wishbone_in => wishbone_insn_in,
log_out => log_data(96 downto 43)
); );


fetch2_0: entity work.fetch2 fetch2_0: entity work.fetch2
@ -227,7 +235,8 @@ begin
stall_in => decode1_stall_in, stall_in => decode1_stall_in,
flush_in => flush, flush_in => flush,
f_in => fetch2_to_decode1, f_in => fetch2_to_decode1,
d_out => decode1_to_decode2 d_out => decode1_to_decode2,
log_out => log_data(109 downto 97)
); );


decode1_stall_in <= decode2_stall_out; decode1_stall_in <= decode2_stall_out;
@ -249,7 +258,8 @@ begin
r_in => register_file_to_decode2, r_in => register_file_to_decode2,
r_out => decode2_to_register_file, r_out => decode2_to_register_file,
c_in => cr_file_to_decode2, c_in => cr_file_to_decode2,
c_out => decode2_to_cr_file c_out => decode2_to_cr_file,
log_out => log_data(119 downto 110)
); );
decode2_stall_in <= ex1_stall_out or ls1_stall_out; decode2_stall_in <= ex1_stall_out or ls1_stall_out;


@ -267,7 +277,8 @@ begin
dbg_gpr_addr => dbg_gpr_addr, dbg_gpr_addr => dbg_gpr_addr,
dbg_gpr_data => dbg_gpr_data, dbg_gpr_data => dbg_gpr_data,
sim_dump => terminate, sim_dump => terminate,
sim_dump_done => sim_cr_dump sim_dump_done => sim_cr_dump,
log_out => log_data(255 downto 185)
); );


cr_file_0: entity work.cr_file cr_file_0: entity work.cr_file
@ -279,7 +290,8 @@ begin
d_in => decode2_to_cr_file, d_in => decode2_to_cr_file,
d_out => cr_file_to_decode2, d_out => cr_file_to_decode2,
w_in => writeback_to_cr_file, w_in => writeback_to_cr_file,
sim_dump => sim_cr_dump sim_dump => sim_cr_dump,
log_out => log_data(184 downto 172)
); );


execute1_0: entity work.execute1 execute1_0: entity work.execute1
@ -299,7 +311,11 @@ begin
e_out => execute1_to_writeback, e_out => execute1_to_writeback,
icache_inval => ex1_icache_inval, icache_inval => ex1_icache_inval,
dbg_msr_out => msr, dbg_msr_out => msr,
terminate_out => terminate terminate_out => terminate,
log_out => log_data(134 downto 120),
log_rd_addr => log_rd_addr,
log_rd_data => log_rd_data,
log_wr_addr => log_wr_addr
); );


loadstore1_0: entity work.loadstore1 loadstore1_0: entity work.loadstore1
@ -314,7 +330,8 @@ begin
m_out => loadstore1_to_mmu, m_out => loadstore1_to_mmu,
m_in => mmu_to_loadstore1, m_in => mmu_to_loadstore1,
dc_stall => dcache_stall_out, dc_stall => dcache_stall_out,
stall_out => ls1_stall_out stall_out => ls1_stall_out,
log_out => log_data(149 downto 140)
); );


mmu_0: entity work.mmu mmu_0: entity work.mmu
@ -343,7 +360,8 @@ begin
m_out => dcache_to_mmu, m_out => dcache_to_mmu,
stall_out => dcache_stall_out, stall_out => dcache_stall_out,
wishbone_in => wishbone_data_in, wishbone_in => wishbone_data_in,
wishbone_out => wishbone_data_out wishbone_out => wishbone_data_out,
log_out => log_data(171 downto 152)
); );


writeback_0: entity work.writeback writeback_0: entity work.writeback
@ -356,6 +374,9 @@ begin
complete_out => complete complete_out => complete
); );


log_data(151 downto 150) <= "00";
log_data(139 downto 135) <= "00000";

debug_0: entity work.core_debug debug_0: entity work.core_debug
port map ( port map (
clk => clk, clk => clk,
@ -377,6 +398,10 @@ begin
dbg_gpr_ack => dbg_gpr_ack, dbg_gpr_ack => dbg_gpr_ack,
dbg_gpr_addr => dbg_gpr_addr, dbg_gpr_addr => dbg_gpr_addr,
dbg_gpr_data => dbg_gpr_data, dbg_gpr_data => dbg_gpr_data,
log_data => log_data,
log_read_addr => log_rd_addr,
log_read_data => log_rd_data,
log_write_addr => log_wr_addr,
terminated_out => terminated_out terminated_out => terminated_out
); );



@ -3,9 +3,14 @@ use ieee.std_logic_1164.all;
use ieee.numeric_std.all; use ieee.numeric_std.all;


library work; library work;
use work.utils.all;
use work.common.all; use work.common.all;


entity core_debug is entity core_debug is
generic (
-- Length of log buffer
LOG_LENGTH : positive := 2048
);
port ( port (
clk : in std_logic; clk : in std_logic;
rst : in std_logic; rst : in std_logic;
@ -34,6 +39,12 @@ entity core_debug is
dbg_gpr_addr : out gspr_index_t; dbg_gpr_addr : out gspr_index_t;
dbg_gpr_data : in std_ulogic_vector(63 downto 0); dbg_gpr_data : in std_ulogic_vector(63 downto 0);


-- Core logging data
log_data : in std_ulogic_vector(255 downto 0);
log_read_addr : in std_ulogic_vector(31 downto 0);
log_read_data : out std_ulogic_vector(63 downto 0);
log_write_addr : out std_ulogic_vector(31 downto 0);

-- Misc -- Misc
terminated_out : out std_ulogic terminated_out : out std_ulogic
); );
@ -77,6 +88,10 @@ architecture behave of core_debug is
-- GSPR register data -- GSPR register data
constant DBG_CORE_GSPR_DATA : std_ulogic_vector(3 downto 0) := "0101"; constant DBG_CORE_GSPR_DATA : std_ulogic_vector(3 downto 0) := "0101";


-- Log buffer address and data registers
constant DBG_CORE_LOG_ADDR : std_ulogic_vector(3 downto 0) := "0110";
constant DBG_CORE_LOG_DATA : std_ulogic_vector(3 downto 0) := "0111";

-- Some internal wires -- Some internal wires
signal stat_reg : std_ulogic_vector(63 downto 0); signal stat_reg : std_ulogic_vector(63 downto 0);


@ -89,6 +104,38 @@ architecture behave of core_debug is
signal do_gspr_rd : std_ulogic; signal do_gspr_rd : std_ulogic;
signal gspr_index : gspr_index_t; signal gspr_index : gspr_index_t;


-- Logging RAM
constant LOG_INDEX_BITS : natural := log2(LOG_LENGTH);
subtype log_ptr_t is unsigned(LOG_INDEX_BITS - 1 downto 0);
type log_array_t is array(0 to LOG_LENGTH - 1) of std_ulogic_vector(255 downto 0);
signal log_array : log_array_t;
signal log_rd_ptr : log_ptr_t;
signal log_wr_ptr : log_ptr_t;
signal log_toggle : std_ulogic;
signal log_wr_enable : std_ulogic;
signal log_rd_ptr_latched : log_ptr_t;
signal log_rd : std_ulogic_vector(255 downto 0);
signal log_dmi_addr : std_ulogic_vector(31 downto 0);
signal log_dmi_data : std_ulogic_vector(63 downto 0);
signal do_dmi_log_rd : std_ulogic;
signal log_dmi_reading : std_ulogic;
signal log_dmi_read_done : std_ulogic;
signal dmi_read_log_data : std_ulogic;
signal dmi_read_log_data_1 : std_ulogic;

function select_dword(data : std_ulogic_vector(255 downto 0);
addr : std_ulogic_vector(31 downto 0)) return std_ulogic_vector is
variable firstbit : integer;
begin
firstbit := to_integer(unsigned(addr(1 downto 0))) * 64;
return data(firstbit + 63 downto firstbit);
end;

attribute ram_style : string;
attribute ram_style of log_array : signal is "block";
attribute ram_decomp : string;
attribute ram_decomp of log_array : signal is "power";

begin begin
-- Single cycle register accesses on DMI except for GSPR data -- Single cycle register accesses on DMI except for GSPR data
dmi_ack <= dmi_req when dmi_addr /= DBG_CORE_GSPR_DATA dmi_ack <= dmi_req when dmi_addr /= DBG_CORE_GSPR_DATA
@ -108,6 +155,8 @@ begin
nia when DBG_CORE_NIA, nia when DBG_CORE_NIA,
msr when DBG_CORE_MSR, msr when DBG_CORE_MSR,
dbg_gpr_data when DBG_CORE_GSPR_DATA, dbg_gpr_data when DBG_CORE_GSPR_DATA,
log_write_addr & log_dmi_addr when DBG_CORE_LOG_ADDR,
log_dmi_data when DBG_CORE_LOG_DATA,
(others => '0') when others; (others => '0') when others;


-- DMI writes -- DMI writes
@ -118,6 +167,7 @@ begin
do_step <= '0'; do_step <= '0';
do_reset <= '0'; do_reset <= '0';
do_icreset <= '0'; do_icreset <= '0';
do_dmi_log_rd <= '0';


if (rst) then if (rst) then
stopping <= '0'; stopping <= '0';
@ -151,11 +201,26 @@ begin
end if; end if;
elsif dmi_addr = DBG_CORE_GSPR_INDEX then elsif dmi_addr = DBG_CORE_GSPR_INDEX then
gspr_index <= dmi_din(gspr_index_t'left downto 0); gspr_index <= dmi_din(gspr_index_t'left downto 0);
elsif dmi_addr = DBG_CORE_LOG_ADDR then
log_dmi_addr <= dmi_din(31 downto 0);
do_dmi_log_rd <= '1';
end if; end if;
else else
report("DMI read from " & to_string(dmi_addr)); report("DMI read from " & to_string(dmi_addr));
end if; end if;

elsif dmi_read_log_data = '0' and dmi_read_log_data_1 = '1' then
-- Increment log_dmi_addr after the end of a read from DBG_CORE_LOG_DATA
log_dmi_addr(LOG_INDEX_BITS + 1 downto 0) <=
std_ulogic_vector(unsigned(log_dmi_addr(LOG_INDEX_BITS+1 downto 0)) + 1);
do_dmi_log_rd <= '1';
end if; end if;
dmi_read_log_data_1 <= dmi_read_log_data;
if dmi_req = '1' and dmi_addr = DBG_CORE_LOG_DATA then
dmi_read_log_data <= '1';
else
dmi_read_log_data <= '0';
end if;


-- Set core stop on terminate. We'll be stopping some time *after* -- Set core stop on terminate. We'll be stopping some time *after*
-- the offending instruction, at least until we can do back flushes -- the offending instruction, at least until we can do back flushes
@ -175,5 +240,51 @@ begin
core_rst <= do_reset; core_rst <= do_reset;
icache_rst <= do_icreset; icache_rst <= do_icreset;
terminated_out <= terminated; terminated_out <= terminated;

-- Use MSB of read addresses to stop the logging
log_wr_enable <= not (log_read_addr(31) or log_dmi_addr(31));

log_ram: process(clk)
begin
if rising_edge(clk) then
if log_wr_enable = '1' then
log_array(to_integer(log_wr_ptr)) <= log_data;
end if;
log_rd <= log_array(to_integer(log_rd_ptr_latched));
end if;
end process;


log_buffer: process(clk)
variable b : integer;
variable data : std_ulogic_vector(255 downto 0);
begin
if rising_edge(clk) then
if rst = '1' then
log_wr_ptr <= (others => '0');
log_toggle <= '0';
elsif log_wr_enable = '1' then
if log_wr_ptr = to_unsigned(LOG_LENGTH - 1, LOG_INDEX_BITS) then
log_toggle <= not log_toggle;
end if;
log_wr_ptr <= log_wr_ptr + 1;
end if;
if do_dmi_log_rd = '1' then
log_rd_ptr_latched <= unsigned(log_dmi_addr(LOG_INDEX_BITS + 1 downto 2));
else
log_rd_ptr_latched <= unsigned(log_read_addr(LOG_INDEX_BITS + 1 downto 2));
end if;
if log_dmi_read_done = '1' then
log_dmi_data <= select_dword(log_rd, log_dmi_addr);
else
log_read_data <= select_dword(log_rd, log_read_addr);
end if;
log_dmi_read_done <= log_dmi_reading;
log_dmi_reading <= do_dmi_log_rd;
end if;
end process;
log_write_addr(LOG_INDEX_BITS - 1 downto 0) <= std_ulogic_vector(log_wr_ptr);
log_write_addr(LOG_INDEX_BITS) <= '1';
log_write_addr(31 downto LOG_INDEX_BITS + 1) <= (others => '0');
end behave; end behave;



@ -18,7 +18,9 @@ entity cr_file is
w_in : in WritebackToCrFileType; w_in : in WritebackToCrFileType;


-- debug -- debug
sim_dump : in std_ulogic sim_dump : in std_ulogic;

log_out : out std_ulogic_vector(12 downto 0)
); );
end entity cr_file; end entity cr_file;


@ -27,6 +29,7 @@ architecture behaviour of cr_file is
signal crs_updated : std_ulogic_vector(31 downto 0); signal crs_updated : std_ulogic_vector(31 downto 0);
signal xerc : xer_common_t := xerc_init; signal xerc : xer_common_t := xerc_init;
signal xerc_updated : xer_common_t; signal xerc_updated : xer_common_t;
signal log_data : std_ulogic_vector(12 downto 0);
begin begin
cr_create_0: process(all) cr_create_0: process(all)
variable hi, lo : integer := 0; variable hi, lo : integer := 0;
@ -88,4 +91,14 @@ begin
end process; end process;
end generate; end generate;


cr_log: process(clk)
begin
if rising_edge(clk) then
log_data <= w_in.write_cr_enable &
w_in.write_cr_data(31 downto 28) &
w_in.write_cr_mask;
end if;
end process;
log_out <= log_data;

end architecture behaviour; end architecture behaviour;

@ -46,7 +46,9 @@ entity dcache is
stall_out : out std_ulogic; stall_out : out std_ulogic;


wishbone_out : out wishbone_master_out; wishbone_out : out wishbone_master_out;
wishbone_in : in wishbone_slave_out wishbone_in : in wishbone_slave_out;

log_out : out std_ulogic_vector(19 downto 0)
); );
end entity dcache; end entity dcache;


@ -419,6 +421,8 @@ architecture rtl of dcache is
ptes(j + TLB_PTE_BITS - 1 downto j) := newpte; ptes(j + TLB_PTE_BITS - 1 downto j) := newpte;
end; end;


signal log_data : std_ulogic_vector(19 downto 0);

begin begin


assert LINE_SIZE mod ROW_SIZE = 0 report "LINE_SIZE not multiple of ROW_SIZE" severity FAILURE; assert LINE_SIZE mod ROW_SIZE = 0 report "LINE_SIZE not multiple of ROW_SIZE" severity FAILURE;
@ -1265,4 +1269,22 @@ begin
end if; end if;
end if; end if;
end process; end process;

dcache_log: process(clk)
begin
if rising_edge(clk) then
log_data <= r1.wb.adr(5 downto 3) &
wishbone_in.stall &
wishbone_in.ack &
r1.wb.stb & r1.wb.cyc &
d_out.error &
d_out.valid &
std_ulogic_vector(to_unsigned(op_t'pos(req_op), 3)) &
stall_out &
std_ulogic_vector(to_unsigned(tlb_hit_way, 3)) &
valid_ra &
std_ulogic_vector(to_unsigned(state_t'pos(r1.state), 3));
end if;
end process;
log_out <= log_data;
end; end;

@ -15,7 +15,9 @@ entity decode1 is
flush_in : in std_ulogic; flush_in : in std_ulogic;


f_in : in Fetch2ToDecode1Type; f_in : in Fetch2ToDecode1Type;
d_out : out Decode1ToDecode2Type d_out : out Decode1ToDecode2Type;

log_out : out std_ulogic_vector(12 downto 0)
); );
end entity decode1; end entity decode1;


@ -352,6 +354,8 @@ architecture behaviour of decode1 is
constant nop_instr : decode_rom_t := (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'); constant nop_instr : decode_rom_t := (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0');
constant fetch_fail_inst: decode_rom_t := (LDST, OP_FETCH_FAILED, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'); constant fetch_fail_inst: decode_rom_t := (LDST, OP_FETCH_FAILED, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0');


signal log_data : std_ulogic_vector(12 downto 0);

begin begin
decode1_0: process(clk) decode1_0: process(clk)
begin begin
@ -474,4 +478,16 @@ begin
-- Update outputs -- Update outputs
d_out <= r; d_out <= r;
end process; end process;

dec1_log : process(clk)
begin
if rising_edge(clk) then
log_data <= std_ulogic_vector(to_unsigned(insn_type_t'pos(r.decode.insn_type), 6)) &
r.nia(5 downto 2) &
std_ulogic_vector(to_unsigned(unit_t'pos(r.decode.unit), 2)) &
r.valid;
end if;
end process;
log_out <= log_data;

end architecture behaviour; end architecture behaviour;

@ -32,7 +32,9 @@ entity decode2 is
r_out : out Decode2ToRegisterFileType; r_out : out Decode2ToRegisterFileType;


c_in : in CrFileToDecode2Type; c_in : in CrFileToDecode2Type;
c_out : out Decode2ToCrFileType c_out : out Decode2ToCrFileType;

log_out : out std_ulogic_vector(9 downto 0)
); );
end entity decode2; end entity decode2;


@ -43,6 +45,8 @@ architecture behaviour of decode2 is


signal r, rin : reg_type; signal r, rin : reg_type;


signal log_data : std_ulogic_vector(9 downto 0);

type decode_input_reg_t is record type decode_input_reg_t is record
reg_valid : std_ulogic; reg_valid : std_ulogic;
reg : gspr_index_t; reg : gspr_index_t;
@ -381,4 +385,19 @@ begin
-- Update outputs -- Update outputs
e_out <= r.e; e_out <= r.e;
end process; end process;

dec2_log : process(clk)
begin
if rising_edge(clk) then
log_data <= r.e.nia(5 downto 2) &
r.e.valid &
stopped_out &
stall_out &
r.e.bypass_data3 &
r.e.bypass_data2 &
r.e.bypass_data1;
end if;
end process;
log_out <= log_data;

end architecture behaviour; end architecture behaviour;

@ -36,7 +36,12 @@ entity execute1 is
dbg_msr_out : out std_ulogic_vector(63 downto 0); dbg_msr_out : out std_ulogic_vector(63 downto 0);


icache_inval : out std_ulogic; icache_inval : out std_ulogic;
terminate_out : out std_ulogic terminate_out : out std_ulogic;

log_out : out std_ulogic_vector(14 downto 0);
log_rd_addr : out std_ulogic_vector(31 downto 0);
log_rd_data : in std_ulogic_vector(63 downto 0);
log_wr_addr : in std_ulogic_vector(31 downto 0)
); );
end entity execute1; end entity execute1;


@ -53,6 +58,7 @@ architecture behaviour of execute1 is
slow_op_oe : std_ulogic; slow_op_oe : std_ulogic;
slow_op_xerc : xer_common_t; slow_op_xerc : xer_common_t;
ldst_nia : std_ulogic_vector(63 downto 0); ldst_nia : std_ulogic_vector(63 downto 0);
log_addr_spr : std_ulogic_vector(31 downto 0);
end record; end record;
constant reg_type_init : reg_type := constant reg_type_init : reg_type :=
(e => Execute1ToWritebackInit, lr_update => '0', (e => Execute1ToWritebackInit, lr_update => '0',
@ -83,6 +89,11 @@ architecture behaviour of execute1 is
signal x_to_divider: Execute1ToDividerType; signal x_to_divider: Execute1ToDividerType;
signal divider_to_x: DividerToExecute1Type; signal divider_to_x: DividerToExecute1Type;


-- signals for logging
signal exception_log : std_ulogic;
signal irq_valid_log : std_ulogic;
signal log_data : std_ulogic_vector(14 downto 0);

type privilege_level is (USER, SUPER); type privilege_level is (USER, SUPER);
type op_privilege_array is array(insn_type_t) of privilege_level; type op_privilege_array is array(insn_type_t) of privilege_level;
constant op_privilege: op_privilege_array := ( constant op_privilege: op_privilege_array := (
@ -223,6 +234,7 @@ begin
); );


dbg_msr_out <= ctrl.msr; dbg_msr_out <= ctrl.msr;
log_rd_addr <= r.log_addr_spr;


a_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data1 = '1' else e_in.read_data1; a_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data1 = '1' else e_in.read_data1;
b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2; b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2;
@ -767,6 +779,11 @@ begin
result := ctrl.tb; result := ctrl.tb;
when SPR_DEC => when SPR_DEC =>
result := ctrl.dec; result := ctrl.dec;
when 724 => -- LOG_ADDR SPR
result := log_wr_addr & r.log_addr_spr;
when 725 => -- LOG_DATA SPR
result := log_rd_data;
v.log_addr_spr := std_ulogic_vector(unsigned(r.log_addr_spr) + 1);
when others => when others =>
-- mfspr from unimplemented SPRs should be a nop in -- mfspr from unimplemented SPRs should be a nop in
-- supervisor mode and a program interrupt for user mode -- supervisor mode and a program interrupt for user mode
@ -840,6 +857,8 @@ begin
case decode_spr_num(e_in.insn) is case decode_spr_num(e_in.insn) is
when SPR_DEC => when SPR_DEC =>
ctrl_tmp.dec <= c_in; ctrl_tmp.dec <= c_in;
when 724 => -- LOG_ADDR SPR
v.log_addr_spr := c_in(31 downto 0);
when others => when others =>
-- mtspr to unimplemented SPRs should be a nop in -- mtspr to unimplemented SPRs should be a nop in
-- supervisor mode and a program interrupt for user mode -- supervisor mode and a program interrupt for user mode
@ -1040,5 +1059,26 @@ begin
l_out <= lv; l_out <= lv;
e_out <= r.e; e_out <= r.e;
flush_out <= f_out.redirect; flush_out <= f_out.redirect;

exception_log <= exception;
irq_valid_log <= irq_valid;
end process;

ex1_log : process(clk)
begin
if rising_edge(clk) then
log_data <= ctrl.msr(MSR_EE) & ctrl.msr(MSR_PR) &
ctrl.msr(MSR_IR) & ctrl.msr(MSR_DR) &
exception_log &
irq_valid_log &
std_ulogic_vector(to_unsigned(irq_state_t'pos(ctrl.irq_state), 1)) &
"000" &
r.e.write_enable &
r.e.valid &
f_out.redirect &
stall_out &
flush_out;
end if;
end process; end process;
log_out <= log_data;
end architecture behaviour; end architecture behaviour;

@ -24,7 +24,10 @@ entity fetch1 is
e_in : in Execute1ToFetch1Type; e_in : in Execute1ToFetch1Type;


-- Request to icache -- Request to icache
i_out : out Fetch1ToIcacheType i_out : out Fetch1ToIcacheType;

-- outputs to logger
log_out : out std_ulogic_vector(42 downto 0)
); );
end entity fetch1; end entity fetch1;


@ -35,11 +38,13 @@ architecture behaviour of fetch1 is
end record; end record;
signal r, r_next : Fetch1ToIcacheType; signal r, r_next : Fetch1ToIcacheType;
signal r_int, r_next_int : reg_internal_t; signal r_int, r_next_int : reg_internal_t;
signal log_nia : std_ulogic_vector(42 downto 0);
begin begin


regs : process(clk) regs : process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
log_nia <= r.nia(63) & r.nia(43 downto 2);
if r /= r_next then if r /= r_next then
report "fetch1 rst:" & std_ulogic'image(rst) & report "fetch1 rst:" & std_ulogic'image(rst) &
" IR:" & std_ulogic'image(e_in.virt_mode) & " IR:" & std_ulogic'image(e_in.virt_mode) &
@ -54,6 +59,7 @@ begin
r_int <= r_next_int; r_int <= r_next_int;
end if; end if;
end process; end process;
log_out <= log_nia;


comb : process(all) comb : process(all)
variable v : Fetch1ToIcacheType; variable v : Fetch1ToIcacheType;

@ -57,7 +57,9 @@ entity icache is
inval_in : in std_ulogic; inval_in : in std_ulogic;


wishbone_out : out wishbone_master_out; wishbone_out : out wishbone_master_out;
wishbone_in : in wishbone_slave_out wishbone_in : in wishbone_slave_out;

log_out : out std_ulogic_vector(53 downto 0)
); );
end entity icache; end entity icache;


@ -198,6 +200,9 @@ architecture rtl of icache is
signal priv_fault : std_ulogic; signal priv_fault : std_ulogic;
signal access_ok : std_ulogic; signal access_ok : std_ulogic;


-- Output data to logger
signal log_data : std_ulogic_vector(53 downto 0);

-- Cache RAM interface -- Cache RAM interface
type cache_ram_out_t is array(way_t) of cache_row_t; type cache_ram_out_t is array(way_t) of cache_row_t;
signal cache_out : cache_ram_out_t; signal cache_out : cache_ram_out_t;
@ -674,4 +679,36 @@ begin
end if; end if;
end if; end if;
end process; end process;

data_log: process(clk)
variable lway: way_t;
variable wstate: std_ulogic;
begin
if rising_edge(clk) then
if req_is_hit then
lway := req_hit_way;
else
lway := replace_way;
end if;
wstate := '0';
if r.state /= IDLE then
wstate := '1';
end if;
log_data <= i_out.valid &
i_out.insn &
wishbone_in.ack &
r.wb.adr(5 downto 3) &
r.wb.stb & r.wb.cyc &
wishbone_in.stall &
stall_out &
r.fetch_failed &
r.hit_nia(5 downto 2) &
wstate &
std_ulogic_vector(to_unsigned(lway, 3)) &
req_is_hit & req_is_miss &
access_ok &
ra_valid;
end if;
end process;
log_out <= log_data;
end; end;

@ -25,7 +25,9 @@ entity loadstore1 is
m_in : in MmuToLoadstore1Type; m_in : in MmuToLoadstore1Type;


dc_stall : in std_ulogic; dc_stall : in std_ulogic;
stall_out : out std_ulogic stall_out : out std_ulogic;

log_out : out std_ulogic_vector(9 downto 0)
); );
end loadstore1; end loadstore1;


@ -80,6 +82,8 @@ architecture behave of loadstore1 is
signal r, rin : reg_stage_t; signal r, rin : reg_stage_t;
signal lsu_sum : std_ulogic_vector(63 downto 0); signal lsu_sum : std_ulogic_vector(63 downto 0);


signal log_data : std_ulogic_vector(9 downto 0);

-- Generate byte enables from sizes -- Generate byte enables from sizes
function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is
begin begin
@ -516,4 +520,18 @@ begin


end process; end process;


ls1_log: process(clk)
begin
if rising_edge(clk) then
log_data <= stall_out &
e_out.exception &
l_out.valid &
m_out.valid &
d_out.valid &
m_in.done &
r.dwords_done &
std_ulogic_vector(to_unsigned(state_t'pos(r.state), 3));
end if;
end process;
log_out <= log_data;
end; end;

@ -24,7 +24,9 @@ entity register_file is


-- debug -- debug
sim_dump : in std_ulogic; sim_dump : in std_ulogic;
sim_dump_done : out std_ulogic sim_dump_done : out std_ulogic;

log_out : out std_ulogic_vector(70 downto 0)
); );
end entity register_file; end entity register_file;


@ -34,6 +36,7 @@ architecture behaviour of register_file is
signal rd_port_b : std_ulogic_vector(63 downto 0); signal rd_port_b : std_ulogic_vector(63 downto 0);
signal dbg_data : std_ulogic_vector(63 downto 0); signal dbg_data : std_ulogic_vector(63 downto 0);
signal dbg_ack : std_ulogic; signal dbg_ack : std_ulogic;
signal log_data : std_ulogic_vector(70 downto 0);
begin begin
-- synchronous writes -- synchronous writes
register_write_0: process(clk) register_write_0: process(clk)
@ -131,4 +134,13 @@ begin
sim_dump_done <= '0'; sim_dump_done <= '0';
end generate; end generate;


reg_log: process(clk)
begin
if rising_edge(clk) then
log_data <= w_in.write_data &
w_in.write_enable &
w_in.write_reg;
end if;
end process;
log_out <= log_data;
end architecture behaviour; end architecture behaviour;

@ -0,0 +1,12 @@
CFLAGS = -O2 -g -Wall -std=c99

all: fmt_log

fmt_log: fmt_log.c
$(CC) -o $@ $^ $(CFLAGS)

clean:
rm -f fmt_log
distclean:
rm -f *~

@ -0,0 +1,235 @@
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>

typedef unsigned long long u64;

struct log_entry {
u64 nia_lo: 42;
u64 nia_hi: 1;
u64 ic_ra_valid: 1;
u64 ic_access_ok: 1;
u64 ic_is_miss: 1;
u64 ic_is_hit: 1;
u64 ic_way: 3;
u64 ic_state: 1;
u64 ic_part_nia: 4;
u64 ic_fetch_failed: 1;
u64 ic_stall_out: 1;
u64 ic_wb_stall: 1;
u64 ic_wb_cyc: 1;
u64 ic_wb_stb: 1;
u64 ic_wb_adr: 3;
u64 ic_wb_ack: 1;

u64 ic_insn: 32;
u64 ic_valid: 1;
u64 d1_valid: 1;
u64 d1_unit: 2;
u64 d1_part_nia: 4;
u64 d1_insn_type: 6;
u64 d2_bypass_a: 1;
u64 d2_bypass_b: 1;
u64 d2_bypass_c: 1;
u64 d2_stall_out: 1;
u64 d2_stopped_out: 1;
u64 d2_valid: 1;
u64 d2_part_nia: 4;
u64 e1_flush_out: 1;
u64 e1_stall_out: 1;
u64 e1_redirect: 1;
u64 e1_valid: 1;
u64 e1_write_enable: 1;
u64 e1_unused: 3;

u64 e1_irq_state: 1;
u64 e1_irq: 1;
u64 e1_exception: 1;
u64 e1_msr_dr: 1;
u64 e1_msr_ir: 1;
u64 e1_msr_pr: 1;
u64 e1_msr_ee: 1;
u64 pad1: 5;
u64 ls_state: 3;
u64 ls_dw_done: 1;
u64 ls_min_done: 1;
u64 ls_do_valid: 1;
u64 ls_mo_valid: 1;
u64 ls_lo_valid: 1;
u64 ls_eo_except: 1;
u64 ls_stall_out: 1;
u64 pad2: 2;
u64 dc_state: 3;
u64 dc_ra_valid: 1;
u64 dc_tlb_way: 3;
u64 dc_stall_out: 1;
u64 dc_op: 3;
u64 dc_do_valid: 1;
u64 dc_do_error: 1;
u64 dc_wb_cyc: 1;
u64 dc_wb_stb: 1;
u64 dc_wb_ack: 1;
u64 dc_wb_stall: 1;
u64 dc_wb_adr: 3;
u64 cr_wr_mask: 8;
u64 cr_wr_data: 4;
u64 cr_wr_enable: 1;
u64 reg_wr_reg: 6;
u64 reg_wr_enable: 1;

u64 reg_wr_data;
};

#define FLAG(i, y) (log.i? y: ' ')
#define FLGA(i, y, z) (log.i? y: z)
#define PNIA(f) (full_nia[log.f] & 0xff)

const char *units[4] = { "--", "al", "ls", "?3" };
const char *ops[64] =
{
"illegal", "nop ", "add ", "and ", "attn ", "b ", "bc ", "bcreg ",
"bperm ", "cmp ", "cmpb ", "cmpeqb ", "cmprb ", "cntz ", "crop ", "darn ",
"dcbf ", "dcbst ", "dcbt ", "dcbtst ", "dcbz ", "div ", "dive ", "exts ",
"extswsl", "icbi ", "icbt ", "isel ", "isync ", "ld ", "st ", "maddhd ",
"maddhdu", "maddld ", "mcrxr ", "mcrxrx ", "mfcr ", "mfmsr ", "mfspr ", "mod ",
"mtcrf ", "mtmsr ", "mtspr ", "mull64 ", "mulh64 ", "mulh32 ", "or ", "popcnt ",
"prty ", "rfid ", "rlc ", "rlcl ", "rlcr ", "sc ", "setb ", "shl ",
"shr ", "sync ", "tlbie ", "trap ", "xor ", "ffail ", "?62 ", "?63 "
};

const char *spr_names[13] =
{
"lr ", "ctr", "sr0", "sr1", "hr0", "hr1", "sg0", "sg1",
"sg2", "sg3", "hg0", "hg1", "xer"
};
int main(int ac, char **av)
{
struct log_entry log;
u64 full_nia[16];
long int lineno = 1;
FILE *f;
const char *filename;
int i;
long int ncompl = 0;

if (ac != 1 && ac != 2) {
fprintf(stderr, "Usage: %s [filename]\n", av[0]);
exit(1);
}
f = stdin;
if (ac == 2) {
filename = av[1];
f = fopen(filename, "rb");
if (f == NULL) {
perror(filename);
exit(1);
}
}

for (i = 0; i < 15; ++i)
full_nia[i] = i << 2;

while (fread(&log, sizeof(log), 1, f) == 1) {
full_nia[log.nia_lo & 0xf] = (log.nia_hi? 0xc000000000000000: 0) |
(log.nia_lo << 2);
if (lineno % 20 == 1) {
printf(" fetch1 NIA icache decode1 decode2 execute1 loadstore dcache CR GSPR\n");
printf(" ---------------- TAHW S -WB-- pN --insn-- pN un op pN byp FR IIE MSR WC SD MM CE SRTO DE -WB-- c ms reg val\n");
printf(" LdMy t csnSa IA IA it IA abc le srx EPID em tw rd mx tAwp vr csnSa 0 k\n");
}
printf("%4ld %c0000%.11llx %c ", lineno,
(log.nia_hi? 'c': '0'),
(unsigned long long)log.nia_lo << 2,
FLAG(ic_stall_out, '|'));
printf("%c%c%c%d %c %c%c%d%c%c %.2llx ",
FLGA(ic_ra_valid, ' ', 'T'),
FLGA(ic_access_ok, ' ', 'X'),
FLGA(ic_is_hit, 'H', FLGA(ic_is_miss, 'M', ' ')),
log.ic_way,
FLAG(ic_state, 'W'),
FLAG(ic_wb_cyc, 'c'),
FLAG(ic_wb_stb, 's'),
log.ic_wb_adr,
FLAG(ic_wb_stall, 'S'),
FLAG(ic_wb_ack, 'a'),
PNIA(ic_part_nia));
if (log.ic_valid)
printf("%.8x", log.ic_insn);
else if (log.ic_fetch_failed)
printf("!!!!!!!!");
else
printf("--------");
printf(" %c%c %.2llx ",
FLAG(ic_valid, '>'),
FLAG(d2_stall_out, '|'),
PNIA(d1_part_nia));
if (log.d1_valid)
printf("%s %s",
units[log.d1_unit],
ops[log.d1_insn_type]);
else
printf("-- -------");
printf(" %c%c ",
FLAG(d1_valid, '>'),
FLAG(d2_stall_out, '|'));
printf("%.2llx %c%c%c %c%c ",
PNIA(d2_part_nia),
FLAG(d2_bypass_a, 'a'),
FLAG(d2_bypass_b, 'b'),
FLAG(d2_bypass_c, 'c'),
FLAG(d2_valid, '>'),
FLAG(e1_stall_out, '|'));
printf("%c%c %c%c%c %c%c%c%c %c%c ",
FLAG(e1_flush_out, 'F'),
FLAG(e1_redirect, 'R'),
FLAG(e1_irq_state, 'w'),
FLAG(e1_irq, 'I'),
FLAG(e1_exception, 'X'),
FLAG(e1_msr_ee, 'E'),
FLGA(e1_msr_pr, 'u', 's'),
FLAG(e1_msr_ir, 'I'),
FLAG(e1_msr_dr, 'D'),
FLAG(e1_write_enable, 'W'),
FLAG(e1_valid, 'C'));
printf("%c %d%d %c%c %c%c %c ",
FLAG(ls_stall_out, '|'),
log.ls_state,
log.ls_dw_done,
FLAG(ls_mo_valid, 'M'),
FLAG(ls_min_done, 'm'),
FLAG(ls_lo_valid, 'C'),
FLAG(ls_eo_except, 'X'),
FLAG(ls_do_valid, '>'));
printf("%d%c%d%d %c%c %c%c%d%c%c ",
log.dc_state,
FLAG(dc_ra_valid, 'R'),
log.dc_tlb_way,
log.dc_op,
FLAG(dc_do_valid, 'V'),
FLAG(dc_do_error, 'E'),
FLAG(dc_wb_cyc, 'c'),
FLAG(dc_wb_stb, 's'),
log.dc_wb_adr,
FLAG(dc_wb_stall, 'S'),
FLAG(dc_wb_ack, 'a'));
if (log.cr_wr_enable)
printf("%x>%.2x ", log.cr_wr_data, log.cr_wr_mask);
else
printf(" ");
if (log.reg_wr_enable) {
if (log.reg_wr_reg < 32 || log.reg_wr_reg > 44)
printf("r%02d", log.reg_wr_reg);
else
printf("%s", spr_names[log.reg_wr_reg - 32]);
printf("=%.16llx", log.reg_wr_data);
}
printf("\n");
++lineno;
if (log.ls_lo_valid || log.e1_valid)
++ncompl;
}
printf("%ld instructions completed, %.2f CPI\n", ncompl,
(double)(lineno - 1) / ncompl);
exit(0);
}

@ -42,6 +42,9 @@
#define DBG_CORE_GSPR_INDEX 0x14 #define DBG_CORE_GSPR_INDEX 0x14
#define DBG_CORE_GSPR_DATA 0x15 #define DBG_CORE_GSPR_DATA 0x15


#define DBG_LOG_ADDR 0x16
#define DBG_LOG_DATA 0x17

static bool debug; static bool debug;


struct backend { struct backend {
@ -548,6 +551,73 @@ static void save(const char *filename, uint64_t addr, uint64_t size)
printf("%x done.\n", count); printf("%x done.\n", count);
} }


#define LOG_STOP 0x80000000ull

static void log_start(void)
{
check(dmi_write(DBG_LOG_ADDR, 0), "writing LOG_ADDR");
}

static void log_stop(void)
{
uint64_t lsize, laddr, waddr;

check(dmi_write(DBG_LOG_ADDR, LOG_STOP), "writing LOG_ADDR");
check(dmi_read(DBG_LOG_ADDR, &laddr), "reading LOG_ADDR");
waddr = laddr >> 32;
for (lsize = 1; lsize; lsize <<= 1)
if ((waddr >> 1) < lsize)
break;
waddr &= ~lsize;
printf("Log size = %" PRIu64 " entries, ", lsize);
printf("write ptr = %" PRIx64 "\n", waddr);
}

static void log_dump(const char *filename)
{
FILE *f;
uint64_t lsize, laddr, waddr;
uint64_t orig_laddr;
uint64_t i, ldata;

f = fopen(filename, "w");
if (f == NULL) {
fprintf(stderr, "Failed to create '%s': %s\n", filename,
strerror(errno));
exit(1);
}

check(dmi_read(DBG_LOG_ADDR, &orig_laddr), "reading LOG_ADDR");
if (!(orig_laddr & LOG_STOP))
check(dmi_write(DBG_LOG_ADDR, LOG_STOP), "writing LOG_ADDR");

waddr = orig_laddr >> 32;
for (lsize = 1; lsize; lsize <<= 1)
if ((waddr >> 1) < lsize)
break;
waddr &= ~lsize;
printf("Log size = %" PRIu64 " entries\n", lsize);

laddr = LOG_STOP | (waddr << 2);
check(dmi_write(DBG_LOG_ADDR, laddr), "writing LOG_ADDR");

for (i = 0; i < lsize * 4; ++i) {
check(dmi_read(DBG_LOG_DATA, &ldata), "reading LOG_DATA");
if (fwrite(&ldata, sizeof(ldata), 1, f) != 1) {
fprintf(stderr, "Write error on %s\n", filename);
exit(1);
}
if (!(i % 128)) {
printf("%" PRIu64 "...\r", i * 8);
fflush(stdout);
}
}
fclose(f);
printf("%" PRIu64 " done\n", lsize * 32);

check(dmi_write(DBG_LOG_ADDR, orig_laddr), "writing LOG_ADDR");
}

static void usage(const char *cmd) static void usage(const char *cmd)
{ {
fprintf(stderr, "Usage: %s -b <jtag|sim> <command> <args>\n", cmd); fprintf(stderr, "Usage: %s -b <jtag|sim> <command> <args>\n", cmd);
@ -572,6 +642,12 @@ static void usage(const char *cmd)
fprintf(stderr, " gpr <reg> [count]\n"); fprintf(stderr, " gpr <reg> [count]\n");
fprintf(stderr, " status\n"); fprintf(stderr, " status\n");


fprintf(stderr, "\n");
fprintf(stderr, " Core logging:\n");
fprintf(stderr, " lstart start logging\n");
fprintf(stderr, " lstop stop logging\n");
fprintf(stderr, " ldump <file> dump log to file\n");

fprintf(stderr, "\n"); fprintf(stderr, "\n");
fprintf(stderr, " JTAG:\n"); fprintf(stderr, " JTAG:\n");
fprintf(stderr, " dmiread <hex addr>\n"); fprintf(stderr, " dmiread <hex addr>\n");
@ -710,6 +786,17 @@ int main(int argc, char *argv[])
if (((i+1) < argc) && isdigit(argv[i+1][0])) if (((i+1) < argc) && isdigit(argv[i+1][0]))
count = strtoul(argv[++i], NULL, 10); count = strtoul(argv[++i], NULL, 10);
gpr_read(reg, count); gpr_read(reg, count);
} else if (strcmp(argv[i], "lstart") == 0) {
log_start();
} else if (strcmp(argv[i], "lstop") == 0) {
log_stop();
} else if (strcmp(argv[i], "ldump") == 0) {
const char *filename;

if ((i+1) >= argc)
usage(argv[0]);
filename = argv[++i];
log_dump(filename);
} else { } else {
fprintf(stderr, "Unknown command %s\n", argv[i]); fprintf(stderr, "Unknown command %s\n", argv[i]);
exit(1); exit(1);

Loading…
Cancel
Save