From 49a4d9f67a21438a4af703027baa72211409857a Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 14 May 2020 13:25:48 +1000 Subject: [PATCH] Add core logging This logs 256 bits of data per cycle to a ring buffer in BRAM. The data collected can be read out through 2 new SPRs or through the debug interface. The new SPRs are LOG_ADDR (724) and LOG_DATA (725). LOG_ADDR contains the buffer write pointer in the upper 32 bits (in units of entries, i.e. 32 bytes) and the read pointer in the lower 32 bits (in units of doublewords, i.e. 8 bytes). Reading LOG_DATA gives the doubleword from the buffer at the read pointer and increments the read pointer. Setting bit 31 of LOG_ADDR inhibits the trace log system from writing to the log buffer, so the contents are stable and can be read. There are two new debug addresses which function similarly to the LOG_ADDR and LOG_DATA SPRs. The log is frozen while either or both of the LOG_ADDR SPR bit 31 or the debug LOG_ADDR register bit 31 are set. The buffer defaults to 2048 entries, i.e. 64kB. The size is set by the LOG_LENGTH generic on the core_debug module. Software can determine the length of the buffer because the length is ORed into the buffer write pointer in the upper 32 bits of LOG_ADDR. Hence the length of the buffer can be calculated as 1 << (31 - clz(LOG_ADDR)). There is a program to format the log entries in a somewhat readable fashion in scripts/fmt_log/fmt_log.c. The log_entry struct in that file describes the layout of the bits in the log entries. Signed-off-by: Paul Mackerras --- core.vhdl | 43 +++++-- core_debug.vhdl | 111 +++++++++++++++++ cr_file.vhdl | 15 ++- dcache.vhdl | 24 +++- decode1.vhdl | 18 ++- decode2.vhdl | 21 +++- execute1.vhdl | 42 ++++++- fetch1.vhdl | 8 +- icache.vhdl | 39 +++++- loadstore1.vhdl | 20 ++- register_file.vhdl | 14 ++- scripts/fmt_log/Makefile | 12 ++ scripts/fmt_log/fmt_log.c | 235 ++++++++++++++++++++++++++++++++++++ scripts/mw_debug/mw_debug.c | 87 +++++++++++++ 14 files changed, 671 insertions(+), 18 deletions(-) create mode 100644 scripts/fmt_log/Makefile create mode 100644 scripts/fmt_log/fmt_log.c diff --git a/core.vhdl b/core.vhdl index 8ba5b70..da9853f 100644 --- a/core.vhdl +++ b/core.vhdl @@ -128,6 +128,12 @@ architecture behave of core is -- Debug status signal dbg_core_is_stopped: std_ulogic; + -- Logging signals + signal log_data : std_ulogic_vector(255 downto 0); + signal log_rd_addr : std_ulogic_vector(31 downto 0); + signal log_wr_addr : std_ulogic_vector(31 downto 0); + signal log_rd_data : std_ulogic_vector(63 downto 0); + function keep_h(disable : boolean) return string is begin if disable then @@ -183,7 +189,8 @@ begin flush_in => flush, stop_in => dbg_core_stop, e_in => execute1_to_fetch1, - i_out => fetch1_to_icache + i_out => fetch1_to_icache, + log_out => log_data(42 downto 0) ); fetch1_stall_in <= icache_stall_out or decode2_stall_out; @@ -205,7 +212,8 @@ begin inval_in => dbg_icache_rst or ex1_icache_inval, stall_out => icache_stall_out, wishbone_out => wishbone_insn_out, - wishbone_in => wishbone_insn_in + wishbone_in => wishbone_insn_in, + log_out => log_data(96 downto 43) ); fetch2_0: entity work.fetch2 @@ -227,7 +235,8 @@ begin stall_in => decode1_stall_in, flush_in => flush, f_in => fetch2_to_decode1, - d_out => decode1_to_decode2 + d_out => decode1_to_decode2, + log_out => log_data(109 downto 97) ); decode1_stall_in <= decode2_stall_out; @@ -249,7 +258,8 @@ begin r_in => register_file_to_decode2, r_out => decode2_to_register_file, c_in => cr_file_to_decode2, - c_out => decode2_to_cr_file + c_out => decode2_to_cr_file, + log_out => log_data(119 downto 110) ); decode2_stall_in <= ex1_stall_out or ls1_stall_out; @@ -267,7 +277,8 @@ begin dbg_gpr_addr => dbg_gpr_addr, dbg_gpr_data => dbg_gpr_data, sim_dump => terminate, - sim_dump_done => sim_cr_dump + sim_dump_done => sim_cr_dump, + log_out => log_data(255 downto 185) ); cr_file_0: entity work.cr_file @@ -279,7 +290,8 @@ begin d_in => decode2_to_cr_file, d_out => cr_file_to_decode2, w_in => writeback_to_cr_file, - sim_dump => sim_cr_dump + sim_dump => sim_cr_dump, + log_out => log_data(184 downto 172) ); execute1_0: entity work.execute1 @@ -299,7 +311,11 @@ begin e_out => execute1_to_writeback, icache_inval => ex1_icache_inval, dbg_msr_out => msr, - terminate_out => terminate + terminate_out => terminate, + log_out => log_data(134 downto 120), + log_rd_addr => log_rd_addr, + log_rd_data => log_rd_data, + log_wr_addr => log_wr_addr ); loadstore1_0: entity work.loadstore1 @@ -314,7 +330,8 @@ begin m_out => loadstore1_to_mmu, m_in => mmu_to_loadstore1, dc_stall => dcache_stall_out, - stall_out => ls1_stall_out + stall_out => ls1_stall_out, + log_out => log_data(149 downto 140) ); mmu_0: entity work.mmu @@ -343,7 +360,8 @@ begin m_out => dcache_to_mmu, stall_out => dcache_stall_out, wishbone_in => wishbone_data_in, - wishbone_out => wishbone_data_out + wishbone_out => wishbone_data_out, + log_out => log_data(171 downto 152) ); writeback_0: entity work.writeback @@ -356,6 +374,9 @@ begin complete_out => complete ); + log_data(151 downto 150) <= "00"; + log_data(139 downto 135) <= "00000"; + debug_0: entity work.core_debug port map ( clk => clk, @@ -377,6 +398,10 @@ begin dbg_gpr_ack => dbg_gpr_ack, dbg_gpr_addr => dbg_gpr_addr, dbg_gpr_data => dbg_gpr_data, + log_data => log_data, + log_read_addr => log_rd_addr, + log_read_data => log_rd_data, + log_write_addr => log_wr_addr, terminated_out => terminated_out ); diff --git a/core_debug.vhdl b/core_debug.vhdl index c97213b..31e4ab8 100644 --- a/core_debug.vhdl +++ b/core_debug.vhdl @@ -3,9 +3,14 @@ use ieee.std_logic_1164.all; use ieee.numeric_std.all; library work; +use work.utils.all; use work.common.all; entity core_debug is + generic ( + -- Length of log buffer + LOG_LENGTH : positive := 2048 + ); port ( clk : in std_logic; rst : in std_logic; @@ -34,6 +39,12 @@ entity core_debug is dbg_gpr_addr : out gspr_index_t; dbg_gpr_data : in std_ulogic_vector(63 downto 0); + -- Core logging data + log_data : in std_ulogic_vector(255 downto 0); + log_read_addr : in std_ulogic_vector(31 downto 0); + log_read_data : out std_ulogic_vector(63 downto 0); + log_write_addr : out std_ulogic_vector(31 downto 0); + -- Misc terminated_out : out std_ulogic ); @@ -77,6 +88,10 @@ architecture behave of core_debug is -- GSPR register data constant DBG_CORE_GSPR_DATA : std_ulogic_vector(3 downto 0) := "0101"; + -- Log buffer address and data registers + constant DBG_CORE_LOG_ADDR : std_ulogic_vector(3 downto 0) := "0110"; + constant DBG_CORE_LOG_DATA : std_ulogic_vector(3 downto 0) := "0111"; + -- Some internal wires signal stat_reg : std_ulogic_vector(63 downto 0); @@ -89,6 +104,38 @@ architecture behave of core_debug is signal do_gspr_rd : std_ulogic; signal gspr_index : gspr_index_t; + -- Logging RAM + constant LOG_INDEX_BITS : natural := log2(LOG_LENGTH); + subtype log_ptr_t is unsigned(LOG_INDEX_BITS - 1 downto 0); + type log_array_t is array(0 to LOG_LENGTH - 1) of std_ulogic_vector(255 downto 0); + signal log_array : log_array_t; + signal log_rd_ptr : log_ptr_t; + signal log_wr_ptr : log_ptr_t; + signal log_toggle : std_ulogic; + signal log_wr_enable : std_ulogic; + signal log_rd_ptr_latched : log_ptr_t; + signal log_rd : std_ulogic_vector(255 downto 0); + signal log_dmi_addr : std_ulogic_vector(31 downto 0); + signal log_dmi_data : std_ulogic_vector(63 downto 0); + signal do_dmi_log_rd : std_ulogic; + signal log_dmi_reading : std_ulogic; + signal log_dmi_read_done : std_ulogic; + signal dmi_read_log_data : std_ulogic; + signal dmi_read_log_data_1 : std_ulogic; + + function select_dword(data : std_ulogic_vector(255 downto 0); + addr : std_ulogic_vector(31 downto 0)) return std_ulogic_vector is + variable firstbit : integer; + begin + firstbit := to_integer(unsigned(addr(1 downto 0))) * 64; + return data(firstbit + 63 downto firstbit); + end; + + attribute ram_style : string; + attribute ram_style of log_array : signal is "block"; + attribute ram_decomp : string; + attribute ram_decomp of log_array : signal is "power"; + begin -- Single cycle register accesses on DMI except for GSPR data dmi_ack <= dmi_req when dmi_addr /= DBG_CORE_GSPR_DATA @@ -108,6 +155,8 @@ begin nia when DBG_CORE_NIA, msr when DBG_CORE_MSR, dbg_gpr_data when DBG_CORE_GSPR_DATA, + log_write_addr & log_dmi_addr when DBG_CORE_LOG_ADDR, + log_dmi_data when DBG_CORE_LOG_DATA, (others => '0') when others; -- DMI writes @@ -118,6 +167,7 @@ begin do_step <= '0'; do_reset <= '0'; do_icreset <= '0'; + do_dmi_log_rd <= '0'; if (rst) then stopping <= '0'; @@ -151,11 +201,26 @@ begin end if; elsif dmi_addr = DBG_CORE_GSPR_INDEX then gspr_index <= dmi_din(gspr_index_t'left downto 0); + elsif dmi_addr = DBG_CORE_LOG_ADDR then + log_dmi_addr <= dmi_din(31 downto 0); + do_dmi_log_rd <= '1'; end if; else report("DMI read from " & to_string(dmi_addr)); end if; + + elsif dmi_read_log_data = '0' and dmi_read_log_data_1 = '1' then + -- Increment log_dmi_addr after the end of a read from DBG_CORE_LOG_DATA + log_dmi_addr(LOG_INDEX_BITS + 1 downto 0) <= + std_ulogic_vector(unsigned(log_dmi_addr(LOG_INDEX_BITS+1 downto 0)) + 1); + do_dmi_log_rd <= '1'; end if; + dmi_read_log_data_1 <= dmi_read_log_data; + if dmi_req = '1' and dmi_addr = DBG_CORE_LOG_DATA then + dmi_read_log_data <= '1'; + else + dmi_read_log_data <= '0'; + end if; -- Set core stop on terminate. We'll be stopping some time *after* -- the offending instruction, at least until we can do back flushes @@ -175,5 +240,51 @@ begin core_rst <= do_reset; icache_rst <= do_icreset; terminated_out <= terminated; + + -- Use MSB of read addresses to stop the logging + log_wr_enable <= not (log_read_addr(31) or log_dmi_addr(31)); + + log_ram: process(clk) + begin + if rising_edge(clk) then + if log_wr_enable = '1' then + log_array(to_integer(log_wr_ptr)) <= log_data; + end if; + log_rd <= log_array(to_integer(log_rd_ptr_latched)); + end if; + end process; + + + log_buffer: process(clk) + variable b : integer; + variable data : std_ulogic_vector(255 downto 0); + begin + if rising_edge(clk) then + if rst = '1' then + log_wr_ptr <= (others => '0'); + log_toggle <= '0'; + elsif log_wr_enable = '1' then + if log_wr_ptr = to_unsigned(LOG_LENGTH - 1, LOG_INDEX_BITS) then + log_toggle <= not log_toggle; + end if; + log_wr_ptr <= log_wr_ptr + 1; + end if; + if do_dmi_log_rd = '1' then + log_rd_ptr_latched <= unsigned(log_dmi_addr(LOG_INDEX_BITS + 1 downto 2)); + else + log_rd_ptr_latched <= unsigned(log_read_addr(LOG_INDEX_BITS + 1 downto 2)); + end if; + if log_dmi_read_done = '1' then + log_dmi_data <= select_dword(log_rd, log_dmi_addr); + else + log_read_data <= select_dword(log_rd, log_read_addr); + end if; + log_dmi_read_done <= log_dmi_reading; + log_dmi_reading <= do_dmi_log_rd; + end if; + end process; + log_write_addr(LOG_INDEX_BITS - 1 downto 0) <= std_ulogic_vector(log_wr_ptr); + log_write_addr(LOG_INDEX_BITS) <= '1'; + log_write_addr(31 downto LOG_INDEX_BITS + 1) <= (others => '0'); end behave; diff --git a/cr_file.vhdl b/cr_file.vhdl index a6dd585..37fa76b 100644 --- a/cr_file.vhdl +++ b/cr_file.vhdl @@ -18,7 +18,9 @@ entity cr_file is w_in : in WritebackToCrFileType; -- debug - sim_dump : in std_ulogic + sim_dump : in std_ulogic; + + log_out : out std_ulogic_vector(12 downto 0) ); end entity cr_file; @@ -27,6 +29,7 @@ architecture behaviour of cr_file is signal crs_updated : std_ulogic_vector(31 downto 0); signal xerc : xer_common_t := xerc_init; signal xerc_updated : xer_common_t; + signal log_data : std_ulogic_vector(12 downto 0); begin cr_create_0: process(all) variable hi, lo : integer := 0; @@ -88,4 +91,14 @@ begin end process; end generate; + cr_log: process(clk) + begin + if rising_edge(clk) then + log_data <= w_in.write_cr_enable & + w_in.write_cr_data(31 downto 28) & + w_in.write_cr_mask; + end if; + end process; + log_out <= log_data; + end architecture behaviour; diff --git a/dcache.vhdl b/dcache.vhdl index 9df5562..7a8c0ba 100644 --- a/dcache.vhdl +++ b/dcache.vhdl @@ -46,7 +46,9 @@ entity dcache is stall_out : out std_ulogic; wishbone_out : out wishbone_master_out; - wishbone_in : in wishbone_slave_out + wishbone_in : in wishbone_slave_out; + + log_out : out std_ulogic_vector(19 downto 0) ); end entity dcache; @@ -419,6 +421,8 @@ architecture rtl of dcache is ptes(j + TLB_PTE_BITS - 1 downto j) := newpte; end; + signal log_data : std_ulogic_vector(19 downto 0); + begin assert LINE_SIZE mod ROW_SIZE = 0 report "LINE_SIZE not multiple of ROW_SIZE" severity FAILURE; @@ -1265,4 +1269,22 @@ begin end if; end if; end process; + + dcache_log: process(clk) + begin + if rising_edge(clk) then + log_data <= r1.wb.adr(5 downto 3) & + wishbone_in.stall & + wishbone_in.ack & + r1.wb.stb & r1.wb.cyc & + d_out.error & + d_out.valid & + std_ulogic_vector(to_unsigned(op_t'pos(req_op), 3)) & + stall_out & + std_ulogic_vector(to_unsigned(tlb_hit_way, 3)) & + valid_ra & + std_ulogic_vector(to_unsigned(state_t'pos(r1.state), 3)); + end if; + end process; + log_out <= log_data; end; diff --git a/decode1.vhdl b/decode1.vhdl index b6da5d7..3e3b41a 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -15,7 +15,9 @@ entity decode1 is flush_in : in std_ulogic; f_in : in Fetch2ToDecode1Type; - d_out : out Decode1ToDecode2Type + d_out : out Decode1ToDecode2Type; + + log_out : out std_ulogic_vector(12 downto 0) ); end entity decode1; @@ -352,6 +354,8 @@ architecture behaviour of decode1 is constant nop_instr : decode_rom_t := (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'); constant fetch_fail_inst: decode_rom_t := (LDST, OP_FETCH_FAILED, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'); + signal log_data : std_ulogic_vector(12 downto 0); + begin decode1_0: process(clk) begin @@ -474,4 +478,16 @@ begin -- Update outputs d_out <= r; end process; + + dec1_log : process(clk) + begin + if rising_edge(clk) then + log_data <= std_ulogic_vector(to_unsigned(insn_type_t'pos(r.decode.insn_type), 6)) & + r.nia(5 downto 2) & + std_ulogic_vector(to_unsigned(unit_t'pos(r.decode.unit), 2)) & + r.valid; + end if; + end process; + log_out <= log_data; + end architecture behaviour; diff --git a/decode2.vhdl b/decode2.vhdl index f889a23..2c02a75 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -32,7 +32,9 @@ entity decode2 is r_out : out Decode2ToRegisterFileType; c_in : in CrFileToDecode2Type; - c_out : out Decode2ToCrFileType + c_out : out Decode2ToCrFileType; + + log_out : out std_ulogic_vector(9 downto 0) ); end entity decode2; @@ -43,6 +45,8 @@ architecture behaviour of decode2 is signal r, rin : reg_type; + signal log_data : std_ulogic_vector(9 downto 0); + type decode_input_reg_t is record reg_valid : std_ulogic; reg : gspr_index_t; @@ -381,4 +385,19 @@ begin -- Update outputs e_out <= r.e; end process; + + dec2_log : process(clk) + begin + if rising_edge(clk) then + log_data <= r.e.nia(5 downto 2) & + r.e.valid & + stopped_out & + stall_out & + r.e.bypass_data3 & + r.e.bypass_data2 & + r.e.bypass_data1; + end if; + end process; + log_out <= log_data; + end architecture behaviour; diff --git a/execute1.vhdl b/execute1.vhdl index cac8e8a..9066aa0 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -36,7 +36,12 @@ entity execute1 is dbg_msr_out : out std_ulogic_vector(63 downto 0); icache_inval : out std_ulogic; - terminate_out : out std_ulogic + terminate_out : out std_ulogic; + + log_out : out std_ulogic_vector(14 downto 0); + log_rd_addr : out std_ulogic_vector(31 downto 0); + log_rd_data : in std_ulogic_vector(63 downto 0); + log_wr_addr : in std_ulogic_vector(31 downto 0) ); end entity execute1; @@ -53,6 +58,7 @@ architecture behaviour of execute1 is slow_op_oe : std_ulogic; slow_op_xerc : xer_common_t; ldst_nia : std_ulogic_vector(63 downto 0); + log_addr_spr : std_ulogic_vector(31 downto 0); end record; constant reg_type_init : reg_type := (e => Execute1ToWritebackInit, lr_update => '0', @@ -83,6 +89,11 @@ architecture behaviour of execute1 is signal x_to_divider: Execute1ToDividerType; signal divider_to_x: DividerToExecute1Type; + -- signals for logging + signal exception_log : std_ulogic; + signal irq_valid_log : std_ulogic; + signal log_data : std_ulogic_vector(14 downto 0); + type privilege_level is (USER, SUPER); type op_privilege_array is array(insn_type_t) of privilege_level; constant op_privilege: op_privilege_array := ( @@ -223,6 +234,7 @@ begin ); dbg_msr_out <= ctrl.msr; + log_rd_addr <= r.log_addr_spr; a_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data1 = '1' else e_in.read_data1; b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2; @@ -767,6 +779,11 @@ begin result := ctrl.tb; when SPR_DEC => result := ctrl.dec; + when 724 => -- LOG_ADDR SPR + result := log_wr_addr & r.log_addr_spr; + when 725 => -- LOG_DATA SPR + result := log_rd_data; + v.log_addr_spr := std_ulogic_vector(unsigned(r.log_addr_spr) + 1); when others => -- mfspr from unimplemented SPRs should be a nop in -- supervisor mode and a program interrupt for user mode @@ -840,6 +857,8 @@ begin case decode_spr_num(e_in.insn) is when SPR_DEC => ctrl_tmp.dec <= c_in; + when 724 => -- LOG_ADDR SPR + v.log_addr_spr := c_in(31 downto 0); when others => -- mtspr to unimplemented SPRs should be a nop in -- supervisor mode and a program interrupt for user mode @@ -1040,5 +1059,26 @@ begin l_out <= lv; e_out <= r.e; flush_out <= f_out.redirect; + + exception_log <= exception; + irq_valid_log <= irq_valid; + end process; + + ex1_log : process(clk) + begin + if rising_edge(clk) then + log_data <= ctrl.msr(MSR_EE) & ctrl.msr(MSR_PR) & + ctrl.msr(MSR_IR) & ctrl.msr(MSR_DR) & + exception_log & + irq_valid_log & + std_ulogic_vector(to_unsigned(irq_state_t'pos(ctrl.irq_state), 1)) & + "000" & + r.e.write_enable & + r.e.valid & + f_out.redirect & + stall_out & + flush_out; + end if; end process; + log_out <= log_data; end architecture behaviour; diff --git a/fetch1.vhdl b/fetch1.vhdl index cb1d1df..758db24 100644 --- a/fetch1.vhdl +++ b/fetch1.vhdl @@ -24,7 +24,10 @@ entity fetch1 is e_in : in Execute1ToFetch1Type; -- Request to icache - i_out : out Fetch1ToIcacheType + i_out : out Fetch1ToIcacheType; + + -- outputs to logger + log_out : out std_ulogic_vector(42 downto 0) ); end entity fetch1; @@ -35,11 +38,13 @@ architecture behaviour of fetch1 is end record; signal r, r_next : Fetch1ToIcacheType; signal r_int, r_next_int : reg_internal_t; + signal log_nia : std_ulogic_vector(42 downto 0); begin regs : process(clk) begin if rising_edge(clk) then + log_nia <= r.nia(63) & r.nia(43 downto 2); if r /= r_next then report "fetch1 rst:" & std_ulogic'image(rst) & " IR:" & std_ulogic'image(e_in.virt_mode) & @@ -54,6 +59,7 @@ begin r_int <= r_next_int; end if; end process; + log_out <= log_nia; comb : process(all) variable v : Fetch1ToIcacheType; diff --git a/icache.vhdl b/icache.vhdl index 27f8c6a..2107d5a 100644 --- a/icache.vhdl +++ b/icache.vhdl @@ -57,7 +57,9 @@ entity icache is inval_in : in std_ulogic; wishbone_out : out wishbone_master_out; - wishbone_in : in wishbone_slave_out + wishbone_in : in wishbone_slave_out; + + log_out : out std_ulogic_vector(53 downto 0) ); end entity icache; @@ -198,6 +200,9 @@ architecture rtl of icache is signal priv_fault : std_ulogic; signal access_ok : std_ulogic; + -- Output data to logger + signal log_data : std_ulogic_vector(53 downto 0); + -- Cache RAM interface type cache_ram_out_t is array(way_t) of cache_row_t; signal cache_out : cache_ram_out_t; @@ -674,4 +679,36 @@ begin end if; end if; end process; + + data_log: process(clk) + variable lway: way_t; + variable wstate: std_ulogic; + begin + if rising_edge(clk) then + if req_is_hit then + lway := req_hit_way; + else + lway := replace_way; + end if; + wstate := '0'; + if r.state /= IDLE then + wstate := '1'; + end if; + log_data <= i_out.valid & + i_out.insn & + wishbone_in.ack & + r.wb.adr(5 downto 3) & + r.wb.stb & r.wb.cyc & + wishbone_in.stall & + stall_out & + r.fetch_failed & + r.hit_nia(5 downto 2) & + wstate & + std_ulogic_vector(to_unsigned(lway, 3)) & + req_is_hit & req_is_miss & + access_ok & + ra_valid; + end if; + end process; + log_out <= log_data; end; diff --git a/loadstore1.vhdl b/loadstore1.vhdl index e71ad74..6e71df9 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -25,7 +25,9 @@ entity loadstore1 is m_in : in MmuToLoadstore1Type; dc_stall : in std_ulogic; - stall_out : out std_ulogic + stall_out : out std_ulogic; + + log_out : out std_ulogic_vector(9 downto 0) ); end loadstore1; @@ -80,6 +82,8 @@ architecture behave of loadstore1 is signal r, rin : reg_stage_t; signal lsu_sum : std_ulogic_vector(63 downto 0); + signal log_data : std_ulogic_vector(9 downto 0); + -- Generate byte enables from sizes function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is begin @@ -516,4 +520,18 @@ begin end process; + ls1_log: process(clk) + begin + if rising_edge(clk) then + log_data <= stall_out & + e_out.exception & + l_out.valid & + m_out.valid & + d_out.valid & + m_in.done & + r.dwords_done & + std_ulogic_vector(to_unsigned(state_t'pos(r.state), 3)); + end if; + end process; + log_out <= log_data; end; diff --git a/register_file.vhdl b/register_file.vhdl index 4df032c..260255e 100644 --- a/register_file.vhdl +++ b/register_file.vhdl @@ -24,7 +24,9 @@ entity register_file is -- debug sim_dump : in std_ulogic; - sim_dump_done : out std_ulogic + sim_dump_done : out std_ulogic; + + log_out : out std_ulogic_vector(70 downto 0) ); end entity register_file; @@ -34,6 +36,7 @@ architecture behaviour of register_file is signal rd_port_b : std_ulogic_vector(63 downto 0); signal dbg_data : std_ulogic_vector(63 downto 0); signal dbg_ack : std_ulogic; + signal log_data : std_ulogic_vector(70 downto 0); begin -- synchronous writes register_write_0: process(clk) @@ -131,4 +134,13 @@ begin sim_dump_done <= '0'; end generate; + reg_log: process(clk) + begin + if rising_edge(clk) then + log_data <= w_in.write_data & + w_in.write_enable & + w_in.write_reg; + end if; + end process; + log_out <= log_data; end architecture behaviour; diff --git a/scripts/fmt_log/Makefile b/scripts/fmt_log/Makefile new file mode 100644 index 0000000..04d1e9a --- /dev/null +++ b/scripts/fmt_log/Makefile @@ -0,0 +1,12 @@ +CFLAGS = -O2 -g -Wall -std=c99 + +all: fmt_log + +fmt_log: fmt_log.c + $(CC) -o $@ $^ $(CFLAGS) + +clean: + rm -f fmt_log +distclean: + rm -f *~ + diff --git a/scripts/fmt_log/fmt_log.c b/scripts/fmt_log/fmt_log.c new file mode 100644 index 0000000..c8fb501 --- /dev/null +++ b/scripts/fmt_log/fmt_log.c @@ -0,0 +1,235 @@ +#include +#include +#include + +typedef unsigned long long u64; + +struct log_entry { + u64 nia_lo: 42; + u64 nia_hi: 1; + u64 ic_ra_valid: 1; + u64 ic_access_ok: 1; + u64 ic_is_miss: 1; + u64 ic_is_hit: 1; + u64 ic_way: 3; + u64 ic_state: 1; + u64 ic_part_nia: 4; + u64 ic_fetch_failed: 1; + u64 ic_stall_out: 1; + u64 ic_wb_stall: 1; + u64 ic_wb_cyc: 1; + u64 ic_wb_stb: 1; + u64 ic_wb_adr: 3; + u64 ic_wb_ack: 1; + + u64 ic_insn: 32; + u64 ic_valid: 1; + u64 d1_valid: 1; + u64 d1_unit: 2; + u64 d1_part_nia: 4; + u64 d1_insn_type: 6; + u64 d2_bypass_a: 1; + u64 d2_bypass_b: 1; + u64 d2_bypass_c: 1; + u64 d2_stall_out: 1; + u64 d2_stopped_out: 1; + u64 d2_valid: 1; + u64 d2_part_nia: 4; + u64 e1_flush_out: 1; + u64 e1_stall_out: 1; + u64 e1_redirect: 1; + u64 e1_valid: 1; + u64 e1_write_enable: 1; + u64 e1_unused: 3; + + u64 e1_irq_state: 1; + u64 e1_irq: 1; + u64 e1_exception: 1; + u64 e1_msr_dr: 1; + u64 e1_msr_ir: 1; + u64 e1_msr_pr: 1; + u64 e1_msr_ee: 1; + u64 pad1: 5; + u64 ls_state: 3; + u64 ls_dw_done: 1; + u64 ls_min_done: 1; + u64 ls_do_valid: 1; + u64 ls_mo_valid: 1; + u64 ls_lo_valid: 1; + u64 ls_eo_except: 1; + u64 ls_stall_out: 1; + u64 pad2: 2; + u64 dc_state: 3; + u64 dc_ra_valid: 1; + u64 dc_tlb_way: 3; + u64 dc_stall_out: 1; + u64 dc_op: 3; + u64 dc_do_valid: 1; + u64 dc_do_error: 1; + u64 dc_wb_cyc: 1; + u64 dc_wb_stb: 1; + u64 dc_wb_ack: 1; + u64 dc_wb_stall: 1; + u64 dc_wb_adr: 3; + u64 cr_wr_mask: 8; + u64 cr_wr_data: 4; + u64 cr_wr_enable: 1; + u64 reg_wr_reg: 6; + u64 reg_wr_enable: 1; + + u64 reg_wr_data; +}; + +#define FLAG(i, y) (log.i? y: ' ') +#define FLGA(i, y, z) (log.i? y: z) +#define PNIA(f) (full_nia[log.f] & 0xff) + +const char *units[4] = { "--", "al", "ls", "?3" }; +const char *ops[64] = +{ + "illegal", "nop ", "add ", "and ", "attn ", "b ", "bc ", "bcreg ", + "bperm ", "cmp ", "cmpb ", "cmpeqb ", "cmprb ", "cntz ", "crop ", "darn ", + "dcbf ", "dcbst ", "dcbt ", "dcbtst ", "dcbz ", "div ", "dive ", "exts ", + "extswsl", "icbi ", "icbt ", "isel ", "isync ", "ld ", "st ", "maddhd ", + "maddhdu", "maddld ", "mcrxr ", "mcrxrx ", "mfcr ", "mfmsr ", "mfspr ", "mod ", + "mtcrf ", "mtmsr ", "mtspr ", "mull64 ", "mulh64 ", "mulh32 ", "or ", "popcnt ", + "prty ", "rfid ", "rlc ", "rlcl ", "rlcr ", "sc ", "setb ", "shl ", + "shr ", "sync ", "tlbie ", "trap ", "xor ", "ffail ", "?62 ", "?63 " +}; + +const char *spr_names[13] = +{ + "lr ", "ctr", "sr0", "sr1", "hr0", "hr1", "sg0", "sg1", + "sg2", "sg3", "hg0", "hg1", "xer" +}; + +int main(int ac, char **av) +{ + struct log_entry log; + u64 full_nia[16]; + long int lineno = 1; + FILE *f; + const char *filename; + int i; + long int ncompl = 0; + + if (ac != 1 && ac != 2) { + fprintf(stderr, "Usage: %s [filename]\n", av[0]); + exit(1); + } + f = stdin; + if (ac == 2) { + filename = av[1]; + f = fopen(filename, "rb"); + if (f == NULL) { + perror(filename); + exit(1); + } + } + + for (i = 0; i < 15; ++i) + full_nia[i] = i << 2; + + while (fread(&log, sizeof(log), 1, f) == 1) { + full_nia[log.nia_lo & 0xf] = (log.nia_hi? 0xc000000000000000: 0) | + (log.nia_lo << 2); + if (lineno % 20 == 1) { + printf(" fetch1 NIA icache decode1 decode2 execute1 loadstore dcache CR GSPR\n"); + printf(" ---------------- TAHW S -WB-- pN --insn-- pN un op pN byp FR IIE MSR WC SD MM CE SRTO DE -WB-- c ms reg val\n"); + printf(" LdMy t csnSa IA IA it IA abc le srx EPID em tw rd mx tAwp vr csnSa 0 k\n"); + } + printf("%4ld %c0000%.11llx %c ", lineno, + (log.nia_hi? 'c': '0'), + (unsigned long long)log.nia_lo << 2, + FLAG(ic_stall_out, '|')); + printf("%c%c%c%d %c %c%c%d%c%c %.2llx ", + FLGA(ic_ra_valid, ' ', 'T'), + FLGA(ic_access_ok, ' ', 'X'), + FLGA(ic_is_hit, 'H', FLGA(ic_is_miss, 'M', ' ')), + log.ic_way, + FLAG(ic_state, 'W'), + FLAG(ic_wb_cyc, 'c'), + FLAG(ic_wb_stb, 's'), + log.ic_wb_adr, + FLAG(ic_wb_stall, 'S'), + FLAG(ic_wb_ack, 'a'), + PNIA(ic_part_nia)); + if (log.ic_valid) + printf("%.8x", log.ic_insn); + else if (log.ic_fetch_failed) + printf("!!!!!!!!"); + else + printf("--------"); + printf(" %c%c %.2llx ", + FLAG(ic_valid, '>'), + FLAG(d2_stall_out, '|'), + PNIA(d1_part_nia)); + if (log.d1_valid) + printf("%s %s", + units[log.d1_unit], + ops[log.d1_insn_type]); + else + printf("-- -------"); + printf(" %c%c ", + FLAG(d1_valid, '>'), + FLAG(d2_stall_out, '|')); + printf("%.2llx %c%c%c %c%c ", + PNIA(d2_part_nia), + FLAG(d2_bypass_a, 'a'), + FLAG(d2_bypass_b, 'b'), + FLAG(d2_bypass_c, 'c'), + FLAG(d2_valid, '>'), + FLAG(e1_stall_out, '|')); + printf("%c%c %c%c%c %c%c%c%c %c%c ", + FLAG(e1_flush_out, 'F'), + FLAG(e1_redirect, 'R'), + FLAG(e1_irq_state, 'w'), + FLAG(e1_irq, 'I'), + FLAG(e1_exception, 'X'), + FLAG(e1_msr_ee, 'E'), + FLGA(e1_msr_pr, 'u', 's'), + FLAG(e1_msr_ir, 'I'), + FLAG(e1_msr_dr, 'D'), + FLAG(e1_write_enable, 'W'), + FLAG(e1_valid, 'C')); + printf("%c %d%d %c%c %c%c %c ", + FLAG(ls_stall_out, '|'), + log.ls_state, + log.ls_dw_done, + FLAG(ls_mo_valid, 'M'), + FLAG(ls_min_done, 'm'), + FLAG(ls_lo_valid, 'C'), + FLAG(ls_eo_except, 'X'), + FLAG(ls_do_valid, '>')); + printf("%d%c%d%d %c%c %c%c%d%c%c ", + log.dc_state, + FLAG(dc_ra_valid, 'R'), + log.dc_tlb_way, + log.dc_op, + FLAG(dc_do_valid, 'V'), + FLAG(dc_do_error, 'E'), + FLAG(dc_wb_cyc, 'c'), + FLAG(dc_wb_stb, 's'), + log.dc_wb_adr, + FLAG(dc_wb_stall, 'S'), + FLAG(dc_wb_ack, 'a')); + if (log.cr_wr_enable) + printf("%x>%.2x ", log.cr_wr_data, log.cr_wr_mask); + else + printf(" "); + if (log.reg_wr_enable) { + if (log.reg_wr_reg < 32 || log.reg_wr_reg > 44) + printf("r%02d", log.reg_wr_reg); + else + printf("%s", spr_names[log.reg_wr_reg - 32]); + printf("=%.16llx", log.reg_wr_data); + } + printf("\n"); + ++lineno; + if (log.ls_lo_valid || log.e1_valid) + ++ncompl; + } + printf("%ld instructions completed, %.2f CPI\n", ncompl, + (double)(lineno - 1) / ncompl); + exit(0); +} diff --git a/scripts/mw_debug/mw_debug.c b/scripts/mw_debug/mw_debug.c index 7f77558..28e43b4 100644 --- a/scripts/mw_debug/mw_debug.c +++ b/scripts/mw_debug/mw_debug.c @@ -42,6 +42,9 @@ #define DBG_CORE_GSPR_INDEX 0x14 #define DBG_CORE_GSPR_DATA 0x15 +#define DBG_LOG_ADDR 0x16 +#define DBG_LOG_DATA 0x17 + static bool debug; struct backend { @@ -548,6 +551,73 @@ static void save(const char *filename, uint64_t addr, uint64_t size) printf("%x done.\n", count); } +#define LOG_STOP 0x80000000ull + +static void log_start(void) +{ + check(dmi_write(DBG_LOG_ADDR, 0), "writing LOG_ADDR"); +} + +static void log_stop(void) +{ + uint64_t lsize, laddr, waddr; + + check(dmi_write(DBG_LOG_ADDR, LOG_STOP), "writing LOG_ADDR"); + check(dmi_read(DBG_LOG_ADDR, &laddr), "reading LOG_ADDR"); + waddr = laddr >> 32; + for (lsize = 1; lsize; lsize <<= 1) + if ((waddr >> 1) < lsize) + break; + waddr &= ~lsize; + printf("Log size = %" PRIu64 " entries, ", lsize); + printf("write ptr = %" PRIx64 "\n", waddr); +} + +static void log_dump(const char *filename) +{ + FILE *f; + uint64_t lsize, laddr, waddr; + uint64_t orig_laddr; + uint64_t i, ldata; + + f = fopen(filename, "w"); + if (f == NULL) { + fprintf(stderr, "Failed to create '%s': %s\n", filename, + strerror(errno)); + exit(1); + } + + check(dmi_read(DBG_LOG_ADDR, &orig_laddr), "reading LOG_ADDR"); + if (!(orig_laddr & LOG_STOP)) + check(dmi_write(DBG_LOG_ADDR, LOG_STOP), "writing LOG_ADDR"); + + waddr = orig_laddr >> 32; + for (lsize = 1; lsize; lsize <<= 1) + if ((waddr >> 1) < lsize) + break; + waddr &= ~lsize; + printf("Log size = %" PRIu64 " entries\n", lsize); + + laddr = LOG_STOP | (waddr << 2); + check(dmi_write(DBG_LOG_ADDR, laddr), "writing LOG_ADDR"); + + for (i = 0; i < lsize * 4; ++i) { + check(dmi_read(DBG_LOG_DATA, &ldata), "reading LOG_DATA"); + if (fwrite(&ldata, sizeof(ldata), 1, f) != 1) { + fprintf(stderr, "Write error on %s\n", filename); + exit(1); + } + if (!(i % 128)) { + printf("%" PRIu64 "...\r", i * 8); + fflush(stdout); + } + } + fclose(f); + printf("%" PRIu64 " done\n", lsize * 32); + + check(dmi_write(DBG_LOG_ADDR, orig_laddr), "writing LOG_ADDR"); +} + static void usage(const char *cmd) { fprintf(stderr, "Usage: %s -b \n", cmd); @@ -572,6 +642,12 @@ static void usage(const char *cmd) fprintf(stderr, " gpr [count]\n"); fprintf(stderr, " status\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " Core logging:\n"); + fprintf(stderr, " lstart start logging\n"); + fprintf(stderr, " lstop stop logging\n"); + fprintf(stderr, " ldump dump log to file\n"); + fprintf(stderr, "\n"); fprintf(stderr, " JTAG:\n"); fprintf(stderr, " dmiread \n"); @@ -710,6 +786,17 @@ int main(int argc, char *argv[]) if (((i+1) < argc) && isdigit(argv[i+1][0])) count = strtoul(argv[++i], NULL, 10); gpr_read(reg, count); + } else if (strcmp(argv[i], "lstart") == 0) { + log_start(); + } else if (strcmp(argv[i], "lstop") == 0) { + log_stop(); + } else if (strcmp(argv[i], "ldump") == 0) { + const char *filename; + + if ((i+1) >= argc) + usage(argv[0]); + filename = argv[++i]; + log_dump(filename); } else { fprintf(stderr, "Unknown command %s\n", argv[i]); exit(1);