From 1206dfe18cf5c5e23a9f93e66a2ac6d86955da04 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 10 Sep 2019 17:03:37 +0100 Subject: [PATCH 01/11] Use a 3 way WB arbiter and cleanup fpga toplevel The 3rd master is currently unused, it will host the WB debug module. Signed-off-by: Benjamin Herrenschmidt --- soc.vhdl | 19 +++++---- wishbone_arbiter.vhdl | 94 ++++++++++++++++++++++++++----------------- 2 files changed, 69 insertions(+), 44 deletions(-) diff --git a/soc.vhdl b/soc.vhdl index 045679f..4ccbc12 100644 --- a/soc.vhdl +++ b/soc.vhdl @@ -35,6 +35,8 @@ architecture behaviour of soc is signal wishbone_dcore_out : wishbone_master_out; signal wishbone_icore_in : wishbone_slave_out; signal wishbone_icore_out : wishbone_master_out; + signal wishbone_debug_in : wishbone_slave_out; + signal wishbone_debug_out : wishbone_master_out; -- Wishbone master (output of arbiter): signal wb_master_in : wishbone_slave_out; @@ -75,16 +77,17 @@ begin -- Wishbone bus master arbiter & mux wishbone_arbiter_0: entity work.wishbone_arbiter port map( - clk => system_clk, - rst => rst, - wb1_in => wishbone_dcore_out, - wb1_out => wishbone_dcore_in, - wb2_in => wishbone_icore_out, - wb2_out => wishbone_icore_in, - wb_out => wb_master_out, - wb_in => wb_master_in + clk => system_clk, rst => rst, + wb1_in => wishbone_dcore_out, wb1_out => wishbone_dcore_in, + wb2_in => wishbone_icore_out, wb2_out => wishbone_icore_in, + wb3_in => wishbone_debug_out, wb3_out => wishbone_debug_in, + wb_out => wb_master_out, wb_in => wb_master_in ); + -- Dummy wishbone debug module + wishbone_debug_out.cyc <= '0'; + wishbone_debug_out.stb <= '0'; + -- Wishbone slaves address decoder & mux slave_intercon: process(wb_master_out, wb_bram_out, wb_uart0_out) -- Selected slave diff --git a/wishbone_arbiter.vhdl b/wishbone_arbiter.vhdl index 7d5cbcd..d839b31 100644 --- a/wishbone_arbiter.vhdl +++ b/wishbone_arbiter.vhdl @@ -4,54 +4,76 @@ use ieee.std_logic_1164.all; library work; use 
work.wishbone_types.all; +-- TODO: Use an array of master/slaves with parametric size entity wishbone_arbiter is - port ( - clk : in std_ulogic; - rst : in std_ulogic; + port (clk : in std_ulogic; + rst : in std_ulogic; - wb1_in : in wishbone_master_out; - wb1_out : out wishbone_slave_out; + wb1_in : in wishbone_master_out; + wb1_out : out wishbone_slave_out; - wb2_in : in wishbone_master_out; - wb2_out : out wishbone_slave_out; + wb2_in : in wishbone_master_out; + wb2_out : out wishbone_slave_out; - wb_out : out wishbone_master_out; - wb_in : in wishbone_slave_out - ); + wb3_in : in wishbone_master_out; + wb3_out : out wishbone_slave_out; + + wb_out : out wishbone_master_out; + wb_in : in wishbone_slave_out + ); end wishbone_arbiter; architecture behave of wishbone_arbiter is - type wishbone_arbiter_state_t is (IDLE, WB1_BUSY, WB2_BUSY); + type wishbone_arbiter_state_t is (IDLE, WB1_BUSY, WB2_BUSY, WB3_BUSY); signal state : wishbone_arbiter_state_t := IDLE; begin - wb1_out <= wb_in when state = WB1_BUSY else wishbone_slave_out_init; - wb2_out <= wb_in when state = WB2_BUSY else wishbone_slave_out_init; - wb_out <= wb1_in when state = WB1_BUSY else wb2_in when state = WB2_BUSY else wishbone_master_out_init; + wishbone_muxes: process(state, wb_in, wb1_in, wb2_in, wb3_in) + begin + -- Requests from masters are fully muxed + wb_out <= wb1_in when state = WB1_BUSY else + wb2_in when state = WB2_BUSY else + wb3_in when state = WB3_BUSY else + wishbone_master_out_init; + + -- Responses from slave don't need to mux the data bus + wb1_out.dat <= wb_in.dat; + wb2_out.dat <= wb_in.dat; + wb3_out.dat <= wb_in.dat; + wb1_out.ack <= wb_in.ack when state = WB1_BUSY else '0'; + wb2_out.ack <= wb_in.ack when state = WB2_BUSY else '0'; + wb3_out.ack <= wb_in.ack when state = WB3_BUSY else '0'; + end process; wishbone_arbiter_process: process(clk) begin - if rising_edge(clk) then - if rst = '1' then - state <= IDLE; - else - case state is - when IDLE => - if wb1_in.cyc = '1' then - 
state <= WB1_BUSY; - elsif wb2_in.cyc = '1' then - state <= WB2_BUSY; - end if; - when WB1_BUSY => - if wb1_in.cyc = '0' then - state <= IDLE; - end if; - when WB2_BUSY => - if wb2_in.cyc = '0' then - state <= IDLE; - end if; - end case; - end if; - end if; + if rising_edge(clk) then + if rst = '1' then + state <= IDLE; + else + case state is + when IDLE => + if wb1_in.cyc = '1' then + state <= WB1_BUSY; + elsif wb2_in.cyc = '1' then + state <= WB2_BUSY; + elsif wb3_in.cyc = '1' then + state <= WB3_BUSY; + end if; + when WB1_BUSY => + if wb1_in.cyc = '0' then + state <= IDLE; + end if; + when WB2_BUSY => + if wb2_in.cyc = '0' then + state <= IDLE; + end if; + when WB3_BUSY => + if wb3_in.cyc = '0' then + state <= IDLE; + end if; + end case; + end if; + end if; end process; end behave; From ee52fd4d809ef4c85424742387740e59825d8245 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 10 Sep 2019 17:17:59 +0100 Subject: [PATCH 02/11] Add a debug (DMI) bus and a JTAG interface to it on Xilinx FPGAs This adds a simple bus that can be mastered from an external system via JTAG, which will be used to hookup various debug modules. It's loosely based on the RiscV model (hence the DMI name). The module currently only supports hooking up to a Xilinx BSCANE2 but it shouldn't be too hard to adapt it to support different TAPs if necessary. The JTAG protocol proper is not exactly the RiscV one at this point, though I might still change it. This comes with some sim variants of Xilinx BSCANE2 and BUFG and a test bench. 
Signed-off-by: Benjamin Herrenschmidt --- Makefile | 21 ++- dmi_dtm_dummy.vhdl | 30 ++++ dmi_dtm_tb.vhdl | 214 ++++++++++++++++++++++ dmi_dtm_xilinx.vhdl | 276 +++++++++++++++++++++++++++++ microwatt.core | 16 +- scripts/mw_debug.py | 62 +++++++ sim-unisim/BSCANE2.vhdl | 39 ++++ sim-unisim/BUFG.vhdl | 12 ++ sim-unisim/unisim_vcomponents.vhdl | 45 +++++ soc.vhdl | 31 +++- 10 files changed, 737 insertions(+), 9 deletions(-) create mode 100644 dmi_dtm_dummy.vhdl create mode 100644 dmi_dtm_tb.vhdl create mode 100644 dmi_dtm_xilinx.vhdl create mode 100755 scripts/mw_debug.py create mode 100644 sim-unisim/BSCANE2.vhdl create mode 100644 sim-unisim/BUFG.vhdl create mode 100644 sim-unisim/unisim_vcomponents.vhdl diff --git a/Makefile b/Makefile index 62e9644..a554529 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,9 @@ GHDL=ghdl -GHDLFLAGS=--std=08 +GHDLFLAGS=--std=08 -Psim-unisim CFLAGS=-O2 -Wall -all = core_tb simple_ram_behavioural_tb soc_reset_tb icache_tb multiply_tb +all = core_tb simple_ram_behavioural_tb soc_reset_tb icache_tb multiply_tb dmi_dtm_tb + # XXX # loadstore_tb fetch_tb @@ -40,10 +41,18 @@ simple_ram_behavioural_helpers.o: simple_ram_behavioural_tb.o: wishbone_types.o simple_ram_behavioural.o simple_ram_behavioural.o: wishbone_types.o simple_ram_behavioural_helpers.o sim_uart.o: wishbone_types.o sim_console.o -soc.o: common.o wishbone_types.o core.o wishbone_arbiter.o sim_uart.o simple_ram_behavioural.o +soc.o: common.o wishbone_types.o core.o wishbone_arbiter.o sim_uart.o simple_ram_behavioural.o dmi_dtm_xilinx.o wishbone_arbiter.o: wishbone_types.o wishbone_types.o: writeback.o: common.o +dmi_dtm_tb.o: dmi_dtm_xilinx.o +dmi_dtm_xilinx.o: sim-unisim/unisim_vcomponents.o + +UNISIM_BITS = sim-unisim/unisim_vcomponents.vhdl sim-unisim/BSCANE2.vhdl sim-unisim/BUFG.vhdl +sim-unisim/unisim_vcomponents.o: $(UNISIM_BITS) + $(GHDL) -a $(GHDLFLAGS) --work=unisim --workdir=sim-unisim $^ + + fpga/soc_reset_tb.o: fpga/soc_reset.o soc_reset_tb: fpga/soc_reset_tb.o 
fpga/soc_reset.o @@ -70,6 +79,9 @@ simple_ram_tb: simple_ram_tb.o simple_ram_behavioural_tb: simple_ram_behavioural_helpers_c.o simple_ram_behavioural_tb.o $(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o $@ +dmi_dtm_tb: dmi_dtm_tb.o + $(GHDL) -e $(GHDLFLAGS) $@ + tests = $(sort $(patsubst tests/%.out,%,$(wildcard tests/*.out))) check: $(tests) test_micropython test_micropython_long @@ -86,4 +98,5 @@ test_micropython_long: core_tb @./scripts/test_micropython_long.py clean: - rm -f *.o work-*cf $(all) + rm -f *.o work-*cf unisim-*cf $(all) + rm -f sim-unisim/*.o sim-unisim/unisim-*cf diff --git a/dmi_dtm_dummy.vhdl b/dmi_dtm_dummy.vhdl new file mode 100644 index 0000000..3cabf38 --- /dev/null +++ b/dmi_dtm_dummy.vhdl @@ -0,0 +1,32 @@ +-- Dummy/empty DMI interface to make toplevel happy on unsupported FPGAs + +library ieee; +use ieee.std_logic_1164.all; + +library work; +use work.wishbone_types.all; + +entity dmi_dtm is + generic(ABITS : INTEGER:=8; + DBITS : INTEGER:=32); + + port(sys_clk : in std_ulogic; + sys_reset : in std_ulogic; + dmi_addr : out std_ulogic_vector(ABITS - 1 downto 0); + dmi_din : in std_ulogic_vector(DBITS - 1 downto 0); + dmi_dout : out std_ulogic_vector(DBITS - 1 downto 0); + dmi_req : out std_ulogic; + dmi_wr : out std_ulogic; + dmi_ack : in std_ulogic + ); +end entity dmi_dtm; + +architecture behaviour of dmi_dtm is +begin + -- Never issue a request: tie every DMI master output to its idle value + dmi_addr <= (others => '0'); + dmi_dout <= (others => '0'); + dmi_req <= '0'; + dmi_wr <= '0'; +end architecture behaviour; + diff --git a/dmi_dtm_tb.vhdl b/dmi_dtm_tb.vhdl new file mode 100644 index 0000000..d872c13 --- /dev/null +++ b/dmi_dtm_tb.vhdl @@ -0,0 +1,214 @@ +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +library work; +use work.common.all; +use work.wishbone_types.all; + +library unisim; +use unisim.vcomponents.all; + +entity dmi_dtm_tb is +end dmi_dtm_tb; + +architecture behave of dmi_dtm_tb is + signal clk : std_ulogic; + signal rst : std_ulogic; + constant clk_period :
time := 10 ns; + constant jclk_period : time := 30 ns; + + -- DMI debug bus signals + signal dmi_addr : std_ulogic_vector(7 downto 0); + signal dmi_din : std_ulogic_vector(63 downto 0); + signal dmi_dout : std_ulogic_vector(63 downto 0); + signal dmi_req : std_ulogic; + signal dmi_wr : std_ulogic; + signal dmi_ack : std_ulogic; + + -- Global JTAG signals (used by BSCANE2 inside dmi_dtm + alias j : glob_jtag_t is glob_jtag; + + -- Wishbone interfaces + signal wishbone_ram_in : wishbone_slave_out; + signal wishbone_ram_out : wishbone_master_out; + +begin + dtm: entity work.dmi_dtm + generic map( + ABITS => 8, + DBITS => 64 + ) + port map( + sys_clk => clk, + sys_reset => rst, + dmi_addr => dmi_addr, + dmi_din => dmi_din, + dmi_dout => dmi_dout, + dmi_req => dmi_req, + dmi_wr => dmi_wr, + dmi_ack => dmi_ack + ); + + -- Dummy loopback until a debug module is present + dmi_din <= dmi_dout; + dmi_ack <= dmi_ack; + + -- system clock + sys_clk: process + begin + clk <= '1'; + wait for clk_period / 2; + clk <= '0'; + wait for clk_period / 2; + end process sys_clk; + + -- system sim: just reset and wait + sys_sim: process + begin + rst <= '1'; + wait for clk_period; + rst <= '0'; + wait; + end process; + + -- jtag sim process + sim_jtag: process + procedure clock(count: in INTEGER) is + begin + for i in 1 to count loop + j.tck <= '0'; + wait for jclk_period/2; + j.tck <= '1'; + wait for jclk_period/2; + end loop; + end procedure clock; + + procedure shift_out(val: in std_ulogic_vector) is + begin + for i in 0 to val'length-1 loop + j.tdi <= val(i); + clock(1); + end loop; + end procedure shift_out; + + procedure shift_in(val: out std_ulogic_vector) is + begin + for i in val'length-1 downto 0 loop + val := j.tdo & val(val'length-1 downto 1); + clock(1); + end loop; + end procedure shift_in; + + procedure send_command( + addr : in std_ulogic_vector(7 downto 0); + data : in std_ulogic_vector(63 downto 0); + op : in std_ulogic_vector(1 downto 0)) is + begin + j.capture <= '1'; + 
clock(1); + j.capture <= '0'; + clock(1); + j.shift <= '1'; + shift_out(op); + shift_out(data); + shift_out(addr); + j.shift <= '0'; + j.update <= '1'; + clock(1); + j.update <= '0'; + clock(1); + end procedure send_command; + + procedure read_resp( + op : out std_ulogic_vector(1 downto 0); + data : out std_ulogic_vector(63 downto 0)) is + + variable addr : std_ulogic_vector(7 downto 0); + begin + j.capture <= '1'; + clock(1); + j.capture <= '0'; + clock(1); + j.shift <= '1'; + shift_in(op); + shift_in(data); + shift_in(addr); + j.shift <= '0'; + j.update <= '1'; + clock(1); + j.update <= '0'; + clock(1); + end procedure read_resp; + + procedure dmi_write(addr : in std_ulogic_vector(7 downto 0); + data : in std_ulogic_vector(63 downto 0)) is + variable resp_op : std_ulogic_vector(1 downto 0); + variable resp_data : std_ulogic_vector(63 downto 0); + variable timeout : integer; + begin + send_command(addr, data, "10"); + loop + read_resp(resp_op, resp_data); + case resp_op is + when "00" => + return; + when "11" => + timeout := timeout + 1; + assert timeout < 0 + report "dmi_write timed out !" severity error; + when others => + assert 0 > 1 report "dmi_write got odd status: " & + to_hstring(resp_op) severity error; + end case; + end loop; + end procedure dmi_write; + + + procedure dmi_read(addr : in std_ulogic_vector(7 downto 0); + data : out std_ulogic_vector(63 downto 0)) is + variable resp_op : std_ulogic_vector(1 downto 0); + variable timeout : integer; + begin + send_command(addr, (others => '0'), "01"); + loop + read_resp(resp_op, data); + case resp_op is + when "00" => + return; + when "11" => + timeout := timeout + 1; + assert timeout < 0 + report "dmi_read timed out !" 
severity error; + when others => + assert 0 > 1 report "dmi_read got odd status: " & + to_hstring(resp_op) severity error; + end case; + end loop; + end procedure dmi_read; + + variable data : std_ulogic_vector(63 downto 0); + begin + -- init & reset + j.reset <= '1'; + j.sel <= "0000"; + j.capture <= '0'; + j.update <= '0'; + j.shift <= '0'; + j.tdi <= '0'; + j.tms <= '0'; + j.runtest <= '0'; + clock(5); + j.reset <= '0'; + clock(5); + + -- select chain 2 + j.sel <= "0010"; + clock(1); + + -- send command + dmi_read(x"00", data); + report "Read addr reg:" & to_hstring(data); + std.env.finish; + end process; +end behave; diff --git a/dmi_dtm_xilinx.vhdl b/dmi_dtm_xilinx.vhdl new file mode 100644 index 0000000..bab7ce8 --- /dev/null +++ b/dmi_dtm_xilinx.vhdl @@ -0,0 +1,276 @@ +-- Xilinx internal JTAG to DMI interface +-- +-- DMI bus +-- +-- req : ____/------------\_____ +-- addr: xxxx< >xxxxx +-- dout: xxxx< >xxxxx +-- wr : xxxx< >xxxxx +-- din : xxxxxxxxxxxx< >xxx +-- ack : ____________/------\___ +-- +-- * addr/dout set along with req, can be latched on same cycle by slave +-- * ack & din remain up until req is dropped by master, the slave must +-- provide a stable output on din on reads during that time. +-- * req remains low until at least one sysclk after ack is seen down. +-- +-- JTAG (tck) DMI (sys_clk) +-- +-- * jtag_req = 1 +-- (jtag_req_0) * +-- (jtag_req_1) -> * dmi_req = 1 > +-- *.../... +-- * dmi_ack = 1 < +-- * (dmi_ack_0) +-- * <- (dmi_ack_1) +-- * jtag_req = 0 (and latch dmi_din) +-- (jtag_req_0) * +-- (jtag_req_1) -> * dmi_req = 0 > +-- * dmi_ack = 0 < +-- * (dmi_ack_0) +-- * <- (dmi_ack_1) +-- +-- jtag_req can go back to 1 when dmi_ack_1 is 0 +-- +-- Questions/TODO: +-- - I use 2 flip-flops for sync, is that enough ? +-- - I treat the jtag_reset as an async reset, is that necessary ? +-- - Dbl check reset situation since we have two different resets +-- each only resetting part of the logic...
+-- - Look at optionally removing the synchronizer on the ack path, +-- assuming JTAG is always slow enough that ack will have been +-- stable long enough by the time CAPTURE comes in. +-- - We could avoid the latched request by not shifting while a +-- request is in progress (and force TDO to 1 to return a busy +-- status). +-- +-- WARNING: This isn't the real DMI JTAG protocol (at least not yet). +-- A command sent while busy will be ignored. A response of "11" +-- means the previous command is still going, try again. +-- As such, we don't implement the DMI "error" status, and +-- we don't implement DTMCS yet... This may still all change +-- but for now it's easier that way as the real DMI protocol +-- requires for a command to work properly that enough TCK +-- are sent while IDLE and I'm having trouble getting that +-- working with UrJtag and the Xilinx BSCANE2 for now. + +library ieee; +use ieee.std_logic_1164.all; +use ieee.math_real.all; + +library work; +use work.wishbone_types.all; + +library unisim; +use unisim.vcomponents.all; + +entity dmi_dtm is + generic(ABITS : INTEGER:=8; + DBITS : INTEGER:=32); + + port(sys_clk : in std_ulogic; + sys_reset : in std_ulogic; + dmi_addr : out std_ulogic_vector(ABITS - 1 downto 0); + dmi_din : in std_ulogic_vector(DBITS - 1 downto 0); + dmi_dout : out std_ulogic_vector(DBITS - 1 downto 0); + dmi_req : out std_ulogic; + dmi_wr : out std_ulogic; + dmi_ack : in std_ulogic +-- dmi_err : in std_ulogic TODO: Add error response + ); +end entity dmi_dtm; + +architecture behaviour of dmi_dtm is + + -- Signals coming out of the BSCANE2 block + signal jtag_reset : std_ulogic; + signal capture : std_ulogic; + signal update : std_ulogic; + signal drck : std_ulogic; + signal jtag_clk : std_ulogic; + signal sel : std_ulogic; + signal shift : std_ulogic; + signal tdi : std_ulogic; + signal tdo : std_ulogic; + signal tck : std_ulogic; + + -- ** JTAG clock domain ** + + -- Shift register + signal shiftr : std_ulogic_vector(ABITS + DBITS + 1
downto 0); + + -- Latched request + signal request : std_ulogic_vector(ABITS + DBITS + 1 downto 0); + + -- A request is present + signal jtag_req : std_ulogic; + + -- Synchronizer for dmi_ack (sys clk -> jtag_clk) + signal dmi_ack_0 : std_ulogic; + signal dmi_ack_1 : std_ulogic; + + -- ** sys clock domain ** + + -- Synchronizer for jtag_req (jtag clk -> sys clk) + signal jtag_req_0 : std_ulogic; + signal jtag_req_1 : std_ulogic; + + -- ** combinational signals ** + signal jtag_bsy : std_ulogic; + signal op_valid : std_ulogic; + signal rsp_op : std_ulogic_vector(1 downto 0); + + -- ** Constants ** + constant DMI_REQ_NOP : std_ulogic_vector(1 downto 0) := "00"; + constant DMI_REQ_RD : std_ulogic_vector(1 downto 0) := "01"; + constant DMI_REQ_WR : std_ulogic_vector(1 downto 0) := "10"; + constant DMI_RSP_OK : std_ulogic_vector(1 downto 0) := "00"; + constant DMI_RSP_BSY : std_ulogic_vector(1 downto 0) := "11"; + +begin + + -- Implement the Xilinx BSCANE2 for series 7 devices (TODO: use PoC to + -- wrap this if compatibility is required with older devices). + bscan : BSCANE2 + generic map ( + JTAG_CHAIN => 2 + ) + port map ( + CAPTURE => capture, + DRCK => drck, + RESET => jtag_reset, + RUNTEST => open, + SEL => sel, + SHIFT => shift, + TCK => tck, + TDI => tdi, + TMS => open, + UPDATE => update, + TDO => tdo + ); + + -- Some examples out there suggest buffering the clock so it's + -- treated as a proper clock net. This is probably needed when using + -- drck (the gated clock) but I'm using the real tck here to avoid + -- missing the update phase so maybe not...
+ -- + clkbuf : BUFG + port map ( +-- I => drck, + I => tck, + O => jtag_clk + ); + + + -- dmi_req synchronization + dmi_req_sync : process(sys_clk) + begin + -- sys_reset is synchronous + if rising_edge(sys_clk) then + if (sys_reset = '1') then + jtag_req_0 <= '0'; + jtag_req_1 <= '0'; + else + jtag_req_0 <= jtag_req; + jtag_req_1 <= jtag_req_0; + end if; + end if; + end process; + dmi_req <= jtag_req_1; + + -- dmi_ack synchronization + dmi_ack_sync: process(jtag_clk, jtag_reset) + begin + -- jtag_reset is async (see comments) + if jtag_reset = '1' then + dmi_ack_0 <= '0'; + dmi_ack_1 <= '0'; + elsif rising_edge(jtag_clk) then + dmi_ack_0 <= dmi_ack; + dmi_ack_1 <= dmi_ack_0; + end if; + end process; + + -- jtag_bsy indicates whether we can start a new request, we can when + -- we aren't already processing one (jtag_req) and the synchronized ack + -- of the previous one is 0. + -- + jtag_bsy <= jtag_req or dmi_ack_1; + + -- decode request type in shift register + with shiftr(1 downto 0) select op_valid <= + '1' when DMI_REQ_RD, + '1' when DMI_REQ_WR, + '0' when others; + + -- encode response op + rsp_op <= DMI_RSP_BSY when jtag_bsy = '1' else DMI_RSP_OK; + + -- Some DMI out signals are directly driven from the request register + dmi_addr <= request(ABITS + DBITS + 1 downto DBITS + 2); + dmi_dout <= request(DBITS + 1 downto 2); + dmi_wr <= '1' when request(1 downto 0) = DMI_REQ_WR else '0'; + + -- TDO is wired to shift register bit 0 + tdo <= shiftr(0); + + -- Main state machine. Handles shift registers, request latch and + -- jtag_req latch. Could be split into 3 processes but it's probably + -- not worthwhile. 
+ -- + shifter: process(jtag_clk, jtag_reset) + begin + if jtag_reset = '1' then + shiftr <= (others => '0'); + request <= (others => '0'); + jtag_req <= '0'; + elsif rising_edge(jtag_clk) then + + -- Handle jtag "commands" when sel is 1 + if sel = '1' then + -- Shift state, rotate the register + if shift = '1' then + shiftr <= tdi & shiftr(ABITS + DBITS + 1 downto 1); + end if; + + -- Update state (trigger) + -- + -- Latch the request if we aren't already processing one and + -- it has a valid command opcode. + -- + if update = '1' and op_valid = '1' then + if jtag_bsy = '0' then + request <= shiftr; + jtag_req <= '1'; + end if; + -- Set the shift register "op" to "busy". This will prevent + -- us from re-starting the command on the next update if + -- the command completes before that. + shiftr(1 downto 0) <= DMI_RSP_BSY; + end if; + + -- Request completion. + -- + -- Capture the response data for reads and clear request flag. + -- + -- Note: We clear req (and thus dmi_req) here which relies on tck + -- ticking and sel set. This means we are stuck with dmi_req up if + -- the jtag interface stops. Slaves must be resilient to this. 
+ -- + if jtag_req = '1' and dmi_ack_1 = '1' then + jtag_req <= '0'; + if request(1 downto 0) = DMI_REQ_RD then + request(DBITS + 1 downto 2) <= dmi_din; + end if; + end if; + + -- Capture state, grab latch content with updated status + if capture = '1' then + shiftr <= request(ABITS + DBITS + 1 downto 2) & rsp_op; + end if; + + end if; + end if; + end process; +end architecture behaviour; + diff --git a/microwatt.core b/microwatt.core index b62aef9..04b9d2c 100644 --- a/microwatt.core +++ b/microwatt.core @@ -46,6 +46,14 @@ filesets: - fpga/firmware.hex : {copyto : firmware.hex, file_type : user} file_type : vhdlSource-2008 + debug_xilinx: + files: + - dmi_dtm_xilinx.vhdl : {file_type : vhdlSource-2008} + + debug_dummy: + files: + - dmi_dtm_dummy.vhdl : {file_type : vhdlSource-2008} + nexys_a7: files: - fpga/nexys_a7.xdc : {file_type : xdc} @@ -69,7 +77,7 @@ filesets: targets: nexys_a7: default_tool: vivado - filesets: [core, nexys_a7, soc, fpga] + filesets: [core, nexys_a7, soc, fpga, debug_xilinx] parameters : [memory_size, ram_init_file] tools: vivado: {part : xc7a100tcsg324-1} @@ -77,7 +85,7 @@ targets: nexys_video: default_tool: vivado - filesets: [core, nexys_video, soc, fpga] + filesets: [core, nexys_video, soc, fpga, debug_xilinx] parameters : [memory_size, ram_init_file] tools: vivado: {part : xc7a200tsbg484-1} @@ -85,7 +93,7 @@ targets: arty_a7-35: default_tool: vivado - filesets: [core, arty_a7-35, soc, fpga] + filesets: [core, arty_a7-35, soc, fpga, debug_xilinx] parameters : [memory_size, ram_init_file] tools: vivado: {part : xc7a35ticsg324-1L} @@ -93,7 +101,7 @@ targets: cmod_a7-35: default_tool: vivado - filesets: [core, cmod_a7-35, soc, fpga] + filesets: [core, cmod_a7-35, soc, fpga, debug_xilinx] parameters : [memory_size, ram_init_file, reset_low=false] tools: vivado: {part : xc7a35tcpg236-1} diff --git a/scripts/mw_debug.py b/scripts/mw_debug.py new file mode 100755 index 0000000..fe48743 --- /dev/null +++ b/scripts/mw_debug.py @@ -0,0 +1,62 @@ 
+#!/usr/bin/python3 + +import urjtag; + +def do_command(urc, op, addr, data): + urc.set_dr_in(op,1,0) + urc.set_dr_in(data,65,2) + urc.set_dr_in(addr,73,66) +# print("Sending:", urc.get_dr_in_string()) + urc.shift_dr() + urc.set_dr_in(0x0,73,0) + for x in range(5): + urc.shift_dr() +# print("Received:", urc.get_dr_out_string()) + rsp_code = urc.get_dr_out(1,0) + if rsp_code == 0: + return urc.get_dr_out(65,2) + if rsp_code != 3: + print("Weird response ! rsp=%x" % rsp_code); + print("Timeout sending command !") + +def do_read(urc, addr): + return do_command(urc, 1, addr, 0) + +def do_write(urc, addr, val): + do_command(urc, 2, addr, val) + +def main(): + # Init jtag + #urjtag.loglevel( urjtag.URJ_LOG_LEVEL_ALL ) + + urc = urjtag.chain() + urc.cable("DigilentHS1") + print('Cable frequency:', urc.get_frequency()) + #urc.tap_detect() + #length = urc.len() + #for i in range(0,urc.len()): + # idcode = urc.partid(0) + # print('[%d] 0x%08x' % (i, idcode)) + urc.addpart(6); + print("Part ID: ", urc.partid(0)) + #urc.part(0) + #urc.reset(); + urc.add_register("USER2_REG", 74); + urc.add_instruction("USER2", "000011", "USER2_REG"); + urc.add_register("IDCODE_REG", 32); + urc.add_instruction("IDCODE", "001001", "IDCODE_REG"); + # Send test command + urc.set_instruction("IDCODE") + urc.shift_ir() + urc.shift_dr() + print("Got:", hex(urc.get_dr_out())) + + urc.set_instruction("USER2") + urc.shift_ir() + + print("Reading 0x00: %x" % do_read(urc, 0)) + print("Reading 0xaa: %x" % do_read(urc, 0xaa)) + + +if __name__ == "__main__": + main() diff --git a/sim-unisim/BSCANE2.vhdl b/sim-unisim/BSCANE2.vhdl new file mode 100644 index 0000000..15211fa --- /dev/null +++ b/sim-unisim/BSCANE2.vhdl @@ -0,0 +1,39 @@ +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.ALL; + +library unisim; +use unisim.vcomponents.all; + +entity BSCANE2 is + generic(jtag_chain: INTEGER); + port(capture : out std_logic; + drck : out std_logic; + reset : out std_logic; + runtest : out std_logic; 
+ sel : out std_logic; + shift : out std_logic; + tck : out std_logic; + tdi : out std_logic; + tms : out std_logic; + update : out std_logic; + tdo : in std_logic + ); +end BSCANE2; + +architecture behaviour of BSCANE2 is + alias j : glob_jtag_t is glob_jtag; +begin + sel <= j.sel(jtag_chain); + tck <= j.tck; + drck <= tck and sel and (capture or shift); + capture <= j.capture; + reset <= j.reset; + runtest <= j.runtest; + shift <= j.shift; + tdi <= j.tdi; + tms <= j.tms; + update <= j.update; + j.tdo <= tdo; +end architecture behaviour; + diff --git a/sim-unisim/BUFG.vhdl b/sim-unisim/BUFG.vhdl new file mode 100644 index 0000000..462017a --- /dev/null +++ b/sim-unisim/BUFG.vhdl @@ -0,0 +1,12 @@ +library IEEE; +use IEEE.std_logic_1164.all; + +entity BUFG is + port(I : in std_logic; + O : out std_logic + ); +end BUFG; +architecture behaviour of BUFG is +begin + O <= I; +end architecture behaviour; diff --git a/sim-unisim/unisim_vcomponents.vhdl b/sim-unisim/unisim_vcomponents.vhdl new file mode 100644 index 0000000..7faebac --- /dev/null +++ b/sim-unisim/unisim_vcomponents.vhdl @@ -0,0 +1,45 @@ +library IEEE; +use IEEE.std_logic_1164.all; + +package vcomponents is + + -- Global JTAG signals. Xilinx implementation hooks that up to + -- their internal JTAG tap, we just expose them for the testbench + -- to use. These are used by our BSCANE2 block. 
+ -- + type glob_jtag_t is record + reset : std_logic; + tck : std_logic; + tdo : std_logic; + tdi : std_logic; + tms : std_logic; + sel : std_logic_vector(4 downto 1); + capture : std_logic; + shift : std_logic; + update : std_logic; + runtest : std_logic; + end record glob_jtag_t; + signal glob_jtag : glob_jtag_t; + + component BSCANE2 is + generic(jtag_chain: integer); + port(capture : out std_logic; + drck : out std_logic; + reset : out std_logic; + runtest : out std_logic; + sel : out std_logic; + shift : out std_logic; + tck : out std_logic; + tdi : out std_logic; + tms : out std_logic; + update : out std_logic; + tdo : in std_logic + ); + end component BSCANE2; + + component BUFG is + port(I : in std_logic; + O : out std_logic + ); + end component BUFG; +end package vcomponents; diff --git a/soc.vhdl b/soc.vhdl index 4ccbc12..735d86c 100644 --- a/soc.vhdl +++ b/soc.vhdl @@ -52,10 +52,18 @@ architecture behaviour of soc is signal wb_bram_out : wishbone_slave_out; constant mem_adr_bits : positive := positive(ceil(log2(real(MEMORY_SIZE)))); - -- Debug signals (used in SIM only) + -- Core debug signals (used in SIM only) signal registers : regfile; signal terminate : std_ulogic; + -- DMI debug bus signals + signal dmi_addr : std_ulogic_vector(7 downto 0); + signal dmi_din : std_ulogic_vector(63 downto 0); + signal dmi_dout : std_ulogic_vector(63 downto 0); + signal dmi_req : std_ulogic; + signal dmi_wr : std_ulogic; + signal dmi_ack : std_ulogic; + begin -- Processor core @@ -177,4 +185,25 @@ begin wishbone_out => wb_bram_out ); + -- DMI(debug bus) <-> JTAG bridge + dtm: entity work.dmi_dtm + generic map( + ABITS => 8, + DBITS => 64 + ) + port map( + sys_clk => system_clk, + sys_reset => rst, + dmi_addr => dmi_addr, + dmi_din => dmi_din, + dmi_dout => dmi_dout, + dmi_req => dmi_req, + dmi_wr => dmi_wr, + dmi_ack => dmi_ack + ); + + -- Dummy loopback until a debug module is present + dmi_din <= dmi_dout; + dmi_ack <= dmi_ack; + end architecture behaviour; From 
b46f81fae4c2700547ef791606fe20ed71c4fa81 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 10 Sep 2019 17:31:25 +0100 Subject: [PATCH 03/11] Wishbone debug module This adds a debug module off the DMI (debug) bus which can act as a wishbone master to generate read and write cycles. Signed-off-by: Benjamin Herrenschmidt --- Makefile | 9 +- dmi_dtm_tb.vhdl | 42 +++++++++- microwatt.core | 2 + scripts/mw_debug.py | 45 +++++++++- soc.vhdl | 19 +++-- wishbone_debug_master.vhdl | 167 +++++++++++++++++++++++++++++++++++++ 6 files changed, 268 insertions(+), 16 deletions(-) create mode 100644 wishbone_debug_master.vhdl diff --git a/Makefile b/Makefile index a554529..b675a8f 100644 --- a/Makefile +++ b/Makefile @@ -41,12 +41,13 @@ simple_ram_behavioural_helpers.o: simple_ram_behavioural_tb.o: wishbone_types.o simple_ram_behavioural.o simple_ram_behavioural.o: wishbone_types.o simple_ram_behavioural_helpers.o sim_uart.o: wishbone_types.o sim_console.o -soc.o: common.o wishbone_types.o core.o wishbone_arbiter.o sim_uart.o simple_ram_behavioural.o dmi_dtm_xilinx.o +soc.o: common.o wishbone_types.o core.o wishbone_arbiter.o sim_uart.o simple_ram_behavioural.o dmi_dtm_xilinx.o wishbone_debug_master.o wishbone_arbiter.o: wishbone_types.o wishbone_types.o: writeback.o: common.o -dmi_dtm_tb.o: dmi_dtm_xilinx.o +dmi_dtm_tb.o: dmi_dtm_xilinx.o wishbone_debug_master.o dmi_dtm_xilinx.o: sim-unisim/unisim_vcomponents.o +wishbone_debug_master.o: wishbone_types.o UNISIM_BITS = sim-unisim/unisim_vcomponents.vhdl sim-unisim/BSCANE2.vhdl sim-unisim/BUFG.vhdl sim-unisim/unisim_vcomponents.o: $(UNISIM_BITS) @@ -79,8 +80,8 @@ simple_ram_tb: simple_ram_tb.o simple_ram_behavioural_tb: simple_ram_behavioural_helpers_c.o simple_ram_behavioural_tb.o $(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o $@ -dmi_dtm_tb: dmi_dtm_tb.o - $(GHDL) -e $(GHDLFLAGS) $@ +dmi_dtm_tb: dmi_dtm_tb.o simple_ram_behavioural_helpers_c.o + $(GHDL) -e $(GHDLFLAGS) 
-Wl,simple_ram_behavioural_helpers_c.o $@ tests = $(sort $(patsubst tests/%.out,%,$(wildcard tests/*.out))) diff --git a/dmi_dtm_tb.vhdl b/dmi_dtm_tb.vhdl index d872c13..fe60c12 100644 --- a/dmi_dtm_tb.vhdl +++ b/dmi_dtm_tb.vhdl @@ -50,9 +50,23 @@ begin dmi_ack => dmi_ack ); - -- Dummy loopback until a debug module is present - dmi_din <= dmi_dout; - dmi_ack <= dmi_ack; + simple_ram_0: entity work.mw_soc_memory + generic map(RAM_INIT_FILE => "simple_ram_behavioural.bin", + MEMORY_SIZE => 524288) + port map(clk => clk, rst => rst, + wishbone_in => wishbone_ram_out, + wishbone_out => wishbone_ram_in); + + wishbone_debug_0: entity work.wishbone_debug_master + port map(clk => clk, rst => rst, + dmi_addr => dmi_addr(1 downto 0), + dmi_dout => dmi_din, + dmi_din => dmi_dout, + dmi_wr => dmi_wr, + dmi_ack => dmi_ack, + dmi_req => dmi_req, + wb_in => wishbone_ram_in, + wb_out => wishbone_ram_out); -- system clock sys_clk: process @@ -209,6 +223,28 @@ begin -- send command dmi_read(x"00", data); report "Read addr reg:" & to_hstring(data); + report "Writing addr reg to all 1's"; + dmi_write(x"00", (others => '1')); + dmi_read(x"00", data); + report "Read addr reg:" & to_hstring(data); + + report "Writing ctrl reg to all 1's"; + dmi_write(x"02", (others => '1')); + dmi_read(x"02", data); + report "Read ctrl reg:" & to_hstring(data); + + report "Read memory at 0...\n"; + dmi_write(x"00", x"0000000000000000"); + dmi_write(x"02", x"00000000000007ff"); + dmi_read(x"01", data); + report "00:" & to_hstring(data); + dmi_read(x"01", data); + report "08:" & to_hstring(data); + dmi_read(x"01", data); + report "10:" & to_hstring(data); + dmi_read(x"01", data); + report "18:" & to_hstring(data); + clock(10); std.env.finish; end process; end behave; diff --git a/microwatt.core b/microwatt.core index 04b9d2c..6efe7c9 100644 --- a/microwatt.core +++ b/microwatt.core @@ -25,6 +25,7 @@ filesets: - multiply.vhdl - writeback.vhdl - insn_helpers.vhdl + - wishbone_debug_master.vhdl - core.vhdl - 
icache.vhdl file_type : vhdlSource-2008 @@ -32,6 +33,7 @@ filesets: soc: files: - wishbone_arbiter.vhdl + - wishbone_debug_master.vhdl - soc.vhdl file_type : vhdlSource-2008 diff --git a/scripts/mw_debug.py b/scripts/mw_debug.py index fe48743..f22039c 100755 --- a/scripts/mw_debug.py +++ b/scripts/mw_debug.py @@ -54,8 +54,49 @@ def main(): urc.set_instruction("USER2") urc.shift_ir() - print("Reading 0x00: %x" % do_read(urc, 0)) - print("Reading 0xaa: %x" % do_read(urc, 0xaa)) + print("Reading memory at 0:") + do_write(urc, 0, 0) + do_write(urc, 2, 0x7ff) + print("00: %016x" % do_read(urc, 1)) + print("08: %016x" % do_read(urc, 1)) + print("10: %016x" % do_read(urc, 1)) + print("18: %016x" % do_read(urc, 1)) + do_write(urc, 0, 0x10) + do_write(urc, 1, 0xabcdef0123456789) + do_write(urc, 0, 0) + do_write(urc, 2, 0x7ff) + print("00: %016x" % do_read(urc, 1)) + print("08: %016x" % do_read(urc, 1)) + print("10: %016x" % do_read(urc, 1)) + print("18: %016x" % do_read(urc, 1)) + +# urc.set_dr_in(0,73,0); +# print("Test DR_IN 1:", urc.get_dr_in_string()) +# urc.set_dr_in(0xa,3,0); +# print("Test DR_IN 2:", urc.get_dr_in_string()) +# urc.set_dr_in(0x5,7,4); +# print("Test DR_IN 3:", urc.get_dr_in_string()) +# urc.set_dr_in(1,73,73); +# print("Test DR_IN 4:", urc.get_dr_in_string()) + +# print("Reading ADDR reg: %x" % do_read(urc, 0)) +# print("Writing all 1's to it:") +# do_write(urc, 0, 0xffffffffffffffff) +# print("Reading ADDR reg: %x" % do_read(urc, 0)) +# print("Writing 0xabcdef0123456789 to it:") +# do_write(urc, 0, 0xabcdef0123456789) +# print("Reading ADDR reg: %x" % do_read(urc, 0)) + + + +# urc.set_dr_in(0x1,41,0) +# print("Sending:", urc.get_dr_in_string()) +# urc.shift_dr() +# urc.set_dr_in(0x0,41,0) +# urc.shift_dr() +# print("Got1:", urc.get_dr_out_string()) +# urc.shift_dr() +# print("Got2:", hex(urc.get_dr_out())) if __name__ == "__main__": diff --git a/soc.vhdl b/soc.vhdl index 735d86c..dcc25a7 100644 --- a/soc.vhdl +++ b/soc.vhdl @@ -92,10 +92,6 @@ begin 
wb_out => wb_master_out, wb_in => wb_master_in ); - -- Dummy wishbone debug module - wishbone_debug_out.cyc <= '0'; - wishbone_debug_out.stb <= '0'; - -- Wishbone slaves address decoder & mux slave_intercon: process(wb_master_out, wb_bram_out, wb_uart0_out) -- Selected slave @@ -202,8 +198,17 @@ begin dmi_ack => dmi_ack ); - -- Dummy loopback until a debug module is present - dmi_din <= dmi_dout; - dmi_ack <= dmi_ack; + -- Wishbone debug master (TODO: Add a DMI address decoder) + wishbone_debug: entity work.wishbone_debug_master + port map(clk => system_clk, rst => rst, + dmi_addr => dmi_addr(1 downto 0), + dmi_dout => dmi_din, + dmi_din => dmi_dout, + dmi_wr => dmi_wr, + dmi_ack => dmi_ack, + dmi_req => dmi_req, + wb_in => wishbone_debug_in, + wb_out => wishbone_debug_out); + end architecture behaviour; diff --git a/wishbone_debug_master.vhdl b/wishbone_debug_master.vhdl new file mode 100644 index 0000000..51441d5 --- /dev/null +++ b/wishbone_debug_master.vhdl @@ -0,0 +1,167 @@ +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +library work; +use work.wishbone_types.all; + +entity wishbone_debug_master is + port(clk : in std_ulogic; + rst : in std_ulogic; + + -- Debug bus interface + dmi_addr : in std_ulogic_vector(1 downto 0); + dmi_din : in std_ulogic_vector(63 downto 0); + dmi_dout : out std_ulogic_vector(63 downto 0); + dmi_req : in std_ulogic; + dmi_wr : in std_ulogic; + dmi_ack : out std_ulogic; + + -- Wishbone master interface + wb_out : out wishbone_master_out; + wb_in : in wishbone_slave_out + ); +end entity wishbone_debug_master; + +architecture behaviour of wishbone_debug_master is + + -- ** Register offsets definitions. 
All registers are 64-bit + constant DBG_WB_ADDR : std_ulogic_vector(1 downto 0) := "00"; + constant DBG_WB_DATA : std_ulogic_vector(1 downto 0) := "01"; + constant DBG_WB_CTRL : std_ulogic_vector(1 downto 0) := "10"; + constant DBG_WB_RSVD : std_ulogic_vector(1 downto 0) := "11"; + + -- CTRL register: + -- + -- bit 0..7 : SEL bits (byte enables) + -- bit 8 : address auto-increment + -- bit 10..9 : auto-increment value: + -- 00 - +1 + -- 01 - +2 + -- 10 - +4 + -- 11 - +8 + + -- ** Address and control registers and read data + signal reg_addr : std_ulogic_vector(63 downto 0); + signal reg_ctrl_out : std_ulogic_vector(63 downto 0); + signal reg_ctrl : std_ulogic_vector(10 downto 0); + signal data_latch : std_ulogic_vector(63 downto 0); + + type state_t is (IDLE, WB_CYCLE, DMI_WAIT); + signal state : state_t; + +begin + + -- Hard wire unused bits to 0 + reg_ctrl_out <= (63 downto 11 => '0', + 10 downto 0 => reg_ctrl); + + -- DMI read data mux + with dmi_addr select dmi_dout <= + reg_addr when DBG_WB_ADDR, + data_latch when DBG_WB_DATA, + reg_ctrl_out when DBG_WB_CTRL, + (others => '0') when others; + + -- ADDR and CTRL register writes + reg_write : process(clk) + subtype autoinc_inc_t is integer range 1 to 8; + function decode_autoinc(c : std_ulogic_vector(1 downto 0)) + return autoinc_inc_t is + begin + case c is + when "00" => return 1; + when "01" => return 2; + when "10" => return 4; + when "11" => return 8; + -- Below shouldn't be necessary but GHDL complains + when others => return 8; + end case; + end function decode_autoinc; + begin + if rising_edge(clk) then + if (rst) then + reg_addr <= (others => '0'); + reg_ctrl <= (others => '0'); + else -- Standard register writes + if dmi_req and dmi_wr then + if dmi_addr = DBG_WB_ADDR then + reg_addr <= dmi_din; + elsif dmi_addr = DBG_WB_CTRL then + reg_ctrl <= dmi_din(10 downto 0); + end if; + end if; + -- Address register auto-increment + if state = WB_CYCLE and (wb_in.ack and reg_ctrl(8))= '1' then + reg_addr <= 
std_ulogic_vector(unsigned(reg_addr) + + decode_autoinc(reg_ctrl(10 downto 9))); + end if; + end if; + end if; + end process; + + -- ACK is hard wired to req for register writes. For data read/writes + -- (aka commands), it's sent when the state machine got the WB ack. + -- + -- Note: We never set it to 1, we just pass dmi_req back when acking. + -- This fulfills two purposes: + -- + -- * Avoids polluting the ack signal when another DMI slave is + -- selected. This allows the decoder to just OR all the acks + -- together rather than mux them. + -- + -- * Makes ack go down on the same cycle as req goes down, thus + -- saving a clock cycle. This is safe because we know that + -- the state machine will no longer be in DMI_WAIT state on + -- the next cycle, so we won't be bouncing the signal back up. + -- + dmi_ack <= dmi_req when (dmi_addr /= DBG_WB_DATA or state = DMI_WAIT) else '0'; + + -- Some WB signals are direct wires from registers or DMI + wb_out.adr <= reg_addr; + wb_out.dat <= dmi_din; + wb_out.sel <= reg_ctrl(7 downto 0); + wb_out.we <= dmi_wr; + + -- We always move WB cyc and stb simultaneously (no pipelining yet...) + wb_out.cyc <= '1' when state = WB_CYCLE else '0'; + wb_out.stb <= '1' when state = WB_CYCLE else '0'; + + -- Data latch. WB will take the read data away as soon as the cycle + -- terminates but we must maintain it on DMI until req goes down, so + -- we latch it. (Q: Should we move that latch to dmi_dtm itself ?)
+ -- + latch_reads : process(clk) + begin + if rising_edge(clk) then + if state = WB_CYCLE and wb_in.ack = '1' and dmi_wr = '0' then + data_latch <= wb_in.dat; + end if; + end if; + end process; + + -- Command state machine (generate wb_cyc) + wb_trigger : process(clk) + begin + if rising_edge(clk) then + if (rst) then + state <= IDLE; + else + case state is + when IDLE => + if dmi_req = '1' and dmi_addr = DBG_WB_DATA then + state <= WB_CYCLE; + end if; + when WB_CYCLE => + if wb_in.ack then + state <= DMI_WAIT; + end if; + when DMI_WAIT => + if dmi_req = '0' then + state <= IDLE; + end if; + end case; + end if; + end if; + end process; +end architecture behaviour; From ad14a41d801a23ba6f3afa1a992bbe7458c83636 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 10 Sep 2019 17:39:59 +0100 Subject: [PATCH 04/11] Add DMI address decoder And prepare signals for core DMI support Signed-off-by: Benjamin Herrenschmidt --- soc.vhdl | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 57 insertions(+), 3 deletions(-) diff --git a/soc.vhdl b/soc.vhdl index dcc25a7..4b02807 100644 --- a/soc.vhdl +++ b/soc.vhdl @@ -64,6 +64,13 @@ architecture behaviour of soc is signal dmi_wr : std_ulogic; signal dmi_ack : std_ulogic; + -- Per slave DMI signals + signal dmi_wb_dout : std_ulogic_vector(63 downto 0); + signal dmi_wb_req : std_ulogic; + signal dmi_wb_ack : std_ulogic; + signal dmi_core_dout : std_ulogic_vector(63 downto 0); + signal dmi_core_req : std_ulogic; + signal dmi_core_ack : std_ulogic; begin -- Processor core @@ -198,15 +205,62 @@ begin dmi_ack => dmi_ack ); + -- DMI interconnect + dmi_intercon: process(dmi_addr, dmi_req, + dmi_wb_ack, dmi_wb_dout, + dmi_core_ack, dmi_core_dout) + + -- DMI address map (each address is a full 64-bit register) + -- + -- Offset: Size: Slave: + -- 0 4 Wishbone + -- 10 16 Core + + type slave_type is (SLAVE_WB, + SLAVE_CORE, + SLAVE_NONE); + variable slave : slave_type; + begin + -- Simple address decoder 
+ if dmi_addr(7 downto 0) = "000000--" then + slave := SLAVE_WB; + elsif dmi_addr(7 downto 0) = "0001----" then + slave := SLAVE_CORE; + else + slave := SLAVE_NONE; + end if; + + -- DMI muxing + dmi_wb_req <= '0'; + dmi_core_req <= '0'; + case slave is + when SLAVE_WB => + dmi_wb_req <= dmi_req; + dmi_ack <= dmi_wb_ack; + dmi_din <= dmi_wb_dout; + when SLAVE_CORE => + dmi_core_req <= dmi_req; + dmi_ack <= dmi_core_ack; + dmi_din <= dmi_core_dout; + when others => + dmi_ack <= dmi_req; + dmi_din <= (others => '1'); + end case; + end process; + + -- Core dummy + dmi_core_ack <= dmi_core_req; + dmi_core_dout <= x"0000000000000000"; + -- Wishbone debug master (TODO: Add a DMI address decoder) wishbone_debug: entity work.wishbone_debug_master port map(clk => system_clk, rst => rst, dmi_addr => dmi_addr(1 downto 0), - dmi_dout => dmi_din, + dmi_dout => dmi_wb_dout, dmi_din => dmi_dout, dmi_wr => dmi_wr, - dmi_ack => dmi_ack, - dmi_req => dmi_req, + dmi_ack => dmi_wb_ack, + dmi_req => dmi_wb_req, wb_in => wishbone_debug_in, wb_out => wishbone_debug_out); From 554b753172a9166578731c6faa170feb69660da8 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 16 Sep 2019 16:28:48 +0100 Subject: [PATCH 05/11] Add jtag support in simulation via a socket This adds a local socket that can be used to communicate with the debug tool (which will be committed separately) and generates the JTAG signals. 
We generate the low level JTAG signals, thus directly driving the simulated BSCANE2, and the Xilinx DTM. Signed-off-by: Benjamin Herrenschmidt --- sim_jtag.vhdl | 105 ++++++++++++++++++++ sim_jtag_socket.vhdl | 24 +++++ sim_jtag_socket_c.c | 222 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 351 insertions(+) create mode 100644 sim_jtag.vhdl create mode 100644 sim_jtag_socket.vhdl create mode 100644 sim_jtag_socket_c.c diff --git a/sim_jtag.vhdl b/sim_jtag.vhdl new file mode 100644 index 0000000..694491f --- /dev/null +++ b/sim_jtag.vhdl @@ -0,0 +1,105 @@ +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +library work; +use work.sim_jtag_socket.all; + +library unisim; +use unisim.vcomponents.all; + +entity sim_jtag is +end sim_jtag; + +architecture behaviour of sim_jtag is +begin + jtag: process + -- Global JTAG signals (used by BSCANE2 inside dmi_dtm) + alias j : glob_jtag_t is glob_jtag; + + -- Super fast JTAG clock for sim. For debugging the JTAG module, + -- change this to something much larger, for example 60ns, to reflect + -- more realistic conditions. + constant jclk_period : time := 1 ns; + + -- Polling the socket... this could be made slower when nothing + -- is connected once we have that indication from the C code. + constant poll_period : time := 100 ns; + + -- Number of dummy JTAG clocks to inject after a command. (I haven't + -- got that working with UrJtag but at least with sim, having the + -- right number here allows the synchronizers time to complete a + -- command on the first message exchange, thus avoiding the need + -- for two full shifts for a response.)
+ constant dummy_clocks : integer := 80; + + procedure clock(count: in INTEGER) is + begin + for i in 1 to count loop + j.tck <= '0'; + wait for jclk_period/2; + j.tck <= '1'; + wait for jclk_period/2; + end loop; + end procedure clock; + + procedure clock_command(cmd: in std_ulogic_vector; + rsp: out std_ulogic_vector) is + begin + j.capture <= '1'; + clock(1); + j.capture <= '0'; + clock(1); + j.shift <= '1'; + for i in 0 to cmd'length-1 loop + j.tdi <= cmd(i); + rsp := rsp(1 to rsp'length-1) & j.tdo; + clock(1); + end loop; + j.shift <= '0'; + j.update <= '1'; + clock(1); + j.update <= '0'; + clock(1); + end procedure clock_command; + + variable cmd : std_ulogic_vector(0 to 247); + variable rsp : std_ulogic_vector(0 to 247); + variable msize : std_ulogic_vector(7 downto 0); + variable size : integer; + + begin + + -- init & reset + j.reset <= '1'; + j.sel <= "0000"; + j.capture <= '0'; + j.update <= '0'; + j.shift <= '0'; + j.tdi <= '0'; + j.tms <= '0'; + j.runtest <= '0'; + clock(5); + j.reset <= '0'; + clock(5); + + -- select chain USER2 + -- XXX TODO: Send that via protocol instead + -- XXX TODO: Also maybe have the C code tell us if connected or not + -- and clock when connected. 
+ j.sel <= "0010"; + clock(1); + rsp := (others => '0'); + while true loop + wait for poll_period; + sim_jtag_read_msg(cmd, msize); + size := to_integer(unsigned(msize)); + if size /= 0 and size < 248 then + clock_command(cmd(0 to size-1), + rsp(0 to size-1)); + sim_jtag_write_msg(rsp, msize); + clock(dummy_clocks); + end if; + end loop; + end process; +end; diff --git a/sim_jtag_socket.vhdl b/sim_jtag_socket.vhdl new file mode 100644 index 0000000..b03eb48 --- /dev/null +++ b/sim_jtag_socket.vhdl @@ -0,0 +1,24 @@ +library ieee; +use ieee.std_logic_1164.all; + +package sim_jtag_socket is + procedure sim_jtag_read_msg(out_msg : out std_ulogic_vector(247 downto 0); + out_size : out std_ulogic_vector(7 downto 0)); + attribute foreign of sim_jtag_read_msg : procedure is "VHPIDIRECT sim_jtag_read_msg"; + procedure sim_jtag_write_msg(in_msg : in std_ulogic_vector(247 downto 0); + in_size : in std_ulogic_vector(7 downto 0)); + attribute foreign of sim_jtag_write_msg : procedure is "VHPIDIRECT sim_jtag_write_msg"; +end sim_jtag_socket; + +package body sim_jtag_socket is + procedure sim_jtag_read_msg(out_msg : out std_ulogic_vector(247 downto 0); + out_size : out std_ulogic_vector(7 downto 0)) is + begin + assert false report "VHPI" severity failure; + end sim_jtag_read_msg; + procedure sim_jtag_write_msg(in_msg : in std_ulogic_vector(247 downto 0); + in_size : in std_ulogic_vector(7 downto 0)) is + begin + assert false report "VHPI" severity failure; + end sim_jtag_write_msg; +end sim_jtag_socket; diff --git a/sim_jtag_socket_c.c b/sim_jtag_socket_c.c new file mode 100644 index 0000000..e0c21a4 --- /dev/null +++ b/sim_jtag_socket_c.c @@ -0,0 +1,222 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* XXX Make that some parameter */ +#define TCP_PORT 13245 +#define MAX_PACKET 32 + +#define vhpi0 2 /* forcing 0 */ +#define vhpi1 3 /* forcing 1 */ + +static void to_std_logic_vector(unsigned long val, unsigned 
char *p, + unsigned long len) +{ + if (len > 64) { + fprintf(stderr, "%s: invalid length %lu\n", __func__, len); + exit(1); + } + + for (unsigned long i = 0; i < len; i++) { + if ((val >> (len-1-i) & 1)) + *p = vhpi1; + else + *p = vhpi0; + + p++; + } +} + +static uint64_t from_std_logic_vector(unsigned char *p, unsigned long len) +{ + unsigned long ret = 0; + + if (len > 64) { + fprintf(stderr, "%s: invalid length %lu\n", __func__, len); + exit(1); + } + + for (unsigned long i = 0; i < len; i++) { + unsigned char bit; + + if (*p == vhpi0) { + bit = 0; + } else if (*p == vhpi1) { + bit = 1; + } else { + fprintf(stderr, "%s: bad bit %d\n", __func__, *p); + bit = 0; + } + + ret = (ret << 1) | bit; + p++; + } + + return ret; +} + +static int fd = -1; +static int cfd = -1; + +static void open_socket(void) +{ + struct sockaddr_in addr; + int opt, rc, flags; + + if (fd >= 0 || fd < -1) + return; + + signal(SIGPIPE, SIG_IGN); + fd = socket(AF_INET, SOCK_STREAM, 0); + if (fd < 0) { + fprintf(stderr, "Failed to open debug socket !\r\n"); + goto fail; + } + + rc = 0; + flags = fcntl(fd, F_GETFL); + if (flags >= 0) + rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK); + if (flags < 0 || rc < 0) { + fprintf(stderr, "Failed to configure debug socket !\r\n"); + } + + memset(&addr, 0, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_port = htons(TCP_PORT); + addr.sin_addr.s_addr = htonl(INADDR_ANY); + opt = 1; + setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); + rc = bind(fd, (struct sockaddr *)&addr, sizeof(addr)); + if (rc < 0) { + fprintf(stderr, "Failed to bind debug socket !\r\n"); + goto fail; + } + rc = listen(fd,1); + if (rc < 0) { + fprintf(stderr, "Failed to listen to debug socket !\r\n"); + goto fail; + } + fprintf(stderr, "Debug socket ready\r\n"); + return; +fail: + if (fd >= 0) + close(fd); + fd = -2; +} + +static void check_connection(void) +{ + struct sockaddr_in addr; + socklen_t addr_len = sizeof(addr); + + cfd = accept(fd, (struct sockaddr *)&addr, 
&addr_len); + if (cfd < 0) + return; + fprintf(stderr, "Debug client connected !\r\n"); +} + +void sim_jtag_read_msg(unsigned char *out_msg, unsigned char *out_size) +{ + unsigned char data[MAX_PACKET]; + unsigned char size = 0; + struct pollfd fdset[1]; + int rc, i; + + if (fd == -1) + open_socket(); + if (fd < 0) + goto finish; + if (cfd < 0) + check_connection(); + if (cfd < 0) + goto finish; + + memset(fdset, 0, sizeof(fdset)); + fdset[0].fd = cfd; + fdset[0].events = POLLIN; + rc = poll(fdset, 1, 0); + if (rc <= 0) + goto finish; + rc = read(cfd, data, MAX_PACKET); + if (rc < 0) + fprintf(stderr, "Debug read error, assuming client disconnected !\r\n"); + if (rc == 0) + fprintf(stderr, "Debug client disconnected !\r\n"); + if (rc <= 0) { + close(cfd); + cfd = -1; + goto finish; + } + +#if 0 + fprintf(stderr, "Got message:\n\r"); + { + for (i=0; i> 3; + int bit = 1 << (i & 7); + out_msg[i] = (data[byte+1] & bit) ? vhpi1 : vhpi0; + } +finish: + to_std_logic_vector(size, out_size, 8); +} + +void sim_jtag_write_msg(unsigned char *in_msg, unsigned char *in_size) +{ + unsigned char data[MAX_PACKET]; + unsigned char size; + int rc, i; + + size = from_std_logic_vector(in_size, 8); + data[0] = size; + for (i = 0; i < size; i++) { + int byte = i >> 3; + int bit = 1 << (i & 7); + if (in_msg[i] == vhpi1) + data[byte+1] |= bit; + else + data[byte+1] &= ~bit; + } + rc = (size + 7) / 8; + +#if 0 + fprintf(stderr, "Sending response:\n\r"); + { + for (i=0; i Date: Tue, 10 Sep 2019 17:43:52 +0100 Subject: [PATCH 06/11] Add core debug module This module adds some simple core controls: reset, stop, start, step along with icache clear and reading the NIA and core status bits Signed-off-by: Benjamin Herrenschmidt '0', others => (others => '0')); + constant Fetch2ToDecode1Init : Fetch2ToDecode1Type := (valid => '0', stop_mark => '0', others => (others => '0')); type Decode1ToDecode2Type is record valid: std_ulogic; + stop_mark : std_ulogic; nia: std_ulogic_vector(63 downto 0); insn: 
std_ulogic_vector(31 downto 0); decode: decode_rom_t; end record; - constant Decode1ToDecode2Init : Decode1ToDecode2Type := (valid => '0', decode => decode_rom_init, others => (others => '0')); + constant Decode1ToDecode2Init : Decode1ToDecode2Type := (valid => '0', stop_mark => '0', decode => decode_rom_init, others => (others => '0')); type Fetch2ToIcacheType is record req: std_ulogic; diff --git a/core.vhdl b/core.vhdl index d34bf71..d0bd5c5 100644 --- a/core.vhdl +++ b/core.vhdl @@ -20,9 +20,14 @@ entity core is wishbone_data_in : in wishbone_slave_out; wishbone_data_out : out wishbone_master_out; - -- Added for debug, ghdl doesn't support external names unfortunately - registers : out regfile; - terminate_out : out std_ulogic + dmi_addr : in std_ulogic_vector(3 downto 0); + dmi_din : in std_ulogic_vector(63 downto 0); + dmi_dout : out std_ulogic_vector(63 downto 0); + dmi_req : in std_ulogic; + dmi_wr : in std_ulogic; + dmi_ack : out std_ulogic; + + terminated_out : out std_logic ); end core; @@ -73,11 +78,23 @@ architecture behave of core is signal flush: std_ulogic; signal complete: std_ulogic; - signal terminate: std_ulogic; + signal core_rst: std_ulogic; + + -- Debug actions + signal dbg_core_stop: std_ulogic; + signal dbg_core_rst: std_ulogic; + signal dbg_icache_rst: std_ulogic; + + -- Debug status + signal dbg_core_is_stopped: std_ulogic; + + -- For sim + signal registers: regfile; + begin - terminate_out <= terminate; + core_rst <= dbg_core_rst or rst; fetch1_0: entity work.fetch1 generic map ( @@ -85,7 +102,7 @@ begin ) port map ( clk => clk, - rst => rst, + rst => core_rst, stall_in => fetch1_stall_in, flush_in => flush, e_in => execute1_to_fetch1, @@ -97,12 +114,13 @@ begin fetch2_0: entity work.fetch2 port map ( clk => clk, - rst => rst, + rst => core_rst, stall_in => fetch2_stall_in, stall_out => fetch2_stall_out, flush_in => flush, i_in => icache_to_fetch2, i_out => fetch2_to_icache, + stop_in => dbg_core_stop, f_in => fetch1_to_fetch2, f_out => 
fetch2_to_decode1 ); @@ -116,7 +134,7 @@ begin ) port map( clk => clk, - rst => rst, + rst => rst or dbg_icache_rst, i_in => fetch2_to_icache, i_out => icache_to_fetch2, wishbone_out => wishbone_insn_out, @@ -126,7 +144,7 @@ begin decode1_0: entity work.decode1 port map ( clk => clk, - rst => rst, + rst => core_rst, stall_in => decode1_stall_in, flush_in => flush, f_in => fetch2_to_decode1, @@ -138,10 +156,11 @@ begin decode2_0: entity work.decode2 port map ( clk => clk, - rst => rst, + rst => core_rst, stall_out => decode2_stall_out, flush_in => flush, complete_in => complete, + stopped_out => dbg_core_is_stopped, d_in => decode1_to_decode2, e_out => decode2_to_execute1, l_out => decode2_to_loadstore1, @@ -222,4 +241,35 @@ begin complete_out => complete ); + debug_0: entity work.core_debug + port map ( + clk => clk, + rst => rst, + dmi_addr => dmi_addr, + dmi_din => dmi_din, + dmi_dout => dmi_dout, + dmi_req => dmi_req, + dmi_wr => dmi_wr, + dmi_ack => dmi_ack, + core_stop => dbg_core_stop, + core_rst => dbg_core_rst, + icache_rst => dbg_icache_rst, + terminate => terminate, + core_stopped => dbg_core_is_stopped, + nia => fetch1_to_fetch2.nia, + terminated_out => terminated_out + ); + + -- Dump registers if core terminates + sim_terminate_test: if SIM generate + dump_registers: process(all) + begin + if terminate = '1' then + loop_0: for i in 0 to 31 loop + report "REG " & to_hstring(registers(i)); + end loop loop_0; + end if; + end process; + end generate; + end behave; diff --git a/core_debug.vhdl b/core_debug.vhdl new file mode 100644 index 0000000..c93c70d --- /dev/null +++ b/core_debug.vhdl @@ -0,0 +1,152 @@ +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +library work; +use work.common.all; + +entity core_debug is + port ( + clk : in std_logic; + rst : in std_logic; + + dmi_addr : in std_ulogic_vector(3 downto 0); + dmi_din : in std_ulogic_vector(63 downto 0); + dmi_dout : out std_ulogic_vector(63 downto 0); + dmi_req : in 
std_ulogic; + dmi_wr : in std_ulogic; + dmi_ack : out std_ulogic; + + -- Debug actions + core_stop : out std_ulogic; + core_rst : out std_ulogic; + icache_rst : out std_ulogic; + + -- Core status inputs + terminate : in std_ulogic; + core_stopped : in std_ulogic; + nia : in std_ulogic_vector(63 downto 0); + + -- Misc + terminated_out : out std_ulogic + ); +end core_debug; + +architecture behave of core_debug is + -- DMI needs fixing... make a one clock pulse + signal dmi_req_1: std_ulogic; + + -- CTRL register (direct actions, write 1 to act, read back 0) + -- bit 0 : Core stop + -- bit 1 : Core reset (doesn't clear stop) + -- bit 2 : Icache reset + -- bit 3 : Single step + -- bit 4 : Core start + constant DBG_CORE_CTRL : std_ulogic_vector(3 downto 0) := "0000"; + constant DBG_CORE_CTRL_STOP : integer := 0; + constant DBG_CORE_CTRL_RESET : integer := 1; + constant DBG_CORE_CTRL_ICRESET : integer := 2; + constant DBG_CORE_CTRL_STEP : integer := 3; + constant DBG_CORE_CTRL_START : integer := 4; + + -- STAT register (read only) + -- bit 0 : Core stopping (wait til bit 1 set) + -- bit 1 : Core stopped + -- bit 2 : Core terminated (clears with start or reset) + constant DBG_CORE_STAT : std_ulogic_vector(3 downto 0) := "0001"; + constant DBG_CORE_STAT_STOPPING : integer := 0; + constant DBG_CORE_STAT_STOPPED : integer := 1; + constant DBG_CORE_STAT_TERM : integer := 2; + + -- NIA register (read only for now) + constant DBG_CORE_NIA : std_ulogic_vector(3 downto 0) := "0010"; + + -- Some internal wires + signal stat_reg : std_ulogic_vector(63 downto 0); + + -- Some internal latches + signal stopping : std_ulogic; + signal do_step : std_ulogic; + signal do_reset : std_ulogic; + signal do_icreset : std_ulogic; + signal terminated : std_ulogic; + +begin + -- Single cycle register accesses on DMI + dmi_ack <= dmi_req; + + -- Status register read composition + stat_reg <= (2 => terminated, + 1 => core_stopped, + 0 => stopping, + others => '0'); + + -- DMI read data mux + with 
dmi_addr select dmi_dout <= + stat_reg when DBG_CORE_STAT, + nia when DBG_CORE_NIA, + (others => '0') when others; + + -- DMI writes + reg_write: process(clk) + begin + if rising_edge(clk) then + if (rst) then + stopping <= '0'; + terminated <= '0'; + else + -- Reset the 1-cycle "do" signals + do_step <= '0'; + do_reset <= '0'; + do_icreset <= '0'; + + -- Edge detect on dmi_req for 1-shot pulses + dmi_req_1 <= dmi_req; + if dmi_req = '1' and dmi_req_1 = '0' then + if dmi_wr = '1' then + report("DMI write to " & to_hstring(dmi_addr)); + + -- Control register actions + if dmi_addr = DBG_CORE_CTRL then + if dmi_din(DBG_CORE_CTRL_RESET) = '1' then + do_reset <= '1'; + terminated <= '0'; + end if; + if dmi_din(DBG_CORE_CTRL_STOP) = '1' then + stopping <= '1'; + end if; + if dmi_din(DBG_CORE_CTRL_STEP) = '1' then + do_step <= '1'; + terminated <= '0'; + end if; + if dmi_din(DBG_CORE_CTRL_ICRESET) = '1' then + do_icreset <= '1'; + end if; + if dmi_din(DBG_CORE_CTRL_START) = '1' then + stopping <= '0'; + terminated <= '0'; + end if; + end if; + else + report("DMI read from " & to_string(dmi_addr)); + end if; + end if; + + -- Set core stop on terminate. We'll be stopping some time *after* + -- the offending instruction, at least until we can do back flushes + -- that preserve NIA which we can't just yet. 
+ if terminate = '1' then + stopping <= '1'; + terminated <= '1'; + end if; + end if; + end if; + end process; + + -- Core control signals generated by the debug module + core_stop <= stopping and not do_step; + core_rst <= do_reset; + icache_rst <= do_icreset; + terminated_out <= terminated; +end behave; + diff --git a/core_tb.vhdl b/core_tb.vhdl index 4522da4..672b424 100644 --- a/core_tb.vhdl +++ b/core_tb.vhdl @@ -1,5 +1,6 @@ library ieee; use ieee.std_logic_1164.all; +use ieee.numeric_std.all; library work; use work.common.all; @@ -29,19 +30,21 @@ begin uart0_txd => open ); - clk_process: process - begin - clk <= '0'; - wait for clk_period/2; - clk <= '1'; - wait for clk_period/2; - end process; - - rst_process: process - begin - rst <= '1'; - wait for 10*clk_period; - rst <= '0'; - wait; - end process; + clk_process: process + begin + clk <= '0'; + wait for clk_period/2; + clk <= '1'; + wait for clk_period/2; + end process; + + rst_process: process + begin + rst <= '1'; + wait for 10*clk_period; + rst <= '0'; + wait; + end process; + + jtag: entity work.sim_jtag; end; diff --git a/decode1.vhdl b/decode1.vhdl index 6e8a521..3e2026d 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -248,6 +248,7 @@ begin v.valid := f_in.valid; v.nia := f_in.nia; v.insn := f_in.insn; + v.stop_mark := f_in.stop_mark; ppc_insn := PPC_ILLEGAL; diff --git a/decode2.vhdl b/decode2.vhdl index 15dae5d..482b91c 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -16,6 +16,8 @@ entity decode2 is complete_in : in std_ulogic; stall_out : out std_ulogic; + stopped_out : out std_ulogic; + flush_in: in std_ulogic; d_in : in Decode1ToDecode2Type; @@ -330,9 +332,16 @@ begin -- through the pipeline. 
stall_out <= '0'; is_valid := d_in.valid; + + -- Handle debugger stop + stopped_out <= '0'; + if d_in.stop_mark = '1' and v_int.outstanding = 0 then + stopped_out <= '1'; + end if; + case v_int.state is when IDLE => - if (flush_in = '0') and (d_in.valid = '1') and (d_in.decode.sgl_pipe = '1') then + if (flush_in = '0') and (is_valid = '1') and (d_in.decode.sgl_pipe = '1') then if v_int.outstanding /= 0 then v_int.state := WAIT_FOR_PREV_TO_COMPLETE; stall_out <= '1'; diff --git a/fetch1.vhdl b/fetch1.vhdl index ff7d64a..8e8c5a5 100644 --- a/fetch1.vhdl +++ b/fetch1.vhdl @@ -68,6 +68,8 @@ begin -- Update outputs f_out <= r; + + report "fetch1 R:" & std_ulogic'image(e_in.redirect) & " v.nia:" & to_hstring(v.nia) & " f_out.nia:" & to_hstring(f_out.nia); end process; end architecture behaviour; diff --git a/fetch2.vhdl b/fetch2.vhdl index 37cb66c..9573761 100644 --- a/fetch2.vhdl +++ b/fetch2.vhdl @@ -15,6 +15,7 @@ entity fetch2 is stall_out : out std_ulogic; flush_in : in std_ulogic; + stop_in : in std_ulogic; i_in : in IcacheToFetch2Type; i_out : out Fetch2ToIcacheType; @@ -49,12 +50,12 @@ begin v.valid := i_in.ack; v.nia := f_in.nia; v.insn := i_in.insn; - stall_out <= not i_in.ack; + stall_out <= stop_in or not i_in.ack; - - if flush_in = '1' then + if flush_in = '1' or stop_in = '1' then v.valid := '0'; end if; + v.stop_mark := stop_in; -- Update registers rin <= v; diff --git a/soc.vhdl b/soc.vhdl index 4b02807..39d72a9 100644 --- a/soc.vhdl +++ b/soc.vhdl @@ -1,8 +1,9 @@ library ieee; use ieee.std_logic_1164.all; +use ieee.numeric_std.all; use ieee.math_real.all; - use std.textio.all; +use std.env.stop; library work; use work.common.all; @@ -24,7 +25,10 @@ entity soc is -- UART0 signals: uart0_txd : out std_ulogic; - uart0_rxd : in std_ulogic + uart0_rxd : in std_ulogic; + + -- Misc (to use for things like LEDs) + core_terminated : out std_ulogic ); end entity soc; @@ -52,10 +56,6 @@ architecture behaviour of soc is signal wb_bram_out : wishbone_slave_out; 
constant mem_adr_bits : positive := positive(ceil(log2(real(MEMORY_SIZE)))); - -- Core debug signals (used in SIM only) - signal registers : regfile; - signal terminate : std_ulogic; - -- DMI debug bus signals signal dmi_addr : std_ulogic_vector(7 downto 0); signal dmi_din : std_ulogic_vector(63 downto 0); @@ -85,8 +85,12 @@ begin wishbone_insn_out => wishbone_icore_out, wishbone_data_in => wishbone_dcore_in, wishbone_data_out => wishbone_dcore_out, - registers => registers, - terminate_out => terminate + dmi_addr => dmi_addr(3 downto 0), + dmi_dout => dmi_core_dout, + dmi_din => dmi_dout, + dmi_wr => dmi_wr, + dmi_ack => dmi_core_ack, + dmi_req => dmi_core_req ); -- Wishbone bus master arbiter & mux @@ -136,20 +140,6 @@ begin end process slave_intercon; -- Simulated memory and UART - sim_terminate_test: if SIM generate - - -- Dump registers if core terminates - dump_registers: process(all) - begin - if terminate = '1' then - loop_0: for i in 0 to 31 loop - report "REG " & to_hstring(registers(i)); - end loop loop_0; - assert false report "end of test" severity failure; - end if; - end process; - - end generate; -- UART0 wishbone slave -- XXX FIXME: Need a proper wb64->wb8 adapter that @@ -207,8 +197,8 @@ begin -- DMI interconnect dmi_intercon: process(dmi_addr, dmi_req, - dmi_wb_ack, dmi_wb_dout, - dmi_core_ack, dmi_core_dout) + dmi_wb_ack, dmi_wb_dout, + dmi_core_ack, dmi_core_dout) -- DMI address map (each address is a full 64-bit register) -- @@ -222,12 +212,11 @@ begin variable slave : slave_type; begin -- Simple address decoder - if dmi_addr(7 downto 0) = "000000--" then + slave := SLAVE_NONE; + if std_match(dmi_addr, "000000--") then slave := SLAVE_WB; - elsif dmi_addr(7 downto 0) = "0001----" then + elsif std_match(dmi_addr, "0001----") then slave := SLAVE_CORE; - else - slave := SLAVE_NONE; end if; -- DMI muxing @@ -246,11 +235,12 @@ begin dmi_ack <= dmi_req; dmi_din <= (others => '1'); end case; - end process; - -- Core dummy - dmi_core_ack <= 
dmi_core_req; - dmi_core_dout <= x"0000000000000000"; + -- SIM magic exit + if SIM and dmi_req = '1' and dmi_addr = "11111111" and dmi_wr = '1' then + stop; + end if; + end process; -- Wishbone debug master (TODO: Add a DMI address decoder) wishbone_debug: entity work.wishbone_debug_master From fe275effebf14afd93133eb0a82c85a399ea6212 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 16 Sep 2019 16:29:08 +0100 Subject: [PATCH 07/11] New C based JTAG debug tool This works with both the sim socket and urjtag, and supports the new core functions, loading a file in memory etc... The code still needs a lot of cleanup and a help! Signed-off-by: Benjamin Herrenschmidt --- scripts/mw_debug.py | 103 ------- scripts/mw_debug/Makefile | 7 + scripts/mw_debug/mw_debug.c | 583 ++++++++++++++++++++++++++++++++++++ 3 files changed, 590 insertions(+), 103 deletions(-) delete mode 100755 scripts/mw_debug.py create mode 100644 scripts/mw_debug/Makefile create mode 100644 scripts/mw_debug/mw_debug.c diff --git a/scripts/mw_debug.py b/scripts/mw_debug.py deleted file mode 100755 index f22039c..0000000 --- a/scripts/mw_debug.py +++ /dev/null @@ -1,103 +0,0 @@ -#!/usr/bin/python3 - -import urjtag; - -def do_command(urc, op, addr, data): - urc.set_dr_in(op,1,0) - urc.set_dr_in(data,65,2) - urc.set_dr_in(addr,73,66) -# print("Sending:", urc.get_dr_in_string()) - urc.shift_dr() - urc.set_dr_in(0x0,73,0) - for x in range(5): - urc.shift_dr() -# print("Received:", urc.get_dr_out_string()) - rsp_code = urc.get_dr_out(1,0) - if rsp_code == 0: - return urc.get_dr_out(65,2) - if rsp_code != 3: - print("Weird response ! 
rsp=%x" % rsp_code); - print("Timeout sending command !") - -def do_read(urc, addr): - return do_command(urc, 1, addr, 0) - -def do_write(urc, addr, val): - do_command(urc, 2, addr, val) - -def main(): - # Init jtag - #urjtag.loglevel( urjtag.URJ_LOG_LEVEL_ALL ) - - urc = urjtag.chain() - urc.cable("DigilentHS1") - print('Cable frequency:', urc.get_frequency()) - #urc.tap_detect() - #length = urc.len() - #for i in range(0,urc.len()): - # idcode = urc.partid(0) - # print('[%d] 0x%08x' % (i, idcode)) - urc.addpart(6); - print("Part ID: ", urc.partid(0)) - #urc.part(0) - #urc.reset(); - urc.add_register("USER2_REG", 74); - urc.add_instruction("USER2", "000011", "USER2_REG"); - urc.add_register("IDCODE_REG", 32); - urc.add_instruction("IDCODE", "001001", "IDCODE_REG"); - # Send test command - urc.set_instruction("IDCODE") - urc.shift_ir() - urc.shift_dr() - print("Got:", hex(urc.get_dr_out())) - - urc.set_instruction("USER2") - urc.shift_ir() - - print("Reading memory at 0:") - do_write(urc, 0, 0) - do_write(urc, 2, 0x7ff) - print("00: %016x" % do_read(urc, 1)) - print("08: %016x" % do_read(urc, 1)) - print("10: %016x" % do_read(urc, 1)) - print("18: %016x" % do_read(urc, 1)) - do_write(urc, 0, 0x10) - do_write(urc, 1, 0xabcdef0123456789) - do_write(urc, 0, 0) - do_write(urc, 2, 0x7ff) - print("00: %016x" % do_read(urc, 1)) - print("08: %016x" % do_read(urc, 1)) - print("10: %016x" % do_read(urc, 1)) - print("18: %016x" % do_read(urc, 1)) - -# urc.set_dr_in(0,73,0); -# print("Test DR_IN 1:", urc.get_dr_in_string()) -# urc.set_dr_in(0xa,3,0); -# print("Test DR_IN 2:", urc.get_dr_in_string()) -# urc.set_dr_in(0x5,7,4); -# print("Test DR_IN 3:", urc.get_dr_in_string()) -# urc.set_dr_in(1,73,73); -# print("Test DR_IN 4:", urc.get_dr_in_string()) - -# print("Reading ADDR reg: %x" % do_read(urc, 0)) -# print("Writing all 1's to it:") -# do_write(urc, 0, 0xffffffffffffffff) -# print("Reading ADDR reg: %x" % do_read(urc, 0)) -# print("Writing 0xabcdef0123456789 to it:") -# 
do_write(urc, 0, 0xabcdef0123456789) -# print("Reading ADDR reg: %x" % do_read(urc, 0)) - - - -# urc.set_dr_in(0x1,41,0) -# print("Sending:", urc.get_dr_in_string()) -# urc.shift_dr() -# urc.set_dr_in(0x0,41,0) -# urc.shift_dr() -# print("Got1:", urc.get_dr_out_string()) -# urc.shift_dr() -# print("Got2:", hex(urc.get_dr_out())) - - -if __name__ == "__main__": - main() diff --git a/scripts/mw_debug/Makefile b/scripts/mw_debug/Makefile new file mode 100644 index 0000000..439b198 --- /dev/null +++ b/scripts/mw_debug/Makefile @@ -0,0 +1,7 @@ +CFLAGS = -O2 -g -Wall -std=c99 + +all: mw_debug + +mw_debug: mw_debug.c + $(CC) -o $@ $^ -lurjtag + diff --git a/scripts/mw_debug/mw_debug.c b/scripts/mw_debug/mw_debug.c new file mode 100644 index 0000000..f1a7cab --- /dev/null +++ b/scripts/mw_debug/mw_debug.c @@ -0,0 +1,583 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DBG_WB_ADDR 0x00 +#define DBG_WB_DATA 0x01 +#define DBG_WB_CTRL 0x02 + +#define DBG_CORE_CTRL 0x10 +#define DBG_CORE_CTRL_STOP (1 << 0) +#define DBG_CORE_CTRL_RESET (1 << 1) +#define DBG_CORE_CTRL_ICRESET (1 << 2) +#define DBG_CORE_CTRL_STEP (1 << 3) +#define DBG_CORE_CTRL_START (1 << 4) + +#define DBG_CORE_STAT 0x11 +#define DBG_CORE_STAT_STOPPING (1 << 0) +#define DBG_CORE_STAT_STOPPED (1 << 1) +#define DBG_CORE_STAT_TERM (1 << 2) + +#define DBG_CORE_NIA 0x12 + +static bool debug; + +struct backend { + int (*init)(const char *target); + int (*reset)(void); + int (*command)(uint8_t op, uint8_t addr, uint64_t *data); +}; +static struct backend *b; + +static void check(int r, const char *failstr) +{ + if (r >= 0) + return; + fprintf(stderr, "Error %s\n", failstr); + exit(1); +} + +/* -------------- SIM backend -------------- */ + +static int sim_fd = -1; + +static int sim_init(const char *target) +{ + struct sockaddr_in saddr; + struct hostent *hp; + const char *p, *host; + int 
port, rc; + + if (!target) + target = "localhost:13245"; + p = strchr(target, ':'); + host = p ? strndup(target, p - target) : strdup(target); /* no ':' => whole string is the host */ + if (p && *p) + p++; + else + p = "13245"; + port = strtoul(p, NULL, 10); + if (debug) + printf("Opening sim backend host '%s' port %d\n", host, port); + + sim_fd = socket(PF_INET, SOCK_STREAM, 0); + if (sim_fd < 0) { + fprintf(stderr, "Error opening socket: %s\n", + strerror(errno)); + return -1; + } + hp = gethostbyname(host); + if (!hp) { + fprintf(stderr,"Unknown host '%s'\n", host); + return -1; + } + memcpy(&saddr.sin_addr, hp->h_addr, hp->h_length); + saddr.sin_port = htons(port); + saddr.sin_family = PF_INET; + rc = connect(sim_fd, (struct sockaddr *)&saddr, sizeof(saddr)); + if (rc < 0) { + close(sim_fd); + fprintf(stderr,"Connection to '%s' failed: %s\n", + host, strerror(errno)); + return -1; + } + return 0; +} + +static int sim_reset(void) +{ +} + +static void add_bits(uint8_t **p, int *b, uint64_t d, int c) +{ + uint8_t md = 1 << *b; + uint64_t ms = 1; + + while (c--) { + if (d & ms) + (**p) |= md; + ms <<= 1; + if (*b == 7) { + *b = 0; + (*p)++; + md = 1; + } else { + (*b)++; + md <<= 1; + } + } +} + +static uint64_t read_bits(uint8_t **p, int *b, int c) +{ + uint8_t ms = 1 << *b; + uint64_t md = 1; + uint64_t d = 0; + + while (c--) { + if ((**p) & ms) + d |= md; + md <<= 1; + if (*b == 7) { + *b = 0; + (*p)++; + ms = 1; + } else { + (*b)++; + ms <<= 1; + } + } + return d; +} + +static int sim_command(uint8_t op, uint8_t addr, uint64_t *data) +{ + uint8_t buf[16], *p; + uint64_t d = data ? 
*data : 0; + int r, s, b = 0; + + memset(buf, 0, 16); + p = buf+1; + add_bits(&p, &b, op, 2); + add_bits(&p, &b, d, 64); + add_bits(&p, &b, addr, 8); + if (b) + p++; + buf[0] = 74; + if (0) + { + int i; + + for (i=0; i<(p-buf); i++) + printf("%02x ", buf[i]); + printf("\n"); + } + write(sim_fd, buf, p - buf); + r = read(sim_fd, buf, sizeof(buf)); /* never read past the 16-byte buffer */ + if (0 && r > 0) { + int i; + + for (i=0; imain_part = 0; + + rc = urj_tap_chain_connect(jc, cable, params); + if (rc != URJ_STATUS_OK) { + fprintf(stderr, "JTAG cable detect failed\n"); + return -1; + } + + /* XXX Hard wire part 0, that might need to change (use params and detect !) */ + rc = urj_tap_manual_add(jc, 6); + if (rc < 0) { + fprintf(stderr, "JTAG failed to add part !\n"); + return -1; + } + if (jc->parts == NULL || jc->parts->len == 0) { + fprintf(stderr, "JTAG Something's wrong after adding part !\n"); + return -1; + } + urj_part_parts_set_instruction(jc->parts, "BYPASS"); + + jc->active_part = part = 0; + + p = urj_tap_chain_active_part(jc); + if (!p) { + fprintf(stderr, "Failed to get active JTAG part\n"); + return -1; + } + rc = urj_part_data_register_define(p, "IDCODE_REG", 32); + if (rc != URJ_STATUS_OK) { + fprintf(stderr, "JTAG failed to add IDCODE_REG register !\n"); + return -1; + } + if (urj_part_instruction_define(p, "IDCODE", "001001", "IDCODE_REG") == NULL) { + fprintf(stderr, "JTAG failed to add IDCODE instruction !\n"); + return -1; + } + rc = urj_part_data_register_define(p, "USER2_REG", 74); + if (rc != URJ_STATUS_OK) { + fprintf(stderr, "JTAG failed to add USER2_REG register !\n"); + return -1; + } + if (urj_part_instruction_define(p, "USER2", "000011", "USER2_REG") == NULL) { + fprintf(stderr, "JTAG failed to add USER2 instruction !\n"); + return -1; + } + urj_part_set_instruction(p, "IDCODE"); + urj_tap_chain_shift_instructions(jc); + urj_tap_chain_shift_data_registers(jc, 1); + id = urj_tap_register_get_value(p->active_instruction->data_register->out); + printf("Found device ID: 0x%08x\n", id); 
+ urj_part_set_instruction(p, "USER2"); + urj_tap_chain_shift_instructions(jc); + + return 0; +} + +static int jtag_reset(void) +{ +} + +static int jtag_command(uint8_t op, uint8_t addr, uint64_t *data) +{ + urj_part_t *p = urj_tap_chain_active_part(jc); + urj_part_instruction_t *insn; + urj_data_register_t *dr; + uint64_t d = data ? *data : 0; + int rc; + + if (!p) + return -1; + insn = p->active_instruction; + if (!insn) + return -1; + dr = insn->data_register; + if (!dr) + return -1; + rc = urj_tap_register_set_value_bit_range(dr->in, op, 1, 0); + if (rc != URJ_STATUS_OK) + return -1; + rc = urj_tap_register_set_value_bit_range(dr->in, d, 65, 2); + if (rc != URJ_STATUS_OK) + return -1; + rc = urj_tap_register_set_value_bit_range(dr->in, addr, 73, 66); + if (rc != URJ_STATUS_OK) + return -1; + rc = urj_tap_chain_shift_data_registers(jc, 1); + if (rc != URJ_STATUS_OK) + return -1; + rc = urj_tap_register_get_value_bit_range(dr->out, 1, 0); + if (data) + *data = urj_tap_register_get_value_bit_range(dr->out, 65, 2); + return rc; +} + +static struct backend jtag_backend = { + .init = jtag_init, + .reset = jtag_reset, + .command = jtag_command, +}; + +static int dmi_read(uint8_t addr, uint64_t *data) +{ + int rc; + + rc = b->command(1, addr, data); + if (rc < 0) + return rc; + for (;;) { + rc = b->command(0, 0, data); + if (rc < 0) + return rc; + if (rc == 0) + return 0; + if (rc != 3) + fprintf(stderr, "Unknown status code %d !\n", rc); + } +} + +static int dmi_write(uint8_t addr, uint64_t data) +{ + int rc; + + rc = b->command(2, addr, &data); + if (rc < 0) + return rc; + for (;;) { + rc = b->command(0, 0, NULL); + if (rc < 0) + return rc; + if (rc == 0) + return 0; + if (rc != 3) + fprintf(stderr, "Unknown status code %d !\n", rc); + } +} + +static void core_status(void) +{ + uint64_t stat, nia; + const char *statstr, *statstr2; + + check(dmi_read(DBG_CORE_STAT, &stat), "reading core status"); + check(dmi_read(DBG_CORE_NIA, &nia), "reading core NIA"); + + if 
(debug) + printf("Core status = 0x%llx\n", (unsigned long long)stat); + statstr = "running"; + statstr2 = ""; + if (stat & DBG_CORE_STAT_STOPPED) { + statstr = "stopped"; + if (!(stat & DBG_CORE_STAT_STOPPING)) + statstr2 = " (restarting?)"; + else if (stat & DBG_CORE_STAT_TERM) + statstr2 = " (terminated)"; + } else if (stat & DBG_CORE_STAT_STOPPING) + statstr = "stopping"; + else if (stat & DBG_CORE_STAT_TERM) + statstr = "odd state (TERM but no STOP)"; + printf("Core: %s%s\n", statstr, statstr2); + printf(" NIA: %016llx\n", (unsigned long long)nia); +} + +static void core_stop(void) +{ + check(dmi_write(DBG_CORE_CTRL, DBG_CORE_CTRL_STOP), "stopping core"); +} + +static void core_start(void) +{ + check(dmi_write(DBG_CORE_CTRL, DBG_CORE_CTRL_START), "starting core"); +} + +static void core_reset(void) +{ + check(dmi_write(DBG_CORE_CTRL, DBG_CORE_CTRL_RESET), "resetting core"); /* RESET bit, not START */ +} + +static void core_step(void) +{ + uint64_t stat; + + check(dmi_read(DBG_CORE_STAT, &stat), "reading core status"); + + if (!(stat & DBG_CORE_STAT_STOPPED)) { + printf("Core not stopped !\n"); + return; + } + check(dmi_write(DBG_CORE_CTRL, DBG_CORE_CTRL_STEP), "stepping core"); +} + +static void icache_reset(void) +{ + check(dmi_write(DBG_CORE_CTRL, DBG_CORE_CTRL_ICRESET), "resetting icache"); +} + +static void mem_read(uint64_t addr, uint64_t count) +{ + uint64_t data; + int i, rc; + + rc = dmi_write(2, 0x7ff); + if (rc < 0) + return; + rc = dmi_write(0, addr); + if (rc < 0) + return; + for (i = 0; i < count; i++) { + rc = dmi_read(1, &data); + if (rc < 0) + return; + printf("%016llx: %016llx\n", + (unsigned long long)addr, + (unsigned long long)data); + addr += 8; + } +} + +static void load(const char *filename, uint64_t addr) +{ + uint64_t data; + int fd, rc, count; + + fd = open(filename, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "Failed to open '%s': %s\n", filename, strerror(errno)); + exit(1); + } + // XX dumb, do better + rc = dmi_write(2, 0x7ff); + if (rc < 0) + return; + rc 
= dmi_write(0, addr); + if (rc < 0) + return; + count = 0; + for (;;) { + data = 0; + rc = read(fd, &data, 8); + if (rc <= 0) + break; + // if (rc < 8) XXX fixup endian ? + dmi_write(1, data); + count += 8; + if (!(count % 1024)) + printf("%x...\n", count); + } + printf("%x done.\n", count); +} + +static void usage(const char *cmd) +{ + fprintf(stderr, "Usage: %s \n", cmd); + exit(1); +} + +int main(int argc, char *argv[]) +{ + const char *progname = argv[0]; + const char *target = NULL; + int rc, i = 1; + + b = NULL; + + while(1) { + int c, oindex; + static struct option lopts[] = { + { "help", no_argument, 0, 'h' }, + { "backend", required_argument, 0, 'b' }, + { "target", required_argument, 0, 't' }, + { "debug", no_argument, 0, 'd' }, + { 0, 0, 0, 0 } + }; + c = getopt_long(argc, argv, "dhb:t:", lopts, &oindex); + if (c < 0) + break; + switch(c) { + case 'h': + usage(progname); + break; + case 'b': + if (strcmp(optarg, "sim") == 0) + b = &sim_backend; + else if (strcmp(optarg, "jtag") == 0) + b = &jtag_backend; + else { + fprintf(stderr, "Unknown backend %s\n", optarg); + exit(1); + } + break; + case 't': + target = optarg; + break; + case 'd': + debug = true; + } + } + + if (b == NULL) { + fprintf(stderr, "No backend selected\n"); + exit(1); + } + + rc = b->init(target); + if (rc < 0) + exit(1); + for (i = optind; i < argc; i++) { + if (strcmp(argv[i], "dmiread") == 0) { + uint8_t addr; + uint64_t data; + + if ((i+1) >= argc) + usage(argv[0]); + addr = strtoul(argv[++i], NULL, 16); + check(dmi_read(addr, &data), "reading DMI register"); /* don't print uninitialised data on failure */ + printf("%02x: %016llx\n", addr, (unsigned long long)data); + } else if (strcmp(argv[i], "dmiwrite") == 0) { + uint8_t addr; + uint64_t data; + + if ((i+2) >= argc) + usage(argv[0]); + addr = strtoul(argv[++i], NULL, 16); + data = strtoull(argv[++i], NULL, 16); /* 64-bit value: unsigned long may be 32 bits */ + dmi_write(addr, data); + } else if (strcmp(argv[i], "creset") == 0) { + core_reset(); + } else if (strcmp(argv[i], "stop") == 0) { + core_stop(); + } else if (strcmp(argv[i], "start") == 0) { + 
core_start(); + } else if (strcmp(argv[i], "step") == 0) { + core_step(); + } else if (strcmp(argv[i], "quit") == 0) { + dmi_write(0xff, 0); + } else if (strcmp(argv[i], "status") == 0) { + /* do nothing, always done below */ + } else if (strcmp(argv[i], "mr") == 0) { + uint64_t addr, count = 1; + + if ((i+1) >= argc) + usage(argv[0]); + addr = strtoull(argv[++i], NULL, 16); + if (((i+1) < argc) && isdigit(argv[i+1][0])) + count = strtoull(argv[++i], NULL, 16); + mem_read(addr, count); + } else if (strcmp(argv[i], "load") == 0) { + const char *filename; + uint64_t addr = 0; + + if ((i+1) >= argc) + usage(argv[0]); + filename = argv[++i]; + if (((i+1) < argc) && isdigit(argv[i+1][0])) + addr = strtoull(argv[++i], NULL, 16); + load(filename, addr); + } else { + fprintf(stderr, "Unknown command %s\n", argv[i]); + exit(1); + } + } + core_status(); + return 0; +} From 42d802bed0bf05582bbb21e54bb3e68254e14892 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 20 Sep 2019 16:45:26 +1000 Subject: [PATCH 08/11] Add distclean to Makefile Signed-off-by: Benjamin Herrenschmidt --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index 2bd0074..0649f45 100644 --- a/Makefile +++ b/Makefile @@ -103,3 +103,6 @@ test_micropython_long: core_tb clean: rm -f *.o work-*cf unisim-*cf $(all) rm -f sim-unisim/*.o sim-unisim/unisim-*cf + +distclean: clean + rm -f *~ fpga/*~ From d82f4c18b6037d5a63841940f2e5142f68e43e1a Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 23 Sep 2019 20:49:21 +1000 Subject: [PATCH 09/11] Add core_debug.vhdl to fusesoc configs Signed-off-by: Anton Blanchard --- microwatt.core | 1 + 1 file changed, 1 insertion(+) diff --git a/microwatt.core b/microwatt.core index 6efe7c9..2bc428c 100644 --- a/microwatt.core +++ b/microwatt.core @@ -28,6 +28,7 @@ filesets: - wishbone_debug_master.vhdl - core.vhdl - icache.vhdl + - core_debug.vhdl file_type : vhdlSource-2008 soc: From 8c5dcc8c4c3c5e6b7d4e23b869dd404e271311c6 Mon 
Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 23 Sep 2019 21:20:12 +1000 Subject: [PATCH 10/11] Fix ghdl error I'm seeing an issue on my version of ghdl: core.vhdl:137:24:error: actual expression must be globally static Signed-off-by: Anton Blanchard --- core.vhdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/core.vhdl b/core.vhdl index d0bd5c5..e7ea1de 100644 --- a/core.vhdl +++ b/core.vhdl @@ -80,6 +80,7 @@ architecture behave of core is signal complete: std_ulogic; signal terminate: std_ulogic; signal core_rst: std_ulogic; + signal icache_rst: std_ulogic; -- Debug actions signal dbg_core_stop: std_ulogic; @@ -134,13 +135,15 @@ begin ) port map( clk => clk, - rst => rst or dbg_icache_rst, + rst => icache_rst, i_in => fetch2_to_icache, i_out => icache_to_fetch2, wishbone_out => wishbone_insn_out, wishbone_in => wishbone_insn_in ); + icache_rst <= rst or dbg_icache_rst; + decode1_0: entity work.decode1 port map ( clk => clk, From 6cae10eebd043979a0b1cec0db0f1024e979abd4 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 23 Sep 2019 21:22:18 +1000 Subject: [PATCH 11/11] Terminate test on illegal instruction This gets the CI going again, but we will want to fix the test harness since it's useful to be able to debug the core after it executes an illegal instruction. Signed-off-by: Anton Blanchard --- core.vhdl | 1 + 1 file changed, 1 insertion(+) diff --git a/core.vhdl b/core.vhdl index e7ea1de..21ceb1a 100644 --- a/core.vhdl +++ b/core.vhdl @@ -271,6 +271,7 @@ begin loop_0: for i in 0 to 31 loop report "REG " & to_hstring(registers(i)); end loop loop_0; + assert false report "end of test" severity failure; end if; end process; end generate;