diff --git a/Makefile b/Makefile index 62e9644..0649f45 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,9 @@ GHDL=ghdl -GHDLFLAGS=--std=08 +GHDLFLAGS=--std=08 -Psim-unisim CFLAGS=-O2 -Wall -all = core_tb simple_ram_behavioural_tb soc_reset_tb icache_tb multiply_tb +all = core_tb simple_ram_behavioural_tb soc_reset_tb icache_tb multiply_tb dmi_dtm_tb + # XXX # loadstore_tb fetch_tb @@ -12,8 +13,10 @@ all: $(all) $(GHDL) -a $(GHDLFLAGS) $< common.o: decode_types.o -core_tb.o: common.o core.o soc.o -core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o execute2.o loadstore1.o loadstore2.o multiply.o writeback.o +sim_jtag.o: sim_jtag_socket.o +core_tb.o: common.o core.o soc.o sim_jtag.o +core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o execute2.o loadstore1.o loadstore2.o multiply.o writeback.o core_debug.o +core_debug.o: cr_file.o: common.o crhelpers.o: common.o decode1.o: common.o decode_types.o @@ -40,17 +43,26 @@ simple_ram_behavioural_helpers.o: simple_ram_behavioural_tb.o: wishbone_types.o simple_ram_behavioural.o simple_ram_behavioural.o: wishbone_types.o simple_ram_behavioural_helpers.o sim_uart.o: wishbone_types.o sim_console.o -soc.o: common.o wishbone_types.o core.o wishbone_arbiter.o sim_uart.o simple_ram_behavioural.o +soc.o: common.o wishbone_types.o core.o wishbone_arbiter.o sim_uart.o simple_ram_behavioural.o dmi_dtm_xilinx.o wishbone_debug_master.o wishbone_arbiter.o: wishbone_types.o wishbone_types.o: writeback.o: common.o +dmi_dtm_tb.o: dmi_dtm_xilinx.o wishbone_debug_master.o +dmi_dtm_xilinx.o: sim-unisim/unisim_vcomponents.o +wishbone_debug_master.o: wishbone_types.o + +UNISIM_BITS = sim-unisim/unisim_vcomponents.vhdl sim-unisim/BSCANE2.vhdl sim-unisim/BUFG.vhdl +sim-unisim/unisim_vcomponents.o: $(UNISIM_BITS) + $(GHDL) -a $(GHDLFLAGS) --work=unisim --workdir=sim-unisim $^ + + fpga/soc_reset_tb.o: fpga/soc_reset.o 
soc_reset_tb: fpga/soc_reset_tb.o fpga/soc_reset.o $(GHDL) -e $(GHDLFLAGS) soc_reset_tb -core_tb: core_tb.o simple_ram_behavioural_helpers_c.o sim_console_c.o - $(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o -Wl,sim_console_c.o $@ +core_tb: core_tb.o simple_ram_behavioural_helpers_c.o sim_console_c.o sim_jtag_socket_c.o + $(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o -Wl,sim_console_c.o -Wl,sim_jtag_socket_c.o $@ fetch_tb: fetch_tb.o $(GHDL) -e $(GHDLFLAGS) $@ @@ -70,6 +82,9 @@ simple_ram_tb: simple_ram_tb.o simple_ram_behavioural_tb: simple_ram_behavioural_helpers_c.o simple_ram_behavioural_tb.o $(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o $@ +dmi_dtm_tb: dmi_dtm_tb.o simple_ram_behavioural_helpers_c.o + $(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o $@ + tests = $(sort $(patsubst tests/%.out,%,$(wildcard tests/*.out))) check: $(tests) test_micropython test_micropython_long @@ -86,4 +101,8 @@ test_micropython_long: core_tb @./scripts/test_micropython_long.py clean: - rm -f *.o work-*cf $(all) + rm -f *.o work-*cf unisim-*cf $(all) + rm -f sim-unisim/*.o sim-unisim/unisim-*cf + +distclean: clean + rm -f *~ fpga/~ diff --git a/common.vhdl b/common.vhdl index bf383ca..fe6738e 100644 --- a/common.vhdl +++ b/common.vhdl @@ -13,23 +13,26 @@ package common is end record; type Fetch1ToFetch2Type is record - nia: std_ulogic_vector(63 downto 0); + nia: std_ulogic_vector(63 downto 0); + pipe_stop : std_ulogic; end record; type Fetch2ToDecode1Type is record valid: std_ulogic; + stop_mark : std_ulogic; nia: std_ulogic_vector(63 downto 0); insn: std_ulogic_vector(31 downto 0); end record; - constant Fetch2ToDecode1Init : Fetch2ToDecode1Type := (valid => '0', others => (others => '0')); + constant Fetch2ToDecode1Init : Fetch2ToDecode1Type := (valid => '0', stop_mark => '0', others => (others => '0')); type Decode1ToDecode2Type is record valid: std_ulogic; + stop_mark : std_ulogic; nia: std_ulogic_vector(63 downto 
0); insn: std_ulogic_vector(31 downto 0); decode: decode_rom_t; end record; - constant Decode1ToDecode2Init : Decode1ToDecode2Type := (valid => '0', decode => decode_rom_init, others => (others => '0')); + constant Decode1ToDecode2Init : Decode1ToDecode2Type := (valid => '0', stop_mark => '0', decode => decode_rom_init, others => (others => '0')); type Fetch2ToIcacheType is record req: std_ulogic; diff --git a/core.vhdl b/core.vhdl index d34bf71..21ceb1a 100644 --- a/core.vhdl +++ b/core.vhdl @@ -20,9 +20,14 @@ entity core is wishbone_data_in : in wishbone_slave_out; wishbone_data_out : out wishbone_master_out; - -- Added for debug, ghdl doesn't support external names unfortunately - registers : out regfile; - terminate_out : out std_ulogic + dmi_addr : in std_ulogic_vector(3 downto 0); + dmi_din : in std_ulogic_vector(63 downto 0); + dmi_dout : out std_ulogic_vector(63 downto 0); + dmi_req : in std_ulogic; + dmi_wr : in std_ulogic; + dmi_ack : out std_ulogic; + + terminated_out : out std_logic ); end core; @@ -73,11 +78,24 @@ architecture behave of core is signal flush: std_ulogic; signal complete: std_ulogic; - signal terminate: std_ulogic; + signal core_rst: std_ulogic; + signal icache_rst: std_ulogic; + + -- Debug actions + signal dbg_core_stop: std_ulogic; + signal dbg_core_rst: std_ulogic; + signal dbg_icache_rst: std_ulogic; + + -- Debug status + signal dbg_core_is_stopped: std_ulogic; + + -- For sim + signal registers: regfile; + begin - terminate_out <= terminate; + core_rst <= dbg_core_rst or rst; fetch1_0: entity work.fetch1 generic map ( @@ -85,7 +103,7 @@ begin ) port map ( clk => clk, - rst => rst, + rst => core_rst, stall_in => fetch1_stall_in, flush_in => flush, e_in => execute1_to_fetch1, @@ -97,12 +115,13 @@ begin fetch2_0: entity work.fetch2 port map ( clk => clk, - rst => rst, + rst => core_rst, stall_in => fetch2_stall_in, stall_out => fetch2_stall_out, flush_in => flush, i_in => icache_to_fetch2, i_out => fetch2_to_icache, + stop_in => 
dbg_core_stop, f_in => fetch1_to_fetch2, f_out => fetch2_to_decode1 ); @@ -116,17 +135,19 @@ begin ) port map( clk => clk, - rst => rst, + rst => icache_rst, i_in => fetch2_to_icache, i_out => icache_to_fetch2, wishbone_out => wishbone_insn_out, wishbone_in => wishbone_insn_in ); + icache_rst <= rst or dbg_icache_rst; + decode1_0: entity work.decode1 port map ( clk => clk, - rst => rst, + rst => core_rst, stall_in => decode1_stall_in, flush_in => flush, f_in => fetch2_to_decode1, @@ -138,10 +159,11 @@ begin decode2_0: entity work.decode2 port map ( clk => clk, - rst => rst, + rst => core_rst, stall_out => decode2_stall_out, flush_in => flush, complete_in => complete, + stopped_out => dbg_core_is_stopped, d_in => decode1_to_decode2, e_out => decode2_to_execute1, l_out => decode2_to_loadstore1, @@ -222,4 +244,36 @@ begin complete_out => complete ); + debug_0: entity work.core_debug + port map ( + clk => clk, + rst => rst, + dmi_addr => dmi_addr, + dmi_din => dmi_din, + dmi_dout => dmi_dout, + dmi_req => dmi_req, + dmi_wr => dmi_wr, + dmi_ack => dmi_ack, + core_stop => dbg_core_stop, + core_rst => dbg_core_rst, + icache_rst => dbg_icache_rst, + terminate => terminate, + core_stopped => dbg_core_is_stopped, + nia => fetch1_to_fetch2.nia, + terminated_out => terminated_out + ); + + -- Dump registers if core terminates + sim_terminate_test: if SIM generate + dump_registers: process(all) + begin + if terminate = '1' then + loop_0: for i in 0 to 31 loop + report "REG " & to_hstring(registers(i)); + end loop loop_0; + assert false report "end of test" severity failure; + end if; + end process; + end generate; + end behave; diff --git a/core_debug.vhdl b/core_debug.vhdl new file mode 100644 index 0000000..c93c70d --- /dev/null +++ b/core_debug.vhdl @@ -0,0 +1,152 @@ +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +library work; +use work.common.all; + +entity core_debug is + port ( + clk : in std_logic; + rst : in std_logic; + + dmi_addr : in 
std_ulogic_vector(3 downto 0); + dmi_din : in std_ulogic_vector(63 downto 0); + dmi_dout : out std_ulogic_vector(63 downto 0); + dmi_req : in std_ulogic; + dmi_wr : in std_ulogic; + dmi_ack : out std_ulogic; + + -- Debug actions + core_stop : out std_ulogic; + core_rst : out std_ulogic; + icache_rst : out std_ulogic; + + -- Core status inputs + terminate : in std_ulogic; + core_stopped : in std_ulogic; + nia : in std_ulogic_vector(63 downto 0); + + -- Misc + terminated_out : out std_ulogic + ); +end core_debug; + +architecture behave of core_debug is + -- DMI needs fixing... make a one clock pulse + signal dmi_req_1: std_ulogic; + + -- CTRL register (direct actions, write 1 to act, read back 0) + -- bit 0 : Core stop + -- bit 1 : Core reset (doesn't clear stop) + -- bit 2 : Icache reset + -- bit 3 : Single step + -- bit 4 : Core start + constant DBG_CORE_CTRL : std_ulogic_vector(3 downto 0) := "0000"; + constant DBG_CORE_CTRL_STOP : integer := 0; + constant DBG_CORE_CTRL_RESET : integer := 1; + constant DBG_CORE_CTRL_ICRESET : integer := 2; + constant DBG_CORE_CTRL_STEP : integer := 3; + constant DBG_CORE_CTRL_START : integer := 4; + + -- STAT register (read only) + -- bit 0 : Core stopping (wait til bit 1 set) + -- bit 1 : Core stopped + -- bit 2 : Core terminated (clears with start or reset) + constant DBG_CORE_STAT : std_ulogic_vector(3 downto 0) := "0001"; + constant DBG_CORE_STAT_STOPPING : integer := 0; + constant DBG_CORE_STAT_STOPPED : integer := 1; + constant DBG_CORE_STAT_TERM : integer := 2; + + -- NIA register (read only for now) + constant DBG_CORE_NIA : std_ulogic_vector(3 downto 0) := "0010"; + + -- Some internal wires + signal stat_reg : std_ulogic_vector(63 downto 0); + + -- Some internal latches + signal stopping : std_ulogic; + signal do_step : std_ulogic; + signal do_reset : std_ulogic; + signal do_icreset : std_ulogic; + signal terminated : std_ulogic; + +begin + -- Single cycle register accesses on DMI + dmi_ack <= dmi_req; + + -- Status 
register read composition + stat_reg <= (2 => terminated, + 1 => core_stopped, + 0 => stopping, + others => '0'); + + -- DMI read data mux + with dmi_addr select dmi_dout <= + stat_reg when DBG_CORE_STAT, + nia when DBG_CORE_NIA, + (others => '0') when others; + + -- DMI writes + reg_write: process(clk) + begin + if rising_edge(clk) then + if (rst) then + stopping <= '0'; + terminated <= '0'; + else + -- Reset the 1-cycle "do" signals + do_step <= '0'; + do_reset <= '0'; + do_icreset <= '0'; + + -- Edge detect on dmi_req for 1-shot pulses + dmi_req_1 <= dmi_req; + if dmi_req = '1' and dmi_req_1 = '0' then + if dmi_wr = '1' then + report("DMI write to " & to_hstring(dmi_addr)); + + -- Control register actions + if dmi_addr = DBG_CORE_CTRL then + if dmi_din(DBG_CORE_CTRL_RESET) = '1' then + do_reset <= '1'; + terminated <= '0'; + end if; + if dmi_din(DBG_CORE_CTRL_STOP) = '1' then + stopping <= '1'; + end if; + if dmi_din(DBG_CORE_CTRL_STEP) = '1' then + do_step <= '1'; + terminated <= '0'; + end if; + if dmi_din(DBG_CORE_CTRL_ICRESET) = '1' then + do_icreset <= '1'; + end if; + if dmi_din(DBG_CORE_CTRL_START) = '1' then + stopping <= '0'; + terminated <= '0'; + end if; + end if; + else + report("DMI read from " & to_string(dmi_addr)); + end if; + end if; + + -- Set core stop on terminate. We'll be stopping some time *after* + -- the offending instruction, at least until we can do back flushes + -- that preserve NIA which we can't just yet. 
+ if terminate = '1' then + stopping <= '1'; + terminated <= '1'; + end if; + end if; + end if; + end process; + + -- Core control signals generated by the debug module + core_stop <= stopping and not do_step; + core_rst <= do_reset; + icache_rst <= do_icreset; + terminated_out <= terminated; +end behave; + diff --git a/core_tb.vhdl b/core_tb.vhdl index 4522da4..672b424 100644 --- a/core_tb.vhdl +++ b/core_tb.vhdl @@ -1,5 +1,6 @@ library ieee; use ieee.std_logic_1164.all; +use ieee.numeric_std.all; library work; use work.common.all; @@ -29,19 +30,21 @@ begin uart0_txd => open ); - clk_process: process - begin - clk <= '0'; - wait for clk_period/2; - clk <= '1'; - wait for clk_period/2; - end process; - - rst_process: process - begin - rst <= '1'; - wait for 10*clk_period; - rst <= '0'; - wait; - end process; + clk_process: process + begin + clk <= '0'; + wait for clk_period/2; + clk <= '1'; + wait for clk_period/2; + end process; + + rst_process: process + begin + rst <= '1'; + wait for 10*clk_period; + rst <= '0'; + wait; + end process; + + jtag: entity work.sim_jtag; end; diff --git a/decode1.vhdl b/decode1.vhdl index 6e8a521..3e2026d 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -248,6 +248,7 @@ begin v.valid := f_in.valid; v.nia := f_in.nia; v.insn := f_in.insn; + v.stop_mark := f_in.stop_mark; ppc_insn := PPC_ILLEGAL; diff --git a/decode2.vhdl b/decode2.vhdl index 15dae5d..482b91c 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -16,6 +16,8 @@ entity decode2 is complete_in : in std_ulogic; stall_out : out std_ulogic; + stopped_out : out std_ulogic; + flush_in: in std_ulogic; d_in : in Decode1ToDecode2Type; @@ -330,9 +332,16 @@ begin -- through the pipeline. 
stall_out <= '0'; is_valid := d_in.valid; + + -- Handle debugger stop + stopped_out <= '0'; + if d_in.stop_mark = '1' and v_int.outstanding = 0 then + stopped_out <= '1'; + end if; + case v_int.state is when IDLE => - if (flush_in = '0') and (d_in.valid = '1') and (d_in.decode.sgl_pipe = '1') then + if (flush_in = '0') and (is_valid = '1') and (d_in.decode.sgl_pipe = '1') then if v_int.outstanding /= 0 then v_int.state := WAIT_FOR_PREV_TO_COMPLETE; stall_out <= '1'; diff --git a/dmi_dtm_dummy.vhdl b/dmi_dtm_dummy.vhdl new file mode 100644 index 0000000..3cabf38 --- /dev/null +++ b/dmi_dtm_dummy.vhdl @@ -0,0 +1,30 @@ +-- Dummy/empty DMI interface to make toplevel happy on unsupported FPGAs + +library ieee; +use ieee.std_logic_1164.all; + +library work; +use work.wishbone_types.all; + +entity dmi_dtm is + generic(ABITS : INTEGER:=8; + DBITS : INTEGER:=32); + + port(sys_clk : in std_ulogic; + sys_reset : in std_ulogic; + dmi_addr : out std_ulogic_vector(ABITS - 1 downto 0); + dmi_din : in std_ulogic_vector(DBITS - 1 downto 0); + dmi_dout : out std_ulogic_vector(DBITS - 1 downto 0); + dmi_req : out std_ulogic; + dmi_wr : out std_ulogic; + dmi_ack : in std_ulogic + ); +end entity dmi_dtm; + +architecture behaviour of dmi_dtm is + dmi_addr <= (others => '0'); + dmi_dout <= (others => '0'); + dmi_req <= '0'; + dmi_wr <= '0'; +end architecture behaviour; + diff --git a/dmi_dtm_tb.vhdl b/dmi_dtm_tb.vhdl new file mode 100644 index 0000000..fe60c12 --- /dev/null +++ b/dmi_dtm_tb.vhdl @@ -0,0 +1,250 @@ +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +library work; +use work.common.all; +use work.wishbone_types.all; + +library unisim; +use unisim.vcomponents.all; + +entity dmi_dtm_tb is +end dmi_dtm_tb; + +architecture behave of dmi_dtm_tb is + signal clk : std_ulogic; + signal rst : std_ulogic; + constant clk_period : time := 10 ns; + constant jclk_period : time := 30 ns; + + -- DMI debug bus signals + signal dmi_addr : std_ulogic_vector(7 downto 0); 
+ signal dmi_din : std_ulogic_vector(63 downto 0); + signal dmi_dout : std_ulogic_vector(63 downto 0); + signal dmi_req : std_ulogic; + signal dmi_wr : std_ulogic; + signal dmi_ack : std_ulogic; + + -- Global JTAG signals (used by BSCANE2 inside dmi_dtm + alias j : glob_jtag_t is glob_jtag; + + -- Wishbone interfaces + signal wishbone_ram_in : wishbone_slave_out; + signal wishbone_ram_out : wishbone_master_out; + +begin + dtm: entity work.dmi_dtm + generic map( + ABITS => 8, + DBITS => 64 + ) + port map( + sys_clk => clk, + sys_reset => rst, + dmi_addr => dmi_addr, + dmi_din => dmi_din, + dmi_dout => dmi_dout, + dmi_req => dmi_req, + dmi_wr => dmi_wr, + dmi_ack => dmi_ack + ); + + simple_ram_0: entity work.mw_soc_memory + generic map(RAM_INIT_FILE => "simple_ram_behavioural.bin", + MEMORY_SIZE => 524288) + port map(clk => clk, rst => rst, + wishbone_in => wishbone_ram_out, + wishbone_out => wishbone_ram_in); + + wishbone_debug_0: entity work.wishbone_debug_master + port map(clk => clk, rst => rst, + dmi_addr => dmi_addr(1 downto 0), + dmi_dout => dmi_din, + dmi_din => dmi_dout, + dmi_wr => dmi_wr, + dmi_ack => dmi_ack, + dmi_req => dmi_req, + wb_in => wishbone_ram_in, + wb_out => wishbone_ram_out); + + -- system clock + sys_clk: process + begin + clk <= '1'; + wait for clk_period / 2; + clk <= '0'; + wait for clk_period / 2; + end process sys_clk; + + -- system sim: just reset and wait + sys_sim: process + begin + rst <= '1'; + wait for clk_period; + rst <= '0'; + wait; + end process; + + -- jtag sim process + sim_jtag: process + procedure clock(count: in INTEGER) is + begin + for i in 1 to count loop + j.tck <= '0'; + wait for jclk_period/2; + j.tck <= '1'; + wait for jclk_period/2; + end loop; + end procedure clock; + + procedure shift_out(val: in std_ulogic_vector) is + begin + for i in 0 to val'length-1 loop + j.tdi <= val(i); + clock(1); + end loop; + end procedure shift_out; + + procedure shift_in(val: out std_ulogic_vector) is + begin + for i in val'length-1 
downto 0 loop + val := j.tdo & val(val'length-1 downto 1); + clock(1); + end loop; + end procedure shift_in; + + procedure send_command( + addr : in std_ulogic_vector(7 downto 0); + data : in std_ulogic_vector(63 downto 0); + op : in std_ulogic_vector(1 downto 0)) is + begin + j.capture <= '1'; + clock(1); + j.capture <= '0'; + clock(1); + j.shift <= '1'; + shift_out(op); + shift_out(data); + shift_out(addr); + j.shift <= '0'; + j.update <= '1'; + clock(1); + j.update <= '0'; + clock(1); + end procedure send_command; + + procedure read_resp( + op : out std_ulogic_vector(1 downto 0); + data : out std_ulogic_vector(63 downto 0)) is + + variable addr : std_ulogic_vector(7 downto 0); + begin + j.capture <= '1'; + clock(1); + j.capture <= '0'; + clock(1); + j.shift <= '1'; + shift_in(op); + shift_in(data); + shift_in(addr); + j.shift <= '0'; + j.update <= '1'; + clock(1); + j.update <= '0'; + clock(1); + end procedure read_resp; + + procedure dmi_write(addr : in std_ulogic_vector(7 downto 0); + data : in std_ulogic_vector(63 downto 0)) is + variable resp_op : std_ulogic_vector(1 downto 0); + variable resp_data : std_ulogic_vector(63 downto 0); + variable timeout : integer; + begin + send_command(addr, data, "10"); + loop + read_resp(resp_op, resp_data); + case resp_op is + when "00" => + return; + when "11" => + timeout := timeout + 1; + assert timeout < 0 + report "dmi_write timed out !" 
severity error; + when others => + assert 0 > 1 report "dmi_write got odd status: " & + to_hstring(resp_op) severity error; + end case; + end loop; + end procedure dmi_write; + + + procedure dmi_read(addr : in std_ulogic_vector(7 downto 0); + data : out std_ulogic_vector(63 downto 0)) is + variable resp_op : std_ulogic_vector(1 downto 0); + variable timeout : integer; + begin + send_command(addr, (others => '0'), "01"); + loop + read_resp(resp_op, data); + case resp_op is + when "00" => + return; + when "11" => + timeout := timeout + 1; + assert timeout < 0 + report "dmi_read timed out !" severity error; + when others => + assert 0 > 1 report "dmi_read got odd status: " & + to_hstring(resp_op) severity error; + end case; + end loop; + end procedure dmi_read; + + variable data : std_ulogic_vector(63 downto 0); + begin + -- init & reset + j.reset <= '1'; + j.sel <= "0000"; + j.capture <= '0'; + j.update <= '0'; + j.shift <= '0'; + j.tdi <= '0'; + j.tms <= '0'; + j.runtest <= '0'; + clock(5); + j.reset <= '0'; + clock(5); + + -- select chain 2 + j.sel <= "0010"; + clock(1); + + -- send command + dmi_read(x"00", data); + report "Read addr reg:" & to_hstring(data); + report "Writing addr reg to all 1's"; + dmi_write(x"00", (others => '1')); + dmi_read(x"00", data); + report "Read addr reg:" & to_hstring(data); + + report "Writing ctrl reg to all 1's"; + dmi_write(x"02", (others => '1')); + dmi_read(x"02", data); + report "Read ctrl reg:" & to_hstring(data); + + report "Read memory at 0...\n"; + dmi_write(x"00", x"0000000000000000"); + dmi_write(x"02", x"00000000000007ff"); + dmi_read(x"01", data); + report "00:" & to_hstring(data); + dmi_read(x"01", data); + report "08:" & to_hstring(data); + dmi_read(x"01", data); + report "10:" & to_hstring(data); + dmi_read(x"01", data); + report "18:" & to_hstring(data); + clock(10); + std.env.finish; + end process; +end behave; diff --git a/dmi_dtm_xilinx.vhdl b/dmi_dtm_xilinx.vhdl new file mode 100644 index 0000000..bab7ce8 --- 
/dev/null +++ b/dmi_dtm_xilinx.vhdl @@ -0,0 +1,276 @@ +-- Xilinx internal JTAG to DMI interface +-- +-- DMI bus +-- +-- req : ____/------------\_____ +-- addr: xxxx< >xxxxx +-- dout: xxxx< >xxxxx +-- wr : xxxx< >xxxxx +-- din : xxxxxxxxxxxx< >xxx +-- ack : ____________/------\___ +-- +-- * addr/dout set along with req, can be latched on same cycle by slave +-- * ack & din remain up until req is dropped by master, the slave must +-- provide a stable output on din on reads during that time. +-- * req remains low until at least one sysclk after ack is seen down. +-- +-- JTAG (tck) DMI (sys_clk) +-- +-- * jtag_req = 1 +-- (jtag_req_0) * +-- (jtag_req_1) -> * dmi_req = 1 > +-- *.../... +-- * dmi_ack = 1 < +-- * (dmi_ack_0) +-- * <- (dmi_ack_1) +-- * jtag_req = 0 (and latch dmi_din) +-- (jtag_req_0) * +-- (jtag_req_1) -> * dmi_req = 0 > +-- * dmi_ack = 0 < +-- * (dmi_ack_0) +-- * <- (dmi_ack_1) +-- +-- jtag_req can go back to 1 when dmi_ack_1 is 0 +-- +-- Questions/TODO: +-- - I use 2 flip flops for sync, is that enough ? +-- - I treat the jtag_reset as an async reset, is that necessary ? +-- - Dbl check reset situation since we have two different resets +-- each only resetting part of the logic... +-- - Look at optionally removing the synchronizer on the ack path, +-- assuming JTAG is always slow enough that ack will have been +-- stable long enough by the time CAPTURE comes in. +-- - We could avoid the latched request by not shifting while a +-- request is in progress (and force TDO to 1 to return a busy +-- status). +-- +-- WARNING: This isn't the real DMI JTAG protocol (at least not yet). +-- A command while busy will be ignored. A response of "11" +-- means the previous command is still going, try again. +-- As such we don't implement the DMI "error" status, and +-- we don't implement DTMCS yet... 
This may still all change +-- but for now it's easier that way as the real DMI protocol +-- requires for a command to work properly that enough TCK +-- are sent while IDLE and I'm having trouble getting that +-- working with UrJtag and the Xilinx BSCAN2 for now. + +library ieee; +use ieee.std_logic_1164.all; +use ieee.math_real.all; + +library work; +use work.wishbone_types.all; + +library unisim; +use unisim.vcomponents.all; + +entity dmi_dtm is + generic(ABITS : INTEGER:=8; + DBITS : INTEGER:=32); + + port(sys_clk : in std_ulogic; + sys_reset : in std_ulogic; + dmi_addr : out std_ulogic_vector(ABITS - 1 downto 0); + dmi_din : in std_ulogic_vector(DBITS - 1 downto 0); + dmi_dout : out std_ulogic_vector(DBITS - 1 downto 0); + dmi_req : out std_ulogic; + dmi_wr : out std_ulogic; + dmi_ack : in std_ulogic +-- dmi_err : in std_ulogic TODO: Add error response + ); +end entity dmi_dtm; + +architecture behaviour of dmi_dtm is + + -- Signals coming out of the BSCANE2 block + signal jtag_reset : std_ulogic; + signal capture : std_ulogic; + signal update : std_ulogic; + signal drck : std_ulogic; + signal jtag_clk : std_ulogic; + signal sel : std_ulogic; + signal shift : std_ulogic; + signal tdi : std_ulogic; + signal tdo : std_ulogic; + signal tck : std_ulogic; + + -- ** JTAG clock domain ** + + -- Shift register + signal shiftr : std_ulogic_vector(ABITS + DBITS + 1 downto 0); + + -- Latched request + signal request : std_ulogic_vector(ABITS + DBITS + 1 downto 0); + + -- A request is present + signal jtag_req : std_ulogic; + + -- Synchronizer for jtag_rsp (sys clk -> jtag_clk) + signal dmi_ack_0 : std_ulogic; + signal dmi_ack_1 : std_ulogic; + + -- ** sys clock domain ** + + -- Synchronizer for jtag_req (jtag clk -> sys clk) + signal jtag_req_0 : std_ulogic; + signal jtag_req_1 : std_ulogic; + + -- ** combination signals + signal jtag_bsy : std_ulogic; + signal op_valid : std_ulogic; + signal rsp_op : std_ulogic_vector(1 downto 0); + + -- ** Constants ** + constant 
DMI_REQ_NOP : std_ulogic_vector(1 downto 0) := "00"; + constant DMI_REQ_RD : std_ulogic_vector(1 downto 0) := "01"; + constant DMI_REQ_WR : std_ulogic_vector(1 downto 0) := "10"; + constant DMI_RSP_OK : std_ulogic_vector(1 downto 0) := "00"; + constant DMI_RSP_BSY : std_ulogic_vector(1 downto 0) := "11"; + +begin + + -- Implement the Xilinx bscan2 for series 7 devices (TODO: use PoC to + -- wrap this if compatibility is required with older devices). + bscan : BSCANE2 + generic map ( + JTAG_CHAIN => 2 + ) + port map ( + CAPTURE => capture, + DRCK => drck, + RESET => jtag_reset, + RUNTEST => open, + SEL => sel, + SHIFT => shift, + TCK => tck, + TDI => tdi, + TMS => open, + UPDATE => update, + TDO => tdo + ); + + -- Some examples out there suggest buffering the clock so it's + -- treated as a proper clock net. This is probably needed when using + -- drck (the gated clock) but I'm using the real tck here to avoid + -- missing the update phase so maybe not... + -- + clkbuf : BUFG + port map ( +-- I => drck, + I => tck, + O => jtag_clk + ); + + + -- dmi_req synchronization + dmi_req_sync : process(sys_clk) + begin + -- sys_reset is synchronous + if rising_edge(sys_clk) then + if (sys_reset = '1') then + jtag_req_0 <= '0'; + jtag_req_1 <= '0'; + else + jtag_req_0 <= jtag_req; + jtag_req_1 <= jtag_req_0; + end if; + end if; + end process; + dmi_req <= jtag_req_1; + + -- dmi_ack synchronization + dmi_ack_sync: process(jtag_clk, jtag_reset) + begin + -- jtag_reset is async (see comments) + if jtag_reset = '1' then + dmi_ack_0 <= '0'; + dmi_ack_1 <= '0'; + elsif rising_edge(jtag_clk) then + dmi_ack_0 <= dmi_ack; + dmi_ack_1 <= dmi_ack_0; + end if; + end process; + + -- jtag_bsy indicates whether we can start a new request, we can when + -- we aren't already processing one (jtag_req) and the synchronized ack + -- of the previous one is 0. 
+ -- + jtag_bsy <= jtag_req or dmi_ack_1; + + -- decode request type in shift register + with shiftr(1 downto 0) select op_valid <= + '1' when DMI_REQ_RD, + '1' when DMI_REQ_WR, + '0' when others; + + -- encode response op + rsp_op <= DMI_RSP_BSY when jtag_bsy = '1' else DMI_RSP_OK; + + -- Some DMI out signals are directly driven from the request register + dmi_addr <= request(ABITS + DBITS + 1 downto DBITS + 2); + dmi_dout <= request(DBITS + 1 downto 2); + dmi_wr <= '1' when request(1 downto 0) = DMI_REQ_WR else '0'; + + -- TDO is wired to shift register bit 0 + tdo <= shiftr(0); + + -- Main state machine. Handles shift registers, request latch and + -- jtag_req latch. Could be split into 3 processes but it's probably + -- not worthwhile. + -- + shifter: process(jtag_clk, jtag_reset) + begin + if jtag_reset = '1' then + shiftr <= (others => '0'); + request <= (others => '0'); + jtag_req <= '0'; + elsif rising_edge(jtag_clk) then + + -- Handle jtag "commands" when sel is 1 + if sel = '1' then + -- Shift state, rotate the register + if shift = '1' then + shiftr <= tdi & shiftr(ABITS + DBITS + 1 downto 1); + end if; + + -- Update state (trigger) + -- + -- Latch the request if we aren't already processing one and + -- it has a valid command opcode. + -- + if update = '1' and op_valid = '1' then + if jtag_bsy = '0' then + request <= shiftr; + jtag_req <= '1'; + end if; + -- Set the shift register "op" to "busy". This will prevent + -- us from re-starting the command on the next update if + -- the command completes before that. + shiftr(1 downto 0) <= DMI_RSP_BSY; + end if; + + -- Request completion. + -- + -- Capture the response data for reads and clear request flag. + -- + -- Note: We clear req (and thus dmi_req) here which relies on tck + -- ticking and sel set. This means we are stuck with dmi_req up if + -- the jtag interface stops. Slaves must be resilient to this. 
+ -- + if jtag_req = '1' and dmi_ack_1 = '1' then + jtag_req <= '0'; + if request(1 downto 0) = DMI_REQ_RD then + request(DBITS + 1 downto 2) <= dmi_din; + end if; + end if; + + -- Capture state, grab latch content with updated status + if capture = '1' then + shiftr <= request(ABITS + DBITS + 1 downto 2) & rsp_op; + end if; + + end if; + end if; + end process; +end architecture behaviour; + diff --git a/fetch1.vhdl b/fetch1.vhdl index ff7d64a..8e8c5a5 100644 --- a/fetch1.vhdl +++ b/fetch1.vhdl @@ -68,6 +68,8 @@ begin -- Update outputs f_out <= r; + + report "fetch1 R:" & std_ulogic'image(e_in.redirect) & " v.nia:" & to_hstring(v.nia) & " f_out.nia:" & to_hstring(f_out.nia); end process; end architecture behaviour; diff --git a/fetch2.vhdl b/fetch2.vhdl index 37cb66c..9573761 100644 --- a/fetch2.vhdl +++ b/fetch2.vhdl @@ -15,6 +15,7 @@ entity fetch2 is stall_out : out std_ulogic; flush_in : in std_ulogic; + stop_in : in std_ulogic; i_in : in IcacheToFetch2Type; i_out : out Fetch2ToIcacheType; @@ -49,12 +50,12 @@ begin v.valid := i_in.ack; v.nia := f_in.nia; v.insn := i_in.insn; - stall_out <= not i_in.ack; + stall_out <= stop_in or not i_in.ack; - - if flush_in = '1' then + if flush_in = '1' or stop_in = '1' then v.valid := '0'; end if; + v.stop_mark := stop_in; -- Update registers rin <= v; diff --git a/microwatt.core b/microwatt.core index b62aef9..2bc428c 100644 --- a/microwatt.core +++ b/microwatt.core @@ -25,13 +25,16 @@ filesets: - multiply.vhdl - writeback.vhdl - insn_helpers.vhdl + - wishbone_debug_master.vhdl - core.vhdl - icache.vhdl + - core_debug.vhdl file_type : vhdlSource-2008 soc: files: - wishbone_arbiter.vhdl + - wishbone_debug_master.vhdl - soc.vhdl file_type : vhdlSource-2008 @@ -46,6 +49,14 @@ filesets: - fpga/firmware.hex : {copyto : firmware.hex, file_type : user} file_type : vhdlSource-2008 + debug_xilinx: + files: + - dmi_dtm_xilinx.vhdl : {file_type : vhdlSource-2008} + + debug_dummy: + files: + - dmi_dtm_dummy.vhdl : {file_type : 
vhdlSource-2008} + nexys_a7: files: - fpga/nexys_a7.xdc : {file_type : xdc} @@ -69,7 +80,7 @@ filesets: targets: nexys_a7: default_tool: vivado - filesets: [core, nexys_a7, soc, fpga] + filesets: [core, nexys_a7, soc, fpga, debug_xilinx] parameters : [memory_size, ram_init_file] tools: vivado: {part : xc7a100tcsg324-1} @@ -77,7 +88,7 @@ targets: nexys_video: default_tool: vivado - filesets: [core, nexys_video, soc, fpga] + filesets: [core, nexys_video, soc, fpga, debug_xilinx] parameters : [memory_size, ram_init_file] tools: vivado: {part : xc7a200tsbg484-1} @@ -85,7 +96,7 @@ targets: arty_a7-35: default_tool: vivado - filesets: [core, arty_a7-35, soc, fpga] + filesets: [core, arty_a7-35, soc, fpga, debug_xilinx] parameters : [memory_size, ram_init_file] tools: vivado: {part : xc7a35ticsg324-1L} @@ -93,7 +104,7 @@ targets: cmod_a7-35: default_tool: vivado - filesets: [core, cmod_a7-35, soc, fpga] + filesets: [core, cmod_a7-35, soc, fpga, debug_xilinx] parameters : [memory_size, ram_init_file, reset_low=false] tools: vivado: {part : xc7a35tcpg236-1} diff --git a/scripts/mw_debug/Makefile b/scripts/mw_debug/Makefile new file mode 100644 index 0000000..439b198 --- /dev/null +++ b/scripts/mw_debug/Makefile @@ -0,0 +1,7 @@ +CFLAGS = -O2 -g -Wall -std=c99 + +all: mw_debug + +mw_debug: mw_debug.c + $(CC) -o $@ $^ -lurjtag + diff --git a/scripts/mw_debug/mw_debug.c b/scripts/mw_debug/mw_debug.c new file mode 100644 index 0000000..f1a7cab --- /dev/null +++ b/scripts/mw_debug/mw_debug.c @@ -0,0 +1,583 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DBG_WB_ADDR 0x00 +#define DBG_WB_DATA 0x01 +#define DBG_WB_CTRL 0x02 + +#define DBG_CORE_CTRL 0x10 +#define DBG_CORE_CTRL_STOP (1 << 0) +#define DBG_CORE_CTRL_RESET (1 << 1) +#define DBG_CORE_CTRL_ICRESET (1 << 2) +#define DBG_CORE_CTRL_STEP (1 << 3) +#define DBG_CORE_CTRL_START (1 << 4) + 
+#define DBG_CORE_STAT 0x11 +#define DBG_CORE_STAT_STOPPING (1 << 0) +#define DBG_CORE_STAT_STOPPED (1 << 1) +#define DBG_CORE_STAT_TERM (1 << 2) + +#define DBG_CORE_NIA 0x12 + +static bool debug; + +struct backend { + int (*init)(const char *target); + int (*reset)(void); + int (*command)(uint8_t op, uint8_t addr, uint64_t *data); +}; +static struct backend *b; + +static void check(int r, const char *failstr) +{ + if (r >= 0) + return; + fprintf(stderr, "Error %s\n", failstr); + exit(1); +} + +/* -------------- SIM backend -------------- */ + +static int sim_fd = -1; + +static int sim_init(const char *target) +{ + struct sockaddr_in saddr; + struct hostent *hp; + const char *p, *host; + int port, rc; + + if (!target) + target = "localhost:13245"; + p = strchr(target, ':'); + host = strndup(target, p - target); + if (p && *p) + p++; + else + p = "13245"; + port = strtoul(p, NULL, 10); + if (debug) + printf("Opening sim backend host '%s' port %d\n", host, port); + + sim_fd = socket(PF_INET, SOCK_STREAM, 0); + if (sim_fd < 0) { + fprintf(stderr, "Error opening socket: %s\n", + strerror(errno)); + return -1; + } + hp = gethostbyname(host); + if (!hp) { + fprintf(stderr,"Unknown host '%s'\n", host); + return -1; + } + memcpy(&saddr.sin_addr, hp->h_addr, hp->h_length); + saddr.sin_port = htons(port); + saddr.sin_family = PF_INET; + rc = connect(sim_fd, (struct sockaddr *)&saddr, sizeof(saddr)); + if (rc < 0) { + close(sim_fd); + fprintf(stderr,"Connection to '%s' failed: %s\n", + host, strerror(errno)); + return -1; + } + return 0; +} + +static int sim_reset(void) +{ +} + +static void add_bits(uint8_t **p, int *b, uint64_t d, int c) +{ + uint8_t md = 1 << *b; + uint64_t ms = 1; + + while (c--) { + if (d & ms) + (**p) |= md; + ms <<= 1; + if (*b == 7) { + *b = 0; + (*p)++; + md = 1; + } else { + (*b)++; + md <<= 1; + } + } +} + +static uint64_t read_bits(uint8_t **p, int *b, int c) +{ + uint8_t ms = 1 << *b; + uint64_t md = 1; + uint64_t d = 0; + + while (c--) { + if 
((**p) & ms) + d |= md; + md <<= 1; + if (*b == 7) { + *b = 0; + (*p)++; + ms = 1; + } else { + (*b)++; + ms <<= 1; + } + } + return d; +} + +static int sim_command(uint8_t op, uint8_t addr, uint64_t *data) +{ + uint8_t buf[16], *p; + uint64_t d = data ? *data : 0; + int r, s, b = 0; + + memset(buf, 0, sizeof(buf)); + p = buf+1; + add_bits(&p, &b, op, 2); + add_bits(&p, &b, d, 64); + add_bits(&p, &b, addr, 8); + if (b) + p++; + buf[0] = 74; + if (0) + { + int i; + + for (i=0; i<(p-buf); i++) + printf("%02x ", buf[i]); + printf("\n"); + } + write(sim_fd, buf, p - buf); + r = read(sim_fd, buf, sizeof(buf)); /* never read past the 16-byte buffer */ + if (0 && r > 0) { + int i; + + for (i=0; imain_part = 0; + + rc = urj_tap_chain_connect(jc, cable, params); + if (rc != URJ_STATUS_OK) { + fprintf(stderr, "JTAG cable detect failed\n"); + return -1; + } + + /* XXX Hard wire part 0, that might need to change (use params and detect !) */ + rc = urj_tap_manual_add(jc, 6); + if (rc < 0) { + fprintf(stderr, "JTAG failed to add part !\n"); + return -1; + } + if (jc->parts == NULL || jc->parts->len == 0) { + fprintf(stderr, "JTAG Something's wrong after adding part !\n"); + return -1; + } + urj_part_parts_set_instruction(jc->parts, "BYPASS"); + + jc->active_part = part = 0; + + p = urj_tap_chain_active_part(jc); + if (!p) { + fprintf(stderr, "Failed to get active JTAG part\n"); + return -1; + } + rc = urj_part_data_register_define(p, "IDCODE_REG", 32); + if (rc != URJ_STATUS_OK) { + fprintf(stderr, "JTAG failed to add IDCODE_REG register !\n"); + return -1; + } + if (urj_part_instruction_define(p, "IDCODE", "001001", "IDCODE_REG") == NULL) { + fprintf(stderr, "JTAG failed to add IDCODE instruction !\n"); + return -1; + } + rc = urj_part_data_register_define(p, "USER2_REG", 74); + if (rc != URJ_STATUS_OK) { + fprintf(stderr, "JTAG failed to add USER2_REG register !\n"); + return -1; + } + if (urj_part_instruction_define(p, "USER2", "000011", "USER2_REG") == NULL) { + fprintf(stderr, "JTAG failed to add USER2 instruction !\n"); + return 
-1; + } + urj_part_set_instruction(p, "IDCODE"); + urj_tap_chain_shift_instructions(jc); + urj_tap_chain_shift_data_registers(jc, 1); + id = urj_tap_register_get_value(p->active_instruction->data_register->out); + printf("Found device ID: 0x%08x\n", id); + urj_part_set_instruction(p, "USER2"); + urj_tap_chain_shift_instructions(jc); + + return 0; +} + +static int jtag_reset(void) +{ +} + +static int jtag_command(uint8_t op, uint8_t addr, uint64_t *data) +{ + urj_part_t *p = urj_tap_chain_active_part(jc); + urj_part_instruction_t *insn; + urj_data_register_t *dr; + uint64_t d = data ? *data : 0; + int rc; + + if (!p) + return -1; + insn = p->active_instruction; + if (!insn) + return -1; + dr = insn->data_register; + if (!dr) + return -1; + rc = urj_tap_register_set_value_bit_range(dr->in, op, 1, 0); + if (rc != URJ_STATUS_OK) + return -1; + rc = urj_tap_register_set_value_bit_range(dr->in, d, 65, 2); + if (rc != URJ_STATUS_OK) + return -1; + rc = urj_tap_register_set_value_bit_range(dr->in, addr, 73, 66); + if (rc != URJ_STATUS_OK) + return -1; + rc = urj_tap_chain_shift_data_registers(jc, 1); + if (rc != URJ_STATUS_OK) + return -1; + rc = urj_tap_register_get_value_bit_range(dr->out, 1, 0); + if (data) + *data = urj_tap_register_get_value_bit_range(dr->out, 65, 2); + return rc; +} + +static struct backend jtag_backend = { + .init = jtag_init, + .reset = jtag_reset, + .command = jtag_command, +}; + +static int dmi_read(uint8_t addr, uint64_t *data) +{ + int rc; + + rc = b->command(1, addr, data); + if (rc < 0) + return rc; + for (;;) { + rc = b->command(0, 0, data); + if (rc < 0) + return rc; + if (rc == 0) + return 0; + if (rc != 3) + fprintf(stderr, "Unknown status code %d !\n", rc); + } +} + +static int dmi_write(uint8_t addr, uint64_t data) +{ + int rc; + + rc = b->command(2, addr, &data); + if (rc < 0) + return rc; + for (;;) { + rc = b->command(0, 0, NULL); + if (rc < 0) + return rc; + if (rc == 0) + return 0; + if (rc != 3) + fprintf(stderr, "Unknown status 
code %d !\n", rc); + } +} + +static void core_status(void) +{ + uint64_t stat, nia; + const char *statstr, *statstr2; + + check(dmi_read(DBG_CORE_STAT, &stat), "reading core status"); + check(dmi_read(DBG_CORE_NIA, &nia), "reading core NIA"); + + if (debug) + printf("Core status = 0x%llx\n", (unsigned long long)stat); + statstr = "running"; + statstr2 = ""; + if (stat & DBG_CORE_STAT_STOPPED) { + statstr = "stopped"; + if (!(stat & DBG_CORE_STAT_STOPPING)) + statstr2 = " (restarting?)"; + else if (stat & DBG_CORE_STAT_TERM) + statstr2 = " (terminated)"; + } else if (stat & DBG_CORE_STAT_STOPPING) + statstr = "stopping"; + else if (stat & DBG_CORE_STAT_TERM) + statstr = "odd state (TERM but no STOP)"; + printf("Core: %s%s\n", statstr, statstr2); + printf(" NIA: %016llx\n", (unsigned long long)nia); +} + +static void core_stop(void) +{ + check(dmi_write(DBG_CORE_CTRL, DBG_CORE_CTRL_STOP), "stopping core"); +} + +static void core_start(void) +{ + check(dmi_write(DBG_CORE_CTRL, DBG_CORE_CTRL_START), "starting core"); +} + +static void core_reset(void) +{ + check(dmi_write(DBG_CORE_CTRL, DBG_CORE_CTRL_RESET), "resetting core"); /* set the RESET control bit, not START */ +} + +static void core_step(void) +{ + uint64_t stat; + + check(dmi_read(DBG_CORE_STAT, &stat), "reading core status"); + + if (!(stat & DBG_CORE_STAT_STOPPED)) { + printf("Core not stopped !\n"); + return; + } + check(dmi_write(DBG_CORE_CTRL, DBG_CORE_CTRL_STEP), "stepping core"); +} + +static void icache_reset(void) +{ + check(dmi_write(DBG_CORE_CTRL, DBG_CORE_CTRL_ICRESET), "resetting icache"); +} + +static void mem_read(uint64_t addr, uint64_t count) +{ + uint64_t data; + int i, rc; + + rc = dmi_write(DBG_WB_CTRL, 0x7ff); /* all byte enables, auto-increment by 8 */ + if (rc < 0) + return; + rc = dmi_write(DBG_WB_ADDR, addr); + if (rc < 0) + return; + for (i = 0; i < count; i++) { + rc = dmi_read(DBG_WB_DATA, &data); + if (rc < 0) + return; + printf("%016llx: %016llx\n", + (unsigned long long)addr, + (unsigned long long)data); + addr += 8; + } +} + +static void load(const char *filename, uint64_t addr) +{ + uint64_t 
data; + int fd, rc, count; + + fd = open(filename, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "Failed to open '%s': %s\n", filename, strerror(errno)); + exit(1); + } + // XX dumb, do better + rc = dmi_write(2, 0x7ff); + if (rc < 0) + return; + rc = dmi_write(0, addr); + if (rc < 0) + return; + count = 0; + for (;;) { + data = 0; + rc = read(fd, &data, 8); + if (rc <= 0) + break; + // if (rc < 8) XXX fixup endian ? + dmi_write(1, data); + count += 8; + if (!(count % 1024)) + printf("%x...\n", count); + } + printf("%x done.\n", count); +} + +static void usage(const char *cmd) +{ + fprintf(stderr, "Usage: %s \n", cmd); + exit(1); +} + +int main(int argc, char *argv[]) +{ + const char *progname = argv[0]; + const char *target = NULL; + int rc, i = 1; + + b = NULL; + + while(1) { + int c, oindex; + static struct option lopts[] = { + { "help", no_argument, 0, 'h' }, + { "backend", required_argument, 0, 'b' }, + { "target", required_argument, 0, 't' }, + { "debug", no_argument, 0, 'd' }, + { 0, 0, 0, 0 } + }; + c = getopt_long(argc, argv, "dhb:t:", lopts, &oindex); + if (c < 0) + break; + switch(c) { + case 'h': + usage(progname); + break; + case 'b': + if (strcmp(optarg, "sim") == 0) + b = &sim_backend; + else if (strcmp(optarg, "jtag") == 0) + b = &jtag_backend; + else { + fprintf(stderr, "Unknown backend %s\n", optarg); + exit(1); + } + break; + case 't': + target = optarg; + break; + case 'd': + debug = true; + } + } + + if (b == NULL) { + fprintf(stderr, "No backend selected\n"); + exit(1); + } + + rc = b->init(target); + if (rc < 0) + exit(1); + for (i = optind; i < argc; i++) { + if (strcmp(argv[i], "dmiread") == 0) { + uint8_t addr; + uint64_t data; + + if ((i+1) >= argc) + usage(argv[0]); + addr = strtoul(argv[++i], NULL, 16); + dmi_read(addr, &data); + printf("%02x: %016llx\n", addr, (unsigned long long)data); + } else if (strcmp(argv[i], "dmiwrite") == 0) { + uint8_t addr; + uint64_t data; + + if ((i+2) >= argc) + usage(argv[0]); + addr = strtoul(argv[++i], 
NULL, 16); + data = strtoull(argv[++i], NULL, 16); /* 64-bit value: strtoul truncates where long is 32-bit */ + dmi_write(addr, data); + } else if (strcmp(argv[i], "creset") == 0) { + core_reset(); + } else if (strcmp(argv[i], "stop") == 0) { + core_stop(); + } else if (strcmp(argv[i], "start") == 0) { + core_start(); + } else if (strcmp(argv[i], "step") == 0) { + core_step(); + } else if (strcmp(argv[i], "quit") == 0) { + dmi_write(0xff, 0); + } else if (strcmp(argv[i], "status") == 0) { + /* do nothing, always done below */ + } else if (strcmp(argv[i], "mr") == 0) { + uint64_t addr, count = 1; + + if ((i+1) >= argc) + usage(argv[0]); + addr = strtoull(argv[++i], NULL, 16); + if (((i+1) < argc) && isdigit((unsigned char)argv[i+1][0])) + count = strtoull(argv[++i], NULL, 16); + mem_read(addr, count); + } else if (strcmp(argv[i], "load") == 0) { + const char *filename; + uint64_t addr = 0; + + if ((i+1) >= argc) + usage(argv[0]); + filename = argv[++i]; + if (((i+1) < argc) && isdigit((unsigned char)argv[i+1][0])) + addr = strtoull(argv[++i], NULL, 16); + load(filename, addr); + } else { + fprintf(stderr, "Unknown command %s\n", argv[i]); + exit(1); + } + } + core_status(); + return 0; +} diff --git a/sim-unisim/BSCANE2.vhdl b/sim-unisim/BSCANE2.vhdl new file mode 100644 index 0000000..15211fa --- /dev/null +++ b/sim-unisim/BSCANE2.vhdl @@ -0,0 +1,39 @@ +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.ALL; + +library unisim; +use unisim.vcomponents.all; + +entity BSCANE2 is + generic(jtag_chain: INTEGER); + port(capture : out std_logic; + drck : out std_logic; + reset : out std_logic; + runtest : out std_logic; + sel : out std_logic; + shift : out std_logic; + tck : out std_logic; + tdi : out std_logic; + tms : out std_logic; + update : out std_logic; + tdo : in std_logic + ); +end BSCANE2; + +architecture behaviour of BSCANE2 is + alias j : glob_jtag_t is glob_jtag; +begin + sel <= j.sel(jtag_chain); + tck <= j.tck; + drck <= tck and sel and (capture or shift); + capture <= j.capture; + reset <= j.reset; + runtest <= j.runtest; + shift <= 
j.shift; + tdi <= j.tdi; + tms <= j.tms; + update <= j.update; + j.tdo <= tdo; +end architecture behaviour; + diff --git a/sim-unisim/BUFG.vhdl b/sim-unisim/BUFG.vhdl new file mode 100644 index 0000000..462017a --- /dev/null +++ b/sim-unisim/BUFG.vhdl @@ -0,0 +1,12 @@ +library IEEE; +use IEEE.std_logic_1164.all; + +entity BUFG is + port(I : in std_logic; + O : out std_logic + ); +end BUFG; +architecture behaviour of BUFG is +begin + O <= I; +end architecture behaviour; diff --git a/sim-unisim/unisim_vcomponents.vhdl b/sim-unisim/unisim_vcomponents.vhdl new file mode 100644 index 0000000..7faebac --- /dev/null +++ b/sim-unisim/unisim_vcomponents.vhdl @@ -0,0 +1,45 @@ +library IEEE; +use IEEE.std_logic_1164.all; + +package vcomponents is + + -- Global JTAG signals. Xilinx implementation hooks that up to + -- their internal JTAG tap, we just expose them for the testbench + -- to use. These are used by our BSCANE2 block. + -- + type glob_jtag_t is record + reset : std_logic; + tck : std_logic; + tdo : std_logic; + tdi : std_logic; + tms : std_logic; + sel : std_logic_vector(4 downto 1); + capture : std_logic; + shift : std_logic; + update : std_logic; + runtest : std_logic; + end record glob_jtag_t; + signal glob_jtag : glob_jtag_t; + + component BSCANE2 is + generic(jtag_chain: integer); + port(capture : out std_logic; + drck : out std_logic; + reset : out std_logic; + runtest : out std_logic; + sel : out std_logic; + shift : out std_logic; + tck : out std_logic; + tdi : out std_logic; + tms : out std_logic; + update : out std_logic; + tdo : in std_logic + ); + end component BSCANE2; + + component BUFG is + port(I : in std_logic; + O : out std_logic + ); + end component BUFG; +end package vcomponents; diff --git a/sim_jtag.vhdl b/sim_jtag.vhdl new file mode 100644 index 0000000..694491f --- /dev/null +++ b/sim_jtag.vhdl @@ -0,0 +1,105 @@ +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +library work; +use work.sim_jtag_socket.all; + +library 
unisim; +use unisim.vcomponents.all; + +entity sim_jtag is +end sim_jtag; + +architecture behaviour of sim_jtag is +begin + jtag: process + -- Global JTAG signals (used by BSCANE2 inside dmi_dtm + alias j : glob_jtag_t is glob_jtag; + + -- Super fast JTAG clock for sim. For debugging the JTAG module, + -- change this to something much larger, for example 60ns, to reflect + -- more realistic conditions. + constant jclk_period : time := 1 ns; + + -- Polling the socket... this could be made slower when nothing + -- is connected once we have that indication from the C code. + constant poll_period : time := 100 ns; + + -- Number of dummy JTAG clocks to inject after a command. (I haven't + -- got that working with UrJtag but at least with sim, having the + -- right number here allows the synchronizers time to complete a + -- command on the first message exchange, thus avoiding the need + -- for two full shifts for a response. + constant dummy_clocks : integer := 80; + + procedure clock(count: in INTEGER) is + begin + for i in 1 to count loop + j.tck <= '0'; + wait for jclk_period/2; + j.tck <= '1'; + wait for jclk_period/2; + end loop; + end procedure clock; + + procedure clock_command(cmd: in std_ulogic_vector; + rsp: out std_ulogic_vector) is + begin + j.capture <= '1'; + clock(1); + j.capture <= '0'; + clock(1); + j.shift <= '1'; + for i in 0 to cmd'length-1 loop + j.tdi <= cmd(i); + rsp := rsp(1 to rsp'length-1) & j.tdo; + clock(1); + end loop; + j.shift <= '0'; + j.update <= '1'; + clock(1); + j.update <= '0'; + clock(1); + end procedure clock_command; + + variable cmd : std_ulogic_vector(0 to 247); + variable rsp : std_ulogic_vector(0 to 247); + variable msize : std_ulogic_vector(7 downto 0); + variable size : integer; + + begin + + -- init & reset + j.reset <= '1'; + j.sel <= "0000"; + j.capture <= '0'; + j.update <= '0'; + j.shift <= '0'; + j.tdi <= '0'; + j.tms <= '0'; + j.runtest <= '0'; + clock(5); + j.reset <= '0'; + clock(5); + + -- select chain USER2 + -- 
XXX TODO: Send that via protocol instead + -- XXX TODO: Also maybe have the C code tell us if connected or not + -- and clock when connected. + j.sel <= "0010"; + clock(1); + rsp := (others => '0'); + while true loop + wait for poll_period; + sim_jtag_read_msg(cmd, msize); + size := to_integer(unsigned(msize)); + if size /= 0 and size < 248 then + clock_command(cmd(0 to size-1), + rsp(0 to size-1)); + sim_jtag_write_msg(rsp, msize); + clock(dummy_clocks); + end if; + end loop; + end process; +end; diff --git a/sim_jtag_socket.vhdl b/sim_jtag_socket.vhdl new file mode 100644 index 0000000..b03eb48 --- /dev/null +++ b/sim_jtag_socket.vhdl @@ -0,0 +1,24 @@ +library ieee; +use ieee.std_logic_1164.all; + +package sim_jtag_socket is + procedure sim_jtag_read_msg(out_msg : out std_ulogic_vector(247 downto 0); + out_size : out std_ulogic_vector(7 downto 0)); + attribute foreign of sim_jtag_read_msg : procedure is "VHPIDIRECT sim_jtag_read_msg"; + procedure sim_jtag_write_msg(in_msg : in std_ulogic_vector(247 downto 0); + in_size : in std_ulogic_vector(7 downto 0)); + attribute foreign of sim_jtag_write_msg : procedure is "VHPIDIRECT sim_jtag_write_msg"; +end sim_jtag_socket; + +package body sim_jtag_socket is + procedure sim_jtag_read_msg(out_msg : out std_ulogic_vector(247 downto 0); + out_size : out std_ulogic_vector(7 downto 0)) is + begin + assert false report "VHPI" severity failure; + end sim_jtag_read_msg; + procedure sim_jtag_write_msg(in_msg : in std_ulogic_vector(247 downto 0); + in_size : in std_ulogic_vector(7 downto 0)) is + begin + assert false report "VHPI" severity failure; + end sim_jtag_write_msg; +end sim_jtag_socket; diff --git a/sim_jtag_socket_c.c b/sim_jtag_socket_c.c new file mode 100644 index 0000000..e0c21a4 --- /dev/null +++ b/sim_jtag_socket_c.c @@ -0,0 +1,222 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* XXX Make that some parameter */ +#define TCP_PORT 13245 +#define 
MAX_PACKET 32 + +#define vhpi0 2 /* forcing 0 */ +#define vhpi1 3 /* forcing 1 */ + +static void to_std_logic_vector(unsigned long val, unsigned char *p, + unsigned long len) +{ + if (len > 64) { + fprintf(stderr, "%s: invalid length %lu\n", __func__, len); + exit(1); + } + + for (unsigned long i = 0; i < len; i++) { + if ((val >> (len-1-i) & 1)) + *p = vhpi1; + else + *p = vhpi0; + + p++; + } +} + +static uint64_t from_std_logic_vector(unsigned char *p, unsigned long len) +{ + unsigned long ret = 0; + + if (len > 64) { + fprintf(stderr, "%s: invalid length %lu\n", __func__, len); + exit(1); + } + + for (unsigned long i = 0; i < len; i++) { + unsigned char bit; + + if (*p == vhpi0) { + bit = 0; + } else if (*p == vhpi1) { + bit = 1; + } else { + fprintf(stderr, "%s: bad bit %d\n", __func__, *p); + bit = 0; + } + + ret = (ret << 1) | bit; + p++; + } + + return ret; +} + +static int fd = -1; +static int cfd = -1; + +static void open_socket(void) +{ + struct sockaddr_in addr; + int opt, rc, flags; + + if (fd >= 0 || fd < -1) + return; + + signal(SIGPIPE, SIG_IGN); + fd = socket(AF_INET, SOCK_STREAM, 0); + if (fd < 0) { + fprintf(stderr, "Failed to open debug socket !\r\n"); + goto fail; + } + + rc = 0; + flags = fcntl(fd, F_GETFL); + if (flags >= 0) + rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK); + if (flags < 0 || rc < 0) { + fprintf(stderr, "Failed to configure debug socket !\r\n"); + } + + memset(&addr, 0, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_port = htons(TCP_PORT); + addr.sin_addr.s_addr = htonl(INADDR_ANY); + opt = 1; + setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); + rc = bind(fd, (struct sockaddr *)&addr, sizeof(addr)); + if (rc < 0) { + fprintf(stderr, "Failed to bind debug socket !\r\n"); + goto fail; + } + rc = listen(fd,1); + if (rc < 0) { + fprintf(stderr, "Failed to listen to debug socket !\r\n"); + goto fail; + } + fprintf(stderr, "Debug socket ready\r\n"); + return; +fail: + if (fd >= 0) + close(fd); + fd = -2; +} + 
+static void check_connection(void) +{ + struct sockaddr_in addr; + socklen_t addr_len = sizeof(addr); + + cfd = accept(fd, (struct sockaddr *)&addr, &addr_len); + if (cfd < 0) + return; + fprintf(stderr, "Debug client connected !\r\n"); +} + +void sim_jtag_read_msg(unsigned char *out_msg, unsigned char *out_size) +{ + unsigned char data[MAX_PACKET]; + unsigned char size = 0; + struct pollfd fdset[1]; + int rc, i; + + if (fd == -1) + open_socket(); + if (fd < 0) + goto finish; + if (cfd < 0) + check_connection(); + if (cfd < 0) + goto finish; + + memset(fdset, 0, sizeof(fdset)); + fdset[0].fd = cfd; + fdset[0].events = POLLIN; + rc = poll(fdset, 1, 0); + if (rc <= 0) + goto finish; + rc = read(cfd, data, MAX_PACKET); + if (rc < 0) + fprintf(stderr, "Debug read error, assuming client disconnected !\r\n"); + if (rc == 0) + fprintf(stderr, "Debug client disconnected !\r\n"); + if (rc <= 0) { + close(cfd); + cfd = -1; + goto finish; + } + +#if 0 + fprintf(stderr, "Got message:\n\r"); + { + for (i=0; i> 3; + int bit = 1 << (i & 7); + out_msg[i] = (data[byte+1] & bit) ? 
vhpi1 : vhpi0; + } +finish: + to_std_logic_vector(size, out_size, 8); +} + +void sim_jtag_write_msg(unsigned char *in_msg, unsigned char *in_size) +{ + unsigned char data[MAX_PACKET] = { 0 }; /* bytes are read-modify-written below, so they must start defined */ + unsigned char size; + int rc, i; + + size = from_std_logic_vector(in_size, 8); + data[0] = size; + for (i = 0; i < size; i++) { + int byte = i >> 3; + int bit = 1 << (i & 7); + if (in_msg[i] == vhpi1) + data[byte+1] |= bit; + else + data[byte+1] &= ~bit; + } + rc = (size + 7) / 8; + +#if 0 + fprintf(stderr, "Sending response:\n\r"); + { + for (i=0; i wishbone_icore_out, wishbone_data_in => wishbone_dcore_in, wishbone_data_out => wishbone_dcore_out, - registers => registers, - terminate_out => terminate + dmi_addr => dmi_addr(3 downto 0), + dmi_dout => dmi_core_dout, + dmi_din => dmi_dout, + dmi_wr => dmi_wr, + dmi_ack => dmi_core_ack, + dmi_req => dmi_core_req ); -- Wishbone bus master arbiter & mux wishbone_arbiter_0: entity work.wishbone_arbiter port map( - clk => system_clk, - rst => rst, - wb1_in => wishbone_dcore_out, - wb1_out => wishbone_dcore_in, - wb2_in => wishbone_icore_out, - wb2_out => wishbone_icore_in, - wb_out => wb_master_out, - wb_in => wb_master_in + clk => system_clk, rst => rst, + wb1_in => wishbone_dcore_out, wb1_out => wishbone_dcore_in, + wb2_in => wishbone_icore_out, wb2_out => wishbone_icore_in, + wb3_in => wishbone_debug_out, wb3_out => wishbone_debug_in, + wb_out => wb_master_out, wb_in => wb_master_in ); -- Wishbone slaves address decoder & mux @@ -122,20 +140,6 @@ begin end process slave_intercon; -- Simulated memory and UART - sim_terminate_test: if SIM generate - - -- Dump registers if core terminates - dump_registers: process(all) - begin - if terminate = '1' then - loop_0: for i in 0 to 31 loop - report "REG " & to_hstring(registers(i)); - end loop loop_0; - assert false report "end of test" severity failure; - end if; - end process; - - end generate; -- UART0 wishbone slave -- XXX FIXME: Need a proper wb64->wb8 adapter that @@ -174,4 +178,81 @@ 
begin wishbone_out => wb_bram_out ); + -- DMI(debug bus) <-> JTAG bridge + dtm: entity work.dmi_dtm + generic map( + ABITS => 8, + DBITS => 64 + ) + port map( + sys_clk => system_clk, + sys_reset => rst, + dmi_addr => dmi_addr, + dmi_din => dmi_din, + dmi_dout => dmi_dout, + dmi_req => dmi_req, + dmi_wr => dmi_wr, + dmi_ack => dmi_ack + ); + + -- DMI interconnect + dmi_intercon: process(dmi_addr, dmi_req, dmi_wr, + dmi_wb_ack, dmi_wb_dout, + dmi_core_ack, dmi_core_dout) + + -- DMI address map (each address is a full 64-bit register) + -- + -- Offset: Size: Slave: + -- 0 4 Wishbone + -- 10 16 Core + + type slave_type is (SLAVE_WB, + SLAVE_CORE, + SLAVE_NONE); + variable slave : slave_type; + begin + -- Simple address decoder + slave := SLAVE_NONE; + if std_match(dmi_addr, "000000--") then + slave := SLAVE_WB; + elsif std_match(dmi_addr, "0001----") then + slave := SLAVE_CORE; + end if; + + -- DMI muxing + dmi_wb_req <= '0'; + dmi_core_req <= '0'; + case slave is + when SLAVE_WB => + dmi_wb_req <= dmi_req; + dmi_ack <= dmi_wb_ack; + dmi_din <= dmi_wb_dout; + when SLAVE_CORE => + dmi_core_req <= dmi_req; + dmi_ack <= dmi_core_ack; + dmi_din <= dmi_core_dout; + when others => + dmi_ack <= dmi_req; + dmi_din <= (others => '1'); + end case; + + -- SIM magic exit + if SIM and dmi_req = '1' and dmi_addr = "11111111" and dmi_wr = '1' then + stop; + end if; + end process; + + -- Wishbone debug master (TODO: Add a DMI address decoder) + wishbone_debug: entity work.wishbone_debug_master + port map(clk => system_clk, rst => rst, + dmi_addr => dmi_addr(1 downto 0), + dmi_dout => dmi_wb_dout, + dmi_din => dmi_dout, + dmi_wr => dmi_wr, + dmi_ack => dmi_wb_ack, + dmi_req => dmi_wb_req, + wb_in => wishbone_debug_in, + wb_out => wishbone_debug_out); + + + end architecture behaviour; diff --git a/wishbone_arbiter.vhdl b/wishbone_arbiter.vhdl index 7d5cbcd..d839b31 100644 --- a/wishbone_arbiter.vhdl +++ b/wishbone_arbiter.vhdl @@ -4,54 +4,76 @@ use ieee.std_logic_1164.all; library work; use 
work.wishbone_types.all; +-- TODO: Use an array of master/slaves with parametric size entity wishbone_arbiter is - port ( - clk : in std_ulogic; - rst : in std_ulogic; + port (clk : in std_ulogic; + rst : in std_ulogic; - wb1_in : in wishbone_master_out; - wb1_out : out wishbone_slave_out; + wb1_in : in wishbone_master_out; + wb1_out : out wishbone_slave_out; - wb2_in : in wishbone_master_out; - wb2_out : out wishbone_slave_out; + wb2_in : in wishbone_master_out; + wb2_out : out wishbone_slave_out; - wb_out : out wishbone_master_out; - wb_in : in wishbone_slave_out - ); + wb3_in : in wishbone_master_out; + wb3_out : out wishbone_slave_out; + + wb_out : out wishbone_master_out; + wb_in : in wishbone_slave_out + ); end wishbone_arbiter; architecture behave of wishbone_arbiter is - type wishbone_arbiter_state_t is (IDLE, WB1_BUSY, WB2_BUSY); + type wishbone_arbiter_state_t is (IDLE, WB1_BUSY, WB2_BUSY, WB3_BUSY); signal state : wishbone_arbiter_state_t := IDLE; begin - wb1_out <= wb_in when state = WB1_BUSY else wishbone_slave_out_init; - wb2_out <= wb_in when state = WB2_BUSY else wishbone_slave_out_init; - wb_out <= wb1_in when state = WB1_BUSY else wb2_in when state = WB2_BUSY else wishbone_master_out_init; + wishbone_muxes: process(state, wb_in, wb1_in, wb2_in, wb3_in) + begin + -- Requests from masters are fully muxed + wb_out <= wb1_in when state = WB1_BUSY else + wb2_in when state = WB2_BUSY else + wb3_in when state = WB3_BUSY else + wishbone_master_out_init; + + -- Responses from slave don't need to mux the data bus + wb1_out.dat <= wb_in.dat; + wb2_out.dat <= wb_in.dat; + wb3_out.dat <= wb_in.dat; + wb1_out.ack <= wb_in.ack when state = WB1_BUSY else '0'; + wb2_out.ack <= wb_in.ack when state = WB2_BUSY else '0'; + wb3_out.ack <= wb_in.ack when state = WB3_BUSY else '0'; + end process; wishbone_arbiter_process: process(clk) begin - if rising_edge(clk) then - if rst = '1' then - state <= IDLE; - else - case state is - when IDLE => - if wb1_in.cyc = '1' then - 
state <= WB1_BUSY; - elsif wb2_in.cyc = '1' then - state <= WB2_BUSY; - end if; - when WB1_BUSY => - if wb1_in.cyc = '0' then - state <= IDLE; - end if; - when WB2_BUSY => - if wb2_in.cyc = '0' then - state <= IDLE; - end if; - end case; - end if; - end if; + if rising_edge(clk) then + if rst = '1' then + state <= IDLE; + else + case state is + when IDLE => + if wb1_in.cyc = '1' then + state <= WB1_BUSY; + elsif wb2_in.cyc = '1' then + state <= WB2_BUSY; + elsif wb3_in.cyc = '1' then + state <= WB3_BUSY; + end if; + when WB1_BUSY => + if wb1_in.cyc = '0' then + state <= IDLE; + end if; + when WB2_BUSY => + if wb2_in.cyc = '0' then + state <= IDLE; + end if; + when WB3_BUSY => + if wb3_in.cyc = '0' then + state <= IDLE; + end if; + end case; + end if; + end if; end process; end behave; diff --git a/wishbone_debug_master.vhdl b/wishbone_debug_master.vhdl new file mode 100644 index 0000000..51441d5 --- /dev/null +++ b/wishbone_debug_master.vhdl @@ -0,0 +1,167 @@ +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +library work; +use work.wishbone_types.all; + +entity wishbone_debug_master is + port(clk : in std_ulogic; + rst : in std_ulogic; + + -- Debug bus interface + dmi_addr : in std_ulogic_vector(1 downto 0); + dmi_din : in std_ulogic_vector(63 downto 0); + dmi_dout : out std_ulogic_vector(63 downto 0); + dmi_req : in std_ulogic; + dmi_wr : in std_ulogic; + dmi_ack : out std_ulogic; + + -- Wishbone master interface + wb_out : out wishbone_master_out; + wb_in : in wishbone_slave_out + ); +end entity wishbone_debug_master; + +architecture behaviour of wishbone_debug_master is + + -- ** Register offsets definitions. 
All registers are 64-bit + constant DBG_WB_ADDR : std_ulogic_vector(1 downto 0) := "00"; + constant DBG_WB_DATA : std_ulogic_vector(1 downto 0) := "01"; + constant DBG_WB_CTRL : std_ulogic_vector(1 downto 0) := "10"; + constant DBG_WB_RSVD : std_ulogic_vector(1 downto 0) := "11"; + + -- CTRL register: + -- + -- bit 0..7 : SEL bits (byte enables) + -- bit 8 : address auto-increment + -- bit 10..9 : auto-increment value: + -- 00 - +1 + -- 01 - +2 + -- 10 - +4 + -- 11 - +8 + + -- ** Address and control registers and read data + signal reg_addr : std_ulogic_vector(63 downto 0); + signal reg_ctrl_out : std_ulogic_vector(63 downto 0); + signal reg_ctrl : std_ulogic_vector(10 downto 0); + signal data_latch : std_ulogic_vector(63 downto 0); + + type state_t is (IDLE, WB_CYCLE, DMI_WAIT); + signal state : state_t; + +begin + + -- Hard wire unused bits to 0 + reg_ctrl_out <= (63 downto 11 => '0', + 10 downto 0 => reg_ctrl); + + -- DMI read data mux + with dmi_addr select dmi_dout <= + reg_addr when DBG_WB_ADDR, + data_latch when DBG_WB_DATA, + reg_ctrl_out when DBG_WB_CTRL, + (others => '0') when others; + + -- ADDR and CTRL register writes + reg_write : process(clk) + subtype autoinc_inc_t is integer range 1 to 8; + function decode_autoinc(c : std_ulogic_vector(1 downto 0)) + return autoinc_inc_t is + begin + case c is + when "00" => return 1; + when "01" => return 2; + when "10" => return 4; + when "11" => return 8; + -- Below shouldn't be necessary but GHDL complains + when others => return 8; + end case; + end function decode_autoinc; + begin + if rising_edge(clk) then + if (rst) then + reg_addr <= (others => '0'); + reg_ctrl <= (others => '0'); + else -- Standard register writes + if dmi_req and dmi_wr then + if dmi_addr = DBG_WB_ADDR then + reg_addr <= dmi_din; + elsif dmi_addr = DBG_WB_CTRL then + reg_ctrl <= dmi_din(10 downto 0); + end if; + end if; + -- Address register auto-increment + if state = WB_CYCLE and (wb_in.ack and reg_ctrl(8))= '1' then + reg_addr <= 
std_ulogic_vector(unsigned(reg_addr) + + decode_autoinc(reg_ctrl(10 downto 9))); + end if; + end if; + end if; + end process; + + -- ACK is hard wired to req for register writes. For data read/writes + -- (aka commands), it's sent when the state machine got the WB ack. + -- + -- Note: We never set it to 1, we just pass dmi_req back when acking. + -- This fulfills two purposes: + -- + -- * Avoids polluting the ack signal when another DMI slave is + -- selected. This allows the decoder to just OR all the acks + -- together rather than mux them. + -- + -- * Makes ack go down on the same cycle as req goes down, thus + -- saving a clock cycle. This is safe because we know that + -- the state machine will no longer be in DMI_WAIT state on + -- the next cycle, so we won't be bouncing the signal back up. + -- + dmi_ack <= dmi_req when (dmi_addr /= DBG_WB_DATA or state = DMI_WAIT) else '0'; + + -- Some WB signals are direct wires from registers or DMI + wb_out.adr <= reg_addr; + wb_out.dat <= dmi_din; + wb_out.sel <= reg_ctrl(7 downto 0); + wb_out.we <= dmi_wr; + + -- We always move WB cyc and stb simultaneously (no pipelining yet...) + wb_out.cyc <= '1' when state = WB_CYCLE else '0'; + wb_out.stb <= '1' when state = WB_CYCLE else '0'; + + -- Data latch. WB will take the read data away as soon as the cycle + -- terminates but we must maintain it on DMI until req goes down, so + -- we latch it. (Q: Should we move that latch to dmi_dtm itself ?) 
+ -- + latch_reads : process(clk) + begin + if rising_edge(clk) then + if state = WB_CYCLE and wb_in.ack = '1' and dmi_wr = '0' then + data_latch <= wb_in.dat; + end if; + end if; + end process; + + -- Command state machine (generate wb_cyc) + wb_trigger : process(clk) + begin + if rising_edge(clk) then + if (rst) then + state <= IDLE; + else + case state is + when IDLE => + if dmi_req = '1' and dmi_addr = DBG_WB_DATA then + state <= WB_CYCLE; + end if; + when WB_CYCLE => + if wb_in.ack then + state <= DMI_WAIT; + end if; + when DMI_WAIT => + if dmi_req = '0' then + state <= IDLE; + end if; + end case; + end if; + end if; + end process; +end architecture behaviour;