@ -192,6 +192,7 @@ ECP_FLASH_OFFSET=0x80000
soc_extra_v += litesdcard/generated/lattice/litesdcard_core.v


@ -0,0 +1,298 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.math_real.all;

library work;
use work.wishbone_types.all;

-- DMI debug transport module: presents a DMI master interface (address,
-- data, request/write strobes and ack) in the sys_clk domain, driven by
-- the JTAG-side logic in the architecture below.
entity dmi_dtm is
-- ABITS is the width of dmi_addr.
-- NOTE(review): DBITS is used by the data ports below, but no declaration
-- of it is visible in this view -- confirm it is declared as a generic.
generic(ABITS : INTEGER:=8;

-- System clock and reset. sys_reset is handled synchronously by the
-- request synchroniser, and is also used as an asynchronous reset by the
-- JTAG-domain processes.
port(sys_clk : in std_ulogic;
sys_reset : in std_ulogic;
-- Address field of the latched request.
dmi_addr : out std_ulogic_vector(ABITS - 1 downto 0);
-- Read data; copied into the request register when a read completes.
dmi_din : in std_ulogic_vector(DBITS - 1 downto 0);
-- Data field of the latched request.
dmi_dout : out std_ulogic_vector(DBITS - 1 downto 0);
-- Request strobe: jtag_req after synchronisation into sys_clk.
dmi_req : out std_ulogic;
-- '1' while the latched request opcode is DMI_REQ_WR.
dmi_wr : out std_ulogic;
-- Acknowledge from the DMI slave; synchronised into the JTAG clock domain.
dmi_ack : in std_ulogic
-- dmi_err : in std_ulogic TODO: Add error response
end entity dmi_dtm;

architecture behaviour of dmi_dtm is
-- Signals coming out of the JTAGG block
signal jtag_reset_n : std_ulogic;
signal tdi : std_ulogic;
signal tdo : std_ulogic;
signal tck : std_ulogic;
signal jce1 : std_ulogic;
signal jshift : std_ulogic;
signal update : std_ulogic;

-- signals to match dmi_dtm_xilinx
signal jtag_reset : std_ulogic;
signal capture : std_ulogic;
signal jtag_clk : std_ulogic;
signal sel : std_ulogic;
signal shift : std_ulogic;

-- delays
signal jce1_d : std_ulogic;
constant TCK_DELAY : INTEGER := 8;
signal tck_d : std_ulogic_vector(TCK_DELAY+1 downto 1);

-- ** JTAG clock domain **

-- Shift register
signal shiftr : std_ulogic_vector(ABITS + DBITS + 1 downto 0);

-- Latched request
signal request : std_ulogic_vector(ABITS + DBITS + 1 downto 0);

-- A request is present
signal jtag_req : std_ulogic;

-- Synchronizer for jtag_rsp (sys clk -> jtag_clk)
signal dmi_ack_0 : std_ulogic;
signal dmi_ack_1 : std_ulogic;

-- ** sys clock domain **

-- Synchronizer for jtag_req (jtag clk -> sys clk)
signal jtag_req_0 : std_ulogic;
signal jtag_req_1 : std_ulogic;

-- ** combinational signals **
signal jtag_bsy : std_ulogic;
signal op_valid : std_ulogic;
signal rsp_op : std_ulogic_vector(1 downto 0);

-- ** Constants **
constant DMI_REQ_NOP : std_ulogic_vector(1 downto 0) := "00";
constant DMI_REQ_RD : std_ulogic_vector(1 downto 0) := "01";
constant DMI_REQ_WR : std_ulogic_vector(1 downto 0) := "10";
constant DMI_RSP_OK : std_ulogic_vector(1 downto 0) := "00";
constant DMI_RSP_BSY : std_ulogic_vector(1 downto 0) := "11";

attribute ASYNC_REG : string;
attribute ASYNC_REG of jtag_req_0: signal is "TRUE";
attribute ASYNC_REG of jtag_req_1: signal is "TRUE";
attribute ASYNC_REG of dmi_ack_0: signal is "TRUE";
attribute ASYNC_REG of dmi_ack_1: signal is "TRUE";

-- Declaration of the JTAGG primitive that is instantiated below as "jtag".
-- NOTE(review): presumably the Lattice ECP5 JTAG access primitive -- confirm
-- the generic and port list against the vendor primitive documentation.
component JTAGG is
generic (
-- ER1 / ER2 both default to "ENABLED".
ER1 : string := "ENABLED";
ER2 : string := "ENABLED"
-- Serial data returned by the user logic (this design drives JTDO1 from
-- tdo and ties JTDO2 to '0').
JTDO1 : in std_ulogic;
JTDO2 : in std_ulogic;
-- Serial data in and test clock out of the primitive.
JTDI : out std_ulogic;
JTCK : out std_ulogic;
-- Left unconnected by this design (see the note about JRTI1 and SEL).
JRTI1 : out std_ulogic;
JRTI2 : out std_ulogic;
-- Shift / update indications and active-low reset used by the architecture.
JSHIFT : out std_ulogic;
JUPDATE : out std_ulogic;
JRSTN : out std_ulogic;
-- JCE1 is edge-detected to generate "capture"; JCE2 is unused here.
JCE1 : out std_ulogic;
JCE2 : out std_ulogic
end component;

-- Declaration of the LUT4 primitive used by the "del" generate loop to
-- build the TCK delay chain.
-- NOTE(review): the instantiation connects ports A, B, C, D and Z, which are
-- not visible in this declaration as shown -- confirm the port list.
component LUT4 is
generic (
-- Truth-table initialisation value for the LUT.
INIT : std_logic_vector
end component;


-- JTAGG instance connecting the JTAG primitive to the local signals.
-- Only the "1" ports carry traffic: JTDO1 returns tdo and JCE1 feeds the
-- capture edge detector. JTDO2 is tied to '0' and JRTI1, JRTI2 and JCE2
-- are left open.
jtag: JTAGG
generic map(
port map (
JTDO1 => tdo,
JTDO2 => '0',
JTDI => tdi,
JTCK => tck,
JRTI1 => open,
JRTI2 => open,
JSHIFT => jshift,
JUPDATE => update,
-- Active-low reset; inverted into jtag_reset further down.
JRSTN => jtag_reset_n,
JCE1 => jce1,
JCE2 => open

-- JRTI1 looks like it could be connected to SEL, but
-- in practice JRTI1 is only high briefly, not for the duration
-- of the transmission. Possibly mw_debug could be modified.
-- The ECP5 is probably the only JTAG device anyway.
sel <= '1';

-- TDI needs to align with TCK, we use LUT delays here.
-- From
-- The head of the delay chain is the raw TCK from the JTAGG block.
tck_d(1) <= tck;
-- Chain TCK_DELAY LUT4 cells in series: stage i takes tck_d(i) on input A
-- and drives tck_d(i+1) from output Z.
del: for i in 1 to TCK_DELAY generate
-- "keep" is set on every LUT instance, presumably so the tools do not
-- optimise the delay cells away -- confirm with the synthesis flow.
attribute keep : boolean;
attribute keep of l: label is true;
l: LUT4
generic map(
-- Only INIT bit 1 is set; with B, C and D tied to '0' this presumably makes
-- Z follow A -- verify against the LUT4 INIT bit ordering.
INIT => b"0000_0000_0000_0010"
port map (
A => tck_d(i),
B => '0', C => '0', D => '0',
Z => tck_d(i+1)
end generate;
-- The tail of the chain is the delayed clock used by all JTAG-domain processes.
jtag_clk <= tck_d(TCK_DELAY+1);

-- capture signal
-- Rising-edge detector on JCE1, clocked by the delayed JTAG clock.
-- jce1_d holds the previous sample of jce1; capture is registered high on
-- the clock edge where jce1 is '1' and the previous sample was '0'.
-- There is no reset: jce1_d and capture take their values from the first
-- clock edges.
jce1_sync : process(jtag_clk)
if rising_edge(jtag_clk) then
jce1_d <= jce1;
capture <= jce1 and not jce1_d;
end if;
end process;

-- latch the shift signal, otherwise
-- we miss the last shift in
-- (maybe because we are delaying tck?)
-- Registers JSHIFT into the delayed JTAG clock domain to produce "shift".
-- sys_reset clears the register asynchronously, so it must appear in the
-- sensitivity list; otherwise simulation only sees the reset on a jtag_clk
-- event, while synthesis infers a true asynchronous clear.
shift_sync : process(jtag_clk, sys_reset)
begin
    if sys_reset = '1' then
        shift <= '0';
    elsif rising_edge(jtag_clk) then
        shift <= jshift;
    end if;
end process;

jtag_reset <= not jtag_reset_n;

-- dmi_req synchronization
-- Two-stage synchroniser bringing jtag_req (JTAG clock domain) into the
-- sys_clk domain; jtag_req_1 is the synchronised value that drives dmi_req.
-- The reset and sampling assignments must be mutually exclusive: without
-- the "else" the later assignments override the reset values, and the
-- synchroniser would only sample while sys_reset is asserted.
dmi_req_sync : process(sys_clk)
begin
    -- sys_reset is synchronous
    if rising_edge(sys_clk) then
        if sys_reset = '1' then
            jtag_req_0 <= '0';
            jtag_req_1 <= '0';
        else
            jtag_req_0 <= jtag_req;
            jtag_req_1 <= jtag_req_0;
        end if;
    end if;
end process;
dmi_req <= jtag_req_1;

-- dmi_ack synchronization
-- Two-stage register chain bringing dmi_ack (sys_clk domain) into the
-- jtag_clk domain: dmi_ack -> dmi_ack_0 -> dmi_ack_1. dmi_ack_1 is the value
-- used by jtag_bsy and by the request-completion logic.
dmi_ack_sync: process(jtag_clk, jtag_reset)
-- jtag_reset is async (see comments)
if jtag_reset = '1' then
-- Asynchronous clear of both stages.
dmi_ack_0 <= '0';
dmi_ack_1 <= '0';
elsif rising_edge(jtag_clk) then
dmi_ack_0 <= dmi_ack;
dmi_ack_1 <= dmi_ack_0;
end if;
end process;
-- jtag_bsy indicates whether we can start a new request, we can when
-- we aren't already processing one (jtag_req) and the synchronized ack
-- of the previous one is 0.
jtag_bsy <= jtag_req or dmi_ack_1;

-- decode request type in shift register
with shiftr(1 downto 0) select op_valid <=
'1' when DMI_REQ_RD,
'1' when DMI_REQ_WR,
'0' when others;

-- encode response op
rsp_op <= DMI_RSP_BSY when jtag_bsy = '1' else DMI_RSP_OK;

-- Some DMI out signals are directly driven from the request register
dmi_addr <= request(ABITS + DBITS + 1 downto DBITS + 2);
dmi_dout <= request(DBITS + 1 downto 2);
dmi_wr <= '1' when request(1 downto 0) = DMI_REQ_WR else '0';

-- TDO is wired to shift register bit 0
tdo <= shiftr(0);

-- Main state machine. Handles shift registers, request latch and
-- jtag_req latch. Could be split into 3 processes but it's probably
-- not worthwhile.
-- JTAG-domain state: the shift register (shiftr), the latched request
-- (request) and the request-pending flag (jtag_req).
-- Within one clock edge the last assignment to a signal wins, so the order
-- of the "if" blocks below is significant: the capture load of shiftr
-- overrides the shift and the "busy" op write, and clearing jtag_req on
-- completion overrides setting it on update.
shifter: process(jtag_clk, jtag_reset, sys_reset)
-- Asynchronous reset from either the JTAG reset or the system reset.
if jtag_reset = '1' or sys_reset = '1' then
shiftr <= (others => '0');
jtag_req <= '0';
request <= (others => '0');
elsif rising_edge(jtag_clk) then

-- Handle jtag "commands" when sel is 1
if sel = '1' then
-- Shift state: TDI enters at the top bit and the register moves down by
-- one, so bit 0 (wired to TDO) is the next bit out.
if shift = '1' then
shiftr <= tdi & shiftr(ABITS + DBITS + 1 downto 1);
end if;

-- Update state (trigger)
-- Latch the request if we aren't already processing one and
-- it has a valid command opcode.
if update = '1' and op_valid = '1' then
if jtag_bsy = '0' then
request <= shiftr;
jtag_req <= '1';
end if;
-- Set the shift register "op" to "busy". This will prevent
-- us from re-starting the command on the next update if
-- the command completes before that.
shiftr(1 downto 0) <= DMI_RSP_BSY;
end if;

-- Request completion.
-- Capture the response data for reads and clear request flag.
-- Note: We clear req (and thus dmi_req) here which relies on tck
-- ticking and sel set. This means we are stuck with dmi_req up if
-- the jtag interface stops. Slaves must be resilient to this.
if jtag_req = '1' and dmi_ack_1 = '1' then
jtag_req <= '0';
if request(1 downto 0) = DMI_REQ_RD then
-- NOTE(review): dmi_din is sampled directly in the jtag_clk domain with
-- no synchroniser; presumably the slave holds it stable while dmi_ack is
-- asserted -- confirm.
request(DBITS + 1 downto 2) <= dmi_din;
end if;
end if;

-- Capture state, grab latch content with updated status
-- The low two bits are replaced by rsp_op (busy / ok) instead of the
-- request opcode.
if capture = '1' then
shiftr <= request(ABITS + DBITS + 1 downto 2) & rsp_op;
end if;

end if;
end if;
end process;
end architecture behaviour;

@ -4,7 +4,7 @@ CFLAGS = -O2 -g -Wall -std=c99
all: mw_debug

mw_debug: mw_debug.c
$(CC) -o $@ $^ $(CFLAGS) -lurjtag
$(CC) -o $@ $^ $(CFLAGS) -Wl,-Bstatic -lurjtag -Wl,-Bdynamic -lftdi1 -lusb-1.0 -lreadline

rm -f mw_debug

@ -49,7 +49,7 @@
static bool debug;

struct backend {
int (*init)(const char *target);
int (*init)(const char *target, int freq);
int (*reset)(void);
int (*command)(uint8_t op, uint8_t addr, uint64_t *data);
@ -67,13 +67,15 @@ static void check(int r, const char *failstr)

static int sim_fd = -1;

static int sim_init(const char *target)
static int sim_init(const char *target, int freq)
struct sockaddr_in saddr;
struct hostent *hp;
const char *p, *host;
int port, rc;


if (!target)
target = "localhost:13245";
p = strchr(target, ':');
@ -210,23 +212,34 @@ static struct backend sim_backend = {

static urj_chain_t *jc;

static int jtag_init(const char *target)
static int common_jtag_init(const char *target, int freq)
const char *sep;
const char *cable;
char *params[] = { NULL, };
urj_part_t *p;
uint32_t id;
int rc, part;
const int max_params = 20;
char *params[max_params+1];
int rc;

if (!target)
target = "probe";
sep = strchr(target, ':');
memset(params, 0x0, sizeof(params));
sep = strchr(target, ' ');
cable = strndup(target, sep - target);
if (sep && *sep) {
fprintf(stderr, "jtag cable params not supported yet\n");
char *param_str = strdup(sep);
char *s = param_str;
for (int i = 0; *s; s++) {
if (*s == ' ') {
if (i >= max_params) {
fprintf(stderr, "Too many jtag cable params\n");
return -1;
*s = '\0';
params[i] = s+1;
if (debug)
printf("Opening jtag backend cable '%s'\n", cable);

@ -241,17 +254,35 @@ static int jtag_init(const char *target)
char *cparams[] = { NULL, NULL,};
rc = urj_tap_cable_usb_probe(cparams);
if (rc != URJ_STATUS_OK) {
fprintf(stderr, "JTAG cable probe failed\n");
fprintf(stderr, "JTAG cable probe failed: %s\n", urj_error_describe());
return -1;
cable = strdup(cparams[1]);
rc = urj_tap_chain_connect(jc, cable, params);
if (rc != URJ_STATUS_OK) {
fprintf(stderr, "JTAG cable detect failed\n");
fprintf(stderr, "JTAG cable detect failed: %s\n", urj_error_describe());
return -1;

if (freq) {
urj_tap_cable_set_frequency(jc->cable, freq);

return 0;

static int bscane2_init(const char *target, int freq)
urj_part_t *p;
uint32_t id;
int rc;

rc = common_jtag_init(target, freq);
if (rc < 0) {
return rc;

/* XXX Hard wire part 0, that might need to change (use params and detect !) */
rc = urj_tap_manual_add(jc, 6);
if (rc < 0) {
@ -264,7 +295,7 @@ static int jtag_init(const char *target)
urj_part_parts_set_instruction(jc->parts, "BYPASS");

jc->active_part = part = 0;
jc->active_part = 0;

p = urj_tap_chain_active_part(jc);
if (!p) {
@ -300,6 +331,69 @@ static int jtag_init(const char *target)
return 0;

static int ecp5_init(const char *target, int freq)
urj_part_t *p;
uint32_t id;
int rc;

rc = common_jtag_init(target, freq);
if (rc < 0) {
return rc;

/* XXX Hard wire part 0, that might need to change (use params and detect !) */
rc = urj_tap_manual_add(jc, 8);
if (rc < 0) {
fprintf(stderr, "JTAG failed to add part! : %s\n", urj_error_describe());
return -1;
if (jc->parts == NULL || jc->parts->len == 0) {
fprintf(stderr, "JTAG Something's wrong after adding part! : %s\n", urj_error_describe());
return -1;
urj_part_parts_set_instruction(jc->parts, "BYPASS");

jc->active_part = 0;

p = urj_tap_chain_active_part(jc);
if (!p) {
fprintf(stderr, "Failed to get active JTAG part\n");
return -1;
rc = urj_part_data_register_define(p, "IDCODE_REG", 32);
if (rc != URJ_STATUS_OK) {
fprintf(stderr, "JTAG failed to add IDCODE_REG register! : %s\n",
return -1;
// READ_ID = 0xE0 = 11100000, from Lattice TN1260 sysconfig guide
if (urj_part_instruction_define(p, "IDCODE", "11100000", "IDCODE_REG") == NULL) {
fprintf(stderr, "JTAG failed to add IDCODE instruction! : %s\n",
return -1;
rc = urj_part_data_register_define(p, "USER2_REG", 74);
if (rc != URJ_STATUS_OK) {
fprintf(stderr, "JTAG failed to add USER2_REG register !\n");
return -1;
// ER1 = 0x32 = 00110010b
if (urj_part_instruction_define(p, "USER2", "00110010", "USER2_REG") == NULL) {
fprintf(stderr, "JTAG failed to add USER2 instruction !\n");
return -1;
urj_part_set_instruction(p, "IDCODE");
urj_tap_chain_shift_data_registers(jc, 1);
id = urj_tap_register_get_value(p->active_instruction->data_register->out);
printf("Found device ID: 0x%08x\n", id);
urj_part_set_instruction(p, "USER2");

return 0;

static int jtag_reset(void)
return 0;
@ -339,8 +433,14 @@ static int jtag_command(uint8_t op, uint8_t addr, uint64_t *data)
return rc;

static struct backend jtag_backend = {
.init = jtag_init,
/*
 * Backend selected by "-b jtag" or "-b bscane2", and used as the default
 * when no backend is given. Only the init hook is specific to it; reset and
 * command are the shared JTAG handlers.
 */
static struct backend bscane2_backend = {
.init = bscane2_init,
.reset = jtag_reset,
.command = jtag_command,

/*
 * Backend selected by "-b ecp5". It differs from bscane2_backend only in
 * its init hook; reset and command are the shared JTAG handlers.
 */
static struct backend ecp5_backend = {
.init = ecp5_init,
.reset = jtag_reset,
.command = jtag_command,
@ -662,7 +762,7 @@ static void ltrig_set(uint64_t addr)

static void usage(const char *cmd)
fprintf(stderr, "Usage: %s -b <jtag|sim> <command> <args>\n", cmd);
fprintf(stderr, "Usage: %s -b <jtag|ecp5|sim> <command> <args>\n", cmd);

fprintf(stderr, "\n");
fprintf(stderr, " CPU core:\n");
@ -706,7 +806,7 @@ int main(int argc, char *argv[])
const char *progname = argv[0];
const char *target = NULL;
int rc, i = 1;
int rc, i = 1, freq = 0;

b = NULL;

@ -717,9 +817,10 @@ int main(int argc, char *argv[])
{ "backend", required_argument, 0, 'b' },
{ "target", required_argument, 0, 't' },
{ "debug", no_argument, 0, 'd' },
/*
 * -s takes a value ("s:" in the optstring), so the long form must be
 * declared with required_argument too; with no_argument, "--frequency N"
 * would reach the 's' case with optarg == NULL and crash in atoi().
 */
{ "frequency", required_argument, 0, 's' },
{ 0, 0, 0, 0 }
c = getopt_long(argc, argv, "dhb:t:", lopts, &oindex);
c = getopt_long(argc, argv, "dhb:t:s:", lopts, &oindex);
if (c < 0)
switch(c) {
@ -729,8 +830,10 @@ int main(int argc, char *argv[])
case 'b':
if (strcmp(optarg, "sim") == 0)
b = &sim_backend;
else if (strcmp(optarg, "jtag") == 0)
b = &jtag_backend;
else if (strcmp(optarg, "jtag") == 0 || strcmp(optarg, "bscane2") == 0)
b = &bscane2_backend;
else if (strcmp(optarg, "ecp5") == 0)
b = &ecp5_backend;
else {
fprintf(stderr, "Unknown backend %s\n", optarg);
@ -739,15 +842,22 @@ int main(int argc, char *argv[])
case 't':
target = optarg;
case 's':
freq = atoi(optarg);
if (freq == 0) {
fprintf(stderr, "Bad frequency %s\n", optarg);
case 'd':
debug = true;

if (b == NULL)
b = &jtag_backend;
b = &bscane2_backend;

rc = b->init(target);
rc = b->init(target, freq);
if (rc < 0)
for (i = optind; i < argc; i++) {
@ -789,7 +899,7 @@ int main(int argc, char *argv[])
if ((i+1) >= argc)
addr = strtoul(argv[++i], NULL, 16);
if (((i+1) < argc) && isdigit(argv[i+1][0]))
if (((i+1) < argc) && isxdigit(argv[i+1][0]))
count = strtoul(argv[++i], NULL, 16);
mem_read(addr, count);
} else if (strcmp(argv[i], "mw") == 0) {
@ -807,7 +917,7 @@ int main(int argc, char *argv[])
if ((i+1) >= argc)
filename = argv[++i];
if (((i+1) < argc) && isdigit(argv[i+1][0]))
if (((i+1) < argc) && isxdigit(argv[i+1][0]))
addr = strtoul(argv[++i], NULL, 16);
load(filename, addr);
} else if (strcmp(argv[i], "save") == 0) {
