Browse Source

latest a2node

master
wtf 4 months ago
parent
commit
6d5d5f1f21
  1. 411
      rtl/a2node/a2wb.v
  2. 186
      rtl/a2node/arb.v
  3. 183
      rtl/a2node/bus_wb2.v
  4. 15
      rtl/a2node/cmd_a2l2.v
  5. 180
      rtl/a2node/cmd_wb.v
  6. 71
      rtl/a2node/defs.v
  7. 68
      rtl/a2node/readme.md
  8. 156
      rtl/a2node/sim_1000_4w.txt
  9. 26
      rtl/a2node/smp.v
  10. 313
      rtl/a2node/top.v

411
rtl/a2node/a2wb.v

@ -0,0 +1,411 @@ @@ -0,0 +1,411 @@
// A2 Core Bridge

// adapt cores and buses with generic module
// one thread/core for now; multithread needs thread tag, deeper queues
// could include l2, or interface to wider bus to speed up cache line fetches

`include "defs.v"

// A2WB: bridges up to four core interfaces to one main bus.
// Core-side commands are captured per core (cmd_wb), arbitrated (arb) and
// handed to a bus interface unit selected by BUS_TYPE; responses are routed
// back to the core named in the response's core-id field.
// Note: the internal arrays and the arb port list are fixed at four slots,
// independent of MAX_CORES.
module A2WB #(
parameter [0:15] CORE_TYPES = {`CORE_TYPE_NONE, `CORE_TYPE_NONE, `CORE_TYPE_NONE, `CORE_TYPE_NONE}, // one 4-bit `CORE_TYPE_* code per slot; slot i is bits [i*4:i*4+3]
parameter [0:3] BUS_TYPE = `BUS_TYPE_WB2, // `BUS_TYPE_* code selecting the bus-side wiring and interface unit
parameter MAX_CORES = 4 // number of core slots walked by the generate loops
) (
input clk,
input rst, // forwarded to the arb, cmd_wb and bus interface instances
input [(`WB2_WB_OUT_START+1)*4-1:0] cores_in, // four packed core-out vectors, core 0 in the most-significant slice
output [(`WB2_WB_IN_START+1)*4-1:0] cores_out, // four packed core-in vectors, core 0 in the most-significant slice
input [127:0] bus_in, // responses from the main bus (layout per `BUS_*_IN_START)
output [255:0] bus_out // requests to the main bus (layout per `BUS_*_OUT_START)
);

integer NUM_CORES;

genvar i;

// ------------------------------------------------------------------------------------------------
// I/O Connections

// cores must be contiguous, starting at 0

// core inputs to bridge:

/* wb1 core out:
assign c0_out = {wb_stb_0, wb_cyc_0, wb_we_0, wb_sel_0, wb_adr_0, wb_dat_o_0, 1'b0, 32'b0, 27'b0, // 128
ext_in}; // 128
*/
/* wb2 core out:
assign c0_out = {i_wb_stb_0, i_wb_cyc_0, 1'b0 , 4'b0, {i_wb_adr_0, 2'b0}, 32'b0, 1'b0, 32'b0, 27'b0, // 128
d_wb_stb_0, d_wb_cyc_0, d_wb_we_0, d_wb_sel_0, d_wb_adr_0, d_wb_dat_o_0, 1'b0, 32'b0, 27'b0, // 128
ext_in}; // 128
*/
/* a2l2 core out:
assign c0_in = {i_wb_ack_0, i_wb_dat_i_0, 95'b0, // 128
ext_out}; // 128
*/

/*
bus out/in:
assign bus_out[102] = bus_i_wb_cyc;
assign bus_out[101] = bus_i_wb_stb;
assign bus_out[100:71] = bus_i_wb_adr;
assign bus_out[70] = bus_d_wb_cyc;
assign bus_out[69] = bus_d_wb_stb;
assign bus_out[68] = bus_d_wb_we;
assign bus_out[67:64] = bus_d_wb_sel;
assign bus_out[63:32] = bus_d_wb_adr;
assign bus_out[31:0] = bus_d_wb_datw;

assign bus_i_wb_ack = bus_in[65];
assign bus_i_wb_datr = bus_in[64:33];
assign bus_d_wb_ack = bus_in[32];
assign bus_d_wb_datr = bus_in[31:0];
*/


wire [`WB2_WB_OUT_START:0] core_in [0:3];
assign {core_in[0], core_in[1], core_in[2], core_in[3]} = cores_in;
wire [`WB2_WB_OUT_START:0] core_out [0:3];
assign cores_out = {core_out[0], core_out[1], core_out[2], core_out[3]};

wire i_wb_cyc [0:3];
wire i_wb_stb [0:3];
wire [31:0] i_wb_adr [0:3];
wire i_wb_ack [0:3];
wire [31:0] i_wb_datr[0:3];
wire d_wb_cyc [0:3];
wire d_wb_stb [0:3];
wire d_wb_we [0:3];
wire [3:0] d_wb_sel [0:3];
wire [31:0] d_wb_adr [0:3];
wire [31:0] d_wb_datw [0:3];
wire d_wb_ack [0:3];
wire [31:0] d_wb_datr [0:3];
wire [127:0] ext_cmd [0:3];
wire [127:0] ext_rsp [0:3];

wire [`CMD_SIZE-1:0] cmd_out_0[0:3];
wire [`CMD_SIZE-1:0] cmd_out_1[0:3];
wire [1:0] cmd_taken[0:3];
wire [1:0] cmd_complete[0:3];
wire [`CMD_SIZE-1:0] req_0;
wire [`CMD_SIZE-1:0] req_1;
wire [`RSP_SIZE-1:0] core_rsp_0[0:3];
wire [`RSP_SIZE-1:0] core_rsp_1[0:3];
wire [`RSP_SIZE-1:0] rsp_0;
wire [`RSP_SIZE-1:0] rsp_1;

// to do this, need to label scope and ref it
//if (BUS_TYPE == `BUS_TYPE_WB1) begin
// Single-Wishbone (BUS_TYPE_WB1) bus-side nets.
wire bus_wb_cyc;
wire bus_wb_stb;
wire bus_wb_we;
wire [3:0] bus_wb_sel;
wire [31:0] bus_wb_adr;
wire [31:0] bus_wb_datw;
wire bus_wb_ack;
wire [31:0] bus_wb_datr;
//end

//if (BUS_TYPE == `BUS_TYPE_WB2) begin
// Dual-Wishbone (BUS_TYPE_WB2) bus-side nets: instruction port.
wire bus_i_wb_cyc;
wire bus_i_wb_stb;
wire [31:0] bus_i_wb_adr;
wire bus_i_wb_ack;
wire [31:0] bus_i_wb_datr;
wire bus_i_rdy;               // bus unit ready for a new command on lane 0 (feeds arb.bus_rdy_0)
// Dual-Wishbone bus-side nets: data port.
wire bus_d_wb_cyc;
wire bus_d_wb_stb;
wire bus_d_wb_we;
wire [3:0] bus_d_wb_sel;
wire [31:0] bus_d_wb_adr;
wire [31:0] bus_d_wb_datw;
wire bus_d_wb_ack;
wire [31:0] bus_d_wb_datr;
wire bus_d_rdy;               // bus unit ready for a new command on lane 1 (feeds arb.bus_rdy_1)
// Each net is declared exactly once; redeclaring an already-declared net in
// the same scope is an error in Verilog.
//end

// Count the populated core slots (type A2L2, WB1 or WB2) among the first
// MAX_CORES slots of a CORE_TYPES-style vector (at most four slots).
function integer populated_cores (input [0:15] types);
   integer slot;
   begin
      populated_cores = 0;
      for (slot = 0; (slot < MAX_CORES) && (slot < 4); slot = slot + 1) begin
         case (types[slot*4 +: 4])
            `CORE_TYPE_A2L2,
            `CORE_TYPE_WB1,
            `CORE_TYPE_WB2: populated_cores = populated_cores + 1;
            default: ;
         endcase
      end
   end
endfunction

// NUM_CORES gets a single driver. Incrementing it from several continuous
// assignments (one per generate iteration) creates multiple drivers and a
// combinational self-reference.
assign NUM_CORES = populated_cores(CORE_TYPES);

// Unpack each core's input vector into Wishbone/ext signals and pack the
// per-core responses into its output vector, according to the slot's type.
generate
   for (i = 0; i < MAX_CORES; i++) begin : core_io
      case (CORE_TYPES[i*4:i*4+3])
         `CORE_TYPE_NONE: begin
            // empty slot: nothing is connected
         end
         `CORE_TYPE_A2L2: begin
            // a2l2: not implemented yet
         end
         `CORE_TYPE_WB1: begin
            // single combined Wishbone port, carried on the d_* signals
            assign d_wb_cyc[i] = core_in[i][`WB1_WB_OUT_START];
            assign d_wb_stb[i] = core_in[i][`WB1_WB_OUT_START-1];
            assign d_wb_we[i] = core_in[i][`WB1_WB_OUT_START-2];
            assign d_wb_sel[i] = core_in[i][`WB1_WB_OUT_START-3:`WB1_WB_OUT_START-6];
            assign d_wb_adr[i] = core_in[i][`WB1_WB_OUT_START-7:`WB1_WB_OUT_START-38];
            assign d_wb_datw[i] = core_in[i][`WB1_WB_OUT_START-39:`WB1_WB_OUT_START-70];
            assign ext_cmd[i] = core_in[i][`WB1_EXT_OUT_START:0];
            // responses for a WB1 core come from response lane 0 only
            assign core_out[i][`WB1_WB_IN_START] = core_rsp_0[i][`RSP_VALID];
            assign core_out[i][`WB1_WB_IN_START-1:`WB1_WB_IN_START-32] = core_rsp_0[i][`RSP_DATA];
            assign core_out[i][`WB1_EXT_IN_START:0] = ext_rsp[i];
         end
         `CORE_TYPE_WB2: begin
            // instruction port (read-only: cyc/stb/adr)
            assign i_wb_cyc[i] = core_in[i][`WB2_I_WB_OUT_START];
            assign i_wb_stb[i] = core_in[i][`WB2_I_WB_OUT_START-1];
            assign i_wb_adr[i] = core_in[i][`WB2_I_WB_OUT_START-7:`WB2_I_WB_OUT_START-38];
            // data port
            assign d_wb_cyc[i] = core_in[i][`WB2_D_WB_OUT_START];
            assign d_wb_stb[i] = core_in[i][`WB2_D_WB_OUT_START-1];
            assign d_wb_we[i] = core_in[i][`WB2_D_WB_OUT_START-2];
            assign d_wb_sel[i] = core_in[i][`WB2_D_WB_OUT_START-3:`WB2_D_WB_OUT_START-6];
            assign d_wb_adr[i] = core_in[i][`WB2_D_WB_OUT_START-7:`WB2_D_WB_OUT_START-38];
            assign d_wb_datw[i] = core_in[i][`WB2_D_WB_OUT_START-39:`WB2_D_WB_OUT_START-70];
            assign ext_cmd[i] = core_in[i][`WB2_EXT_OUT_START:0];
            // lane 0 answers the instruction port, lane 1 the data port
            assign core_out[i][`WB2_I_WB_IN_START] = core_rsp_0[i][`RSP_VALID];
            assign core_out[i][`WB2_I_WB_IN_START-1:`WB2_I_WB_IN_START-32] = core_rsp_0[i][`RSP_DATA];
            assign core_out[i][`WB2_D_WB_IN_START] = core_rsp_1[i][`RSP_VALID];
            assign core_out[i][`WB2_D_WB_IN_START-1:`WB2_D_WB_IN_START-32] = core_rsp_1[i][`RSP_DATA];
            assign core_out[i][`WB2_EXT_IN_START:0] = ext_rsp[i];
         end
      endcase
   end
endgenerate

// ------------------------------------------------------------------------------------------------
// Command Interfaces
//

// One command interface per populated Wishbone core slot. Each cmd_wb
// instance receives the core's Wishbone/ext signals plus this core's
// cmd_taken/cmd_complete bits and produces two command outputs
// (cmd_out_0/cmd_out_1) that feed the arbiter.
generate
for (i = 0; i < MAX_CORES; i++) begin: cmd
case (CORE_TYPES[i*4:i*4+3])
`CORE_TYPE_NONE: begin
// empty slot: no command interface
end
`CORE_TYPE_A2L2: begin
// convert a2l2 to internal format
// (not implemented: no instance is created for this type)
end
`CORE_TYPE_WB1: begin
// single-port core: the instruction-side inputs are tied to zero
cmd_wb #(.CORE_ID(i), .CORE_TYPE(CORE_TYPES[i*4:i*4+3]), .BUS_TYPE(BUS_TYPE)) cmd_wb1 (
.clk(clk),
.rst(rst),
.i_wb_cyc('b0),
.i_wb_stb('b0),
.i_wb_adr('h0),
.d_wb_cyc(d_wb_cyc[i]),
.d_wb_stb(d_wb_stb[i]),
.d_wb_we(d_wb_we[i]),
.d_wb_sel(d_wb_sel[i]),
.d_wb_adr(d_wb_adr[i]),
.d_wb_datw(d_wb_datw[i]),
.ext_cmd(ext_cmd[i]),
.cmd_taken(cmd_taken[i]),
.cmd_complete(cmd_complete[i]),
.cmd_out_0(cmd_out_0[i]),
.cmd_out_1(cmd_out_1[i])
);
end
`CORE_TYPE_WB2: begin
// dual-port core: both instruction and data ports are connected
cmd_wb #(.CORE_ID(i), .CORE_TYPE(CORE_TYPES[i*4:i*4+3]), .BUS_TYPE(BUS_TYPE)) cmd_wb2 (
.clk(clk),
.rst(rst),
.i_wb_cyc(i_wb_cyc[i]),
.i_wb_stb(i_wb_stb[i]),
.i_wb_adr(i_wb_adr[i]),
.d_wb_cyc(d_wb_cyc[i]),
.d_wb_stb(d_wb_stb[i]),
.d_wb_we(d_wb_we[i]),
.d_wb_sel(d_wb_sel[i]),
.d_wb_adr(d_wb_adr[i]),
.d_wb_datw(d_wb_datw[i]),
.ext_cmd(ext_cmd[i]),
.cmd_taken(cmd_taken[i]),
.cmd_complete(cmd_complete[i]),
.cmd_out_0(cmd_out_0[i]),
.cmd_out_1(cmd_out_1[i])
);
end
endcase
end
endgenerate

// ------------------------------------------------------------------------------------------------
// Arbitration
//
// LRU, etc. select from pending cmds; also needs smp to stall some/all cmds
// do addr cmp here, if necessary? or could do in smp - important if multiple outstanding req's allowed
// by any bus, which means there will be cmd and rsp queues with ordering requirements
// also needs to block cmds when bus is busy

// cmds include valid indicator
// cmd_taken is bit vector for cmds 1,0
// cmd_out's go to the bus; rsp_in's have to be associated with requesting core

// Arbiter instance: takes the lane-0 and lane-1 command of each of the four
// core slots, returns a 2-bit taken vector per core, and emits one selected
// command per lane (req_0, req_1). Lane 0 is gated by bus_i_rdy and lane 1
// by bus_d_rdy.
arb #() arb (
.clk(clk),
.rst(rst),
.cmd_in_0_0(cmd_out_0[0]),
.cmd_in_1_0(cmd_out_1[0]),
.cmd_tkn_0(cmd_taken[0]),
.cmd_in_0_1(cmd_out_0[1]),
.cmd_in_1_1(cmd_out_1[1]),
.cmd_tkn_1(cmd_taken[1]),
.cmd_in_0_2(cmd_out_0[2]),
.cmd_in_1_2(cmd_out_1[2]),
.cmd_tkn_2(cmd_taken[2]),
.cmd_in_0_3(cmd_out_0[3]),
.cmd_in_1_3(cmd_out_1[3]),
.cmd_tkn_3(cmd_taken[3]),
.bus_rdy_0(bus_i_rdy),
.bus_rdy_1(bus_d_rdy),
.cmd_out_0(req_0),
.cmd_out_1(req_1)
);

// ------------------------------------------------------------------------------------------------
// SMP

// special ops: track resv, stall pending cmds, gen rsp
// Placeholder instance: no ports are connected here.
smp #() smp (

);

// ------------------------------------------------------------------------------------------------
// ------------------------------------------------------------------------------------------------
// Bus interface and logic can be replaced for different buses

// ------------------------------------------------------------------------------------------------
// Bus Out
// requests to main bus

// Pack the bus interface unit's request signals into bus_out, using the
// layout that matches BUS_TYPE.
generate begin: bus_wire_out
   case (BUS_TYPE)
      `BUS_TYPE_WB1: begin
         // Single Wishbone port: every field comes from the bus_wb_* nets
         // driven by the bus_wb1 unit. cyc must be bus_wb_cyc; bus_i_wb_cyc
         // belongs to the WB2 interface and is undriven in this configuration.
         assign bus_out[`BUS_WB1_OUT_START] = bus_wb_cyc;
         assign bus_out[`BUS_WB1_OUT_START-1] = bus_wb_stb;
         assign bus_out[`BUS_WB1_OUT_START-2] = bus_wb_we;
         assign bus_out[`BUS_WB1_OUT_START-3:`BUS_WB1_OUT_START-6] = bus_wb_sel;
         assign bus_out[`BUS_WB1_OUT_START-7:`BUS_WB1_OUT_START-38] = bus_wb_adr;
         assign bus_out[`BUS_WB1_OUT_START-39:`BUS_WB1_OUT_START-70] = bus_wb_datw;
      end
      `BUS_TYPE_WB2: begin
         // Instruction port: cyc/stb/adr only; the we/sel slots of the
         // field are left unassigned.
         assign bus_out[`BUS_WB2_I_OUT_START] = bus_i_wb_cyc;
         assign bus_out[`BUS_WB2_I_OUT_START-1] = bus_i_wb_stb;
         assign bus_out[`BUS_WB2_I_OUT_START-7:`BUS_WB2_I_OUT_START-38] = bus_i_wb_adr;
         // Data port: full read/write request.
         assign bus_out[`BUS_WB2_D_OUT_START] = bus_d_wb_cyc;
         assign bus_out[`BUS_WB2_D_OUT_START-1] = bus_d_wb_stb;
         assign bus_out[`BUS_WB2_D_OUT_START-2] = bus_d_wb_we;
         assign bus_out[`BUS_WB2_D_OUT_START-3:`BUS_WB2_D_OUT_START-6] = bus_d_wb_sel;
         assign bus_out[`BUS_WB2_D_OUT_START-7:`BUS_WB2_D_OUT_START-38] = bus_d_wb_adr;
         assign bus_out[`BUS_WB2_D_OUT_START-39:`BUS_WB2_D_OUT_START-70] = bus_d_wb_datw;
      end
   endcase
end
endgenerate

// ------------------------------------------------------------------------------------------------
// Bus In
// responses from main bus

// Unpack bus_in into the ack/read-data nets consumed by the bus interface
// unit, using the layout that matches BUS_TYPE.
generate begin: bus_wire_in
case(BUS_TYPE)
`BUS_TYPE_WB1: begin
// single port: ack bit followed by 32 bits of read data
assign bus_wb_ack = bus_in[`BUS_WB1_IN_START];
assign bus_wb_datr = bus_in[`BUS_WB1_IN_START-1:`BUS_WB1_IN_START-32];
end
`BUS_TYPE_WB2: begin
// instruction-port response, then data-port response
assign bus_i_wb_ack = bus_in[`BUS_WB2_I_IN_START];
assign bus_i_wb_datr = bus_in[`BUS_WB2_I_IN_START-1:`BUS_WB2_I_IN_START-32];
assign bus_d_wb_ack = bus_in[`BUS_WB2_D_IN_START];
assign bus_d_wb_datr = bus_in[`BUS_WB2_D_IN_START-1:`BUS_WB2_D_IN_START-32];
end
endcase
end
endgenerate

// ------------------------------------------------------------------------------------------------
// Bus Interface
// translate reqs and handle bus transactions

// Instantiate the bus interface unit that matches BUS_TYPE. The unit accepts
// the arbitrated request(s), runs the Wishbone transaction(s) and returns the
// response(s).
generate begin: bus
   case (BUS_TYPE)
      `BUS_TYPE_WB1: begin
         // Single-port bus: only command/response lane 0 is used.
         // The ready output must drive bus_i_rdy, because that is the net
         // wired to the arbiter's bus_rdy_0; connecting it to an otherwise
         // unused implicit net leaves the arbiter's lane-0 ready undriven.
         bus_wb1 #() bus(
            .clk(clk),
            .rst(rst),
            .rdy(bus_i_rdy),
            .cmd(req_0),
            .rsp(rsp_0),
            .wb_stb(bus_wb_stb),
            .wb_cyc(bus_wb_cyc),
            .wb_we(bus_wb_we),
            .wb_sel(bus_wb_sel),
            .wb_adr(bus_wb_adr),
            .wb_datw(bus_wb_datw),
            .wb_ack(bus_wb_ack),
            .wb_datr(bus_wb_datr)
         );
         // Lane 1 has no bus port in this configuration: hold its ready low
         // so the arbiter never marks a lane-1 command as taken.
         assign bus_d_rdy = 1'b0;
      end
      `BUS_TYPE_WB2: begin
         // Dual-port bus: lane 0 = instruction port, lane 1 = data port.
         bus_wb2 #() bus(
            .clk(clk),
            .rst(rst),
            .rdy_i(bus_i_rdy),
            .rdy_d(bus_d_rdy),
            .cmd_i(req_0),
            .cmd_d(req_1),
            .rsp_i(rsp_0),
            .rsp_d(rsp_1),
            .i_wb_cyc(bus_i_wb_cyc),
            .i_wb_stb(bus_i_wb_stb),
            .i_wb_adr(bus_i_wb_adr),
            .i_wb_ack(bus_i_wb_ack),
            .i_wb_datr(bus_i_wb_datr),
            .d_wb_cyc(bus_d_wb_cyc),
            .d_wb_stb(bus_d_wb_stb),
            .d_wb_we(bus_d_wb_we),
            .d_wb_sel(bus_d_wb_sel),
            .d_wb_adr(bus_d_wb_adr),
            .d_wb_datw(bus_d_wb_datw),
            .d_wb_ack(bus_d_wb_ack),
            .d_wb_datr(bus_d_wb_datr)
         );
      end
   endcase
end
endgenerate

// ------------------------------------------------------------------------------------------------
// Response Queues
// responses for cores - just routing if no queues needed
// but should be component; rsp's are formatted differently based on core interface type, plus
// a2l2 could support queues even if wb doesn't
// also, rsp may be gen'd from other units like smp, config, mailbox, etc.
// Route the two bus responses to the cores. Each core sees the response data
// of both lanes; the per-core valid is asserted only when the response's
// core-id field equals the core's slot index. The same per-core valid also
// marks the matching queued command as complete.
generate begin: rsp
for (i = 0; i < MAX_CORES; i++) begin
// lane 0
assign core_rsp_0[i][`RSP_VALID] = rsp_0[`RSP_VALID] & (rsp_0[`RSP_CORE_ID] == i);
assign core_rsp_0[i][`RSP_DATA] = rsp_0[`RSP_DATA];
assign cmd_complete[i][0] = core_rsp_0[i][`RSP_VALID];
// lane 1
assign core_rsp_1[i][`RSP_VALID] = rsp_1[`RSP_VALID] & (rsp_1[`RSP_CORE_ID] == i);
assign core_rsp_1[i][`RSP_DATA] = rsp_1[`RSP_DATA];
assign cmd_complete[i][1] = core_rsp_1[i][`RSP_VALID];
end
end
endgenerate



// ------------------------------------------------------------------------------------------------
// Misc/Errors/Debug
// stuff

/*
generate begin: misc
for (i = 0; i < MAX_CORES; i++) begin
end
end
endgenerate
*/

endmodule

186
rtl/a2node/arb.v

@ -0,0 +1,186 @@ @@ -0,0 +1,186 @@
`include "defs.v"

// arb: picks one command per lane (lane 0 and lane 1) from four cores.
//
// Each lane keeps a 2-bit priority per core (0 = highest). pri_sel() picks
// the winning core among those presenting a valid command. When the lane's
// bus is ready and at least one command is valid, the winner is reported as
// taken and pri_upd() supplies the updated priorities. The selected command
// is always presented on cmd_out_*; the taken bits tell the cores when it was
// actually accepted.
module arb # (
) (
   input clk,
   input rst,
   input bus_rdy_0,                       // lane 0 bus can accept a command
   input bus_rdy_1,                       // lane 1 bus can accept a command
   input [`CMD_SIZE-1:0] cmd_in_0_0,      // cmd_in_<lane>_<core>
   input [`CMD_SIZE-1:0] cmd_in_1_0,
   output [1:0] cmd_tkn_0,                // cmd_tkn_<core>[lane]
   input [`CMD_SIZE-1:0] cmd_in_0_1,
   input [`CMD_SIZE-1:0] cmd_in_1_1,
   output [1:0] cmd_tkn_1,
   input [`CMD_SIZE-1:0] cmd_in_0_2,
   input [`CMD_SIZE-1:0] cmd_in_1_2,
   output [1:0] cmd_tkn_2,
   input [`CMD_SIZE-1:0] cmd_in_0_3,
   input [`CMD_SIZE-1:0] cmd_in_1_3,
   output [1:0] cmd_tkn_3,
   output [`CMD_SIZE-1:0] cmd_out_0,      // selected lane-0 command
   output [`CMD_SIZE-1:0] cmd_out_1       // selected lane-1 command
);

// fairly choose 1 or 2 (depending on output buses) cmds
// mark taken from queue
// obey restrictions from smp, addr cmp, etc. (cmd_stall)

reg  [1:0] pri_0_q[0:3];     // lane 0: current priority of each core
wire [1:0] pri_0_d[0:3];
wire [7:0] pri_update_0;     // packed new priorities, core k in bits [2k+1:2k]
reg  [1:0] pri_1_q[0:3];     // lane 1: current priority of each core
wire [1:0] pri_1_d[0:3];
wire [7:0] pri_update_1;
wire [3:0] cmd_valids_0;     // {core0, core1, core2, core3}
wire [1:0] cmd_sel_0;
wire       cmd_out_val_0;
wire [3:0] cmd_valids_1;
wire [1:0] cmd_sel_1;
wire       cmd_out_val_1;

integer i;

// FF
// Nonblocking assignments: these are clocked state elements, and blocking
// assignments here would race with other processes reading pri_*_q on the
// same clock edge.
always @(posedge clk) begin

   if (rst) begin

      // core i starts with priority i, so core 0 is served first
      for (i = 0; i < 4; i++) begin
         pri_0_q[i] <= i[1:0];
         pri_1_q[i] <= i[1:0];
      end

   end else begin

      for (i = 0; i < 4; i++) begin
         pri_0_q[i] <= pri_0_d[i];
         pri_1_q[i] <= pri_1_d[i];
      end

   end

end

// select next commands; 00 is highest priority
assign cmd_valids_0 = {cmd_in_0_0[`CMD_VALID],cmd_in_0_1[`CMD_VALID],cmd_in_0_2[`CMD_VALID],cmd_in_0_3[`CMD_VALID]};
assign cmd_sel_0 = pri_sel(cmd_valids_0, pri_0_q);
assign cmd_out_val_0 = bus_rdy_0 & (|cmd_valids_0); //wtf depends if you want to allow it to change while bus is busy
assign cmd_out_0 = cmd_sel_0 == 2'b00 ? cmd_in_0_0 :
                   cmd_sel_0 == 2'b01 ? cmd_in_0_1 :
                   cmd_sel_0 == 2'b10 ? cmd_in_0_2 :
                                        cmd_in_0_3;

assign cmd_valids_1 = {cmd_in_1_0[`CMD_VALID],cmd_in_1_1[`CMD_VALID],cmd_in_1_2[`CMD_VALID],cmd_in_1_3[`CMD_VALID]};
assign cmd_sel_1 = pri_sel(cmd_valids_1, pri_1_q);
assign cmd_out_val_1 = bus_rdy_1 & (|cmd_valids_1); //wtf depends if you want to allow it to change while bus is busy
assign cmd_out_1 = cmd_sel_1 == 2'b00 ? cmd_in_1_0 :
                   cmd_sel_1 == 2'b01 ? cmd_in_1_1 :
                   cmd_sel_1 == 2'b10 ? cmd_in_1_2 :
                                        cmd_in_1_3;

// update priorities if cmd selected
assign pri_update_0 = pri_upd(cmd_sel_0, pri_0_q);

assign pri_0_d[0] = cmd_out_val_0 ? pri_update_0[1:0] : pri_0_q[0];
assign pri_0_d[1] = cmd_out_val_0 ? pri_update_0[3:2] : pri_0_q[1];
assign pri_0_d[2] = cmd_out_val_0 ? pri_update_0[5:4] : pri_0_q[2];
assign pri_0_d[3] = cmd_out_val_0 ? pri_update_0[7:6] : pri_0_q[3];

assign pri_update_1 = pri_upd(cmd_sel_1, pri_1_q);

assign pri_1_d[0] = cmd_out_val_1 ? pri_update_1[1:0] : pri_1_q[0];
assign pri_1_d[1] = cmd_out_val_1 ? pri_update_1[3:2] : pri_1_q[1];
assign pri_1_d[2] = cmd_out_val_1 ? pri_update_1[5:4] : pri_1_q[2];
assign pri_1_d[3] = cmd_out_val_1 ? pri_update_1[7:6] : pri_1_q[3];

// mark taken: bit 0 of each core's vector is lane 0, bit 1 is lane 1
assign cmd_tkn_0[0] = cmd_out_val_0 & (cmd_sel_0 == 2'b00);
assign cmd_tkn_1[0] = cmd_out_val_0 & (cmd_sel_0 == 2'b01);
assign cmd_tkn_2[0] = cmd_out_val_0 & (cmd_sel_0 == 2'b10);
assign cmd_tkn_3[0] = cmd_out_val_0 & (cmd_sel_0 == 2'b11);

assign cmd_tkn_0[1] = cmd_out_val_1 & (cmd_sel_1 == 2'b00);
assign cmd_tkn_1[1] = cmd_out_val_1 & (cmd_sel_1 == 2'b01);
assign cmd_tkn_2[1] = cmd_out_val_1 & (cmd_sel_1 == 2'b10);
assign cmd_tkn_3[1] = cmd_out_val_1 & (cmd_sel_1 == 2'b11);

endmodule

// could also account for configured thread/core priority
// pri_sel: index of the valid requester with the best (numerically lowest)
// priority.
//   val : request-valid flags; val[k] belongs to requester k (val[0] is the
//         left-most bit of the argument)
//   pri : current priority of each requester, 0 = highest
// Returns 2'b00 when nothing is valid. A requester wins only if no other
// valid requester has a strictly lower priority value; if priorities are not
// unique, the highest-numbered of the tied requesters is returned.
function [1:0] pri_sel (input [0:3] val, [1:0] pri[0:3]);
   integer k;
   integer j;
   reg     beaten;
   begin
      pri_sel = 2'b00;
      for (k = 0; k < 4; k = k + 1) begin
         if (val[k]) begin
            // is there any other valid requester that outranks k?
            beaten = 1'b0;
            for (j = 0; j < 4; j = j + 1) begin
               if ((j != k) && val[j] && (pri[j] < pri[k])) begin
                  beaten = 1'b1;
               end
            end
            if (!beaten) begin
               pri_sel = k[1:0];
            end
         end
      end
   end
endfunction


//function [7:0] pri_upd (input [1:0] sel, [1:0] p3, [1:0] p2, [1:0] p1, [1:0] p0);
// pri_upd: new priority values after requester `sel` has been served.
//   sel : index of the requester that was just selected
//   pri : current priority of each requester, 0 = highest
// Returns the four new 2-bit priorities packed as {pri3, pri2, pri1, pri0}
// (requester k in bits [2k+1:2k]). The served requester drops to the lowest
// priority (2'b11); each other requester is passed through pri_up() with the
// served requester's old priority as the level.
// Plain blocking assignments are used: a procedural continuous `assign` may
// not target a part-select.
function [7:0] pri_upd (input [1:0] sel, [1:0] pri[0:3]);
   integer k;
   begin
      for (k = 0; k < 4; k = k + 1) begin
         if (sel == k[1:0]) begin
            pri_upd[2*k +: 2] = 2'b11;
         end else begin
            pri_upd[2*k +: 2] = pri_up(pri[sel], pri[k]);
         end
      end
   end
endfunction

// raise priority by 1, if it's higher than lvl
// pri_up: raise a priority by one step when it ranks below a given level.
//   lvl : reference level (the old priority of the requester just served)
//   pri : priority to adjust, 0 = highest
// Returns pri-1 when pri > lvl, otherwise pri unchanged, so a priority of 0
// always stays 0.
// (An earlier if-chain version did not work: its single `else` was attached
// only to the last `if`, so it overwrote the result for pri == 01 and
// pri == 10, and nothing was assigned at all when pri <= lvl.)
function [1:0] pri_up(input [1:0] lvl, [1:0] pri);
   begin
      if (pri > lvl) begin
         pri_up = pri - 2'b01;
      end else begin
         pri_up = pri;
      end
   end
endfunction

183
rtl/a2node/bus_wb2.v

@ -0,0 +1,183 @@ @@ -0,0 +1,183 @@

`include "defs.v"

// bus_wb2: dual (instruction + data) Wishbone master interface.
//
// Each port has a one-entry command register and a two-state sequencer
// (11 = idle, 01 = request pending). When a port is idle and the incoming
// command is valid, the command is latched; while pending, cyc/stb are
// driven from the latched valid bit until the response arrives, then the
// port returns to idle. rdy_i/rdy_d are high while the port is idle.
//
// Special case on the data port: a read (WE = 0) of address 0 is answered
// locally with the requesting core's id in byte 3 and is not driven onto the
// bus.
module bus_wb2 # (

) (
   input clk,
   input rst,
   output rdy_i,                       // instruction port idle
   output rdy_d,                       // data port idle
   input [`CMD_SIZE-1:0] cmd_i,        // instruction-port command
   input [`CMD_SIZE-1:0] cmd_d,        // data-port command
   output [`RSP_SIZE-1:0] rsp_i,       // instruction-port response
   output [`RSP_SIZE-1:0] rsp_d,       // data-port response
   output i_wb_stb,
   output i_wb_cyc,
   output [31:0] i_wb_adr,
   input i_wb_ack,
   input [31:0] i_wb_datr,
   output d_wb_stb,
   output d_wb_cyc,
   output d_wb_we,
   output [3:0] d_wb_sel,
   output [31:0] d_wb_adr,
   output [31:0] d_wb_datw,
   input d_wb_ack,
   input [31:0] d_wb_datr
);

reg  [`CMD_SIZE-1:0] cmd_i_q;
wire [`CMD_SIZE-1:0] cmd_i_d;
reg  [`CMD_SIZE-1:0] cmd_d_q;
wire [`CMD_SIZE-1:0] cmd_d_d;
reg  [1:0] cmdseq_i_q;
wire [1:0] cmdseq_i_d;
reg  [1:0] cmdseq_d_q;
wire [1:0] cmdseq_d_d;
wire idle_i;
wire cmd_val_i;
wire ld_cmd_i;
wire rsp_val_i;
wire idle_d;
wire cmd_val_d;
wire ld_cmd_d;
wire rsp_val_d;
// declared ahead of their first use
wire adr_coreid;        // latched data command is the local core-id read
wire rsp_d_complete;    // data-port response available this cycle

// FF
// Nonblocking assignments for clocked state.
always @(posedge clk) begin

   if (rst) begin

      cmdseq_i_q <= 2'b11;   // idle
      cmdseq_d_q <= 2'b11;   // idle
      cmd_i_q <= 'h0;
      cmd_d_q <= 'h0;

   end else begin

      cmdseq_i_q <= cmdseq_i_d;
      cmdseq_d_q <= cmdseq_d_d;
      cmd_i_q <= cmd_i_d;
      cmd_d_q <= cmd_d_d;

   end
end

// super-simple; latch cmd -> send req -> rtn rsp

assign cmd_val_i = cmd_i[`CMD_VALID];

//tbl cmdseq_i
//n cmdseq_i_q cmdseq_i_d
//n | cmd_val_i | ld_cmd_i
//n | | i_wb_ack | |
//n | | | | |
//n | | | | |
//n | | | | | idle_i
//n | | | | | |
//n | | | | | |
//b 10 | | 10 | |
//t ii i i oo o o
//*------------------------------------------------
//* Idle ******************************************
//s 11 - - -- - 1
//s 11 0 - 11 0 - * ...zzz...
//s 11 1 - 01 1 -
//* Request Pending *******************************
//s 01 - 0 01 0 0
//s 01 - 1 11 0 0
//*------------------------------------------------
//tbl cmdseq_i

// load a new command when idle; otherwise hold it and drop valid on ack
assign cmd_i_d = ld_cmd_i ? cmd_i : {cmd_i_q[`CMD_VALID] & ~i_wb_ack, cmd_i_q[`CMD_VALID-1:0]};

assign i_wb_stb = cmd_i_q[`CMD_VALID];
assign i_wb_cyc = cmd_i_q[`CMD_VALID];
assign i_wb_adr = cmd_i_q[`CMD_ADR];

assign rdy_i = idle_i;
assign rsp_i[`RSP_VALID] = i_wb_ack;
assign rsp_i[`RSP_CORE_ID] = cmd_i_q[`CMD_CORE_ID];
assign rsp_i[`RSP_DATA] = i_wb_datr;


//wtf eventually move this to config unit; it will respond and block cmd val to bus unit
// use adr compare to return coreid for d-read
assign adr_coreid = ~cmd_d_q[`CMD_WE] & (cmd_d_q[`CMD_ADR] == 32'b0); //wtf why is the adr cmp part segving verilator?????
assign cmd_val_d = cmd_d[`CMD_VALID];

//tbl cmdseq_d
//n cmdseq_d_q cmdseq_d_d
//n | cmd_val_d | ld_cmd_d
//n | | rsp_d_complete | |
//n | | | | |
//n | | | | |
//n | | | | | idle_d
//n | | | | | |
//n | | | | | |
//b 10 | | 10 | |
//t ii i i oo o o
//*------------------------------------------------
//* Idle ******************************************
//s 11 - - -- - 1
//s 11 0 - 11 0 - * ...zzz...
//s 11 1 - 01 1 -
//* Request Pending *******************************
//s 01 - 0 01 0 0
//s 01 - 1 11 0 0
//*------------------------------------------------
//tbl cmdseq_d

//assign cmd_d_d = ld_cmd_d ? cmd_d : {cmd_d_q[`CMD_VALID] & ~d_wb_ack, cmd_d_q[`CMD_VALID-1:0]};
assign cmd_d_d = ld_cmd_d ? cmd_d : {cmd_d_q[`CMD_VALID] & ~rsp_d_complete, cmd_d_q[`CMD_VALID-1:0]};

//assign d_wb_cyc = cmd_d_q[`CMD_VALID];
//assign d_wb_stb = cmd_d_q[`CMD_VALID];
// the local core-id read is kept off the bus
assign d_wb_cyc = cmd_d_q[`CMD_VALID] & ~adr_coreid;
assign d_wb_stb = cmd_d_q[`CMD_VALID] & ~adr_coreid;
assign d_wb_we = cmd_d_q[`CMD_WE];
assign d_wb_sel = cmd_d_q[`CMD_SEL];
assign d_wb_adr = cmd_d_q[`CMD_ADR];
assign d_wb_datw = cmd_d_q[`CMD_DATW];

assign rdy_d = idle_d;

//assign rsp_d[`RSP_VALID] = d_wb_ack;
// complete on a bus ack, or immediately for the local core-id read
assign rsp_d_complete = d_wb_ack | (cmd_d_q[`CMD_VALID] & adr_coreid);
assign rsp_d[`RSP_VALID] = rsp_d_complete;
assign rsp_d[`RSP_CORE_ID] = cmd_d_q[`CMD_CORE_ID];
//assign rsp_d[`RSP_DATA] = d_wb_datr;
assign rsp_d[`RSP_DATA] = adr_coreid ? {6'b0, cmd_d_q[`CMD_CORE_ID], 24'b0} : d_wb_datr; // byte 3 = core_id

// Generated...
// (equations kept in their generated form; the product terms of each sum are
// mutually exclusive, so the 1-bit '+' behaves as an OR)
//vtable cmdseq_i
assign cmdseq_i_d[1] =
(cmdseq_i_q[1] & cmdseq_i_q[0] & ~cmd_val_i) +
(~cmdseq_i_q[1] & cmdseq_i_q[0] & i_wb_ack);
assign cmdseq_i_d[0] =
(cmdseq_i_q[1] & cmdseq_i_q[0] & ~cmd_val_i) +
(cmdseq_i_q[1] & cmdseq_i_q[0] & cmd_val_i) +
(~cmdseq_i_q[1] & cmdseq_i_q[0] & ~i_wb_ack) +
(~cmdseq_i_q[1] & cmdseq_i_q[0] & i_wb_ack);
assign ld_cmd_i =
(cmdseq_i_q[1] & cmdseq_i_q[0] & cmd_val_i);
assign idle_i =
(cmdseq_i_q[1] & cmdseq_i_q[0]);
//vtable cmdseq_i
//vtable cmdseq_d
assign cmdseq_d_d[1] =
(cmdseq_d_q[1] & cmdseq_d_q[0] & ~cmd_val_d) +
(~cmdseq_d_q[1] & cmdseq_d_q[0] & rsp_d_complete);
assign cmdseq_d_d[0] =
(cmdseq_d_q[1] & cmdseq_d_q[0] & ~cmd_val_d) +
(cmdseq_d_q[1] & cmdseq_d_q[0] & cmd_val_d) +
(~cmdseq_d_q[1] & cmdseq_d_q[0] & ~rsp_d_complete) +
(~cmdseq_d_q[1] & cmdseq_d_q[0] & rsp_d_complete);
assign ld_cmd_d =
(cmdseq_d_q[1] & cmdseq_d_q[0] & cmd_val_d);
assign idle_d =
(cmdseq_d_q[1] & cmdseq_d_q[0]);
//vtable cmdseq_d

endmodule

15
rtl/a2node/cmd_a2l2.v

@ -0,0 +1,15 @@ @@ -0,0 +1,15 @@
// a2l2 default: allow 1 ld, 1 st credit and use 2 dedicated queues

`include "defs.v"

// Placeholder for the A2L2 command interface: no ports and no logic yet.
// NOTE(review): this stub is named cmd_wb, the same as the Wishbone command
// interface module; compiling both files together defines the module twice.
// The name is left unchanged here - confirm the intended name (the file is
// cmd_a2l2.v) before this stub is added to a build.
module cmd_wb #(
   parameter CORE_TYPE = `CORE_TYPE_A2L2
) (
);

// queue storage sketched for the a2l2 interface; not used yet
reg  [77:0] cmd_queue_q[0:3][0:1];
wire [77:0] cmd_queue_d[0:3][0:1];
wire [77:0] cmd_queue_in[0:3][0:1];
wire [71:0] cmd_queue_out[0:3];

endmodule

180
rtl/a2node/cmd_wb.v

@ -0,0 +1,180 @@ @@ -0,0 +1,180 @@
// Wishbone-Wishbone Command Interface

// allow single- or dual-wb in/out:
// 1/1 : passthru
// 1/2 : route to proper - but this requires indicator in extcmd to distinguish i vs. d
// 2/1 : arbitrate
// 2/2 : passthru
//
// also handle special ops when possible (dcbz, ...)

// select one command per output bus

// ext_cmd is not tied to i/d, but does it need to have multiple outstanding (nop=0, and valid that require i/d info also must have that info provided)
// needs ext_tkn if not tied to i/d; needs to set q valid if tied to i/d (based on ext i or d type)
// seems like these are all tied to an i or d and require a response, so shouldn't need i+d+ext outstanding?

// possible extended command modifiers
// prefetch
// larx
// stcx
// lwsync
// hwsync
// tlbsync
// ici, icbi
// dci, dcbi, etc
// dcbtst
// dcbz
// tlbie, etc

// possible extended responses
// errors
// crit first, xfer# for larger bus width on core side
// credits
// resv valid
// stcx comp/pass
// sync ack
// back inv val/addr

// possible extra functions
// integrated L2
// doorbell/mailbox (peer/broadcast msg/rsp/intr side channel crossbar)

`include "defs.v"

// cmd_wb: per-core Wishbone command interface.
//
// Captures the core's Wishbone request(s) into up to two one-entry command
// queues in the internal `CMD_* format and presents them on
// cmd_out_0/cmd_out_1 for arbitration. An entry is loaded when empty and the
// incoming request is valid, records when the arbiter has taken it, and is
// released when the matching cmd_complete bit is seen.
//   CORE_TYPE_WB1: queue 0 holds the single (d_wb_*) port
//   CORE_TYPE_WB2: queue 0 holds the instruction port, queue 1 the data port
module cmd_wb #(
   parameter CORE_ID = 0,                    // value placed in the command's core-id field
   parameter CORE_TYPE = `CORE_TYPE_WB2,
   parameter BUS_TYPE = `BUS_TYPE_WB2
) (
   input clk,
   input rst,
   input i_wb_cyc,
   input i_wb_stb,
   input [31:0] i_wb_adr,
   input d_wb_cyc,
   input d_wb_stb,
   input d_wb_we,
   input [3:0] d_wb_sel,
   input [31:0] d_wb_adr,
   input [31:0] d_wb_datw,
   input [127:0] ext_cmd,
   input [1:0] cmd_taken, // bit vector, one per queued cmd (could simultaneously occur in some designs)
   input [1:0] cmd_complete, // bit vector, one per queued cmd (could simultaneously occur in some designs)
   output [`CMD_SIZE-1:0] cmd_out_0,
   output [`CMD_SIZE-1:0] cmd_out_1
);

reg  [`CMD_SIZE-1:0] cmd_queue_q[0:1];
wire [`CMD_SIZE-1:0] cmd_queue_d[0:1];
wire [`CMD_SIZE-1:0] cmd_queue_in[0:1];
wire [`CMD_SIZE-1:0] cmd_queue_out;
reg  [127:0] ext_queue_q;
wire [127:0] ext_queue_d;
wire [127:0] ext_queue_in;
// NOTE(review): ext_queue_d has no driver in this module, so ext_queue_q
// never captures ext_cmd - confirm the intended load condition.

genvar i;

// FF
// Nonblocking assignments for clocked state.
always @(posedge clk) begin

   if (rst) begin

      cmd_queue_q[0] <= 'h0;
      cmd_queue_q[1] <= 'h0;
      ext_queue_q <= 'h0;

   end else begin

      cmd_queue_q[0] <= cmd_queue_d[0];
      cmd_queue_q[1] <= cmd_queue_d[1];
      ext_queue_q <= ext_queue_d;

   end
end

// format incoming requests as internal commands

case (CORE_TYPE)
   `CORE_TYPE_WB1: begin
      // q[0] = i or d
      assign cmd_queue_in[0][`CMD_VALID] = d_wb_cyc & d_wb_stb; // valid - may need ext decode too
      assign cmd_queue_in[0][`CMD_CORE_ID] = CORE_ID; // core id
      assign cmd_queue_in[0][`CMD_RSVD_0] = 1'b0; // rsvd (possible cores > 4)
      assign cmd_queue_in[0][`CMD_RSVD_1] = 1'b0; // rsvd (possible cores > 4)
      assign cmd_queue_in[0][`CMD_TAKEN] = 1'b0; // taken
      assign cmd_queue_in[0][`CMD_RSVD_2] = 1'b0; // rsp rcvd?
      assign cmd_queue_in[0][`CMD_RSVD_3] = 1'b0;
      assign cmd_queue_in[0][`CMD_WE] = d_wb_we;
      assign cmd_queue_in[0][`CMD_SEL] = d_wb_sel;
      assign cmd_queue_in[0][`CMD_ADR] = d_wb_adr;
      assign cmd_queue_in[0][`CMD_DATW] = d_wb_datw;
      // whole vector, same as the WB2 branch (a [0] bit-select would take
      // only bit 0 of ext_cmd and leave the other 127 bits undriven)
      assign ext_queue_in = ext_cmd;
   end
   `CORE_TYPE_WB2: begin
      // q[0]=i, q[1]=d
      assign cmd_queue_in[0][`CMD_VALID] = i_wb_cyc & i_wb_stb; // valid - may need ext decode too
      assign cmd_queue_in[0][`CMD_CORE_ID] = CORE_ID; // core id
      assign cmd_queue_in[0][`CMD_RSVD_0] = 1'b0; // rsvd (possible cores > 4)
      assign cmd_queue_in[0][`CMD_RSVD_1] = 1'b0; // rsvd (possible cores > 4)
      assign cmd_queue_in[0][`CMD_TAKEN] = 1'b0; // taken
      assign cmd_queue_in[0][`CMD_RSVD_2] = 1'b0; // rsp rcvd?
      assign cmd_queue_in[0][`CMD_RSVD_3] = 1'b0;
      assign cmd_queue_in[0][`CMD_WE] = 1'b0;      // instruction port is read-only
      assign cmd_queue_in[0][`CMD_SEL] = 4'b0;
      assign cmd_queue_in[0][`CMD_ADR] = i_wb_adr;
      assign cmd_queue_in[0][`CMD_DATW] = 32'b0;

      // the reserved/taken bits below belong to entry [1]; driving entry [0]
      // a second time would leave these bits of the data command floating
      assign cmd_queue_in[1][`CMD_VALID] = d_wb_cyc & d_wb_stb; // valid - may need ext decode too
      assign cmd_queue_in[1][`CMD_CORE_ID] = CORE_ID; // core id
      assign cmd_queue_in[1][`CMD_RSVD_0] = 1'b0; // rsvd (possible cores > 4)
      assign cmd_queue_in[1][`CMD_RSVD_1] = 1'b0; // rsvd (possible cores > 4)
      assign cmd_queue_in[1][`CMD_TAKEN] = 1'b0; // taken
      assign cmd_queue_in[1][`CMD_RSVD_2] = 1'b0; // rsp rcvd?
      assign cmd_queue_in[1][`CMD_RSVD_3] = 1'b0;
      assign cmd_queue_in[1][`CMD_WE] = d_wb_we;
      assign cmd_queue_in[1][`CMD_SEL] = d_wb_sel;
      assign cmd_queue_in[1][`CMD_ADR] = d_wb_adr;
      assign cmd_queue_in[1][`CMD_DATW] = d_wb_datw;

      assign ext_queue_in = ext_cmd;
   end
endcase

// queue routing/arbitration to cmd processing

case (CORE_TYPE)
   `CORE_TYPE_WB1: begin
      case (BUS_TYPE)
         `BUS_TYPE_WB1: begin
            assign cmd_out_0 = cmd_queue_q[0];
         end
         `BUS_TYPE_WB2: begin
            // NOTE(review): cmd_queue_q has entries [0:1] only, so index 7 is
            // out of range, and entry 1 is never loaded for a WB1 core. Left
            // as written because the intended i-vs-d routing (and the ext
            // bit that selects it) is not defined in this file.
            assign cmd_out_0 = ext_queue_q[0] ? cmd_queue_q[1] : cmd_queue_q[7]; // select i vs d
         end
      endcase
   end
   `CORE_TYPE_WB2: begin
      case (BUS_TYPE)
         `BUS_TYPE_WB1:begin
            // both valid: send d
            // want selected bit; set first cycle; dont change once selected until not valid
            assign cmd_out_0 = cmd_queue_q[1][`CMD_VALID] ? cmd_queue_q[1] : cmd_queue_q[0];
         end
         `BUS_TYPE_WB2: begin
            assign cmd_out_0 = cmd_queue_q[0];
            assign cmd_out_1 = cmd_queue_q[1];
         end
      endcase
   end
endcase

// next-state of each queue entry
for (i = 0; i < 2; i++) begin
   // valid: an occupied entry stays until its completion; an empty one loads
   assign cmd_queue_d[i][`CMD_VALID] = cmd_queue_q[i][`CMD_VALID] ? ~cmd_complete[i] : cmd_queue_in[i][`CMD_VALID];
   // taken: sticky once the arbiter takes the entry, cleared on completion
   assign cmd_queue_d[i][`CMD_TAKEN] = cmd_queue_q[i][`CMD_VALID] ? (cmd_queue_q[i][`CMD_TAKEN] | cmd_taken[i]) & ~cmd_complete[i] : 1'b0;
   // rest: hold while occupied, otherwise follow the incoming request
   assign cmd_queue_d[i][`CMD_VALID-1:`CMD_TAKEN+1] = cmd_queue_q[i][`CMD_VALID] ? cmd_queue_q[i][`CMD_VALID-1:`CMD_TAKEN+1] : cmd_queue_in[i][`CMD_VALID-1:`CMD_TAKEN+1];
   assign cmd_queue_d[i][`CMD_TAKEN-1:0] = cmd_queue_q[i][`CMD_VALID] ? cmd_queue_q[i][`CMD_TAKEN-1:0] : cmd_queue_in[i][`CMD_TAKEN-1:0];
end

endmodule

71
rtl/a2node/defs.v

@ -0,0 +1,71 @@ @@ -0,0 +1,71 @@
// a2wb defines

// Include guard: this file is included by several sources.
`ifndef A2NODE_DEFS_V
`define A2NODE_DEFS_V

// Arithmetic macros are parenthesised so they expand safely inside larger
// expressions (for example after a '-' or '*').

`define CORE_TYPE_NONE 4'h0
`define CORE_TYPE_A2L2 4'h1
`define CORE_TYPE_WB1 4'h2
`define CORE_TYPE_WB2 4'h3

`define BUS_TYPE_NONE 4'h0
`define BUS_TYPE_WB1 4'h1
`define BUS_TYPE_WB2 4'h2

// starting bits for core in/out subvectors

// out's are core out/bridge in

`define WB1_WB_OUT_START 383
`define WB1_EXT_OUT_START (`WB1_WB_OUT_START-128)

`define WB2_WB_OUT_START 383
`define WB2_I_WB_OUT_START 383
`define WB2_D_WB_OUT_START (`WB2_I_WB_OUT_START-128)
`define WB2_EXT_OUT_START (`WB2_D_WB_OUT_START-128)

// in's are bridge out/core in

`define WB1_WB_IN_START 383
`define WB1_EXT_IN_START (`WB1_WB_IN_START-128)

`define WB2_WB_IN_START 383
`define WB2_I_WB_IN_START 383
`define WB2_D_WB_IN_START (`WB2_I_WB_IN_START-128)
`define WB2_EXT_IN_START (`WB2_D_WB_IN_START-128)

// starting bits for bus in/out subvectors

`define BUS_WB1_OUT_START 127
`define BUS_WB1_IN_START 127

`define BUS_WB2_OUT_START 255
`define BUS_WB2_I_OUT_START 255
`define BUS_WB2_D_OUT_START 127
`define BUS_WB2_IN_START 127
`define BUS_WB2_I_IN_START 127
`define BUS_WB2_D_IN_START 63


// internal

// command word fields: single-bit macros give a bit index, multi-bit macros
// give an msb:lsb range for use inside [ ]
`define CMD_SIZE 77
`define CMD_VALID (`CMD_SIZE-1)
`define CMD_CORE_ID (`CMD_SIZE-2):(`CMD_SIZE-3)
`define CMD_RSVD_0 (`CMD_SIZE-4)
`define CMD_RSVD_1 (`CMD_SIZE-5)
`define CMD_TAKEN (`CMD_SIZE-6)
`define CMD_RSVD_2 (`CMD_SIZE-7)
`define CMD_RSVD_3 (`CMD_SIZE-8)
`define CMD_WE (`CMD_SIZE-9)
`define CMD_SEL (`CMD_SIZE-10):(`CMD_SIZE-13)
`define CMD_ADR (`CMD_SIZE-14):(`CMD_SIZE-45)
`define CMD_DATW (`CMD_SIZE-46):(`CMD_SIZE-77)

// response word fields (bits below RSP_DATA are not assigned a meaning here)
`define RSP_SIZE 64
`define RSP_VALID (`RSP_SIZE-1)
`define RSP_CORE_ID (`RSP_SIZE-2):(`RSP_SIZE-3)
`define RSP_RSVD_0 (`RSP_SIZE-4):(`RSP_SIZE-8)
`define RSP_DATA (`RSP_SIZE-9):(`RSP_SIZE-40)


// main bus
`define WB1_BUS_OUT_START 127
`define WB1_BUS_IN_START 127

`endif // A2NODE_DEFS_V

68
rtl/a2node/readme.md

@ -0,0 +1,68 @@ @@ -0,0 +1,68 @@
# A2 Interfaces to WB

* core interfaces

* A2I/A2O A2L2 bus

* Single (combined I/D) w/SMP extensions

* Dual (separate I/D) WB buses w/SMP extensions

* bus interfaces

* single WB

* dual WB

* functions

* queues one or more core commands

* point of coherency for larx/stcx, sync, tlbie, etc. for multicores below it (single/multithread)

* address compares necessary for ordering/coherency

* optional mailbox interface for core-core peer and broadcast

* arbitrates for WB bus(es)

* gen responses for cores

## Possible configurations

* one core, WB: pass-through with SMP functions

* one core, A2L2: bridge with SMP functions

* multi-core: identical or mixed WB1/WB2/A2L2, queueing, arbitration, and SMP functions


### syntax check

```verilator --lint-only a2wb.v -Wno-LITENDIAN```

### sim build

```
verilator --cc --exe --trace -Wno-Litendian -Wno-fatal -I./src top.v tb.cpp
cd obj_dir;make -f Vtop.mk;cd ..
obj_dir/Vtop
```

### synth build (Litex)

* had to make some source changes for Vivado

```
rm obj_dir/*
# use sim top so tb.cpp is ok
verilator --cc --exe --trace -Wno-Litendian -Wno-fatal -I./litex/a2node/verilog -I./src top.v tb.cpp uart.cpp
cd obj_dir;make -f Vtop.mk;cd ..
obj_dir/Vtop

gtkwave wtf.vcd wtf.gtkw

vcd2fst wtf.vcd wtf.fst
gtkwave wtf.fst wtf.gtkw
```

156
rtl/a2node/sim_1000_4w.txt

@ -0,0 +1,156 @@ @@ -0,0 +1,156 @@
Memory Size=01000000B
Loading memory from rom.bin.hex...
Seed=08675309
Cores: 4
Resetting...
Go!
>>> UART_0:
>>> UART_1:
>>> UART_2:
>>> UART_3:
>>> UART_0:A2Node Test!
>>> UART_1:A2Node Test!
>>> UART_2:A2Node Test!
>>> UART_3:A2Node Test!
>>> UART_0:
>>> UART_1:
>>> UART_2:
>>> UART_3:
>>> UART_0:Coremark test
>>> UART_1:Coremark test
>>> UART_2:Coremark test
>>> UART_3:Coremark test
>>> UART_0:Iterations: 1000
>>> UART_1:Iterations: 1000
>>> UART_2:Iterations: 1000
>>> UART_3:Iterations: 1000
>>> UART_0:Initing...
>>> UART_1:Initing...
>>> UART_2:Initing...
>>> UART_3:Initing...
>>> UART_0:List: 00FFF7CC 0000029A 00FFF7CC 00000000
>>> UART_1:List: 00BFF7CC 0000029A 00BFF7CC 00000000
>>> UART_2:List: 007FF7CC 0000029A 007FF7CC 00000000
>>> UART_3:List: 003FF7CC 0000029A 003FF7CC 00000000
>>> UART_0:Matrix: 00FFF7B0 0000029A 00FFFA66 00000000
>>> UART_1:Matrix: 00BFF7B0 0000029A 00BFFA66 00000000
>>> UART_2:Matrix: 007FF7B0 0000029A 007FFA66 00000000
>>> UART_3:Matrix: 003FF7B0 0000029A 003FFA66 00000000
>>> UART_0:State: 0000029A 00FFFD00 00000000
>>> UART_1:State: 0000029A 00BFFD00 00000000
>>> UART_2:State: 0000029A 007FFD00 00000000
>>> UART_3:State: 0000029A 003FFD00 00000000
>>> UART_0:Starting...
>>> UART_1:Starting...
>>> UART_2:Starting...
>>> UART_3:Starting...
cyc=40000000
cyc=80000000
cyc=120000000
cyc=160000000
cyc=200000000
cyc=240000000
cyc=280000000
cyc=320000000
cyc=360000000
cyc=400000000
cyc=440000000
cyc=480000000
cyc=520000000
cyc=560000000
cyc=600000000
>>> UART_0:2K performance run parameters for coremark.
>>> UART_1:2K performance run parameters for coremark.
>>> UART_0:CoreMark Size : 666
>>> UART_1:CoreMark Size : 666
>>> UART_0:Total ticks : 639078926
>>> UART_1:Total ticks : 639079903
>>> UART_0:Total time (secs): 6
>>> UART_1:Total time (secs): 6
>>> UART_0:Iterations/Sec : 166
>>> UART_1:Iterations/Sec : 166
>>> UART_0:ERROR! Must execute for at least 10 secs for a valid result!
>>> UART_1:ERROR! Must execute for at least 10 secs for a valid result!
>>> UART_2:2K performance run parameters for coremark.
>>> UART_0:Iterations : 1000
>>> UART_3:2K performance run parameters for coremark.
>>> UART_1:Iterations : 1000
>>> UART_0:Compiler version : GCC9.3.0
>>> UART_2:CoreMark Size : 666
>>> UART_1:Compiler version : GCC9.3.0
>>> UART_0:Compiler flags :
>>> UART_3:CoreMark Size : 666
>>> UART_1:Compiler flags :
>>> UART_2:Total ticks : 639099865
>>> UART_0:Memory location : STACK
>>> UART_1:Memory location : STACK
>>> UART_3:Total ticks : 639102185
>>> UART_2:Total time (secs): 6
>>> UART_0:seedcrc : 0xe9f5
>>> UART_1:seedcrc : 0xe9f5
>>> UART_3:Total time (secs): 6
>>> UART_2:Iterations/Sec : 166
>>> UART_0:[0]crclist : 0xe714
>>> UART_3:Iterations/Sec : 166
>>> UART_1:[0]crclist : 0xe714
>>> UART_2:ERROR! Must execute for at least 10 secs for a valid result!
>>> UART_0:[0]crcmatrix : 0x1fd7
>>> UART_3:ERROR! Must execute for at least 10 secs for a valid result!
>>> UART_1:[0]crcmatrix : 0x1fd7
>>> UART_2:Iterations : 1000
>>> UART_3:Iterations : 1000
>>> UART_0:[0]crcstate : 0x8e3a
>>> UART_2:Compiler version : GCC9.3.0
>>> UART_1:[0]crcstate : 0x8e3a
>>> UART_2:Compiler flags :
>>> UART_3:Compiler version : GCC9.3.0
>>> UART_0:[0]crcfinal : 0xd340
>>> UART_3:Compiler flags :
>>> UART_2:Memory location : STACK
>>> UART_1:[0]crcfinal : 0xd340
>>> UART_3:Memory location : STACK
>>> UART_2:seedcrc : 0xe9f5
>>> UART_0:Correct operation validated. See README.md for run and reporting rules.
>>> UART_0:
>>> UART_3:seedcrc : 0xe9f5
>>> UART_0:Pass.
>>> UART_0:
cyc=639212365 WBI Data @=0000f000 data=00000048
** pass address ifetch'd (1)...
>>> UART_1:Correct operation validated. See README.md for run and reporting rules.
>>> UART_1:
>>> UART_1:Pass.
>>> UART_1:
cyc=639213694 WBI Data @=0000f000 data=00000048
** pass address ifetch'd (2)...
>>> UART_2:[0]crclist : 0xe714
>>> UART_3:[0]crclist : 0xe714
>>> UART_2:[0]crcmatrix : 0x1fd7
>>> UART_3:[0]crcmatrix : 0x1fd7
>>> UART_2:[0]crcstate : 0x8e3a
>>> UART_3:[0]crcstate : 0x8e3a
>>> UART_2:[0]crcfinal : 0xd340
>>> UART_3:[0]crcfinal : 0xd340
>>> UART_2:Correct operation validated. See README.md for run and reporting rules.
>>> UART_2:
>>> UART_2:Pass.
>>> UART_2:
cyc=639232242 WBI Data @=0000f000 data=00000048
** pass address ifetch'd (3)...
>>> UART_3:Correct operation validated. See README.md for run and reporting rules.
>>> UART_3:
>>> UART_3:Pass.
>>> UART_3:
cyc=639234350 WBI Data @=0000f000 data=00000048
** pass address ifetch'd (4)...

Statistics
IFetch: 11416960
DRead: 00007676
DWrite: 111415376

Done.

You has opulence.

Seed=08675309

26
rtl/a2node/smp.v

@ -0,0 +1,26 @@ @@ -0,0 +1,26 @@

// SMP support functions (placeholder).
// No ports and no parameters yet; the body only declares the per-thread
// reservation signals (four entries each) and lists the functions still to
// be added. The empty "#( )" parameter list was dropped: it is only legal in
// SystemVerilog, and a module without it has the identical (empty) interface.
module smp (

);

// larx/stcx
// assume: if larx hits L1, core invalidates line automatically -> do not need to send back-invalidate
// reservation granule is 32B (or use lcd of all cores)
// one reservation per thread
// reservation is set before core receives reload data

// one entry per thread, index 0..3
wire stcx_store [0:3];
wire resv_ra_hit [0:3];
wire resv_set [0:3];
wire resv_rst [0:3];
// reservation entry: valid bit + address bits 31:5 (32B granule) = 28 bits
wire [27:0] resv_q [0:3]; // v, @31:5
// presumably the next-state value for resv_q (nothing drives it yet)
wire [27:0] resv_d [0:3];

// sync ops

// cache ops

// tlb ops

endmodule

313
rtl/a2node/top.v

@ -0,0 +1,313 @@ @@ -0,0 +1,313 @@
`include "defs.v"

// Top level: four A2P_4K1W cores (c0..c3) attached to one A2WB bridge, which
// presents a single instruction bus (wb_i_*) and a single data bus (wb_d_*)
// at the module boundary.
//
// Parameters
//   CORE_TYPES : four 4-bit core-type codes, passed straight to the bridge
//   BUS_TYPE   : bus-type code, passed straight to the bridge
//
// Ports
//   clk, rst   : clock to everything; rst goes only to the bridge (the cores
//                use the per-core rst_N wires declared below)
//   wb_i_*     : instruction-side bus (stb/cyc/adr out, ack/datr in)
//   wb_d_*     : data-side bus (stb/cyc/adr/we/sel/datw out, ack/datr in)
module top #(
parameter [0:15] CORE_TYPES = {`CORE_TYPE_WB2, `CORE_TYPE_WB2, `CORE_TYPE_WB2, `CORE_TYPE_WB2},
parameter [0:3] BUS_TYPE = `BUS_TYPE_WB2
) (
input clk,
input rst,
output wb_i_stb,
output wb_i_cyc,
output [31:0] wb_i_adr,
input wb_i_ack,
input [31:0] wb_i_datr,
output wb_d_stb,
output wb_d_cyc,
output [31:0] wb_d_adr,
output wb_d_we,
output [3:0] wb_d_sel,
output [31:0] wb_d_datw,
input wb_d_ack,
input [31:0] wb_d_datr
);

// Packed per-core vectors exchanged with the bridge.
//   cN_out : signals leaving core N (bus requests + the ext/interrupt group)
//   cN_in  : signals returned to core N (ack + read data)
wire [`WB2_WB_IN_START:0] c0_in;
wire [`WB2_WB_OUT_START:0] c0_out;
wire [`WB2_WB_IN_START:0] c1_in;
wire [`WB2_WB_OUT_START:0] c1_out;
wire [`WB2_WB_IN_START:0] c2_in;
wire [`WB2_WB_OUT_START:0] c2_out;
wire [`WB2_WB_IN_START:0] c3_in;
wire [`WB2_WB_OUT_START:0] c3_out;

// Packed vectors on the bridge's bus side.
wire [`BUS_WB2_IN_START:0] wb_in;
wire [`BUS_WB2_OUT_START:0] wb_out;

// ---- core 0 signals ----
// rst_0 has no driver in this module (the assign further down is commented
// out); it is tagged verilator public, so presumably the C++ testbench
// drives it - confirm against tb.cpp.
wire rst_0 /*verilator public*/;
wire wb_i_stb_0;
wire wb_i_cyc_0;
//wire wb_i_we_0;
//wire [3:0] wb_i_sel_0;
// 30-bit address [31:2]; padded with 2'b0 when packed into c0_out
wire [31:2] wb_i_adr_0;
//wire [31:0] wb_i_datw_0;
wire wb_i_ack_0;
wire [31:0] wb_i_datr_0;
wire wb_d_stb_0;
wire wb_d_cyc_0;
wire wb_d_we_0;
wire [3:0] wb_d_sel_0;
wire [31:2] wb_d_adr_0;
wire [31:0] wb_d_datw_0;
wire wb_d_ack_0;
wire [31:0] wb_d_datr_0;
// NOTE(review): the interrupt / reset-vector wires below are connected to the
// core and packed into c0_out, but nothing in this module drives them.
wire ext_int_0;
wire ext_int_s_0;
wire [31:0] ext_rst_vector_0;
wire soft_int_0;
wire timer_int_0;

// ---- core 1 signals (same set as core 0) ----
wire rst_1 /*verilator public*/;
wire wb_i_stb_1;
wire wb_i_cyc_1;
//wire wb_i_we_1;
//wire [3:0] wb_i_sel_1;
wire [31:2] wb_i_adr_1;
//wire [31:0] wb_i_datw_1;
wire wb_i_ack_1;
wire [31:0] wb_i_datr_1;
wire wb_d_stb_1;
wire wb_d_cyc_1;
wire wb_d_we_1;
wire [3:0] wb_d_sel_1;
wire [31:2] wb_d_adr_1;
wire [31:0] wb_d_datw_1;
wire wb_d_ack_1;
wire [31:0] wb_d_datr_1;
wire ext_int_1;
wire ext_int_s_1;
wire [31:0] ext_rst_vector_1;
wire soft_int_1;
wire timer_int_1;

// ---- core 2 signals (same set as core 0) ----
wire rst_2 /*verilator public*/;
wire wb_i_stb_2;
wire wb_i_cyc_2;
//wire wb_i_we_2;
//wire [3:0] wb_i_sel_2;
wire [31:2] wb_i_adr_2;
//wire [31:0] wb_i_datw_2;
wire wb_i_ack_2;
wire [31:0] wb_i_datr_2;
wire wb_d_stb_2;
wire wb_d_cyc_2;
wire wb_d_we_2;
wire [3:0] wb_d_sel_2;
wire [31:2] wb_d_adr_2;
wire [31:0] wb_d_datw_2;
wire wb_d_ack_2;
wire [31:0] wb_d_datr_2;
wire ext_int_2;
wire ext_int_s_2;
wire [31:0] ext_rst_vector_2;
wire soft_int_2;
wire timer_int_2;

// ---- core 3 signals (same set as core 0) ----
wire rst_3 /*verilator public*/;
wire wb_i_stb_3;
wire wb_i_cyc_3;
//wire wb_i_we_3;
//wire [3:0] wb_i_sel_3;
wire [31:2] wb_i_adr_3;
//wire [31:0] wb_i_datw_3;
wire wb_i_ack_3;
wire [31:0] wb_i_datr_3;
wire wb_d_stb_3;
wire wb_d_cyc_3;
wire wb_d_we_3;
wire [3:0] wb_d_sel_3;
wire [31:2] wb_d_adr_3;
wire [31:0] wb_d_datw_3;
wire wb_d_ack_3;
wire [31:0] wb_d_datr_3;
wire ext_int_3;
wire ext_int_s_3;
wire [31:0] ext_rst_vector_3;
wire soft_int_3;
wire timer_int_3;


// core in/out viewed by core
// cN_out is three 128-bit groups, most-significant first:
//   I bus : stb, cyc, we(=0), sel(=0), adr[31:0], datw(=0), 1'b0, 32'b0, 24'b0
//   D bus : stb, cyc, we, sel, adr[31:0], datw, 1'b0, 32'b0, 24'b0
//   ext   : ext_int, ext_int_s, ext_rst_vector, soft_int, timer_int, 92'b0
// The I bus has no write path, so its we/sel/datw positions are tied to zero.
// {adr, 2'b0} widens the 30-bit [31:2] address to a 32-bit field.
assign c0_out = {wb_i_stb_0, wb_i_cyc_0, 1'b0 , 4'b0, {wb_i_adr_0, 2'b0}, 32'b0, 1'b0, 32'b0, 24'b0,
wb_d_stb_0, wb_d_cyc_0, wb_d_we_0, wb_d_sel_0, {wb_d_adr_0, 2'b0}, wb_d_datw_0, 1'b0, 32'b0, 24'b0,
ext_int_0, ext_int_s_0, ext_rst_vector_0, soft_int_0, timer_int_0, 92'b0};

// cN_in: each response is a 33-bit slice {ack, datr[31:0]} starting at the
// I / D start index.
assign {wb_i_ack_0, wb_i_datr_0} = c0_in[`WB2_I_WB_IN_START:`WB2_I_WB_IN_START-32];
assign {wb_d_ack_0, wb_d_datr_0} = c0_in[`WB2_D_WB_IN_START:`WB2_D_WB_IN_START-32];

assign c1_out = {wb_i_stb_1, wb_i_cyc_1, 1'b0 , 4'b0, {wb_i_adr_1, 2'b0}, 32'b0, 1'b0, 32'b0, 24'b0,
wb_d_stb_1, wb_d_cyc_1, wb_d_we_1, wb_d_sel_1, {wb_d_adr_1, 2'b0}, wb_d_datw_1, 1'b0, 32'b0, 24'b0,
ext_int_1, ext_int_s_1, ext_rst_vector_1, soft_int_1, timer_int_1, 92'b0};

assign {wb_i_ack_1, wb_i_datr_1} = c1_in[`WB2_I_WB_IN_START:`WB2_I_WB_IN_START-32];
assign {wb_d_ack_1, wb_d_datr_1} = c1_in[`WB2_D_WB_IN_START:`WB2_D_WB_IN_START-32];

assign c2_out = {wb_i_stb_2, wb_i_cyc_2, 1'b0 , 4'b0, {wb_i_adr_2, 2'b0}, 32'b0, 1'b0, 32'b0, 24'b0,
wb_d_stb_2, wb_d_cyc_2, wb_d_we_2, wb_d_sel_2, {wb_d_adr_2, 2'b0}, wb_d_datw_2, 1'b0, 32'b0, 24'b0,
ext_int_2, ext_int_s_2, ext_rst_vector_2, soft_int_2, timer_int_2, 92'b0};

assign {wb_i_ack_2, wb_i_datr_2} = c2_in[`WB2_I_WB_IN_START:`WB2_I_WB_IN_START-32];
assign {wb_d_ack_2, wb_d_datr_2} = c2_in[`WB2_D_WB_IN_START:`WB2_D_WB_IN_START-32];

assign c3_out = {wb_i_stb_3, wb_i_cyc_3, 1'b0 , 4'b0, {wb_i_adr_3, 2'b0}, 32'b0, 1'b0, 32'b0, 24'b0,
wb_d_stb_3, wb_d_cyc_3, wb_d_we_3, wb_d_sel_3, {wb_d_adr_3, 2'b0}, wb_d_datw_3, 1'b0, 32'b0, 24'b0,
ext_int_3, ext_int_s_3, ext_rst_vector_3, soft_int_3, timer_int_3, 92'b0};

assign {wb_i_ack_3, wb_i_datr_3} = c3_in[`WB2_I_WB_IN_START:`WB2_I_WB_IN_START-32];
assign {wb_d_ack_3, wb_d_datr_3} = c3_in[`WB2_D_WB_IN_START:`WB2_D_WB_IN_START-32];

// bus in/out viewed by bridge
// Both slices of wb_out use the same field offsets from their start index:
//   START..START-6     : stb, cyc, we, sel[3:0]
//   START-7..START-38  : adr[31:0]
//   START-39..START-70 : datw[31:0]
// The I side only takes stb, cyc and adr; offsets 2..6 and 39..70 of the
// I slice are not used here.
assign {wb_i_stb, wb_i_cyc} = wb_out[`BUS_WB2_I_OUT_START:`BUS_WB2_I_OUT_START-1];
assign wb_i_adr = wb_out[`BUS_WB2_I_OUT_START-7:`BUS_WB2_I_OUT_START-38];
assign {wb_d_stb, wb_d_cyc, wb_d_we, wb_d_sel} = wb_out[`BUS_WB2_D_OUT_START:`BUS_WB2_D_OUT_START-6];
assign wb_d_adr = wb_out[`BUS_WB2_D_OUT_START-7:`BUS_WB2_D_OUT_START-38];
assign wb_d_datw = wb_out[`BUS_WB2_D_OUT_START-39:`BUS_WB2_D_OUT_START-70];

// Bus responses to the bridge: two 64-bit groups, I then D, each
// {ack, datr[31:0], 31'b0} (128 bits total).
assign wb_in = {wb_i_ack, wb_i_datr, 31'b0, wb_d_ack, wb_d_datr, 31'b0};

// may want to control resets with config, etc.
// With these assigns disabled, rst_0..rst_3 are left undriven in RTL.
//assign rst_0 = rst;
//assign rst_1 = rst;
//assign rst_2 = rst;
//assign rst_3 = rst;

// Bridge between the four cores and the external I/D buses.
// Core vectors are concatenated with core 0 in the most-significant position.
A2WB #(
.CORE_TYPES(CORE_TYPES),
.BUS_TYPE(BUS_TYPE)
) bridge (
.clk(clk),
.rst(rst),
.cores_in({c0_out, c1_out, c2_out, c3_out}),
.cores_out({c0_in, c1_in, c2_in, c3_in}),
.bus_in(wb_in),
.bus_out(wb_out)
);

// Core 0. A2P_4K1W is defined outside this file. The I-bus WE/SEL/DAT_MOSI
// ports and both BTE/CTI ports are left unconnected; both ERR ports are tied
// to 0.
A2P_4K1W c0 (
.clk(clk),
.reset(rst_0),
.iBusWB_STB(wb_i_stb_0),
.iBusWB_CYC(wb_i_cyc_0),
.iBusWB_ADR(wb_i_adr_0),
.iBusWB_WE(),
.iBusWB_SEL(),
.iBusWB_DAT_MOSI(),
.iBusWB_ACK(wb_i_ack_0),
.iBusWB_DAT_MISO(wb_i_datr_0),
.iBusWB_ERR(1'd0),
.iBusWB_BTE(),
.iBusWB_CTI(),
.dBusWB_STB(wb_d_stb_0),
.dBusWB_CYC(wb_d_cyc_0),
.dBusWB_WE(wb_d_we_0),
.dBusWB_SEL(wb_d_sel_0),
.dBusWB_ADR(wb_d_adr_0),
.dBusWB_DAT_MOSI(wb_d_datw_0),
.dBusWB_ACK(wb_d_ack_0),
.dBusWB_DAT_MISO(wb_d_datr_0),
.dBusWB_ERR(1'd0),
.dBusWB_BTE(),
.dBusWB_CTI(),
.externalInterrupt(ext_int_0),
.externalInterruptS(ext_int_s_0),
.externalResetVector(ext_rst_vector_0),
.softwareInterrupt(soft_int_0),
.timerInterrupt(timer_int_0)
);

// Core 1: same hookup as core 0, using the *_1 signals.
A2P_4K1W c1 (
.clk(clk),
.reset(rst_1),
.iBusWB_STB(wb_i_stb_1),
.iBusWB_CYC(wb_i_cyc_1),
.iBusWB_ADR(wb_i_adr_1),
.iBusWB_WE(),
.iBusWB_SEL(),
.iBusWB_DAT_MOSI(),
.iBusWB_ACK(wb_i_ack_1),
.iBusWB_DAT_MISO(wb_i_datr_1),
.iBusWB_ERR(1'd0),
.iBusWB_BTE(),
.iBusWB_CTI(),
.dBusWB_STB(wb_d_stb_1),
.dBusWB_CYC(wb_d_cyc_1),
.dBusWB_WE(wb_d_we_1),
.dBusWB_SEL(wb_d_sel_1),
.dBusWB_ADR(wb_d_adr_1),
.dBusWB_DAT_MOSI(wb_d_datw_1),
.dBusWB_ACK(wb_d_ack_1),
.dBusWB_DAT_MISO(wb_d_datr_1),
.dBusWB_ERR(1'd0),
.dBusWB_BTE(),
.dBusWB_CTI(),
.externalInterrupt(ext_int_1),
.externalInterruptS(ext_int_s_1),
.externalResetVector(ext_rst_vector_1),
.softwareInterrupt(soft_int_1),
.timerInterrupt(timer_int_1)
);

// Core 2: same hookup as core 0, using the *_2 signals.
A2P_4K1W c2 (
.clk(clk),
.reset(rst_2),
.iBusWB_STB(wb_i_stb_2),
.iBusWB_CYC(wb_i_cyc_2),
.iBusWB_ADR(wb_i_adr_2),
.iBusWB_WE(),
.iBusWB_SEL(),
.iBusWB_DAT_MOSI(),
.iBusWB_ACK(wb_i_ack_2),
.iBusWB_DAT_MISO(wb_i_datr_2),
.iBusWB_ERR(1'd0),
.iBusWB_BTE(),
.iBusWB_CTI(),
.dBusWB_STB(wb_d_stb_2),
.dBusWB_CYC(wb_d_cyc_2),
.dBusWB_WE(wb_d_we_2),
.dBusWB_SEL(wb_d_sel_2),
.dBusWB_ADR(wb_d_adr_2),
.dBusWB_DAT_MOSI(wb_d_datw_2),
.dBusWB_ACK(wb_d_ack_2),
.dBusWB_DAT_MISO(wb_d_datr_2),
.dBusWB_ERR(1'd0),
.dBusWB_BTE(),
.dBusWB_CTI(),
.externalInterrupt(ext_int_2),
.externalInterruptS(ext_int_s_2),
.externalResetVector(ext_rst_vector_2),
.softwareInterrupt(soft_int_2),
.timerInterrupt(timer_int_2)
);

// Core 3: same hookup as core 0, using the *_3 signals.
A2P_4K1W c3 (
.clk(clk),
.reset(rst_3),
.iBusWB_STB(wb_i_stb_3),
.iBusWB_CYC(wb_i_cyc_3),
.iBusWB_ADR(wb_i_adr_3),
.iBusWB_WE(),
.iBusWB_SEL(),
.iBusWB_DAT_MOSI(),
.iBusWB_ACK(wb_i_ack_3),
.iBusWB_DAT_MISO(wb_i_datr_3),
.iBusWB_ERR(1'd0),
.iBusWB_BTE(),
.iBusWB_CTI(),
.dBusWB_STB(wb_d_stb_3),
.dBusWB_CYC(wb_d_cyc_3),
.dBusWB_WE(wb_d_we_3),
.dBusWB_SEL(wb_d_sel_3),
.dBusWB_ADR(wb_d_adr_3),
.dBusWB_DAT_MOSI(wb_d_datw_3),
.dBusWB_ACK(wb_d_ack_3),
.dBusWB_DAT_MISO(wb_d_datr_3),
.dBusWB_ERR(1'd0),
.dBusWB_BTE(),
.dBusWB_CTI(),
.externalInterrupt(ext_int_3),
.externalInterruptS(ext_int_s_3),
.externalResetVector(ext_rst_vector_3),
.softwareInterrupt(soft_int_3),
.timerInterrupt(timer_int_3)
);

endmodule
Loading…
Cancel
Save