diff --git a/rtl/a2node/a2wb.v b/rtl/a2node/a2wb.v
new file mode 100644
index 0000000..9310f05
--- /dev/null
+++ b/rtl/a2node/a2wb.v
@@ -0,0 +1,411 @@
+// A2 Core Bridge
+
+// adapt cores and buses with generic module
+// one thread/core for now; multithread needs thread tag, deeper queues
+// could include l2, or interface to wider bus to speed up cache line fetches
+
+`include "defs.v"
+
+module A2WB #(
+   parameter [0:15] CORE_TYPES = {`CORE_TYPE_NONE, `CORE_TYPE_NONE, `CORE_TYPE_NONE, `CORE_TYPE_NONE},   // one 4-bit CORE_TYPE_* code per core slot; slot 0 is bits [0:3]
+   parameter [0:3] BUS_TYPE = `BUS_TYPE_WB2,   // BUS_TYPE_* code selecting the bus-side logic generated below
+   parameter MAX_CORES = 4                     // number of core slots scanned by the generate loops
+) (
+   input clk,
+   input rst,
+   input [(`WB2_WB_OUT_START+1)*4-1:0] cores_in,    // four per-core vectors; core 0 is the most significant slice
+   output [(`WB2_WB_IN_START+1)*4-1:0] cores_out,   // four per-core vectors; core 0 is the most significant slice
+   input [127:0] bus_in,                            // responses from the main bus
+   output [255:0] bus_out                           // requests to the main bus
+);
+// number of populated core slots; elaboration-time constant (was an integer driven by self-referencing continuous assigns)
+localparam integer NUM_CORES = (CORE_TYPES[0:3] != `CORE_TYPE_NONE) + (CORE_TYPES[4:7] != `CORE_TYPE_NONE) + (CORE_TYPES[8:11] != `CORE_TYPE_NONE) + (CORE_TYPES[12:15] != `CORE_TYPE_NONE);
+
+genvar i;
+
+// ------------------------------------------------------------------------------------------------
+// I/O Connections
+
+// cores must be contiguous, starting at 0
+
+// core inputs to bridge:
+
+/* wb1 core out:
+assign c0_out = {wb_stb_0, wb_cyc_0, wb_we_0, wb_sel_0, wb_adr_0, wb_dat_o_0, 1'b0, 32'b0, 27'b0, // 128
+                 ext_in}; // 128
+*/
+/* wb2 core out:
+assign c0_out = {i_wb_stb_0, i_wb_cyc_0, 1'b0 , 4'b0, {i_wb_adr_0, 2'b0}, 32'b0, 1'b0, 32'b0, 27'b0, // 128
+                 d_wb_stb_0, d_wb_cyc_0, d_wb_we_0, d_wb_sel_0, d_wb_adr_0, d_wb_dat_o_0, 1'b0, 32'b0, 27'b0, // 128
+                 ext_in}; // 128
+*/
+/* a2l2 core out:
+assign c0_in = {i_wb_ack_0, i_wb_dat_i_0, 95'b0, // 128
+                ext_out}; // 128
+*/
+
+/*
+bus out/in:
+   assign bus_out[102] = bus_i_wb_cyc;
+   assign bus_out[101] = bus_i_wb_stb;
+   assign bus_out[100:71] = bus_i_wb_adr;
+   assign bus_out[70] = bus_d_wb_cyc;
+   assign bus_out[69] = bus_d_wb_stb;
+   assign bus_out[68] = bus_d_wb_we;
+   assign bus_out[67:64] = bus_d_wb_sel;
+   assign bus_out[63:32] = bus_d_wb_adr;
+   assign bus_out[31:0] = bus_d_wb_datw;
+
+   assign bus_i_wb_ack = bus_in[65];
+   assign bus_i_wb_datr = bus_in[64:33];
+   assign bus_d_wb_ack = bus_in[32];
+   assign bus_d_wb_datr = bus_in[31:0];
+*/
+// NOTE: the bit positions in the comment above differ from the BUS_WB2_* offsets in defs.v that the code below uses
+// NOTE(review): the comments above list stb before cyc, the slices below read cyc first - confirm against the core wrappers
+wire [`WB2_WB_OUT_START:0] core_in [0:3];
+assign {core_in[0], core_in[1], core_in[2], core_in[3]} = cores_in;
+wire [`WB2_WB_IN_START:0] core_out [0:3];   // sized with the IN macro to match cores_out (same value as the OUT macro today)
+assign cores_out = {core_out[0], core_out[1], core_out[2], core_out[3]};
+
+wire i_wb_cyc [0:3];
+wire i_wb_stb [0:3];
+wire [31:0] i_wb_adr [0:3];
+wire i_wb_ack [0:3];
+wire [31:0] i_wb_datr[0:3];
+wire d_wb_cyc [0:3];
+wire d_wb_stb [0:3];
+wire d_wb_we [0:3];
+wire [3:0] d_wb_sel [0:3];
+wire [31:0] d_wb_adr [0:3];
+wire [31:0] d_wb_datw [0:3];
+wire d_wb_ack [0:3];
+wire [31:0] d_wb_datr [0:3];
+wire [127:0] ext_cmd [0:3];
+wire [127:0] ext_rsp [0:3];
+
+wire [`CMD_SIZE-1:0] cmd_out_0[0:3];
+wire [`CMD_SIZE-1:0] cmd_out_1[0:3];
+wire [1:0] cmd_taken[0:3];
+wire [1:0] cmd_complete[0:3];
+wire [`CMD_SIZE-1:0] req_0;
+wire [`CMD_SIZE-1:0] req_1;
+wire [`RSP_SIZE-1:0] core_rsp_0[0:3];
+wire [`RSP_SIZE-1:0] core_rsp_1[0:3];
+wire [`RSP_SIZE-1:0] rsp_0;
+wire [`RSP_SIZE-1:0] rsp_1;
+
+// to do this, need to label scope and ref it
+//if (BUS_TYPE == `BUS_TYPE_WB1) begin
+   wire bus_wb_cyc;
+   wire bus_wb_stb;
+   wire bus_wb_we;
+   wire [3:0] bus_wb_sel;
+   wire [31:0] bus_wb_adr;
+   wire [31:0] bus_wb_datw;
+   wire bus_wb_ack;
+   wire [31:0] bus_wb_datr;
+//end
+
+//if (BUS_TYPE == `BUS_TYPE_WB2) begin
+   wire bus_i_wb_cyc;
+   wire bus_i_wb_stb;
+   wire [31:0] bus_i_wb_adr;
+   wire bus_i_wb_ack;
+   wire [31:0] bus_i_wb_datr;
+   wire bus_i_rdy;
+   wire bus_d_wb_cyc;
+   wire bus_d_wb_stb;
+   wire bus_d_wb_we;
+   wire [3:0] bus_d_wb_sel;
+   wire [31:0] bus_d_wb_adr;
+   wire [31:0] bus_d_wb_datw;
+   wire bus_d_wb_ack;
+   wire [31:0] bus_d_wb_datr;
+   wire bus_d_rdy;
+   // (duplicate declaration of bus_i_wb_ack removed; it is declared a few lines up)
+   // (duplicate declaration of bus_i_wb_datr removed)
+   // (duplicate declaration of bus_d_wb_ack removed)
+   // (duplicate declaration of bus_d_wb_datr removed)
+//end
+
+generate
+   // unpack each populated core slot's input vector and pack its response vector, by core type
+   for (i = 0; i < MAX_CORES; i++) begin
+      case (CORE_TYPES[i*4:i*4+3])
+         `CORE_TYPE_NONE: begin
+         end
+         `CORE_TYPE_A2L2: begin
+            // (slot is counted in the NUM_CORES localparam)
+            // a2l2
+         end
+         `CORE_TYPE_WB1: begin
+            // (slot is counted in the NUM_CORES localparam)
+            assign d_wb_cyc[i] = core_in[i][`WB1_WB_OUT_START];
+            assign d_wb_stb[i] = core_in[i][`WB1_WB_OUT_START-1];
+            assign d_wb_we[i] = core_in[i][`WB1_WB_OUT_START-2];
+            assign d_wb_sel[i] = core_in[i][`WB1_WB_OUT_START-3:`WB1_WB_OUT_START-6];
+            assign d_wb_adr[i] = core_in[i][`WB1_WB_OUT_START-7:`WB1_WB_OUT_START-38];
+            assign d_wb_datw[i] = core_in[i][`WB1_WB_OUT_START-39:`WB1_WB_OUT_START-70];
+            assign ext_cmd[i] = core_in[i][`WB1_EXT_OUT_START:0];   // NOTE(review): slice is 256 bits, ext_cmd is 128 - confirm intended range
+            assign core_out[i][`WB1_WB_IN_START] = core_rsp_0[i][`RSP_VALID];
+            assign core_out[i][`WB1_WB_IN_START-1:`WB1_WB_IN_START-32] = core_rsp_0[i][`RSP_DATA];
+            assign core_out[i][`WB1_EXT_IN_START:0] = ext_rsp[i];   // NOTE(review): slice is 256 bits, ext_rsp is 128 - confirm intended range
+         end
+         `CORE_TYPE_WB2: begin
+            // (slot is counted in the NUM_CORES localparam)
+            assign i_wb_cyc[i] = core_in[i][`WB2_I_WB_OUT_START];
+            assign i_wb_stb[i] = core_in[i][`WB2_I_WB_OUT_START-1];
+            assign i_wb_adr[i] = core_in[i][`WB2_I_WB_OUT_START-7:`WB2_I_WB_OUT_START-38];
+            assign d_wb_cyc[i] = core_in[i][`WB2_D_WB_OUT_START];
+            assign d_wb_stb[i] = core_in[i][`WB2_D_WB_OUT_START-1];
+            assign d_wb_we[i] = core_in[i][`WB2_D_WB_OUT_START-2];
+            assign d_wb_sel[i] = core_in[i][`WB2_D_WB_OUT_START-3:`WB2_D_WB_OUT_START-6];
+            assign d_wb_adr[i] = core_in[i][`WB2_D_WB_OUT_START-7:`WB2_D_WB_OUT_START-38];
+            assign d_wb_datw[i] = core_in[i][`WB2_D_WB_OUT_START-39:`WB2_D_WB_OUT_START-70];
+            assign ext_cmd[i] = core_in[i][`WB2_EXT_OUT_START:0];
+            assign core_out[i][`WB2_I_WB_IN_START] = core_rsp_0[i][`RSP_VALID];
+            assign core_out[i][`WB2_I_WB_IN_START-1:`WB2_I_WB_IN_START-32] = core_rsp_0[i][`RSP_DATA];
+            assign core_out[i][`WB2_D_WB_IN_START] = core_rsp_1[i][`RSP_VALID];
+            assign core_out[i][`WB2_D_WB_IN_START-1:`WB2_D_WB_IN_START-32] = core_rsp_1[i][`RSP_DATA];
+            assign core_out[i][`WB2_EXT_IN_START:0] = ext_rsp[i];
+         end
+      endcase
+   end
+endgenerate
+
+// ------------------------------------------------------------------------------------------------
+// Command Interfaces
+//
+// one cmd_wb per WB1/WB2 core slot; a WB1 core has its instruction-side inputs tied to zero
+generate
+   for (i = 0; i < MAX_CORES; i++) begin: cmd
+      case (CORE_TYPES[i*4:i*4+3])
+         `CORE_TYPE_NONE: begin
+         end
+         `CORE_TYPE_A2L2: begin
+            // convert a2l2 to internal format
+         end
+         `CORE_TYPE_WB1: begin
+            cmd_wb #(.CORE_ID(i), .CORE_TYPE(CORE_TYPES[i*4:i*4+3]), .BUS_TYPE(BUS_TYPE)) cmd_wb1 (
+               .clk(clk),
+               .rst(rst),
+               .i_wb_cyc('b0),
+               .i_wb_stb('b0),
+               .i_wb_adr('h0),
+               .d_wb_cyc(d_wb_cyc[i]),
+               .d_wb_stb(d_wb_stb[i]),
+               .d_wb_we(d_wb_we[i]),
+               .d_wb_sel(d_wb_sel[i]),
+               .d_wb_adr(d_wb_adr[i]),
+               .d_wb_datw(d_wb_datw[i]),
+               .ext_cmd(ext_cmd[i]),
+               .cmd_taken(cmd_taken[i]),
+               .cmd_complete(cmd_complete[i]),
+               .cmd_out_0(cmd_out_0[i]),
+               .cmd_out_1(cmd_out_1[i])
+            );
+         end
+         `CORE_TYPE_WB2: begin
+            cmd_wb #(.CORE_ID(i), .CORE_TYPE(CORE_TYPES[i*4:i*4+3]), .BUS_TYPE(BUS_TYPE)) cmd_wb2 (
+               .clk(clk),
+               .rst(rst),
+               .i_wb_cyc(i_wb_cyc[i]),
+               .i_wb_stb(i_wb_stb[i]),
+               .i_wb_adr(i_wb_adr[i]),
+               .d_wb_cyc(d_wb_cyc[i]),
+               .d_wb_stb(d_wb_stb[i]),
+               .d_wb_we(d_wb_we[i]),
+               .d_wb_sel(d_wb_sel[i]),
+               .d_wb_adr(d_wb_adr[i]),
+               .d_wb_datw(d_wb_datw[i]),
+               .ext_cmd(ext_cmd[i]),
+               .cmd_taken(cmd_taken[i]),
+               .cmd_complete(cmd_complete[i]),
+               .cmd_out_0(cmd_out_0[i]),
+               .cmd_out_1(cmd_out_1[i])
+            );
+         end
+      endcase
+   end
+endgenerate
+
+// ------------------------------------------------------------------------------------------------
+// Arbitration
+//
+// LRU, etc. select from pending cmds; also needs smp to stall some/all cmds
+// do addr cmp here, if necessary? or could do in smp - important if multiple outstanding req's allowed
+// by any bus, which means there will be cmd and rsp queues with ordering requirements
+// also needs to block cmds when bus is busy
+
+// cmds include valid indicator
+// cmd_taken is bit vector for cmds 1,0
+// cmd_out's go to the bus; rsp_in's have to be associated with requesting core
+
+arb #() arb (
+   .clk(clk),
+   .rst(rst),
+   .cmd_in_0_0(cmd_out_0[0]),
+   .cmd_in_1_0(cmd_out_1[0]),
+   .cmd_tkn_0(cmd_taken[0]),
+   .cmd_in_0_1(cmd_out_0[1]),
+   .cmd_in_1_1(cmd_out_1[1]),
+   .cmd_tkn_1(cmd_taken[1]),
+   .cmd_in_0_2(cmd_out_0[2]),
+   .cmd_in_1_2(cmd_out_1[2]),
+   .cmd_tkn_2(cmd_taken[2]),
+   .cmd_in_0_3(cmd_out_0[3]),
+   .cmd_in_1_3(cmd_out_1[3]),
+   .cmd_tkn_3(cmd_taken[3]),
+   .bus_rdy_0(bus_i_rdy),
+   .bus_rdy_1(bus_d_rdy),
+   .cmd_out_0(req_0),
+   .cmd_out_1(req_1)
+);
+
+// ------------------------------------------------------------------------------------------------
+// SMP
+// NOTE(review): no ports are connected here and no smp module appears in the visible part of this change
+// special ops: track resv, stall pending cmds, gen rsp
+smp #() smp (
+
+);
+
+// ------------------------------------------------------------------------------------------------
+// ------------------------------------------------------------------------------------------------
+// Bus interface and logic can be replaced for different buses
+
+// ------------------------------------------------------------------------------------------------
+// Bus Out
+// requests to main bus
+
+generate begin: bus_wire_out
+   case(BUS_TYPE)
+      `BUS_TYPE_WB1: begin
+         assign bus_out[`BUS_WB1_OUT_START] = bus_wb_cyc;   // was bus_i_wb_cyc, which nothing drives when the single-WB bus is selected
+         assign bus_out[`BUS_WB1_OUT_START-1] = bus_wb_stb;
+         assign bus_out[`BUS_WB1_OUT_START-2] = bus_wb_we;
+         assign bus_out[`BUS_WB1_OUT_START-3:`BUS_WB1_OUT_START-6] = bus_wb_sel;
+         assign bus_out[`BUS_WB1_OUT_START-7:`BUS_WB1_OUT_START-38] = bus_wb_adr;
+         assign bus_out[`BUS_WB1_OUT_START-39:`BUS_WB1_OUT_START-70] = bus_wb_datw;
+      end
+      `BUS_TYPE_WB2: begin
+         assign bus_out[`BUS_WB2_I_OUT_START] = bus_i_wb_cyc;
+         assign bus_out[`BUS_WB2_I_OUT_START-1] = bus_i_wb_stb;
+         assign bus_out[`BUS_WB2_I_OUT_START-7:`BUS_WB2_I_OUT_START-38] = bus_i_wb_adr;
+         assign bus_out[`BUS_WB2_D_OUT_START] = bus_d_wb_cyc;
+         assign bus_out[`BUS_WB2_D_OUT_START-1] = bus_d_wb_stb;
+         assign bus_out[`BUS_WB2_D_OUT_START-2] = bus_d_wb_we;
+         assign bus_out[`BUS_WB2_D_OUT_START-3:`BUS_WB2_D_OUT_START-6] = bus_d_wb_sel;
+         assign bus_out[`BUS_WB2_D_OUT_START-7:`BUS_WB2_D_OUT_START-38] = bus_d_wb_adr;
+         assign bus_out[`BUS_WB2_D_OUT_START-39:`BUS_WB2_D_OUT_START-70] = bus_d_wb_datw;
+      end
+   endcase
+end
+endgenerate
+
+// ------------------------------------------------------------------------------------------------
+// Bus In
+// responses from main bus
+
+generate begin: bus_wire_in
+   case(BUS_TYPE)
+      `BUS_TYPE_WB1: begin
+         assign bus_wb_ack = bus_in[`BUS_WB1_IN_START];
+         assign bus_wb_datr = bus_in[`BUS_WB1_IN_START-1:`BUS_WB1_IN_START-32];
+      end
+      `BUS_TYPE_WB2: begin
+         assign bus_i_wb_ack = bus_in[`BUS_WB2_I_IN_START];
+         assign bus_i_wb_datr = bus_in[`BUS_WB2_I_IN_START-1:`BUS_WB2_I_IN_START-32];
+         assign bus_d_wb_ack = bus_in[`BUS_WB2_D_IN_START];
+         assign bus_d_wb_datr = bus_in[`BUS_WB2_D_IN_START-1:`BUS_WB2_D_IN_START-32];
+      end
+   endcase
+end
+endgenerate
+
+// ------------------------------------------------------------------------------------------------
+// Bus Interface
+// translate reqs and handle bus transactions
+
+generate begin: bus
+   case(BUS_TYPE)
+      `BUS_TYPE_WB1: begin
+         bus_wb1 #() bus(
+            .clk(clk),
+            .rst(rst),
+            .rdy(bus_i_rdy),   // was the undeclared net bus_rdy; arb reads bus_i_rdy for req_0. NOTE(review): bus_d_rdy stays undriven in this configuration, and bus_wb1 is not in the visible part of this change
+            .cmd(req_0),
+            .rsp(rsp_0),
+            .wb_stb(bus_wb_stb),
+            .wb_cyc(bus_wb_cyc),
+            .wb_we(bus_wb_we),
+            .wb_sel(bus_wb_sel),
+            .wb_adr(bus_wb_adr),
+            .wb_datw(bus_wb_datw),
+            .wb_ack(bus_wb_ack),
+            .wb_datr(bus_wb_datr)
+         );
+      end
+      `BUS_TYPE_WB2: begin
+         bus_wb2 #() bus(
+            .clk(clk),
+            .rst(rst),
+            .rdy_i(bus_i_rdy),
+            .rdy_d(bus_d_rdy),
+            .cmd_i(req_0),
+            .cmd_d(req_1),
+            .rsp_i(rsp_0),
+            .rsp_d(rsp_1),
+            .i_wb_cyc(bus_i_wb_cyc),
+            .i_wb_stb(bus_i_wb_stb),
+            .i_wb_adr(bus_i_wb_adr),
+            .i_wb_ack(bus_i_wb_ack),
+            .i_wb_datr(bus_i_wb_datr),
+            .d_wb_cyc(bus_d_wb_cyc),
+            .d_wb_stb(bus_d_wb_stb),
+            .d_wb_we(bus_d_wb_we),
+            .d_wb_sel(bus_d_wb_sel),
+            .d_wb_adr(bus_d_wb_adr),
+            .d_wb_datw(bus_d_wb_datw),
+            .d_wb_ack(bus_d_wb_ack),
+            .d_wb_datr(bus_d_wb_datr)
+         );
+      end
+   endcase
+end
+endgenerate
+
+// ------------------------------------------------------------------------------------------------
+// Response Queues
+// responses for cores - just routing if no queues needed
+// but should be component; rsp's are formatted differently based on core interface type, plus
+// a2l2 could support queues even if wb doesn't
+// also, rsp may be gen'd from other units like smp, config, mailbox, etc.
+generate begin: rsp
+   for (i = 0; i < MAX_CORES; i++) begin
+      assign core_rsp_0[i][`RSP_VALID] = rsp_0[`RSP_VALID] & (rsp_0[`RSP_CORE_ID] == i);   // valid only for the core whose id is in the response
+      assign core_rsp_0[i][`RSP_DATA] = rsp_0[`RSP_DATA];
+      assign cmd_complete[i][0] = core_rsp_0[i][`RSP_VALID];
+      assign core_rsp_1[i][`RSP_VALID] = rsp_1[`RSP_VALID] & (rsp_1[`RSP_CORE_ID] == i);
+      assign core_rsp_1[i][`RSP_DATA] = rsp_1[`RSP_DATA];
+      assign cmd_complete[i][1] = core_rsp_1[i][`RSP_VALID];
+   end
+end
+endgenerate
+
+
+
+// ------------------------------------------------------------------------------------------------
+// Misc/Errors/Debug
+// stuff
+
+/*
+generate begin: misc
+   for (i = 0; i < MAX_CORES; i++) begin
+   end
+end
+endgenerate
+*/
+
+endmodule
\ No newline at end of file
diff --git a/rtl/a2node/arb.v b/rtl/a2node/arb.v
new file mode 100644
index 0000000..d80a0f7
--- /dev/null
+++ b/rtl/a2node/arb.v
@@ -0,0 +1,186 @@
+`include "defs.v"
+
+module arb # (
+) (
+   input clk,
+   input rst,
+   input bus_rdy_0,                       // gates selection of a command for cmd_out_0
+   input bus_rdy_1,                       // gates selection of a command for cmd_out_1
+   input [`CMD_SIZE-1:0] cmd_in_0_0,      // cmd_in_<bus>_<core>
+   input [`CMD_SIZE-1:0] cmd_in_1_0,
+   output [1:0] cmd_tkn_0,                // cmd_tkn_<core>[bus]
+   input [`CMD_SIZE-1:0] cmd_in_0_1,
+   input [`CMD_SIZE-1:0] cmd_in_1_1,
+   output [1:0] cmd_tkn_1,
+   input [`CMD_SIZE-1:0] cmd_in_0_2,
+   input
[`CMD_SIZE-1:0] cmd_in_1_2,
+   output [1:0] cmd_tkn_2,
+   input [`CMD_SIZE-1:0] cmd_in_0_3,
+   input [`CMD_SIZE-1:0] cmd_in_1_3,
+   output [1:0] cmd_tkn_3,
+   output [`CMD_SIZE-1:0] cmd_out_0,
+   output [`CMD_SIZE-1:0] cmd_out_1
+);
+
+// fairly choose 1 or 2 (depending on output buses) cmds
+// mark taken from queue
+// obey restrictions from smp, addr cmp, etc. (cmd_stall)
+// pri_N_q[c] is core c's priority value for bus N; a lower value wins (00 is highest priority)
+//reg [7:0] pri_0_q;
+//wire [7:0] pri_0_d;
+reg [1:0] pri_0_q[0:3];
+wire [1:0] pri_0_d[0:3];
+wire [7:0] pri_update_0;
+reg [1:0] pri_1_q[0:3];
+wire [1:0] pri_1_d[0:3];
+wire [7:0] pri_update_1;
+wire [3:0] cmd_valids_0;
+wire [1:0] cmd_sel_0;
+wire cmd_out_val_0;
+wire [3:0] cmd_valids_1;
+wire [1:0] cmd_sel_1;
+wire cmd_out_val_1;
+
+integer i;
+
+   // FF (nonblocking assignments so other clocked logic sampling these registers sees the pre-edge value)
+   always @(posedge clk) begin
+
+      if (rst) begin
+         // reset priorities to the core index, so core 0 starts with the highest priority
+         for (i = 0; i < 4; i++) begin
+            pri_0_q[i] <= i;
+            pri_1_q[i] <= i;
+         end
+
+      end else begin
+
+         for (i = 0; i < 4; i++) begin
+            pri_0_q[i] <= pri_0_d[i];
+            pri_1_q[i] <= pri_1_d[i];
+         end
+      end
+
+   end
+
+   // select next commands; 00 is highest priority
+   assign cmd_valids_0 = {cmd_in_0_0[`CMD_VALID],cmd_in_0_1[`CMD_VALID],cmd_in_0_2[`CMD_VALID],cmd_in_0_3[`CMD_VALID]};
+   assign cmd_sel_0 = pri_sel(cmd_valids_0, pri_0_q);
+   assign cmd_out_val_0 = bus_rdy_0 & (|cmd_valids_0); //wtf depends if you want to allow it to change while bus is busy
+   assign cmd_out_0 = cmd_sel_0 == 2'b00 ? cmd_in_0_0 :
+                      cmd_sel_0 == 2'b01 ? cmd_in_0_1 :
+                      cmd_sel_0 == 2'b10 ? cmd_in_0_2 :
+                                           cmd_in_0_3;
+
+   assign cmd_valids_1 = {cmd_in_1_0[`CMD_VALID],cmd_in_1_1[`CMD_VALID],cmd_in_1_2[`CMD_VALID],cmd_in_1_3[`CMD_VALID]};
+   assign cmd_sel_1 = pri_sel(cmd_valids_1, pri_1_q);
+   assign cmd_out_val_1 = bus_rdy_1 & (|cmd_valids_1); //wtf depends if you want to allow it to change while bus is busy
+   assign cmd_out_1 = cmd_sel_1 == 2'b00 ? cmd_in_1_0 :
+                      cmd_sel_1 == 2'b01 ? cmd_in_1_1 :
+                      cmd_sel_1 == 2'b10 ? cmd_in_1_2 :
+                                           cmd_in_1_3;
+
+   // update priorities if cmd selected
+   assign pri_update_0 = pri_upd(cmd_sel_0, pri_0_q);
+   // pri_upd packs core 0 in bits [1:0] up to core 3 in bits [7:6]
+   assign pri_0_d[0] = cmd_out_val_0 ? pri_update_0[1:0] : pri_0_q[0];
+   assign pri_0_d[1] = cmd_out_val_0 ? pri_update_0[3:2] : pri_0_q[1];
+   assign pri_0_d[2] = cmd_out_val_0 ? pri_update_0[5:4] : pri_0_q[2];
+   assign pri_0_d[3] = cmd_out_val_0 ? pri_update_0[7:6] : pri_0_q[3];
+
+   assign pri_update_1 = pri_upd(cmd_sel_1, pri_1_q);
+
+   assign pri_1_d[0] = cmd_out_val_1 ? pri_update_1[1:0] : pri_1_q[0];
+   assign pri_1_d[1] = cmd_out_val_1 ? pri_update_1[3:2] : pri_1_q[1];
+   assign pri_1_d[2] = cmd_out_val_1 ? pri_update_1[5:4] : pri_1_q[2];
+   assign pri_1_d[3] = cmd_out_val_1 ? pri_update_1[7:6] : pri_1_q[3];
+
+   // mark taken
+   assign cmd_tkn_0[0] = cmd_out_val_0 & (cmd_sel_0 == 2'b00);
+   assign cmd_tkn_1[0] = cmd_out_val_0 & (cmd_sel_0 == 2'b01);
+   assign cmd_tkn_2[0] = cmd_out_val_0 & (cmd_sel_0 == 2'b10);
+   assign cmd_tkn_3[0] = cmd_out_val_0 & (cmd_sel_0 == 2'b11);
+
+   assign cmd_tkn_0[1] = cmd_out_val_1 & (cmd_sel_1 == 2'b00);
+   assign cmd_tkn_1[1] = cmd_out_val_1 & (cmd_sel_1 == 2'b01);
+   assign cmd_tkn_2[1] = cmd_out_val_1 & (cmd_sel_1 == 2'b10);
+   assign cmd_tkn_3[1] = cmd_out_val_1 & (cmd_sel_1 == 2'b11);
+
+endmodule
+// pri_sel: index of the valid requester with the lowest priority value; val[0] is core 0; returns 00 when none is valid
+// could also account for configured thread/core priority
+function [1:0] pri_sel (input [0:3] val, [1:0] pri[0:3]);
+   begin
+      pri_sel = 2'b00;
+      if (val[0]) begin
+         if (~((val[1] & pri[1] < pri[0]) | (val[2] & pri[2] < pri[0]) | (val[3] & pri[3] < pri[0]))) begin   // no other valid requester outranks core 0
+            pri_sel = 2'b00;
+         end
+      end
+      if (val[1]) begin
+         if (~((val[0] & pri[0] < pri[1]) | (val[2] & pri[2] < pri[1]) | (val[3] & pri[3] < pri[1]))) begin
+            pri_sel = 2'b01;
+         end
+      end
+      if (val[2]) begin
+         if (~((val[0] & pri[0] < pri[2]) | (val[1] & pri[1] < pri[2]) | (val[3] & pri[3] < pri[2]))) begin
+            pri_sel = 2'b10;
+         end
+      end
+      if (val[3]) begin
+         if (~((val[0] & pri[0] < pri[3]) | (val[1] & pri[1] < pri[3]) | (val[2] & pri[2] < pri[3]))) begin
+            pri_sel = 2'b11;
+         end
+      end
+   end
+endfunction
+
+// pri_upd: next priority values after granting core sel; sel drops to 11 and every core that was below it moves up one
+//function [7:0] pri_upd (input [1:0] sel, [1:0] p3, [1:0] p2, [1:0] p1, [1:0] p0);
+function [7:0] pri_upd (input [1:0] sel, [1:0] pri[0:3]);
+   begin
+      if (sel == 2'b00) begin
+         pri_upd[1:0] = 2'b11;
+         pri_upd[3:2] = pri_up(pri[0], pri[1]);
+         pri_upd[5:4] = pri_up(pri[0], pri[2]);
+         pri_upd[7:6] = pri_up(pri[0], pri[3]);
+      end
+      if (sel == 2'b01) begin
+         pri_upd[1:0] = pri_up(pri[1], pri[0]);
+         pri_upd[3:2] = 2'b11;
+         pri_upd[5:4] = pri_up(pri[1], pri[2]);
+         pri_upd[7:6] = pri_up(pri[1], pri[3]);
+      end
+      if (sel == 2'b10) begin
+         pri_upd[1:0] = pri_up(pri[2], pri[0]);
+         pri_upd[3:2] = pri_up(pri[2], pri[1]);
+         pri_upd[5:4] = 2'b11;
+         pri_upd[7:6] = pri_up(pri[2], pri[3]);
+      end
+      if (sel == 2'b11) begin
+         pri_upd[1:0] = pri_up(pri[3], pri[0]);
+         pri_upd[3:2] = pri_up(pri[3], pri[1]);
+         pri_upd[5:4] = pri_up(pri[3], pri[2]);
+         pri_upd[7:6] = 2'b11;
+      end
+   end
+endfunction
+
+// raise priority by 1, if it's higher than lvl
+function [1:0] pri_up(input [1:0] lvl, [1:0] pri);
+   begin
+      /* dont work!
+      if (pri > lvl) begin
+         if (pri == 2'b01)
+            assign pri_up = 2'b00;
+         if (pri == 2'b10)
+            assign pri_up = 2'b01;
+         if (pri == 2'b11)
+            assign pri_up = 2'b10;
+         else
+            assign pri_up = pri;
+      end
+      */
+      pri_up = pri > lvl ? pri - 1: pri; // 0 always sticks
+   end
+endfunction
\ No newline at end of file
diff --git a/rtl/a2node/bus_wb2.v b/rtl/a2node/bus_wb2.v
new file mode 100644
index 0000000..09a479b
--- /dev/null
+++ b/rtl/a2node/bus_wb2.v
@@ -0,0 +1,183 @@
+`include "defs.v"
+module bus_wb2 # (
+
+) (
+   input clk,
+   input rst,
+   output rdy_i,                     // instruction-side sequencer is idle
+   output rdy_d,                     // data-side sequencer is idle
+   input [`CMD_SIZE-1:0] cmd_i,
+   input [`CMD_SIZE-1:0] cmd_d,
+   output [`RSP_SIZE-1:0] rsp_i,
+   output [`RSP_SIZE-1:0] rsp_d,
+   output i_wb_stb,
+   output i_wb_cyc,
+   output [31:0] i_wb_adr,
+   input i_wb_ack,
+   input [31:0] i_wb_datr,
+   output d_wb_stb,
+   output d_wb_cyc,
+   output d_wb_we,
+   output [3:0] d_wb_sel,
+   output [31:0] d_wb_adr,
+   output [31:0] d_wb_datw,
+   input d_wb_ack,
+   input [31:0] d_wb_datr
+);
+
+reg [`CMD_SIZE-1:0] cmd_i_q;
+wire [`CMD_SIZE-1:0] cmd_i_d;
+reg [`CMD_SIZE-1:0] cmd_d_q;
+wire [`CMD_SIZE-1:0] cmd_d_d;
+reg [1:0] cmdseq_i_q;
+wire [1:0] cmdseq_i_d;
+reg [1:0] cmdseq_d_q;
+wire [1:0] cmdseq_d_d;
+wire idle_i;
+wire cmd_val_i;
+wire ld_cmd_i;
+wire rsp_val_i;
+wire idle_d;
+wire cmd_val_d;
+wire ld_cmd_d;
+wire rsp_val_d;
+
+   // FF (nonblocking assignments; both sequencers reset to 11, the idle state in the tables below)
+   always @(posedge clk) begin
+
+      if (rst) begin
+
+         cmdseq_i_q <= 2'b11;
+         cmdseq_d_q <= 2'b11;
+         cmd_i_q <= 'h0;
+         cmd_d_q <= 'h0;
+
+      end else begin
+
+         cmdseq_i_q <= cmdseq_i_d;
+         cmdseq_d_q <= cmdseq_d_d;
+         cmd_i_q <= cmd_i_d;
+         cmd_d_q <= cmd_d_d;
+
+      end
+   end
+
+   // super-simple; latch cmd -> send req -> rtn rsp
+
+   assign cmd_val_i = cmd_i[`CMD_VALID];   // same bit as CMD_SIZE-1; written like cmd_val_d
+
+   //tbl cmdseq_i
+   //n cmdseq_i_q cmdseq_i_d
+   //n | cmd_val_i | ld_cmd_i
+   //n | | i_wb_ack | |
+   //n | | | | |
+   //n | | | | |
+   //n | | | | | idle_i
+   //n | | | | | |
+   //n | | | | | |
+   //b 10 | | 10 | |
+   //t ii i i oo o o
+   //*------------------------------------------------
+   //* Idle ******************************************
+   //s 11 - - -- - 1
+   //s 11 0 - 11 0 - * ...zzz...
+   //s 11 1 - 01 1 -
+   //* Request Pending *******************************
+   //s 01 - 0 01 0 0
+   //s 01 - 1 11 0 0
+   //*------------------------------------------------
+   //tbl cmdseq_i
+
+   assign cmd_i_d = ld_cmd_i ? cmd_i : {cmd_i_q[`CMD_VALID] & ~i_wb_ack, cmd_i_q[`CMD_VALID-1:0]};   // load a new cmd, else hold it and clear valid on ack
+
+   assign i_wb_stb = cmd_i_q[`CMD_VALID];
+   assign i_wb_cyc = cmd_i_q[`CMD_VALID];
+   assign i_wb_adr = cmd_i_q[`CMD_ADR];
+
+   assign rdy_i = idle_i;
+   assign rsp_i[`RSP_VALID] = i_wb_ack;
+   assign rsp_i[`RSP_CORE_ID] = cmd_i_q[`CMD_CORE_ID];
+   assign rsp_i[`RSP_DATA] = i_wb_datr;
+
+   wire rsp_d_complete;   // declared here so it precedes its first use in cmd_d_d; driven further down
+   //wtf eventually move this to config unit; it will respond and block cmd val to bus unit
+   // use adr compare to return coreid for d-read
+   wire adr_coreid;
+   assign adr_coreid = ~cmd_d_q[`CMD_WE] & (cmd_d_q[`CMD_ADR] == 32'b0); //wtf why is the adr cmp part segving verilator?????
+   assign cmd_val_d = cmd_d[`CMD_VALID];
+
+   //tbl cmdseq_d
+   //n cmdseq_d_q cmdseq_d_d
+   //n | cmd_val_d | ld_cmd_d
+   //n | | rsp_d_complete | |
+   //n | | | | |
+   //n | | | | |
+   //n | | | | | idle_d
+   //n | | | | | |
+   //n | | | | | |
+   //b 10 | | 10 | |
+   //t ii i i oo o o
+   //*------------------------------------------------
+   //* Idle ******************************************
+   //s 11 - - -- - 1
+   //s 11 0 - 11 0 - * ...zzz...
+   //s 11 1 - 01 1 -
+   //* Request Pending *******************************
+   //s 01 - 0 01 0 0
+   //s 01 - 1 11 0 0
+   //*------------------------------------------------
+   //tbl cmdseq_d
+
+   //assign cmd_d_d = ld_cmd_d ? cmd_d : {cmd_d_q[`CMD_VALID] & ~d_wb_ack, cmd_d_q[`CMD_VALID-1:0]};
+   assign cmd_d_d = ld_cmd_d ? cmd_d : {cmd_d_q[`CMD_VALID] & ~rsp_d_complete, cmd_d_q[`CMD_VALID-1:0]};
+
+   //assign d_wb_cyc = cmd_d_q[`CMD_VALID];
+   //assign d_wb_stb = cmd_d_q[`CMD_VALID];
+   assign d_wb_cyc = cmd_d_q[`CMD_VALID] & ~adr_coreid;   // a read of address 0 is answered locally and never reaches the bus
+   assign d_wb_stb = cmd_d_q[`CMD_VALID] & ~adr_coreid;
+   assign d_wb_we = cmd_d_q[`CMD_WE];
+   assign d_wb_sel = cmd_d_q[`CMD_SEL];
+   assign d_wb_adr = cmd_d_q[`CMD_ADR];
+   assign d_wb_datw = cmd_d_q[`CMD_DATW];
+
+   assign rdy_d = idle_d;
+
+   //assign rsp_d[`RSP_VALID] = d_wb_ack;
+   // data-side completion: a bus ack, or the locally answered core-id read
+   assign rsp_d_complete = d_wb_ack | (cmd_d_q[`CMD_VALID] & adr_coreid);
+   assign rsp_d[`RSP_VALID] = rsp_d_complete;
+   assign rsp_d[`RSP_CORE_ID] = cmd_d_q[`CMD_CORE_ID];
+   //assign rsp_d[`RSP_DATA] = d_wb_datr;
+   assign rsp_d[`RSP_DATA] = adr_coreid ? {6'b0, cmd_d_q[`CMD_CORE_ID], 24'b0} : d_wb_datr; // byte 3 = core_id
+
+// Generated...   (the '+' between product terms is a 1-bit add; the terms are mutually exclusive, so it equals OR here)
+//vtable cmdseq_i
+assign cmdseq_i_d[1] =
+   (cmdseq_i_q[1] & cmdseq_i_q[0] & ~cmd_val_i) +
+   (~cmdseq_i_q[1] & cmdseq_i_q[0] & i_wb_ack);
+assign cmdseq_i_d[0] =
+   (cmdseq_i_q[1] & cmdseq_i_q[0] & ~cmd_val_i) +
+   (cmdseq_i_q[1] & cmdseq_i_q[0] & cmd_val_i) +
+   (~cmdseq_i_q[1] & cmdseq_i_q[0] & ~i_wb_ack) +
+   (~cmdseq_i_q[1] & cmdseq_i_q[0] & i_wb_ack);
+assign ld_cmd_i =
+   (cmdseq_i_q[1] & cmdseq_i_q[0] & cmd_val_i);
+assign idle_i =
+   (cmdseq_i_q[1] & cmdseq_i_q[0]);
+//vtable cmdseq_i
+//vtable cmdseq_d
+assign cmdseq_d_d[1] =
+   (cmdseq_d_q[1] & cmdseq_d_q[0] & ~cmd_val_d) +
+   (~cmdseq_d_q[1] & cmdseq_d_q[0] & rsp_d_complete);
+assign cmdseq_d_d[0] =
+   (cmdseq_d_q[1] & cmdseq_d_q[0] & ~cmd_val_d) +
+   (cmdseq_d_q[1] & cmdseq_d_q[0] & cmd_val_d) +
+   (~cmdseq_d_q[1] & cmdseq_d_q[0] & ~rsp_d_complete) +
+   (~cmdseq_d_q[1] & cmdseq_d_q[0] & rsp_d_complete);
+assign ld_cmd_d =
+   (cmdseq_d_q[1] & cmdseq_d_q[0] & cmd_val_d);
+assign idle_d =
+   (cmdseq_d_q[1] & cmdseq_d_q[0]);
+//vtable cmdseq_d
+
+endmodule
diff --git a/rtl/a2node/cmd_a2l2.v b/rtl/a2node/cmd_a2l2.v
new file mode 100644
index 0000000..b25743b
--- /dev/null
+++ b/rtl/a2node/cmd_a2l2.v
@@ -0,0 +1,15 @@
+// a2l2 default: allow 1 ld, 1 st credit and use 2 dedicated queues
+// stub: renamed from cmd_wb so it no longer collides with the module in cmd_wb.v; nets now sit inside the module
+`include "defs.v"
+
+module cmd_a2l2 #(
+   parameter CORE_TYPE = `CORE_TYPE_A2L2
+) (
+);
+
+reg [77:0] cmd_queue_q[0:3][0:1];
+wire [77:0] cmd_queue_d[0:3][0:1];
+wire [77:0] cmd_queue_in[0:3][0:1];
+wire [71:0] cmd_queue_out[0:3];
+
+endmodule
\ No newline at end of file
diff --git a/rtl/a2node/cmd_wb.v b/rtl/a2node/cmd_wb.v
new file mode 100644
index 0000000..f5124a7
--- /dev/null
+++ b/rtl/a2node/cmd_wb.v
@@ -0,0 +1,180 @@
+// Wishbone-Wishbone Command Interface
+
+// allow single- or dual-wb in/out:
+//    1/1 : passthru
+//    1/2 : route to proper - but this requires indicator in extcmd to distinguish i vs. d
+//    2/1 : arbitrate
+//    2/2 : passthru
+//
+// also handle special ops when possible (dcbz, ...)
+
+// select one command per output bus
+
+// ext_cmd is not tied to i/d, but does it need to have multiple outstanding (nop=0, and valid that require i/d info also must have that info provided)
+// needs ext_tkn if not tied to i/d; needs to set q valid if tied to i/d (based on ext i or d type)
+// seems like these are all tied to an i or d and require a response, so shouldn't need i+d+ext outstanding?
+
+// possible extended command modifiers
+//    prefetch
+//    larx
+//    stcx
+//    lwsync
+//    hwsync
+//    tlbsync
+//    ici, icbi
+//    dci, dcbi, etc
+//    dcbtst
+//    dcbz
+//    tlbie, etc
+
+// possible extended responses
+//    errors
+//    crit first, xfer# for larger bus width on core side
+//    credits
+//    resv valid
+//    stcx comp/pass
+//    sync ack
+//    back inv val/addr
+
+// possible extra functions
+//    integrated L2
+//    doorbell/mailbox (peer/broadcast msg/rsp/intr side channel crossbar)
+
+`include "defs.v"
+
+module cmd_wb #(
+   parameter CORE_ID = 0,                    // written into the CMD_CORE_ID field of every queued command
+   parameter CORE_TYPE = `CORE_TYPE_WB2,     // selects how the core-side wishbone signals fill the queues
+   parameter BUS_TYPE = `BUS_TYPE_WB2        // selects how the queues map onto cmd_out_0 / cmd_out_1
+) (
+   input clk,
+   input rst,
+   input i_wb_cyc,
+   input i_wb_stb,
+   input [31:0] i_wb_adr,
+   input d_wb_cyc,
+   input d_wb_stb,
+   input d_wb_we,
+   input [3:0] d_wb_sel,
+   input [31:0] d_wb_adr,
+   input [31:0] d_wb_datw,
+   input [127:0] ext_cmd,
+   input [1:0] cmd_taken, // bit vector, one per queued cmd (could simultaneously occur in some designs)
+   input [1:0] cmd_complete, // bit vector, one per queued cmd (could simultaneously occur in some designs)
+   output [`CMD_SIZE-1:0] cmd_out_0,
+   output [`CMD_SIZE-1:0] cmd_out_1
+);
+
+reg [`CMD_SIZE-1:0] cmd_queue_q[0:1];
+wire [`CMD_SIZE-1:0] cmd_queue_d[0:1];
+wire [`CMD_SIZE-1:0] cmd_queue_in[0:1];
+wire [`CMD_SIZE-1:0] cmd_queue_out;
+reg [127:0] ext_queue_q;
+wire [127:0] ext_queue_d;   // NOTE(review): nothing in this module drives ext_queue_d
+wire [127:0] ext_queue_in;
+
+genvar i;
+
+// FF (nonblocking assignments)
+always @(posedge clk) begin
+
+   if (rst) begin
+
+      cmd_queue_q[0] <= 'h0;
+      cmd_queue_q[1] <= 'h0;
+      ext_queue_q <= 'h0;
+
+   end else begin
+
+      cmd_queue_q[0] <= cmd_queue_d[0];
+      cmd_queue_q[1] <= cmd_queue_d[1];
+      ext_queue_q <= ext_queue_d;
+
+   end
+end
+
+case (CORE_TYPE)
+   `CORE_TYPE_WB1: begin
+      // q[0] = i or d   (NOTE(review): cmd_queue_in[1] is left undriven for this core type)
+      assign cmd_queue_in[0][`CMD_VALID] = d_wb_cyc & d_wb_stb; // valid - may need ext decode too
+      assign cmd_queue_in[0][`CMD_CORE_ID] = CORE_ID; // core id
+      assign cmd_queue_in[0][`CMD_RSVD_0] = 1'b0; // rsvd (possible cores > 4)
+      assign cmd_queue_in[0][`CMD_RSVD_1] = 1'b0; // rsvd (possible cores > 4)
+      assign cmd_queue_in[0][`CMD_TAKEN] = 1'b0; // taken
+      assign cmd_queue_in[0][`CMD_RSVD_2] = 1'b0; // rsp rcvd?
+      assign cmd_queue_in[0][`CMD_RSVD_3] = 1'b0;
+      assign cmd_queue_in[0][`CMD_WE] = d_wb_we;
+      assign cmd_queue_in[0][`CMD_SEL] = d_wb_sel;
+      assign cmd_queue_in[0][`CMD_ADR] = d_wb_adr;
+      assign cmd_queue_in[0][`CMD_DATW] = d_wb_datw;   // same range as before, now spelled with the macro
+      assign ext_queue_in = ext_cmd;   // was ext_queue_in[0], which kept only one bit of ext_cmd
+   end
+   `CORE_TYPE_WB2: begin
+      // q[0]=i, q[1]=d
+      assign cmd_queue_in[0][`CMD_VALID] = i_wb_cyc & i_wb_stb; // valid - may need ext decode too
+      assign cmd_queue_in[0][`CMD_CORE_ID] = CORE_ID; // core id
+      assign cmd_queue_in[0][`CMD_RSVD_0] = 1'b0; // rsvd (possible cores > 4)
+      assign cmd_queue_in[0][`CMD_RSVD_1] = 1'b0; // rsvd (possible cores > 4)
+      assign cmd_queue_in[0][`CMD_TAKEN] = 1'b0; // taken
+      assign cmd_queue_in[0][`CMD_RSVD_2] = 1'b0; // rsp rcvd?
+      assign cmd_queue_in[0][`CMD_RSVD_3] = 1'b0;
+      assign cmd_queue_in[0][`CMD_WE] = 1'b0;
+      assign cmd_queue_in[0][`CMD_SEL] = 4'b0;
+      assign cmd_queue_in[0][`CMD_ADR] = i_wb_adr;
+      assign cmd_queue_in[0][`CMD_DATW] = 32'b0;
+      // the five flag bits below were assigned to queue 0 a second time, leaving queue 1's bits undriven
+      assign cmd_queue_in[1][`CMD_VALID] = d_wb_cyc & d_wb_stb; // valid - may need ext decode too
+      assign cmd_queue_in[1][`CMD_CORE_ID] = CORE_ID; // core id
+      assign cmd_queue_in[1][`CMD_RSVD_0] = 1'b0; // rsvd (possible cores > 4)
+      assign cmd_queue_in[1][`CMD_RSVD_1] = 1'b0; // rsvd (possible cores > 4)
+      assign cmd_queue_in[1][`CMD_TAKEN] = 1'b0; // taken
+      assign cmd_queue_in[1][`CMD_RSVD_2] = 1'b0; // rsp rcvd?
+      assign cmd_queue_in[1][`CMD_RSVD_3] = 1'b0;
+      assign cmd_queue_in[1][`CMD_WE] = d_wb_we;
+      assign cmd_queue_in[1][`CMD_SEL] = d_wb_sel;
+      assign cmd_queue_in[1][`CMD_ADR] = d_wb_adr;
+      assign cmd_queue_in[1][`CMD_DATW] = d_wb_datw;
+
+      assign ext_queue_in = ext_cmd;
+   end
+endcase
+
+// queue routing/arbitration to cmd processing
+// NOTE(review): cmd_out_1 is driven only in the WB2-core / WB2-bus branch
+case (CORE_TYPE)
+   `CORE_TYPE_WB1: begin
+      case (BUS_TYPE)
+         `BUS_TYPE_WB1: begin
+            assign cmd_out_0 = cmd_queue_q[0];
+         end
+         `BUS_TYPE_WB2: begin
+            assign cmd_out_0 = ext_queue_q[0] ? cmd_queue_q[1] : cmd_queue_q[0]; // select i vs d; index 7 was outside cmd_queue_q[0:1]. NOTE(review): routing for this combination looks unfinished - confirm
+         end
+      endcase
+   end
+   `CORE_TYPE_WB2: begin
+      case (BUS_TYPE)
+         `BUS_TYPE_WB1:begin
+            // both valid: send d
+            // want selected bit; set first cycle; dont change once selected until not valid
+            assign cmd_out_0 = cmd_queue_q[1][`CMD_VALID] ? cmd_queue_q[1] : cmd_queue_q[0];
+         end
+         `BUS_TYPE_WB2: begin
+            assign cmd_out_0 = cmd_queue_q[0];
+            assign cmd_out_1 = cmd_queue_q[1];
+         end
+      endcase
+   end
+endcase
+// per queue entry: an occupied entry is held until cmd_complete, an empty entry loads from cmd_queue_in
+for (i = 0; i < 2; i++) begin
+   // valid
+   assign cmd_queue_d[i][`CMD_VALID] = cmd_queue_q[i][`CMD_VALID] ? ~cmd_complete[i] : cmd_queue_in[i][`CMD_VALID];
+   // taken
+   assign cmd_queue_d[i][`CMD_TAKEN] = cmd_queue_q[i][`CMD_VALID] ? (cmd_queue_q[i][`CMD_TAKEN] | cmd_taken[i]) & ~cmd_complete[i] : 1'b0;
+   // rest
+   assign cmd_queue_d[i][`CMD_VALID-1:`CMD_TAKEN+1] = cmd_queue_q[i][`CMD_VALID] ? cmd_queue_q[i][`CMD_VALID-1:`CMD_TAKEN+1] : cmd_queue_in[i][`CMD_VALID-1:`CMD_TAKEN+1];
+   assign cmd_queue_d[i][`CMD_TAKEN-1:0] = cmd_queue_q[i][`CMD_VALID] ? cmd_queue_q[i][`CMD_TAKEN-1:0] : cmd_queue_in[i][`CMD_TAKEN-1:0];
+end
+
+endmodule
\ No newline at end of file
diff --git a/rtl/a2node/defs.v b/rtl/a2node/defs.v
new file mode 100644
index 0000000..356e5bc
--- /dev/null
+++ b/rtl/a2node/defs.v
@@ -0,0 +1,71 @@
+// a2wb defines
+// the *_START macros are unparenthesized expressions; they are only used with +/- offsets inside bit selects
+`define CORE_TYPE_NONE 4'h0
+`define CORE_TYPE_A2L2 4'h1
+`define CORE_TYPE_WB1 4'h2
+`define CORE_TYPE_WB2 4'h3
+
+`define BUS_TYPE_NONE 4'h0
+`define BUS_TYPE_WB1 4'h1
+`define BUS_TYPE_WB2 4'h2
+
+// starting bits for core in/out subvectors
+
+// out's are core out/bridge in
+
+`define WB1_WB_OUT_START 383
+`define WB1_EXT_OUT_START `WB1_WB_OUT_START-128
+
+`define WB2_WB_OUT_START 383
+`define WB2_I_WB_OUT_START 383
+`define WB2_D_WB_OUT_START `WB2_I_WB_OUT_START-128
+`define WB2_EXT_OUT_START `WB2_D_WB_OUT_START-128
+
+// in's are bridge out/core in
+
+`define WB1_WB_IN_START 383
+`define WB1_EXT_IN_START `WB1_WB_IN_START-128
+
+`define WB2_WB_IN_START 383
+`define WB2_I_WB_IN_START 383
+`define WB2_D_WB_IN_START `WB2_I_WB_IN_START-128
+`define WB2_EXT_IN_START `WB2_D_WB_IN_START-128
+
+// starting bits for bus in/out subvectors
+
+`define BUS_WB1_OUT_START 127
+`define BUS_WB1_IN_START 127
+
+`define BUS_WB2_OUT_START 255
+`define BUS_WB2_I_OUT_START 255
+`define BUS_WB2_D_OUT_START 127
+`define BUS_WB2_IN_START 127
+`define BUS_WB2_I_IN_START 127
+`define BUS_WB2_D_IN_START 63
+
+
+// internal
+// command word, 77 bits: valid is bit 76, write data is bits 31:0
+`define CMD_SIZE 77
+`define CMD_VALID `CMD_SIZE-1
+`define CMD_CORE_ID `CMD_SIZE-2:`CMD_SIZE-3
+`define CMD_RSVD_0 `CMD_SIZE-4
+`define CMD_RSVD_1 `CMD_SIZE-5
+`define CMD_TAKEN `CMD_SIZE-6
+`define CMD_RSVD_2 `CMD_SIZE-7
+`define CMD_RSVD_3 `CMD_SIZE-8
+`define CMD_WE `CMD_SIZE-9
+`define CMD_SEL `CMD_SIZE-10:`CMD_SIZE-13
+`define CMD_ADR `CMD_SIZE-14:`CMD_SIZE-45
+`define CMD_DATW `CMD_SIZE-46:`CMD_SIZE-77
+// response word, 64 bits: valid is bit 63
+`define RSP_SIZE 64
+`define RSP_VALID `RSP_SIZE-1
+`define RSP_CORE_ID `RSP_SIZE-2:`RSP_SIZE-3
+`define RSP_RSVD_0 `RSP_SIZE-4:`RSP_SIZE-8
+`define RSP_DATA
`RSP_SIZE-9:`RSP_SIZE-40 + + +// main bus +`define WB1_BUS_OUT_START 127 +`define WB1_BUS_IN_START 127 diff --git a/rtl/a2node/readme.md b/rtl/a2node/readme.md new file mode 100644 index 0000000..9e3462f --- /dev/null +++ b/rtl/a2node/readme.md @@ -0,0 +1,68 @@ +# A2 Interfaces to WB + +* core interfaces + + * A2I/A2O A2L2 bus + + * Single (combined I/D) w/SMP extensions + + * Dual (separate I/D) WB buses w/SMP extensions + +* bus interfaces + + * single WB + + * dual WB + +* functions + + * queues one or more core commands + + * point of coherency for larx/stcx, sync, tlbie, etc. for multicores below it (single/multithread) + + * address compares necessary for ordering/coherency + + * optional mailbox interface for core-core peer and broadcast + + * arbitrates for WB bus(es) + + * gen responses for cores + +## Possible configurations + +* one core, WB: pass-through with SMP functions + +* one core, A2L2: bridge with SMP functions + +* multi-core: identical or mixed WB1/WB2/A2L2, queueing, arbitration, and SMP functions + + +### syntax check + +```verilator --lint-only a2wb.v -Wno-LITENDIAN``` + +### sim build + +``` +verilator --cc --exe --trace -Wno-Litendian -Wno-fatal -I./src top.v tb.cpp +cd obj_dir;make -f Vtop.mk;cd .. +obj_dir/Vtop +``` + +### synth build (Litex) + +* had to make some source changes for Vivado + +``` +rm obj_dir/* +# use sim top so tb.cpp is ok +verilator --cc --exe --trace -Wno-Litendian -Wno-fatal -I./litex/a2node/verilog -I./src top.v tb.cpp uart.cpp +cd obj_dir;make -f Vtop.mk;cd .. +obj_dir/Vtop + +gtkwave wtf.vcd wtf.gtkw + +vcd2fst wtf.vcd wtf.fst +gtkwave wtf.fst wtf.gtkw +``` + diff --git a/rtl/a2node/sim_1000_4w.txt b/rtl/a2node/sim_1000_4w.txt new file mode 100644 index 0000000..0dc753a --- /dev/null +++ b/rtl/a2node/sim_1000_4w.txt @@ -0,0 +1,156 @@ +Memory Size=01000000B +Loading memory from rom.bin.hex... +Seed=08675309 +Cores: 4 +Resetting... +Go! + >>> UART_0: + >>> UART_1: + >>> UART_2: + >>> UART_3: + >>> UART_0:A2Node Test!
+ >>> UART_1:A2Node Test! + >>> UART_2:A2Node Test! + >>> UART_3:A2Node Test! + >>> UART_0: + >>> UART_1: + >>> UART_2: + >>> UART_3: + >>> UART_0:Coremark test + >>> UART_1:Coremark test + >>> UART_2:Coremark test + >>> UART_3:Coremark test + >>> UART_0:Iterations: 1000 + >>> UART_1:Iterations: 1000 + >>> UART_2:Iterations: 1000 + >>> UART_3:Iterations: 1000 + >>> UART_0:Initing... + >>> UART_1:Initing... + >>> UART_2:Initing... + >>> UART_3:Initing... + >>> UART_0:List: 00FFF7CC 0000029A 00FFF7CC 00000000 + >>> UART_1:List: 00BFF7CC 0000029A 00BFF7CC 00000000 + >>> UART_2:List: 007FF7CC 0000029A 007FF7CC 00000000 + >>> UART_3:List: 003FF7CC 0000029A 003FF7CC 00000000 + >>> UART_0:Matrix: 00FFF7B0 0000029A 00FFFA66 00000000 + >>> UART_1:Matrix: 00BFF7B0 0000029A 00BFFA66 00000000 + >>> UART_2:Matrix: 007FF7B0 0000029A 007FFA66 00000000 + >>> UART_3:Matrix: 003FF7B0 0000029A 003FFA66 00000000 + >>> UART_0:State: 0000029A 00FFFD00 00000000 + >>> UART_1:State: 0000029A 00BFFD00 00000000 + >>> UART_2:State: 0000029A 007FFD00 00000000 + >>> UART_3:State: 0000029A 003FFD00 00000000 + >>> UART_0:Starting... + >>> UART_1:Starting... + >>> UART_2:Starting... + >>> UART_3:Starting... +cyc=40000000 +cyc=80000000 +cyc=120000000 +cyc=160000000 +cyc=200000000 +cyc=240000000 +cyc=280000000 +cyc=320000000 +cyc=360000000 +cyc=400000000 +cyc=440000000 +cyc=480000000 +cyc=520000000 +cyc=560000000 +cyc=600000000 + >>> UART_0:2K performance run parameters for coremark. + >>> UART_1:2K performance run parameters for coremark. + >>> UART_0:CoreMark Size : 666 + >>> UART_1:CoreMark Size : 666 + >>> UART_0:Total ticks : 639078926 + >>> UART_1:Total ticks : 639079903 + >>> UART_0:Total time (secs): 6 + >>> UART_1:Total time (secs): 6 + >>> UART_0:Iterations/Sec : 166 + >>> UART_1:Iterations/Sec : 166 + >>> UART_0:ERROR! Must execute for at least 10 secs for a valid result! + >>> UART_1:ERROR! Must execute for at least 10 secs for a valid result! 
+ >>> UART_2:2K performance run parameters for coremark. + >>> UART_0:Iterations : 1000 + >>> UART_3:2K performance run parameters for coremark. + >>> UART_1:Iterations : 1000 + >>> UART_0:Compiler version : GCC9.3.0 + >>> UART_2:CoreMark Size : 666 + >>> UART_1:Compiler version : GCC9.3.0 + >>> UART_0:Compiler flags : + >>> UART_3:CoreMark Size : 666 + >>> UART_1:Compiler flags : + >>> UART_2:Total ticks : 639099865 + >>> UART_0:Memory location : STACK + >>> UART_1:Memory location : STACK + >>> UART_3:Total ticks : 639102185 + >>> UART_2:Total time (secs): 6 + >>> UART_0:seedcrc : 0xe9f5 + >>> UART_1:seedcrc : 0xe9f5 + >>> UART_3:Total time (secs): 6 + >>> UART_2:Iterations/Sec : 166 + >>> UART_0:[0]crclist : 0xe714 + >>> UART_3:Iterations/Sec : 166 + >>> UART_1:[0]crclist : 0xe714 + >>> UART_2:ERROR! Must execute for at least 10 secs for a valid result! + >>> UART_0:[0]crcmatrix : 0x1fd7 + >>> UART_3:ERROR! Must execute for at least 10 secs for a valid result! + >>> UART_1:[0]crcmatrix : 0x1fd7 + >>> UART_2:Iterations : 1000 + >>> UART_3:Iterations : 1000 + >>> UART_0:[0]crcstate : 0x8e3a + >>> UART_2:Compiler version : GCC9.3.0 + >>> UART_1:[0]crcstate : 0x8e3a + >>> UART_2:Compiler flags : + >>> UART_3:Compiler version : GCC9.3.0 + >>> UART_0:[0]crcfinal : 0xd340 + >>> UART_3:Compiler flags : + >>> UART_2:Memory location : STACK + >>> UART_1:[0]crcfinal : 0xd340 + >>> UART_3:Memory location : STACK + >>> UART_2:seedcrc : 0xe9f5 + >>> UART_0:Correct operation validated. See README.md for run and reporting rules. + >>> UART_0: + >>> UART_3:seedcrc : 0xe9f5 + >>> UART_0:Pass. + >>> UART_0: +cyc=639212365 WBI Data @=0000f000 data=00000048 + ** pass address ifetch'd (1)... + >>> UART_1:Correct operation validated. See README.md for run and reporting rules. + >>> UART_1: + >>> UART_1:Pass. + >>> UART_1: +cyc=639213694 WBI Data @=0000f000 data=00000048 + ** pass address ifetch'd (2)... 
+ >>> UART_2:[0]crclist : 0xe714 + >>> UART_3:[0]crclist : 0xe714 + >>> UART_2:[0]crcmatrix : 0x1fd7 + >>> UART_3:[0]crcmatrix : 0x1fd7 + >>> UART_2:[0]crcstate : 0x8e3a + >>> UART_3:[0]crcstate : 0x8e3a + >>> UART_2:[0]crcfinal : 0xd340 + >>> UART_3:[0]crcfinal : 0xd340 + >>> UART_2:Correct operation validated. See README.md for run and reporting rules. + >>> UART_2: + >>> UART_2:Pass. + >>> UART_2: +cyc=639232242 WBI Data @=0000f000 data=00000048 + ** pass address ifetch'd (3)... + >>> UART_3:Correct operation validated. See README.md for run and reporting rules. + >>> UART_3: + >>> UART_3:Pass. + >>> UART_3: +cyc=639234350 WBI Data @=0000f000 data=00000048 + ** pass address ifetch'd (4)... + +Statistics + IFetch: 11416960 + DRead: 00007676 + DWrite: 111415376 + +Done. + +You has opulence. + +Seed=08675309 diff --git a/rtl/a2node/smp.v b/rtl/a2node/smp.v new file mode 100644 index 0000000..41c36a6 --- /dev/null +++ b/rtl/a2node/smp.v @@ -0,0 +1,26 @@ +// smp: SMP support skeleton - empty parameter and port lists, no logic yet; only the reservation wires below are declared +module smp # ( +) ( + +); + +// larx/stcx +// assume: if larx hits L1, core invalidates line automatically -> do not need to send back-invalidate +// reservation granule is 32B (or use lcd of all cores) +// one reservation per thread +// reservation is set before core receives reload data + +wire stcx_store [0:3]; +wire resv_ra_hit [0:3]; +wire resv_set [0:3]; +wire resv_rst [0:3]; +wire [27:0] resv_q [0:3]; // v, @31:5 (presumably valid bit + address bits 31:5 = 28 bits, matching the 32B granule - confirm) +wire [27:0] resv_d [0:3]; + +// sync ops + +// cache ops + +// tlb ops + +endmodule \ No newline at end of file diff --git a/rtl/a2node/top.v b/rtl/a2node/top.v new file mode 100644 index 0000000..82766d1 --- /dev/null +++ b/rtl/a2node/top.v @@ -0,0 +1,313 @@ +`include "defs.v" +// top: connects four A2P_4K1W cores (c0-c3) through one A2WB bridge to the wb_i_*/wb_d_* Wishbone ports +module top #( + parameter [0:15] CORE_TYPES = {`CORE_TYPE_WB2, `CORE_TYPE_WB2, `CORE_TYPE_WB2, `CORE_TYPE_WB2}, + parameter [0:3] BUS_TYPE = `BUS_TYPE_WB2 +) ( + input clk, + input rst, + output wb_i_stb, + output wb_i_cyc, + output [31:0] wb_i_adr, + input wb_i_ack, + input [31:0] wb_i_datr, + output wb_d_stb, + output
wb_d_cyc, + output [31:0] wb_d_adr, + output wb_d_we, + output [3:0] wb_d_sel, + output [31:0] wb_d_datw, + input wb_d_ack, + input [31:0] wb_d_datr +); + +wire [`WB2_WB_IN_START:0] c0_in; +wire [`WB2_WB_OUT_START:0] c0_out; +wire [`WB2_WB_IN_START:0] c1_in; +wire [`WB2_WB_OUT_START:0] c1_out; +wire [`WB2_WB_IN_START:0] c2_in; +wire [`WB2_WB_OUT_START:0] c2_out; +wire [`WB2_WB_IN_START:0] c3_in; +wire [`WB2_WB_OUT_START:0] c3_out; + +wire [`BUS_WB2_IN_START:0] wb_in; +wire [`BUS_WB2_OUT_START:0] wb_out; + +wire rst_0 /*verilator public*/; +wire wb_i_stb_0; +wire wb_i_cyc_0; +//wire wb_i_we_0; +//wire [3:0] wb_i_sel_0; +wire [31:2] wb_i_adr_0; +//wire [31:0] wb_i_datw_0; +wire wb_i_ack_0; +wire [31:0] wb_i_datr_0; +wire wb_d_stb_0; +wire wb_d_cyc_0; +wire wb_d_we_0; +wire [3:0] wb_d_sel_0; +wire [31:2] wb_d_adr_0; +wire [31:0] wb_d_datw_0; +wire wb_d_ack_0; +wire [31:0] wb_d_datr_0; +wire ext_int_0; +wire ext_int_s_0; +wire [31:0] ext_rst_vector_0; +wire soft_int_0; +wire timer_int_0; + +wire rst_1 /*verilator public*/; +wire wb_i_stb_1; +wire wb_i_cyc_1; +//wire wb_i_we_1; +//wire [3:0] wb_i_sel_1; +wire [31:2] wb_i_adr_1; +//wire [31:0] wb_i_datw_1; +wire wb_i_ack_1; +wire [31:0] wb_i_datr_1; +wire wb_d_stb_1; +wire wb_d_cyc_1; +wire wb_d_we_1; +wire [3:0] wb_d_sel_1; +wire [31:2] wb_d_adr_1; +wire [31:0] wb_d_datw_1; +wire wb_d_ack_1; +wire [31:0] wb_d_datr_1; +wire ext_int_1; +wire ext_int_s_1; +wire [31:0] ext_rst_vector_1; +wire soft_int_1; +wire timer_int_1; + +wire rst_2 /*verilator public*/; +wire wb_i_stb_2; +wire wb_i_cyc_2; +//wire wb_i_we_2; +//wire [3:0] wb_i_sel_2; +wire [31:2] wb_i_adr_2; +//wire [31:0] wb_i_datw_2; +wire wb_i_ack_2; +wire [31:0] wb_i_datr_2; +wire wb_d_stb_2; +wire wb_d_cyc_2; +wire wb_d_we_2; +wire [3:0] wb_d_sel_2; +wire [31:2] wb_d_adr_2; +wire [31:0] wb_d_datw_2; +wire wb_d_ack_2; +wire [31:0] wb_d_datr_2; +wire ext_int_2; +wire ext_int_s_2; +wire [31:0] ext_rst_vector_2; +wire soft_int_2; +wire timer_int_2; + +wire rst_3 
/*verilator public*/; +wire wb_i_stb_3; +wire wb_i_cyc_3; +//wire wb_i_we_3; +//wire [3:0] wb_i_sel_3; +wire [31:2] wb_i_adr_3; +//wire [31:0] wb_i_datw_3; +wire wb_i_ack_3; +wire [31:0] wb_i_datr_3; +wire wb_d_stb_3; +wire wb_d_cyc_3; +wire wb_d_we_3; +wire [3:0] wb_d_sel_3; +wire [31:2] wb_d_adr_3; +wire [31:0] wb_d_datw_3; +wire wb_d_ack_3; +wire [31:0] wb_d_datr_3; +wire ext_int_3; +wire ext_int_s_3; +wire [31:0] ext_rst_vector_3; +wire soft_int_3; +wire timer_int_3; + + +// core in/out viewed by core +assign c0_out = {wb_i_stb_0, wb_i_cyc_0, 1'b0 , 4'b0, {wb_i_adr_0, 2'b0}, 32'b0, 1'b0, 32'b0, 24'b0, + wb_d_stb_0, wb_d_cyc_0, wb_d_we_0, wb_d_sel_0, {wb_d_adr_0, 2'b0}, wb_d_datw_0, 1'b0, 32'b0, 24'b0, + ext_int_0, ext_int_s_0, ext_rst_vector_0, soft_int_0, timer_int_0, 92'b0}; + +assign {wb_i_ack_0, wb_i_datr_0} = c0_in[`WB2_I_WB_IN_START:`WB2_I_WB_IN_START-32]; +assign {wb_d_ack_0, wb_d_datr_0} = c0_in[`WB2_D_WB_IN_START:`WB2_D_WB_IN_START-32]; + +assign c1_out = {wb_i_stb_1, wb_i_cyc_1, 1'b0 , 4'b0, {wb_i_adr_1, 2'b0}, 32'b0, 1'b0, 32'b0, 24'b0, + wb_d_stb_1, wb_d_cyc_1, wb_d_we_1, wb_d_sel_1, {wb_d_adr_1, 2'b0}, wb_d_datw_1, 1'b0, 32'b0, 24'b0, + ext_int_1, ext_int_s_1, ext_rst_vector_1, soft_int_1, timer_int_1, 92'b0}; + +assign {wb_i_ack_1, wb_i_datr_1} = c1_in[`WB2_I_WB_IN_START:`WB2_I_WB_IN_START-32]; +assign {wb_d_ack_1, wb_d_datr_1} = c1_in[`WB2_D_WB_IN_START:`WB2_D_WB_IN_START-32]; + +assign c2_out = {wb_i_stb_2, wb_i_cyc_2, 1'b0 , 4'b0, {wb_i_adr_2, 2'b0}, 32'b0, 1'b0, 32'b0, 24'b0, + wb_d_stb_2, wb_d_cyc_2, wb_d_we_2, wb_d_sel_2, {wb_d_adr_2, 2'b0}, wb_d_datw_2, 1'b0, 32'b0, 24'b0, + ext_int_2, ext_int_s_2, ext_rst_vector_2, soft_int_2, timer_int_2, 92'b0}; + +assign {wb_i_ack_2, wb_i_datr_2} = c2_in[`WB2_I_WB_IN_START:`WB2_I_WB_IN_START-32]; +assign {wb_d_ack_2, wb_d_datr_2} = c2_in[`WB2_D_WB_IN_START:`WB2_D_WB_IN_START-32]; + +assign c3_out = {wb_i_stb_3, wb_i_cyc_3, 1'b0 , 4'b0, {wb_i_adr_3, 2'b0}, 32'b0, 1'b0, 32'b0, 24'b0, + wb_d_stb_3, 
wb_d_cyc_3, wb_d_we_3, wb_d_sel_3, {wb_d_adr_3, 2'b0}, wb_d_datw_3, 1'b0, 32'b0, 24'b0, + ext_int_3, ext_int_s_3, ext_rst_vector_3, soft_int_3, timer_int_3, 92'b0}; + +assign {wb_i_ack_3, wb_i_datr_3} = c3_in[`WB2_I_WB_IN_START:`WB2_I_WB_IN_START-32]; +assign {wb_d_ack_3, wb_d_datr_3} = c3_in[`WB2_D_WB_IN_START:`WB2_D_WB_IN_START-32]; + +// bus in/out viewed by bridge +assign {wb_i_stb, wb_i_cyc} = wb_out[`BUS_WB2_I_OUT_START:`BUS_WB2_I_OUT_START-1]; +assign wb_i_adr = wb_out[`BUS_WB2_I_OUT_START-7:`BUS_WB2_I_OUT_START-38]; +assign {wb_d_stb, wb_d_cyc, wb_d_we, wb_d_sel} = wb_out[`BUS_WB2_D_OUT_START:`BUS_WB2_D_OUT_START-6]; +assign wb_d_adr = wb_out[`BUS_WB2_D_OUT_START-7:`BUS_WB2_D_OUT_START-38]; +assign wb_d_datw = wb_out[`BUS_WB2_D_OUT_START-39:`BUS_WB2_D_OUT_START-70]; + +assign wb_in = {wb_i_ack, wb_i_datr, 31'b0, wb_d_ack, wb_d_datr, 31'b0}; + +// may want to control resets with config, etc. +//assign rst_0 = rst; +//assign rst_1 = rst; +//assign rst_2 = rst; +//assign rst_3 = rst; + +A2WB #( + .CORE_TYPES(CORE_TYPES), + .BUS_TYPE(BUS_TYPE) +) bridge ( + .clk(clk), + .rst(rst), + .cores_in({c0_out, c1_out, c2_out, c3_out}), + .cores_out({c0_in, c1_in, c2_in, c3_in}), + .bus_in(wb_in), + .bus_out(wb_out) +); + +A2P_4K1W c0 ( + .clk(clk), + .reset(rst_0), + .iBusWB_STB(wb_i_stb_0), + .iBusWB_CYC(wb_i_cyc_0), + .iBusWB_ADR(wb_i_adr_0), + .iBusWB_WE(), + .iBusWB_SEL(), + .iBusWB_DAT_MOSI(), + .iBusWB_ACK(wb_i_ack_0), + .iBusWB_DAT_MISO(wb_i_datr_0), + .iBusWB_ERR(1'd0), + .iBusWB_BTE(), + .iBusWB_CTI(), + .dBusWB_STB(wb_d_stb_0), + .dBusWB_CYC(wb_d_cyc_0), + .dBusWB_WE(wb_d_we_0), + .dBusWB_SEL(wb_d_sel_0), + .dBusWB_ADR(wb_d_adr_0), + .dBusWB_DAT_MOSI(wb_d_datw_0), + .dBusWB_ACK(wb_d_ack_0), + .dBusWB_DAT_MISO(wb_d_datr_0), + .dBusWB_ERR(1'd0), + .dBusWB_BTE(), + .dBusWB_CTI(), + .externalInterrupt(ext_int_0), + .externalInterruptS(ext_int_s_0), + .externalResetVector(ext_rst_vector_0), + .softwareInterrupt(soft_int_0), + .timerInterrupt(timer_int_0) +); + 
+A2P_4K1W c1 ( + .clk(clk), + .reset(rst_1), + .iBusWB_STB(wb_i_stb_1), + .iBusWB_CYC(wb_i_cyc_1), + .iBusWB_ADR(wb_i_adr_1), + .iBusWB_WE(), + .iBusWB_SEL(), + .iBusWB_DAT_MOSI(), + .iBusWB_ACK(wb_i_ack_1), + .iBusWB_DAT_MISO(wb_i_datr_1), + .iBusWB_ERR(1'd0), + .iBusWB_BTE(), + .iBusWB_CTI(), + .dBusWB_STB(wb_d_stb_1), + .dBusWB_CYC(wb_d_cyc_1), + .dBusWB_WE(wb_d_we_1), + .dBusWB_SEL(wb_d_sel_1), + .dBusWB_ADR(wb_d_adr_1), + .dBusWB_DAT_MOSI(wb_d_datw_1), + .dBusWB_ACK(wb_d_ack_1), + .dBusWB_DAT_MISO(wb_d_datr_1), + .dBusWB_ERR(1'd0), + .dBusWB_BTE(), + .dBusWB_CTI(), + .externalInterrupt(ext_int_1), + .externalInterruptS(ext_int_s_1), + .externalResetVector(ext_rst_vector_1), + .softwareInterrupt(soft_int_1), + .timerInterrupt(timer_int_1) +); + +A2P_4K1W c2 ( + .clk(clk), + .reset(rst_2), + .iBusWB_STB(wb_i_stb_2), + .iBusWB_CYC(wb_i_cyc_2), + .iBusWB_ADR(wb_i_adr_2), + .iBusWB_WE(), + .iBusWB_SEL(), + .iBusWB_DAT_MOSI(), + .iBusWB_ACK(wb_i_ack_2), + .iBusWB_DAT_MISO(wb_i_datr_2), + .iBusWB_ERR(1'd0), + .iBusWB_BTE(), + .iBusWB_CTI(), + .dBusWB_STB(wb_d_stb_2), + .dBusWB_CYC(wb_d_cyc_2), + .dBusWB_WE(wb_d_we_2), + .dBusWB_SEL(wb_d_sel_2), + .dBusWB_ADR(wb_d_adr_2), + .dBusWB_DAT_MOSI(wb_d_datw_2), + .dBusWB_ACK(wb_d_ack_2), + .dBusWB_DAT_MISO(wb_d_datr_2), + .dBusWB_ERR(1'd0), + .dBusWB_BTE(), + .dBusWB_CTI(), + .externalInterrupt(ext_int_2), + .externalInterruptS(ext_int_s_2), + .externalResetVector(ext_rst_vector_2), + .softwareInterrupt(soft_int_2), + .timerInterrupt(timer_int_2) +); + +A2P_4K1W c3 ( + .clk(clk), + .reset(rst_3), + .iBusWB_STB(wb_i_stb_3), + .iBusWB_CYC(wb_i_cyc_3), + .iBusWB_ADR(wb_i_adr_3), + .iBusWB_WE(), + .iBusWB_SEL(), + .iBusWB_DAT_MOSI(), + .iBusWB_ACK(wb_i_ack_3), + .iBusWB_DAT_MISO(wb_i_datr_3), + .iBusWB_ERR(1'd0), + .iBusWB_BTE(), + .iBusWB_CTI(), + .dBusWB_STB(wb_d_stb_3), + .dBusWB_CYC(wb_d_cyc_3), + .dBusWB_WE(wb_d_we_3), + .dBusWB_SEL(wb_d_sel_3), + .dBusWB_ADR(wb_d_adr_3), + .dBusWB_DAT_MOSI(wb_d_datw_3), + 
.dBusWB_ACK(wb_d_ack_3), + .dBusWB_DAT_MISO(wb_d_datr_3), + .dBusWB_ERR(1'd0), + .dBusWB_BTE(), + .dBusWB_CTI(), + .externalInterrupt(ext_int_3), + .externalInterruptS(ext_int_s_3), + .externalResetVector(ext_rst_vector_3), + .softwareInterrupt(soft_int_3), + .timerInterrupt(timer_int_3) +); + +endmodule