diff --git a/rtl/a2wb/a2wb.v b/rtl/a2wb/a2wb.v index 27754e7..c5866e7 100644 --- a/rtl/a2wb/a2wb.v +++ b/rtl/a2wb/a2wb.v @@ -1,46 +1,9 @@ // A2 Core Bridge -// should modularize as much as possible and just do messy rewiring here! +// adapt cores and buses with generic module +// one thread/core for now; multithread needs thread tag, deeper queues -// one thread/core for now - -// possible extended command modifiers -// prefetch -// larx -// stcx -// lwsync -// hwsync -// tlbsync -// ici, icbi -// dci, dcbi, etc -// dcbtst -// dcbz -// tlbie, etc - -// possible extended responses -// errors -// crit first, xfer# for larger bus width on core side -// credits -// resv valid -// stcx comp/pass -// sync ack -// back inv val/addr - -// possible extra functions -// integrated L2 -// doorbell/mailbox (peer/broadcast msg/rsp/intr side channel crossbar) - - - -// cores must be contiguous, starting at 0 -`define CORE_TYPE_NONE 4'h0 -`define CORE_TYPE_A2L2 4'h1 -`define CORE_TYPE_WB1 4'h2 -`define CORE_TYPE_WB2 4'h3 - -`define BUS_TYPE_NONE 4'h0 -`define BUS_TYPE_WB1 4'h1 -`define BUS_TYPE_WB2 4'h2 +`include "defs.v" module A2WB #( parameter [0:15] CORE_TYPES = {`CORE_TYPE_WB2, `CORE_TYPE_NONE, `CORE_TYPE_NONE, `CORE_TYPE_NONE}, @@ -61,6 +24,8 @@ genvar i; // ------------------------------------------------------------------------------------------------ // I/O Connections +// cores must be contiguous, starting at 0 + wire i_wb_cyc [0:3]; wire i_wb_stb [0:3]; wire [31:2] i_wb_adr[0:3] ; @@ -80,13 +45,13 @@ wire [7:0] ext_rsp [0:3]; generate for (i = 0; i < 4; i++) begin case (CORE_TYPES[i*4:i*4+3]) - 4'h0: begin + `CORE_TYPE_NONE: begin end - 4'h1: begin + `CORE_TYPE_A2L2: begin assign NUMCORES = NUMCORES + 1; // a2l2 end - 4'h2: begin + `CORE_TYPE_WB1: begin assign NUMCORES = NUMCORES + 1; wire [78:0] core_0_in; @@ -101,7 +66,7 @@ generate assign core_out[i][32] = d_wb_ack[i]; assign core_out[i][31:0] = d_wb_datr[i]; end - 4'h3: begin + `CORE_TYPE_WB2: begin assign NUMCORES 
= NUMCORES + 1; wire [110:0] core_in[i]; @@ -126,137 +91,128 @@ generate endgenerate // ------------------------------------------------------------------------------------------------ -// Command Queues/Addr Compare/Bypass +// Command Interfaces // -// cores can have either 1 or 2 buses; assume single-cmd outstanding per, for now -// a2l2 could also allow 1 ld, 1 st credit and use 2 dedicated queues -reg [77:0] cmd_queue_q[0:3][0:1]; -wire [77:0] cmd_queue_d[0:3][0:1]; -wire [77:0] cmd_queue_in[0:3][0:1]; -wire [71:0] cmd_queue_out[0:3]; + generate for (i = 0; i < 4; i++) begin case (CORE_TYPES[i*4:i*4+3]) - 4'h0: begin + `CORE_TYPE_NONE: begin end - 4'h1: begin + `CORE_TYPE_A2L2: begin // convert a2l2 to internal format end - 4'h2: begin - // q[0] = i or d - assign cmd_queue_in[i][0][77] = d_wb_cyc[i] & d_wb_stb[i]; // valid - assign cmd_queue_in[i][0][76] = d_wb_we[i]; - assign cmd_queue_in[i][0][75:72] = d_wb_sel[i]; - assign cmd_queue_in[i][0][71:40] = d_wb_adr[i]; - assign cmd_queue_in[i][0][39:8] = d_wb_datw[i]; - assign cmd_queue_in[i][0][7:0] = ext_cmd[i]; + `CORE_TYPE_WB1: begin + cmd_wb #(.CORE_TYPE(CORE_TYPES[i*4:i*4+3]), .BUS_TYPE(BUS_TYPE)) core_in ( + .clk(clk), + .rst(rst), + .i_wb_cyc('b0), + .i_wb_stb('b0), + .i_wb_adr('h0), + .d_wb_cyc(d_wb_cyc[i]), + .d_wb_stb(d_wb_stb[i]), + .d_wb_we(d_wb_we[i]), + .d_wb_sel(d_wb_sel[i]), + .d_wb_adr(d_wb_adr[i]), + .d_wb_datw(d_wb_datw[i]), + .ext_cmd(ext_cmd[i]), + .cmd_taken('b0), + .cmd_out_0(), + .cmd_out_1() + ); end - 4'h3: begin - // q[0]=i, q[1]=d - assign cmd_queue_in[i][0][77] = i_wb_cyc[i] & i_wb_stb[i]; // valid - assign cmd_queue_in[i][0][76] = 'b0; - assign cmd_queue_in[i][0][75:72] = 'b0000; - assign cmd_queue_in[i][0][71:40] = d_wb_adr[i]; - assign cmd_queue_in[i][0][39:8] = 'h000000; - assign cmd_queue_in[i][0][7:0] = ext_cmd[i]; - assign cmd_queue_in[i][0][77] = d_wb_cyc[i] & d_wb_stb[i]; // valid - assign cmd_queue_in[i][0][76] = d_wb_we[i]; - assign cmd_queue_in[i][0][75:72] = d_wb_sel[i]; 
- assign cmd_queue_in[i][0][71:40] = d_wb_adr[i]; - assign cmd_queue_in[i][0][39:8] = d_wb_datw[i]; - assign cmd_queue_in[i][0][7:0] = ext_cmd[i]; + `CORE_TYPE_WB2: begin + cmd_wb #(.CORE_TYPE(CORE_TYPES[i*4:i*4+3]), .BUS_TYPE(BUS_TYPE)) core_in ( + .clk(clk), + .rst(rst), + .i_wb_cyc(i_wb_cyc[i]), + .i_wb_stb(i_wb_stb[i]), + .i_wb_adr(i_wb_adr[i]), + .d_wb_cyc(d_wb_cyc[i]), + .d_wb_stb(d_wb_stb[i]), + .d_wb_we(d_wb_we[i]), + .d_wb_sel(d_wb_sel[i]), + .d_wb_adr(d_wb_adr[i]), + .d_wb_datw(d_wb_datw[i]), + .ext_cmd(ext_cmd[i]), + .cmd_taken('b0), + .cmd_out_0(), + .cmd_out_1() + ); end endcase end endgenerate // ------------------------------------------------------------------------------------------------ -// SMP - -// larx/stcx -// assume: if larx hits L1, core invalidates line automatically -> do not need to send back-invalidate -// reservation granule is 32B (or use lcd of all cores) -// one reservation per thread -// reservation is set before core receives reload data - -wire stcx_store [0:3]; -wire resv_ra_hit [0:3]; -wire resv_set [0:3]; -wire resv_rst [0:3]; -wire [27:0] resv_q [0:3]; // v, @31:5 -wire [27:0] resv_d [0:3]; - -generate - for (i = 0; i < 4; i++) begin +// Arbitration +// +// LRU, etc. select from pending cmds; also needs smp to stall some/all cmds +// do addr cmp here, if necessary? or could do in smp - end -endgenerate +arb #() arb ( -// sync ack +); -// cache ops +// ------------------------------------------------------------------------------------------------ +// SMP -// tlb ops +// special ops: track resv, stall pending cmds, gen rsp +smp #() smp ( -// ------------------------------------------------------------------------------------------------ -// Arbitration -// -// LRU, etc. 
select from pending cmds -generate - for (i = 0; i < 4; i++) begin - end -endgenerate +); // ------------------------------------------------------------------------------------------------ // Bus Out +// commands to main bus generate - if (BUS_TYPE == `BUS_TYPE_WB1) begin - - end else if (BUS_TYPE == `BUS_TYPE_WB2) begin - - wire [101:0] bus_out; - wire bus_i_wb_stb; - assign bus_out[101] = bus_i_wb_stb; - wire [31:2] bus_i_wb_adr; - assign bus_out[100:71] = bus_i_wb_adr; - wire bus_d_wb_cyc; - assign bus_out[70] = bus_d_wb_cyc; - wire bus_d_wb_stb; - assign bus_out[69] = bus_d_wb_stb; - wire bus_d_wb_we; - assign bus_out[68] = bus_d_wb_we; - wire [3:0] bus_d_wb_sel; - assign bus_out[67:64] = bus_d_wb_sel; - wire [31:0] bus_d_wb_adr; - assign bus_out[63:32] = bus_d_wb_adr; - wire [31:0] bus_d_wb_datw; - assign bus_out[31:0] = bus_d_wb_datw; - - end else begin - end + case(BUS_TYPE) + `BUS_TYPE_WB1: begin + end + `BUS_TYPE_WB2: begin + wire [101:0] bus_out; + wire bus_i_wb_stb; + assign bus_out[101] = bus_i_wb_stb; + wire [31:2] bus_i_wb_adr; + assign bus_out[100:71] = bus_i_wb_adr; + wire bus_d_wb_cyc; + assign bus_out[70] = bus_d_wb_cyc; + wire bus_d_wb_stb; + assign bus_out[69] = bus_d_wb_stb; + wire bus_d_wb_we; + assign bus_out[68] = bus_d_wb_we; + wire [3:0] bus_d_wb_sel; + assign bus_out[67:64] = bus_d_wb_sel; + wire [31:0] bus_d_wb_adr; + assign bus_out[63:32] = bus_d_wb_adr; + wire [31:0] bus_d_wb_datw; + assign bus_out[31:0] = bus_d_wb_datw; + end + endcase endgenerate // ------------------------------------------------------------------------------------------------ // Bus In +// responses from main bus generate - if (BUS_TYPE == `BUS_TYPE_WB1) begin - - end else if (BUS_TYPE == `BUS_TYPE_WB2) begin - - wire [65:0] bus_in; - wire bus_i_wb_ack = bus_in[65]; - wire [31:0] bus_i_wb_datr = bus_in[64:33]; - wire bus_d_wb_ack = bus_in[32]; - wire [31:0] bus_d_wb_datr = bus_in[31:0]; - - end else begin - end + case(BUS_TYPE) + `BUS_TYPE_WB1: begin + end + 
`BUS_TYPE_WB2: begin + wire [65:0] bus_in; + wire bus_i_wb_ack = bus_in[65]; + wire [31:0] bus_i_wb_datr = bus_in[64:33]; + wire bus_d_wb_ack = bus_in[32]; + wire [31:0] bus_d_wb_datr = bus_in[31:0]; + end + endcase endgenerate // ------------------------------------------------------------------------------------------------ // Response Queues +// responses for cores generate for (i = 0; i < 4; i++) begin @@ -265,6 +221,7 @@ endgenerate // ------------------------------------------------------------------------------------------------ // Misc/Errors/Debug +// stuff generate for (i = 0; i < 4; i++) begin diff --git a/rtl/a2wb/arb.v b/rtl/a2wb/arb.v new file mode 100644 index 0000000..11e23c4 --- /dev/null +++ b/rtl/a2wb/arb.v @@ -0,0 +1,13 @@ + +module arb # ( +) ( + +); + +// fairly choose 1 or 2 (depending on output buses) cmds +// mark taken from queue +// obey restrictions from smp, etc. +// detect addr collisions - not needed if no caching? + + +endmodule \ No newline at end of file diff --git a/rtl/a2wb/cmd_a2l2.v b/rtl/a2wb/cmd_a2l2.v new file mode 100644 index 0000000..b25743b --- /dev/null +++ b/rtl/a2wb/cmd_a2l2.v @@ -0,0 +1,15 @@ +// A2L2 Command Interface (stub, no ports yet) - a2l2 default: allow 1 ld, 1 st credit and use 2 dedicated queues + +`include "defs.v" + +module cmd_a2l2 #( + parameter CORE_TYPE = `CORE_TYPE_A2L2 +) ( +); + +reg [77:0] cmd_queue_q[0:3][0:1]; // queue state is declared inside the module so each instance owns its own copy +wire [77:0] cmd_queue_d[0:3][0:1]; +wire [77:0] cmd_queue_in[0:3][0:1]; +wire [71:0] cmd_queue_out[0:3]; + +endmodule \ No newline at end of file diff --git a/rtl/a2wb/cmd_wb.v b/rtl/a2wb/cmd_wb.v new file mode 100644 index 0000000..f05f43b --- /dev/null +++ b/rtl/a2wb/cmd_wb.v @@ -0,0 +1,132 @@ +// Wishbone-Wishbone Command Interface + +// allow single- or dual-wb in/out: +// 1/1 : passthru +// 1/2 : route to proper - but this requires indicator in extcmd to distinguish i vs. d +// 2/1 : arbitrate +// 2/2 : passthru +// +// also handle special ops when possible (dcbz, ...) 
+ +// select one command per output bus + +// ext_cmd is not tied to i/d, but does it need to have multiple outstanding (nop=0, and valid that require i/d info also must have that info provided) +// needs ext_tkn if not tied to i/d; needs to set q valid if tied to i/d (based on ext i or d type) +// seems like these are all tied to an i or d and require a response, so shouldn't need i+d+ext outstanding? + +// possible extended command modifiers +// prefetch +// larx +// stcx +// lwsync +// hwsync +// tlbsync +// ici, icbi +// dci, dcbi, etc +// dcbtst +// dcbz +// tlbie, etc + +// possible extended responses +// errors +// crit first, xfer# for larger bus width on core side +// credits +// resv valid +// stcx comp/pass +// sync ack +// back inv val/addr + +// possible extra functions +// integrated L2 +// doorbell/mailbox (peer/broadcast msg/rsp/intr side channel crossbar) + +`include "defs.v" + +module cmd_wb #( + parameter CORE_TYPE = `CORE_TYPE_WB2, + parameter BUS_TYPE = `BUS_TYPE_WB2 +) ( + input clk, + input rst, + input i_wb_cyc, + input i_wb_stb, + input [31:2] i_wb_adr, + input d_wb_cyc, + input d_wb_stb, + input d_wb_we, + input [3:0] d_wb_sel, + input [31:0] d_wb_adr, + input [31:0] d_wb_datw, + input [7:0] ext_cmd, + input [2:0] cmd_taken, // bit vector, one per queued cmd (could simultaneously occur in some designs) + output [77:0] cmd_out_0, + output [77:0] cmd_out_1 +); + +reg [77:0] cmd_queue_q[0:1]; // per-instance queue state; entry = {valid[77], we[76], sel[75:72], adr[71:40], datw[39:8], ext[7:0]} +wire [77:0] cmd_queue_d[0:1]; +wire [77:0] cmd_queue_in[0:1]; +wire [77:0] cmd_queue_out; +reg [7:0] ext_queue_q; +wire [7:0] ext_queue_d; +wire [7:0] ext_queue_in; + +case (CORE_TYPE) + `CORE_TYPE_WB1: begin + // q[0] = i or d + assign cmd_queue_in[0][77] = d_wb_cyc & d_wb_stb; // valid - may need ext decode too + assign cmd_queue_in[0][76] = d_wb_we; + assign cmd_queue_in[0][75:72] = d_wb_sel; + assign cmd_queue_in[0][71:40] = d_wb_adr; + assign cmd_queue_in[0][39:8] = d_wb_datw; + assign cmd_queue_in[0][7:0] = ext_cmd; + end + `CORE_TYPE_WB2: begin + 
// q[0]=i, q[1]=d + assign cmd_queue_in[0][77] = i_wb_cyc & i_wb_stb; // valid - may need ext decode too + assign cmd_queue_in[0][76:72] = 'h0; + assign cmd_queue_in[0][71:40] = {i_wb_adr, 2'b0}; + assign cmd_queue_in[0][39:8] = 'h0; + assign cmd_queue_in[0][7:0] = ext_cmd; + assign cmd_queue_in[1][77] = d_wb_cyc & d_wb_stb; // valid - may need ext decode too + assign cmd_queue_in[1][76] = d_wb_we; + assign cmd_queue_in[1][75:72] = d_wb_sel; + assign cmd_queue_in[1][71:40] = d_wb_adr; + assign cmd_queue_in[1][39:8] = d_wb_datw; + assign cmd_queue_in[1][7:0] = ext_cmd; + assign ext_queue_in = ext_cmd; + end +endcase + +// queue routing/arbitration to cmd processing + +// create generic command format out, handling i,d,ext!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +// dual wb cmds may need taken AND next; depends though on what should be ordering; start with INORDER=1 parameter? + +case (CORE_TYPE) + `CORE_TYPE_WB1: begin + case (BUS_TYPE) + `BUS_TYPE_WB1: begin + assign cmd_out_0 = cmd_queue_q[0]; + end + `BUS_TYPE_WB2: begin + assign cmd_out_0 = ext_queue_q[0] ? cmd_queue_q[1] : cmd_queue_q[0]; // select i vs d; queue only has entries [0:1]. NOTE(review): a WB1 core only loads q[0] - confirm what should feed q[1] here + end + endcase + end + `CORE_TYPE_WB2: begin + case (BUS_TYPE) + `BUS_TYPE_WB1:begin + // both valid: send d + // want selected bit; set first cycle; dont change once selected until not valid + assign cmd_out_0 = cmd_queue_q[1][77] ? 
cmd_queue_q[1] : cmd_queue_q[0]; + end + `BUS_TYPE_WB2: begin + assign cmd_out_0 = cmd_queue_q[0]; + assign cmd_out_1 = cmd_queue_q[1]; + end + endcase + end +endcase + +endmodule \ No newline at end of file diff --git a/rtl/a2wb/defs.v b/rtl/a2wb/defs.v new file mode 100644 index 0000000..fc768ab --- /dev/null +++ b/rtl/a2wb/defs.v @@ -0,0 +1,10 @@ +// a2wb defines: 4-bit core type and bus type codes + +`define CORE_TYPE_NONE 4'h0 +`define CORE_TYPE_A2L2 4'h1 +`define CORE_TYPE_WB1 4'h2 +`define CORE_TYPE_WB2 4'h3 + +`define BUS_TYPE_NONE 4'h0 +`define BUS_TYPE_WB1 4'h1 +`define BUS_TYPE_WB2 4'h2 diff --git a/rtl/a2wb/readme.md b/rtl/a2wb/readme.md index cd8dcb1..8b82dc9 100644 --- a/rtl/a2wb/readme.md +++ b/rtl/a2wb/readme.md @@ -8,13 +8,13 @@ * Dual (separate I/D) WB buses w/SMP extensions -* bus interface +* bus interfaces - * WB (non-SMP) + * single WB -* functions + * dual WB - * arbitrates for WB bus +* functions * queues one or more core commands @@ -24,6 +24,9 @@ * optional mailbox interface for core-core peer and broadcast + * arbitrates for WB bus(es) + + * gen responses for cores ## Possible configurations @@ -31,6 +34,9 @@ * one core, A2L2: bridge with SMP functions -* multi-core: bridge for A2L2, queueing, arbitration, and SMP functions +* multi-core: identical or mixed WB1/WB2/A2L2, queueing, arbitration, and SMP functions + +### syntax check +```verilator --lint-only a2wb.v -Wno-LITENDIAN``` \ No newline at end of file diff --git a/rtl/a2wb/smp.v b/rtl/a2wb/smp.v new file mode 100644 index 0000000..41c36a6 --- /dev/null +++ b/rtl/a2wb/smp.v @@ -0,0 +1,26 @@ + +module smp # ( +) ( + +); + +// larx/stcx reservation tracking (declarations only so far; no ports or logic yet) +// assume: if larx hits L1, core invalidates line automatically -> do not need to send back-invalidate +// reservation granule is 32B (or use lcd of all cores) +// one reservation per thread +// reservation is set before core receives reload data + +wire stcx_store [0:3]; +wire resv_ra_hit [0:3]; +wire resv_set [0:3]; +wire resv_rst [0:3]; +wire [27:0] resv_q [0:3]; // {v, addr 31:5}: 1 valid bit + 27 address bits (32B granule) +wire 
[27:0] resv_d [0:3]; + +// sync ops + +// cache ops + +// tlb ops + +endmodule \ No newline at end of file