master
wtf 2 years ago
parent 760391234c
commit fbd8854d4c

@ -1,46 +1,9 @@
// A2 Core Bridge // A2 Core Bridge


// should modularize as much as possible and just do messy rewiring here! // adapt cores and buses with generic module
// one thread/core for now; multithread needs thread tag, deeper queues


// one thread/core for now `include "defs.v"

// possible extended command modifiers
// prefetch
// larx
// stcx
// lwsync
// hwsync
// tlbsync
// ici, icbi
// dci, dcbi, etc
// dcbtst
// dcbz
// tlbie, etc

// possible extended responses
// errors
// crit first, xfer# for larger bus width on core side
// credits
// resv valid
// stcx comp/pass
// sync ack
// back inv val/addr

// possible extra functions
// integrated L2
// doorbell/mailbox (peer/broadcast msg/rsp/intr side channel crossbar)



// cores must be contiguous, starting at 0
`define CORE_TYPE_NONE 4'h0
`define CORE_TYPE_A2L2 4'h1
`define CORE_TYPE_WB1 4'h2
`define CORE_TYPE_WB2 4'h3

`define BUS_TYPE_NONE 4'h0
`define BUS_TYPE_WB1 4'h1
`define BUS_TYPE_WB2 4'h2


module A2WB #( module A2WB #(
parameter [0:15] CORE_TYPES = {`CORE_TYPE_WB2, `CORE_TYPE_NONE, `CORE_TYPE_NONE, `CORE_TYPE_NONE}, parameter [0:15] CORE_TYPES = {`CORE_TYPE_WB2, `CORE_TYPE_NONE, `CORE_TYPE_NONE, `CORE_TYPE_NONE},
@ -61,6 +24,8 @@ genvar i;
// ------------------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------------------
// I/O Connections // I/O Connections


// cores must be contiguous, starting at 0

wire i_wb_cyc [0:3]; wire i_wb_cyc [0:3];
wire i_wb_stb [0:3]; wire i_wb_stb [0:3];
wire [31:2] i_wb_adr[0:3] ; wire [31:2] i_wb_adr[0:3] ;
@ -80,13 +45,13 @@ wire [7:0] ext_rsp [0:3];
generate generate
for (i = 0; i < 4; i++) begin for (i = 0; i < 4; i++) begin
case (CORE_TYPES[i*4:i*4+3]) case (CORE_TYPES[i*4:i*4+3])
4'h0: begin `CORE_TYPE_NONE: begin
end end
4'h1: begin `CORE_TYPE_A2L2: begin
assign NUMCORES = NUMCORES + 1; assign NUMCORES = NUMCORES + 1;
// a2l2 // a2l2
end end
4'h2: begin `CORE_TYPE_WB1: begin
assign NUMCORES = NUMCORES + 1; assign NUMCORES = NUMCORES + 1;


wire [78:0] core_0_in; wire [78:0] core_0_in;
@ -101,7 +66,7 @@ generate
assign core_out[i][32] = d_wb_ack[i]; assign core_out[i][32] = d_wb_ack[i];
assign core_out[i][31:0] = d_wb_datr[i]; assign core_out[i][31:0] = d_wb_datr[i];
end end
4'h3: begin `CORE_TYPE_WB2: begin
assign NUMCORES = NUMCORES + 1; assign NUMCORES = NUMCORES + 1;


wire [110:0] core_in[i]; wire [110:0] core_in[i];
@ -126,137 +91,128 @@ generate
endgenerate endgenerate


// ------------------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------------------
// Command Queues/Addr Compare/Bypass // Command Interfaces
// //
// cores can have either 1 or 2 buses; assume single-cmd outstanding per, for now
// a2l2 could also allow 1 ld, 1 st credit and use 2 dedicated queues
reg [77:0] cmd_queue_q[0:3][0:1];
wire [77:0] cmd_queue_d[0:3][0:1];
wire [77:0] cmd_queue_in[0:3][0:1];
wire [71:0] cmd_queue_out[0:3];
generate generate
for (i = 0; i < 4; i++) begin for (i = 0; i < 4; i++) begin
case (CORE_TYPES[i*4:i*4+3]) case (CORE_TYPES[i*4:i*4+3])
4'h0: begin `CORE_TYPE_NONE: begin
end end
4'h1: begin `CORE_TYPE_A2L2: begin
// convert a2l2 to internal format // convert a2l2 to internal format
end end
4'h2: begin `CORE_TYPE_WB1: begin
// q[0] = i or d cmd_wb #(.CORE_TYPE(CORE_TYPES[i*4:i*4+3]), .BUS_TYPE(BUS_TYPE)) core_in (
assign cmd_queue_in[i][0][77] = d_wb_cyc[i] & d_wb_stb[i]; // valid .clk(clk),
assign cmd_queue_in[i][0][76] = d_wb_we[i]; .rst(rst),
assign cmd_queue_in[i][0][75:72] = d_wb_sel[i]; .i_wb_cyc('b0),
assign cmd_queue_in[i][0][71:40] = d_wb_adr[i]; .i_wb_stb('b0),
assign cmd_queue_in[i][0][39:8] = d_wb_datw[i]; .i_wb_adr('h0),
assign cmd_queue_in[i][0][7:0] = ext_cmd[i]; .d_wb_cyc(d_wb_cyc[i]),
.d_wb_stb(d_wb_stb[i]),
.d_wb_we(d_wb_we[i]),
.d_wb_sel(d_wb_sel[i]),
.d_wb_adr(d_wb_adr[i]),
.d_wb_datw(d_wb_datw[i]),
.ext_cmd(ext_cmd[i]),
.cmd_taken('b0),
.cmd_out_0(),
.cmd_out_1()
);
end end
4'h3: begin `CORE_TYPE_WB2: begin
// q[0]=i, q[1]=d cmd_wb #(.CORE_TYPE(CORE_TYPES[i*4:i*4+3]), .BUS_TYPE(BUS_TYPE)) core_in (
assign cmd_queue_in[i][0][77] = i_wb_cyc[i] & i_wb_stb[i]; // valid .clk(clk),
assign cmd_queue_in[i][0][76] = 'b0; .rst(rst),
assign cmd_queue_in[i][0][75:72] = 'b0000; .i_wb_cyc(i_wb_cyc[i]),
assign cmd_queue_in[i][0][71:40] = d_wb_adr[i]; .i_wb_stb(i_wb_stb[i]),
assign cmd_queue_in[i][0][39:8] = 'h000000; .i_wb_adr(i_wb_adr[i]),
assign cmd_queue_in[i][0][7:0] = ext_cmd[i]; .d_wb_cyc(d_wb_cyc[i]),
assign cmd_queue_in[i][0][77] = d_wb_cyc[i] & d_wb_stb[i]; // valid .d_wb_stb(d_wb_stb[i]),
assign cmd_queue_in[i][0][76] = d_wb_we[i]; .d_wb_we(d_wb_we[i]),
assign cmd_queue_in[i][0][75:72] = d_wb_sel[i]; .d_wb_sel(d_wb_sel[i]),
assign cmd_queue_in[i][0][71:40] = d_wb_adr[i]; .d_wb_adr(d_wb_adr[i]),
assign cmd_queue_in[i][0][39:8] = d_wb_datw[i]; .d_wb_datw(d_wb_datw[i]),
assign cmd_queue_in[i][0][7:0] = ext_cmd[i]; .ext_cmd(ext_cmd[i]),
.cmd_taken('b0),
.cmd_out_0(),
.cmd_out_1()
);
end end
endcase endcase
end end
endgenerate endgenerate


// ------------------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------------------
// SMP // Arbitration

//
// larx/stcx // LRU, etc. select from pending cmds; also needs smp to stall some/all cmds
// assume: if larx hits L1, core invalidates line automatically -> do not need to send back-invalidate // do addr cmp here, if necessary? or could do in smp
// reservation granule is 32B (or use lcd of all cores)
// one reservation per thread
// reservation is set before core receives reload data

wire stcx_store [0:3];
wire resv_ra_hit [0:3];
wire resv_set [0:3];
wire resv_rst [0:3];
wire [27:0] resv_q [0:3]; // v, @31:5
wire [27:0] resv_d [0:3];

generate
for (i = 0; i < 4; i++) begin


end arb #() arb (
endgenerate


// sync ack );


// cache ops // ------------------------------------------------------------------------------------------------
// SMP


// tlb ops // special ops: track resv, stall pending cmds, gen rsp
smp #() smp (


// ------------------------------------------------------------------------------------------------ );
// Arbitration
//
// LRU, etc. select from pending cmds
generate
for (i = 0; i < 4; i++) begin
end
endgenerate


// ------------------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------------------
// Bus Out // Bus Out
// commands to main bus


generate generate
if (BUS_TYPE == `BUS_TYPE_WB1) begin case(BUS_TYPE)

`BUS_TYPE_WB1: begin
end else if (BUS_TYPE == `BUS_TYPE_WB2) begin end

`BUS_TYPE_WB2: begin
wire [101:0] bus_out; wire [101:0] bus_out;
wire bus_i_wb_stb; wire bus_i_wb_stb;
assign bus_out[101] = bus_i_wb_stb; assign bus_out[101] = bus_i_wb_stb;
wire [31:2] bus_i_wb_adr; wire [31:2] bus_i_wb_adr;
assign bus_out[100:71] = bus_i_wb_adr; assign bus_out[100:71] = bus_i_wb_adr;
wire bus_d_wb_cyc; wire bus_d_wb_cyc;
assign bus_out[70] = bus_d_wb_cyc; assign bus_out[70] = bus_d_wb_cyc;
wire bus_d_wb_stb; wire bus_d_wb_stb;
assign bus_out[69] = bus_d_wb_stb; assign bus_out[69] = bus_d_wb_stb;
wire bus_d_wb_we; wire bus_d_wb_we;
assign bus_out[68] = bus_d_wb_we; assign bus_out[68] = bus_d_wb_we;
wire [3:0] bus_d_wb_sel; wire [3:0] bus_d_wb_sel;
assign bus_out[67:64] = bus_d_wb_sel; assign bus_out[67:64] = bus_d_wb_sel;
wire [31:0] bus_d_wb_adr; wire [31:0] bus_d_wb_adr;
assign bus_out[63:32] = bus_d_wb_adr; assign bus_out[63:32] = bus_d_wb_adr;
wire [31:0] bus_d_wb_datw; wire [31:0] bus_d_wb_datw;
assign bus_out[31:0] = bus_d_wb_datw; assign bus_out[31:0] = bus_d_wb_datw;

end
end else begin endcase
end
endgenerate endgenerate


// ------------------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------------------
// Bus In // Bus In
// responses from main bus


generate generate
if (BUS_TYPE == `BUS_TYPE_WB1) begin case(BUS_TYPE)

`BUS_TYPE_WB1: begin
end else if (BUS_TYPE == `BUS_TYPE_WB2) begin end

`BUS_TYPE_WB2: begin
wire [65:0] bus_in; wire [65:0] bus_in;
wire bus_i_wb_ack = bus_in[65]; wire bus_i_wb_ack = bus_in[65];
wire [31:0] bus_i_wb_datr = bus_in[64:33]; wire [31:0] bus_i_wb_datr = bus_in[64:33];
wire bus_d_wb_ack = bus_in[32]; wire bus_d_wb_ack = bus_in[32];
wire [31:0] bus_d_wb_datr = bus_in[31:0]; wire [31:0] bus_d_wb_datr = bus_in[31:0];

end
end else begin endcase
end
endgenerate endgenerate


// ------------------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------------------
// Response Queues // Response Queues
// responses for cores


generate generate
for (i = 0; i < 4; i++) begin for (i = 0; i < 4; i++) begin
@ -265,6 +221,7 @@ endgenerate


// ------------------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------------------
// Misc/Errors/Debug // Misc/Errors/Debug
// stuff


generate generate
for (i = 0; i < 4; i++) begin for (i = 0; i < 4; i++) begin

@ -0,0 +1,13 @@

// arb: command arbiter (placeholder).
// No parameters, ports, or logic are defined yet; the notes inside the module
// body list the responsibilities this block is intended to take on.
module arb # (
) (

);

// planned behavior (nothing below is implemented yet):
// fairly choose 1 or 2 (depending on output buses) cmds
// mark taken from queue
// obey restrictions from smp, etc.
// detect addr collisions - not needed if no caching?


endmodule

@ -0,0 +1,15 @@
// a2l2 default: allow 1 ld, 1 st credit and use 2 dedicated queues

`include "defs.v"

// A2L2 command interface (placeholder: no ports or logic are defined yet).
//
// Parameters:
//   CORE_TYPE - core type code from defs.v; defaults to `CORE_TYPE_A2L2.
//
// NOTE(review): this module is named cmd_wb, the same name as the Wishbone
// command interface module - presumably a copy/paste leftover; confirm the
// intended name before compiling both files into one design.
module cmd_wb #(
   parameter CORE_TYPE = `CORE_TYPE_A2L2   // macro reference needs the backtick
) (
);

// Command queue storage. Declared inside the module so that every instance
// owns its own copy instead of sharing compilation-unit-scope nets.
// Nothing drives or reads these yet.
reg  [77:0] cmd_queue_q[0:3][0:1];
wire [77:0] cmd_queue_d[0:3][0:1];
wire [77:0] cmd_queue_in[0:3][0:1];
wire [71:0] cmd_queue_out[0:3];

endmodule

@ -0,0 +1,132 @@
// Wishbone-Wishbone Command Interface

// allow single- or dual-wb in/out:
// 1/1 : passthru
// 1/2 : route to the proper output bus - but this requires an indicator in ext_cmd to distinguish i vs. d
// 2/1 : arbitrate
// 2/2 : passthru
//
// also handle special ops when possible (dcbz, ...)

// select one command per output bus

// ext_cmd is not tied to i/d, but does it need to allow multiple outstanding? (nop=0; valid ext cmds that require i/d info must also have that info provided)
// needs ext_tkn if not tied to i/d; needs to set q valid if tied to i/d (based on whether the ext cmd is an i or d type)
// seems like these are all tied to an i or d cmd and require a response, so i+d+ext should not all need to be outstanding at once?

// possible extended command modifiers
// prefetch
// larx
// stcx
// lwsync
// hwsync
// tlbsync
// ici, icbi
// dci, dcbi, etc
// dcbtst
// dcbz
// tlbie, etc

// possible extended responses
// errors
// crit first, xfer# for larger bus width on core side
// credits
// resv valid
// stcx comp/pass
// sync ack
// back inv val/addr

// possible extra functions
// integrated L2
// doorbell/mailbox (peer/broadcast msg/rsp/intr side channel crossbar)

`include "defs.v"

// cmd_wb: Wishbone command interface for one core.
//
// Packs the core-side instruction (i) and data (d) Wishbone requests and the
// extended command into the 78-bit internal command format, then selects which
// queued command is presented on each output according to CORE_TYPE/BUS_TYPE.
//
// Internal command format:
//   [77]    valid (cyc & stb)
//   [76]    we
//   [75:72] sel
//   [71:40] adr
//   [39:8]  datw
//   [7:0]   ext_cmd
//
// Parameters:
//   CORE_TYPE - `CORE_TYPE_WB1 (single bus) or `CORE_TYPE_WB2 (separate i/d)
//   BUS_TYPE  - `BUS_TYPE_WB1 (one output cmd) or `BUS_TYPE_WB2 (two output cmds)
//
// Ports:
//   clk, rst            - not used by any logic in this module yet
//   i_wb_*              - instruction-side request (read only, word address)
//   d_wb_*              - data-side request
//   ext_cmd             - extended command modifier
//   cmd_taken           - bit vector, one per queued cmd; not used yet
//   cmd_out_0/cmd_out_1 - selected commands; an unused output is driven to 0
//                         (valid bit clear)
//
// NOTE(review): cmd_queue_q and ext_queue_q are never written here - the
// in -> d -> q queue update logic is still missing, as are the users of
// cmd_queue_d, cmd_queue_out and ext_queue_d.
module cmd_wb #(
   parameter CORE_TYPE = `CORE_TYPE_WB2,
   parameter BUS_TYPE = `BUS_TYPE_WB2
) (
   input          clk,
   input          rst,
   input          i_wb_cyc,
   input          i_wb_stb,
   input  [31:2]  i_wb_adr,
   input          d_wb_cyc,
   input          d_wb_stb,
   input          d_wb_we,
   input  [3:0]   d_wb_sel,
   input  [31:0]  d_wb_adr,
   input  [31:0]  d_wb_datw,
   input  [7:0]   ext_cmd,
   input  [2:0]   cmd_taken,   // bit vector, one per queued cmd (could simultaneously occur in some designs)
   output [77:0]  cmd_out_0,
   output [77:0]  cmd_out_1
);

// Queue state lives inside the module so each instance has its own copy;
// at compilation-unit scope every instance would drive the same nets.
reg  [77:0] cmd_queue_q[0:1];
wire [77:0] cmd_queue_d[0:1];
wire [77:0] cmd_queue_in[0:1];
wire [77:0] cmd_queue_out;
reg  [7:0]  ext_queue_q;
wire [7:0]  ext_queue_d;
wire [7:0]  ext_queue_in;

// The ext queue input is needed for every core type: the single-bus core on a
// dual bus is the configuration that reads ext_queue_q.
assign ext_queue_in = ext_cmd;

// ---------------------------------------------------------------------------
// core-side requests -> queue inputs
generate
   case (CORE_TYPE)
      `CORE_TYPE_WB1: begin
         // q[0] = i or d
         assign cmd_queue_in[0][77]    = d_wb_cyc & d_wb_stb;   // valid - may need ext decode too
         assign cmd_queue_in[0][76]    = d_wb_we;
         assign cmd_queue_in[0][75:72] = d_wb_sel;
         assign cmd_queue_in[0][71:40] = d_wb_adr;
         assign cmd_queue_in[0][39:8]  = d_wb_datw;
         assign cmd_queue_in[0][7:0]   = ext_cmd;
         // second entry is unused by a single-bus core; keep it not-valid
         assign cmd_queue_in[1]        = 78'h0;
      end
      `CORE_TYPE_WB2: begin
         // q[0]=i, q[1]=d
         assign cmd_queue_in[0][77]    = i_wb_cyc & i_wb_stb;   // valid - may need ext decode too
         assign cmd_queue_in[0][76:72] = 'h0;                   // i side never writes: we=0, sel=0
         assign cmd_queue_in[0][71:40] = {i_wb_adr, 2'b0};      // word address -> byte address
         assign cmd_queue_in[0][39:8]  = 'h0;
         assign cmd_queue_in[0][7:0]   = ext_cmd;
         assign cmd_queue_in[1][77]    = d_wb_cyc & d_wb_stb;   // valid - may need ext decode too
         assign cmd_queue_in[1][76]    = d_wb_we;
         assign cmd_queue_in[1][75:72] = d_wb_sel;
         assign cmd_queue_in[1][71:40] = d_wb_adr;
         assign cmd_queue_in[1][39:8]  = d_wb_datw;
         assign cmd_queue_in[1][7:0]   = ext_cmd;
      end
   endcase
endgenerate

// queue routing/arbitration to cmd processing

// create generic command format out, handling i,d,ext
// dual wb cmds may need taken AND next; depends though on what should be ordering; start with INORDER=1 parameter?

// ---------------------------------------------------------------------------
// queue -> output command selection
generate
   case (CORE_TYPE)
      `CORE_TYPE_WB1: begin
         case (BUS_TYPE)
            `BUS_TYPE_WB1: begin
               // 1/1: passthru
               assign cmd_out_0 = cmd_queue_q[0];
               assign cmd_out_1 = 78'h0;
            end
            `BUS_TYPE_WB2: begin
               // 1/2: select i vs d using ext bit 0. The queue only has
               // entries 0 and 1, so the select must stay within that range.
               // NOTE(review): a single-bus core only fills entry 0, and the
               // second output is held idle here; the final i/d routing onto
               // the two output buses still has to be defined.
               assign cmd_out_0 = ext_queue_q[0] ? cmd_queue_q[1] : cmd_queue_q[0];
               assign cmd_out_1 = 78'h0;
            end
         endcase
      end
      `CORE_TYPE_WB2: begin
         case (BUS_TYPE)
            `BUS_TYPE_WB1: begin
               // 2/1: arbitrate; both valid: send d
               // want selected bit; set first cycle; dont change once selected until not valid
               assign cmd_out_0 = cmd_queue_q[1][77] ? cmd_queue_q[1] : cmd_queue_q[0];
               assign cmd_out_1 = 78'h0;
            end
            `BUS_TYPE_WB2: begin
               // 2/2: passthru
               assign cmd_out_0 = cmd_queue_q[0];
               assign cmd_out_1 = cmd_queue_q[1];
            end
         endcase
      end
   endcase
endgenerate

endmodule

@ -0,0 +1,10 @@
// a2wb defines

// Include guard: this file is `included by several source files, so make sure
// the macros are only defined once per compilation unit.
`ifndef A2WB_DEFS_V
`define A2WB_DEFS_V

// core interface type codes (4 bits each, one per CORE_TYPES slot)
`define CORE_TYPE_NONE 4'h0
`define CORE_TYPE_A2L2 4'h1
`define CORE_TYPE_WB1 4'h2
`define CORE_TYPE_WB2 4'h3

// main bus interface type codes
`define BUS_TYPE_NONE 4'h0
`define BUS_TYPE_WB1 4'h1
`define BUS_TYPE_WB2 4'h2

`endif // A2WB_DEFS_V

@ -8,13 +8,13 @@


* Dual (separate I/D) WB buses w/SMP extensions * Dual (separate I/D) WB buses w/SMP extensions


* bus interface * bus interfaces


* WB (non-SMP) * single WB


* functions * dual WB


* arbitrates for WB bus * functions


* queues one or more core commands * queues one or more core commands


@ -24,6 +24,9 @@


* optional mailbox interface for core-core peer and broadcast * optional mailbox interface for core-core peer and broadcast


* arbitrates for WB bus(es)

* gen responses for cores


## Possible configurations ## Possible configurations


@ -31,6 +34,9 @@


* one core, A2L2: bridge with SMP functions * one core, A2L2: bridge with SMP functions


* multi-core: bridge for A2L2, queueing, arbitration, and SMP functions * multi-core: identical or mixed WB1/WB2/A2L2, queueing, arbitration, and SMP functions



### syntax check


```verilator --lint-only a2wb.v -Wno-LITENDIAN```

@ -0,0 +1,26 @@

// smp: SMP special-op handling (placeholder - declarations only, no ports or logic yet).
module smp #(
) (
);

   // ---- larx/stcx reservation tracking ----
   // Design assumptions:
   //   * if larx hits L1, the core invalidates the line automatically, so no
   //     back-invalidate needs to be sent
   //   * reservation granule is 32B (or use lcd of all cores)
   //   * one reservation per thread
   //   * the reservation is set before the core receives reload data

   // per-slot (0..3) single-bit controls
   wire        stcx_store  [0:3],
               resv_ra_hit [0:3],
               resv_set    [0:3],
               resv_rst    [0:3];

   // per-slot reservation entry: v, @31:5
   wire [27:0] resv_q [0:3],
               resv_d [0:3];

   // ---- sync ops ----

   // ---- cache ops ----

   // ---- tlb ops ----

endmodule
Loading…
Cancel
Save