init bridge/smp stuff
parent
8b01139c2d
commit
4a8aeab3be
@ -0,0 +1,274 @@
|
||||
// A2 Core Bridge
|
||||
|
||||
// should modularize as much as possible and just do messy rewiring here!
|
||||
|
||||
// one thread/core for now
|
||||
|
||||
// possible extended command modifiers
|
||||
// prefetch
|
||||
// larx
|
||||
// stcx
|
||||
// lwsync
|
||||
// hwsync
|
||||
// tlbsync
|
||||
// ici, icbi
|
||||
// dci, dcbi, etc
|
||||
// dcbtst
|
||||
// dcbz
|
||||
// tlbie, etc
|
||||
|
||||
// possible extended responses
|
||||
// errors
|
||||
// crit first, xfer# for larger bus width on core side
|
||||
// credits
|
||||
// resv valid
|
||||
// stcx comp/pass
|
||||
// sync ack
|
||||
// back inv val/addr
|
||||
|
||||
// possible extra functions
|
||||
// integrated L2
|
||||
// doorbell/mailbox (peer/broadcast msg/rsp/intr side channel crossbar)
|
||||
|
||||
|
||||
|
||||
// cores must be contiguous, starting at 0
|
||||
`define CORE_TYPE_NONE 4'h0
|
||||
`define CORE_TYPE_A2L2 4'h1
|
||||
`define CORE_TYPE_WB1 4'h2
|
||||
`define CORE_TYPE_WB2 4'h3
|
||||
|
||||
`define BUS_TYPE_NONE 4'h0
|
||||
`define BUS_TYPE_WB1 4'h1
|
||||
`define BUS_TYPE_WB2 4'h2
|
||||
|
||||
module A2WB #(
|
||||
parameter [0:15] CORE_TYPES = {`CORE_TYPE_WB2, `CORE_TYPE_NONE, `CORE_TYPE_NONE, `CORE_TYPE_NONE},
|
||||
parameter [0:3] BUS_TYPE = `BUS_TYPE_WB2
|
||||
) (
|
||||
input clk,
|
||||
input rst,
|
||||
input [0:3] core_in,
|
||||
output [0:3] core_out,
|
||||
input bus_in,
|
||||
output bus_out
|
||||
);
|
||||
|
||||
integer NUMCORES = 0;
|
||||
|
||||
genvar i;
|
||||
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
// I/O Connections
|
||||
|
||||
wire i_wb_cyc [0:3];
|
||||
wire i_wb_stb [0:3];
|
||||
wire [31:2] i_wb_adr[0:3] ;
|
||||
wire i_wb_ack [0:3];
|
||||
wire [31:0] i_wb_datr[0:3];
|
||||
wire d_wb_cyc [0:3];
|
||||
wire d_wb_stb [0:3];
|
||||
wire d_wb_we [0:3];
|
||||
wire [3:0] d_wb_sel [0:3];
|
||||
wire [31:0] d_wb_adr [0:3];
|
||||
wire [31:0] d_wb_datw [0:3];
|
||||
wire d_wb_ack [0:3];
|
||||
wire [31:0] d_wb_datr[0:3];
|
||||
wire [7:0] ext_cmd [0:3];
|
||||
wire [7:0] ext_rsp [0:3];
|
||||
|
||||
generate
|
||||
for (i = 0; i < 4; i++) begin
|
||||
case (CORE_TYPES[i*4:i*4+3])
|
||||
4'h0: begin
|
||||
end
|
||||
4'h1: begin
|
||||
assign NUMCORES = NUMCORES + 1;
|
||||
// a2l2
|
||||
end
|
||||
4'h2: begin
|
||||
assign NUMCORES = NUMCORES + 1;
|
||||
|
||||
wire [78:0] core_0_in;
|
||||
wire [32:0] core_out[i];
|
||||
assign d_wb_cyc[i] = core_in[i][78];
|
||||
assign d_wb_stb[i] = core_in[i][77];
|
||||
assign d_wb_we[i] = core_in[i][76];
|
||||
assign d_wb_sel[i] = core_in[i][75:72];
|
||||
assign d_wb_adr[i] = core_in[i][71:40];
|
||||
assign d_wb_datw[i] = core_in[i][39:8];
|
||||
assign ext_cmd[i] = core_in[i][7:0];
|
||||
assign core_out[i][32] = d_wb_ack[i];
|
||||
assign core_out[i][31:0] = d_wb_datr[i];
|
||||
end
|
||||
4'h3: begin
|
||||
assign NUMCORES = NUMCORES + 1;
|
||||
|
||||
wire [110:0] core_in[i];
|
||||
wire [65:0] core_out[i];
|
||||
assign i_wb_cyc[i] = core_in[i][110];
|
||||
assign i_wb_stb[i] = core_in[i][109];
|
||||
assign i_wb_adr[i] = core_in[i][108:79];
|
||||
assign d_wb_cyc[i] = core_in[i][78];
|
||||
assign d_wb_stb[i] = core_in[i][77];
|
||||
assign d_wb_we[i] = core_in[i][76];
|
||||
assign d_wb_sel[i] = core_in[i][75:72];
|
||||
assign d_wb_adr[i] = core_in[i][71:40];
|
||||
assign d_wb_datw[i] = core_in[i][39:8];
|
||||
assign ext_cmd[i] = core_in[i][7:0];
|
||||
assign core_out[i][65] = i_wb_ack[i];
|
||||
assign core_out[i][64:33] = i_wb_datr[i];
|
||||
assign core_out[i][32] = d_wb_ack[i];
|
||||
assign core_out[i][31:0] = d_wb_datr[i];
|
||||
end
|
||||
endcase
|
||||
end
|
||||
endgenerate
|
||||
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
// Command Queues/Addr Compare/Bypass
|
||||
//
|
||||
// cores can have either 1 or 2 buses; assume single-cmd outstanding per, for now
|
||||
// a2l2 could also allow 1 ld, 1 st credit and use 2 dedicated queues
|
||||
reg [77:0] cmd_queue_q[0:3][0:1];
|
||||
wire [77:0] cmd_queue_d[0:3][0:1];
|
||||
wire [77:0] cmd_queue_in[0:3][0:1];
|
||||
wire [71:0] cmd_queue_out[0:3];
|
||||
generate
   // Pack each core's Wishbone request(s) into the internal command-queue
   // entry layout used by cmd_queue_in[core][slot]:
   //   [77]    valid  (cyc & stb)
   //   [76]    we
   //   [75:72] sel
   //   [71:40] adr
   //   [39:8]  datw
   //   [7:0]   ext_cmd
   for (i = 0; i < 4; i = i + 1) begin : gen_cmd_queue_in
      case (CORE_TYPES[i*4:i*4+3])

         `CORE_TYPE_NONE: begin
            // no core in this position; nothing is driven here
         end

         `CORE_TYPE_A2L2: begin
            // convert a2l2 to internal format
         end

         `CORE_TYPE_WB1: begin
            // single combined bus: q[0] = i or d (q[1] is not driven for this core type)
            assign cmd_queue_in[i][0][77]    = d_wb_cyc[i] & d_wb_stb[i];   // valid
            assign cmd_queue_in[i][0][76]    = d_wb_we[i];
            assign cmd_queue_in[i][0][75:72] = d_wb_sel[i];
            assign cmd_queue_in[i][0][71:40] = d_wb_adr[i];
            assign cmd_queue_in[i][0][39:8]  = d_wb_datw[i];
            assign cmd_queue_in[i][0][7:0]   = ext_cmd[i];
         end

         `CORE_TYPE_WB2: begin
            // separate buses: q[0]=i, q[1]=d

            // I-side is read-only: we/sel/datw are tied to zero.
            // i_wb_adr is declared [31:2] (word address), so pad the two low
            // bits to fill the 32-bit address field.
            assign cmd_queue_in[i][0][77]    = i_wb_cyc[i] & i_wb_stb[i];   // valid
            assign cmd_queue_in[i][0][76]    = 1'b0;
            assign cmd_queue_in[i][0][75:72] = 4'b0000;
            assign cmd_queue_in[i][0][71:40] = {i_wb_adr[i], 2'b00};
            assign cmd_queue_in[i][0][39:8]  = 32'h0000_0000;
            assign cmd_queue_in[i][0][7:0]   = ext_cmd[i];

            // D-side goes to slot 1 so it does not fight the I-side drivers on slot 0.
            assign cmd_queue_in[i][1][77]    = d_wb_cyc[i] & d_wb_stb[i];   // valid
            assign cmd_queue_in[i][1][76]    = d_wb_we[i];
            assign cmd_queue_in[i][1][75:72] = d_wb_sel[i];
            assign cmd_queue_in[i][1][71:40] = d_wb_adr[i];
            assign cmd_queue_in[i][1][39:8]  = d_wb_datw[i];
            assign cmd_queue_in[i][1][7:0]   = ext_cmd[i];
         end

      endcase
   end
endgenerate
|
||||
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
// SMP
|
||||
|
||||
// larx/stcx
|
||||
// assume: if larx hits L1, core invalidates line automatically -> do not need to send back-invalidate
|
||||
// reservation granule is 32B (or use lcd of all cores)
|
||||
// one reservation per thread
|
||||
// reservation is set before core receives reload data
|
||||
|
||||
wire stcx_store [0:3];
|
||||
wire resv_ra_hit [0:3];
|
||||
wire resv_set [0:3];
|
||||
wire resv_rst [0:3];
|
||||
wire [27:0] resv_q [0:3]; // v, @31:5
|
||||
wire [27:0] resv_d [0:3];
|
||||
|
||||
generate
|
||||
for (i = 0; i < 4; i++) begin
|
||||
|
||||
end
|
||||
endgenerate
|
||||
|
||||
// sync ack
|
||||
|
||||
// cache ops
|
||||
|
||||
// tlb ops
|
||||
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
// Arbitration
|
||||
//
|
||||
// LRU, etc. select from pending cmds
|
||||
generate
|
||||
for (i = 0; i < 4; i++) begin
|
||||
end
|
||||
endgenerate
|
||||
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
// Bus Out
|
||||
|
||||
generate
|
||||
if (BUS_TYPE == `BUS_TYPE_WB1) begin
|
||||
|
||||
end else if (BUS_TYPE == `BUS_TYPE_WB2) begin
|
||||
|
||||
wire [101:0] bus_out;
|
||||
wire bus_i_wb_stb;
|
||||
assign bus_out[101] = bus_i_wb_stb;
|
||||
wire [31:2] bus_i_wb_adr;
|
||||
assign bus_out[100:71] = bus_i_wb_adr;
|
||||
wire bus_d_wb_cyc;
|
||||
assign bus_out[70] = bus_d_wb_cyc;
|
||||
wire bus_d_wb_stb;
|
||||
assign bus_out[69] = bus_d_wb_stb;
|
||||
wire bus_d_wb_we;
|
||||
assign bus_out[68] = bus_d_wb_we;
|
||||
wire [3:0] bus_d_wb_sel;
|
||||
assign bus_out[67:64] = bus_d_wb_sel;
|
||||
wire [31:0] bus_d_wb_adr;
|
||||
assign bus_out[63:32] = bus_d_wb_adr;
|
||||
wire [31:0] bus_d_wb_datw;
|
||||
assign bus_out[31:0] = bus_d_wb_datw;
|
||||
|
||||
end else begin
|
||||
end
|
||||
endgenerate
|
||||
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
// Bus In
|
||||
|
||||
generate
|
||||
if (BUS_TYPE == `BUS_TYPE_WB1) begin
|
||||
|
||||
end else if (BUS_TYPE == `BUS_TYPE_WB2) begin
|
||||
|
||||
wire [65:0] bus_in;
|
||||
wire bus_i_wb_ack = bus_in[65];
|
||||
wire [31:0] bus_i_wb_datr = bus_in[64:33];
|
||||
wire bus_d_wb_ack = bus_in[32];
|
||||
wire [31:0] bus_d_wb_datr = bus_in[31:0];
|
||||
|
||||
end else begin
|
||||
end
|
||||
endgenerate
|
||||
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
// Response Queues
|
||||
|
||||
generate
|
||||
for (i = 0; i < 4; i++) begin
|
||||
end
|
||||
endgenerate
|
||||
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
// Misc/Errors/Debug
|
||||
|
||||
generate
|
||||
for (i = 0; i < 4; i++) begin
|
||||
end
|
||||
endgenerate
|
||||
|
||||
endmodule
|
@ -1 +1,36 @@
|
||||
# Bridge A2I/A2O memory interface to WB
|
||||
# A2 Interfaces to WB
|
||||
|
||||
* core interfaces
|
||||
|
||||
* A2I/A2O A2L2 bus
|
||||
|
||||
* Single (combined I/D) w/SMP extensions
|
||||
|
||||
* Dual (separate I/D) WB buses w/SMP extensions
|
||||
|
||||
* bus interface
|
||||
|
||||
* WB (non-SMP)
|
||||
|
||||
* functions
|
||||
|
||||
* arbitrates for WB bus
|
||||
|
||||
* queues one or more core commands
|
||||
|
||||
* point of coherency for larx/stcx, sync, tlbie, etc. for multicores below it (single/multithread)
|
||||
|
||||
* address compares necessary for ordering/coherency
|
||||
|
||||
* optional mailbox interface for core-core peer and broadcast
|
||||
|
||||
|
||||
## Possible configurations
|
||||
|
||||
* one core, WB: pass-through with SMP functions
|
||||
|
||||
* one core, A2L2: bridge with SMP functions
|
||||
|
||||
* multi-core: bridge for A2L2, queueing, arbitration, and SMP functions
|
||||
|
||||
|
||||
|
@ -1,18 +0,0 @@
|
||||
# L2 Multicore
|
||||
|
||||
* Multicore bridge to WB
|
||||
|
||||
* generate 1-4 core interfaces (possibly mixed)
|
||||
|
||||
* slave WB-I, slave WB-D, OPMC extension (opcode/WIMG/...)
|
||||
|
||||
* a2i/a2o out-of-order/credits/... style
|
||||
|
||||
* configurable load and store queues per interface (if pipelined/credited interfaces)
|
||||
|
||||
* point-of-coherency/snoop/sync/... logic
|
||||
|
||||
* configurable-size shared L2 (extra tags for pinning, etc.?)
|
||||
|
||||
* WB-I, WB-D master
|
||||
|
@ -0,0 +1,94 @@
|
||||
# Magic Incantations for Litex Integration
|
||||
|
||||
## Litex General
|
||||
|
||||
### add local platform
|
||||
from platforms import cmod7
|
||||
|
||||
### add local core
|
||||
|
||||
```
|
||||
binPath = os.path.dirname(os.path.realpath(__file__))
|
||||
sys.path.append(os.path.join(binPath, 'a2p'))
|
||||
from a2p import A2P
|
||||
from litex.soc.cores import cpu
|
||||
cpu.CPUS['a2p'] = A2P
|
||||
```
|
||||
|
||||
### add local modules
|
||||
|
||||
```
|
||||
sys.path.append(os.path.join(binPath, 'modules'))
|
||||
```
|
||||
|
||||
### add extra UARTs (GPIO)
|
||||
|
||||
* must add PHY and UART to CSR
|
||||
|
||||
```
|
||||
self.submodules.uart_1_phy = RS232PHY(pins, sys_clk_freq, 115200, with_dynamic_baudrate=True)
|
||||
self.add_csr('uart_1_phy')
|
||||
self.submodules.uart_1 = UART(phy=self.uart_1_phy)
|
||||
self.add_csr('uart_1')
|
||||
```
|
||||
|
||||
* dynamic baudrate allows setting it in csr
|
||||
|
||||
* tuning_word = int((baudrate/clk_freq)*2**32)
|
||||
* in bios
|
||||
* ```#define CONFIG_CLOCK_FREQUENCY 100000000```
|
||||
* or read freq with config_clock_frequency_read()
|
||||
* access csr with generated UART_1 funcs
|
||||
|
||||
* 115200=004B7F5A (4947802)
|
||||
* 9600=00064A9C (412316)
|
||||
|
||||
### add 'CSR Definition' CSR for indirectly addressing CSRs, so that code doesn't need to change when gateware is rebuilt
|
||||
|
||||
* add a 'CSR Definition' region at constant location (like CSR_BASE)
|
||||
|
||||
* contains list of CSRs which point to and define CSRs available
|
||||
|
||||
* Base Addr: 24 bits, Identifier: 8 bits
|
||||
|
||||
* also could self.add_constant(ID) for each, so that software can use constants to check Identifier
|
||||
|
||||
* needs to be able to reserve its region, then find out what's been built and add its regs before finalize()
|
||||
|
||||
### allow lxterm and lxserver to share main uart (crossover)
|
||||
|
||||
|
||||
## Litex RTL
|
||||
|
||||
### add verilog module to top, between top-level bus and I/Os (slave)
|
||||
|
||||
```
|
||||
from issiram import ISSIRam
|
||||
platform.add_source("./modules/issiram.v")
|
||||
|
||||
sram_bus = wishbone.Interface()
|
||||
pins = platform.request('issiram')
|
||||
# to rename pins
|
||||
mem = {
|
||||
'ce': pins.cen,
|
||||
'oe': pins.oen,
|
||||
'we': pins.wen,
|
||||
'adr': pins.addr,
|
||||
'dat': pins.data
|
||||
}
|
||||
sram = ISSIRam(self, ClockSignal(), ResetSignal(), sram_bus, mem)
|
||||
self.submodules.sram = sram
|
||||
self.bus.add_slave('sram', sram_bus, SoCRegion(origin=self.mem_map['sram'], size=sram.size))
|
||||
self.logger.info("SRAM {} {} {}.".format(
|
||||
colorer('sram'),
|
||||
colorer("added", color="green"),
|
||||
self.bus.regions['sram'])
|
||||
)
|
||||
```
|
||||
|
||||
### add verilog module to top, with connections to other top-level signals
|
||||
|
||||
|
||||
### add Litex L2 between WB and verilog module
|
||||
|
||||
|
Loading…
Reference in New Issue