init bridge/smp stuff
parent
8b01139c2d
commit
4a8aeab3be
@ -0,0 +1,274 @@
|
|||||||
|
// A2 Core Bridge
|
||||||
|
|
||||||
|
// should modularize as much as possible and just do messy rewiring here!
|
||||||
|
|
||||||
|
// one thread/core for now
|
||||||
|
|
||||||
|
// possible extended command modifiers
|
||||||
|
// prefetch
|
||||||
|
// larx
|
||||||
|
// stcx
|
||||||
|
// lwsync
|
||||||
|
// hwsync
|
||||||
|
// tlbsync
|
||||||
|
// ici, icbi
|
||||||
|
// dci, dcbi, etc
|
||||||
|
// dcbtst
|
||||||
|
// dcbz
|
||||||
|
// tlbie, etc
|
||||||
|
|
||||||
|
// possible extended responses
|
||||||
|
// errors
|
||||||
|
// crit first, xfer# for larger bus width on core side
|
||||||
|
// credits
|
||||||
|
// resv valid
|
||||||
|
// stcx comp/pass
|
||||||
|
// sync ack
|
||||||
|
// back inv val/addr
|
||||||
|
|
||||||
|
// possible extra functions
|
||||||
|
// integrated L2
|
||||||
|
// doorbell/mailbox (peer/broadcast msg/rsp/intr side channel crossbar)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// cores must be contiguous, starting at 0
|
||||||
|
// Core interface types (one 4-bit code per core slot in CORE_TYPES).
`define CORE_TYPE_NONE 4'h0   // slot not populated
`define CORE_TYPE_A2L2 4'h1   // A2L2 bus (not implemented yet)
`define CORE_TYPE_WB1  4'h2   // single (combined I/D) WB bus + ext cmd
`define CORE_TYPE_WB2  4'h3   // dual (separate I and D) WB buses + ext cmd

// Downstream bus types.
`define BUS_TYPE_NONE 4'h0
`define BUS_TYPE_WB1  4'h1    // not implemented yet
`define BUS_TYPE_WB2  4'h2

// A2WB - bridge between up to four core interfaces and one downstream bus.
//
// Parameters
//   CORE_TYPES : four 4-bit `CORE_TYPE_* codes; bits [0:3] describe core 0,
//                [4:7] core 1, and so on.
//   BUS_TYPE   : `BUS_TYPE_* code selecting the downstream bus wiring.
//
// Ports
//   core_in  : four 111-bit slots, core 0 leftmost (bits [0:110]).
//              Within a slot (numbered 110 downto 0):
//                [110]    i_wb_cyc   (WB2 only)
//                [109]    i_wb_stb   (WB2 only)
//                [108:79] i_wb_adr   (WB2 only, word address 31:2)
//                [78]     d_wb_cyc
//                [77]     d_wb_stb
//                [76]     d_wb_we
//                [75:72]  d_wb_sel
//                [71:40]  d_wb_adr
//                [39:8]   d_wb_datw
//                [7:0]    ext_cmd
//   core_out : four 66-bit slots, core 0 leftmost (bits [0:65]).
//              Within a slot (numbered 65 downto 0):
//                [65]     i_wb_ack   (WB2 only)
//                [64:33]  i_wb_datr  (WB2 only)
//                [32]     d_wb_ack
//                [31:0]   d_wb_datr
//              Bits that a slot's core type does not use are driven to 0.
//   bus_out  : WB2 bus: [101] i_stb, [100:71] i_adr, [70] d_cyc, [69] d_stb,
//              [68] d_we, [67:64] d_sel, [63:32] d_adr, [31:0] d_datw.
//   bus_in   : WB2 bus: [65] i_ack, [64:33] i_datr, [32] d_ack, [31:0] d_datr.
//
// This is still a wiring skeleton: queues, SMP logic, arbitration and
// response routing are declared but not yet implemented, so several
// internal nets (acks, read data, bus_* request signals) have no driver.
module A2WB #(
   parameter [0:15] CORE_TYPES = {`CORE_TYPE_WB2, `CORE_TYPE_NONE, `CORE_TYPE_NONE, `CORE_TYPE_NONE},
   parameter [0:3]  BUS_TYPE   = `BUS_TYPE_WB2
) (
   input               clk,
   input               rst,
   input  [0:4*111-1]  core_in,
   output [0:4*66-1]   core_out,
   input  [65:0]       bus_in,
   output [101:0]      bus_out
);

   // Per-core slot widths of the flattened core_in / core_out ports.
   localparam CORE_IN_W  = 111;
   localparam CORE_OUT_W = 66;

   // Number of populated core slots. Computed at elaboration time; typed
   // 'integer' so the 1-bit compare results are summed at full width.
   localparam integer NUMCORES =
      ((CORE_TYPES[0:3]   != `CORE_TYPE_NONE) ? 1 : 0) +
      ((CORE_TYPES[4:7]   != `CORE_TYPE_NONE) ? 1 : 0) +
      ((CORE_TYPES[8:11]  != `CORE_TYPE_NONE) ? 1 : 0) +
      ((CORE_TYPES[12:15] != `CORE_TYPE_NONE) ? 1 : 0);

   genvar i;

   // ------------------------------------------------------------------------------------------------
   // I/O Connections

   wire        i_wb_cyc  [0:3];
   wire        i_wb_stb  [0:3];
   wire [31:2] i_wb_adr  [0:3];
   wire        i_wb_ack  [0:3];
   wire [31:0] i_wb_datr [0:3];
   wire        d_wb_cyc  [0:3];
   wire        d_wb_stb  [0:3];
   wire        d_wb_we   [0:3];
   wire [3:0]  d_wb_sel  [0:3];
   wire [31:0] d_wb_adr  [0:3];
   wire [31:0] d_wb_datw [0:3];
   wire        d_wb_ack  [0:3];
   wire [31:0] d_wb_datr [0:3];
   wire [7:0]  ext_cmd   [0:3];
   wire [7:0]  ext_rsp   [0:3];

   generate
      for (i = 0; i < 4; i = i + 1) begin : core_io
         // This core's slice of the flattened ports, renumbered MSB-first so
         // the field offsets below match the port layout in the header.
         wire [CORE_IN_W-1:0]  c_in = core_in[i*CORE_IN_W +: CORE_IN_W];
         wire [CORE_OUT_W-1:0] c_out;
         assign core_out[i*CORE_OUT_W +: CORE_OUT_W] = c_out;

         case (CORE_TYPES[i*4:i*4+3])
            `CORE_TYPE_A2L2: begin : a2l2
               // a2l2
               // TODO: not implemented; outputs tied off as a placeholder
               assign c_out = {CORE_OUT_W{1'b0}};
            end
            `CORE_TYPE_WB1: begin : wb1
               assign d_wb_cyc[i]  = c_in[78];
               assign d_wb_stb[i]  = c_in[77];
               assign d_wb_we[i]   = c_in[76];
               assign d_wb_sel[i]  = c_in[75:72];
               assign d_wb_adr[i]  = c_in[71:40];
               assign d_wb_datw[i] = c_in[39:8];
               assign ext_cmd[i]   = c_in[7:0];
               assign c_out[65:33] = 33'b0;          // no I-side response on a single bus
               assign c_out[32]    = d_wb_ack[i];
               assign c_out[31:0]  = d_wb_datr[i];
            end
            `CORE_TYPE_WB2: begin : wb2
               assign i_wb_cyc[i]  = c_in[110];
               assign i_wb_stb[i]  = c_in[109];
               assign i_wb_adr[i]  = c_in[108:79];
               assign d_wb_cyc[i]  = c_in[78];
               assign d_wb_stb[i]  = c_in[77];
               assign d_wb_we[i]   = c_in[76];
               assign d_wb_sel[i]  = c_in[75:72];
               assign d_wb_adr[i]  = c_in[71:40];
               assign d_wb_datw[i] = c_in[39:8];
               assign ext_cmd[i]   = c_in[7:0];
               assign c_out[65]    = i_wb_ack[i];
               assign c_out[64:33] = i_wb_datr[i];
               assign c_out[32]    = d_wb_ack[i];
               assign c_out[31:0]  = d_wb_datr[i];
            end
            default: begin : none
               // `CORE_TYPE_NONE (or an unknown code): nothing attached
               assign c_out = {CORE_OUT_W{1'b0}};
            end
         endcase
      end
   endgenerate

   // ------------------------------------------------------------------------------------------------
   // Command Queues/Addr Compare/Bypass
   //
   // cores can have either 1 or 2 buses; assume single-cmd outstanding per, for now
   // a2l2 could also allow 1 ld, 1 st credit and use 2 dedicated queues
   //
   // Queue entry layout: [77] valid, [76] we, [75:72] sel, [71:40] adr,
   //                     [39:8] write data, [7:0] ext_cmd

   reg  [77:0] cmd_queue_q   [0:3][0:1];
   wire [77:0] cmd_queue_d   [0:3][0:1];
   wire [77:0] cmd_queue_in  [0:3][0:1];
   wire [71:0] cmd_queue_out [0:3];

   generate
      for (i = 0; i < 4; i = i + 1) begin : cmd_q
         case (CORE_TYPES[i*4:i*4+3])
            `CORE_TYPE_A2L2: begin : a2l2
               // convert a2l2 to internal format
            end
            `CORE_TYPE_WB1: begin : wb1
               // q[0] = i or d
               assign cmd_queue_in[i][0][77]    = d_wb_cyc[i] & d_wb_stb[i];   // valid
               assign cmd_queue_in[i][0][76]    = d_wb_we[i];
               assign cmd_queue_in[i][0][75:72] = d_wb_sel[i];
               assign cmd_queue_in[i][0][71:40] = d_wb_adr[i];
               assign cmd_queue_in[i][0][39:8]  = d_wb_datw[i];
               assign cmd_queue_in[i][0][7:0]   = ext_cmd[i];
            end
            `CORE_TYPE_WB2: begin : wb2
               // q[0]=i, q[1]=d
               // Instruction side is read-only: no we/sel/write data, and the
               // word address is widened to a byte address.
               assign cmd_queue_in[i][0][77]    = i_wb_cyc[i] & i_wb_stb[i];   // valid
               assign cmd_queue_in[i][0][76]    = 1'b0;
               assign cmd_queue_in[i][0][75:72] = 4'b0000;
               assign cmd_queue_in[i][0][71:40] = {i_wb_adr[i], 2'b00};
               assign cmd_queue_in[i][0][39:8]  = 32'h00000000;
               // NOTE(review): the core supplies one ext_cmd, so both queues
               // see the same value - confirm this is intended for the I side.
               assign cmd_queue_in[i][0][7:0]   = ext_cmd[i];

               assign cmd_queue_in[i][1][77]    = d_wb_cyc[i] & d_wb_stb[i];   // valid
               assign cmd_queue_in[i][1][76]    = d_wb_we[i];
               assign cmd_queue_in[i][1][75:72] = d_wb_sel[i];
               assign cmd_queue_in[i][1][71:40] = d_wb_adr[i];
               assign cmd_queue_in[i][1][39:8]  = d_wb_datw[i];
               assign cmd_queue_in[i][1][7:0]   = ext_cmd[i];
            end
            default: begin : none
            end
         endcase
      end
   endgenerate

   // ------------------------------------------------------------------------------------------------
   // SMP

   // larx/stcx
   // assume: if larx hits L1, core invalidates line automatically -> do not need to send back-invalidate
   // reservation granule is 32B (or use lcd of all cores)
   // one reservation per thread
   // reservation is set before core receives reload data

   wire        stcx_store  [0:3];
   wire        resv_ra_hit [0:3];
   wire        resv_set    [0:3];
   wire        resv_rst    [0:3];
   wire [27:0] resv_q      [0:3];   // v, @31:5
   wire [27:0] resv_d      [0:3];

   generate
      for (i = 0; i < 4; i = i + 1) begin : resv

      end
   endgenerate

   // sync ack

   // cache ops

   // tlb ops

   // ------------------------------------------------------------------------------------------------
   // Arbitration
   //
   // LRU, etc. select from pending cmds
   generate
      for (i = 0; i < 4; i = i + 1) begin : arb
      end
   endgenerate

   // ------------------------------------------------------------------------------------------------
   // Bus Out

   generate
      if (BUS_TYPE == `BUS_TYPE_WB1) begin : bus_out_wb1

      end else if (BUS_TYPE == `BUS_TYPE_WB2) begin : bus_out_wb2

         // Request signals toward the bus; they drive the bus_out port
         // directly. TODO: no driver yet - arbitration is not implemented.
         // NOTE(review): there is no i_wb_cyc on the bus side although the
         // core side carries one - confirm this is intended.
         wire        bus_i_wb_stb;
         wire [31:2] bus_i_wb_adr;
         wire        bus_d_wb_cyc;
         wire        bus_d_wb_stb;
         wire        bus_d_wb_we;
         wire [3:0]  bus_d_wb_sel;
         wire [31:0] bus_d_wb_adr;
         wire [31:0] bus_d_wb_datw;

         assign bus_out[101]    = bus_i_wb_stb;
         assign bus_out[100:71] = bus_i_wb_adr;
         assign bus_out[70]     = bus_d_wb_cyc;
         assign bus_out[69]     = bus_d_wb_stb;
         assign bus_out[68]     = bus_d_wb_we;
         assign bus_out[67:64]  = bus_d_wb_sel;
         assign bus_out[63:32]  = bus_d_wb_adr;
         assign bus_out[31:0]   = bus_d_wb_datw;

      end else begin : bus_out_none
      end
   endgenerate

   // ------------------------------------------------------------------------------------------------
   // Bus In

   generate
      if (BUS_TYPE == `BUS_TYPE_WB1) begin : bus_in_wb1

      end else if (BUS_TYPE == `BUS_TYPE_WB2) begin : bus_in_wb2

         // Response signals unpacked straight from the bus_in port.
         wire        bus_i_wb_ack  = bus_in[65];
         wire [31:0] bus_i_wb_datr = bus_in[64:33];
         wire        bus_d_wb_ack  = bus_in[32];
         wire [31:0] bus_d_wb_datr = bus_in[31:0];

      end else begin : bus_in_none
      end
   endgenerate

   // ------------------------------------------------------------------------------------------------
   // Response Queues

   generate
      for (i = 0; i < 4; i = i + 1) begin : rsp_q
      end
   endgenerate

   // ------------------------------------------------------------------------------------------------
   // Misc/Errors/Debug

   generate
      for (i = 0; i < 4; i = i + 1) begin : misc
      end
   endgenerate

endmodule
|
@ -1 +1,36 @@
|
|||||||
# Bridge A2I/A2O memory interface to WB
|
# A2 Interfaces to WB
|
||||||
|
|
||||||
|
* core interfaces
|
||||||
|
|
||||||
|
* A2I/A2O A2L2 bus
|
||||||
|
|
||||||
|
* Single (combined I/D) w/SMP extensions
|
||||||
|
|
||||||
|
* Dual (separate I/D) WB buses w/SMP extensions
|
||||||
|
|
||||||
|
* bus interface
|
||||||
|
|
||||||
|
* WB (non-SMP)
|
||||||
|
|
||||||
|
* functions
|
||||||
|
|
||||||
|
* arbitrates for WB bus
|
||||||
|
|
||||||
|
* queues one or more core commands
|
||||||
|
|
||||||
|
* point of coherency for larx/stcx, sync, tlbie, etc. for multicores below it (single/multithread)
|
||||||
|
|
||||||
|
* address compares necessary for ordering/coherency
|
||||||
|
|
||||||
|
* optional mailbox interface for core-core peer and broadcast
|
||||||
|
|
||||||
|
|
||||||
|
## Possible configurations
|
||||||
|
|
||||||
|
* one core, WB: pass-through with SMP functions
|
||||||
|
|
||||||
|
* one core, A2L2: bridge with SMP functions
|
||||||
|
|
||||||
|
* multi-core: bridge for A2L2, queueing, arbitration, and SMP functions
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,18 +0,0 @@
|
|||||||
# L2 Multicore
|
|
||||||
|
|
||||||
* Multicore bridge to WB
|
|
||||||
|
|
||||||
* generate 1-4 core interfaces (possibly mixed)
|
|
||||||
|
|
||||||
* slave WB-I, slave WB-D, OPMC extension (opcode/WIMG/...)
|
|
||||||
|
|
||||||
* a2i/a2o out-of-order/credits/... style
|
|
||||||
|
|
||||||
* configurable load and store queues per interface (if pipelined/credited interfaces)
|
|
||||||
|
|
||||||
* point-of-coherency/snoop/sync/... logic
|
|
||||||
|
|
||||||
* configurable-size shared L2 (extra tags for pinning, etc.?)
|
|
||||||
|
|
||||||
* WB-I, WB-D master
|
|
||||||
|
|
@ -0,0 +1,94 @@
|
|||||||
|
# Magic Incantations for Litex Integration
|
||||||
|
|
||||||
|
## Litex General
|
||||||
|
|
||||||
|
### add local platform
|
||||||
|
```
from platforms import cmod7
```
|
||||||
|
|
||||||
|
### add local core
|
||||||
|
|
||||||
|
```
|
||||||
|
binPath = os.path.dirname(os.path.realpath(__file__))
|
||||||
|
sys.path.append(os.path.join(binPath, 'a2p'))
|
||||||
|
from a2p import A2P
|
||||||
|
from litex.soc.cores import cpu
|
||||||
|
cpu.CPUS['a2p'] = A2P
|
||||||
|
```
|
||||||
|
|
||||||
|
### add local modules
|
||||||
|
|
||||||
|
```
|
||||||
|
sys.path.append(os.path.join(binPath, 'modules'))
|
||||||
|
```
|
||||||
|
|
||||||
|
### add extra UARTs (GPIO)
|
||||||
|
|
||||||
|
* must add PHY and UART to CSR
|
||||||
|
|
||||||
|
```
|
||||||
|
self.submodules.uart_1_phy = RS232PHY(pins, sys_clk_freq, 115200, with_dynamic_baudrate=True)
|
||||||
|
self.add_csr('uart_1_phy')
|
||||||
|
self.submodules.uart_1 = UART(phy=self.uart_1_phy)
|
||||||
|
self.add_csr('uart_1')
|
||||||
|
```
|
||||||
|
|
||||||
|
* dynamic baudrate allows setting it in csr
|
||||||
|
|
||||||
|
* tuning_word = int((baudrate/clk_freq)*2**32)
|
||||||
|
* in bios
|
||||||
|
* ```#define CONFIG_CLOCK_FREQUENCY 100000000```
|
||||||
|
* or read freq with config_clock_frequency_read()
|
||||||
|
* access csr with generated UART_1 funcs
|
||||||
|
|
||||||
|
* 115200=004B7F5A (4947802)
|
||||||
|
* 9600=00064A9C (412316)
|
||||||
|
|
||||||
|
### add 'CSR Definition' CSR for indirectly addressing CSRs, so that code doesn't need to change when gateware rebuilt
|
||||||
|
|
||||||
|
* add a 'CSR Definition' region at constant location (like CSR_BASE)
|
||||||
|
|
||||||
|
* contains list of CSRs which point to and define CSRs available
|
||||||
|
|
||||||
|
* Base Addr: 24 bits, Identifier: 8 bits
|
||||||
|
|
||||||
|
* also could self.add_constant(ID) for each, so that software can use constants to check Identifier
|
||||||
|
|
||||||
|
* needs to be able to reserve its region, then find out what's been built and add its regs before finalize()
|
||||||
|
|
||||||
|
### allow lxterm and lxserver to share main uart (crossover)
|
||||||
|
|
||||||
|
|
||||||
|
## Litex RTL
|
||||||
|
|
||||||
|
### add verilog module to top, between top-level bus and I/Os (slave)
|
||||||
|
|
||||||
|
```
|
||||||
|
from issiram import ISSIRam
|
||||||
|
platform.add_source("./modules/issiram.v")
|
||||||
|
|
||||||
|
sram_bus = wishbone.Interface()
|
||||||
|
pins = platform.request('issiram')
|
||||||
|
# to rename pins
|
||||||
|
mem = {
|
||||||
|
'ce': pins.cen,
|
||||||
|
'oe': pins.oen,
|
||||||
|
'we': pins.wen,
|
||||||
|
'adr': pins.addr,
|
||||||
|
'dat': pins.data
|
||||||
|
}
|
||||||
|
sram = ISSIRam(self, ClockSignal(), ResetSignal(), sram_bus, mem)
|
||||||
|
self.submodules.sram = sram
|
||||||
|
self.bus.add_slave('sram', sram_bus, SoCRegion(origin=self.mem_map['sram'], size=sram.size))
|
||||||
|
self.logger.info("SRAM {} {} {}.".format(
|
||||||
|
colorer('sram'),
|
||||||
|
colorer("added", color="green"),
|
||||||
|
self.bus.regions['sram'])
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
### add verilog module to top, with connections to other top-level signals
|
||||||
|
|
||||||
|
|
||||||
|
### add Litex L2 between WB and verilog module
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue