Browse Source

init bridge/smp stuff

master
wtf 1 year ago
parent
commit
4a8aeab3be
  1. 274
      rtl/a2wb/a2wb.v
  2. 37
      rtl/a2wb/readme.md
  3. 18
      rtl/l2mc/readme.md
  4. 94
      rtl/readme.md

274
rtl/a2wb/a2wb.v

@ -0,0 +1,274 @@ @@ -0,0 +1,274 @@
// A2 Core Bridge

// should modularize as much as possible and just do messy rewiring here!

// one thread/core for now

// possible extended command modifiers
// prefetch
// larx
// stcx
// lwsync
// hwsync
// tlbsync
// ici, icbi
// dci, dcbi, etc
// dcbtst
// dcbz
// tlbie, etc

// possible extended responses
// errors
// crit first, xfer# for larger bus width on core side
// credits
// resv valid
// stcx comp/pass
// sync ack
// back inv val/addr

// possible extra functions
// integrated L2
// doorbell/mailbox (peer/broadcast msg/rsp/intr side channel crossbar)



// cores must be contiguous, starting at 0
//
// Core-side interface type codes. The A2WB CORE_TYPES parameter packs one
// 4-bit code per core slot; the module's generate blocks pick the per-slot
// wiring by matching on these values.
//
// slot has no core attached
`define CORE_TYPE_NONE 4'h0
// A2L2 core bus (A2WB has only placeholder branches for this type so far)
`define CORE_TYPE_A2L2 4'h1
// one Wishbone bus per core; A2WB wires only the data-side (d_wb_*) signals for it
`define CORE_TYPE_WB1 4'h2
// two Wishbone buses per core: instruction (i_wb_*) and data (d_wb_*)
`define CORE_TYPE_WB2 4'h3

// Bus-side interface type codes, compared against the A2WB BUS_TYPE parameter.
`define BUS_TYPE_NONE 4'h0
`define BUS_TYPE_WB1 4'h1
`define BUS_TYPE_WB2 4'h2

// A2WB: bridge skeleton between up to four core-side interfaces and one
// bus-side interface.
//
// Parameters
//   CORE_TYPES  four 4-bit `CORE_TYPE_* codes; slot 0 is bits [0:3], slot 3 is
//               bits [12:15]
//   BUS_TYPE    one `BUS_TYPE_* code selecting the bus-side packing
//
// Ports
//   clk, rst    declared, not referenced by any logic in this module yet
//   core_in     per-core command bundle  (unpacked per CORE_TYPES below)
//   core_out    per-core response bundle (packed per CORE_TYPES below)
//   bus_in      bus-side response bundle
//   bus_out     bus-side command bundle
//
// NOTE(review): the port list is kept exactly as originally declared so the
// module interface is unchanged, but it is inconsistent with the body:
//   * core_in/core_out are 4 bits wide, yet the body selects core_in[i][110:0]
//     and core_out[i][65:0];
//   * bus_in/bus_out are 1 bit wide, yet the body uses 66 and 102 bits;
//   * the generate branches redeclare nets named core_in/core_out/bus_in/
//     bus_out; such declarations create block-local nets rather than resizing
//     the ports, so the ports would be left unconnected.
// Resolving this requires choosing the real port widths/layout, which is an
// interface decision for the module owner.
module A2WB #(
   parameter [0:15] CORE_TYPES = {`CORE_TYPE_WB2, `CORE_TYPE_NONE, `CORE_TYPE_NONE, `CORE_TYPE_NONE},
   parameter [0:3] BUS_TYPE = `BUS_TYPE_WB2
) (
   input clk,
   input rst,
   input [0:3] core_in,
   output [0:3] core_out,
   input bus_in,
   output bus_out
);

   // Returns 1 when a 4-bit core type code denotes an attached core
   // (A2L2, WB1 or WB2), 0 otherwise. Used only in constant expressions.
   function integer core_present(input [0:3] ctype);
      begin
         case (ctype)
            `CORE_TYPE_A2L2,
            `CORE_TYPE_WB1,
            `CORE_TYPE_WB2: core_present = 1;
            default:        core_present = 0;
         endcase
      end
   endfunction

   // Number of attached cores. Computed at elaboration time from CORE_TYPES;
   // the original drove an integer from several self-referential continuous
   // assigns (NUMCORES = NUMCORES + 1), which is a multiply-driven
   // combinational loop rather than a count.
   localparam integer NUMCORES = core_present(CORE_TYPES[0:3])
                               + core_present(CORE_TYPES[4:7])
                               + core_present(CORE_TYPES[8:11])
                               + core_present(CORE_TYPES[12:15]);

   genvar i;

   // ------------------------------------------------------------------------------------------------
   // I/O Connections
   //
   // Per-core Wishbone signals, indexed by core slot. i_* is the instruction
   // bus (read-only: no we/sel/datw), d_* is the data bus.

   wire        i_wb_cyc  [0:3];
   wire        i_wb_stb  [0:3];
   wire [31:2] i_wb_adr  [0:3];   // word address (bits 1:0 not carried)
   wire        i_wb_ack  [0:3];
   wire [31:0] i_wb_datr [0:3];
   wire        d_wb_cyc  [0:3];
   wire        d_wb_stb  [0:3];
   wire        d_wb_we   [0:3];
   wire [3:0]  d_wb_sel  [0:3];
   wire [31:0] d_wb_adr  [0:3];
   wire [31:0] d_wb_datw [0:3];
   wire        d_wb_ack  [0:3];
   wire [31:0] d_wb_datr [0:3];
   wire [7:0]  ext_cmd   [0:3];   // extended command modifiers from the core
   wire [7:0]  ext_rsp   [0:3];   // extended responses to the core (not driven yet)

   generate
      for (i = 0; i < 4; i = i + 1) begin : g_core_io
         case (CORE_TYPES[i*4:i*4+3])
            `CORE_TYPE_NONE: begin
            end
            `CORE_TYPE_A2L2: begin
               // a2l2
            end
            `CORE_TYPE_WB1: begin
               // Input bundle is 79 bits [78:0], output bundle is 33 bits [32:0].
               // NOTE(review): see module header regarding this redeclaration.
               wire [32:0] core_out[i];
               assign d_wb_cyc[i]        = core_in[i][78];
               assign d_wb_stb[i]        = core_in[i][77];
               assign d_wb_we[i]         = core_in[i][76];
               assign d_wb_sel[i]        = core_in[i][75:72];
               assign d_wb_adr[i]        = core_in[i][71:40];
               assign d_wb_datw[i]       = core_in[i][39:8];
               assign ext_cmd[i]         = core_in[i][7:0];
               assign core_out[i][32]    = d_wb_ack[i];
               assign core_out[i][31:0]  = d_wb_datr[i];
            end
            `CORE_TYPE_WB2: begin
               // Input bundle is 111 bits: I-side in [110:79], then the same
               // 79-bit D-side layout as WB1. Output bundle is 66 bits.
               // NOTE(review): see module header regarding these redeclarations.
               wire [110:0] core_in[i];
               wire [65:0] core_out[i];
               assign i_wb_cyc[i]        = core_in[i][110];
               assign i_wb_stb[i]        = core_in[i][109];
               assign i_wb_adr[i]        = core_in[i][108:79];
               assign d_wb_cyc[i]        = core_in[i][78];
               assign d_wb_stb[i]        = core_in[i][77];
               assign d_wb_we[i]         = core_in[i][76];
               assign d_wb_sel[i]        = core_in[i][75:72];
               assign d_wb_adr[i]        = core_in[i][71:40];
               assign d_wb_datw[i]       = core_in[i][39:8];
               assign ext_cmd[i]         = core_in[i][7:0];
               assign core_out[i][65]    = i_wb_ack[i];
               assign core_out[i][64:33] = i_wb_datr[i];
               assign core_out[i][32]    = d_wb_ack[i];
               assign core_out[i][31:0]  = d_wb_datr[i];
            end
         endcase
      end
   endgenerate

   // ------------------------------------------------------------------------------------------------
   // Command Queues/Addr Compare/Bypass
   //
   // cores can have either 1 or 2 buses; assume single-cmd outstanding per, for now
   // a2l2 could also allow 1 ld, 1 st credit and use 2 dedicated queues
   //
   // Queue entry layout (78 bits):
   //   [77]    valid
   //   [76]    we
   //   [75:72] sel
   //   [71:40] adr
   //   [39:8]  datw
   //   [7:0]   ext_cmd
   // Second index is the per-core queue slot (0 or 1).
   reg  [77:0] cmd_queue_q   [0:3][0:1];
   wire [77:0] cmd_queue_d   [0:3][0:1];
   wire [77:0] cmd_queue_in  [0:3][0:1];
   wire [71:0] cmd_queue_out [0:3];

   generate
      for (i = 0; i < 4; i = i + 1) begin : g_cmd_queue
         case (CORE_TYPES[i*4:i*4+3])
            `CORE_TYPE_NONE: begin
            end
            `CORE_TYPE_A2L2: begin
               // convert a2l2 to internal format
            end
            `CORE_TYPE_WB1: begin
               // q[0] = i or d
               assign cmd_queue_in[i][0][77]    = d_wb_cyc[i] & d_wb_stb[i]; // valid
               assign cmd_queue_in[i][0][76]    = d_wb_we[i];
               assign cmd_queue_in[i][0][75:72] = d_wb_sel[i];
               assign cmd_queue_in[i][0][71:40] = d_wb_adr[i];
               assign cmd_queue_in[i][0][39:8]  = d_wb_datw[i];
               assign cmd_queue_in[i][0][7:0]   = ext_cmd[i];
            end
            `CORE_TYPE_WB2: begin
               // q[0]=i, q[1]=d
               //
               // Slot 0: instruction fetch. Reads only, so we/sel/datw are 0.
               // The address comes from the I bus; i_wb_adr is a word address
               // [31:2], padded with 2'b00 to fill the 32-bit adr field.
               assign cmd_queue_in[i][0][77]    = i_wb_cyc[i] & i_wb_stb[i]; // valid
               assign cmd_queue_in[i][0][76]    = 1'b0;
               assign cmd_queue_in[i][0][75:72] = 4'b0000;
               assign cmd_queue_in[i][0][71:40] = {i_wb_adr[i], 2'b00};
               assign cmd_queue_in[i][0][39:8]  = 32'h00000000;
               // NOTE(review): the core bundle carries a single ext_cmd field,
               // which feeds both slots here - confirm it applies to I-side.
               assign cmd_queue_in[i][0][7:0]   = ext_cmd[i];

               // Slot 1: data access. (Previously these also drove slot 0,
               // giving two continuous drivers on every bit of that entry.)
               assign cmd_queue_in[i][1][77]    = d_wb_cyc[i] & d_wb_stb[i]; // valid
               assign cmd_queue_in[i][1][76]    = d_wb_we[i];
               assign cmd_queue_in[i][1][75:72] = d_wb_sel[i];
               assign cmd_queue_in[i][1][71:40] = d_wb_adr[i];
               assign cmd_queue_in[i][1][39:8]  = d_wb_datw[i];
               assign cmd_queue_in[i][1][7:0]   = ext_cmd[i];
            end
         endcase
      end
   endgenerate

   // ------------------------------------------------------------------------------------------------
   // SMP

   // larx/stcx
   // assume: if larx hits L1, core invalidates line automatically -> do not need to send back-invalidate
   // reservation granule is 32B (or use lcd of all cores)
   // one reservation per thread
   // reservation is set before core receives reload data

   wire        stcx_store  [0:3];
   wire        resv_ra_hit [0:3];
   wire        resv_set    [0:3];
   wire        resv_rst    [0:3];
   wire [27:0] resv_q      [0:3]; // v, @31:5
   wire [27:0] resv_d      [0:3];

   generate
      for (i = 0; i < 4; i = i + 1) begin : g_resv

      end
   endgenerate

   // sync ack

   // cache ops

   // tlb ops

   // ------------------------------------------------------------------------------------------------
   // Arbitration
   //
   // LRU, etc. select from pending cmds
   generate
      for (i = 0; i < 4; i = i + 1) begin : g_arb
      end
   endgenerate

   // ------------------------------------------------------------------------------------------------
   // Bus Out
   //
   // WB2 packing of the 102-bit command bundle:
   //   [101]    i_wb_stb
   //   [100:71] i_wb_adr[31:2]
   //   [70]     d_wb_cyc
   //   [69]     d_wb_stb
   //   [68]     d_wb_we
   //   [67:64]  d_wb_sel
   //   [63:32]  d_wb_adr
   //   [31:0]   d_wb_datw
   // The bus_*_wb_* sources are declared here but nothing drives them yet.
   // NOTE(review): no I-side cyc is packed, although the core-side WB2 bundle
   // carries i_wb_cyc - confirm whether that is intended.

   generate
      if (BUS_TYPE == `BUS_TYPE_WB1) begin

      end else if (BUS_TYPE == `BUS_TYPE_WB2) begin

         // NOTE(review): see module header regarding this redeclaration.
         wire [101:0] bus_out;
         wire bus_i_wb_stb;
         assign bus_out[101] = bus_i_wb_stb;
         wire [31:2] bus_i_wb_adr;
         assign bus_out[100:71] = bus_i_wb_adr;
         wire bus_d_wb_cyc;
         assign bus_out[70] = bus_d_wb_cyc;
         wire bus_d_wb_stb;
         assign bus_out[69] = bus_d_wb_stb;
         wire bus_d_wb_we;
         assign bus_out[68] = bus_d_wb_we;
         wire [3:0] bus_d_wb_sel;
         assign bus_out[67:64] = bus_d_wb_sel;
         wire [31:0] bus_d_wb_adr;
         assign bus_out[63:32] = bus_d_wb_adr;
         wire [31:0] bus_d_wb_datw;
         assign bus_out[31:0] = bus_d_wb_datw;

      end else begin
      end
   endgenerate

   // ------------------------------------------------------------------------------------------------
   // Bus In
   //
   // WB2 unpacking of the 66-bit response bundle:
   //   [65]    i_wb_ack
   //   [64:33] i_wb_datr
   //   [32]    d_wb_ack
   //   [31:0]  d_wb_datr

   generate
      if (BUS_TYPE == `BUS_TYPE_WB1) begin

      end else if (BUS_TYPE == `BUS_TYPE_WB2) begin

         // NOTE(review): see module header regarding this redeclaration.
         wire [65:0] bus_in;
         wire bus_i_wb_ack = bus_in[65];
         wire [31:0] bus_i_wb_datr = bus_in[64:33];
         wire bus_d_wb_ack = bus_in[32];
         wire [31:0] bus_d_wb_datr = bus_in[31:0];

      end else begin
      end
   endgenerate

   // ------------------------------------------------------------------------------------------------
   // Response Queues

   generate
      for (i = 0; i < 4; i = i + 1) begin : g_rsp_queue
      end
   endgenerate

   // ------------------------------------------------------------------------------------------------
   // Misc/Errors/Debug

   generate
      for (i = 0; i < 4; i = i + 1) begin : g_misc
      end
   endgenerate

endmodule

37
rtl/a2wb/readme.md

@ -1 +1,36 @@ @@ -1 +1,36 @@
# Bridge A2I/A2O memory interface to WB
# A2 Interfaces to WB

* core interfaces

* A2I/A2O A2L2 bus

* Single (combined I/D) w/SMP extensions

* Dual (separate I/D) WB buses w/SMP extensions

* bus interface

* WB (non-SMP)

* functions

* arbitrates for WB bus

* queues one or more core commands

* point of coherency for larx/stcx, sync, tlbie, etc. for multicores below it (single/multithread)

* address compares necessary for ordering/coherency

* optional mailbox interface for core-core peer and broadcast


## Possible configurations

* one core, WB: pass-through with SMP functions

* one core, A2L2: bridge with SMP functions

* multi-core: bridge for A2L2, queueing, arbitration, and SMP functions



18
rtl/l2mc/readme.md

@ -1,18 +0,0 @@ @@ -1,18 +0,0 @@
# L2 Multicore

* Multicore bridge to WB

* generate 1-4 core interfaces (possibly mixed)

* slave WB-I, slave WB-D, OPMC extension (opcode/WIMG/...)

* a2i/a2o out-of-order/credits/... style

* configurable load and store queues per interface (if pipelined/credited interfaces)

* point-of-coherency/snoop/sync/... logic

* configurable-size shared L2 (extra tags for pinning, etc.?)

* WB-I, WB-D master

94
rtl/readme.md

@ -0,0 +1,94 @@ @@ -0,0 +1,94 @@
# Magic Incantations for Litex Integration

## Litex General

### add local platform
```
from platforms import cmod7
```

### add local core

```
binPath = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(binPath, 'a2p'))
from a2p import A2P
from litex.soc.cores import cpu
cpu.CPUS['a2p'] = A2P
```

### add local modules

```
sys.path.append(os.path.join(binPath, 'modules'))
```

### add extra UARTs (GPIO)

* must add PHY and UART to CSR

```
self.submodules.uart_1_phy = RS232PHY(pins, sys_clk_freq, 115200, with_dynamic_baudrate=True)
self.add_csr('uart_1_phy')
self.submodules.uart_1 = UART(phy=self.uart_1_phy)
self.add_csr('uart_1')
```

* dynamic baudrate allows setting it in csr

* tuning_word = int((baudrate/clk_freq)*2**32)
* in bios
* ```#define CONFIG_CLOCK_FREQUENCY 100000000```
* or read freq with config_clock_frequency_read()
* access csr with generated UART_1 funcs

* 115200=004B7F5A (4947802)
* 9600=00064A9C (412316)

### add 'CSR Definition' CSR for indirectly addressing CSRs, so that code doesn't need to change when gateware rebuilt

* add a 'CSR Definition' region at constant location (like CSR_BASE)

* contains list of CSRs which point to and define CSRs available

* Base Addr: 24 bits, Identifier: 8 bits

* also could self.add_constant(ID) for each, so that software can use constants to check Identifier

* needs to be able to reserve its region, then find out what's been built and add its regs before finalize()

### allow lxterm and lxserver to share main uart (crossover)


## Litex RTL

### add verilog module to top, between top-level bus and I/Os (slave)

```
from issiram import ISSIRam
platform.add_source("./modules/issiram.v")

sram_bus = wishbone.Interface()
pins = platform.request('issiram')
# to rename pins
mem = {
'ce': pins.cen,
'oe': pins.oen,
'we': pins.wen,
'adr': pins.addr,
'dat': pins.data
}
sram = ISSIRam(self, ClockSignal(), ResetSignal(), sram_bus, mem)
self.submodules.sram = sram
self.bus.add_slave('sram', sram_bus, SoCRegion(origin=self.mem_map['sram'], size=sram.size))
self.logger.info("SRAM {} {} {}.".format(
colorer('sram'),
colorer("added", color="green"),
self.bus.regions['sram'])
)
```

### add verilog module to top, with connections to other top-level signals


### add Litex L2 between WB and verilog module


Loading…
Cancel
Save