From 4a8aeab3be4b687094076beeaf58b06ebaec62a9 Mon Sep 17 00:00:00 2001
From: wtf <52765606+openpowerwtf@users.noreply.ggithub.com>
Date: Thu, 18 Nov 2021 09:00:19 -0600
Subject: [PATCH] init bridge/smp stuff

---
 rtl/a2wb/a2wb.v    | 274 +++++++++++++++++++++++++++++++++++++++++++++
 rtl/a2wb/readme.md |  37 +++++-
 rtl/l2mc/readme.md |  18 ---
 rtl/readme.md      |  94 ++++++++++++++++
 4 files changed, 404 insertions(+), 19 deletions(-)
 create mode 100644 rtl/a2wb/a2wb.v
 delete mode 100644 rtl/l2mc/readme.md
 create mode 100644 rtl/readme.md

diff --git a/rtl/a2wb/a2wb.v b/rtl/a2wb/a2wb.v
new file mode 100644
index 0000000..27754e7
--- /dev/null
+++ b/rtl/a2wb/a2wb.v
@@ -0,0 +1,274 @@
+// A2 Core Bridge
+
+// should modularize as much as possible and just do messy rewiring here!
+
+// one thread/core for now
+
+// possible extended command modifiers
+//   prefetch
+//   larx
+//   stcx
+//   lwsync
+//   hwsync
+//   tlbsync
+//   ici, icbi
+//   dci, dcbi, etc
+//   dcbtst
+//   dcbz
+//   tlbie, etc
+
+// possible extended responses
+//   errors
+//   crit first, xfer# for larger bus width on core side
+//   credits
+//   resv valid
+//   stcx comp/pass
+//   sync ack
+//   back inv val/addr
+
+// possible extra functions
+//   integrated L2
+//   doorbell/mailbox (peer/broadcast msg/rsp/intr side channel crossbar)
+
+
+
+// cores must be contiguous, starting at 0
+`define CORE_TYPE_NONE 4'h0
+`define CORE_TYPE_A2L2 4'h1
+`define CORE_TYPE_WB1  4'h2
+`define CORE_TYPE_WB2  4'h3
+
+`define BUS_TYPE_NONE 4'h0
+`define BUS_TYPE_WB1  4'h1
+`define BUS_TYPE_WB2  4'h2
+// A2WB: bridges up to four core-side interfaces (one 4-bit type per slot in CORE_TYPES) to a single bus (BUS_TYPE)
+module A2WB #(
+   parameter [0:15] CORE_TYPES = {`CORE_TYPE_WB2, `CORE_TYPE_NONE, `CORE_TYPE_NONE, `CORE_TYPE_NONE},
+   parameter [0:3]  BUS_TYPE = `BUS_TYPE_WB2
+) (
+   input        clk,
+   input        rst,
+   input  [0:3] core_in,   // NOTE(review): 1 bit per core here, but the logic below indexes up to bit 110 - port widths still to be defined
+   output [0:3] core_out,
+   input        bus_in,
+   output       bus_out
+);
+// number of core slots whose type is not CORE_TYPE_NONE; an elaboration-time constant (a net/variable cannot be incremented with 'assign')
+localparam integer NUMCORES = ((CORE_TYPES[0:3]  != `CORE_TYPE_NONE) ? 1 : 0) + ((CORE_TYPES[4:7]   != `CORE_TYPE_NONE) ? 1 : 0) +
+                              ((CORE_TYPES[8:11] != `CORE_TYPE_NONE) ? 1 : 0) + ((CORE_TYPES[12:15] != `CORE_TYPE_NONE) ? 1 : 0);
+genvar i;
+
+// ------------------------------------------------------------------------------------------------
+// I/O Connections
+
+wire        i_wb_cyc  [0:3];
+wire        i_wb_stb  [0:3];
+wire [31:2] i_wb_adr  [0:3];
+wire        i_wb_ack  [0:3];
+wire [31:0] i_wb_datr [0:3];
+wire        d_wb_cyc  [0:3];
+wire        d_wb_stb  [0:3];
+wire        d_wb_we   [0:3];
+wire [3:0]  d_wb_sel  [0:3];
+wire [31:0] d_wb_adr  [0:3];
+wire [31:0] d_wb_datw [0:3];
+wire        d_wb_ack  [0:3];
+wire [31:0] d_wb_datr [0:3];
+wire [7:0]  ext_cmd   [0:3];
+wire [7:0]  ext_rsp   [0:3];
+
+generate
+   for (i = 0; i < 4; i = i + 1) begin : gen_core_io
+      case (CORE_TYPES[i*4:i*4+3])
+         `CORE_TYPE_NONE: begin
+         end
+         `CORE_TYPE_A2L2: begin
+            // slot populated (counted in NUMCORES)
+            // a2l2
+         end
+         `CORE_TYPE_WB1: begin
+            // slot populated (counted in NUMCORES)
+            // single-bus core_in = {cyc, stb, we, sel[3:0], adr[31:0], datw[31:0], ext_cmd[7:0]}; core_out = {ack, datr[31:0]}
+            wire [78:0] core_0_in;      // NOTE(review): placeholder; name does not match core_in[i] used below
+            wire [32:0] core_out[i];    // NOTE(review): placeholder; redeclares the module port name
+            assign d_wb_cyc[i] = core_in[i][78];
+            assign d_wb_stb[i] = core_in[i][77];
+            assign d_wb_we[i] = core_in[i][76];
+            assign d_wb_sel[i] = core_in[i][75:72];
+            assign d_wb_adr[i] = core_in[i][71:40];
+            assign d_wb_datw[i] = core_in[i][39:8];
+            assign ext_cmd[i] = core_in[i][7:0];
+            assign core_out[i][32] = d_wb_ack[i];
+            assign core_out[i][31:0] = d_wb_datr[i];
+         end
+         `CORE_TYPE_WB2: begin
+            // slot populated (counted in NUMCORES)
+            // dual-bus core_in = {i_cyc, i_stb, i_adr[31:2], d_cyc, d_stb, d_we, d_sel, d_adr, d_datw, ext_cmd}; core_out = {i_ack, i_datr, d_ack, d_datr}
+            wire [110:0] core_in[i];    // NOTE(review): placeholder; redeclares the module port name
+            wire [65:0] core_out[i];    // NOTE(review): placeholder; redeclares the module port name
+            assign i_wb_cyc[i] = core_in[i][110];
+            assign i_wb_stb[i] = core_in[i][109];
+            assign i_wb_adr[i] = core_in[i][108:79];
+            assign d_wb_cyc[i] = core_in[i][78];
+            assign d_wb_stb[i] = core_in[i][77];
+            assign d_wb_we[i] = core_in[i][76];
+            assign d_wb_sel[i] = core_in[i][75:72];
+            assign d_wb_adr[i] = core_in[i][71:40];
+            assign d_wb_datw[i] = core_in[i][39:8];
+            assign ext_cmd[i] = core_in[i][7:0];
+            assign core_out[i][65] = i_wb_ack[i];
+            assign core_out[i][64:33] = i_wb_datr[i];
+            assign core_out[i][32] = d_wb_ack[i];
+            assign core_out[i][31:0] = d_wb_datr[i];
+         end
+      endcase
+   end
+endgenerate
+
+// ------------------------------------------------------------------------------------------------
+// Command Queues/Addr Compare/Bypass
+//
+// cores can have either 1 or 2 buses; assume single-cmd outstanding per, for now
+// a2l2 could also allow 1 ld, 1 st credit and use 2 dedicated queues
+reg  [77:0] cmd_queue_q[0:3][0:1];    // entry = {valid, we, sel[3:0], adr[31:0], datw[31:0], ext_cmd[7:0]}
+wire [77:0] cmd_queue_d[0:3][0:1];
+wire [77:0] cmd_queue_in[0:3][0:1];
+wire [71:0] cmd_queue_out[0:3];
+generate
+   for (i = 0; i < 4; i = i + 1) begin : gen_cmd_queue
+      case (CORE_TYPES[i*4:i*4+3])
+         `CORE_TYPE_NONE: begin
+         end
+         `CORE_TYPE_A2L2: begin
+            // convert a2l2 to internal format
+         end
+         `CORE_TYPE_WB1: begin
+            // q[0] = i or d
+            assign cmd_queue_in[i][0][77] = d_wb_cyc[i] & d_wb_stb[i];  // valid
+            assign cmd_queue_in[i][0][76] = d_wb_we[i];
+            assign cmd_queue_in[i][0][75:72] = d_wb_sel[i];
+            assign cmd_queue_in[i][0][71:40] = d_wb_adr[i];
+            assign cmd_queue_in[i][0][39:8] = d_wb_datw[i];
+            assign cmd_queue_in[i][0][7:0] = ext_cmd[i];
+         end
+         `CORE_TYPE_WB2: begin
+            // q[0]=i, q[1]=d
+            assign cmd_queue_in[i][0][77] = i_wb_cyc[i] & i_wb_stb[i];  // valid
+            assign cmd_queue_in[i][0][76] = 1'b0;                       // instruction side never writes
+            assign cmd_queue_in[i][0][75:72] = 4'b0000;
+            assign cmd_queue_in[i][0][71:40] = {i_wb_adr[i], 2'b00};    // i_wb_adr is a word address [31:2]; pad to a byte address
+            assign cmd_queue_in[i][0][39:8] = 32'h00000000;
+            assign cmd_queue_in[i][0][7:0] = ext_cmd[i];
+            assign cmd_queue_in[i][1][77] = d_wb_cyc[i] & d_wb_stb[i];  // valid
+            assign cmd_queue_in[i][1][76] = d_wb_we[i];
+            assign cmd_queue_in[i][1][75:72] = d_wb_sel[i];
+            assign cmd_queue_in[i][1][71:40] = d_wb_adr[i];
+            assign cmd_queue_in[i][1][39:8] = d_wb_datw[i];
+            assign cmd_queue_in[i][1][7:0] = ext_cmd[i];
+         end
+      endcase
+   end
+endgenerate
+
+// ------------------------------------------------------------------------------------------------
+// SMP
+
+// larx/stcx
+//  assume: if larx hits L1, core invalidates line automatically -> do not need to send back-invalidate
+//  reservation granule is 32B (or use lcd of all cores)
+//  one reservation per thread
+//  reservation is set before core receives reload data
+
+wire        stcx_store [0:3];
+wire        resv_ra_hit [0:3];
+wire        resv_set [0:3];
+wire        resv_rst [0:3];
+wire [27:0] resv_q [0:3];  // v, @31:5
+wire [27:0] resv_d [0:3];
+
+generate
+   for (i = 0; i < 4; i = i + 1) begin : gen_smp
+
+   end
+endgenerate
+
+// sync ack
+
+// cache ops
+
+// tlb ops
+
+// ------------------------------------------------------------------------------------------------
+// Arbitration
+//
+// LRU, etc. select from pending cmds
+generate
+   for (i = 0; i < 4; i = i + 1) begin : gen_arb
+   end
+endgenerate
+
+// ------------------------------------------------------------------------------------------------
+// Bus Out
+
+generate
+   if (BUS_TYPE == `BUS_TYPE_WB1) begin
+
+   end else if (BUS_TYPE == `BUS_TYPE_WB2) begin
+
+      wire [101:0] bus_out;     // NOTE(review): placeholder; redeclares the 1-bit module port
+      wire bus_i_wb_stb;        // NOTE(review): no bus_i_wb_cyc is packed, unlike the core-side format (bit 110) - confirm
+      assign bus_out[101] = bus_i_wb_stb;
+      wire [31:2] bus_i_wb_adr;
+      assign bus_out[100:71] = bus_i_wb_adr;
+      wire bus_d_wb_cyc;
+      assign bus_out[70] = bus_d_wb_cyc;
+      wire bus_d_wb_stb;
+      assign bus_out[69] = bus_d_wb_stb;
+      wire bus_d_wb_we;
+      assign bus_out[68] = bus_d_wb_we;
+      wire [3:0] bus_d_wb_sel;
+      assign bus_out[67:64] = bus_d_wb_sel;
+      wire [31:0] bus_d_wb_adr;
+      assign bus_out[63:32] = bus_d_wb_adr;
+      wire [31:0] bus_d_wb_datw;
+      assign bus_out[31:0] = bus_d_wb_datw;
+
+   end else begin
+   end
+endgenerate
+
+// ------------------------------------------------------------------------------------------------
+// Bus In
+
+generate
+   if (BUS_TYPE == `BUS_TYPE_WB1) begin
+
+   end else if (BUS_TYPE == `BUS_TYPE_WB2) begin
+
+      wire [65:0] bus_in;       // NOTE(review): placeholder; redeclares the 1-bit module port
+      wire bus_i_wb_ack = bus_in[65];
+      wire [31:0] bus_i_wb_datr = bus_in[64:33];
+      wire bus_d_wb_ack = bus_in[32];
+      wire [31:0] bus_d_wb_datr = bus_in[31:0];
+
+   end else begin
+   end
+endgenerate
+
+// ------------------------------------------------------------------------------------------------
+// Response Queues
+
+generate
+   for (i = 0; i < 4; i = i + 1) begin : gen_rsp_queue
+   end
+endgenerate
+
+// ------------------------------------------------------------------------------------------------
+// Misc/Errors/Debug
+
+generate
+   for (i = 0; i < 4; i = i + 1) begin : gen_misc
+   end
+endgenerate
+
+endmodule
\ No newline at end of file
diff --git a/rtl/a2wb/readme.md b/rtl/a2wb/readme.md
index 43080b1..cd8dcb1 100644
--- a/rtl/a2wb/readme.md
+++ b/rtl/a2wb/readme.md
@@ -1 +1,36 @@
-# Bridge A2I/A2O memory interface to WB +# A2 Interfaces to WB + +* core interfaces + + * A2I/A2O A2L2 bus + + * Single (combined I/D) w/SMP extensions + + * Dual (separate I/D) WB buses w/SMP extensions + +* bus interface + + * WB (non-SMP) + +* functions + + * arbitrates for WB bus + + * queues one or more core commands + + * point of coherency for larx/stcx, sync, tlbie, etc. for multicores below it (single/multithread) + + * address compares necessary for ordering/coherency + + * optional mailbox interface for core-core peer and broadcast + + +## Possible configurations + +* one core, WB: pass-through with SMP functions + +* one core, A2L2: bridge with SMP functions + +* multi-core: bridge for A2L2, queueing, arbitration, and SMP functions + + diff --git a/rtl/l2mc/readme.md b/rtl/l2mc/readme.md deleted file mode 100644 index 5c3d920..0000000 --- a/rtl/l2mc/readme.md +++ /dev/null @@ -1,18 +0,0 @@ -# L2 Multicore - -* Multicore bridge to WB - - * generate 1-4 core interfaces (possibly mixed) - - * slave WB-I, slave WB-D, OPMC extension (opcode/WIMG/...) - - * a2i/a2o out-of-order/credits/... style - - * configurable load and store queues per interface (if pipelined/credited interfaces) - - * point-of-coherency/snoop/sync/... logic - - * configurable-size shared L2 (extra tags for pinning, etc.?)
- - * WB-I, WB-D master - diff --git a/rtl/readme.md b/rtl/readme.md new file mode 100644 index 0000000..3825381 --- /dev/null +++ b/rtl/readme.md @@ -0,0 +1,94 @@ +# Magic Incantations for Litex Integration + +## Litex General + +### add local platform +from platforms import cmod7 + +### add local core + +``` +binPath = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.join(binPath, 'a2p')) +from a2p import A2P +from litex.soc.cores import cpu +cpu.CPUS['a2p'] = A2P +``` + +### add local modules + +``` +sys.path.append(os.path.join(binPath, 'modules')) +``` + +### add extra UARTs (GPIO) + +* must add PHY and UART to CSR + +``` +self.submodules.uart_1_phy = RS232PHY(pins, sys_clk_freq, 115200, with_dynamic_baudrate=True) +self.add_csr('uart_1_phy') +self.submodules.uart_1 = UART(phy=self.uart_1_phy) +self.add_csr('uart_1') +``` + +* dynamic baudrate allows setting it in csr + + * tuning_word = int((baudrate/clk_freq)*2**32) + * in bios + * ```#define CONFIG_CLOCK_FREQUENCY 100000000``` + * or read freq with config_clock_frequency_read() + * access csr with generated UART_1 funcs + + * 115200=004B7F5A (4947802) + * 9600=00064A9C (412316) + +### add 'CSR Definition' CSR for indirectly addressing CSRs, so that code doesn't need to change when gateware rebuilt + +* add a 'CSR Definition' region at constant location (like CSR_BASE) + + * contains list of CSRs which point to and define CSRs available + + * Base Addr: 24 bits, Identifier: 8 bits + + * also could self.add_constant(ID) for each, so that software can use constants to check Identifier + + * needs to be able to reserve its region, then find out what's been built and add its regs before finalize() + +### allow lxterm and lxserver to share main uart (crossover) + + +## Litex RTL + +### add verilog module to top, between top-level bus and I/Os (slave) + +``` +from issiram import ISSIRam +platform.add_source("./modules/issiram.v") + +sram_bus = wishbone.Interface() +pins = 
platform.request('issiram') +# to rename pins +mem = { + 'ce': pins.cen, + 'oe': pins.oen, + 'we': pins.wen, + 'adr': pins.addr, + 'dat': pins.data +} +sram = ISSIRam(self, ClockSignal(), ResetSignal(), sram_bus, mem) +self.submodules.sram = sram +self.bus.add_slave('sram', sram_bus, SoCRegion(origin=self.mem_map['sram'], size=sram.size)) +self.logger.info("SRAM {} {} {}.".format( + colorer('sram'), + colorer("added", color="green"), + self.bus.regions['sram']) +) +``` + +### add verilog module to top, with connections to other top-level signals + + +### add Litex L2 between WB and verilog module + +