forked from cores/microwatt
Browse Source
Add VHDL wrappers and Verilog behaviourals for the cache_ram, register_file and main_bram arrays.
Signed-off-by: Anton Blanchard <anton@linux.ibm.com>
caravel-mpw5-20220322
8 changed files with 523 additions and 0 deletions
@ -0,0 +1,24 @@
@@ -0,0 +1,24 @@
|
||||
// Behavioural model of the register file macro.
//
// Storage is 96 entries of 64 bits. There are three combinational read
// ports (R1/D1, R2/D2, R3/D3) and one write port (RW/DW) that is sampled
// on the rising edge of CLK when WE is high.
//
// The address ports are 7 bits wide, so indices 96..127 fall outside the
// declared array.
module Microwatt_FP_DFFRFile (
`ifdef USE_POWER_PINS
    inout         VPWR,
    inout         VGND,
`endif
    input  [6:0]  R1, R2, R3, RW,
    input  [63:0] DW,
    output [63:0] D1, D2, D3,
    input         CLK,
    input         WE
);

    localparam NUM_ENTRIES = 96;

    reg [63:0] rf_mem [0:NUM_ENTRIES-1];

    // Synchronous write port.
    always @(posedge CLK)
        if (WE)
            rf_mem[RW] <= DW;

    // Asynchronous read ports.
    assign D1 = rf_mem[R1];
    assign D2 = rf_mem[R2];
    assign D3 = rf_mem[R3];

endmodule
@ -0,0 +1,40 @@
@@ -0,0 +1,40 @@
|
||||
// Behavioural model of a (2**BITS) x 64-bit RAM with two ports.
//
//   Port 0: byte-lane write port. When EN0 is high on a rising CLK edge,
//           each byte i of Di0 is stored at address A0 if WE0[i] is set.
//           Do0 is declared but never assigned in this model.
//   Port 1: registered read port. When EN1 is high on a rising CLK edge,
//           Do1 is loaded with the word at A1; otherwise Do1 keeps its
//           previous value.
module RAM32_1RW1R #(
    parameter BITS=5
) (
`ifdef USE_POWER_PINS
    inout VPWR,
    inout VGND,
`endif
    input CLK,

    input EN0,
    input [BITS-1:0] A0,
    input [7:0] WE0,
    input [63:0] Di0,
    output reg [63:0] Do0,

    input EN1,
    input [BITS-1:0] A1,
    output reg [63:0] Do1
);

    reg [63:0] mem [2**BITS-1:0];

    integer lane;

    // Port 1: synchronous read.
    always @(posedge CLK)
        if (EN1)
            Do1 <= mem[A1];

    // Port 0: synchronous write with one enable per byte lane.
    always @(posedge CLK)
        if (EN0)
            for (lane = 0; lane < 8; lane = lane + 1)
                if (WE0[lane])
                    mem[A0][lane*8 +: 8] <= Di0[lane*8 +: 8];

endmodule
@ -0,0 +1,42 @@
@@ -0,0 +1,42 @@
|
||||
// Behavioural model of a (2**BITS) x 64-bit single-port RAM.
//
// On every rising CLK edge:
//   * Do0 is loaded with the word at A0 when EN0 is high, and with zero
//     when EN0 is low.
//   * when EN0 is high, each byte i of Di0 is stored at A0 if WE0[i] is set.
//
// The array contents are loaded from FILENAME with $readmemh at time zero.
module RAM512 #(
    parameter BITS=9,
    parameter FILENAME="firmware.hex"
) (
`ifdef USE_POWER_PINS
    inout VPWR,
    inout VGND,
`endif
    input CLK,
    input [7:0] WE0,
    input EN0,
    input [63:0] Di0,
    output reg [63:0] Do0,
    input [BITS-1:0] A0
);

    reg [63:0] mem [2**BITS-1:0];

    integer lane;

    // Preload the memory image.
    initial
        $readmemh(FILENAME, mem);

    // Registered read; the output is forced to zero when the RAM is idle.
    always @(posedge CLK)
        Do0 <= EN0 ? mem[A0] : 64'b0;

    // Byte-lane write.
    always @(posedge CLK)
        if (EN0)
            for (lane = 0; lane < 8; lane = lane + 1)
                if (WE0[lane])
                    mem[A0][lane*8 +: 8] <= Di0[lane*8 +: 8];

endmodule
@ -0,0 +1,24 @@
@@ -0,0 +1,24 @@
|
||||
// Behavioural model of a pipelined multiply-add: o = (a * b) + c.
//
// a and b are BITS wide; c and o are 2*BITS wide. The result passes through
// three registers, so o reflects the inputs that were present three rising
// clk edges earlier.
module multiply_add_64x64
#(
    parameter BITS=64
) (
`ifdef USE_POWER_PINS
    inout VPWR,
    inout VGND,
`endif
    input clk,
    input [BITS-1:0] a,
    input [BITS-1:0] b,
    input [BITS*2-1:0] c,
    output [BITS*2-1:0] o
);
    // Three-stage result pipeline; o_tmp[0] is the newest value.
    reg [BITS*2-1:0] o_tmp[2:0];

    // Nonblocking assignments: every stage samples the pre-edge value of its
    // predecessor regardless of statement order, and other clocked logic
    // reading these registers on the same edge does not race with the update.
    always @(posedge clk) begin
        o_tmp[0] <= (a * b) + c;
        o_tmp[1] <= o_tmp[0];
        o_tmp[2] <= o_tmp[1];
    end

    assign o = o_tmp[2];
endmodule
@ -0,0 +1,99 @@
@@ -0,0 +1,99 @@
|
||||
library ieee; |
||||
use ieee.std_logic_1164.all; |
||||
use ieee.numeric_std.all; |
||||
use ieee.math_real.all; |
||||
|
||||
-- Wrapper that maps the cache_ram interface onto a RAM32_1RW1R macro.
--
-- Only ROW_BITS = 5, WIDTH = 64 and TRACE = false are accepted; any other
-- value fails an assertion. When ADD_BUF is true an extra register is
-- placed on rd_data.
entity cache_ram is
    generic(
        ROW_BITS : integer := 5;
        WIDTH    : integer := 64;
        TRACE    : boolean := false;
        ADD_BUF  : boolean := false
        );

    port(
        clk     : in  std_logic;

        rd_en   : in  std_logic;
        rd_addr : in  std_logic_vector(ROW_BITS - 1 downto 0);
        rd_data : out std_logic_vector(WIDTH - 1 downto 0);

        wr_sel  : in  std_logic_vector(WIDTH/8 - 1 downto 0);
        wr_addr : in  std_logic_vector(ROW_BITS - 1 downto 0);
        wr_data : in  std_logic_vector(WIDTH - 1 downto 0)
        );

end cache_ram;

architecture rtl of cache_ram is
    component RAM32_1RW1R port(
        CLK : in std_logic;

        EN0 : in std_logic;
        A0  : in std_logic_vector(4 downto 0);
        WE0 : in std_logic_vector(7 downto 0);
        Di0 : in std_logic_vector(63 downto 0);
        Do0 : out std_logic_vector(63 downto 0);

        EN1 : in std_logic;
        A1  : in std_logic_vector(4 downto 0);
        Do1 : out std_logic_vector(63 downto 0)
        );
    end component;

    -- High when at least one byte lane is being written.
    signal any_byte_we : std_logic;
    -- Raw read data from the macro.
    signal ram_q       : std_logic_vector(WIDTH - 1 downto 0);
    -- Copy of the most recent read result.
    signal held_q      : std_logic_vector(WIDTH - 1 downto 0);
    -- Read data after the hold mux, before the optional output register.
    signal rd_mux      : std_logic_vector(WIDTH - 1 downto 0);
    -- rd_en delayed by one clock.
    signal rd_en_q     : std_ulogic;
begin
    assert (ROW_BITS = 5) report "ROW_BITS must be 5" severity FAILURE;
    assert (WIDTH = 64) report "Must be 64 bit" severity FAILURE;
    assert (TRACE = false) report "Trace not supported" severity FAILURE;

    any_byte_we <= or wr_sel;

    -- Port 0 is used for writes only (Do0 is left open); port 1 for reads.
    cache_ram_0 : RAM32_1RW1R
        port map (
            CLK => clk,

            EN0 => any_byte_we,
            A0  => wr_addr,
            WE0 => wr_sel,
            Di0 => wr_data,
            Do0 => open,

            EN1 => rd_en,
            A1  => rd_addr,
            Do1 => ram_q
            );

    -- The caches rely on cache_ram latching the last read. Handle it here
    -- for now: in the cycle after a read was issued the macro output is
    -- passed through and captured; in every other cycle the captured copy
    -- is presented instead.
    hold_last_read: process(clk)
    begin
        if rising_edge(clk) then
            if rd_en_q = '1' then
                held_q <= ram_q;
            end if;
            rd_en_q <= rd_en;
        end if;
    end process;

    hold_mux: process(all)
    begin
        if rd_en_q = '1' then
            rd_mux <= ram_q;
        else
            rd_mux <= held_q;
        end if;
    end process;

    -- Optional extra output register.
    buf: if ADD_BUF generate
    begin
        process(clk)
        begin
            if rising_edge(clk) then
                rd_data <= rd_mux;
            end if;
        end process;
    end generate;

    nobuf: if not ADD_BUF generate
    begin
        rd_data <= rd_mux;
    end generate;

end architecture rtl;
@ -0,0 +1,63 @@
@@ -0,0 +1,63 @@
|
||||
library ieee; |
||||
use ieee.std_logic_1164.all; |
||||
|
||||
library work; |
||||
|
||||
-- Wrapper that maps the main_bram interface onto a single RAM512 macro.
--
-- Only WIDTH = 64, HEIGHT_BITS = 9 and MEMORY_SIZE = 4096 are accepted; any
-- other value fails an assertion. RAM_INIT_FILE is not referenced by this
-- architecture.
entity main_bram is
    generic(
        WIDTH         : natural := 64;
        HEIGHT_BITS   : natural;
        MEMORY_SIZE   : natural;
        RAM_INIT_FILE : string
        );
    port(
        clk  : in std_logic;
        addr : in std_logic_vector(HEIGHT_BITS - 1 downto 0) ;
        din  : in std_logic_vector(WIDTH-1 downto 0);
        dout : out std_logic_vector(WIDTH-1 downto 0);
        sel  : in std_logic_vector((WIDTH/8)-1 downto 0);
        re   : in std_ulogic;
        we   : in std_ulogic
        );
end entity main_bram;

architecture behaviour of main_bram is
    component RAM512 port (
        CLK : in std_ulogic;
        WE0 : in std_ulogic_vector(7 downto 0);
        EN0 : in std_ulogic;
        Di0 : in std_ulogic_vector(63 downto 0);
        Do0 : out std_ulogic_vector(63 downto 0);
        A0  : in std_ulogic_vector(8 downto 0)
        );
    end component;

    -- Byte selects gated with the write enable.
    signal sel_qual: std_ulogic_vector((WIDTH/8)-1 downto 0);

    -- Read data straight out of the macro, before the output register.
    signal obuf : std_logic_vector(WIDTH-1 downto 0);
begin
    assert (WIDTH = 64) report "Must be 64 bit" severity FAILURE;
    -- Do we have a log2 round up issue here?
    assert (HEIGHT_BITS = 9) report "HEIGHT_BITS must be 9" severity FAILURE;
    assert (MEMORY_SIZE = 4096) report "MEMORY_SIZE must be 4096" severity FAILURE;

    -- No byte lane may be written unless we is asserted.
    sel_qual <= sel when we = '1' else (others => '0');

    -- The macro is enabled for both reads and writes.
    memory_0 : RAM512
        port map (
            CLK => clk,
            WE0 => sel_qual(7 downto 0),
            EN0 => re or we,
            Di0 => din(63 downto 0),
            Do0 => obuf(63 downto 0),
            A0  => addr(8 downto 0)
            );

    -- The wishbone BRAM wrapper assumes a 1 cycle delay
    memory_read_buffer: process(clk)
    begin
        if rising_edge(clk) then
            dout <= obuf;
        end if;
    end process;
end architecture behaviour;
@ -0,0 +1,128 @@
@@ -0,0 +1,128 @@
|
||||
library ieee; |
||||
use ieee.std_logic_1164.all; |
||||
use ieee.numeric_std.all; |
||||
|
||||
library work; |
||||
use work.common.all; |
||||
|
||||
-- XXX We should be able to make timing with a 2 cycle multiplier |
||||
-- Multiplier wrapper around the multiply_add_64x64 macro.
--
-- m_in is registered once, then fed to the macro. A small shift register
-- carries the valid / is_32bit / not_result flags alongside the macro so
-- that they line up with its output.
--
-- PIPELINE_DEPTH must be 4: the flags presented at the output have been
-- delayed by PIPELINE_DEPTH-1 clocks relative to the registered input, and
-- that has to match the latency of the multiply_add_64x64 macro.
entity multiply is
    generic (
        PIPELINE_DEPTH : natural := 4
        );
    port (
        clk   : in std_logic;

        m_in  : in MultiplyInputType;
        m_out : out MultiplyOutputType
        );
end entity multiply;

architecture behaviour of multiply is
    -- Registered copy of m_in.
    signal m: MultiplyInputType := MultiplyInputInit;

    -- Per-operation flags that travel alongside the macro pipeline.
    type multiply_pipeline_stage is record
        valid     : std_ulogic;
        is_32bit  : std_ulogic;
        not_res   : std_ulogic;
    end record;
    constant MultiplyPipelineStageInit : multiply_pipeline_stage := (valid => '0',
                                                                      is_32bit => '0',
                                                                      not_res => '0');

    type multiply_pipeline_type is array(0 to PIPELINE_DEPTH-1) of multiply_pipeline_stage;
    constant MultiplyPipelineInit : multiply_pipeline_type := (others => MultiplyPipelineStageInit);

    type reg_type is record
        multiply_pipeline : multiply_pipeline_type;
    end record;

    signal r, rin : reg_type := (multiply_pipeline => MultiplyPipelineInit);
    signal overflow : std_ulogic;
    signal ovf_in   : std_ulogic;

    -- Raw result from the macro.
    signal mult_out : std_logic_vector(127 downto 0);

    component multiply_add_64x64 port(
        clk : in std_logic;
        a   : in std_logic_vector(63 downto 0);
        b   : in std_logic_vector(63 downto 0);
        c   : in std_logic_vector(127 downto 0);
        o   : out std_logic_vector(127 downto 0)
        );
    end component;
begin
    -- The macro latency is fixed, so the flag pipeline depth cannot be
    -- changed independently of it.
    assert (PIPELINE_DEPTH = 4) report "PIPELINE_DEPTH must be 4" severity FAILURE;

    -- Sequential state: input register, flag pipeline, overflow register.
    multiply_0: process(clk)
    begin
        if rising_edge(clk) then
            m <= m_in;
            r <= rin;
            overflow <= ovf_in;
        end if;
    end process;

    multiplier : multiply_add_64x64
        port map (
            clk => clk,
            a   => m.data1,
            b   => m.data2,
            c   => m.addend,
            o   => mult_out
            );

    -- Combinational next-state and output logic.
    multiply_1: process(all)
        variable v  : reg_type;
        variable d  : std_ulogic_vector(127 downto 0);
        variable ov : std_ulogic;
    begin
        v := r;
        v.multiply_pipeline(0).valid := m.valid;
        v.multiply_pipeline(0).is_32bit := m.is_32bit;
        v.multiply_pipeline(0).not_res := m.not_result;

        -- Shift the flags along by one stage.
        loop_0: for i in 1 to PIPELINE_DEPTH-1 loop
            v.multiply_pipeline(i) := r.multiply_pipeline(i-1);
        end loop;

        -- Optionally invert the macro result.
        if v.multiply_pipeline(PIPELINE_DEPTH-1).not_res = '1' then
            d := not mult_out;
        else
            d := mult_out;
        end if;

        -- Overflow is flagged when the bits from the sign position of the
        -- narrow result upwards are neither all zeros nor all ones.
        if v.multiply_pipeline(PIPELINE_DEPTH-1).is_32bit = '1' then
            ov := (or d(63 downto 31)) and not (and d(63 downto 31));
        else
            ov := (or d(127 downto 63)) and not (and d(127 downto 63));
        end if;
        ovf_in <= ov;

        -- m_out.overflow is taken from the registered copy, so it appears
        -- one clock after the result it belongs to.
        m_out.result <= d;
        m_out.overflow <= overflow;
        m_out.valid <= v.multiply_pipeline(PIPELINE_DEPTH-1).valid;

        rin <= v;
    end process;
end architecture behaviour;
||||
|
||||
|
||||
library ieee; |
||||
use ieee.std_logic_1164.all; |
||||
use ieee.numeric_std.all; |
||||
|
||||
-- Combinational signed 16 x 16 -> 32 bit multiplier.
-- The clk port is part of the interface but is not used by this
-- architecture.
entity short_multiply is
    port (
        clk   : in std_ulogic;

        a_in  : in std_ulogic_vector(15 downto 0);
        b_in  : in std_ulogic_vector(15 downto 0);
        m_out : out std_ulogic_vector(31 downto 0)
        );
end entity short_multiply;

architecture behaviour of short_multiply is
    -- Full-width two's complement product of the two operands.
    signal product : signed(31 downto 0);
begin
    product <= signed(a_in) * signed(b_in);
    m_out   <= std_ulogic_vector(product);
end architecture behaviour;
@ -0,0 +1,103 @@
@@ -0,0 +1,103 @@
|
||||
library ieee; |
||||
use ieee.std_logic_1164.all; |
||||
use ieee.numeric_std.all; |
||||
|
||||
library work; |
||||
use work.common.all; |
||||
|
||||
-- Register file wrapper around the Microwatt_FP_DFFRFile macro.
--
-- Three read ports and one write port are connected to the macro, with
-- same-cycle write data forwarded to any read port that addresses the
-- register being written.
--
-- The debug GPR access, simulation dump and logging interfaces are not
-- implemented by this wrapper; their outputs are tied to constant zero.
-- The SIM, HAS_FPU and LOG_LENGTH generics are not referenced.
entity register_file is
    generic (
        SIM        : boolean := false;
        HAS_FPU    : boolean := true;
        LOG_LENGTH : natural := 0
        );
    port(
        clk           : in std_logic;

        d_in          : in Decode2ToRegisterFileType;
        d_out         : out RegisterFileToDecode2Type;

        w_in          : in WritebackToRegisterFileType;

        dbg_gpr_req   : in std_ulogic;
        dbg_gpr_ack   : out std_ulogic;
        dbg_gpr_addr  : in gspr_index_t;
        dbg_gpr_data  : out std_ulogic_vector(63 downto 0);

        sim_dump      : in std_ulogic;
        sim_dump_done : out std_ulogic;

        log_out       : out std_ulogic_vector(71 downto 0)
        );
end entity register_file;

architecture behaviour of register_file is
    component Microwatt_FP_DFFRFile port (
        CLK : in std_ulogic;

        R1  : in std_ulogic_vector(6 downto 0);
        R2  : in std_ulogic_vector(6 downto 0);
        R3  : in std_ulogic_vector(6 downto 0);

        D1  : out std_ulogic_vector(63 downto 0);
        D2  : out std_ulogic_vector(63 downto 0);
        D3  : out std_ulogic_vector(63 downto 0);

        WE  : in std_ulogic;
        RW  : in std_ulogic_vector(6 downto 0);
        DW  : in std_ulogic_vector(63 downto 0)
        );
    end component;

    -- Raw read data from the macro, before write forwarding.
    signal d1: std_ulogic_vector(63 downto 0);
    signal d2: std_ulogic_vector(63 downto 0);
    signal d3: std_ulogic_vector(63 downto 0);
begin

    register_file_0 : Microwatt_FP_DFFRFile
        port map (
            CLK => clk,

            R1  => d_in.read1_reg,
            R2  => d_in.read2_reg,
            R3  => d_in.read3_reg,

            D1  => d1,
            D2  => d2,
            D3  => d3,

            WE  => w_in.write_enable,
            RW  => w_in.write_reg,
            DW  => w_in.write_data
            );

    -- Unimplemented interfaces: drive the outputs to a defined value so
    -- they do not sit at 'U' in simulation or float after synthesis.
    dbg_gpr_ack   <= '0';
    dbg_gpr_data  <= (others => '0');
    sim_dump_done <= '0';
    log_out       <= (others => '0');

    -- Catch writes whose data or register index contains unknown bits.
    x_state_check: process(clk)
    begin
        if rising_edge(clk) then
            if w_in.write_enable = '1' then
                assert not(is_x(w_in.write_data)) and not(is_x(w_in.write_reg)) severity failure;
            end if;
        end if;
    end process x_state_check;

    -- Forward any written data
    register_read_0: process(all)
    begin
        -- Default: data as read from the macro.
        d_out.read1_data <= d1;
        d_out.read2_data <= d2;
        d_out.read3_data <= d3;

        -- Override any port that reads the register currently being written.
        if w_in.write_enable = '1' then
            if d_in.read1_reg = w_in.write_reg then
                d_out.read1_data <= w_in.write_data;
            end if;
            if d_in.read2_reg = w_in.write_reg then
                d_out.read2_data <= w_in.write_data;
            end if;
            if d_in.read3_reg = w_in.write_reg then
                d_out.read3_data <= w_in.write_data;
            end if;
        end if;
    end process register_read_0;

end architecture behaviour;
Loading…
Reference in new issue