Merge pull request #118 from antonblanchard/bus-pipeline

Bus pipeline
pull/123/head
Anton Blanchard 5 years ago committed by GitHub
commit 9b1394e236
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -2,8 +2,8 @@ GHDL=ghdl
GHDLFLAGS=--std=08 -Psim-unisim
CFLAGS=-O2 -Wall

all = core_tb simple_ram_behavioural_tb soc_reset_tb icache_tb dcache_tb multiply_tb dmi_dtm_tb divider_tb \
rotator_tb countzero_tb
all = core_tb soc_reset_tb icache_tb dcache_tb multiply_tb dmi_dtm_tb divider_tb \
rotator_tb countzero_tb wishbone_bram_tb

# XXX
# loadstore_tb fetch_tb
@ -35,10 +35,14 @@ helpers.o:
cache_ram.o:
plru.o:
plru_tb.o: plru.o
icache.o: common.o wishbone_types.o plru.o cache_ram.o
icache_tb.o: common.o wishbone_types.o icache.o simple_ram_behavioural.o
dcache.o: common.o wishbone_types.o plru.o cache_ram.o
dcache_tb.o: common.o wishbone_types.o dcache.o simple_ram_behavioural.o
utils.o:
sim_bram.o: sim_bram_helpers.o utils.o
wishbone_bram_wrapper.o: wishbone_types.o sim_bram.o utils.o
wishbone_bram_tb.o: wishbone_bram_wrapper.o
icache.o: utils.o common.o wishbone_types.o plru.o cache_ram.o utils.o
icache_tb.o: common.o wishbone_types.o icache.o wishbone_bram_wrapper.o
dcache.o: utils.o common.o wishbone_types.o plru.o cache_ram.o utils.o
dcache_tb.o: common.o wishbone_types.o dcache.o wishbone_bram_wrapper.o
insn_helpers.o:
loadstore1.o: common.o helpers.o
logical.o: decode_types.o
@ -51,11 +55,8 @@ register_file.o: common.o
rotator.o: common.o
rotator_tb.o: common.o glibc_random.o ppc_fx_insns.o insn_helpers.o rotator.o
sim_console.o:
simple_ram_behavioural_helpers.o:
simple_ram_behavioural_tb.o: wishbone_types.o simple_ram_behavioural.o
simple_ram_behavioural.o: wishbone_types.o simple_ram_behavioural_helpers.o
sim_uart.o: wishbone_types.o sim_console.o
soc.o: common.o wishbone_types.o core.o wishbone_arbiter.o sim_uart.o simple_ram_behavioural.o dmi_dtm_xilinx.o wishbone_debug_master.o
soc.o: common.o wishbone_types.o core.o wishbone_arbiter.o sim_uart.o wishbone_bram_wrapper.o dmi_dtm_xilinx.o wishbone_debug_master.o
wishbone_arbiter.o: wishbone_types.o
wishbone_types.o:
writeback.o: common.o crhelpers.o
@ -73,17 +74,17 @@ fpga/soc_reset_tb.o: fpga/soc_reset.o
soc_reset_tb: fpga/soc_reset_tb.o fpga/soc_reset.o
$(GHDL) -e $(GHDLFLAGS) soc_reset_tb

core_tb: core_tb.o simple_ram_behavioural_helpers_c.o sim_console_c.o sim_jtag_socket_c.o
$(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o -Wl,sim_console_c.o -Wl,sim_jtag_socket_c.o $@
core_tb: core_tb.o sim_bram_helpers_c.o sim_console_c.o sim_jtag_socket_c.o
$(GHDL) -e $(GHDLFLAGS) -Wl,sim_bram_helpers_c.o -Wl,sim_console_c.o -Wl,sim_jtag_socket_c.o $@

fetch_tb: fetch_tb.o
$(GHDL) -e $(GHDLFLAGS) $@

icache_tb: icache_tb.o
$(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o $@
$(GHDL) -e $(GHDLFLAGS) -Wl,sim_bram_helpers_c.o $@

dcache_tb: dcache_tb.o
$(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o $@
$(GHDL) -e $(GHDLFLAGS) -Wl,sim_bram_helpers_c.o $@

plru_tb: plru_tb.o
$(GHDL) -e $(GHDLFLAGS) $@
@ -106,11 +107,11 @@ countzero_tb: countzero_tb.o
simple_ram_tb: simple_ram_tb.o
$(GHDL) -e $(GHDLFLAGS) $@

simple_ram_behavioural_tb: simple_ram_behavioural_helpers_c.o simple_ram_behavioural_tb.o
$(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o $@
wishbone_bram_tb: sim_bram_helpers_c.o wishbone_bram_tb.o
$(GHDL) -e $(GHDLFLAGS) -Wl,sim_bram_helpers_c.o $@

dmi_dtm_tb: dmi_dtm_tb.o simple_ram_behavioural_helpers_c.o
$(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o $@
dmi_dtm_tb: dmi_dtm_tb.o sim_bram_helpers_c.o
$(GHDL) -e $(GHDLFLAGS) -Wl,sim_bram_helpers_c.o $@

tests = $(sort $(patsubst tests/%.out,%,$(wildcard tests/*.out)))


@ -39,7 +39,7 @@ make
- Link in the micropython image:

```
ln -s ../micropython/ports/powerpc/build/firmware.bin simple_ram_behavioural.bin
ln -s ../micropython/ports/powerpc/build/firmware.bin main_ram.bin
```

- Now run microwatt, sending debug output to /dev/null:

@ -8,7 +8,8 @@ use work.wishbone_types.all;

entity core is
generic (
SIM : boolean := false
SIM : boolean := false;
DISABLE_FLATTEN : boolean := false
);
port (
clk : in std_logic;
@ -93,6 +94,29 @@ architecture behave of core is
-- Debug status
signal dbg_core_is_stopped: std_ulogic;

function keep_h(disable : boolean) return string is
begin
if disable then
return "yes";
else
return "no";
end if;
end function;
attribute keep_hierarchy : string;
attribute keep_hierarchy of fetch1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of icache_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of fetch2_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of decode1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of decode2_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of register_file_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of cr_file_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of execute1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of multiply_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of divider_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of loadstore1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of dcache_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of writeback_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of debug_0 : label is keep_h(DISABLE_FLATTEN);
begin

core_rst <= dbg_core_rst or rst;

@ -20,7 +20,7 @@ begin
generic map(
SIM => true,
MEMORY_SIZE => 524288,
RAM_INIT_FILE => "simple_ram_behavioural.bin",
RAM_INIT_FILE => "main_ram.bin",
RESET_LOW => false
)
port map(

@ -16,6 +16,7 @@ use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.utils.all;
use work.common.all;
use work.helpers.all;
use work.wishbone_types.all;
@ -44,26 +45,6 @@ entity dcache is
end entity dcache;

architecture rtl of dcache is
function log2(i : natural) return integer is
variable tmp : integer := i;
variable ret : integer := 0;
begin
while tmp > 1 loop
ret := ret + 1;
tmp := tmp / 2;
end loop;
return ret;
end function;

function ispow2(i : integer) return boolean is
begin
if to_integer(to_unsigned(i, 32) and to_unsigned(i - 1, 32)) = 0 then
return true;
else
return false;
end if;
end function;

-- BRAM organisation: We never access more than wishbone_data_bits at
-- a time so to save resources we make the array only that wide, and
-- use consecutive indices for to make a cache "line"
@ -187,6 +168,7 @@ architecture rtl of dcache is
state : state_t;
wb : wishbone_master_out;
store_way : way_t;
store_row : row_t;
store_index : index_t;
end record;

@ -213,6 +195,7 @@ architecture rtl of dcache is
signal req_hit_way : way_t;
signal req_tag : cache_tag_t;
signal req_op : op_t;
signal req_laddr : std_ulogic_vector(63 downto 0);

-- Cache RAM interface
type cache_ram_out_t is array(way_t) of cache_row_t;
@ -244,12 +227,21 @@ architecture rtl of dcache is
end;

-- Returns whether this is the last row of a line
function is_last_row(addr: wishbone_addr_type) return boolean is
function is_last_row_addr(addr: wishbone_addr_type) return boolean is
constant ones : std_ulogic_vector(ROW_LINEBITS-1 downto 0) := (others => '1');
begin
return addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS) = ones;
end;

-- Returns whether this is the last row of a line
function is_last_row(row: row_t) return boolean is
variable row_v : std_ulogic_vector(ROW_BITS-1 downto 0);
constant ones : std_ulogic_vector(ROW_LINEBITS-1 downto 0) := (others => '1');
begin
row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
return row_v(ROW_LINEBITS-1 downto 0) = ones;
end;

-- Return the address of the next row in the current cache line
function next_row_addr(addr: wishbone_addr_type) return std_ulogic_vector is
variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
@ -263,6 +255,21 @@ architecture rtl of dcache is
return result;
end;

-- Return the next row in the current cache line. We use a dedicated
-- function in order to limit the size of the generated adder to be
-- only the bits within a cache line (3 bits with default settings)
--
function next_row(row: row_t) return row_t is
variable row_v : std_ulogic_vector(ROW_BITS-1 downto 0);
variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
variable result : std_ulogic_vector(ROW_BITS-1 downto 0);
begin
row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
row_idx := row_v(ROW_LINEBITS-1 downto 0);
row_v(ROW_LINEBITS-1 downto 0) := std_ulogic_vector(unsigned(row_idx) + 1);
return to_integer(unsigned(row_v));
end;

-- Get the tag value from the address
function get_tag(addr: std_ulogic_vector(63 downto 0)) return cache_tag_t is
begin
@ -381,6 +388,12 @@ begin
req_row <= get_row(d_in.addr);
req_tag <= get_tag(d_in.addr);

-- Calculate address of beginning of cache line, will be
-- used for cache miss processing if needed
--
req_laddr <= d_in.addr(63 downto LINE_OFF_BITS) &
(LINE_OFF_BITS-1 downto 0 => '0');

-- Test if pending request is a hit on any way
hit_way := 0;
is_hit := '0';
@ -573,7 +586,8 @@ begin
wr_data => wr_data
);
process(all)
variable tmp_adr : std_ulogic_vector(63 downto 0);
variable tmp_adr : std_ulogic_vector(63 downto 0);
variable reloading : boolean;
begin
-- Cache hit reads
do_read <= '1';
@ -596,17 +610,17 @@ begin
-- Otherwise, we might be doing a reload
wr_data <= wishbone_in.dat;
wr_sel <= (others => '1');
tmp_adr := (r1.wb.adr'left downto 0 => r1.wb.adr, others => '0');
wr_addr <= std_ulogic_vector(to_unsigned(get_row(tmp_adr), ROW_BITS));
wr_addr <= std_ulogic_vector(to_unsigned(r1.store_row, ROW_BITS));
end if;

-- The two actual write cases here
do_write <= '0';
if r1.state = RELOAD_WAIT_ACK and wishbone_in.ack = '1' and r1.store_way = i then
reloading := r1.state = RELOAD_WAIT_ACK;
if reloading and wishbone_in.ack = '1' and r1.store_way = i then
do_write <= '1';
end if;
if req_op = OP_STORE_HIT and req_hit_way = i then
assert r1.state /= RELOAD_WAIT_ACK report "Store hit while in state:" &
assert not reloading report "Store hit while in state:" &
state_t'image(r1.state)
severity FAILURE;
do_write <= '1';
@ -637,7 +651,7 @@ begin
-- single issue on load/stores so we are fine, later, we can generate
-- a stall output if necessary).

if d_in.valid = '1' then
if req_op /= OP_NONE then
r1.req <= d_in;

report "op:" & op_t'image(req_op) &
@ -672,7 +686,8 @@ begin
-- operates at stage 1.
--
dcache_slow : process(clk)
variable tagset : cache_tags_set_t;
variable tagset : cache_tags_set_t;
variable stbs_done : boolean;
begin
if rising_edge(clk) then
-- On reset, clear all valid bits to force misses
@ -731,16 +746,18 @@ begin
-- Keep track of our index and way for subsequent stores.
r1.store_index <= req_index;
r1.store_way <= replace_way;
r1.store_row <= get_row(req_laddr);

-- Prep for first wishbone read. We calculate the address of
-- the start of the cache line
-- the start of the cache line and start the WB cycle
--
r1.wb.adr <= d_in.addr(r1.wb.adr'left downto LINE_OFF_BITS) &
(LINE_OFF_BITS-1 downto 0 => '0');
r1.wb.adr <= req_laddr(r1.wb.adr'left downto 0);
r1.wb.sel <= (others => '1');
r1.wb.we <= '0';
r1.wb.cyc <= '1';
r1.wb.stb <= '1';

-- Track that we had one request sent
r1.state <= RELOAD_WAIT_ACK;

when OP_LOAD_NC =>
@ -770,6 +787,25 @@ begin
end case;

when RELOAD_WAIT_ACK =>
-- Requests are all sent if stb is 0
stbs_done := r1.wb.stb = '0';

-- If we are still sending requests, was one accepted ?
if wishbone_in.stall = '0' and not stbs_done then
-- That was the last word ? We are done sending. Clear
-- stb and set stbs_done so we can handle an eventual last
-- ack on the same cycle.
--
if is_last_row_addr(r1.wb.adr) then
r1.wb.stb <= '0';
stbs_done := true;
end if;

-- Calculate the next row address
r1.wb.adr <= next_row_addr(r1.wb.adr);
end if;

-- Incoming acks processing
if wishbone_in.ack = '1' then
-- Is this the data we were looking for ? Latch it so
-- we can respond later. We don't currently complete the
@ -779,16 +815,17 @@ begin
-- not idle, which we don't currently know how to deal
-- with.
--
if r1.wb.adr(LINE_OFF_BITS-1 downto ROW_OFF_BITS) =
r1.req.addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS) then
if r1.store_row = get_row(r1.req.addr) then
r1.slow_data <= wishbone_in.dat;
end if;

-- That was the last word ? We are done
if is_last_row(r1.wb.adr) then
cache_valids(r1.store_index)(r1.store_way) <= '1';
-- Check for completion
if stbs_done and is_last_row(r1.store_row) then
-- Complete wishbone cycle
r1.wb.cyc <= '0';
r1.wb.stb <= '0';

-- Cache line is now valid
cache_valids(r1.store_index)(r1.store_way) <= '1';

-- Complete the load that missed. For load with update
-- we also need to do the deferred update cycle.
@ -801,10 +838,10 @@ begin
r1.state <= IDLE;
report "completing miss !";
end if;
else
-- Otherwise, calculate the next row address
r1.wb.adr <= next_row_addr(r1.wb.adr);
end if;

-- Increment store row counter
r1.store_row <= next_row(r1.store_row);
end if;

when LOAD_UPDATE =>
@ -816,7 +853,13 @@ begin
r1.state <= IDLE;

when STORE_WAIT_ACK | NC_LOAD_WAIT_ACK =>
if wishbone_in.ack = '1' then
-- Clear stb when slave accepted request
if wishbone_in.stall = '0' then
r1.wb.stb <= '0';
end if;

-- Got ack ? complete.
if wishbone_in.ack = '1' then
if r1.state = NC_LOAD_WAIT_ACK then
r1.slow_data <= wishbone_in.dat;
end if;

@ -35,9 +35,9 @@ begin
);

-- BRAM Memory slave
bram0: entity work.mw_soc_memory
bram0: entity work.wishbone_bram_wrapper
generic map(
MEMORY_SIZE => 128,
MEMORY_SIZE => 1024,
RAM_INIT_FILE => "icache_test.bin"
)
port map(
@ -121,7 +121,6 @@ begin
d_in.valid <= '1';
wait until rising_edge(clk);
d_in.valid <= '0';

wait until rising_edge(clk) and d_out.write_enable = '1';
assert d_out.valid = '1';
assert d_out.write_data = x"0000004100000040"
@ -130,7 +129,10 @@ begin
" expected 0000004100000040"
severity failure;

wait for clk_period*4;
wait until rising_edge(clk);
wait until rising_edge(clk);
wait until rising_edge(clk);
wait until rising_edge(clk);

assert false report "end of test" severity failure;
wait;

@ -2,92 +2,93 @@ library ieee;
use ieee.std_logic_1164.all;

package decode_types is
type insn_type_t is (OP_ILLEGAL, OP_NOP, OP_ADD,
OP_ADDPCIS, OP_AND, OP_ATTN, OP_B, OP_BC, OP_BCREG,
OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPL, OP_CMPRB,
OP_CNTZ, OP_CRAND,
OP_CRANDC, OP_CREQV, OP_CRNAND, OP_CRNOR, OP_CROR, OP_CRORC,
OP_CRXOR, OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST,
OP_DCBZ, OP_DIV, OP_EXTS,
OP_EXTSWSLI, OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC,
OP_LOAD, OP_STORE, OP_MADDHD, OP_MADDHDU, OP_MADDLD, OP_MCRF,
OP_MCRXR, OP_MCRXRX, OP_MFCR, OP_MFSPR, OP_MOD,
OP_MTCRF, OP_MTSPR, OP_MUL_L64,
OP_MUL_H64, OP_MUL_H32, OP_OR,
OP_POPCNTB, OP_POPCNTD, OP_POPCNTW, OP_PRTYD,
OP_PRTYW, OP_RLC, OP_RLCL, OP_RLCR, OP_SETB,
OP_SHL, OP_SHR,
OP_SYNC, OP_TD, OP_TDI, OP_TW,
OP_TWI, OP_XOR, OP_SIM_CONFIG);

type input_reg_a_t is (NONE, RA, RA_OR_ZERO);
type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD, CONST_DS, CONST_M1, CONST_SH, CONST_SH32);
type input_reg_c_t is (NONE, RS);
type output_reg_a_t is (NONE, RT, RA);
type rc_t is (NONE, ONE, RC);
type carry_in_t is (ZERO, CA, ONE);

constant SH_OFFSET : integer := 0;
constant MB_OFFSET : integer := 1;
constant ME_OFFSET : integer := 1;
constant SH32_OFFSET : integer := 0;
constant MB32_OFFSET : integer := 1;
constant ME32_OFFSET : integer := 2;

constant FXM_OFFSET : integer := 0;

constant BO_OFFSET : integer := 0;
constant BI_OFFSET : integer := 1;
constant BH_OFFSET : integer := 2;

constant BF_OFFSET : integer := 0;
constant L_OFFSET : integer := 1;

constant TOO_OFFSET : integer := 0;

type unit_t is (NONE, ALU, LDST, MUL, DIV);
type length_t is (NONE, is1B, is2B, is4B, is8B);

type decode_rom_t is record
unit : unit_t;
insn_type : insn_type_t;
input_reg_a : input_reg_a_t;
input_reg_b : input_reg_b_t;
input_reg_c : input_reg_c_t;
output_reg_a : output_reg_a_t;

input_cr : std_ulogic;
output_cr : std_ulogic;

invert_a : std_ulogic;
invert_out : std_ulogic;
input_carry : carry_in_t;
output_carry : std_ulogic;

-- load/store signals
length : length_t;
byte_reverse : std_ulogic;
sign_extend : std_ulogic;
update : std_ulogic;
reserve : std_ulogic;

-- multiplier and ALU signals
is_32bit : std_ulogic;
is_signed : std_ulogic;

rc : rc_t;
lr : std_ulogic;

sgl_pipe : std_ulogic;
end record;
constant decode_rom_init : decode_rom_t := (unit => NONE,
insn_type => OP_ILLEGAL, input_reg_a => NONE,
input_reg_b => NONE, input_reg_c => NONE,
output_reg_a => NONE, input_cr => '0', output_cr => '0',
invert_a => '0', invert_out => '0', input_carry => ZERO, output_carry => '0',
length => NONE, byte_reverse => '0', sign_extend => '0',
update => '0', reserve => '0', is_32bit => '0',
is_signed => '0', rc => NONE, lr => '0', sgl_pipe => '0');
type insn_type_t is (OP_ILLEGAL, OP_NOP, OP_ADD,
OP_ADDPCIS, OP_AND, OP_ATTN, OP_B, OP_BC, OP_BCREG,
OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPL, OP_CMPRB,
OP_CNTZ, OP_CRAND,
OP_CRANDC, OP_CREQV, OP_CRNAND, OP_CRNOR, OP_CROR, OP_CRORC,
OP_CRXOR, OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST,
OP_DCBZ, OP_DIV, OP_EXTS,
OP_EXTSWSLI, OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC,
OP_LOAD, OP_STORE, OP_MADDHD, OP_MADDHDU, OP_MADDLD, OP_MCRF,
OP_MCRXR, OP_MCRXRX, OP_MFCR, OP_MFSPR, OP_MOD,
OP_MTCRF, OP_MTSPR, OP_MUL_L64,
OP_MUL_H64, OP_MUL_H32, OP_OR,
OP_POPCNTB, OP_POPCNTD, OP_POPCNTW, OP_PRTYD,
OP_PRTYW, OP_RLC, OP_RLCL, OP_RLCR, OP_SETB,
OP_SHL, OP_SHR,
OP_SYNC, OP_TD, OP_TDI, OP_TW,
OP_TWI, OP_XOR, OP_SIM_CONFIG
);

type input_reg_a_t is (NONE, RA, RA_OR_ZERO);
type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD, CONST_DS, CONST_M1, CONST_SH, CONST_SH32);
type input_reg_c_t is (NONE, RS);
type output_reg_a_t is (NONE, RT, RA);
type rc_t is (NONE, ONE, RC);
type carry_in_t is (ZERO, CA, ONE);

constant SH_OFFSET : integer := 0;
constant MB_OFFSET : integer := 1;
constant ME_OFFSET : integer := 1;
constant SH32_OFFSET : integer := 0;
constant MB32_OFFSET : integer := 1;
constant ME32_OFFSET : integer := 2;

constant FXM_OFFSET : integer := 0;

constant BO_OFFSET : integer := 0;
constant BI_OFFSET : integer := 1;
constant BH_OFFSET : integer := 2;

constant BF_OFFSET : integer := 0;
constant L_OFFSET : integer := 1;

constant TOO_OFFSET : integer := 0;

type unit_t is (NONE, ALU, LDST, MUL, DIV);
type length_t is (NONE, is1B, is2B, is4B, is8B);

type decode_rom_t is record
unit : unit_t;
insn_type : insn_type_t;
input_reg_a : input_reg_a_t;
input_reg_b : input_reg_b_t;
input_reg_c : input_reg_c_t;
output_reg_a : output_reg_a_t;

input_cr : std_ulogic;
output_cr : std_ulogic;

invert_a : std_ulogic;
invert_out : std_ulogic;
input_carry : carry_in_t;
output_carry : std_ulogic;

-- load/store signals
length : length_t;
byte_reverse : std_ulogic;
sign_extend : std_ulogic;
update : std_ulogic;
reserve : std_ulogic;

-- multiplier and ALU signals
is_32bit : std_ulogic;
is_signed : std_ulogic;

rc : rc_t;
lr : std_ulogic;

sgl_pipe : std_ulogic;
end record;
constant decode_rom_init : decode_rom_t := (unit => NONE,
insn_type => OP_ILLEGAL, input_reg_a => NONE,
input_reg_b => NONE, input_reg_c => NONE,
output_reg_a => NONE, input_cr => '0', output_cr => '0',
invert_a => '0', invert_out => '0', input_carry => ZERO, output_carry => '0',
length => NONE, byte_reverse => '0', sign_extend => '0',
update => '0', reserve => '0', is_32bit => '0',
is_signed => '0', rc => NONE, lr => '0', sgl_pipe => '0');

end decode_types;


@ -50,8 +50,8 @@ begin
dmi_ack => dmi_ack
);

simple_ram_0: entity work.mw_soc_memory
generic map(RAM_INIT_FILE => "simple_ram_behavioural.bin",
simple_ram_0: entity work.wishbone_bram_wrapper
generic map(RAM_INIT_FILE => "main_ram.bin",
MEMORY_SIZE => 524288)
port map(clk => clk, rst => rst,
wishbone_in => wishbone_ram_out,

@ -0,0 +1,83 @@
-- Single port Block RAM with one cycle output buffer

library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_unsigned.all;
use ieee.numeric_std.all;
use std.textio.all;

library work;

entity main_bram is
generic(
WIDTH : natural := 64;
HEIGHT_BITS : natural := 1024;
MEMORY_SIZE : natural := 65536;
RAM_INIT_FILE : string
);
port(
clk : in std_logic;
addr : in std_logic_vector(HEIGHT_BITS - 1 downto 0) ;
di : in std_logic_vector(WIDTH-1 downto 0);
do : out std_logic_vector(WIDTH-1 downto 0);
sel : in std_logic_vector((WIDTH/8)-1 downto 0);
re : in std_ulogic;
we : in std_ulogic
);
end entity main_bram;

architecture behaviour of main_bram is

constant WIDTH_BYTES : natural := WIDTH / 8;

-- RAM type definition
type ram_t is array(0 to (MEMORY_SIZE / WIDTH_BYTES) - 1) of std_logic_vector(WIDTH-1 downto 0);

-- RAM loading
impure function init_ram(name : STRING) return ram_t is
file ram_file : text open read_mode is name;
variable ram_line : line;
variable temp_word : std_logic_vector(WIDTH-1 downto 0);
variable temp_ram : ram_t := (others => (others => '0'));
begin
for i in 0 to (MEMORY_SIZE / WIDTH_BYTES) - 1 loop
exit when endfile(ram_file);
readline(ram_file, ram_line);
hread(ram_line, temp_word);
temp_ram(i) := temp_word;
end loop;

return temp_ram;
end function;

-- RAM instance
signal memory : ram_t := init_ram(RAM_INIT_FILE);
attribute ram_style : string;
attribute ram_style of memory : signal is "block";
attribute ram_decomp : string;
attribute ram_decomp of memory : signal is "power";

-- Others
signal obuf : std_logic_vector(WIDTH-1 downto 0);
begin

-- Actual RAM template
memory_0: process(clk)
begin
if rising_edge(clk) then
if we = '1' then
for i in 0 to 7 loop
if sel(i) = '1' then
memory(conv_integer(addr))((i + 1) * 8 - 1 downto i * 8) <=
di((i + 1) * 8 - 1 downto i * 8);
end if;
end loop;
end if;
if re = '1' then
obuf <= memory(conv_integer(addr));
end if;
do <= obuf;
end if;
end process;

end architecture behaviour;

@ -1,106 +0,0 @@
-- Based on:
-- The Potato Processor - A simple processor for FPGAs
-- (c) Kristian Klomsten Skordal 2014 - 2015 <kristian.skordal@wafflemail.net>

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use std.textio.all;

library work;
use work.wishbone_types.all;

use work.pp_utilities.all;

--! @brief Simple memory module for use in Wishbone-based systems.
entity mw_soc_memory is
generic(
MEMORY_SIZE : natural := 4096; --! Memory size in bytes.
RAM_INIT_FILE : string
);
port(
clk : in std_logic;
rst : in std_logic;

-- Wishbone interface:
wishbone_in : in wishbone_master_out;
wishbone_out : out wishbone_slave_out
);
end entity mw_soc_memory;

architecture behaviour of mw_soc_memory is
signal wb_adr_in : std_logic_vector(log2(MEMORY_SIZE) - 1 downto 0);
type ram_t is array(0 to (MEMORY_SIZE / 8) - 1) of std_logic_vector(63 downto 0);

impure function init_ram(name : STRING) return ram_t is
file ram_file : text open read_mode is name;
variable ram_line : line;
variable temp_word : std_logic_vector(63 downto 0);
variable temp_ram : ram_t := (others => (others => '0'));
begin
for i in 0 to (MEMORY_SIZE/8)-1 loop
exit when endfile(ram_file);
readline(ram_file, ram_line);
hread(ram_line, temp_word);
temp_ram(i) := temp_word;
end loop;

return temp_ram;
end function;

signal memory : ram_t := init_ram(RAM_INIT_FILE);

attribute ram_style : string;
attribute ram_style of memory : signal is "block";

attribute ram_decomp : string;
attribute ram_decomp of memory : signal is "power";

type state_type is (IDLE, ACK);
signal state : state_type;

signal read_ack : std_logic;

begin

wb_adr_in <= wishbone_in.adr(log2(MEMORY_SIZE) - 1 downto 0);

wishbone_out.ack <= read_ack and wishbone_in.stb;

memory_0: process(clk)
begin
if rising_edge(clk) then
if rst = '1' then
read_ack <= '0';
state <= IDLE;
else
if wishbone_in.cyc = '1' then
case state is
when IDLE =>
if wishbone_in.stb = '1' and wishbone_in.we = '1' then
for i in 0 to 7 loop
if wishbone_in.sel(i) = '1' then
memory(to_integer(unsigned(wb_adr_in(wb_adr_in'left downto 3))))(((i + 1) * 8) - 1 downto i * 8)
<= wishbone_in.dat(((i + 1) * 8) - 1 downto i * 8);
end if;
end loop;
read_ack <= '1';
state <= ACK;
elsif wishbone_in.stb = '1' then
wishbone_out.dat <= memory(to_integer(unsigned(wb_adr_in(wb_adr_in'left downto 3))));
read_ack <= '1';
state <= ACK;
end if;
when ACK =>
read_ack <= '0';
state <= IDLE;
end case;
else
state <= IDLE;
read_ack <= '0';
end if;
end if;
end if;
end process;

end architecture behaviour;

@ -34,351 +34,353 @@ use ieee.numeric_std.all;
--! - Bit 0: data received (receive buffer not empty)
--! - Bit 1: ready to send data (transmit buffer empty)
entity pp_soc_uart is
generic(
FIFO_DEPTH : natural := 64 --! Depth of the input and output FIFOs.
generic(
FIFO_DEPTH : natural := 64 --! Depth of the input and output FIFOs.
);
port(
clk : in std_logic;
reset : in std_logic;

-- UART ports:
txd : out std_logic;
rxd : in std_logic;

-- Interrupt signal:
irq : out std_logic;

-- Wishbone ports:
wb_adr_in : in std_logic_vector(11 downto 0);
wb_dat_in : in std_logic_vector( 7 downto 0);
wb_dat_out : out std_logic_vector( 7 downto 0);
wb_we_in : in std_logic;
wb_cyc_in : in std_logic;
wb_stb_in : in std_logic;
wb_ack_out : out std_logic
port(
clk : in std_logic;
reset : in std_logic;

-- UART ports:
txd : out std_logic;
rxd : in std_logic;

-- Interrupt signal:
irq : out std_logic;

-- Wishbone ports:
wb_adr_in : in std_logic_vector(11 downto 0);
wb_dat_in : in std_logic_vector( 7 downto 0);
wb_dat_out : out std_logic_vector( 7 downto 0);
wb_we_in : in std_logic;
wb_cyc_in : in std_logic;
wb_stb_in : in std_logic;
wb_ack_out : out std_logic
);
end entity pp_soc_uart;

architecture behaviour of pp_soc_uart is

subtype bitnumber is natural range 0 to 7; --! Type representing the index of a bit.
subtype bitnumber is natural range 0 to 7; --! Type representing the index of a bit.

-- UART sample clock signals:
signal sample_clk : std_logic;
signal sample_clk_divisor : std_logic_vector(7 downto 0);
signal sample_clk_counter : std_logic_vector(sample_clk_divisor'range);
-- UART sample clock signals:
signal sample_clk : std_logic;
signal sample_clk_divisor : std_logic_vector(7 downto 0);
signal sample_clk_counter : std_logic_vector(sample_clk_divisor'range);

-- UART receive process signals:
type rx_state_type is (IDLE, RECEIVE, STARTBIT, STOPBIT);
signal rx_state : rx_state_type;
signal rx_byte : std_logic_vector(7 downto 0);
signal rx_current_bit : bitnumber;
-- UART receive process signals:
type rx_state_type is (IDLE, RECEIVE, STARTBIT, STOPBIT);
signal rx_state : rx_state_type;
signal rx_byte : std_logic_vector(7 downto 0);
signal rx_current_bit : bitnumber;

subtype rx_sample_counter_type is natural range 0 to 15;
signal rx_sample_counter : rx_sample_counter_type;
signal rx_sample_value : rx_sample_counter_type;
subtype rx_sample_counter_type is natural range 0 to 15;
signal rx_sample_counter : rx_sample_counter_type;
signal rx_sample_value : rx_sample_counter_type;

subtype rx_sample_delay_type is natural range 0 to 7;
signal rx_sample_delay : rx_sample_delay_type;
subtype rx_sample_delay_type is natural range 0 to 7;
signal rx_sample_delay : rx_sample_delay_type;

-- UART transmit process signals:
type tx_state_type is (IDLE, TRANSMIT, STOPBIT);
signal tx_state : tx_state_type;
signal tx_byte : std_logic_vector(7 downto 0);
signal tx_current_bit : bitnumber;
-- UART transmit process signals:
type tx_state_type is (IDLE, TRANSMIT, STOPBIT);
signal tx_state : tx_state_type;
signal tx_byte : std_logic_vector(7 downto 0);
signal tx_current_bit : bitnumber;

-- UART transmit clock:
subtype uart_tx_counter_type is natural range 0 to 15;
signal uart_tx_counter : uart_tx_counter_type := 0;
signal uart_tx_clk : std_logic;
-- UART transmit clock:
subtype uart_tx_counter_type is natural range 0 to 15;
signal uart_tx_counter : uart_tx_counter_type := 0;
signal uart_tx_clk : std_logic;

-- Buffer signals:
signal send_buffer_full, send_buffer_empty : std_logic;
signal recv_buffer_full, recv_buffer_empty : std_logic;
signal send_buffer_input, send_buffer_output : std_logic_vector(7 downto 0);
signal recv_buffer_input, recv_buffer_output : std_logic_vector(7 downto 0);
signal send_buffer_push, send_buffer_pop : std_logic := '0';
signal recv_buffer_push, recv_buffer_pop : std_logic := '0';
-- Buffer signals:
signal send_buffer_full, send_buffer_empty : std_logic;
signal recv_buffer_full, recv_buffer_empty : std_logic;
signal send_buffer_input, send_buffer_output : std_logic_vector(7 downto 0);
signal recv_buffer_input, recv_buffer_output : std_logic_vector(7 downto 0);
signal send_buffer_push, send_buffer_pop : std_logic := '0';
signal recv_buffer_push, recv_buffer_pop : std_logic := '0';

-- IRQ enable signals:
signal irq_recv_enable, irq_tx_ready_enable : std_logic := '0';
-- IRQ enable signals:
signal irq_recv_enable, irq_tx_ready_enable : std_logic := '0';

-- Wishbone signals:
type wb_state_type is (IDLE, WRITE_ACK, READ_ACK);
signal wb_state : wb_state_type;
-- Wishbone signals:
type wb_state_type is (IDLE, WRITE_ACK, READ_ACK);
signal wb_state : wb_state_type;

signal wb_ack : std_logic; --! Wishbone acknowledge signal
signal wb_ack : std_logic; --! Wishbone acknowledge signal

begin

irq <= (irq_recv_enable and (not recv_buffer_empty))
or (irq_tx_ready_enable and send_buffer_empty);

---------- UART receive ----------

recv_buffer_input <= rx_byte;

uart_receive: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
rx_state <= IDLE;
recv_buffer_push <= '0';
irq <= (irq_recv_enable and (not recv_buffer_empty))
or (irq_tx_ready_enable and send_buffer_empty);

---------- UART receive ----------

recv_buffer_input <= rx_byte;

uart_receive: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
rx_state <= IDLE;
recv_buffer_push <= '0';
else
case rx_state is
when IDLE =>
if recv_buffer_push = '1' then
recv_buffer_push <= '0';
end if;

if sample_clk = '1' and rxd = '0' then
rx_sample_value <= rx_sample_counter;
rx_sample_delay <= 0;
rx_current_bit <= 0;
rx_state <= STARTBIT;
end if;
when STARTBIT =>
if sample_clk = '1' then
if rx_sample_delay = 7 then
rx_state <= RECEIVE;
rx_sample_value <= rx_sample_counter;
rx_sample_delay <= 0;
else
case rx_state is
when IDLE =>
if recv_buffer_push = '1' then
recv_buffer_push <= '0';
end if;

if sample_clk = '1' and rxd = '0' then
rx_sample_value <= rx_sample_counter;
rx_sample_delay <= 0;
rx_current_bit <= 0;
rx_state <= STARTBIT;
end if;
when STARTBIT =>
if sample_clk = '1' then
if rx_sample_delay = 7 then
rx_state <= RECEIVE;
rx_sample_value <= rx_sample_counter;
rx_sample_delay <= 0;
else
rx_sample_delay <= rx_sample_delay + 1;
end if;
end if;
when RECEIVE =>
if sample_clk = '1' and rx_sample_counter = rx_sample_value then
if rx_current_bit /= 7 then
rx_byte(rx_current_bit) <= rxd;
rx_current_bit <= rx_current_bit + 1;
else
rx_byte(rx_current_bit) <= rxd;
rx_state <= STOPBIT;
end if;
end if;
when STOPBIT =>
if sample_clk = '1' and rx_sample_counter = rx_sample_value then
rx_state <= IDLE;

if recv_buffer_full = '0' then
recv_buffer_push <= '1';
end if;
end if;
end case;
rx_sample_delay <= rx_sample_delay + 1;
end if;
end if;
end process uart_receive;

sample_counter: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
rx_sample_counter <= 0;
elsif sample_clk = '1' then
if rx_sample_counter = 15 then
rx_sample_counter <= 0;
else
rx_sample_counter <= rx_sample_counter + 1;
end if;
end if;
when RECEIVE =>
if sample_clk = '1' and rx_sample_counter = rx_sample_value then
if rx_current_bit /= 7 then
rx_byte(rx_current_bit) <= rxd;
rx_current_bit <= rx_current_bit + 1;
else
rx_byte(rx_current_bit) <= rxd;
rx_state <= STOPBIT;
end if;
end if;
end process sample_counter;

---------- UART transmit ----------
end if;
when STOPBIT =>
if sample_clk = '1' and rx_sample_counter = rx_sample_value then
rx_state <= IDLE;

tx_byte <= send_buffer_output;

uart_transmit: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
txd <= '1';
tx_state <= IDLE;
send_buffer_pop <= '0';
tx_current_bit <= 0;
else
case tx_state is
when IDLE =>
if send_buffer_empty = '0' and uart_tx_clk = '1' then
txd <= '0';
send_buffer_pop <= '1';
tx_current_bit <= 0;
tx_state <= TRANSMIT;
elsif uart_tx_clk = '1' then
txd <= '1';
end if;
when TRANSMIT =>
if send_buffer_pop = '1' then
send_buffer_pop <= '0';
elsif uart_tx_clk = '1' and tx_current_bit = 7 then
txd <= tx_byte(tx_current_bit);
tx_state <= STOPBIT;
elsif uart_tx_clk = '1' then
txd <= tx_byte(tx_current_bit);
tx_current_bit <= tx_current_bit + 1;
end if;
when STOPBIT =>
if uart_tx_clk = '1' then
txd <= '1';
tx_state <= IDLE;
end if;
end case;
if recv_buffer_full = '0' then
recv_buffer_push <= '1';
end if;
end if;
end case;
end if;
end if;
end process uart_receive;

sample_counter: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
rx_sample_counter <= 0;
elsif sample_clk = '1' then
if rx_sample_counter = 15 then
rx_sample_counter <= 0;
else
rx_sample_counter <= rx_sample_counter + 1;
end if;
end process uart_transmit;

uart_tx_clock_generator: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
uart_tx_counter <= 0;
uart_tx_clk <= '0';
else
if sample_clk = '1' then
if uart_tx_counter = 15 then
uart_tx_counter <= 0;
uart_tx_clk <= '1';
else
uart_tx_counter <= uart_tx_counter + 1;
uart_tx_clk <= '0';
end if;
else
uart_tx_clk <= '0';
end if;
end if;
end if;
end if;
end process sample_counter;

---------- UART transmit ----------

tx_byte <= send_buffer_output;

uart_transmit: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
txd <= '1';
tx_state <= IDLE;
send_buffer_pop <= '0';
tx_current_bit <= 0;
else
case tx_state is
when IDLE =>
if send_buffer_empty = '0' and uart_tx_clk = '1' then
txd <= '0';
send_buffer_pop <= '1';
tx_current_bit <= 0;
tx_state <= TRANSMIT;
elsif uart_tx_clk = '1' then
txd <= '1';
end if;
when TRANSMIT =>
if send_buffer_pop = '1' then
send_buffer_pop <= '0';
elsif uart_tx_clk = '1' and tx_current_bit = 7 then
txd <= tx_byte(tx_current_bit);
tx_state <= STOPBIT;
elsif uart_tx_clk = '1' then
txd <= tx_byte(tx_current_bit);
tx_current_bit <= tx_current_bit + 1;
end if;
when STOPBIT =>
if uart_tx_clk = '1' then
txd <= '1';
tx_state <= IDLE;
end if;
end case;
end if;
end if;
end process uart_transmit;

uart_tx_clock_generator: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
uart_tx_counter <= 0;
uart_tx_clk <= '0';
else
if sample_clk = '1' then
if uart_tx_counter = 15 then
uart_tx_counter <= 0;
uart_tx_clk <= '1';
else
uart_tx_counter <= uart_tx_counter + 1;
uart_tx_clk <= '0';
end if;
else
uart_tx_clk <= '0';
end if;
end process uart_tx_clock_generator;

---------- Sample clock generator ----------

sample_clock_generator: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
sample_clk_counter <= (others => '0');
sample_clk <= '0';
else
if sample_clk_divisor /= x"00" then
if sample_clk_counter = sample_clk_divisor then
sample_clk_counter <= (others => '0');
sample_clk <= '1';
else
sample_clk_counter <= std_logic_vector(unsigned(sample_clk_counter) + 1);
sample_clk <= '0';
end if;
end if;
end if;
end if;
end if;
end process uart_tx_clock_generator;

---------- Sample clock generator ----------

sample_clock_generator: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
sample_clk_counter <= (others => '0');
sample_clk <= '0';
else
if sample_clk_divisor /= x"00" then
if sample_clk_counter = sample_clk_divisor then
sample_clk_counter <= (others => '0');
sample_clk <= '1';
else
sample_clk_counter <= std_logic_vector(unsigned(sample_clk_counter) + 1);
sample_clk <= '0';
end if;
end if;
end process sample_clock_generator;

---------- Data Buffers ----------

send_buffer: entity work.pp_fifo
generic map(
DEPTH => FIFO_DEPTH,
WIDTH => 8
) port map(
clk => clk,
reset => reset,
full => send_buffer_full,
empty => send_buffer_empty,
data_in => send_buffer_input,
data_out => send_buffer_output,
push => send_buffer_push,
pop => send_buffer_pop
end if;
end if;
end process sample_clock_generator;

---------- Data Buffers ----------

send_buffer: entity work.pp_fifo
generic map(
DEPTH => FIFO_DEPTH,
WIDTH => 8
) port map(
clk => clk,
reset => reset,
full => send_buffer_full,
empty => send_buffer_empty,
data_in => send_buffer_input,
data_out => send_buffer_output,
push => send_buffer_push,
pop => send_buffer_pop
);

recv_buffer: entity work.pp_fifo
generic map(
DEPTH => FIFO_DEPTH,
WIDTH => 8
) port map(
clk => clk,
reset => reset,
full => recv_buffer_full,
empty => recv_buffer_empty,
data_in => recv_buffer_input,
data_out => recv_buffer_output,
push => recv_buffer_push,
pop => recv_buffer_pop
recv_buffer: entity work.pp_fifo
generic map(
DEPTH => FIFO_DEPTH,
WIDTH => 8
) port map(
clk => clk,
reset => reset,
full => recv_buffer_full,
empty => recv_buffer_empty,
data_in => recv_buffer_input,
data_out => recv_buffer_output,
push => recv_buffer_push,
pop => recv_buffer_pop
);

---------- Wishbone Interface ----------

wb_ack_out <= wb_ack and wb_cyc_in and wb_stb_in;

wishbone: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
wb_ack <= '0';
wb_state <= IDLE;
send_buffer_push <= '0';
recv_buffer_pop <= '0';
sample_clk_divisor <= (others => '0');
irq_recv_enable <= '0';
irq_tx_ready_enable <= '0';
else
case wb_state is
when IDLE =>
if wb_cyc_in = '1' and wb_stb_in = '1' then
if wb_we_in = '1' then -- Write to register
if wb_adr_in = x"000" then
send_buffer_input <= wb_dat_in;
send_buffer_push <= '1';
elsif wb_adr_in = x"018" then
sample_clk_divisor <= wb_dat_in;
elsif wb_adr_in = x"020" then
irq_recv_enable <= wb_dat_in(0);
irq_tx_ready_enable <= wb_dat_in(1);
end if;

-- Invalid writes are acked and ignored.

wb_ack <= '1';
wb_state <= WRITE_ACK;
else -- Read from register
if wb_adr_in = x"008" then
recv_buffer_pop <= '1';
elsif wb_adr_in = x"010" then
wb_dat_out <= x"0" & send_buffer_full & recv_buffer_full & send_buffer_empty & recv_buffer_empty;
wb_ack <= '1';
elsif wb_adr_in = x"018" then
wb_dat_out <= sample_clk_divisor;
wb_ack <= '1';
elsif wb_adr_in = x"020" then
wb_dat_out <= (0 => irq_recv_enable, 1 => irq_tx_ready_enable, others => '0');
wb_ack <= '1';
else
wb_dat_out <= (others => '0');
wb_ack <= '1';
end if;
wb_state <= READ_ACK;
end if;
end if;
when WRITE_ACK =>
send_buffer_push <= '0';

if wb_stb_in = '0' then
wb_ack <= '0';
wb_state <= IDLE;
end if;
when READ_ACK =>
if recv_buffer_pop = '1' then
recv_buffer_pop <= '0';
else
wb_dat_out <= recv_buffer_output;
wb_ack <= '1';
end if;

if wb_stb_in = '0' then
wb_ack <= '0';
wb_state <= IDLE;
end if;
end case;
---------- Wishbone Interface ----------

wb_ack_out <= wb_ack and wb_cyc_in and wb_stb_in;

wishbone: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
wb_ack <= '0';
wb_state <= IDLE;
send_buffer_push <= '0';
recv_buffer_pop <= '0';
sample_clk_divisor <= (others => '0');
irq_recv_enable <= '0';
irq_tx_ready_enable <= '0';
else
case wb_state is
when IDLE =>
if wb_cyc_in = '1' and wb_stb_in = '1' then
if wb_we_in = '1' then -- Write to register
if wb_adr_in = x"000" then
send_buffer_input <= wb_dat_in;
send_buffer_push <= '1';
elsif wb_adr_in = x"018" then
sample_clk_divisor <= wb_dat_in;
elsif wb_adr_in = x"020" then
irq_recv_enable <= wb_dat_in(0);
irq_tx_ready_enable <= wb_dat_in(1);
end if;

-- Invalid writes are acked and ignored.
wb_ack <= '1';
wb_state <= WRITE_ACK;
else -- Read from register
if wb_adr_in = x"008" then
recv_buffer_pop <= '1';
elsif wb_adr_in = x"010" then
wb_dat_out <= x"0" & send_buffer_full & recv_buffer_full &
send_buffer_empty & recv_buffer_empty;
wb_ack <= '1';
elsif wb_adr_in = x"018" then
wb_dat_out <= sample_clk_divisor;
wb_ack <= '1';
elsif wb_adr_in = x"020" then
wb_dat_out <= (0 => irq_recv_enable,
1 => irq_tx_ready_enable,
others => '0');
wb_ack <= '1';
else
wb_dat_out <= (others => '0');
wb_ack <= '1';
end if;
wb_state <= READ_ACK;
end if;
end if;
end process wishbone;
end if;
when WRITE_ACK =>
send_buffer_push <= '0';

if wb_stb_in = '0' then
wb_ack <= '0';
wb_state <= IDLE;
end if;
when READ_ACK =>
if recv_buffer_pop = '1' then
recv_buffer_pop <= '0';
else
wb_dat_out <= recv_buffer_output;
wb_ack <= '1';
end if;

if wb_stb_in = '0' then
wb_ack <= '0';
wb_state <= IDLE;
end if;
end case;
end if;
end if;
end process wishbone;

end architecture behaviour;

@ -7,7 +7,8 @@ entity toplevel is
RAM_INIT_FILE : string := "firmware.hex";
RESET_LOW : boolean := true;
CLK_INPUT : positive := 100000000;
CLK_FREQUENCY : positive := 100000000
CLK_FREQUENCY : positive := 100000000;
DISABLE_FLATTEN_CORE : boolean := false
);
port(
ext_clk : in std_ulogic;
@ -62,7 +63,8 @@ begin
MEMORY_SIZE => MEMORY_SIZE,
RAM_INIT_FILE => RAM_INIT_FILE,
RESET_LOW => RESET_LOW,
SIM => false
SIM => false,
DISABLE_FLATTEN_CORE => DISABLE_FLATTEN_CORE
)
port map (
system_clk => system_clk,

@ -21,6 +21,7 @@ use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.utils.all;
use work.common.all;
use work.wishbone_types.all;

@ -51,26 +52,6 @@ entity icache is
end entity icache;

architecture rtl of icache is
function log2(i : natural) return integer is
variable tmp : integer := i;
variable ret : integer := 0;
begin
while tmp > 1 loop
ret := ret + 1;
tmp := tmp / 2;
end loop;
return ret;
end function;

function ispow2(i : integer) return boolean is
begin
if to_integer(to_unsigned(i, 32) and to_unsigned(i - 1, 32)) = 0 then
return true;
else
return false;
end if;
end function;

-- BRAM organisation: We never access more than wishbone_data_bits at
-- a time so to save resources we make the array only that wide, and
-- use consecutive indices for to make a cache "line"
@ -159,6 +140,7 @@ architecture rtl of icache is
wb : wishbone_master_out;
store_way : way_t;
store_index : index_t;
store_row : row_t;
end record;

signal r : reg_internal_t;
@ -170,6 +152,7 @@ architecture rtl of icache is
signal req_tag : cache_tag_t;
signal req_is_hit : std_ulogic;
signal req_is_miss : std_ulogic;
signal req_laddr : std_ulogic_vector(63 downto 0);

-- Cache RAM interface
type cache_ram_out_t is array(way_t) of cache_row_t;
@ -193,12 +176,21 @@ architecture rtl of icache is
end;

-- Returns whether this is the last row of a line
function is_last_row(addr: wishbone_addr_type) return boolean is
function is_last_row_addr(addr: wishbone_addr_type) return boolean is
constant ones : std_ulogic_vector(ROW_LINEBITS-1 downto 0) := (others => '1');
begin
return addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS) = ones;
end;

-- Returns whether this is the last row of a line
function is_last_row(row: row_t) return boolean is
variable row_v : std_ulogic_vector(ROW_BITS-1 downto 0);
constant ones : std_ulogic_vector(ROW_LINEBITS-1 downto 0) := (others => '1');
begin
row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
return row_v(ROW_LINEBITS-1 downto 0) = ones;
end;

-- Return the address of the next row in the current cache line
function next_row_addr(addr: wishbone_addr_type)
return std_ulogic_vector is
@ -213,6 +205,21 @@ architecture rtl of icache is
return result;
end;

-- Return the next row in the current cache line. We use a dedicated
-- function in order to limit the size of the generated adder to be
-- only the bits within a cache line (3 bits with default settings)
--
function next_row(row: row_t) return row_t is
variable row_v : std_ulogic_vector(ROW_BITS-1 downto 0);
variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
variable result : std_ulogic_vector(ROW_BITS-1 downto 0);
begin
row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
row_idx := row_v(ROW_LINEBITS-1 downto 0);
row_v(ROW_LINEBITS-1 downto 0) := std_ulogic_vector(unsigned(row_idx) + 1);
return to_integer(unsigned(row_v));
end;

-- Read the instruction word for the given address in the current cache row
function read_insn_word(addr: std_ulogic_vector(63 downto 0);
data: cache_row_t) return std_ulogic_vector is
@ -298,7 +305,6 @@ begin
wr_data => wishbone_in.dat
);
process(all)
variable tmp_adr : std_ulogic_vector(63 downto 0);
begin
do_read <= '1';
do_write <= '0';
@ -307,8 +313,7 @@ begin
end if;
cache_out(i) <= dout;
rd_addr <= std_ulogic_vector(to_unsigned(req_row, ROW_BITS));
tmp_adr := (r.wb.adr'left downto 0 => r.wb.adr, others => '0');
wr_addr <= std_ulogic_vector(to_unsigned(get_row(tmp_adr), ROW_BITS));
wr_addr <= std_ulogic_vector(to_unsigned(r.store_row, ROW_BITS));
end process;
end generate;
@ -358,6 +363,12 @@ begin
req_row <= get_row(i_in.nia);
req_tag <= get_tag(i_in.nia);

-- Calculate address of beginning of cache line, will be
-- used for cache miss processing if needed
--
req_laddr <= i_in.nia(63 downto LINE_OFF_BITS) &
(LINE_OFF_BITS-1 downto 0 => '0');

-- Test if pending request is a hit on any way
hit_way := 0;
is_hit := '0';
@ -427,7 +438,8 @@ begin

-- Cache miss/reload synchronous machine
icache_miss : process(clk)
variable tagset : cache_tags_set_t;
variable tagset : cache_tags_set_t;
variable stbs_done : boolean;
begin
if rising_edge(clk) then
-- On reset, clear all valid bits to force misses
@ -473,29 +485,54 @@ begin
-- Keep track of our index and way for subsequent stores
r.store_index <= req_index;
r.store_way <= replace_way;
r.store_row <= get_row(req_laddr);

-- Prep for first wishbone read. We calculate the address of
-- the start of the cache line
-- the start of the cache line and start the WB cycle.
--
r.wb.adr <= i_in.nia(r.wb.adr'left downto LINE_OFF_BITS) &
(LINE_OFF_BITS-1 downto 0 => '0');
r.wb.adr <= req_laddr(r.wb.adr'left downto 0);
r.wb.cyc <= '1';
r.wb.stb <= '1';

-- Track that we had one request sent
r.state <= WAIT_ACK;
end if;

when WAIT_ACK =>
-- Requests are all sent if stb is 0
stbs_done := r.wb.stb = '0';

-- If we are still sending requests, was one accepted ?
if wishbone_in.stall = '0' and not stbs_done then
-- That was the last word ? We are done sending. Clear
-- stb and set stbs_done so we can handle an eventual last
-- ack on the same cycle.
--
if is_last_row_addr(r.wb.adr) then
r.wb.stb <= '0';
stbs_done := true;
end if;

-- Calculate the next row address
r.wb.adr <= next_row_addr(r.wb.adr);
end if;

-- Incoming acks processing
if wishbone_in.ack = '1' then
-- That was the last word ? We are done
if is_last_row(r.wb.adr) then
cache_valids(r.store_index)(r.store_way) <= '1';
-- Check for completion
if stbs_done and is_last_row(r.store_row) then
-- Complete wishbone cycle
r.wb.cyc <= '0';
r.wb.stb <= '0';

-- Cache line is now valid
cache_valids(r.store_index)(r.store_way) <= '1';

-- We are done
r.state <= IDLE;
else
-- Otherwise, calculate the next row address
r.wb.adr <= next_row_addr(r.wb.adr);
end if;

-- Increment store row counter
r.store_row <= next_row(r.store_row);
end if;
end case;
end if;

@ -36,9 +36,9 @@ begin
);

-- BRAM Memory slave
bram0: entity work.mw_soc_memory
bram0: entity work.wishbone_bram_wrapper
generic map(
MEMORY_SIZE => 128,
MEMORY_SIZE => 1024,
RAM_INIT_FILE => "icache_test.bin"
)
port map(
@ -68,15 +68,20 @@ begin
begin
i_out.req <= '0';
i_out.nia <= (others => '0');
i_out.stop_mark <= '0';

wait for 4*clk_period;
wait until rising_edge(clk);
wait until rising_edge(clk);
wait until rising_edge(clk);
wait until rising_edge(clk);

i_out.req <= '1';
i_out.nia <= x"0000000000000004";

wait for 30*clk_period;
wait until rising_edge(clk);

assert i_in.valid = '1';
assert i_in.valid = '1' severity failure;
assert i_in.insn = x"00000001"
report "insn @" & to_hstring(i_out.nia) &
"=" & to_hstring(i_in.insn) &
@ -85,27 +90,29 @@ begin

i_out.req <= '0';

wait for clk_period;
wait until rising_edge(clk);

-- hit
i_out.req <= '1';
i_out.nia <= x"0000000000000008";
wait for clk_period;
assert i_in.valid = '1';
wait until rising_edge(clk);
wait until rising_edge(clk);
assert i_in.valid = '1' severity failure;
assert i_in.insn = x"00000002"
report "insn @" & to_hstring(i_out.nia) &
"=" & to_hstring(i_in.insn) &
" expected 00000002"
severity failure;
wait for clk_period;
wait until rising_edge(clk);

-- another miss
i_out.req <= '1';
i_out.nia <= x"0000000000000040";

wait for 30*clk_period;
wait until rising_edge(clk);

assert i_in.valid = '1';
assert i_in.valid = '1' severity failure;
assert i_in.insn = x"00000010"
report "insn @" & to_hstring(i_out.nia) &
"=" & to_hstring(i_in.insn) &
@ -115,13 +122,15 @@ begin
-- test something that aliases
i_out.req <= '1';
i_out.nia <= x"0000000000000100";
wait for clk_period;
assert i_in.valid = '0';
wait for clk_period;
wait until rising_edge(clk);
wait until rising_edge(clk);
assert i_in.valid = '0' severity failure;
wait until rising_edge(clk);

wait for 30*clk_period;
wait until rising_edge(clk);

assert i_in.valid = '1';
assert i_in.valid = '1' severity failure;
assert i_in.insn = x"00000040"
report "insn @" & to_hstring(i_out.nia) &
"=" & to_hstring(i_in.insn) &

@ -36,20 +36,22 @@ filesets:
- plru.vhdl
- cache_ram.vhdl
- core_debug.vhdl
- utils.vhdl
file_type : vhdlSource-2008

soc:
files:
- wishbone_arbiter.vhdl
- wishbone_debug_master.vhdl
- wishbone_bram_wrapper.vhdl
- soc.vhdl
file_type : vhdlSource-2008

fpga:
files:
- fpga/pp_fifo.vhd
- fpga/mw_soc_memory.vhdl
- fpga/main_bram.vhdl
- fpga/soc_reset.vhdl
- fpga/pp_fifo.vhd
- fpga/pp_soc_uart.vhd
- fpga/pp_utilities.vhd
- fpga/toplevel.vhdl
@ -93,6 +95,7 @@ targets:
- ram_init_file
- clk_input
- clk_frequency
- disable_flatten_core
tools:
vivado: {part : xc7a100tcsg324-1}
toplevel : toplevel
@ -105,6 +108,7 @@ targets:
- ram_init_file
- clk_input
- clk_frequency
- disable_flatten_core
tools:
vivado: {part : xc7a200tsbg484-1}
toplevel : toplevel
@ -117,6 +121,7 @@ targets:
- ram_init_file
- clk_input
- clk_frequency
- disable_flatten_core
tools:
vivado: {part : xc7a35ticsg324-1L}
toplevel : toplevel
@ -129,6 +134,7 @@ targets:
- ram_init_file
- clk_input
- clk_frequency
- disable_flatten_core
tools:
vivado: {part : xc7a100ticsg324-1L}
toplevel : toplevel
@ -142,6 +148,7 @@ targets:
- reset_low=false
- clk_input=12000000
- clk_frequency
- disable_flatten_core
tools:
vivado: {part : xc7a35tcpg236-1}
toplevel : toplevel
@ -179,3 +186,9 @@ parameters:
description : Generated system clock frequency in HZ (for top-generic based boards)
paramtype : generic
default : 50000000

disable_flatten_core:
datatype : bool
description : Prevent Vivado from flattening the main core components
paramtype : generic
default : false

@ -21,7 +21,7 @@ Y=$(${MICROWATT_DIR}/scripts/hash.py tests/${TEST}.out)

cd $TMPDIR

cp ${MICROWATT_DIR}/tests/${TEST}.bin simple_ram_behavioural.bin
cp ${MICROWATT_DIR}/tests/${TEST}.bin main_ram.bin

X=$( ${MICROWATT_DIR}/core_tb | ${MICROWATT_DIR}/scripts/hash.py )


@ -13,7 +13,7 @@ cwd = os.getcwd()
os.chdir(tempdir.name)

copyfile(os.path.join(cwd, 'tests/micropython.bin'),
os.path.join(tempdir.name, 'simple_ram_behavioural.bin'))
os.path.join(tempdir.name, 'main_ram.bin'))

cmd = [ os.path.join(cwd, './core_tb') ]


@ -13,7 +13,7 @@ cwd = os.getcwd()
os.chdir(tempdir.name)

copyfile(os.path.join(cwd, 'tests/micropython.bin'),
os.path.join(tempdir.name, 'simple_ram_behavioural.bin'))
os.path.join(tempdir.name, 'main_ram.bin'))

cmd = [ os.path.join(cwd, './core_tb') ]


@ -0,0 +1,67 @@
-- Single port Block RAM with one cycle output buffer
--
-- Simulated via C helpers

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use std.textio.all;

library work;
use work.utils.all;
use work.sim_bram_helpers.all;

entity main_bram is
generic(
WIDTH : natural := 64;
HEIGHT_BITS : natural := 1024;
MEMORY_SIZE : natural := 65536;
RAM_INIT_FILE : string
);
port(
clk : in std_logic;
addr : in std_logic_vector(HEIGHT_BITS - 1 downto 0) ;
di : in std_logic_vector(WIDTH-1 downto 0);
do : out std_logic_vector(WIDTH-1 downto 0);
sel : in std_logic_vector((WIDTH/8)-1 downto 0);
re : in std_ulogic;
we : in std_ulogic
);
end entity main_bram;

architecture sim of main_bram is

constant WIDTH_BYTES : natural := WIDTH / 8;
constant pad_zeros : std_ulogic_vector(log2(WIDTH_BYTES)-1 downto 0)
:= (others => '0');

signal identifier : integer := behavioural_initialize(filename => RAM_INIT_FILE,
size => MEMORY_SIZE);
-- Others
signal obuf : std_logic_vector(WIDTH-1 downto 0);
begin

-- Actual RAM template
memory_0: process(clk)
variable ret_dat_v : std_ulogic_vector(63 downto 0);
variable addr64 : std_ulogic_vector(63 downto 0);
begin
if rising_edge(clk) then
addr64 := (others => '0');
addr64(HEIGHT_BITS + 2 downto 3) := addr;
if we = '1' then
report "RAM writing " & to_hstring(di) & " to " &
to_hstring(addr & pad_zeros) & " sel:" & to_hstring(sel);
behavioural_write(di, addr64, to_integer(unsigned(sel)), identifier);
end if;
if re = '1' then
behavioural_read(ret_dat_v, addr64, to_integer(unsigned(sel)), identifier);
report "RAM reading from " & to_hstring(addr & pad_zeros) &
" returns " & to_hstring(ret_dat_v);
obuf <= ret_dat_v(obuf'left downto 0);
end if;
do <= obuf;
end if;
end process;

end architecture sim;

@ -1,24 +1,24 @@
library ieee;
use ieee.std_logic_1164.all;

package simple_ram_behavioural_helpers is
package sim_bram_helpers is
function behavioural_initialize (filename: String; size: integer) return integer;
attribute foreign of behavioural_initialize : function is "VHPIDIRECT behavioural_initialize";

procedure behavioural_read (val: out std_ulogic_vector(63 downto 0); addr: std_ulogic_vector(63 downto 0); length: integer; identifier: integer; reload: integer);
procedure behavioural_read (val: out std_ulogic_vector(63 downto 0); addr: std_ulogic_vector(63 downto 0); length: integer; identifier:integer);
attribute foreign of behavioural_read : procedure is "VHPIDIRECT behavioural_read";

procedure behavioural_write (val: std_ulogic_vector(63 downto 0); addr: std_ulogic_vector(63 downto 0); length: integer; identifier: integer);
attribute foreign of behavioural_write : procedure is "VHPIDIRECT behavioural_write";
end simple_ram_behavioural_helpers;
end sim_bram_helpers;

package body simple_ram_behavioural_helpers is
package body sim_bram_helpers is
function behavioural_initialize (filename: String; size: integer) return integer is
begin
assert false report "VHPI" severity failure;
end behavioural_initialize;

procedure behavioural_read (val: out std_ulogic_vector(63 downto 0); addr: std_ulogic_vector(63 downto 0); length: integer; identifier: integer; reload: integer) is
procedure behavioural_read (val: out std_ulogic_vector(63 downto 0); addr: std_ulogic_vector(63 downto 0); length: integer; identifier: integer) is
begin
assert false report "VHPI" severity failure;
end behavioural_read;
@ -27,4 +27,4 @@ package body simple_ram_behavioural_helpers is
begin
assert false report "VHPI" severity failure;
end behavioural_write;
end simple_ram_behavioural_helpers;
end sim_bram_helpers;

@ -1,79 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use std.textio.all;

library work;
use work.wishbone_types.all;
use work.simple_ram_behavioural_helpers.all;

entity mw_soc_memory is
generic (
RAM_INIT_FILE : string;
MEMORY_SIZE : integer
);

port (
clk : in std_ulogic;
rst : in std_ulogic;

wishbone_in : in wishbone_master_out;
wishbone_out : out wishbone_slave_out
);
end mw_soc_memory;

architecture behave of mw_soc_memory is
type wishbone_state_t is (IDLE, ACK);

signal state : wishbone_state_t := IDLE;
signal ret_ack : std_ulogic := '0';
signal identifier : integer := behavioural_initialize(filename => RAM_INIT_FILE, size => MEMORY_SIZE);
signal reload : integer := 0;
begin
wishbone_process: process(clk)
variable ret_dat: std_ulogic_vector(63 downto 0) := (others => '0');
variable adr: std_ulogic_vector(63 downto 0);
begin
wishbone_out.ack <= ret_ack and wishbone_in.cyc and wishbone_in.stb;
wishbone_out.dat <= ret_dat;

if rising_edge(clk) then
if rst = '1' then
state <= IDLE;
ret_ack <= '0';
else
ret_dat := x"FFFFFFFFFFFFFFFF";

-- Active
if wishbone_in.cyc = '1' then
case state is
when IDLE =>
if wishbone_in.stb = '1' then
-- write
adr := (wishbone_in.adr'left downto 0 => wishbone_in.adr, others => '0');
if wishbone_in.we = '1' then
assert not(is_x(wishbone_in.dat)) and not(is_x(wishbone_in.adr)) severity failure;
report "RAM writing " & to_hstring(wishbone_in.dat) & " to " & to_hstring(wishbone_in.adr);
behavioural_write(wishbone_in.dat, adr, to_integer(unsigned(wishbone_in.sel)), identifier);
reload <= reload + 1;
ret_ack <= '1';
state <= ACK;
else
behavioural_read(ret_dat, adr, to_integer(unsigned(wishbone_in.sel)), identifier, reload);
report "RAM reading from " & to_hstring(wishbone_in.adr) & " returns " & to_hstring(ret_dat);
ret_ack <= '1';
state <= ACK;
end if;
end if;
when ACK =>
ret_ack <= '0';
state <= IDLE;
end case;
else
ret_ack <= '0';
state <= IDLE;
end if;
end if;
end if;
end process;
end behave;

@ -1,246 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.wishbone_types.all;

entity simple_ram_behavioural_tb is
end simple_ram_behavioural_tb;

architecture behave of simple_ram_behavioural_tb is
signal clk : std_ulogic;
signal rst : std_ulogic := '1';

constant clk_period : time := 10 ns;

signal w_in : wishbone_slave_out;
signal w_out : wishbone_master_out;

impure function to_adr(a: integer) return std_ulogic_vector is
begin
return std_ulogic_vector(to_unsigned(a, w_out.adr'length));
end;
begin
simple_ram_0: entity work.mw_soc_memory
generic map (
RAM_INIT_FILE => "simple_ram_behavioural_tb.bin",
MEMORY_SIZE => 16
)
port map (
clk => clk,
rst => rst,
wishbone_out => w_in,
wishbone_in => w_out
);

clock: process
begin
clk <= '1';
wait for clk_period / 2;
clk <= '0';
wait for clk_period / 2;
end process clock;

stim: process
begin
w_out.adr <= (others => '0');
w_out.dat <= (others => '0');
w_out.cyc <= '0';
w_out.stb <= '0';
w_out.sel <= (others => '0');
w_out.we <= '0';

wait for clk_period;
rst <= '0';

wait for clk_period;

w_out.cyc <= '1';

-- test various read lengths and alignments
w_out.stb <= '1';
w_out.sel <= "00000001";
w_out.adr <= to_adr(0);
assert w_in.ack = '0';
wait for clk_period;
assert w_in.ack = '1';
assert w_in.dat(7 downto 0) = x"00" report to_hstring(w_in.dat);
w_out.stb <= '0';
wait for clk_period;
assert w_in.ack = '0';

w_out.stb <= '1';
w_out.sel <= "00000001";
w_out.adr <= to_adr(1);
assert w_in.ack = '0';
wait for clk_period;
assert w_in.ack = '1';
assert w_in.dat(7 downto 0) = x"01" report to_hstring(w_in.dat);
w_out.stb <= '0';
wait for clk_period;
assert w_in.ack = '0';

w_out.stb <= '1';
w_out.sel <= "00000001";
w_out.adr <= to_adr(7);
assert w_in.ack = '0';
wait for clk_period;
assert w_in.ack = '1';
assert w_in.dat(7 downto 0) = x"07" report to_hstring(w_in.dat);
w_out.stb <= '0';
wait for clk_period;
assert w_in.ack = '0';

w_out.stb <= '1';
w_out.sel <= "00000011";
w_out.adr <= to_adr(0);
assert w_in.ack = '0';
wait for clk_period;
assert w_in.ack = '1';
assert w_in.dat(15 downto 0) = x"0100" report to_hstring(w_in.dat);
w_out.stb <= '0';
wait for clk_period;
assert w_in.ack = '0';

w_out.stb <= '1';
w_out.sel <= "00000011";
w_out.adr <= to_adr(1);
assert w_in.ack = '0';
wait for clk_period;
assert w_in.ack = '1';
assert w_in.dat(15 downto 0) = x"0201" report to_hstring(w_in.dat);
w_out.stb <= '0';
wait for clk_period;
assert w_in.ack = '0';

w_out.stb <= '1';
w_out.sel <= "00000011";
w_out.adr <= to_adr(7);
assert w_in.ack = '0';
wait for clk_period;
assert w_in.ack = '1';
assert w_in.dat(15 downto 0) = x"0807" report to_hstring(w_in.dat);
w_out.stb <= '0';
wait for clk_period;
assert w_in.ack = '0';

w_out.stb <= '1';
w_out.sel <= "00001111";
w_out.adr <= to_adr(0);
assert w_in.ack = '0';
wait for clk_period;
assert w_in.ack = '1';
assert w_in.dat(31 downto 0) = x"03020100" report to_hstring(w_in.dat);
w_out.stb <= '0';
wait for clk_period;
assert w_in.ack = '0';

w_out.stb <= '1';
w_out.sel <= "00001111";
w_out.adr <= to_adr(1);
assert w_in.ack = '0';
wait for clk_period;
assert w_in.ack = '1';
assert w_in.dat(31 downto 0) = x"04030201" report to_hstring(w_in.dat);
w_out.stb <= '0';
wait for clk_period;
assert w_in.ack = '0';

w_out.stb <= '1';
w_out.sel <= "00001111";
w_out.adr <= to_adr(7);
assert w_in.ack = '0';
wait for clk_period;
assert w_in.ack = '1';
assert w_in.dat(31 downto 0) = x"0A090807" report to_hstring(w_in.dat);
w_out.stb <= '0';
wait for clk_period;
assert w_in.ack = '0';

w_out.stb <= '1';
w_out.sel <= "11111111";
w_out.adr <= to_adr(0);
assert w_in.ack = '0';
wait for clk_period;
assert w_in.ack = '1';
assert w_in.dat(63 downto 0) = x"0706050403020100" report to_hstring(w_in.dat);
w_out.stb <= '0';
wait for clk_period;
assert w_in.ack = '0';

w_out.stb <= '1';
w_out.sel <= "11111111";
w_out.adr <= to_adr(1);
assert w_in.ack = '0';
wait for clk_period;
assert w_in.ack = '1';
assert w_in.dat(63 downto 0) = x"0807060504030201" report to_hstring(w_in.dat);
w_out.stb <= '0';
wait for clk_period;
assert w_in.ack = '0';

w_out.stb <= '1';
w_out.sel <= "11111111";
w_out.adr <= to_adr(7);
assert w_in.ack = '0';
wait for clk_period;
assert w_in.ack = '1';
assert w_in.dat(63 downto 0) = x"0E0D0C0B0A090807" report to_hstring(w_in.dat);
w_out.stb <= '0';
wait for clk_period;
assert w_in.ack = '0';

-- test various write lengths and alignments
w_out.stb <= '1';
w_out.sel <= "00000001";
w_out.adr <= to_adr(0);
w_out.we <= '1';
w_out.dat(7 downto 0) <= x"0F";
assert w_in.ack = '0';
wait for clk_period;
assert w_in.ack = '1';
w_out.stb <= '0';
wait for clk_period;
assert w_in.ack = '0';

w_out.stb <= '1';
w_out.sel <= "00000001";
w_out.adr <= to_adr(0);
w_out.we <= '0';
assert w_in.ack = '0';
wait for clk_period;
assert w_in.ack = '1';
assert w_in.dat(7 downto 0) = x"0F" report to_hstring(w_in.dat);
w_out.stb <= '0';
wait for clk_period;
assert w_in.ack = '0';

w_out.stb <= '1';
w_out.sel <= "11111111";
w_out.adr <= to_adr(7);
w_out.we <= '1';
w_out.dat <= x"BADC0FFEBADC0FFE";
assert w_in.ack = '0';
wait for clk_period;
assert w_in.ack = '1';
w_out.stb <= '0';
wait for clk_period;
assert w_in.ack = '0';

w_out.stb <= '1';
w_out.sel <= "11111111";
w_out.adr <= to_adr(7);
w_out.we <= '0';
assert w_in.ack = '0';
wait for clk_period;
assert w_in.ack = '1';
assert w_in.dat = x"BADC0FFEBADC0FFE" report to_hstring(w_in.dat);
w_out.stb <= '0';
wait for clk_period;
assert w_in.ack = '0';

assert false report "end of test" severity failure;
wait;
end process;
end behave;

@ -17,7 +17,8 @@ entity soc is
MEMORY_SIZE : positive;
RAM_INIT_FILE : string;
RESET_LOW : boolean;
SIM : boolean
SIM : boolean;
DISABLE_FLATTEN_CORE : boolean := false
);
port(
rst : in std_ulogic;
@ -42,6 +43,12 @@ architecture behaviour of soc is
signal wishbone_debug_in : wishbone_slave_out;
signal wishbone_debug_out : wishbone_master_out;

-- Arbiter array (ghdl doesnt' support assigning the array
-- elements in the entity instantiation)
constant NUM_WB_MASTERS : positive := 3;
signal wb_masters_out : wishbone_master_out_vector(0 to NUM_WB_MASTERS-1);
signal wb_masters_in : wishbone_slave_out_vector(0 to NUM_WB_MASTERS-1);

-- Wishbone master (output of arbiter):
signal wb_master_in : wishbone_slave_out;
signal wb_master_out : wishbone_master_out;
@ -76,7 +83,8 @@ begin
-- Processor core
processor: entity work.core
generic map(
SIM => SIM
SIM => SIM,
DISABLE_FLATTEN => DISABLE_FLATTEN_CORE
)
port map(
clk => system_clk,
@ -94,13 +102,22 @@ begin
);

-- Wishbone bus master arbiter & mux
wb_masters_out <= (0 => wishbone_dcore_out,
1 => wishbone_icore_out,
2 => wishbone_debug_out);
wishbone_dcore_in <= wb_masters_in(0);
wishbone_icore_in <= wb_masters_in(1);
wishbone_debug_in <= wb_masters_in(2);
wishbone_arbiter_0: entity work.wishbone_arbiter
generic map(
NUM_MASTERS => NUM_WB_MASTERS
)
port map(
clk => system_clk, rst => rst,
wb1_in => wishbone_dcore_out, wb1_out => wishbone_dcore_in,
wb2_in => wishbone_icore_out, wb2_out => wishbone_icore_in,
wb3_in => wishbone_debug_out, wb3_out => wishbone_debug_in,
wb_out => wb_master_out, wb_in => wb_master_in
wb_masters_in => wb_masters_out,
wb_masters_out => wb_masters_in,
wb_slave_out => wb_master_out,
wb_slave_in => wb_master_in
);

-- Wishbone slaves address decoder & mux
@ -136,6 +153,7 @@ begin
when others =>
wb_master_in.dat <= (others => '1');
wb_master_in.ack <= wb_master_out.stb and wb_master_out.cyc;
wb_master_in.stall <= '0';
end case;
end process slave_intercon;

@ -164,9 +182,10 @@ begin
wb_ack_out => wb_uart0_out.ack
);
wb_uart0_out.dat <= x"00000000000000" & uart_dat8;
wb_uart0_out.stall <= '0' when wb_uart0_in.cyc = '0' else not wb_uart0_out.ack;

-- BRAM Memory slave
bram0: entity work.mw_soc_memory
bram0: entity work.wishbone_bram_wrapper
generic map(
MEMORY_SIZE => MEMORY_SIZE,
RAM_INIT_FILE => RAM_INIT_FILE

@ -0,0 +1,35 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

package utils is

function log2(i : natural) return integer;
function ispow2(i : integer) return boolean;

end utils;

package body utils is

function log2(i : natural) return integer is
variable tmp : integer := i;
variable ret : integer := 0;
begin
while tmp > 1 loop
ret := ret + 1;
tmp := tmp / 2;
end loop;
return ret;
end function;

function ispow2(i : integer) return boolean is
begin
if to_integer(to_unsigned(i, 32) and to_unsigned(i - 1, 32)) = 0 then
return true;
else
return false;
end if;
end function;

end utils;

@ -6,73 +6,64 @@ use work.wishbone_types.all;

-- TODO: Use an array of master/slaves with parametric size
entity wishbone_arbiter is
generic(
NUM_MASTERS : positive := 3
);
port (clk : in std_ulogic;
rst : in std_ulogic;

wb1_in : in wishbone_master_out;
wb1_out : out wishbone_slave_out;
wb_masters_in : in wishbone_master_out_vector(0 to NUM_MASTERS-1);
wb_masters_out : out wishbone_slave_out_vector(0 to NUM_MASTERS-1);

wb2_in : in wishbone_master_out;
wb2_out : out wishbone_slave_out;

wb3_in : in wishbone_master_out;
wb3_out : out wishbone_slave_out;

wb_out : out wishbone_master_out;
wb_in : in wishbone_slave_out
wb_slave_out : out wishbone_master_out;
wb_slave_in : in wishbone_slave_out
);
end wishbone_arbiter;

architecture behave of wishbone_arbiter is
type wishbone_arbiter_state_t is (IDLE, WB1_BUSY, WB2_BUSY, WB3_BUSY);
signal state : wishbone_arbiter_state_t := IDLE;
subtype wb_arb_master_t is integer range 0 to NUM_MASTERS-1;
signal candidate, selected : wb_arb_master_t;
signal busy : std_ulogic;
begin

wishbone_muxes: process(state, wb_in, wb1_in, wb2_in, wb3_in)
busy <= wb_masters_in(selected).cyc;

wishbone_muxes: process(selected, candidate, busy, wb_slave_in, wb_masters_in)
variable early_sel : wb_arb_master_t;
begin
-- Requests from masters are fully muxed
wb_out <= wb1_in when state = WB1_BUSY else
wb2_in when state = WB2_BUSY else
wb3_in when state = WB3_BUSY else
wishbone_master_out_init;
early_sel := selected;
if busy = '0' then
early_sel := candidate;
end if;
wb_slave_out <= wb_masters_in(early_sel);
for i in 0 to NUM_MASTERS-1 loop
wb_masters_out(i).dat <= wb_slave_in.dat;
wb_masters_out(i).ack <= wb_slave_in.ack when early_sel = i else '0';
wb_masters_out(i).stall <= wb_slave_in.stall when early_sel = i else '1';
end loop;
end process;

-- Responses from slave don't need to mux the data bus
wb1_out.dat <= wb_in.dat;
wb2_out.dat <= wb_in.dat;
wb3_out.dat <= wb_in.dat;
wb1_out.ack <= wb_in.ack when state = WB1_BUSY else '0';
wb2_out.ack <= wb_in.ack when state = WB2_BUSY else '0';
wb3_out.ack <= wb_in.ack when state = WB3_BUSY else '0';
-- Candidate selection is dumb, priority order... we could
-- instead consider some form of fairness but it's not really
-- an issue at the moment.
--
wishbone_candidate: process(all)
begin
candidate <= selected;
for i in NUM_MASTERS-1 downto 0 loop
if wb_masters_in(i).cyc = '1' then
candidate <= i;
end if;
end loop;
end process;

wishbone_arbiter_process: process(clk)
begin
if rising_edge(clk) then
if rst = '1' then
state <= IDLE;
else
case state is
when IDLE =>
if wb1_in.cyc = '1' then
state <= WB1_BUSY;
elsif wb2_in.cyc = '1' then
state <= WB2_BUSY;
elsif wb3_in.cyc = '1' then
state <= WB3_BUSY;
end if;
when WB1_BUSY =>
if wb1_in.cyc = '0' then
state <= IDLE;
end if;
when WB2_BUSY =>
if wb2_in.cyc = '0' then
state <= IDLE;
end if;
when WB3_BUSY =>
if wb3_in.cyc = '0' then
state <= IDLE;
end if;
end case;
selected <= 0;
elsif busy = '0' then
selected <= candidate;
end if;
end if;
end process;

@ -0,0 +1,175 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.wishbone_types.all;

entity wishbone_bram_tb is
end wishbone_bram_tb;

architecture behave of wishbone_bram_tb is
signal clk : std_ulogic;
signal rst : std_ulogic := '1';

constant clk_period : time := 10 ns;

signal w_in : wishbone_slave_out;
signal w_out : wishbone_master_out;

impure function to_adr(a: integer) return std_ulogic_vector is
begin
return std_ulogic_vector(to_unsigned(a, w_out.adr'length));
end;
begin
simple_ram_0: entity work.wishbone_bram_wrapper
generic map (
RAM_INIT_FILE => "wishbone_bram_tb.bin",
MEMORY_SIZE => 16
)
port map (
clk => clk,
rst => rst,
wishbone_out => w_in,
wishbone_in => w_out
);

clock: process
begin
clk <= '1';
wait for clk_period / 2;
clk <= '0';
wait for clk_period / 2;
end process clock;

stim: process
begin
w_out.adr <= (others => '0');
w_out.dat <= (others => '0');
w_out.cyc <= '0';
w_out.stb <= '0';
w_out.sel <= (others => '0');
w_out.we <= '0';

wait until rising_edge(clk);
rst <= '0';
wait until rising_edge(clk);

w_out.cyc <= '1';

-- Test read 0
w_out.stb <= '1';
w_out.sel <= "11111111";
w_out.adr <= to_adr(0);
assert w_in.ack = '0';
wait until rising_edge(clk);
w_out.stb <= '0';
wait until rising_edge(clk);
wait until rising_edge(clk);
assert w_in.ack = '1';
assert w_in.dat(63 downto 0) = x"0706050403020100" report to_hstring(w_in.dat);
wait until rising_edge(clk);
assert w_in.ack = '0';

-- Test read 8
w_out.stb <= '1';
w_out.sel <= "11111111";
w_out.adr <= to_adr(8);
assert w_in.ack = '0';
wait until rising_edge(clk);
w_out.stb <= '0';
wait until rising_edge(clk);
wait until rising_edge(clk);
assert w_in.ack = '1';
assert w_in.dat(63 downto 0) = x"0F0E0D0C0B0A0908" report to_hstring(w_in.dat);
wait until rising_edge(clk);
assert w_in.ack = '0';

-- Test write byte at 0
w_out.stb <= '1';
w_out.sel <= "00000001";
w_out.adr <= to_adr(0);
w_out.we <= '1';
w_out.dat(7 downto 0) <= x"0F";
assert w_in.ack = '0';
wait until rising_edge(clk);
w_out.stb <= '0';
wait until rising_edge(clk) and w_in.ack = '1';
wait until rising_edge(clk);
assert w_in.ack = '0';

-- Test read back
w_out.stb <= '1';
w_out.sel <= "11111111";
w_out.adr <= to_adr(0);
w_out.we <= '0';
assert w_in.ack = '0';
wait until rising_edge(clk);
w_out.stb <= '0';
wait until rising_edge(clk);
wait until rising_edge(clk);
assert w_in.ack = '1';
assert w_in.dat(63 downto 0) = x"070605040302010F" report to_hstring(w_in.dat);
wait until rising_edge(clk);
assert w_in.ack = '0';

-- Test write dword at 4
w_out.stb <= '1';
w_out.sel <= "11110000";
w_out.adr <= to_adr(0);
w_out.we <= '1';
w_out.dat(63 downto 32) <= x"BAADFEED";
assert w_in.ack = '0';
wait until rising_edge(clk);
w_out.stb <= '0';
wait until rising_edge(clk) and w_in.ack = '1';
wait until rising_edge(clk);
assert w_in.ack = '0';

-- Test read back
w_out.stb <= '1';
w_out.sel <= "11111111";
w_out.adr <= to_adr(0);
w_out.we <= '0';
assert w_in.ack = '0';
wait until rising_edge(clk);
w_out.stb <= '0';
wait until rising_edge(clk);
wait until rising_edge(clk);
assert w_in.ack = '1';
assert w_in.dat(63 downto 0) = x"BAADFEED0302010F" report to_hstring(w_in.dat);
wait until rising_edge(clk);
assert w_in.ack = '0';

-- Test write qword at 8
w_out.stb <= '1';
w_out.sel <= "11111111";
w_out.adr <= to_adr(8);
w_out.we <= '1';
w_out.dat(63 downto 0) <= x"0001020304050607";
assert w_in.ack = '0';
wait until rising_edge(clk);
w_out.stb <= '0';
wait until rising_edge(clk) and w_in.ack = '1';
wait until rising_edge(clk);
assert w_in.ack = '0';

-- Test read back
w_out.stb <= '1';
w_out.sel <= "11111111";
w_out.adr <= to_adr(8);
w_out.we <= '0';
assert w_in.ack = '0';
wait until rising_edge(clk);
w_out.stb <= '0';
wait until rising_edge(clk);
wait until rising_edge(clk);
assert w_in.ack = '1';
assert w_in.dat(63 downto 0) = x"0001020304050607" report to_hstring(w_in.dat);
wait until rising_edge(clk);
assert w_in.ack = '0';

assert false report "end of test" severity failure;
wait;
end process;
end behave;

@ -0,0 +1,84 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use std.textio.all;

library work;
use work.utils.all;
use work.wishbone_types.all;

--! @brief Simple memory module for use in Wishbone-based systems.
entity wishbone_bram_wrapper is
generic(
MEMORY_SIZE : natural := 4096; --! Memory size in bytes.
RAM_INIT_FILE : string
);
port(
clk : in std_logic;
rst : in std_logic;

-- Wishbone interface:
wishbone_in : in wishbone_master_out;
wishbone_out : out wishbone_slave_out
);
end entity wishbone_bram_wrapper;

architecture behaviour of wishbone_bram_wrapper is
constant ram_addr_bits : integer := log2(MEMORY_SIZE) - 3;

-- RAM interface
signal ram_addr : std_logic_vector(ram_addr_bits - 1 downto 0);
signal ram_we : std_ulogic;
signal ram_re : std_ulogic;

-- Others
signal ack, ack_buf : std_ulogic;
begin

-- Actual RAM template
ram_0: entity work.main_bram
generic map(
WIDTH => 64,
HEIGHT_BITS => ram_addr_bits,
MEMORY_SIZE => MEMORY_SIZE,
RAM_INIT_FILE => RAM_INIT_FILE
)
port map(
clk => clk,
addr => ram_addr,
di => wishbone_in.dat,
do => wishbone_out.dat,
sel => wishbone_in.sel,
re => ram_re,
we => ram_we
);

-- Wishbone interface
ram_addr <= wishbone_in.adr(ram_addr_bits + 2 downto 3);
ram_we <= wishbone_in.stb and wishbone_in.cyc and wishbone_in.we;
ram_re <= wishbone_in.stb and wishbone_in.cyc and not wishbone_in.we;
wishbone_out.stall <= '0';
wishbone_out.ack <= ack_buf;

wb_0: process(clk)
begin
if rising_edge(clk) then
if rst = '1' or wishbone_in.cyc = '0' then
ack_buf <= '0';
ack <= '0';
else
-- On loads, we have a delay cycle due to BRAM bufferring
-- but not on stores. So try to send an early ack on a
-- store if we aren't behind an existing load ack.
--
if ram_we = '1' and ack = '0' then
ack_buf <= '1';
else
ack <= wishbone_in.stb;
ack_buf <= ack;
end if;
end if;
end if;
end process;

end architecture behaviour;

@ -124,7 +124,6 @@ begin

-- We always move WB cyc and stb simultaneously (no pipelining yet...)
wb_out.cyc <= '1' when state = WB_CYCLE else '0';
wb_out.stb <= '1' when state = WB_CYCLE else '0';

-- Data latch. WB will take the read data away as soon as the cycle
-- terminates but we must maintain it on DMI until req goes down, so
@ -145,14 +144,23 @@ begin
if rising_edge(clk) then
if (rst) then
state <= IDLE;
wb_out.stb <= '0';
else
case state is
when IDLE =>
if dmi_req = '1' and dmi_addr = DBG_WB_DATA then
state <= WB_CYCLE;
wb_out.stb <= '1';
end if;
when WB_CYCLE =>
if wb_in.stall = '0' then
wb_out.stb <= '0';
end if;
if wb_in.ack then
-- We shouldn't get the ack if we hadn't already cleared
-- stb above but if this happen, don't leave it dangling.
--
wb_out.stb <= '0';
state <= DMI_WAIT;
end if;
when DMI_WAIT =>

@ -21,9 +21,13 @@ package wishbone_types is
constant wishbone_master_out_init : wishbone_master_out := (cyc => '0', stb => '0', we => '0', others => (others => '0'));

type wishbone_slave_out is record
dat : wishbone_data_type;
ack : std_ulogic;
dat : wishbone_data_type;
ack : std_ulogic;
stall : std_ulogic;
end record;
constant wishbone_slave_out_init : wishbone_slave_out := (ack => '0', others => (others => '0'));
constant wishbone_slave_out_init : wishbone_slave_out := (ack => '0', stall => '0', others => (others => '0'));

type wishbone_master_out_vector is array (natural range <>) of wishbone_master_out;
type wishbone_slave_out_vector is array (natural range <>) of wishbone_slave_out;

end package wishbone_types;

@ -44,6 +44,7 @@ architecture behaviour of writeback is
signal sign_extend : std_ulogic;
signal negative : std_ulogic;
signal second_word : std_ulogic;
signal zero : std_ulogic;
begin
writeback_0: process(clk)
begin
@ -155,7 +156,9 @@ begin

-- If the data can arrive split over two cycles, this will be correct
-- provided we don't have both sign extension and byte reversal.
negative <= (data_len(2) and data_permuted(31)) or (data_len(1) and data_permuted(15)) or
negative <= (data_len(3) and data_permuted(63)) or
(data_len(2) and data_permuted(31)) or
(data_len(1) and data_permuted(15)) or
(data_len(0) and data_permuted(7));

-- trim and sign-extend
@ -170,12 +173,16 @@ begin
trim_ctl(i) <= '0' & (negative and sign_extend);
end if;
end loop;
zero <= not negative;
for i in 0 to 7 loop
case trim_ctl(i) is
when "11" =>
data_trimmed(i * 8 + 7 downto i * 8) <= data_latched(i * 8 + 7 downto i * 8);
when "10" =>
data_trimmed(i * 8 + 7 downto i * 8) <= data_permuted(i * 8 + 7 downto i * 8);
if or data_permuted(i * 8 + 7 downto i * 8) /= '0' then
zero <= '0';
end if;
when "01" =>
data_trimmed(i * 8 + 7 downto i * 8) <= x"FF";
when others =>
@ -190,9 +197,9 @@ begin
if rc = '1' then
c_out.write_cr_enable <= '1';
c_out.write_cr_mask <= num_to_fxm(0);
if data_trimmed(63) = '1' then
if negative = '1' then
c_out.write_cr_data <= x"80000000";
elsif or (data_trimmed(62 downto 0)) = '1' then
elsif zero = '0' then
c_out.write_cr_data <= x"40000000";
else
c_out.write_cr_data <= x"20000000";

Loading…
Cancel
Save