Merge pull request #118 from antonblanchard/bus-pipeline

Bus pipeline
jtag-port
Anton Blanchard 5 years ago committed by GitHub
commit 9b1394e236
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -2,8 +2,8 @@ GHDL=ghdl
GHDLFLAGS=--std=08 -Psim-unisim GHDLFLAGS=--std=08 -Psim-unisim
CFLAGS=-O2 -Wall CFLAGS=-O2 -Wall


all = core_tb simple_ram_behavioural_tb soc_reset_tb icache_tb dcache_tb multiply_tb dmi_dtm_tb divider_tb \ all = core_tb soc_reset_tb icache_tb dcache_tb multiply_tb dmi_dtm_tb divider_tb \
rotator_tb countzero_tb rotator_tb countzero_tb wishbone_bram_tb


# XXX # XXX
# loadstore_tb fetch_tb # loadstore_tb fetch_tb
@ -35,10 +35,14 @@ helpers.o:
cache_ram.o: cache_ram.o:
plru.o: plru.o:
plru_tb.o: plru.o plru_tb.o: plru.o
icache.o: common.o wishbone_types.o plru.o cache_ram.o utils.o:
icache_tb.o: common.o wishbone_types.o icache.o simple_ram_behavioural.o sim_bram.o: sim_bram_helpers.o utils.o
dcache.o: common.o wishbone_types.o plru.o cache_ram.o wishbone_bram_wrapper.o: wishbone_types.o sim_bram.o utils.o
dcache_tb.o: common.o wishbone_types.o dcache.o simple_ram_behavioural.o wishbone_bram_tb.o: wishbone_bram_wrapper.o
icache.o: utils.o common.o wishbone_types.o plru.o cache_ram.o utils.o
icache_tb.o: common.o wishbone_types.o icache.o wishbone_bram_wrapper.o
dcache.o: utils.o common.o wishbone_types.o plru.o cache_ram.o utils.o
dcache_tb.o: common.o wishbone_types.o dcache.o wishbone_bram_wrapper.o
insn_helpers.o: insn_helpers.o:
loadstore1.o: common.o helpers.o loadstore1.o: common.o helpers.o
logical.o: decode_types.o logical.o: decode_types.o
@ -51,11 +55,8 @@ register_file.o: common.o
rotator.o: common.o rotator.o: common.o
rotator_tb.o: common.o glibc_random.o ppc_fx_insns.o insn_helpers.o rotator.o rotator_tb.o: common.o glibc_random.o ppc_fx_insns.o insn_helpers.o rotator.o
sim_console.o: sim_console.o:
simple_ram_behavioural_helpers.o:
simple_ram_behavioural_tb.o: wishbone_types.o simple_ram_behavioural.o
simple_ram_behavioural.o: wishbone_types.o simple_ram_behavioural_helpers.o
sim_uart.o: wishbone_types.o sim_console.o sim_uart.o: wishbone_types.o sim_console.o
soc.o: common.o wishbone_types.o core.o wishbone_arbiter.o sim_uart.o simple_ram_behavioural.o dmi_dtm_xilinx.o wishbone_debug_master.o soc.o: common.o wishbone_types.o core.o wishbone_arbiter.o sim_uart.o wishbone_bram_wrapper.o dmi_dtm_xilinx.o wishbone_debug_master.o
wishbone_arbiter.o: wishbone_types.o wishbone_arbiter.o: wishbone_types.o
wishbone_types.o: wishbone_types.o:
writeback.o: common.o crhelpers.o writeback.o: common.o crhelpers.o
@ -73,17 +74,17 @@ fpga/soc_reset_tb.o: fpga/soc_reset.o
soc_reset_tb: fpga/soc_reset_tb.o fpga/soc_reset.o soc_reset_tb: fpga/soc_reset_tb.o fpga/soc_reset.o
$(GHDL) -e $(GHDLFLAGS) soc_reset_tb $(GHDL) -e $(GHDLFLAGS) soc_reset_tb


core_tb: core_tb.o simple_ram_behavioural_helpers_c.o sim_console_c.o sim_jtag_socket_c.o core_tb: core_tb.o sim_bram_helpers_c.o sim_console_c.o sim_jtag_socket_c.o
$(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o -Wl,sim_console_c.o -Wl,sim_jtag_socket_c.o $@ $(GHDL) -e $(GHDLFLAGS) -Wl,sim_bram_helpers_c.o -Wl,sim_console_c.o -Wl,sim_jtag_socket_c.o $@


fetch_tb: fetch_tb.o fetch_tb: fetch_tb.o
$(GHDL) -e $(GHDLFLAGS) $@ $(GHDL) -e $(GHDLFLAGS) $@


icache_tb: icache_tb.o icache_tb: icache_tb.o
$(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o $@ $(GHDL) -e $(GHDLFLAGS) -Wl,sim_bram_helpers_c.o $@


dcache_tb: dcache_tb.o dcache_tb: dcache_tb.o
$(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o $@ $(GHDL) -e $(GHDLFLAGS) -Wl,sim_bram_helpers_c.o $@


plru_tb: plru_tb.o plru_tb: plru_tb.o
$(GHDL) -e $(GHDLFLAGS) $@ $(GHDL) -e $(GHDLFLAGS) $@
@ -106,11 +107,11 @@ countzero_tb: countzero_tb.o
simple_ram_tb: simple_ram_tb.o simple_ram_tb: simple_ram_tb.o
$(GHDL) -e $(GHDLFLAGS) $@ $(GHDL) -e $(GHDLFLAGS) $@


simple_ram_behavioural_tb: simple_ram_behavioural_helpers_c.o simple_ram_behavioural_tb.o wishbone_bram_tb: sim_bram_helpers_c.o wishbone_bram_tb.o
$(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o $@ $(GHDL) -e $(GHDLFLAGS) -Wl,sim_bram_helpers_c.o $@


dmi_dtm_tb: dmi_dtm_tb.o simple_ram_behavioural_helpers_c.o dmi_dtm_tb: dmi_dtm_tb.o sim_bram_helpers_c.o
$(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o $@ $(GHDL) -e $(GHDLFLAGS) -Wl,sim_bram_helpers_c.o $@


tests = $(sort $(patsubst tests/%.out,%,$(wildcard tests/*.out))) tests = $(sort $(patsubst tests/%.out,%,$(wildcard tests/*.out)))



@ -39,7 +39,7 @@ make
- Link in the micropython image: - Link in the micropython image:


``` ```
ln -s ../micropython/ports/powerpc/build/firmware.bin simple_ram_behavioural.bin ln -s ../micropython/ports/powerpc/build/firmware.bin main_ram.bin
``` ```


- Now run microwatt, sending debug output to /dev/null: - Now run microwatt, sending debug output to /dev/null:

@ -8,7 +8,8 @@ use work.wishbone_types.all;


entity core is entity core is
generic ( generic (
SIM : boolean := false SIM : boolean := false;
DISABLE_FLATTEN : boolean := false
); );
port ( port (
clk : in std_logic; clk : in std_logic;
@ -93,6 +94,29 @@ architecture behave of core is
-- Debug status -- Debug status
signal dbg_core_is_stopped: std_ulogic; signal dbg_core_is_stopped: std_ulogic;


-- Translate the DISABLE_FLATTEN choice into the string used as the value of
-- the keep_hierarchy attribute: "yes" when flattening is disabled, "no"
-- otherwise.
function keep_h(disable : boolean) return string is
begin
    if not disable then
        return "no";
    end if;
    return "yes";
end function;
-- Tag every labelled sub-unit of the core with a keep_hierarchy attribute
-- whose value ("yes"/"no") follows the DISABLE_FLATTEN generic via keep_h().
-- NOTE(review): keep_hierarchy is presumably consumed by the synthesis tool
-- to decide whether each instance may be flattened into its parent - confirm
-- against the toolchain in use.
attribute keep_hierarchy : string;
attribute keep_hierarchy of fetch1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of icache_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of fetch2_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of decode1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of decode2_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of register_file_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of cr_file_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of execute1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of multiply_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of divider_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of loadstore1_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of dcache_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of writeback_0 : label is keep_h(DISABLE_FLATTEN);
attribute keep_hierarchy of debug_0 : label is keep_h(DISABLE_FLATTEN);
begin begin


core_rst <= dbg_core_rst or rst; core_rst <= dbg_core_rst or rst;

@ -20,7 +20,7 @@ begin
generic map( generic map(
SIM => true, SIM => true,
MEMORY_SIZE => 524288, MEMORY_SIZE => 524288,
RAM_INIT_FILE => "simple_ram_behavioural.bin", RAM_INIT_FILE => "main_ram.bin",
RESET_LOW => false RESET_LOW => false
) )
port map( port map(

@ -16,6 +16,7 @@ use ieee.std_logic_1164.all;
use ieee.numeric_std.all; use ieee.numeric_std.all;


library work; library work;
use work.utils.all;
use work.common.all; use work.common.all;
use work.helpers.all; use work.helpers.all;
use work.wishbone_types.all; use work.wishbone_types.all;
@ -44,26 +45,6 @@ entity dcache is
end entity dcache; end entity dcache;


architecture rtl of dcache is architecture rtl of dcache is
function log2(i : natural) return integer is
variable tmp : integer := i;
variable ret : integer := 0;
begin
while tmp > 1 loop
ret := ret + 1;
tmp := tmp / 2;
end loop;
return ret;
end function;

function ispow2(i : integer) return boolean is
begin
if to_integer(to_unsigned(i, 32) and to_unsigned(i - 1, 32)) = 0 then
return true;
else
return false;
end if;
end function;

-- BRAM organisation: We never access more than wishbone_data_bits at -- BRAM organisation: We never access more than wishbone_data_bits at
-- a time so to save resources we make the array only that wide, and -- a time so to save resources we make the array only that wide, and
-- use consecutive indices for to make a cache "line" -- use consecutive indices for to make a cache "line"
@ -187,6 +168,7 @@ architecture rtl of dcache is
state : state_t; state : state_t;
wb : wishbone_master_out; wb : wishbone_master_out;
store_way : way_t; store_way : way_t;
store_row : row_t;
store_index : index_t; store_index : index_t;
end record; end record;


@ -213,6 +195,7 @@ architecture rtl of dcache is
signal req_hit_way : way_t; signal req_hit_way : way_t;
signal req_tag : cache_tag_t; signal req_tag : cache_tag_t;
signal req_op : op_t; signal req_op : op_t;
signal req_laddr : std_ulogic_vector(63 downto 0);


-- Cache RAM interface -- Cache RAM interface
type cache_ram_out_t is array(way_t) of cache_row_t; type cache_ram_out_t is array(way_t) of cache_row_t;
@ -244,12 +227,21 @@ architecture rtl of dcache is
end; end;


-- Returns whether this is the last row of a line -- Returns whether this is the last row of a line
function is_last_row(addr: wishbone_addr_type) return boolean is function is_last_row_addr(addr: wishbone_addr_type) return boolean is
constant ones : std_ulogic_vector(ROW_LINEBITS-1 downto 0) := (others => '1'); constant ones : std_ulogic_vector(ROW_LINEBITS-1 downto 0) := (others => '1');
begin begin
return addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS) = ones; return addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS) = ones;
end; end;


-- Row-index flavour of the "last row of a line" test: true when the low
-- ROW_LINEBITS bits of the row number (its position inside the cache line)
-- are all ones.
function is_last_row(row: row_t) return boolean is
    constant row_bits_v : std_ulogic_vector(ROW_BITS-1 downto 0) :=
        std_ulogic_vector(to_unsigned(row, ROW_BITS));
    constant all_ones   : std_ulogic_vector(ROW_LINEBITS-1 downto 0) := (others => '1');
begin
    return row_bits_v(ROW_LINEBITS-1 downto 0) = all_ones;
end;

-- Return the address of the next row in the current cache line -- Return the address of the next row in the current cache line
function next_row_addr(addr: wishbone_addr_type) return std_ulogic_vector is function next_row_addr(addr: wishbone_addr_type) return std_ulogic_vector is
variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0); variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
@ -263,6 +255,21 @@ architecture rtl of dcache is
return result; return result;
end; end;


-- Return the next row in the current cache line. We use a dedicated
-- function in order to limit the size of the generated adder to be
-- only the bits within a cache line (3 bits with default settings)
--
-- Only the low ROW_LINEBITS bits of the row number are incremented; the
-- addition wraps inside that field, so the upper bits (which select the
-- line) are returned unchanged.
--
function next_row(row: row_t) return row_t is
    variable row_v   : std_ulogic_vector(ROW_BITS-1 downto 0);
    variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
begin
    row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS));
    row_idx := row_v(ROW_LINEBITS-1 downto 0);
    -- Same-width unsigned add: the carry out of the row-in-line field is dropped
    row_v(ROW_LINEBITS-1 downto 0) := std_ulogic_vector(unsigned(row_idx) + 1);
    return to_integer(unsigned(row_v));
end;

-- Get the tag value from the address -- Get the tag value from the address
function get_tag(addr: std_ulogic_vector(63 downto 0)) return cache_tag_t is function get_tag(addr: std_ulogic_vector(63 downto 0)) return cache_tag_t is
begin begin
@ -381,6 +388,12 @@ begin
req_row <= get_row(d_in.addr); req_row <= get_row(d_in.addr);
req_tag <= get_tag(d_in.addr); req_tag <= get_tag(d_in.addr);


-- Calculate address of beginning of cache line, will be
-- used for cache miss processing if needed
--
req_laddr <= d_in.addr(63 downto LINE_OFF_BITS) &
(LINE_OFF_BITS-1 downto 0 => '0');

-- Test if pending request is a hit on any way -- Test if pending request is a hit on any way
hit_way := 0; hit_way := 0;
is_hit := '0'; is_hit := '0';
@ -573,7 +586,8 @@ begin
wr_data => wr_data wr_data => wr_data
); );
process(all) process(all)
variable tmp_adr : std_ulogic_vector(63 downto 0); variable tmp_adr : std_ulogic_vector(63 downto 0);
variable reloading : boolean;
begin begin
-- Cache hit reads -- Cache hit reads
do_read <= '1'; do_read <= '1';
@ -596,17 +610,17 @@ begin
-- Otherwise, we might be doing a reload -- Otherwise, we might be doing a reload
wr_data <= wishbone_in.dat; wr_data <= wishbone_in.dat;
wr_sel <= (others => '1'); wr_sel <= (others => '1');
tmp_adr := (r1.wb.adr'left downto 0 => r1.wb.adr, others => '0'); wr_addr <= std_ulogic_vector(to_unsigned(r1.store_row, ROW_BITS));
wr_addr <= std_ulogic_vector(to_unsigned(get_row(tmp_adr), ROW_BITS));
end if; end if;


-- The two actual write cases here -- The two actual write cases here
do_write <= '0'; do_write <= '0';
if r1.state = RELOAD_WAIT_ACK and wishbone_in.ack = '1' and r1.store_way = i then reloading := r1.state = RELOAD_WAIT_ACK;
if reloading and wishbone_in.ack = '1' and r1.store_way = i then
do_write <= '1'; do_write <= '1';
end if; end if;
if req_op = OP_STORE_HIT and req_hit_way = i then if req_op = OP_STORE_HIT and req_hit_way = i then
assert r1.state /= RELOAD_WAIT_ACK report "Store hit while in state:" & assert not reloading report "Store hit while in state:" &
state_t'image(r1.state) state_t'image(r1.state)
severity FAILURE; severity FAILURE;
do_write <= '1'; do_write <= '1';
@ -637,7 +651,7 @@ begin
-- single issue on load/stores so we are fine, later, we can generate -- single issue on load/stores so we are fine, later, we can generate
-- a stall output if necessary). -- a stall output if necessary).


if d_in.valid = '1' then if req_op /= OP_NONE then
r1.req <= d_in; r1.req <= d_in;


report "op:" & op_t'image(req_op) & report "op:" & op_t'image(req_op) &
@ -672,7 +686,8 @@ begin
-- operates at stage 1. -- operates at stage 1.
-- --
dcache_slow : process(clk) dcache_slow : process(clk)
variable tagset : cache_tags_set_t; variable tagset : cache_tags_set_t;
variable stbs_done : boolean;
begin begin
if rising_edge(clk) then if rising_edge(clk) then
-- On reset, clear all valid bits to force misses -- On reset, clear all valid bits to force misses
@ -731,16 +746,18 @@ begin
-- Keep track of our index and way for subsequent stores. -- Keep track of our index and way for subsequent stores.
r1.store_index <= req_index; r1.store_index <= req_index;
r1.store_way <= replace_way; r1.store_way <= replace_way;
r1.store_row <= get_row(req_laddr);


-- Prep for first wishbone read. We calculate the address of -- Prep for first wishbone read. We calculate the address of
-- the start of the cache line -- the start of the cache line and start the WB cycle
-- --
r1.wb.adr <= d_in.addr(r1.wb.adr'left downto LINE_OFF_BITS) & r1.wb.adr <= req_laddr(r1.wb.adr'left downto 0);
(LINE_OFF_BITS-1 downto 0 => '0');
r1.wb.sel <= (others => '1'); r1.wb.sel <= (others => '1');
r1.wb.we <= '0'; r1.wb.we <= '0';
r1.wb.cyc <= '1'; r1.wb.cyc <= '1';
r1.wb.stb <= '1'; r1.wb.stb <= '1';

-- Track that we had one request sent
r1.state <= RELOAD_WAIT_ACK; r1.state <= RELOAD_WAIT_ACK;


when OP_LOAD_NC => when OP_LOAD_NC =>
@ -770,6 +787,25 @@ begin
end case; end case;


when RELOAD_WAIT_ACK => when RELOAD_WAIT_ACK =>
-- Requests are all sent if stb is 0
stbs_done := r1.wb.stb = '0';

-- If we are still sending requests, was one accepted ?
if wishbone_in.stall = '0' and not stbs_done then
-- That was the last word ? We are done sending. Clear
-- stb and set stbs_done so we can handle an eventual last
-- ack on the same cycle.
--
if is_last_row_addr(r1.wb.adr) then
r1.wb.stb <= '0';
stbs_done := true;
end if;

-- Calculate the next row address
r1.wb.adr <= next_row_addr(r1.wb.adr);
end if;

-- Incoming acks processing
if wishbone_in.ack = '1' then if wishbone_in.ack = '1' then
-- Is this the data we were looking for ? Latch it so -- Is this the data we were looking for ? Latch it so
-- we can respond later. We don't currently complete the -- we can respond later. We don't currently complete the
@ -779,16 +815,17 @@ begin
-- not idle, which we don't currently know how to deal -- not idle, which we don't currently know how to deal
-- with. -- with.
-- --
if r1.wb.adr(LINE_OFF_BITS-1 downto ROW_OFF_BITS) = if r1.store_row = get_row(r1.req.addr) then
r1.req.addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS) then
r1.slow_data <= wishbone_in.dat; r1.slow_data <= wishbone_in.dat;
end if; end if;


-- That was the last word ? We are done -- Check for completion
if is_last_row(r1.wb.adr) then if stbs_done and is_last_row(r1.store_row) then
cache_valids(r1.store_index)(r1.store_way) <= '1'; -- Complete wishbone cycle
r1.wb.cyc <= '0'; r1.wb.cyc <= '0';
r1.wb.stb <= '0';
-- Cache line is now valid
cache_valids(r1.store_index)(r1.store_way) <= '1';


-- Complete the load that missed. For load with update -- Complete the load that missed. For load with update
-- we also need to do the deferred update cycle. -- we also need to do the deferred update cycle.
@ -801,10 +838,10 @@ begin
r1.state <= IDLE; r1.state <= IDLE;
report "completing miss !"; report "completing miss !";
end if; end if;
else
-- Otherwise, calculate the next row address
r1.wb.adr <= next_row_addr(r1.wb.adr);
end if; end if;

-- Increment store row counter
r1.store_row <= next_row(r1.store_row);
end if; end if;


when LOAD_UPDATE => when LOAD_UPDATE =>
@ -816,7 +853,13 @@ begin
r1.state <= IDLE; r1.state <= IDLE;


when STORE_WAIT_ACK | NC_LOAD_WAIT_ACK => when STORE_WAIT_ACK | NC_LOAD_WAIT_ACK =>
if wishbone_in.ack = '1' then -- Clear stb when slave accepted request
if wishbone_in.stall = '0' then
r1.wb.stb <= '0';
end if;

-- Got ack ? complete.
if wishbone_in.ack = '1' then
if r1.state = NC_LOAD_WAIT_ACK then if r1.state = NC_LOAD_WAIT_ACK then
r1.slow_data <= wishbone_in.dat; r1.slow_data <= wishbone_in.dat;
end if; end if;

@ -35,9 +35,9 @@ begin
); );


-- BRAM Memory slave -- BRAM Memory slave
bram0: entity work.mw_soc_memory bram0: entity work.wishbone_bram_wrapper
generic map( generic map(
MEMORY_SIZE => 128, MEMORY_SIZE => 1024,
RAM_INIT_FILE => "icache_test.bin" RAM_INIT_FILE => "icache_test.bin"
) )
port map( port map(
@ -121,7 +121,6 @@ begin
d_in.valid <= '1'; d_in.valid <= '1';
wait until rising_edge(clk); wait until rising_edge(clk);
d_in.valid <= '0'; d_in.valid <= '0';

wait until rising_edge(clk) and d_out.write_enable = '1'; wait until rising_edge(clk) and d_out.write_enable = '1';
assert d_out.valid = '1'; assert d_out.valid = '1';
assert d_out.write_data = x"0000004100000040" assert d_out.write_data = x"0000004100000040"
@ -130,7 +129,10 @@ begin
" expected 0000004100000040" " expected 0000004100000040"
severity failure; severity failure;


wait for clk_period*4; wait until rising_edge(clk);
wait until rising_edge(clk);
wait until rising_edge(clk);
wait until rising_edge(clk);


assert false report "end of test" severity failure; assert false report "end of test" severity failure;
wait; wait;

@ -2,92 +2,93 @@ library ieee;
use ieee.std_logic_1164.all; use ieee.std_logic_1164.all;


package decode_types is package decode_types is
type insn_type_t is (OP_ILLEGAL, OP_NOP, OP_ADD, type insn_type_t is (OP_ILLEGAL, OP_NOP, OP_ADD,
OP_ADDPCIS, OP_AND, OP_ATTN, OP_B, OP_BC, OP_BCREG, OP_ADDPCIS, OP_AND, OP_ATTN, OP_B, OP_BC, OP_BCREG,
OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPL, OP_CMPRB, OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPL, OP_CMPRB,
OP_CNTZ, OP_CRAND, OP_CNTZ, OP_CRAND,
OP_CRANDC, OP_CREQV, OP_CRNAND, OP_CRNOR, OP_CROR, OP_CRORC, OP_CRANDC, OP_CREQV, OP_CRNAND, OP_CRNOR, OP_CROR, OP_CRORC,
OP_CRXOR, OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST, OP_CRXOR, OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST,
OP_DCBZ, OP_DIV, OP_EXTS, OP_DCBZ, OP_DIV, OP_EXTS,
OP_EXTSWSLI, OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC, OP_EXTSWSLI, OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC,
OP_LOAD, OP_STORE, OP_MADDHD, OP_MADDHDU, OP_MADDLD, OP_MCRF, OP_LOAD, OP_STORE, OP_MADDHD, OP_MADDHDU, OP_MADDLD, OP_MCRF,
OP_MCRXR, OP_MCRXRX, OP_MFCR, OP_MFSPR, OP_MOD, OP_MCRXR, OP_MCRXRX, OP_MFCR, OP_MFSPR, OP_MOD,
OP_MTCRF, OP_MTSPR, OP_MUL_L64, OP_MTCRF, OP_MTSPR, OP_MUL_L64,
OP_MUL_H64, OP_MUL_H32, OP_OR, OP_MUL_H64, OP_MUL_H32, OP_OR,
OP_POPCNTB, OP_POPCNTD, OP_POPCNTW, OP_PRTYD, OP_POPCNTB, OP_POPCNTD, OP_POPCNTW, OP_PRTYD,
OP_PRTYW, OP_RLC, OP_RLCL, OP_RLCR, OP_SETB, OP_PRTYW, OP_RLC, OP_RLCL, OP_RLCR, OP_SETB,
OP_SHL, OP_SHR, OP_SHL, OP_SHR,
OP_SYNC, OP_TD, OP_TDI, OP_TW, OP_SYNC, OP_TD, OP_TDI, OP_TW,
OP_TWI, OP_XOR, OP_SIM_CONFIG); OP_TWI, OP_XOR, OP_SIM_CONFIG

);
type input_reg_a_t is (NONE, RA, RA_OR_ZERO);
type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD, CONST_DS, CONST_M1, CONST_SH, CONST_SH32); type input_reg_a_t is (NONE, RA, RA_OR_ZERO);
type input_reg_c_t is (NONE, RS); type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD, CONST_DS, CONST_M1, CONST_SH, CONST_SH32);
type output_reg_a_t is (NONE, RT, RA); type input_reg_c_t is (NONE, RS);
type rc_t is (NONE, ONE, RC); type output_reg_a_t is (NONE, RT, RA);
type carry_in_t is (ZERO, CA, ONE); type rc_t is (NONE, ONE, RC);

type carry_in_t is (ZERO, CA, ONE);
constant SH_OFFSET : integer := 0;
constant MB_OFFSET : integer := 1; constant SH_OFFSET : integer := 0;
constant ME_OFFSET : integer := 1; constant MB_OFFSET : integer := 1;
constant SH32_OFFSET : integer := 0; constant ME_OFFSET : integer := 1;
constant MB32_OFFSET : integer := 1; constant SH32_OFFSET : integer := 0;
constant ME32_OFFSET : integer := 2; constant MB32_OFFSET : integer := 1;

constant ME32_OFFSET : integer := 2;
constant FXM_OFFSET : integer := 0;

constant FXM_OFFSET : integer := 0;
constant BO_OFFSET : integer := 0;
constant BI_OFFSET : integer := 1; constant BO_OFFSET : integer := 0;
constant BH_OFFSET : integer := 2; constant BI_OFFSET : integer := 1;

constant BH_OFFSET : integer := 2;
constant BF_OFFSET : integer := 0;
constant L_OFFSET : integer := 1; constant BF_OFFSET : integer := 0;

constant L_OFFSET : integer := 1;
constant TOO_OFFSET : integer := 0;

constant TOO_OFFSET : integer := 0;
type unit_t is (NONE, ALU, LDST, MUL, DIV);
type length_t is (NONE, is1B, is2B, is4B, is8B); type unit_t is (NONE, ALU, LDST, MUL, DIV);

type length_t is (NONE, is1B, is2B, is4B, is8B);
type decode_rom_t is record
unit : unit_t; type decode_rom_t is record
insn_type : insn_type_t; unit : unit_t;
input_reg_a : input_reg_a_t; insn_type : insn_type_t;
input_reg_b : input_reg_b_t; input_reg_a : input_reg_a_t;
input_reg_c : input_reg_c_t; input_reg_b : input_reg_b_t;
output_reg_a : output_reg_a_t; input_reg_c : input_reg_c_t;

output_reg_a : output_reg_a_t;
input_cr : std_ulogic;
output_cr : std_ulogic; input_cr : std_ulogic;

output_cr : std_ulogic;
invert_a : std_ulogic;
invert_out : std_ulogic; invert_a : std_ulogic;
input_carry : carry_in_t; invert_out : std_ulogic;
output_carry : std_ulogic; input_carry : carry_in_t;

output_carry : std_ulogic;
-- load/store signals
length : length_t; -- load/store signals
byte_reverse : std_ulogic; length : length_t;
sign_extend : std_ulogic; byte_reverse : std_ulogic;
update : std_ulogic; sign_extend : std_ulogic;
reserve : std_ulogic; update : std_ulogic;

reserve : std_ulogic;
-- multiplier and ALU signals
is_32bit : std_ulogic; -- multiplier and ALU signals
is_signed : std_ulogic; is_32bit : std_ulogic;

is_signed : std_ulogic;
rc : rc_t;
lr : std_ulogic; rc : rc_t;

lr : std_ulogic;
sgl_pipe : std_ulogic;
end record; sgl_pipe : std_ulogic;
constant decode_rom_init : decode_rom_t := (unit => NONE, end record;
insn_type => OP_ILLEGAL, input_reg_a => NONE, constant decode_rom_init : decode_rom_t := (unit => NONE,
input_reg_b => NONE, input_reg_c => NONE, insn_type => OP_ILLEGAL, input_reg_a => NONE,
output_reg_a => NONE, input_cr => '0', output_cr => '0', input_reg_b => NONE, input_reg_c => NONE,
invert_a => '0', invert_out => '0', input_carry => ZERO, output_carry => '0', output_reg_a => NONE, input_cr => '0', output_cr => '0',
length => NONE, byte_reverse => '0', sign_extend => '0', invert_a => '0', invert_out => '0', input_carry => ZERO, output_carry => '0',
update => '0', reserve => '0', is_32bit => '0', length => NONE, byte_reverse => '0', sign_extend => '0',
is_signed => '0', rc => NONE, lr => '0', sgl_pipe => '0'); update => '0', reserve => '0', is_32bit => '0',
is_signed => '0', rc => NONE, lr => '0', sgl_pipe => '0');


end decode_types; end decode_types;



@ -50,8 +50,8 @@ begin
dmi_ack => dmi_ack dmi_ack => dmi_ack
); );


simple_ram_0: entity work.mw_soc_memory simple_ram_0: entity work.wishbone_bram_wrapper
generic map(RAM_INIT_FILE => "simple_ram_behavioural.bin", generic map(RAM_INIT_FILE => "main_ram.bin",
MEMORY_SIZE => 524288) MEMORY_SIZE => 524288)
port map(clk => clk, rst => rst, port map(clk => clk, rst => rst,
wishbone_in => wishbone_ram_out, wishbone_in => wishbone_ram_out,

@ -0,0 +1,83 @@
-- Single port Block RAM with one cycle output buffer

library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_unsigned.all;
use ieee.numeric_std.all;
use std.textio.all;

library work;

-- Single-port RAM with per-byte write enables and a registered output.
--
-- Generics:
--   WIDTH         - data width in bits; di/do are WIDTH wide and sel carries
--                   one enable per byte (WIDTH/8 bits).
--   HEIGHT_BITS   - width of the addr port in bits.
--   MEMORY_SIZE   - memory size in bytes; the architecture sizes its storage
--                   as MEMORY_SIZE / (WIDTH/8) words.
--   RAM_INIT_FILE - name of the text file used to initialise the memory.
--
-- NOTE(review): the HEIGHT_BITS default of 1024 yields a 1024-bit address
-- port; presumably every instantiation overrides it - confirm before relying
-- on the default.
entity main_bram is
generic(
WIDTH : natural := 64;
HEIGHT_BITS : natural := 1024;
MEMORY_SIZE : natural := 65536;
RAM_INIT_FILE : string
);
port(
clk : in std_logic;
-- Word address (indexes whole WIDTH-bit words)
addr : in std_logic_vector(HEIGHT_BITS - 1 downto 0) ;
-- Write data
di : in std_logic_vector(WIDTH-1 downto 0);
-- Read data
do : out std_logic_vector(WIDTH-1 downto 0);
-- Byte lane write enables, bit i covers di(8*i+7 downto 8*i)
sel : in std_logic_vector((WIDTH/8)-1 downto 0);
-- Read enable
re : in std_ulogic;
-- Write enable
we : in std_ulogic
);
end entity main_bram;

architecture behaviour of main_bram is

    -- Number of byte lanes in one memory word
    constant WIDTH_BYTES : natural := WIDTH / 8;

    -- RAM type definition: MEMORY_SIZE bytes organised as WIDTH-bit words
    type ram_t is array(0 to (MEMORY_SIZE / WIDTH_BYTES) - 1) of std_logic_vector(WIDTH-1 downto 0);

    -- RAM loading: read one hexadecimal word per text line from the file
    -- called `name`, filling consecutive words from index 0. Loading stops at
    -- end of file or when the array is full; words not covered by the file
    -- stay at zero.
    impure function init_ram(name : STRING) return ram_t is
        file ram_file : text open read_mode is name;
        variable ram_line : line;
        variable temp_word : std_logic_vector(WIDTH-1 downto 0);
        variable temp_ram : ram_t := (others => (others => '0'));
    begin
        for i in 0 to (MEMORY_SIZE / WIDTH_BYTES) - 1 loop
            exit when endfile(ram_file);
            readline(ram_file, ram_line);
            hread(ram_line, temp_word);
            temp_ram(i) := temp_word;
        end loop;

        return temp_ram;
    end function;

    -- RAM instance
    signal memory : ram_t := init_ram(RAM_INIT_FILE);
    attribute ram_style : string;
    attribute ram_style of memory : signal is "block";
    attribute ram_decomp : string;
    attribute ram_decomp of memory : signal is "power";

    -- Others
    -- First read register; `do` is a second register fed from it
    signal obuf : std_logic_vector(WIDTH-1 downto 0);
begin

    -- Actual RAM template
    memory_0: process(clk)
    begin
        if rising_edge(clk) then
            if we = '1' then
                -- One enable per byte lane. Iterate over the actual number of
                -- lanes rather than a fixed 8 so that WIDTH values other than
                -- 64 neither index outside sel/di nor leave lanes unwritable.
                for i in 0 to WIDTH_BYTES - 1 loop
                    if sel(i) = '1' then
                        memory(conv_integer(addr))((i + 1) * 8 - 1 downto i * 8) <=
                            di((i + 1) * 8 - 1 downto i * 8);
                    end if;
                end loop;
            end if;
            if re = '1' then
                -- memory is a signal, so a read on the same edge as a write
                -- observes the contents from before that write
                obuf <= memory(conv_integer(addr));
            end if;
            -- Output register: do lags obuf by one clock
            do <= obuf;
        end if;
    end process;

end architecture behaviour;

@ -1,106 +0,0 @@
-- Based on:
-- The Potato Processor - A simple processor for FPGAs
-- (c) Kristian Klomsten Skordal 2014 - 2015 <kristian.skordal@wafflemail.net>

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use std.textio.all;

library work;
use work.wishbone_types.all;

use work.pp_utilities.all;

--! @brief Simple memory module for use in Wishbone-based systems.
--!
--! The memory is an array of 64-bit words (MEMORY_SIZE / 8 entries in the
--! architecture) initialised from the text file named by RAM_INIT_FILE.
entity mw_soc_memory is
generic(
MEMORY_SIZE : natural := 4096; --! Memory size in bytes.
RAM_INIT_FILE : string --! Name of the file used to initialise the memory.
);
port(
clk : in std_logic;
rst : in std_logic; --! Synchronous reset, checked on the rising clock edge.

-- Wishbone interface:
wishbone_in : in wishbone_master_out; --! Request signals driven by the bus master.
wishbone_out : out wishbone_slave_out --! Response signals (dat, ack) driven by this slave.
);
end entity mw_soc_memory;

-- Behavioural Wishbone memory slave. Each access takes two clock cycles:
-- the request is performed in IDLE and acknowledged while the state
-- machine sits in ACK.
architecture behaviour of mw_soc_memory is
-- Low-order address bits that select a byte within the memory
-- NOTE(review): log2 is presumably provided by work.pp_utilities - confirm
signal wb_adr_in : std_logic_vector(log2(MEMORY_SIZE) - 1 downto 0);
-- Storage: MEMORY_SIZE bytes organised as 64-bit words
type ram_t is array(0 to (MEMORY_SIZE / 8) - 1) of std_logic_vector(63 downto 0);

-- Load the initial contents from the text file `name`: one hexadecimal
-- 64-bit word per line, stored from index 0 upwards. Stops at end of file
-- or when the array is full; remaining words stay at zero.
impure function init_ram(name : STRING) return ram_t is
file ram_file : text open read_mode is name;
variable ram_line : line;
variable temp_word : std_logic_vector(63 downto 0);
variable temp_ram : ram_t := (others => (others => '0'));
begin
for i in 0 to (MEMORY_SIZE/8)-1 loop
exit when endfile(ram_file);
readline(ram_file, ram_line);
hread(ram_line, temp_word);
temp_ram(i) := temp_word;
end loop;

return temp_ram;
end function;

signal memory : ram_t := init_ram(RAM_INIT_FILE);

attribute ram_style : string;
attribute ram_style of memory : signal is "block";

attribute ram_decomp : string;
attribute ram_decomp of memory : signal is "power";

-- IDLE: waiting for a strobe; ACK: the access was performed last cycle
type state_type is (IDLE, ACK);
signal state : state_type;

-- Registered acknowledge, set for the cycle following an accepted access
signal read_ack : std_logic;

begin

-- Addresses above the memory size are truncated to the low bits
wb_adr_in <= wishbone_in.adr(log2(MEMORY_SIZE) - 1 downto 0);

-- The ack is only presented while the master still asserts stb
wishbone_out.ack <= read_ack and wishbone_in.stb;

memory_0: process(clk)
begin
if rising_edge(clk) then
if rst = '1' then
read_ack <= '0';
state <= IDLE;
else
if wishbone_in.cyc = '1' then
case state is
when IDLE =>
-- Write access: update only the byte lanes selected by sel.
-- The word index drops the low 3 address bits (8 bytes per word).
if wishbone_in.stb = '1' and wishbone_in.we = '1' then
for i in 0 to 7 loop
if wishbone_in.sel(i) = '1' then
memory(to_integer(unsigned(wb_adr_in(wb_adr_in'left downto 3))))(((i + 1) * 8) - 1 downto i * 8)
<= wishbone_in.dat(((i + 1) * 8) - 1 downto i * 8);
end if;
end loop;
read_ack <= '1';
state <= ACK;
-- Read access: latch the whole 64-bit word onto the data output
elsif wishbone_in.stb = '1' then
wishbone_out.dat <= memory(to_integer(unsigned(wb_adr_in(wb_adr_in'left downto 3))));
read_ack <= '1';
state <= ACK;
end if;
when ACK =>
-- Drop the ack after one cycle and accept the next request
read_ack <= '0';
state <= IDLE;
end case;
else
-- No bus cycle in progress: return to the idle state
state <= IDLE;
read_ack <= '0';
end if;
end if;
end if;
end process;

end architecture behaviour;

@ -34,351 +34,353 @@ use ieee.numeric_std.all;
--! - Bit 0: data received (receive buffer not empty) --! - Bit 0: data received (receive buffer not empty)
--! - Bit 1: ready to send data (transmit buffer empty) --! - Bit 1: ready to send data (transmit buffer empty)
entity pp_soc_uart is entity pp_soc_uart is
generic( generic(
FIFO_DEPTH : natural := 64 --! Depth of the input and output FIFOs. FIFO_DEPTH : natural := 64 --! Depth of the input and output FIFOs.
); );
port( port(
clk : in std_logic; clk : in std_logic;
reset : in std_logic; reset : in std_logic;


-- UART ports: -- UART ports:
txd : out std_logic; txd : out std_logic;
rxd : in std_logic; rxd : in std_logic;


-- Interrupt signal: -- Interrupt signal:
irq : out std_logic; irq : out std_logic;


-- Wishbone ports: -- Wishbone ports:
wb_adr_in : in std_logic_vector(11 downto 0); wb_adr_in : in std_logic_vector(11 downto 0);
wb_dat_in : in std_logic_vector( 7 downto 0); wb_dat_in : in std_logic_vector( 7 downto 0);
wb_dat_out : out std_logic_vector( 7 downto 0); wb_dat_out : out std_logic_vector( 7 downto 0);
wb_we_in : in std_logic; wb_we_in : in std_logic;
wb_cyc_in : in std_logic; wb_cyc_in : in std_logic;
wb_stb_in : in std_logic; wb_stb_in : in std_logic;
wb_ack_out : out std_logic wb_ack_out : out std_logic
); );
end entity pp_soc_uart; end entity pp_soc_uart;


architecture behaviour of pp_soc_uart is architecture behaviour of pp_soc_uart is


subtype bitnumber is natural range 0 to 7; --! Type representing the index of a bit. subtype bitnumber is natural range 0 to 7; --! Type representing the index of a bit.


-- UART sample clock signals: -- UART sample clock signals:
signal sample_clk : std_logic; signal sample_clk : std_logic;
signal sample_clk_divisor : std_logic_vector(7 downto 0); signal sample_clk_divisor : std_logic_vector(7 downto 0);
signal sample_clk_counter : std_logic_vector(sample_clk_divisor'range); signal sample_clk_counter : std_logic_vector(sample_clk_divisor'range);


-- UART receive process signals: -- UART receive process signals:
type rx_state_type is (IDLE, RECEIVE, STARTBIT, STOPBIT); type rx_state_type is (IDLE, RECEIVE, STARTBIT, STOPBIT);
signal rx_state : rx_state_type; signal rx_state : rx_state_type;
signal rx_byte : std_logic_vector(7 downto 0); signal rx_byte : std_logic_vector(7 downto 0);
signal rx_current_bit : bitnumber; signal rx_current_bit : bitnumber;


subtype rx_sample_counter_type is natural range 0 to 15; subtype rx_sample_counter_type is natural range 0 to 15;
signal rx_sample_counter : rx_sample_counter_type; signal rx_sample_counter : rx_sample_counter_type;
signal rx_sample_value : rx_sample_counter_type; signal rx_sample_value : rx_sample_counter_type;


subtype rx_sample_delay_type is natural range 0 to 7; subtype rx_sample_delay_type is natural range 0 to 7;
signal rx_sample_delay : rx_sample_delay_type; signal rx_sample_delay : rx_sample_delay_type;


-- UART transmit process signals: -- UART transmit process signals:
type tx_state_type is (IDLE, TRANSMIT, STOPBIT); type tx_state_type is (IDLE, TRANSMIT, STOPBIT);
signal tx_state : tx_state_type; signal tx_state : tx_state_type;
signal tx_byte : std_logic_vector(7 downto 0); signal tx_byte : std_logic_vector(7 downto 0);
signal tx_current_bit : bitnumber; signal tx_current_bit : bitnumber;


-- UART transmit clock: -- UART transmit clock:
subtype uart_tx_counter_type is natural range 0 to 15; subtype uart_tx_counter_type is natural range 0 to 15;
signal uart_tx_counter : uart_tx_counter_type := 0; signal uart_tx_counter : uart_tx_counter_type := 0;
signal uart_tx_clk : std_logic; signal uart_tx_clk : std_logic;


-- Buffer signals: -- Buffer signals:
signal send_buffer_full, send_buffer_empty : std_logic; signal send_buffer_full, send_buffer_empty : std_logic;
signal recv_buffer_full, recv_buffer_empty : std_logic; signal recv_buffer_full, recv_buffer_empty : std_logic;
signal send_buffer_input, send_buffer_output : std_logic_vector(7 downto 0); signal send_buffer_input, send_buffer_output : std_logic_vector(7 downto 0);
signal recv_buffer_input, recv_buffer_output : std_logic_vector(7 downto 0); signal recv_buffer_input, recv_buffer_output : std_logic_vector(7 downto 0);
signal send_buffer_push, send_buffer_pop : std_logic := '0'; signal send_buffer_push, send_buffer_pop : std_logic := '0';
signal recv_buffer_push, recv_buffer_pop : std_logic := '0'; signal recv_buffer_push, recv_buffer_pop : std_logic := '0';


-- IRQ enable signals: -- IRQ enable signals:
signal irq_recv_enable, irq_tx_ready_enable : std_logic := '0'; signal irq_recv_enable, irq_tx_ready_enable : std_logic := '0';


-- Wishbone signals: -- Wishbone signals:
type wb_state_type is (IDLE, WRITE_ACK, READ_ACK); type wb_state_type is (IDLE, WRITE_ACK, READ_ACK);
signal wb_state : wb_state_type; signal wb_state : wb_state_type;


signal wb_ack : std_logic; --! Wishbone acknowledge signal signal wb_ack : std_logic; --! Wishbone acknowledge signal


begin begin


irq <= (irq_recv_enable and (not recv_buffer_empty)) irq <= (irq_recv_enable and (not recv_buffer_empty))
or (irq_tx_ready_enable and send_buffer_empty); or (irq_tx_ready_enable and send_buffer_empty);


---------- UART receive ---------- ---------- UART receive ----------


recv_buffer_input <= rx_byte; recv_buffer_input <= rx_byte;


uart_receive: process(clk) uart_receive: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
if reset = '1' then if reset = '1' then
rx_state <= IDLE; rx_state <= IDLE;
recv_buffer_push <= '0'; recv_buffer_push <= '0';
else
case rx_state is
when IDLE =>
if recv_buffer_push = '1' then
recv_buffer_push <= '0';
end if;

if sample_clk = '1' and rxd = '0' then
rx_sample_value <= rx_sample_counter;
rx_sample_delay <= 0;
rx_current_bit <= 0;
rx_state <= STARTBIT;
end if;
when STARTBIT =>
if sample_clk = '1' then
if rx_sample_delay = 7 then
rx_state <= RECEIVE;
rx_sample_value <= rx_sample_counter;
rx_sample_delay <= 0;
else else
case rx_state is rx_sample_delay <= rx_sample_delay + 1;
when IDLE =>
if recv_buffer_push = '1' then
recv_buffer_push <= '0';
end if;

if sample_clk = '1' and rxd = '0' then
rx_sample_value <= rx_sample_counter;
rx_sample_delay <= 0;
rx_current_bit <= 0;
rx_state <= STARTBIT;
end if;
when STARTBIT =>
if sample_clk = '1' then
if rx_sample_delay = 7 then
rx_state <= RECEIVE;
rx_sample_value <= rx_sample_counter;
rx_sample_delay <= 0;
else
rx_sample_delay <= rx_sample_delay + 1;
end if;
end if;
when RECEIVE =>
if sample_clk = '1' and rx_sample_counter = rx_sample_value then
if rx_current_bit /= 7 then
rx_byte(rx_current_bit) <= rxd;
rx_current_bit <= rx_current_bit + 1;
else
rx_byte(rx_current_bit) <= rxd;
rx_state <= STOPBIT;
end if;
end if;
when STOPBIT =>
if sample_clk = '1' and rx_sample_counter = rx_sample_value then
rx_state <= IDLE;

if recv_buffer_full = '0' then
recv_buffer_push <= '1';
end if;
end if;
end case;
end if; end if;
end if; end if;
end process uart_receive; when RECEIVE =>

if sample_clk = '1' and rx_sample_counter = rx_sample_value then
sample_counter: process(clk) if rx_current_bit /= 7 then
begin rx_byte(rx_current_bit) <= rxd;
if rising_edge(clk) then rx_current_bit <= rx_current_bit + 1;
if reset = '1' then else
rx_sample_counter <= 0; rx_byte(rx_current_bit) <= rxd;
elsif sample_clk = '1' then rx_state <= STOPBIT;
if rx_sample_counter = 15 then
rx_sample_counter <= 0;
else
rx_sample_counter <= rx_sample_counter + 1;
end if;
end if; end if;
end if; end if;
end process sample_counter; when STOPBIT =>

if sample_clk = '1' and rx_sample_counter = rx_sample_value then
---------- UART transmit ---------- rx_state <= IDLE;


tx_byte <= send_buffer_output; if recv_buffer_full = '0' then

recv_buffer_push <= '1';
uart_transmit: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
txd <= '1';
tx_state <= IDLE;
send_buffer_pop <= '0';
tx_current_bit <= 0;
else
case tx_state is
when IDLE =>
if send_buffer_empty = '0' and uart_tx_clk = '1' then
txd <= '0';
send_buffer_pop <= '1';
tx_current_bit <= 0;
tx_state <= TRANSMIT;
elsif uart_tx_clk = '1' then
txd <= '1';
end if;
when TRANSMIT =>
if send_buffer_pop = '1' then
send_buffer_pop <= '0';
elsif uart_tx_clk = '1' and tx_current_bit = 7 then
txd <= tx_byte(tx_current_bit);
tx_state <= STOPBIT;
elsif uart_tx_clk = '1' then
txd <= tx_byte(tx_current_bit);
tx_current_bit <= tx_current_bit + 1;
end if;
when STOPBIT =>
if uart_tx_clk = '1' then
txd <= '1';
tx_state <= IDLE;
end if;
end case;
end if; end if;
end if;
end case;
end if;
end if;
end process uart_receive;

sample_counter: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
rx_sample_counter <= 0;
elsif sample_clk = '1' then
if rx_sample_counter = 15 then
rx_sample_counter <= 0;
else
rx_sample_counter <= rx_sample_counter + 1;
end if; end if;
end process uart_transmit; end if;

end if;
uart_tx_clock_generator: process(clk) end process sample_counter;
begin
if rising_edge(clk) then ---------- UART transmit ----------
if reset = '1' then
uart_tx_counter <= 0; tx_byte <= send_buffer_output;
uart_tx_clk <= '0';
else uart_transmit: process(clk)
if sample_clk = '1' then begin
if uart_tx_counter = 15 then if rising_edge(clk) then
uart_tx_counter <= 0; if reset = '1' then
uart_tx_clk <= '1'; txd <= '1';
else tx_state <= IDLE;
uart_tx_counter <= uart_tx_counter + 1; send_buffer_pop <= '0';
uart_tx_clk <= '0'; tx_current_bit <= 0;
end if; else
else case tx_state is
uart_tx_clk <= '0'; when IDLE =>
end if; if send_buffer_empty = '0' and uart_tx_clk = '1' then
end if; txd <= '0';
send_buffer_pop <= '1';
tx_current_bit <= 0;
tx_state <= TRANSMIT;
elsif uart_tx_clk = '1' then
txd <= '1';
end if;
when TRANSMIT =>
if send_buffer_pop = '1' then
send_buffer_pop <= '0';
elsif uart_tx_clk = '1' and tx_current_bit = 7 then
txd <= tx_byte(tx_current_bit);
tx_state <= STOPBIT;
elsif uart_tx_clk = '1' then
txd <= tx_byte(tx_current_bit);
tx_current_bit <= tx_current_bit + 1;
end if;
when STOPBIT =>
if uart_tx_clk = '1' then
txd <= '1';
tx_state <= IDLE;
end if;
end case;
end if;
end if;
end process uart_transmit;

uart_tx_clock_generator: process(clk)
begin
if rising_edge(clk) then
if reset = '1' then
uart_tx_counter <= 0;
uart_tx_clk <= '0';
else
if sample_clk = '1' then
if uart_tx_counter = 15 then
uart_tx_counter <= 0;
uart_tx_clk <= '1';
else
uart_tx_counter <= uart_tx_counter + 1;
uart_tx_clk <= '0';
end if;
else
uart_tx_clk <= '0';
end if; end if;
end process uart_tx_clock_generator; end if;

end if;
---------- Sample clock generator ---------- end process uart_tx_clock_generator;


sample_clock_generator: process(clk) ---------- Sample clock generator ----------
begin
if rising_edge(clk) then sample_clock_generator: process(clk)
if reset = '1' then begin
sample_clk_counter <= (others => '0'); if rising_edge(clk) then
sample_clk <= '0'; if reset = '1' then
else sample_clk_counter <= (others => '0');
if sample_clk_divisor /= x"00" then sample_clk <= '0';
if sample_clk_counter = sample_clk_divisor then else
sample_clk_counter <= (others => '0'); if sample_clk_divisor /= x"00" then
sample_clk <= '1'; if sample_clk_counter = sample_clk_divisor then
else sample_clk_counter <= (others => '0');
sample_clk_counter <= std_logic_vector(unsigned(sample_clk_counter) + 1); sample_clk <= '1';
sample_clk <= '0'; else
end if; sample_clk_counter <= std_logic_vector(unsigned(sample_clk_counter) + 1);
end if; sample_clk <= '0';
end if; end if;
end if; end if;
end process sample_clock_generator; end if;

end if;
---------- Data Buffers ---------- end process sample_clock_generator;


send_buffer: entity work.pp_fifo ---------- Data Buffers ----------
generic map(
DEPTH => FIFO_DEPTH, send_buffer: entity work.pp_fifo
WIDTH => 8 generic map(
) port map( DEPTH => FIFO_DEPTH,
clk => clk, WIDTH => 8
reset => reset, ) port map(
full => send_buffer_full, clk => clk,
empty => send_buffer_empty, reset => reset,
data_in => send_buffer_input, full => send_buffer_full,
data_out => send_buffer_output, empty => send_buffer_empty,
push => send_buffer_push, data_in => send_buffer_input,
pop => send_buffer_pop data_out => send_buffer_output,
push => send_buffer_push,
pop => send_buffer_pop
); );


recv_buffer: entity work.pp_fifo recv_buffer: entity work.pp_fifo
generic map( generic map(
DEPTH => FIFO_DEPTH, DEPTH => FIFO_DEPTH,
WIDTH => 8 WIDTH => 8
) port map( ) port map(
clk => clk, clk => clk,
reset => reset, reset => reset,
full => recv_buffer_full, full => recv_buffer_full,
empty => recv_buffer_empty, empty => recv_buffer_empty,
data_in => recv_buffer_input, data_in => recv_buffer_input,
data_out => recv_buffer_output, data_out => recv_buffer_output,
push => recv_buffer_push, push => recv_buffer_push,
pop => recv_buffer_pop pop => recv_buffer_pop
); );


---------- Wishbone Interface ---------- ---------- Wishbone Interface ----------


wb_ack_out <= wb_ack and wb_cyc_in and wb_stb_in; wb_ack_out <= wb_ack and wb_cyc_in and wb_stb_in;


wishbone: process(clk) wishbone: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
if reset = '1' then if reset = '1' then
wb_ack <= '0'; wb_ack <= '0';
wb_state <= IDLE; wb_state <= IDLE;
send_buffer_push <= '0'; send_buffer_push <= '0';
recv_buffer_pop <= '0'; recv_buffer_pop <= '0';
sample_clk_divisor <= (others => '0'); sample_clk_divisor <= (others => '0');
irq_recv_enable <= '0'; irq_recv_enable <= '0';
irq_tx_ready_enable <= '0'; irq_tx_ready_enable <= '0';
else else
case wb_state is case wb_state is
when IDLE => when IDLE =>
if wb_cyc_in = '1' and wb_stb_in = '1' then if wb_cyc_in = '1' and wb_stb_in = '1' then
if wb_we_in = '1' then -- Write to register if wb_we_in = '1' then -- Write to register
if wb_adr_in = x"000" then if wb_adr_in = x"000" then
send_buffer_input <= wb_dat_in; send_buffer_input <= wb_dat_in;
send_buffer_push <= '1'; send_buffer_push <= '1';
elsif wb_adr_in = x"018" then elsif wb_adr_in = x"018" then
sample_clk_divisor <= wb_dat_in; sample_clk_divisor <= wb_dat_in;
elsif wb_adr_in = x"020" then elsif wb_adr_in = x"020" then
irq_recv_enable <= wb_dat_in(0); irq_recv_enable <= wb_dat_in(0);
irq_tx_ready_enable <= wb_dat_in(1); irq_tx_ready_enable <= wb_dat_in(1);
end if; end if;


-- Invalid writes are acked and ignored. -- Invalid writes are acked and ignored.

wb_ack <= '1';
wb_ack <= '1'; wb_state <= WRITE_ACK;
wb_state <= WRITE_ACK; else -- Read from register
else -- Read from register if wb_adr_in = x"008" then
if wb_adr_in = x"008" then recv_buffer_pop <= '1';
recv_buffer_pop <= '1'; elsif wb_adr_in = x"010" then
elsif wb_adr_in = x"010" then wb_dat_out <= x"0" & send_buffer_full & recv_buffer_full &
wb_dat_out <= x"0" & send_buffer_full & recv_buffer_full & send_buffer_empty & recv_buffer_empty; send_buffer_empty & recv_buffer_empty;
wb_ack <= '1'; wb_ack <= '1';
elsif wb_adr_in = x"018" then elsif wb_adr_in = x"018" then
wb_dat_out <= sample_clk_divisor; wb_dat_out <= sample_clk_divisor;
wb_ack <= '1'; wb_ack <= '1';
elsif wb_adr_in = x"020" then elsif wb_adr_in = x"020" then
wb_dat_out <= (0 => irq_recv_enable, 1 => irq_tx_ready_enable, others => '0'); wb_dat_out <= (0 => irq_recv_enable,
wb_ack <= '1'; 1 => irq_tx_ready_enable,
else others => '0');
wb_dat_out <= (others => '0'); wb_ack <= '1';
wb_ack <= '1'; else
end if; wb_dat_out <= (others => '0');
wb_state <= READ_ACK; wb_ack <= '1';
end if; end if;
end if; wb_state <= READ_ACK;
when WRITE_ACK =>
send_buffer_push <= '0';

if wb_stb_in = '0' then
wb_ack <= '0';
wb_state <= IDLE;
end if;
when READ_ACK =>
if recv_buffer_pop = '1' then
recv_buffer_pop <= '0';
else
wb_dat_out <= recv_buffer_output;
wb_ack <= '1';
end if;

if wb_stb_in = '0' then
wb_ack <= '0';
wb_state <= IDLE;
end if;
end case;
end if; end if;
end if; end if;
end process wishbone; when WRITE_ACK =>
send_buffer_push <= '0';

if wb_stb_in = '0' then
wb_ack <= '0';
wb_state <= IDLE;
end if;
when READ_ACK =>
if recv_buffer_pop = '1' then
recv_buffer_pop <= '0';
else
wb_dat_out <= recv_buffer_output;
wb_ack <= '1';
end if;

if wb_stb_in = '0' then
wb_ack <= '0';
wb_state <= IDLE;
end if;
end case;
end if;
end if;
end process wishbone;


end architecture behaviour; end architecture behaviour;

@ -7,7 +7,8 @@ entity toplevel is
RAM_INIT_FILE : string := "firmware.hex"; RAM_INIT_FILE : string := "firmware.hex";
RESET_LOW : boolean := true; RESET_LOW : boolean := true;
CLK_INPUT : positive := 100000000; CLK_INPUT : positive := 100000000;
CLK_FREQUENCY : positive := 100000000 CLK_FREQUENCY : positive := 100000000;
DISABLE_FLATTEN_CORE : boolean := false
); );
port( port(
ext_clk : in std_ulogic; ext_clk : in std_ulogic;
@ -62,7 +63,8 @@ begin
MEMORY_SIZE => MEMORY_SIZE, MEMORY_SIZE => MEMORY_SIZE,
RAM_INIT_FILE => RAM_INIT_FILE, RAM_INIT_FILE => RAM_INIT_FILE,
RESET_LOW => RESET_LOW, RESET_LOW => RESET_LOW,
SIM => false SIM => false,
DISABLE_FLATTEN_CORE => DISABLE_FLATTEN_CORE
) )
port map ( port map (
system_clk => system_clk, system_clk => system_clk,

@ -21,6 +21,7 @@ use ieee.std_logic_1164.all;
use ieee.numeric_std.all; use ieee.numeric_std.all;


library work; library work;
use work.utils.all;
use work.common.all; use work.common.all;
use work.wishbone_types.all; use work.wishbone_types.all;


@ -51,26 +52,6 @@ entity icache is
end entity icache; end entity icache;


architecture rtl of icache is architecture rtl of icache is
function log2(i : natural) return integer is
    -- Value still to be halved; starts at the argument.
    variable remaining : integer := i;
    -- Number of halvings performed so far; this is the result.
    variable halvings  : integer := 0;
begin
    -- Count how many times the value can be halved before it drops to 1
    -- or below.  Integer division truncates, so this yields the floor of
    -- the base-2 logarithm for i >= 1, and 0 for i = 0 or i = 1.
    loop
        exit when remaining <= 1;
        remaining := remaining / 2;
        halvings  := halvings + 1;
    end loop;
    return halvings;
end function;

function ispow2(i : integer) return boolean is
begin
    -- A value with a single bit set shares no bits with (value - 1), so
    -- the bitwise AND of the two 32-bit encodings is zero exactly in that
    -- case.  The comparison result is returned directly.
    return to_integer(to_unsigned(i, 32) and to_unsigned(i - 1, 32)) = 0;
end function;

-- BRAM organisation: We never access more than wishbone_data_bits at -- BRAM organisation: We never access more than wishbone_data_bits at
-- a time so to save resources we make the array only that wide, and -- a time so to save resources we make the array only that wide, and
-- use consecutive indices for to make a cache "line" -- use consecutive indices for to make a cache "line"
@ -159,6 +140,7 @@ architecture rtl of icache is
wb : wishbone_master_out; wb : wishbone_master_out;
store_way : way_t; store_way : way_t;
store_index : index_t; store_index : index_t;
store_row : row_t;
end record; end record;


signal r : reg_internal_t; signal r : reg_internal_t;
@ -170,6 +152,7 @@ architecture rtl of icache is
signal req_tag : cache_tag_t; signal req_tag : cache_tag_t;
signal req_is_hit : std_ulogic; signal req_is_hit : std_ulogic;
signal req_is_miss : std_ulogic; signal req_is_miss : std_ulogic;
signal req_laddr : std_ulogic_vector(63 downto 0);


-- Cache RAM interface -- Cache RAM interface
type cache_ram_out_t is array(way_t) of cache_row_t; type cache_ram_out_t is array(way_t) of cache_row_t;
@ -193,12 +176,21 @@ architecture rtl of icache is
end; end;


-- Returns whether this is the last row of a line -- Returns whether this is the last row of a line
function is_last_row(addr: wishbone_addr_type) return boolean is function is_last_row_addr(addr: wishbone_addr_type) return boolean is
constant ones : std_ulogic_vector(ROW_LINEBITS-1 downto 0) := (others => '1'); constant ones : std_ulogic_vector(ROW_LINEBITS-1 downto 0) := (others => '1');
begin begin
return addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS) = ones; return addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS) = ones;
end; end;


-- Returns whether this is the last row of a line
function is_last_row(row: row_t) return boolean is
    -- Row number re-encoded as a ROW_BITS-wide bit vector so that its
    -- low-order bits can be sliced.
    constant row_as_bits : std_ulogic_vector(ROW_BITS-1 downto 0) :=
        std_ulogic_vector(to_unsigned(row, ROW_BITS));
    -- Pattern of ROW_LINEBITS ones to compare the slice against.
    constant all_set : std_ulogic_vector(ROW_LINEBITS-1 downto 0) := (others => '1');
begin
    -- True when the low ROW_LINEBITS bits of the row number are all '1'.
    return row_as_bits(ROW_LINEBITS-1 downto 0) = all_set;
end;

-- Return the address of the next row in the current cache line -- Return the address of the next row in the current cache line
function next_row_addr(addr: wishbone_addr_type) function next_row_addr(addr: wishbone_addr_type)
return std_ulogic_vector is return std_ulogic_vector is
@ -213,6 +205,21 @@ architecture rtl of icache is
return result; return result;
end; end;


-- Return the next row in the current cache line. We use a dedicated
-- function in order to limit the size of the generated adder to be
-- only the bits within a cache line (3 bits with default settings)
--
function next_row(row: row_t) return row_t is
    -- Full row number as a bit vector.
    variable row_v   : std_ulogic_vector(ROW_BITS-1 downto 0);
    -- Low ROW_LINEBITS bits of the row number (the only bits incremented).
    variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0);
begin
    row_v   := std_ulogic_vector(to_unsigned(row, ROW_BITS));
    row_idx := row_v(ROW_LINEBITS-1 downto 0);
    -- Only the low ROW_LINEBITS bits are incremented; the addition is
    -- ROW_LINEBITS wide, so it wraps to zero instead of carrying into the
    -- upper bits of the row number, which are returned unchanged.
    row_v(ROW_LINEBITS-1 downto 0) := std_ulogic_vector(unsigned(row_idx) + 1);
    return to_integer(unsigned(row_v));
end;

-- Read the instruction word for the given address in the current cache row -- Read the instruction word for the given address in the current cache row
function read_insn_word(addr: std_ulogic_vector(63 downto 0); function read_insn_word(addr: std_ulogic_vector(63 downto 0);
data: cache_row_t) return std_ulogic_vector is data: cache_row_t) return std_ulogic_vector is
@ -298,7 +305,6 @@ begin
wr_data => wishbone_in.dat wr_data => wishbone_in.dat
); );
process(all) process(all)
variable tmp_adr : std_ulogic_vector(63 downto 0);
begin begin
do_read <= '1'; do_read <= '1';
do_write <= '0'; do_write <= '0';
@ -307,8 +313,7 @@ begin
end if; end if;
cache_out(i) <= dout; cache_out(i) <= dout;
rd_addr <= std_ulogic_vector(to_unsigned(req_row, ROW_BITS)); rd_addr <= std_ulogic_vector(to_unsigned(req_row, ROW_BITS));
tmp_adr := (r.wb.adr'left downto 0 => r.wb.adr, others => '0'); wr_addr <= std_ulogic_vector(to_unsigned(r.store_row, ROW_BITS));
wr_addr <= std_ulogic_vector(to_unsigned(get_row(tmp_adr), ROW_BITS));
end process; end process;
end generate; end generate;
@ -358,6 +363,12 @@ begin
req_row <= get_row(i_in.nia); req_row <= get_row(i_in.nia);
req_tag <= get_tag(i_in.nia); req_tag <= get_tag(i_in.nia);


-- Calculate address of beginning of cache line, will be
-- used for cache miss processing if needed
--
req_laddr <= i_in.nia(63 downto LINE_OFF_BITS) &
(LINE_OFF_BITS-1 downto 0 => '0');

-- Test if pending request is a hit on any way -- Test if pending request is a hit on any way
hit_way := 0; hit_way := 0;
is_hit := '0'; is_hit := '0';
@ -427,7 +438,8 @@ begin


-- Cache miss/reload synchronous machine -- Cache miss/reload synchronous machine
icache_miss : process(clk) icache_miss : process(clk)
variable tagset : cache_tags_set_t; variable tagset : cache_tags_set_t;
variable stbs_done : boolean;
begin begin
if rising_edge(clk) then if rising_edge(clk) then
-- On reset, clear all valid bits to force misses -- On reset, clear all valid bits to force misses
@ -473,29 +485,54 @@ begin
-- Keep track of our index and way for subsequent stores -- Keep track of our index and way for subsequent stores
r.store_index <= req_index; r.store_index <= req_index;
r.store_way <= replace_way; r.store_way <= replace_way;
r.store_row <= get_row(req_laddr);


-- Prep for first wishbone read. We calculate the address of -- Prep for first wishbone read. We calculate the address of
-- the start of the cache line -- the start of the cache line and start the WB cycle.
-- --
r.wb.adr <= i_in.nia(r.wb.adr'left downto LINE_OFF_BITS) & r.wb.adr <= req_laddr(r.wb.adr'left downto 0);
(LINE_OFF_BITS-1 downto 0 => '0');
r.wb.cyc <= '1'; r.wb.cyc <= '1';
r.wb.stb <= '1'; r.wb.stb <= '1';


-- Track that we had one request sent
r.state <= WAIT_ACK; r.state <= WAIT_ACK;
end if; end if;

when WAIT_ACK => when WAIT_ACK =>
-- Requests are all sent if stb is 0
stbs_done := r.wb.stb = '0';

-- If we are still sending requests, was one accepted ?
if wishbone_in.stall = '0' and not stbs_done then
-- That was the last word ? We are done sending. Clear
-- stb and set stbs_done so we can handle an eventual last
-- ack on the same cycle.
--
if is_last_row_addr(r.wb.adr) then
r.wb.stb <= '0';
stbs_done := true;
end if;

-- Calculate the next row address
r.wb.adr <= next_row_addr(r.wb.adr);
end if;

-- Incoming acks processing
if wishbone_in.ack = '1' then if wishbone_in.ack = '1' then
-- That was the last word ? We are done -- Check for completion
if is_last_row(r.wb.adr) then if stbs_done and is_last_row(r.store_row) then
cache_valids(r.store_index)(r.store_way) <= '1'; -- Complete wishbone cycle
r.wb.cyc <= '0'; r.wb.cyc <= '0';
r.wb.stb <= '0';
-- Cache line is now valid
cache_valids(r.store_index)(r.store_way) <= '1';

-- We are done
r.state <= IDLE; r.state <= IDLE;
else
-- Otherwise, calculate the next row address
r.wb.adr <= next_row_addr(r.wb.adr);
end if; end if;

-- Increment store row counter
r.store_row <= next_row(r.store_row);
end if; end if;
end case; end case;
end if; end if;

@ -36,9 +36,9 @@ begin
); );


-- BRAM Memory slave -- BRAM Memory slave
bram0: entity work.mw_soc_memory bram0: entity work.wishbone_bram_wrapper
generic map( generic map(
MEMORY_SIZE => 128, MEMORY_SIZE => 1024,
RAM_INIT_FILE => "icache_test.bin" RAM_INIT_FILE => "icache_test.bin"
) )
port map( port map(
@ -68,15 +68,20 @@ begin
begin begin
i_out.req <= '0'; i_out.req <= '0';
i_out.nia <= (others => '0'); i_out.nia <= (others => '0');
i_out.stop_mark <= '0';


wait for 4*clk_period; wait until rising_edge(clk);
wait until rising_edge(clk);
wait until rising_edge(clk);
wait until rising_edge(clk);


i_out.req <= '1'; i_out.req <= '1';
i_out.nia <= x"0000000000000004"; i_out.nia <= x"0000000000000004";


wait for 30*clk_period; wait for 30*clk_period;
wait until rising_edge(clk);


assert i_in.valid = '1'; assert i_in.valid = '1' severity failure;
assert i_in.insn = x"00000001" assert i_in.insn = x"00000001"
report "insn @" & to_hstring(i_out.nia) & report "insn @" & to_hstring(i_out.nia) &
"=" & to_hstring(i_in.insn) & "=" & to_hstring(i_in.insn) &
@ -85,27 +90,29 @@ begin


i_out.req <= '0'; i_out.req <= '0';


wait for clk_period; wait until rising_edge(clk);


-- hit -- hit
i_out.req <= '1'; i_out.req <= '1';
i_out.nia <= x"0000000000000008"; i_out.nia <= x"0000000000000008";
wait for clk_period; wait until rising_edge(clk);
assert i_in.valid = '1'; wait until rising_edge(clk);
assert i_in.valid = '1' severity failure;
assert i_in.insn = x"00000002" assert i_in.insn = x"00000002"
report "insn @" & to_hstring(i_out.nia) & report "insn @" & to_hstring(i_out.nia) &
"=" & to_hstring(i_in.insn) & "=" & to_hstring(i_in.insn) &
" expected 00000002" " expected 00000002"
severity failure; severity failure;
wait for clk_period; wait until rising_edge(clk);


-- another miss -- another miss
i_out.req <= '1'; i_out.req <= '1';
i_out.nia <= x"0000000000000040"; i_out.nia <= x"0000000000000040";


wait for 30*clk_period; wait for 30*clk_period;
wait until rising_edge(clk);


assert i_in.valid = '1'; assert i_in.valid = '1' severity failure;
assert i_in.insn = x"00000010" assert i_in.insn = x"00000010"
report "insn @" & to_hstring(i_out.nia) & report "insn @" & to_hstring(i_out.nia) &
"=" & to_hstring(i_in.insn) & "=" & to_hstring(i_in.insn) &
@ -115,13 +122,15 @@ begin
-- test something that aliases -- test something that aliases
i_out.req <= '1'; i_out.req <= '1';
i_out.nia <= x"0000000000000100"; i_out.nia <= x"0000000000000100";
wait for clk_period; wait until rising_edge(clk);
assert i_in.valid = '0'; wait until rising_edge(clk);
wait for clk_period; assert i_in.valid = '0' severity failure;
wait until rising_edge(clk);


wait for 30*clk_period; wait for 30*clk_period;
wait until rising_edge(clk);


assert i_in.valid = '1'; assert i_in.valid = '1' severity failure;
assert i_in.insn = x"00000040" assert i_in.insn = x"00000040"
report "insn @" & to_hstring(i_out.nia) & report "insn @" & to_hstring(i_out.nia) &
"=" & to_hstring(i_in.insn) & "=" & to_hstring(i_in.insn) &

@ -36,20 +36,22 @@ filesets:
- plru.vhdl - plru.vhdl
- cache_ram.vhdl - cache_ram.vhdl
- core_debug.vhdl - core_debug.vhdl
- utils.vhdl
file_type : vhdlSource-2008 file_type : vhdlSource-2008


soc: soc:
files: files:
- wishbone_arbiter.vhdl - wishbone_arbiter.vhdl
- wishbone_debug_master.vhdl - wishbone_debug_master.vhdl
- wishbone_bram_wrapper.vhdl
- soc.vhdl - soc.vhdl
file_type : vhdlSource-2008 file_type : vhdlSource-2008


fpga: fpga:
files: files:
- fpga/pp_fifo.vhd - fpga/main_bram.vhdl
- fpga/mw_soc_memory.vhdl
- fpga/soc_reset.vhdl - fpga/soc_reset.vhdl
- fpga/pp_fifo.vhd
- fpga/pp_soc_uart.vhd - fpga/pp_soc_uart.vhd
- fpga/pp_utilities.vhd - fpga/pp_utilities.vhd
- fpga/toplevel.vhdl - fpga/toplevel.vhdl
@ -93,6 +95,7 @@ targets:
- ram_init_file - ram_init_file
- clk_input - clk_input
- clk_frequency - clk_frequency
- disable_flatten_core
tools: tools:
vivado: {part : xc7a100tcsg324-1} vivado: {part : xc7a100tcsg324-1}
toplevel : toplevel toplevel : toplevel
@ -105,6 +108,7 @@ targets:
- ram_init_file - ram_init_file
- clk_input - clk_input
- clk_frequency - clk_frequency
- disable_flatten_core
tools: tools:
vivado: {part : xc7a200tsbg484-1} vivado: {part : xc7a200tsbg484-1}
toplevel : toplevel toplevel : toplevel
@ -117,6 +121,7 @@ targets:
- ram_init_file - ram_init_file
- clk_input - clk_input
- clk_frequency - clk_frequency
- disable_flatten_core
tools: tools:
vivado: {part : xc7a35ticsg324-1L} vivado: {part : xc7a35ticsg324-1L}
toplevel : toplevel toplevel : toplevel
@ -129,6 +134,7 @@ targets:
- ram_init_file - ram_init_file
- clk_input - clk_input
- clk_frequency - clk_frequency
- disable_flatten_core
tools: tools:
vivado: {part : xc7a100ticsg324-1L} vivado: {part : xc7a100ticsg324-1L}
toplevel : toplevel toplevel : toplevel
@ -142,6 +148,7 @@ targets:
- reset_low=false - reset_low=false
- clk_input=12000000 - clk_input=12000000
- clk_frequency - clk_frequency
- disable_flatten_core
tools: tools:
vivado: {part : xc7a35tcpg236-1} vivado: {part : xc7a35tcpg236-1}
toplevel : toplevel toplevel : toplevel
@ -179,3 +186,9 @@ parameters:
description : Generated system clock frequency in HZ (for top-generic based boards) description : Generated system clock frequency in HZ (for top-generic based boards)
paramtype : generic paramtype : generic
default : 50000000 default : 50000000

disable_flatten_core:
datatype : bool
description : Prevent Vivado from flattening the main core components
paramtype : generic
default : false

@ -21,7 +21,7 @@ Y=$(${MICROWATT_DIR}/scripts/hash.py tests/${TEST}.out)


cd $TMPDIR cd $TMPDIR


cp ${MICROWATT_DIR}/tests/${TEST}.bin simple_ram_behavioural.bin cp ${MICROWATT_DIR}/tests/${TEST}.bin main_ram.bin


X=$( ${MICROWATT_DIR}/core_tb | ${MICROWATT_DIR}/scripts/hash.py ) X=$( ${MICROWATT_DIR}/core_tb | ${MICROWATT_DIR}/scripts/hash.py )



@ -13,7 +13,7 @@ cwd = os.getcwd()
os.chdir(tempdir.name) os.chdir(tempdir.name)


copyfile(os.path.join(cwd, 'tests/micropython.bin'), copyfile(os.path.join(cwd, 'tests/micropython.bin'),
os.path.join(tempdir.name, 'simple_ram_behavioural.bin')) os.path.join(tempdir.name, 'main_ram.bin'))


cmd = [ os.path.join(cwd, './core_tb') ] cmd = [ os.path.join(cwd, './core_tb') ]



@ -13,7 +13,7 @@ cwd = os.getcwd()
os.chdir(tempdir.name) os.chdir(tempdir.name)


copyfile(os.path.join(cwd, 'tests/micropython.bin'), copyfile(os.path.join(cwd, 'tests/micropython.bin'),
os.path.join(tempdir.name, 'simple_ram_behavioural.bin')) os.path.join(tempdir.name, 'main_ram.bin'))


cmd = [ os.path.join(cwd, './core_tb') ] cmd = [ os.path.join(cwd, './core_tb') ]



@ -0,0 +1,67 @@
-- Single port Block RAM with one cycle output buffer
--
-- Simulated via C helpers

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use std.textio.all;

library work;
use work.utils.all;
use work.sim_bram_helpers.all;

-- Single-port RAM interface: one address, a write-data input, a read-data
-- output, a per-byte select vector and separate read/write enables.
entity main_bram is
generic(
-- Width of the data ports in bits; sel carries WIDTH/8 bits.
WIDTH : natural := 64;
-- Number of bits in the addr port.
-- NOTE(review): the default of 1024 gives a 1024-bit address and looks
-- like a row count rather than a bit count -- confirm that every
-- instantiation overrides it.
HEIGHT_BITS : natural := 1024;
-- Size argument for the RAM backing store (presumably in bytes -- TODO confirm).
MEMORY_SIZE : natural := 65536;
-- Name of the file used to initialise the RAM contents; no default.
RAM_INIT_FILE : string
);
port(
clk : in std_logic;
-- Row address (one row is WIDTH bits).
addr : in std_logic_vector(HEIGHT_BITS - 1 downto 0) ;
-- Write data.
di : in std_logic_vector(WIDTH-1 downto 0);
-- Read data.
do : out std_logic_vector(WIDTH-1 downto 0);
-- One select bit per byte of the data word.
sel : in std_logic_vector((WIDTH/8)-1 downto 0);
-- Read enable.
re : in std_ulogic;
-- Write enable.
we : in std_ulogic
);
end entity main_bram;

-- Simulation-only implementation: the memory contents live outside the VHDL
-- model and are accessed through the behavioural_* subprograms of
-- work.sim_bram_helpers.
architecture sim of main_bram is

-- Number of bytes in one data word.
constant WIDTH_BYTES : natural := WIDTH / 8;
-- Zero bits appended to addr when printing it, so that the reported
-- address is a byte address rather than a row address.
constant pad_zeros : std_ulogic_vector(log2(WIDTH_BYTES)-1 downto 0)
:= (others => '0');

-- Handle returned by behavioural_initialize and passed to every
-- subsequent behavioural_read / behavioural_write call.
signal identifier : integer := behavioural_initialize(filename => RAM_INIT_FILE,
size => MEMORY_SIZE);
-- Others
-- Holds the most recently read word; copied to do on the following edge.
signal obuf : std_logic_vector(WIDTH-1 downto 0);
begin

-- Actual RAM template
memory_0: process(clk)
-- The helper subprograms use fixed 64-bit data and address vectors.
variable ret_dat_v : std_ulogic_vector(63 downto 0);
variable addr64 : std_ulogic_vector(63 downto 0);
begin
if rising_edge(clk) then
-- Build the 64-bit address for the helpers: addr is placed starting at
-- bit 3 (i.e. multiplied by 8) and all other bits are zero.
-- NOTE(review): the shift of 3 is hard-coded and matches WIDTH = 64
-- only, whereas pad_zeros is derived from WIDTH -- confirm other
-- widths are not expected.
addr64 := (others => '0');
addr64(HEIGHT_BITS + 2 downto 3) := addr;
if we = '1' then
report "RAM writing " & to_hstring(di) & " to " &
to_hstring(addr & pad_zeros) & " sel:" & to_hstring(sel);
behavioural_write(di, addr64, to_integer(unsigned(sel)), identifier);
end if;
if re = '1' then
behavioural_read(ret_dat_v, addr64, to_integer(unsigned(sel)), identifier);
report "RAM reading from " & to_hstring(addr & pad_zeros) &
" returns " & to_hstring(ret_dat_v);
-- Keep only the low WIDTH bits of the 64-bit value returned.
obuf <= ret_dat_v(obuf'left downto 0);
end if;
-- obuf is a signal, so this reads the value it held before this edge:
-- do lags obuf by one clock edge.
do <= obuf;
end if;
end process;

end architecture sim;

@ -1,24 +1,24 @@
library ieee; library ieee;
use ieee.std_logic_1164.all; use ieee.std_logic_1164.all;


package simple_ram_behavioural_helpers is package sim_bram_helpers is
function behavioural_initialize (filename: String; size: integer) return integer; function behavioural_initialize (filename: String; size: integer) return integer;
attribute foreign of behavioural_initialize : function is "VHPIDIRECT behavioural_initialize"; attribute foreign of behavioural_initialize : function is "VHPIDIRECT behavioural_initialize";


procedure behavioural_read (val: out std_ulogic_vector(63 downto 0); addr: std_ulogic_vector(63 downto 0); length: integer; identifier: integer; reload: integer); procedure behavioural_read (val: out std_ulogic_vector(63 downto 0); addr: std_ulogic_vector(63 downto 0); length: integer; identifier:integer);
attribute foreign of behavioural_read : procedure is "VHPIDIRECT behavioural_read"; attribute foreign of behavioural_read : procedure is "VHPIDIRECT behavioural_read";


procedure behavioural_write (val: std_ulogic_vector(63 downto 0); addr: std_ulogic_vector(63 downto 0); length: integer; identifier: integer); procedure behavioural_write (val: std_ulogic_vector(63 downto 0); addr: std_ulogic_vector(63 downto 0); length: integer; identifier: integer);
attribute foreign of behavioural_write : procedure is "VHPIDIRECT behavioural_write"; attribute foreign of behavioural_write : procedure is "VHPIDIRECT behavioural_write";
end simple_ram_behavioural_helpers; end sim_bram_helpers;


package body simple_ram_behavioural_helpers is package body sim_bram_helpers is
function behavioural_initialize (filename: String; size: integer) return integer is function behavioural_initialize (filename: String; size: integer) return integer is
begin begin
assert false report "VHPI" severity failure; assert false report "VHPI" severity failure;
end behavioural_initialize; end behavioural_initialize;


procedure behavioural_read (val: out std_ulogic_vector(63 downto 0); addr: std_ulogic_vector(63 downto 0); length: integer; identifier: integer; reload: integer) is procedure behavioural_read (val: out std_ulogic_vector(63 downto 0); addr: std_ulogic_vector(63 downto 0); length: integer; identifier: integer) is
begin begin
assert false report "VHPI" severity failure; assert false report "VHPI" severity failure;
end behavioural_read; end behavioural_read;
@ -27,4 +27,4 @@ package body simple_ram_behavioural_helpers is
begin begin
assert false report "VHPI" severity failure; assert false report "VHPI" severity failure;
end behavioural_write; end behavioural_write;
end simple_ram_behavioural_helpers; end sim_bram_helpers;

@ -1,79 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use std.textio.all;

library work;
use work.wishbone_types.all;
use work.simple_ram_behavioural_helpers.all;

-- Behavioural (simulation-only) Wishbone memory slave. Its architecture keeps
-- the storage behind the behavioural_* helper routines imported from
-- simple_ram_behavioural_helpers.
entity mw_soc_memory is
generic (
-- Forwarded as the filename argument of behavioural_initialize.
RAM_INIT_FILE : string;
-- Forwarded as the size argument of behavioural_initialize.
MEMORY_SIZE : integer
);

port (
clk : in std_ulogic;
-- Synchronous reset: only examined on the rising edge of clk.
rst : in std_ulogic;

-- Request signals coming from the Wishbone master.
wishbone_in : in wishbone_master_out;
-- Response signals (dat, ack) returned to the master.
wishbone_out : out wishbone_slave_out
);
end mw_soc_memory;

-- Two-state Wishbone slave: a strobe seen in IDLE performs the access through
-- the behavioural_* helpers and moves to ACK; ACK always falls back to IDLE.
architecture behave of mw_soc_memory is
type wishbone_state_t is (IDLE, ACK);

signal state : wishbone_state_t := IDLE;
signal ret_ack : std_ulogic := '0';
-- Handle returned by behavioural_initialize; passed back on every read/write.
signal identifier : integer := behavioural_initialize(filename => RAM_INIT_FILE, size => MEMORY_SIZE);
-- Incremented on every write and handed to behavioural_read.
-- NOTE(review): what the helper does with this counter is not visible here.
signal reload : integer := 0;
begin
wishbone_process: process(clk)
variable ret_dat: std_ulogic_vector(63 downto 0) := (others => '0');
variable adr: std_ulogic_vector(63 downto 0);
begin
-- These two assignments sit outside the rising_edge test, so they are
-- re-evaluated on every clk event (both edges). The process is sensitive to
-- clk only, so changes on cyc/stb reach ack at the next clk event, and values
-- stored on a rising edge (ret_ack, ret_dat) appear on the outputs one clk
-- event later.
wishbone_out.ack <= ret_ack and wishbone_in.cyc and wishbone_in.stb;
wishbone_out.dat <= ret_dat;

if rising_edge(clk) then
if rst = '1' then
state <= IDLE;
ret_ack <= '0';
else
-- Default read data; only replaced when behavioural_read runs below.
ret_dat := x"FFFFFFFFFFFFFFFF";

-- Active
if wishbone_in.cyc = '1' then
case state is
when IDLE =>
if wishbone_in.stb = '1' then
-- Zero-extend the Wishbone address to the 64 bits the helpers expect.
adr := (wishbone_in.adr'left downto 0 => wishbone_in.adr, others => '0');
if wishbone_in.we = '1' then
-- Write: refuse undefined data/address, then store through the helper.
-- The byte-select vector is passed as an integer in the "length" argument.
assert not(is_x(wishbone_in.dat)) and not(is_x(wishbone_in.adr)) severity failure;
report "RAM writing " & to_hstring(wishbone_in.dat) & " to " & to_hstring(wishbone_in.adr);
behavioural_write(wishbone_in.dat, adr, to_integer(unsigned(wishbone_in.sel)), identifier);
reload <= reload + 1;
ret_ack <= '1';
state <= ACK;
else
-- Read: the helper fills ret_dat.
behavioural_read(ret_dat, adr, to_integer(unsigned(wishbone_in.sel)), identifier, reload);
report "RAM reading from " & to_hstring(wishbone_in.adr) & " returns " & to_hstring(ret_dat);
ret_ack <= '1';
state <= ACK;
end if;
end if;
when ACK =>
-- The acknowledge lasts one clock; then accept the next strobe.
ret_ack <= '0';
state <= IDLE;
end case;
else
-- No bus cycle in progress: drop any pending acknowledge.
ret_ack <= '0';
state <= IDLE;
end if;
end if;
end if;
end process;
end behave;

@ -1,246 +0,0 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.wishbone_types.all;

-- Self-contained testbench (no ports, no generics); its architecture
-- instantiates work.mw_soc_memory and drives it over Wishbone.
entity simple_ram_behavioural_tb is
end simple_ram_behavioural_tb;

architecture behave of simple_ram_behavioural_tb is
signal clk : std_ulogic;
signal rst : std_ulogic := '1';

constant clk_period : time := 10 ns;

signal w_in : wishbone_slave_out;
signal w_out : wishbone_master_out;

-- Turns an integer address into a vector exactly as wide as w_out.adr.
impure function to_adr(a: integer) return std_ulogic_vector is
    constant width : natural := w_out.adr'length;
    variable as_unsigned : unsigned(width - 1 downto 0);
begin
    as_unsigned := to_unsigned(a, width);
    return std_ulogic_vector(as_unsigned);
end;
begin
-- Unit under test, initialised from simple_ram_behavioural_tb.bin.
-- The port names cross over on purpose: w_in/w_out are named from the
-- testbench (master) side, so the memory's wishbone_out drives w_in and its
-- wishbone_in is driven by w_out.
-- NOTE(review): MEMORY_SIZE => 16 is presumably a size in bytes - confirm
-- against behavioural_initialize.
simple_ram_0: entity work.mw_soc_memory
generic map (
RAM_INIT_FILE => "simple_ram_behavioural_tb.bin",
MEMORY_SIZE => 16
)
port map (
clk => clk,
rst => rst,
wishbone_out => w_in,
wishbone_in => w_out
);

-- Free-running clock: high for the first half of each clk_period, low for
-- the second half.
clock: process
    constant half_period : time := clk_period / 2;
begin
    clk <= '1';
    wait for half_period;
    clk <= '0';
    wait for half_period;
end process clock;

-- Stimulus: releases reset, then runs single Wishbone transfers of several
-- widths and alignments against the memory, checking ack and data each time.
stim: process
    -- Raises stb with the given byte select and address, and checks that
    -- ack goes from '0' to '1' within one clock period.
    procedure begin_access(byte_sel : std_ulogic_vector; address : integer) is
    begin
        w_out.stb <= '1';
        w_out.sel <= byte_sel;
        w_out.adr <= to_adr(address);
        assert w_in.ack = '0';
        wait for clk_period;
        assert w_in.ack = '1';
    end procedure;

    -- Drops stb and checks that ack is gone one clock period later.
    procedure end_access is
    begin
        w_out.stb <= '0';
        wait for clk_period;
        assert w_in.ack = '0';
    end procedure;

    -- Complete read transfer; compares the low expected'length bits of the
    -- returned data against expected.
    procedure check_read(byte_sel : std_ulogic_vector;
                         address  : integer;
                         expected : std_ulogic_vector) is
    begin
        begin_access(byte_sel, address);
        assert w_in.dat(expected'length - 1 downto 0) = expected report to_hstring(w_in.dat);
        end_access;
    end procedure;
begin
    -- Idle bus while reset is still asserted.
    w_out.adr <= (others => '0');
    w_out.dat <= (others => '0');
    w_out.cyc <= '0';
    w_out.stb <= '0';
    w_out.sel <= (others => '0');
    w_out.we <= '0';

    wait for clk_period;
    rst <= '0';

    wait for clk_period;

    w_out.cyc <= '1';

    -- test various read lengths and alignments
    check_read("00000001", 0, x"00");
    check_read("00000001", 1, x"01");
    check_read("00000001", 7, x"07");

    check_read("00000011", 0, x"0100");
    check_read("00000011", 1, x"0201");
    check_read("00000011", 7, x"0807");

    check_read("00001111", 0, x"03020100");
    check_read("00001111", 1, x"04030201");
    check_read("00001111", 7, x"0A090807");

    check_read("11111111", 0, x"0706050403020100");
    check_read("11111111", 1, x"0807060504030201");
    check_read("11111111", 7, x"0E0D0C0B0A090807");

    -- test various write lengths and alignments
    -- (we/dat are scheduled in the same delta as stb/sel/adr)
    w_out.we <= '1';
    w_out.dat(7 downto 0) <= x"0F";
    begin_access("00000001", 0);
    end_access;

    w_out.we <= '0';
    check_read("00000001", 0, x"0F");

    w_out.we <= '1';
    w_out.dat <= x"BADC0FFEBADC0FFE";
    begin_access("11111111", 7);
    end_access;

    w_out.we <= '0';
    check_read("11111111", 7, x"BADC0FFEBADC0FFE");

    -- A failure-severity assertion is how this testbench stops the simulator.
    assert false report "end of test" severity failure;
    wait;
end process;
end behave;

@ -17,7 +17,8 @@ entity soc is
MEMORY_SIZE : positive; MEMORY_SIZE : positive;
RAM_INIT_FILE : string; RAM_INIT_FILE : string;
RESET_LOW : boolean; RESET_LOW : boolean;
SIM : boolean SIM : boolean;
DISABLE_FLATTEN_CORE : boolean := false
); );
port( port(
rst : in std_ulogic; rst : in std_ulogic;
@ -42,6 +43,12 @@ architecture behaviour of soc is
signal wishbone_debug_in : wishbone_slave_out; signal wishbone_debug_in : wishbone_slave_out;
signal wishbone_debug_out : wishbone_master_out; signal wishbone_debug_out : wishbone_master_out;


-- Arbiter array (ghdl doesn't support assigning the array
-- elements in the entity instantiation)
constant NUM_WB_MASTERS : positive := 3;
signal wb_masters_out : wishbone_master_out_vector(0 to NUM_WB_MASTERS-1);
signal wb_masters_in : wishbone_slave_out_vector(0 to NUM_WB_MASTERS-1);

-- Wishbone master (output of arbiter): -- Wishbone master (output of arbiter):
signal wb_master_in : wishbone_slave_out; signal wb_master_in : wishbone_slave_out;
signal wb_master_out : wishbone_master_out; signal wb_master_out : wishbone_master_out;
@ -76,7 +83,8 @@ begin
-- Processor core -- Processor core
processor: entity work.core processor: entity work.core
generic map( generic map(
SIM => SIM SIM => SIM,
DISABLE_FLATTEN => DISABLE_FLATTEN_CORE
) )
port map( port map(
clk => system_clk, clk => system_clk,
@ -94,13 +102,22 @@ begin
); );


-- Wishbone bus master arbiter & mux -- Wishbone bus master arbiter & mux
wb_masters_out <= (0 => wishbone_dcore_out,
1 => wishbone_icore_out,
2 => wishbone_debug_out);
wishbone_dcore_in <= wb_masters_in(0);
wishbone_icore_in <= wb_masters_in(1);
wishbone_debug_in <= wb_masters_in(2);
wishbone_arbiter_0: entity work.wishbone_arbiter wishbone_arbiter_0: entity work.wishbone_arbiter
generic map(
NUM_MASTERS => NUM_WB_MASTERS
)
port map( port map(
clk => system_clk, rst => rst, clk => system_clk, rst => rst,
wb1_in => wishbone_dcore_out, wb1_out => wishbone_dcore_in, wb_masters_in => wb_masters_out,
wb2_in => wishbone_icore_out, wb2_out => wishbone_icore_in, wb_masters_out => wb_masters_in,
wb3_in => wishbone_debug_out, wb3_out => wishbone_debug_in, wb_slave_out => wb_master_out,
wb_out => wb_master_out, wb_in => wb_master_in wb_slave_in => wb_master_in
); );


-- Wishbone slaves address decoder & mux -- Wishbone slaves address decoder & mux
@ -136,6 +153,7 @@ begin
when others => when others =>
wb_master_in.dat <= (others => '1'); wb_master_in.dat <= (others => '1');
wb_master_in.ack <= wb_master_out.stb and wb_master_out.cyc; wb_master_in.ack <= wb_master_out.stb and wb_master_out.cyc;
wb_master_in.stall <= '0';
end case; end case;
end process slave_intercon; end process slave_intercon;


@ -164,9 +182,10 @@ begin
wb_ack_out => wb_uart0_out.ack wb_ack_out => wb_uart0_out.ack
); );
wb_uart0_out.dat <= x"00000000000000" & uart_dat8; wb_uart0_out.dat <= x"00000000000000" & uart_dat8;
wb_uart0_out.stall <= '0' when wb_uart0_in.cyc = '0' else not wb_uart0_out.ack;


-- BRAM Memory slave -- BRAM Memory slave
bram0: entity work.mw_soc_memory bram0: entity work.wishbone_bram_wrapper
generic map( generic map(
MEMORY_SIZE => MEMORY_SIZE, MEMORY_SIZE => MEMORY_SIZE,
RAM_INIT_FILE => RAM_INIT_FILE RAM_INIT_FILE => RAM_INIT_FILE

@ -0,0 +1,35 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

-- Small integer helpers for sizing and validating generics.
package utils is

-- Counts how many times i can be halved (integer division) before it
-- drops to 1 or below, i.e. floor(log2(i)) for i >= 1; yields 0 for i = 0.
function log2(i : natural) return integer;
-- Reports whether the positive integer i is a power of two.
function ispow2(i : integer) return boolean;

end utils;

package body utils is

-- Integer base-2 logarithm, rounded down.
-- Halves the argument until it reaches 1 (or less) and returns the number of
-- halvings; consequently both 0 and 1 map to 0.
function log2(i : natural) return integer is
    variable remaining : integer := i;
    variable halvings  : integer := 0;
begin
    loop
        exit when remaining <= 1;
        remaining := remaining / 2;
        halvings := halvings + 1;
    end loop;
    return halvings;
end function;

-- Reports whether i is a power of two.
--
-- Non-positive values are rejected up front and yield false: zero is not a
-- power of two, and to_unsigned only accepts NATURAL arguments, so computing
-- to_unsigned(i - 1, 32) for i = 0 (or to_unsigned(i, 32) for a negative i)
-- would abort with a range error instead of returning a result.
function ispow2(i : integer) return boolean is
begin
    if i <= 0 then
        return false;
    end if;
    -- A power of two has exactly one bit set, so clearing the lowest set bit
    -- (i and (i - 1)) leaves zero.
    return to_integer(to_unsigned(i, 32) and to_unsigned(i - 1, 32)) = 0;
end function;

end utils;

@ -6,73 +6,64 @@ use work.wishbone_types.all;


-- TODO: Use an array of master/slaves with parametric size -- TODO: Use an array of master/slaves with parametric size
entity wishbone_arbiter is entity wishbone_arbiter is
generic(
NUM_MASTERS : positive := 3
);
port (clk : in std_ulogic; port (clk : in std_ulogic;
rst : in std_ulogic; rst : in std_ulogic;


wb1_in : in wishbone_master_out; wb_masters_in : in wishbone_master_out_vector(0 to NUM_MASTERS-1);
wb1_out : out wishbone_slave_out; wb_masters_out : out wishbone_slave_out_vector(0 to NUM_MASTERS-1);


wb2_in : in wishbone_master_out; wb_slave_out : out wishbone_master_out;
wb2_out : out wishbone_slave_out; wb_slave_in : in wishbone_slave_out

wb3_in : in wishbone_master_out;
wb3_out : out wishbone_slave_out;

wb_out : out wishbone_master_out;
wb_in : in wishbone_slave_out
); );
end wishbone_arbiter; end wishbone_arbiter;


architecture behave of wishbone_arbiter is architecture behave of wishbone_arbiter is
type wishbone_arbiter_state_t is (IDLE, WB1_BUSY, WB2_BUSY, WB3_BUSY); subtype wb_arb_master_t is integer range 0 to NUM_MASTERS-1;
signal state : wishbone_arbiter_state_t := IDLE; signal candidate, selected : wb_arb_master_t;
signal busy : std_ulogic;
begin begin


wishbone_muxes: process(state, wb_in, wb1_in, wb2_in, wb3_in) busy <= wb_masters_in(selected).cyc;

wishbone_muxes: process(selected, candidate, busy, wb_slave_in, wb_masters_in)
variable early_sel : wb_arb_master_t;
begin begin
-- Requests from masters are fully muxed early_sel := selected;
wb_out <= wb1_in when state = WB1_BUSY else if busy = '0' then
wb2_in when state = WB2_BUSY else early_sel := candidate;
wb3_in when state = WB3_BUSY else end if;
wishbone_master_out_init; wb_slave_out <= wb_masters_in(early_sel);
for i in 0 to NUM_MASTERS-1 loop
wb_masters_out(i).dat <= wb_slave_in.dat;
wb_masters_out(i).ack <= wb_slave_in.ack when early_sel = i else '0';
wb_masters_out(i).stall <= wb_slave_in.stall when early_sel = i else '1';
end loop;
end process;


-- Responses from slave don't need to mux the data bus -- Candidate selection is dumb, priority order... we could
wb1_out.dat <= wb_in.dat; -- instead consider some form of fairness but it's not really
wb2_out.dat <= wb_in.dat; -- an issue at the moment.
wb3_out.dat <= wb_in.dat; --
wb1_out.ack <= wb_in.ack when state = WB1_BUSY else '0'; wishbone_candidate: process(all)
wb2_out.ack <= wb_in.ack when state = WB2_BUSY else '0'; begin
wb3_out.ack <= wb_in.ack when state = WB3_BUSY else '0'; candidate <= selected;
for i in NUM_MASTERS-1 downto 0 loop
if wb_masters_in(i).cyc = '1' then
candidate <= i;
end if;
end loop;
end process; end process;


wishbone_arbiter_process: process(clk) wishbone_arbiter_process: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
if rst = '1' then if rst = '1' then
state <= IDLE; selected <= 0;
else elsif busy = '0' then
case state is selected <= candidate;
when IDLE =>
if wb1_in.cyc = '1' then
state <= WB1_BUSY;
elsif wb2_in.cyc = '1' then
state <= WB2_BUSY;
elsif wb3_in.cyc = '1' then
state <= WB3_BUSY;
end if;
when WB1_BUSY =>
if wb1_in.cyc = '0' then
state <= IDLE;
end if;
when WB2_BUSY =>
if wb2_in.cyc = '0' then
state <= IDLE;
end if;
when WB3_BUSY =>
if wb3_in.cyc = '0' then
state <= IDLE;
end if;
end case;
end if; end if;
end if; end if;
end process; end process;

@ -0,0 +1,175 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library work;
use work.wishbone_types.all;

-- Self-contained testbench (no ports, no generics); its architecture
-- instantiates work.wishbone_bram_wrapper and drives it over Wishbone.
entity wishbone_bram_tb is
end wishbone_bram_tb;

architecture behave of wishbone_bram_tb is
signal clk : std_ulogic;
signal rst : std_ulogic := '1';

constant clk_period : time := 10 ns;

signal w_in : wishbone_slave_out;
signal w_out : wishbone_master_out;

-- Turns an integer address into a vector exactly as wide as w_out.adr.
impure function to_adr(a: integer) return std_ulogic_vector is
    constant width : natural := w_out.adr'length;
    variable as_unsigned : unsigned(width - 1 downto 0);
begin
    as_unsigned := to_unsigned(a, width);
    return std_ulogic_vector(as_unsigned);
end;
begin
-- Unit under test: a 16-byte wishbone_bram_wrapper initialised from
-- wishbone_bram_tb.bin. The port names cross over on purpose: w_in/w_out are
-- named from the testbench (master) side, so the wrapper's wishbone_out
-- drives w_in and its wishbone_in is driven by w_out.
simple_ram_0: entity work.wishbone_bram_wrapper
generic map (
RAM_INIT_FILE => "wishbone_bram_tb.bin",
MEMORY_SIZE => 16
)
port map (
clk => clk,
rst => rst,
wishbone_out => w_in,
wishbone_in => w_out
);

-- Free-running clock: high for the first half of each clk_period, low for
-- the second half.
clock: process
    constant half_period : time := clk_period / 2;
begin
    clk <= '1';
    wait for half_period;
    clk <= '0';
    wait for half_period;
end process clock;

-- Stimulus: releases reset, then issues single-beat reads and writes and
-- checks the acknowledge timing and the data read back.
stim: process
    -- Presents one request (stb high for a single clock) after checking that
    -- no acknowledge is pending.
    procedure issue_request(byte_sel : std_ulogic_vector; address : integer) is
    begin
        w_out.stb <= '1';
        w_out.sel <= byte_sel;
        w_out.adr <= to_adr(address);
        assert w_in.ack = '0';
        wait until rising_edge(clk);
        w_out.stb <= '0';
    end procedure;

    -- Checks that the acknowledge has been removed by the next clock edge.
    procedure expect_ack_dropped is
    begin
        wait until rising_edge(clk);
        assert w_in.ack = '0';
    end procedure;

    -- Full 64-bit read: the acknowledge and data are checked two clock edges
    -- after the request has been taken.
    procedure check_read(address : integer; expected : std_ulogic_vector) is
    begin
        issue_request("11111111", address);
        wait until rising_edge(clk);
        wait until rising_edge(clk);
        assert w_in.ack = '1';
        assert w_in.dat(63 downto 0) = expected report to_hstring(w_in.dat);
        expect_ack_dropped;
    end procedure;

    -- Write with the given byte enables; waits for whichever clock edge
    -- carries the acknowledge. The caller sets we and dat beforehand.
    procedure do_write(byte_sel : std_ulogic_vector; address : integer) is
    begin
        issue_request(byte_sel, address);
        wait until rising_edge(clk) and w_in.ack = '1';
        expect_ack_dropped;
    end procedure;
begin
    -- Idle bus while reset is still asserted.
    w_out.adr <= (others => '0');
    w_out.dat <= (others => '0');
    w_out.cyc <= '0';
    w_out.stb <= '0';
    w_out.sel <= (others => '0');
    w_out.we <= '0';

    wait until rising_edge(clk);
    rst <= '0';
    wait until rising_edge(clk);

    w_out.cyc <= '1';

    -- Test read 0
    check_read(0, x"0706050403020100");

    -- Test read 8
    check_read(8, x"0F0E0D0C0B0A0908");

    -- Test write byte at 0
    -- (we/dat are scheduled in the same delta as stb/sel/adr)
    w_out.we <= '1';
    w_out.dat(7 downto 0) <= x"0F";
    do_write("00000001", 0);

    -- Test read back
    w_out.we <= '0';
    check_read(0, x"070605040302010F");

    -- Test write dword at 4 (bytes 4..7 of the word at address 0)
    w_out.we <= '1';
    w_out.dat(63 downto 32) <= x"BAADFEED";
    do_write("11110000", 0);

    -- Test read back
    w_out.we <= '0';
    check_read(0, x"BAADFEED0302010F");

    -- Test write qword at 8
    w_out.we <= '1';
    w_out.dat(63 downto 0) <= x"0001020304050607";
    do_write("11111111", 8);

    -- Test read back
    w_out.we <= '0';
    check_read(8, x"0001020304050607");

    -- A failure-severity assertion is how this testbench stops the simulator.
    assert false report "end of test" severity failure;
    wait;
end process;
end behave;

@ -0,0 +1,84 @@
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use std.textio.all;

library work;
use work.utils.all;
use work.wishbone_types.all;

--! @brief Simple memory module for use in Wishbone-based systems.
--!
--! Wraps work.main_bram (64 bits wide) behind a Wishbone slave interface.
entity wishbone_bram_wrapper is
generic(
MEMORY_SIZE : natural := 4096; --! Memory size in bytes.
--! Forwarded unchanged to main_bram's RAM_INIT_FILE generic.
RAM_INIT_FILE : string
);
port(
clk : in std_logic;
--! Synchronous reset: only examined on the rising edge of clk.
rst : in std_logic;

-- Wishbone interface:
--! Request signals coming from the Wishbone master.
wishbone_in : in wishbone_master_out;
--! Response signals (dat, ack, stall) returned to the master.
wishbone_out : out wishbone_slave_out
);
end entity wishbone_bram_wrapper;

-- Connects the Wishbone request signals straight to main_bram and generates
-- the acknowledge with a small clocked pipeline (ack -> ack_buf).
architecture behaviour of wishbone_bram_wrapper is
-- Address width of the RAM in 64-bit (8-byte) rows: the byte-address width
-- log2(MEMORY_SIZE) minus the 3 bits that select a byte within a row.
constant ram_addr_bits : integer := log2(MEMORY_SIZE) - 3;

-- RAM interface
signal ram_addr : std_logic_vector(ram_addr_bits - 1 downto 0);
signal ram_we : std_ulogic;
signal ram_re : std_ulogic;

-- Others
-- ack feeds ack_buf, and ack_buf drives wishbone_out.ack. Neither has an
-- initial value; both are first defined by the rst / cyc = '0' branch of wb_0.
signal ack, ack_buf : std_ulogic;
begin

-- Actual RAM template
ram_0: entity work.main_bram
generic map(
WIDTH => 64,
HEIGHT_BITS => ram_addr_bits,
MEMORY_SIZE => MEMORY_SIZE,
RAM_INIT_FILE => RAM_INIT_FILE
)
port map(
clk => clk,
addr => ram_addr,
di => wishbone_in.dat,
do => wishbone_out.dat,
sel => wishbone_in.sel,
re => ram_re,
we => ram_we
);

-- Wishbone interface
-- Row address: drop the three byte-offset bits of the Wishbone address.
ram_addr <= wishbone_in.adr(ram_addr_bits + 2 downto 3);
-- A request is only valid while both stb and cyc are asserted.
ram_we <= wishbone_in.stb and wishbone_in.cyc and wishbone_in.we;
ram_re <= wishbone_in.stb and wishbone_in.cyc and not wishbone_in.we;
-- This slave never stalls the master.
wishbone_out.stall <= '0';
wishbone_out.ack <= ack_buf;

-- Acknowledge generation.
wb_0: process(clk)
begin
if rising_edge(clk) then
if rst = '1' or wishbone_in.cyc = '0' then
-- Reset, or no bus cycle in progress: flush any pending acknowledge.
ack_buf <= '0';
ack <= '0';
else
-- On loads, we have a delay cycle due to BRAM buffering
-- but not on stores. So try to send an early ack on a
-- store if we aren't behind an existing load ack.
--
if ram_we = '1' and ack = '0' then
-- Store with nothing queued in ack: acknowledge on the next cycle.
ack_buf <= '1';
else
-- Otherwise the strobe takes two clock edges to reach the ack output
-- (stb -> ack -> ack_buf).
ack <= wishbone_in.stb;
ack_buf <= ack;
end if;
end if;
end if;
end process;

end architecture behaviour;

@ -124,7 +124,6 @@ begin


-- We always move WB cyc and stb simultaneously (no pipelining yet...) -- We always move WB cyc and stb simultaneously (no pipelining yet...)
wb_out.cyc <= '1' when state = WB_CYCLE else '0'; wb_out.cyc <= '1' when state = WB_CYCLE else '0';
wb_out.stb <= '1' when state = WB_CYCLE else '0';


-- Data latch. WB will take the read data away as soon as the cycle -- Data latch. WB will take the read data away as soon as the cycle
-- terminates but we must maintain it on DMI until req goes down, so -- terminates but we must maintain it on DMI until req goes down, so
@ -145,14 +144,23 @@ begin
if rising_edge(clk) then if rising_edge(clk) then
if (rst) then if (rst) then
state <= IDLE; state <= IDLE;
wb_out.stb <= '0';
else else
case state is case state is
when IDLE => when IDLE =>
if dmi_req = '1' and dmi_addr = DBG_WB_DATA then if dmi_req = '1' and dmi_addr = DBG_WB_DATA then
state <= WB_CYCLE; state <= WB_CYCLE;
wb_out.stb <= '1';
end if; end if;
when WB_CYCLE => when WB_CYCLE =>
if wb_in.stall = '0' then
wb_out.stb <= '0';
end if;
if wb_in.ack then if wb_in.ack then
-- We shouldn't get the ack if we hadn't already cleared
-- stb above, but if this happens, don't leave it dangling.
--
wb_out.stb <= '0';
state <= DMI_WAIT; state <= DMI_WAIT;
end if; end if;
when DMI_WAIT => when DMI_WAIT =>

@ -21,9 +21,13 @@ package wishbone_types is
constant wishbone_master_out_init : wishbone_master_out := (cyc => '0', stb => '0', we => '0', others => (others => '0')); constant wishbone_master_out_init : wishbone_master_out := (cyc => '0', stb => '0', we => '0', others => (others => '0'));


type wishbone_slave_out is record type wishbone_slave_out is record
dat : wishbone_data_type; dat : wishbone_data_type;
ack : std_ulogic; ack : std_ulogic;
stall : std_ulogic;
end record; end record;
constant wishbone_slave_out_init : wishbone_slave_out := (ack => '0', others => (others => '0')); constant wishbone_slave_out_init : wishbone_slave_out := (ack => '0', stall => '0', others => (others => '0'));

type wishbone_master_out_vector is array (natural range <>) of wishbone_master_out;
type wishbone_slave_out_vector is array (natural range <>) of wishbone_slave_out;


end package wishbone_types; end package wishbone_types;

@ -44,6 +44,7 @@ architecture behaviour of writeback is
signal sign_extend : std_ulogic; signal sign_extend : std_ulogic;
signal negative : std_ulogic; signal negative : std_ulogic;
signal second_word : std_ulogic; signal second_word : std_ulogic;
signal zero : std_ulogic;
begin begin
writeback_0: process(clk) writeback_0: process(clk)
begin begin
@ -155,7 +156,9 @@ begin


-- If the data can arrive split over two cycles, this will be correct -- If the data can arrive split over two cycles, this will be correct
-- provided we don't have both sign extension and byte reversal. -- provided we don't have both sign extension and byte reversal.
negative <= (data_len(2) and data_permuted(31)) or (data_len(1) and data_permuted(15)) or negative <= (data_len(3) and data_permuted(63)) or
(data_len(2) and data_permuted(31)) or
(data_len(1) and data_permuted(15)) or
(data_len(0) and data_permuted(7)); (data_len(0) and data_permuted(7));


-- trim and sign-extend -- trim and sign-extend
@ -170,12 +173,16 @@ begin
trim_ctl(i) <= '0' & (negative and sign_extend); trim_ctl(i) <= '0' & (negative and sign_extend);
end if; end if;
end loop; end loop;
zero <= not negative;
for i in 0 to 7 loop for i in 0 to 7 loop
case trim_ctl(i) is case trim_ctl(i) is
when "11" => when "11" =>
data_trimmed(i * 8 + 7 downto i * 8) <= data_latched(i * 8 + 7 downto i * 8); data_trimmed(i * 8 + 7 downto i * 8) <= data_latched(i * 8 + 7 downto i * 8);
when "10" => when "10" =>
data_trimmed(i * 8 + 7 downto i * 8) <= data_permuted(i * 8 + 7 downto i * 8); data_trimmed(i * 8 + 7 downto i * 8) <= data_permuted(i * 8 + 7 downto i * 8);
if or data_permuted(i * 8 + 7 downto i * 8) /= '0' then
zero <= '0';
end if;
when "01" => when "01" =>
data_trimmed(i * 8 + 7 downto i * 8) <= x"FF"; data_trimmed(i * 8 + 7 downto i * 8) <= x"FF";
when others => when others =>
@ -190,9 +197,9 @@ begin
if rc = '1' then if rc = '1' then
c_out.write_cr_enable <= '1'; c_out.write_cr_enable <= '1';
c_out.write_cr_mask <= num_to_fxm(0); c_out.write_cr_mask <= num_to_fxm(0);
if data_trimmed(63) = '1' then if negative = '1' then
c_out.write_cr_data <= x"80000000"; c_out.write_cr_data <= x"80000000";
elsif or (data_trimmed(62 downto 0)) = '1' then elsif zero = '0' then
c_out.write_cr_data <= x"40000000"; c_out.write_cr_data <= x"40000000";
else else
c_out.write_cr_data <= x"20000000"; c_out.write_cr_data <= x"20000000";

Loading…
Cancel
Save