diff --git a/Makefile b/Makefile index 3056c53..85a0262 100644 --- a/Makefile +++ b/Makefile @@ -2,8 +2,8 @@ GHDL=ghdl GHDLFLAGS=--std=08 -Psim-unisim CFLAGS=-O2 -Wall -all = core_tb simple_ram_behavioural_tb soc_reset_tb icache_tb dcache_tb multiply_tb dmi_dtm_tb divider_tb \ - rotator_tb countzero_tb +all = core_tb soc_reset_tb icache_tb dcache_tb multiply_tb dmi_dtm_tb divider_tb \ + rotator_tb countzero_tb wishbone_bram_tb # XXX # loadstore_tb fetch_tb @@ -35,10 +35,15 @@ helpers.o: cache_ram.o: plru.o: plru_tb.o: plru.o -icache.o: common.o wishbone_types.o plru.o cache_ram.o -icache_tb.o: common.o wishbone_types.o icache.o simple_ram_behavioural.o -dcache.o: common.o wishbone_types.o plru.o cache_ram.o -dcache_tb.o: common.o wishbone_types.o dcache.o simple_ram_behavioural.o +utils.o: +# Simulation block RAM model and its wishbone wrapper (used in place of simple_ram_behavioural) +sim_bram.o: sim_bram_helpers.o utils.o +wishbone_bram_wrapper.o: wishbone_types.o sim_bram.o utils.o +wishbone_bram_tb.o: wishbone_bram_wrapper.o +icache.o: utils.o common.o wishbone_types.o plru.o cache_ram.o +icache_tb.o: common.o wishbone_types.o icache.o wishbone_bram_wrapper.o +dcache.o: utils.o common.o wishbone_types.o plru.o cache_ram.o +dcache_tb.o: common.o wishbone_types.o dcache.o wishbone_bram_wrapper.o insn_helpers.o: loadstore1.o: common.o helpers.o logical.o: decode_types.o @@ -51,11 +56,8 @@ register_file.o: common.o rotator.o: common.o rotator_tb.o: common.o glibc_random.o ppc_fx_insns.o insn_helpers.o rotator.o sim_console.o: -simple_ram_behavioural_helpers.o: -simple_ram_behavioural_tb.o: wishbone_types.o simple_ram_behavioural.o -simple_ram_behavioural.o: wishbone_types.o simple_ram_behavioural_helpers.o sim_uart.o: wishbone_types.o sim_console.o -soc.o: common.o wishbone_types.o core.o wishbone_arbiter.o sim_uart.o simple_ram_behavioural.o dmi_dtm_xilinx.o wishbone_debug_master.o +soc.o: common.o wishbone_types.o core.o wishbone_arbiter.o sim_uart.o wishbone_bram_wrapper.o dmi_dtm_xilinx.o wishbone_debug_master.o wishbone_arbiter.o:
wishbone_types.o wishbone_types.o: writeback.o: common.o crhelpers.o @@ -73,17 +75,17 @@ fpga/soc_reset_tb.o: fpga/soc_reset.o soc_reset_tb: fpga/soc_reset_tb.o fpga/soc_reset.o $(GHDL) -e $(GHDLFLAGS) soc_reset_tb -core_tb: core_tb.o simple_ram_behavioural_helpers_c.o sim_console_c.o sim_jtag_socket_c.o - $(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o -Wl,sim_console_c.o -Wl,sim_jtag_socket_c.o $@ +core_tb: core_tb.o sim_bram_helpers_c.o sim_console_c.o sim_jtag_socket_c.o + $(GHDL) -e $(GHDLFLAGS) -Wl,sim_bram_helpers_c.o -Wl,sim_console_c.o -Wl,sim_jtag_socket_c.o $@ fetch_tb: fetch_tb.o $(GHDL) -e $(GHDLFLAGS) $@ -icache_tb: icache_tb.o - $(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o $@ +icache_tb: icache_tb.o sim_bram_helpers_c.o + $(GHDL) -e $(GHDLFLAGS) -Wl,sim_bram_helpers_c.o $@ -dcache_tb: dcache_tb.o - $(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o $@ +dcache_tb: dcache_tb.o sim_bram_helpers_c.o + $(GHDL) -e $(GHDLFLAGS) -Wl,sim_bram_helpers_c.o $@ plru_tb: plru_tb.o $(GHDL) -e $(GHDLFLAGS) $@ @@ -106,11 +108,11 @@ countzero_tb: countzero_tb.o simple_ram_tb: simple_ram_tb.o $(GHDL) -e $(GHDLFLAGS) $@ -simple_ram_behavioural_tb: simple_ram_behavioural_helpers_c.o simple_ram_behavioural_tb.o - $(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o $@ +wishbone_bram_tb: sim_bram_helpers_c.o wishbone_bram_tb.o + $(GHDL) -e $(GHDLFLAGS) -Wl,sim_bram_helpers_c.o $@ -dmi_dtm_tb: dmi_dtm_tb.o simple_ram_behavioural_helpers_c.o - $(GHDL) -e $(GHDLFLAGS) -Wl,simple_ram_behavioural_helpers_c.o $@ +dmi_dtm_tb: dmi_dtm_tb.o sim_bram_helpers_c.o + $(GHDL) -e $(GHDLFLAGS) -Wl,sim_bram_helpers_c.o $@ tests = $(sort $(patsubst tests/%.out,%,$(wildcard tests/*.out))) diff --git a/README.md b/README.md index 86e9e22..7c6bc11 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ make - Link in the micropython image: ``` -ln -s ../micropython/ports/powerpc/build/firmware.bin simple_ram_behavioural.bin +ln -s ../micropython/ports/powerpc/build/firmware.bin main_ram.bin ``` - Now run microwatt, sending debug
output to /dev/null: diff --git a/core.vhdl b/core.vhdl index 810a279..22f7dca 100644 --- a/core.vhdl +++ b/core.vhdl @@ -8,7 +8,8 @@ use work.wishbone_types.all; entity core is generic ( - SIM : boolean := false + SIM : boolean := false; + DISABLE_FLATTEN : boolean := false ); port ( clk : in std_logic; @@ -93,6 +94,29 @@ architecture behave of core is -- Debug status signal dbg_core_is_stopped: std_ulogic; + function keep_h(disable : boolean) return string is -- maps a boolean to the keep_hierarchy value: true gives "yes", false gives "no" + begin + if disable then + return "yes"; + else + return "no"; + end if; + end function; + attribute keep_hierarchy : string; -- applied below to every sub-unit label, driven by the DISABLE_FLATTEN generic + attribute keep_hierarchy of fetch1_0 : label is keep_h(DISABLE_FLATTEN); + attribute keep_hierarchy of icache_0 : label is keep_h(DISABLE_FLATTEN); + attribute keep_hierarchy of fetch2_0 : label is keep_h(DISABLE_FLATTEN); + attribute keep_hierarchy of decode1_0 : label is keep_h(DISABLE_FLATTEN); + attribute keep_hierarchy of decode2_0 : label is keep_h(DISABLE_FLATTEN); + attribute keep_hierarchy of register_file_0 : label is keep_h(DISABLE_FLATTEN); + attribute keep_hierarchy of cr_file_0 : label is keep_h(DISABLE_FLATTEN); + attribute keep_hierarchy of execute1_0 : label is keep_h(DISABLE_FLATTEN); + attribute keep_hierarchy of multiply_0 : label is keep_h(DISABLE_FLATTEN); + attribute keep_hierarchy of divider_0 : label is keep_h(DISABLE_FLATTEN); + attribute keep_hierarchy of loadstore1_0 : label is keep_h(DISABLE_FLATTEN); + attribute keep_hierarchy of dcache_0 : label is keep_h(DISABLE_FLATTEN); + attribute keep_hierarchy of writeback_0 : label is keep_h(DISABLE_FLATTEN); + attribute keep_hierarchy of debug_0 : label is keep_h(DISABLE_FLATTEN); begin core_rst <= dbg_core_rst or rst; diff --git a/core_tb.vhdl b/core_tb.vhdl index 672b424..90fc30c 100644 --- a/core_tb.vhdl +++ b/core_tb.vhdl @@ -20,7 +20,7 @@ begin generic map( SIM => true, MEMORY_SIZE => 524288, - RAM_INIT_FILE => "simple_ram_behavioural.bin", + RAM_INIT_FILE => "main_ram.bin", RESET_LOW => false ) port map(
diff --git a/dcache.vhdl b/dcache.vhdl index 7657dbd..7d6e74c 100644 --- a/dcache.vhdl +++ b/dcache.vhdl @@ -16,6 +16,7 @@ use ieee.std_logic_1164.all; use ieee.numeric_std.all; library work; +use work.utils.all; use work.common.all; use work.helpers.all; use work.wishbone_types.all; @@ -44,26 +45,6 @@ entity dcache is end entity dcache; architecture rtl of dcache is - function log2(i : natural) return integer is - variable tmp : integer := i; - variable ret : integer := 0; - begin - while tmp > 1 loop - ret := ret + 1; - tmp := tmp / 2; - end loop; - return ret; - end function; - - function ispow2(i : integer) return boolean is - begin - if to_integer(to_unsigned(i, 32) and to_unsigned(i - 1, 32)) = 0 then - return true; - else - return false; - end if; - end function; - -- BRAM organisation: We never access more than wishbone_data_bits at -- a time so to save resources we make the array only that wide, and -- use consecutive indices for to make a cache "line" @@ -187,6 +168,7 @@ architecture rtl of dcache is state : state_t; wb : wishbone_master_out; store_way : way_t; + store_row : row_t; store_index : index_t; end record; @@ -213,6 +195,7 @@ architecture rtl of dcache is signal req_hit_way : way_t; signal req_tag : cache_tag_t; signal req_op : op_t; + signal req_laddr : std_ulogic_vector(63 downto 0); -- Cache RAM interface type cache_ram_out_t is array(way_t) of cache_row_t; @@ -244,12 +227,21 @@ architecture rtl of dcache is end; -- Returns whether this is the last row of a line - function is_last_row(addr: wishbone_addr_type) return boolean is + function is_last_row_addr(addr: wishbone_addr_type) return boolean is constant ones : std_ulogic_vector(ROW_LINEBITS-1 downto 0) := (others => '1'); begin return addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS) = ones; end; + -- Returns whether this is the last row of a line + function is_last_row(row: row_t) return boolean is + variable row_v : std_ulogic_vector(ROW_BITS-1 downto 0); + constant ones : 
std_ulogic_vector(ROW_LINEBITS-1 downto 0) := (others => '1'); + begin + row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS)); + return row_v(ROW_LINEBITS-1 downto 0) = ones; + end; + -- Return the address of the next row in the current cache line function next_row_addr(addr: wishbone_addr_type) return std_ulogic_vector is variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0); @@ -263,6 +255,21 @@ architecture rtl of dcache is return result; end; + -- Return the next row in the current cache line. We use a dedicated + -- function in order to limit the size of the generated adder to be + -- only the bits within a cache line (3 bits with default settings) + -- + function next_row(row: row_t) return row_t is + variable row_v : std_ulogic_vector(ROW_BITS-1 downto 0); + variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0); + begin + row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS)); + row_idx := row_v(ROW_LINEBITS-1 downto 0); + -- Increment only the in-line bits so the row wraps within its cache line + row_v(ROW_LINEBITS-1 downto 0) := std_ulogic_vector(unsigned(row_idx) + 1); + return to_integer(unsigned(row_v)); + end; + -- Get the tag value from the address function get_tag(addr: std_ulogic_vector(63 downto 0)) return cache_tag_t is begin @@ -381,6 +388,12 @@ begin req_row <= get_row(d_in.addr); req_tag <= get_tag(d_in.addr); + -- Calculate address of beginning of cache line, will be + -- used for cache miss processing if needed + -- + req_laddr <= d_in.addr(63 downto LINE_OFF_BITS) & + (LINE_OFF_BITS-1 downto 0 => '0'); + -- Test if pending request is a hit on any way hit_way := 0; is_hit := '0'; @@ -573,7 +586,8 @@ begin wr_data => wr_data ); process(all) - variable tmp_adr : std_ulogic_vector(63 downto 0); + variable tmp_adr : std_ulogic_vector(63 downto 0); + variable reloading : boolean; begin -- Cache hit reads do_read <= '1'; @@ -596,17 +610,17 @@ begin -- Otherwise, we might be doing a reload wr_data <= wishbone_in.dat; wr_sel <= (others => '1'); -
tmp_adr := (r1.wb.adr'left downto 0 => r1.wb.adr, others => '0'); - wr_addr <= std_ulogic_vector(to_unsigned(get_row(tmp_adr), ROW_BITS)); + wr_addr <= std_ulogic_vector(to_unsigned(r1.store_row, ROW_BITS)); end if; -- The two actual write cases here do_write <= '0'; - if r1.state = RELOAD_WAIT_ACK and wishbone_in.ack = '1' and r1.store_way = i then + reloading := r1.state = RELOAD_WAIT_ACK; + if reloading and wishbone_in.ack = '1' and r1.store_way = i then do_write <= '1'; end if; if req_op = OP_STORE_HIT and req_hit_way = i then - assert r1.state /= RELOAD_WAIT_ACK report "Store hit while in state:" & + assert not reloading report "Store hit while in state:" & state_t'image(r1.state) severity FAILURE; do_write <= '1'; @@ -637,7 +651,7 @@ begin -- single issue on load/stores so we are fine, later, we can generate -- a stall output if necessary). - if d_in.valid = '1' then + if req_op /= OP_NONE then r1.req <= d_in; report "op:" & op_t'image(req_op) & @@ -672,7 +686,8 @@ begin -- operates at stage 1. -- dcache_slow : process(clk) - variable tagset : cache_tags_set_t; + variable tagset : cache_tags_set_t; + variable stbs_done : boolean; begin if rising_edge(clk) then -- On reset, clear all valid bits to force misses @@ -731,16 +746,18 @@ begin -- Keep track of our index and way for subsequent stores. r1.store_index <= req_index; r1.store_way <= replace_way; + r1.store_row <= get_row(req_laddr); -- Prep for first wishbone read. 
We calculate the address of - -- the start of the cache line + -- the start of the cache line and start the WB cycle -- - r1.wb.adr <= d_in.addr(r1.wb.adr'left downto LINE_OFF_BITS) & - (LINE_OFF_BITS-1 downto 0 => '0'); + r1.wb.adr <= req_laddr(r1.wb.adr'left downto 0); r1.wb.sel <= (others => '1'); r1.wb.we <= '0'; r1.wb.cyc <= '1'; r1.wb.stb <= '1'; + + -- Continue issuing requests and collecting acks in RELOAD_WAIT_ACK r1.state <= RELOAD_WAIT_ACK; when OP_LOAD_NC => @@ -770,6 +787,25 @@ begin end case; when RELOAD_WAIT_ACK => + -- Requests are all sent if stb is 0 + stbs_done := r1.wb.stb = '0'; + + -- If we are still sending requests, was one accepted ? + if wishbone_in.stall = '0' and not stbs_done then + -- That was the last word ? We are done sending. Clear + -- stb and set stbs_done so we can handle an eventual last + -- ack on the same cycle. + -- + if is_last_row_addr(r1.wb.adr) then + r1.wb.stb <= '0'; + stbs_done := true; + end if; + + -- Calculate the next row address + r1.wb.adr <= next_row_addr(r1.wb.adr); + end if; + + -- Incoming acks processing if wishbone_in.ack = '1' then -- Is this the data we were looking for ? Latch it so -- we can respond later. We don't currently complete the @@ -779,16 +815,17 @@ begin -- not idle, which we don't currently know how to deal -- with. -- - if r1.wb.adr(LINE_OFF_BITS-1 downto ROW_OFF_BITS) = - r1.req.addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS) then + if r1.store_row = get_row(r1.req.addr) then r1.slow_data <= wishbone_in.dat; end if; - -- That was the last word ? We are done - if is_last_row(r1.wb.adr) then - cache_valids(r1.store_index)(r1.store_way) <= '1'; + -- Check for completion + if stbs_done and is_last_row(r1.store_row) then + -- Complete wishbone cycle r1.wb.cyc <= '0'; - r1.wb.stb <= '0'; + + -- Cache line is now valid + cache_valids(r1.store_index)(r1.store_way) <= '1'; -- Complete the load that missed. For load with update -- we also need to do the deferred update cycle.
@@ -801,10 +838,10 @@ begin r1.state <= IDLE; report "completing miss !"; end if; - else - -- Otherwise, calculate the next row address - r1.wb.adr <= next_row_addr(r1.wb.adr); end if; + + -- Increment store row counter + r1.store_row <= next_row(r1.store_row); end if; when LOAD_UPDATE => @@ -816,7 +853,13 @@ begin r1.state <= IDLE; when STORE_WAIT_ACK | NC_LOAD_WAIT_ACK => - if wishbone_in.ack = '1' then + -- Clear stb when slave accepted request + if wishbone_in.stall = '0' then + r1.wb.stb <= '0'; + end if; + + -- Got ack ? complete. + if wishbone_in.ack = '1' then if r1.state = NC_LOAD_WAIT_ACK then r1.slow_data <= wishbone_in.dat; end if; diff --git a/dcache_tb.vhdl b/dcache_tb.vhdl index 0edbdb7..437fd7d 100644 --- a/dcache_tb.vhdl +++ b/dcache_tb.vhdl @@ -35,9 +35,9 @@ begin ); -- BRAM Memory slave - bram0: entity work.mw_soc_memory + bram0: entity work.wishbone_bram_wrapper generic map( - MEMORY_SIZE => 128, + MEMORY_SIZE => 1024, RAM_INIT_FILE => "icache_test.bin" ) port map( @@ -121,7 +121,6 @@ begin d_in.valid <= '1'; wait until rising_edge(clk); d_in.valid <= '0'; - wait until rising_edge(clk) and d_out.write_enable = '1'; assert d_out.valid = '1'; assert d_out.write_data = x"0000004100000040" @@ -130,7 +129,10 @@ begin " expected 0000004100000040" severity failure; - wait for clk_period*4; + wait until rising_edge(clk); + wait until rising_edge(clk); + wait until rising_edge(clk); + wait until rising_edge(clk); assert false report "end of test" severity failure; wait; diff --git a/decode_types.vhdl b/decode_types.vhdl index 2d85b27..9736f58 100644 --- a/decode_types.vhdl +++ b/decode_types.vhdl @@ -2,92 +2,93 @@ library ieee; use ieee.std_logic_1164.all; package decode_types is - type insn_type_t is (OP_ILLEGAL, OP_NOP, OP_ADD, - OP_ADDPCIS, OP_AND, OP_ATTN, OP_B, OP_BC, OP_BCREG, - OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPL, OP_CMPRB, - OP_CNTZ, OP_CRAND, - OP_CRANDC, OP_CREQV, OP_CRNAND, OP_CRNOR, OP_CROR, OP_CRORC, - OP_CRXOR, OP_DARN, OP_DCBF, 
OP_DCBST, OP_DCBT, OP_DCBTST, - OP_DCBZ, OP_DIV, OP_EXTS, - OP_EXTSWSLI, OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC, - OP_LOAD, OP_STORE, OP_MADDHD, OP_MADDHDU, OP_MADDLD, OP_MCRF, - OP_MCRXR, OP_MCRXRX, OP_MFCR, OP_MFSPR, OP_MOD, - OP_MTCRF, OP_MTSPR, OP_MUL_L64, - OP_MUL_H64, OP_MUL_H32, OP_OR, - OP_POPCNTB, OP_POPCNTD, OP_POPCNTW, OP_PRTYD, - OP_PRTYW, OP_RLC, OP_RLCL, OP_RLCR, OP_SETB, - OP_SHL, OP_SHR, - OP_SYNC, OP_TD, OP_TDI, OP_TW, - OP_TWI, OP_XOR, OP_SIM_CONFIG); - - type input_reg_a_t is (NONE, RA, RA_OR_ZERO); - type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD, CONST_DS, CONST_M1, CONST_SH, CONST_SH32); - type input_reg_c_t is (NONE, RS); - type output_reg_a_t is (NONE, RT, RA); - type rc_t is (NONE, ONE, RC); - type carry_in_t is (ZERO, CA, ONE); - - constant SH_OFFSET : integer := 0; - constant MB_OFFSET : integer := 1; - constant ME_OFFSET : integer := 1; - constant SH32_OFFSET : integer := 0; - constant MB32_OFFSET : integer := 1; - constant ME32_OFFSET : integer := 2; - - constant FXM_OFFSET : integer := 0; - - constant BO_OFFSET : integer := 0; - constant BI_OFFSET : integer := 1; - constant BH_OFFSET : integer := 2; - - constant BF_OFFSET : integer := 0; - constant L_OFFSET : integer := 1; - - constant TOO_OFFSET : integer := 0; - - type unit_t is (NONE, ALU, LDST, MUL, DIV); - type length_t is (NONE, is1B, is2B, is4B, is8B); - - type decode_rom_t is record - unit : unit_t; - insn_type : insn_type_t; - input_reg_a : input_reg_a_t; - input_reg_b : input_reg_b_t; - input_reg_c : input_reg_c_t; - output_reg_a : output_reg_a_t; - - input_cr : std_ulogic; - output_cr : std_ulogic; - - invert_a : std_ulogic; - invert_out : std_ulogic; - input_carry : carry_in_t; - output_carry : std_ulogic; - - -- load/store signals - length : length_t; - byte_reverse : std_ulogic; - sign_extend : std_ulogic; - update : std_ulogic; - reserve : std_ulogic; - - -- multiplier and ALU signals - is_32bit : std_ulogic; - is_signed : 
std_ulogic; - - rc : rc_t; - lr : std_ulogic; - - sgl_pipe : std_ulogic; - end record; - constant decode_rom_init : decode_rom_t := (unit => NONE, - insn_type => OP_ILLEGAL, input_reg_a => NONE, - input_reg_b => NONE, input_reg_c => NONE, - output_reg_a => NONE, input_cr => '0', output_cr => '0', - invert_a => '0', invert_out => '0', input_carry => ZERO, output_carry => '0', - length => NONE, byte_reverse => '0', sign_extend => '0', - update => '0', reserve => '0', is_32bit => '0', - is_signed => '0', rc => NONE, lr => '0', sgl_pipe => '0'); + type insn_type_t is (OP_ILLEGAL, OP_NOP, OP_ADD, + OP_ADDPCIS, OP_AND, OP_ATTN, OP_B, OP_BC, OP_BCREG, + OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPL, OP_CMPRB, + OP_CNTZ, OP_CRAND, + OP_CRANDC, OP_CREQV, OP_CRNAND, OP_CRNOR, OP_CROR, OP_CRORC, + OP_CRXOR, OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST, + OP_DCBZ, OP_DIV, OP_EXTS, + OP_EXTSWSLI, OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC, + OP_LOAD, OP_STORE, OP_MADDHD, OP_MADDHDU, OP_MADDLD, OP_MCRF, + OP_MCRXR, OP_MCRXRX, OP_MFCR, OP_MFSPR, OP_MOD, + OP_MTCRF, OP_MTSPR, OP_MUL_L64, + OP_MUL_H64, OP_MUL_H32, OP_OR, + OP_POPCNTB, OP_POPCNTD, OP_POPCNTW, OP_PRTYD, + OP_PRTYW, OP_RLC, OP_RLCL, OP_RLCR, OP_SETB, + OP_SHL, OP_SHR, + OP_SYNC, OP_TD, OP_TDI, OP_TW, + OP_TWI, OP_XOR, OP_SIM_CONFIG + ); + + type input_reg_a_t is (NONE, RA, RA_OR_ZERO); + type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD, CONST_DS, CONST_M1, CONST_SH, CONST_SH32); + type input_reg_c_t is (NONE, RS); + type output_reg_a_t is (NONE, RT, RA); + type rc_t is (NONE, ONE, RC); + type carry_in_t is (ZERO, CA, ONE); + + constant SH_OFFSET : integer := 0; + constant MB_OFFSET : integer := 1; + constant ME_OFFSET : integer := 1; + constant SH32_OFFSET : integer := 0; + constant MB32_OFFSET : integer := 1; + constant ME32_OFFSET : integer := 2; + + constant FXM_OFFSET : integer := 0; + + constant BO_OFFSET : integer := 0; + constant BI_OFFSET : integer := 1; + constant 
BH_OFFSET : integer := 2; + + constant BF_OFFSET : integer := 0; + constant L_OFFSET : integer := 1; + + constant TOO_OFFSET : integer := 0; + + type unit_t is (NONE, ALU, LDST, MUL, DIV); + type length_t is (NONE, is1B, is2B, is4B, is8B); + + type decode_rom_t is record + unit : unit_t; + insn_type : insn_type_t; + input_reg_a : input_reg_a_t; + input_reg_b : input_reg_b_t; + input_reg_c : input_reg_c_t; + output_reg_a : output_reg_a_t; + + input_cr : std_ulogic; + output_cr : std_ulogic; + + invert_a : std_ulogic; + invert_out : std_ulogic; + input_carry : carry_in_t; + output_carry : std_ulogic; + + -- load/store signals + length : length_t; + byte_reverse : std_ulogic; + sign_extend : std_ulogic; + update : std_ulogic; + reserve : std_ulogic; + + -- multiplier and ALU signals + is_32bit : std_ulogic; + is_signed : std_ulogic; + + rc : rc_t; + lr : std_ulogic; + + sgl_pipe : std_ulogic; + end record; + constant decode_rom_init : decode_rom_t := (unit => NONE, + insn_type => OP_ILLEGAL, input_reg_a => NONE, + input_reg_b => NONE, input_reg_c => NONE, + output_reg_a => NONE, input_cr => '0', output_cr => '0', + invert_a => '0', invert_out => '0', input_carry => ZERO, output_carry => '0', + length => NONE, byte_reverse => '0', sign_extend => '0', + update => '0', reserve => '0', is_32bit => '0', + is_signed => '0', rc => NONE, lr => '0', sgl_pipe => '0'); end decode_types; diff --git a/dmi_dtm_tb.vhdl b/dmi_dtm_tb.vhdl index fe60c12..0694266 100644 --- a/dmi_dtm_tb.vhdl +++ b/dmi_dtm_tb.vhdl @@ -50,8 +50,8 @@ begin dmi_ack => dmi_ack ); - simple_ram_0: entity work.mw_soc_memory - generic map(RAM_INIT_FILE => "simple_ram_behavioural.bin", + simple_ram_0: entity work.wishbone_bram_wrapper + generic map(RAM_INIT_FILE => "main_ram.bin", MEMORY_SIZE => 524288) port map(clk => clk, rst => rst, wishbone_in => wishbone_ram_out, diff --git a/fpga/main_bram.vhdl b/fpga/main_bram.vhdl new file mode 100644 index 0000000..810d60c --- /dev/null +++ b/fpga/main_bram.vhdl @@ -0,0 
+1,82 @@ +-- Single port Block RAM with one cycle output buffer + +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; +use std.textio.all; + +library work; + +entity main_bram is + generic( + WIDTH : natural := 64; -- data width in bits; sel carries one enable bit per byte + HEIGHT_BITS : natural := 13; -- width of addr in bits; 2**13 rows matches the default MEMORY_SIZE at 8 bytes per row + MEMORY_SIZE : natural := 65536; -- memory size in bytes + RAM_INIT_FILE : string -- text file with one hex word per line, used as the initial RAM contents + ); + port( + clk : in std_logic; + addr : in std_logic_vector(HEIGHT_BITS - 1 downto 0) ; + di : in std_logic_vector(WIDTH-1 downto 0); + do : out std_logic_vector(WIDTH-1 downto 0); + sel : in std_logic_vector((WIDTH/8)-1 downto 0); + re : in std_ulogic; + we : in std_ulogic + ); +end entity main_bram; + +architecture behaviour of main_bram is + + constant WIDTH_BYTES : natural := WIDTH / 8; + + -- RAM type definition + type ram_t is array(0 to (MEMORY_SIZE / WIDTH_BYTES) - 1) of std_logic_vector(WIDTH-1 downto 0); + + -- RAM loading + impure function init_ram(name : STRING) return ram_t is + file ram_file : text open read_mode is name; + variable ram_line : line; + variable temp_word : std_logic_vector(WIDTH-1 downto 0); + variable temp_ram : ram_t := (others => (others => '0')); + begin + for i in 0 to (MEMORY_SIZE / WIDTH_BYTES) - 1 loop + exit when endfile(ram_file); + readline(ram_file, ram_line); + hread(ram_line, temp_word); + temp_ram(i) := temp_word; + end loop; + + return temp_ram; + end function; + + -- RAM instance + signal memory : ram_t := init_ram(RAM_INIT_FILE); + attribute ram_style : string; + attribute ram_style of memory : signal is "block"; + attribute ram_decomp : string; + attribute ram_decomp of memory : signal is "power"; + + -- Others + signal obuf : std_logic_vector(WIDTH-1 downto 0); +begin + + -- Actual RAM template + memory_0: process(clk) + begin + if rising_edge(clk) then + if we = '1' then + for i in 0 to WIDTH_BYTES - 1 loop -- one byte lane per sel bit, for any WIDTH + if sel(i) = '1' then + memory(to_integer(unsigned(addr)))((i + 1) * 8 - 1 downto i * 8) <= + di((i + 1) * 8 - 1 downto i * 8); + end if; + end loop; + end if; + if re = '1'
then + obuf <= memory(to_integer(unsigned(addr))); + end if; + do <= obuf; -- output register: do takes the value obuf held before this clock edge + end if; + end process; + +end architecture behaviour; diff --git a/fpga/mw_soc_memory.vhdl b/fpga/mw_soc_memory.vhdl deleted file mode 100644 index e9ace36..0000000 --- a/fpga/mw_soc_memory.vhdl +++ /dev/null @@ -1,106 +0,0 @@ --- Based on: --- The Potato Processor - A simple processor for FPGAs --- (c) Kristian Klomsten Skordal 2014 - 2015 - -library ieee; -use ieee.std_logic_1164.all; -use ieee.numeric_std.all; -use std.textio.all; - -library work; -use work.wishbone_types.all; - -use work.pp_utilities.all; - ---! @brief Simple memory module for use in Wishbone-based systems. -entity mw_soc_memory is - generic( - MEMORY_SIZE : natural := 4096; --! Memory size in bytes. - RAM_INIT_FILE : string - ); - port( - clk : in std_logic; - rst : in std_logic; - - -- Wishbone interface: - wishbone_in : in wishbone_master_out; - wishbone_out : out wishbone_slave_out - ); -end entity mw_soc_memory; - -architecture behaviour of mw_soc_memory is - signal wb_adr_in : std_logic_vector(log2(MEMORY_SIZE) - 1 downto 0); - type ram_t is array(0 to (MEMORY_SIZE / 8) - 1) of std_logic_vector(63 downto 0); - - impure function init_ram(name : STRING) return ram_t is - file ram_file : text open read_mode is name; - variable ram_line : line; - variable temp_word : std_logic_vector(63 downto 0); - variable temp_ram : ram_t := (others => (others => '0')); - begin - for i in 0 to (MEMORY_SIZE/8)-1 loop - exit when endfile(ram_file); - readline(ram_file, ram_line); - hread(ram_line, temp_word); - temp_ram(i) := temp_word; - end loop; - - return temp_ram; - end function; - - signal memory : ram_t := init_ram(RAM_INIT_FILE); - - attribute ram_style : string; - attribute ram_style of memory : signal is "block"; - - attribute ram_decomp : string; - attribute ram_decomp of memory : signal is "power"; - - type state_type is (IDLE, ACK); - signal state : state_type; - - signal read_ack : std_logic; - -begin - - wb_adr_in <=
wishbone_in.adr(log2(MEMORY_SIZE) - 1 downto 0); - - wishbone_out.ack <= read_ack and wishbone_in.stb; - - memory_0: process(clk) - begin - if rising_edge(clk) then - if rst = '1' then - read_ack <= '0'; - state <= IDLE; - else - if wishbone_in.cyc = '1' then - case state is - when IDLE => - if wishbone_in.stb = '1' and wishbone_in.we = '1' then - for i in 0 to 7 loop - if wishbone_in.sel(i) = '1' then - memory(to_integer(unsigned(wb_adr_in(wb_adr_in'left downto 3))))(((i + 1) * 8) - 1 downto i * 8) - <= wishbone_in.dat(((i + 1) * 8) - 1 downto i * 8); - end if; - end loop; - read_ack <= '1'; - state <= ACK; - elsif wishbone_in.stb = '1' then - wishbone_out.dat <= memory(to_integer(unsigned(wb_adr_in(wb_adr_in'left downto 3)))); - read_ack <= '1'; - state <= ACK; - end if; - when ACK => - read_ack <= '0'; - state <= IDLE; - end case; - else - state <= IDLE; - read_ack <= '0'; - end if; - end if; - end if; - end process; - -end architecture behaviour; diff --git a/fpga/pp_soc_uart.vhd b/fpga/pp_soc_uart.vhd index 1d5c629..879ea26 100644 --- a/fpga/pp_soc_uart.vhd +++ b/fpga/pp_soc_uart.vhd @@ -34,351 +34,353 @@ use ieee.numeric_std.all; --! - Bit 0: data received (receive buffer not empty) --! - Bit 1: ready to send data (transmit buffer empty) entity pp_soc_uart is - generic( - FIFO_DEPTH : natural := 64 --! Depth of the input and output FIFOs. + generic( + FIFO_DEPTH : natural := 64 --! Depth of the input and output FIFOs. 
); - port( - clk : in std_logic; - reset : in std_logic; - - -- UART ports: - txd : out std_logic; - rxd : in std_logic; - - -- Interrupt signal: - irq : out std_logic; - - -- Wishbone ports: - wb_adr_in : in std_logic_vector(11 downto 0); - wb_dat_in : in std_logic_vector( 7 downto 0); - wb_dat_out : out std_logic_vector( 7 downto 0); - wb_we_in : in std_logic; - wb_cyc_in : in std_logic; - wb_stb_in : in std_logic; - wb_ack_out : out std_logic + port( + clk : in std_logic; + reset : in std_logic; + + -- UART ports: + txd : out std_logic; + rxd : in std_logic; + + -- Interrupt signal: + irq : out std_logic; + + -- Wishbone ports: + wb_adr_in : in std_logic_vector(11 downto 0); + wb_dat_in : in std_logic_vector( 7 downto 0); + wb_dat_out : out std_logic_vector( 7 downto 0); + wb_we_in : in std_logic; + wb_cyc_in : in std_logic; + wb_stb_in : in std_logic; + wb_ack_out : out std_logic ); end entity pp_soc_uart; architecture behaviour of pp_soc_uart is - subtype bitnumber is natural range 0 to 7; --! Type representing the index of a bit. + subtype bitnumber is natural range 0 to 7; --! Type representing the index of a bit. 
- -- UART sample clock signals: - signal sample_clk : std_logic; - signal sample_clk_divisor : std_logic_vector(7 downto 0); - signal sample_clk_counter : std_logic_vector(sample_clk_divisor'range); + -- UART sample clock signals: + signal sample_clk : std_logic; + signal sample_clk_divisor : std_logic_vector(7 downto 0); + signal sample_clk_counter : std_logic_vector(sample_clk_divisor'range); - -- UART receive process signals: - type rx_state_type is (IDLE, RECEIVE, STARTBIT, STOPBIT); - signal rx_state : rx_state_type; - signal rx_byte : std_logic_vector(7 downto 0); - signal rx_current_bit : bitnumber; + -- UART receive process signals: + type rx_state_type is (IDLE, RECEIVE, STARTBIT, STOPBIT); + signal rx_state : rx_state_type; + signal rx_byte : std_logic_vector(7 downto 0); + signal rx_current_bit : bitnumber; - subtype rx_sample_counter_type is natural range 0 to 15; - signal rx_sample_counter : rx_sample_counter_type; - signal rx_sample_value : rx_sample_counter_type; + subtype rx_sample_counter_type is natural range 0 to 15; + signal rx_sample_counter : rx_sample_counter_type; + signal rx_sample_value : rx_sample_counter_type; - subtype rx_sample_delay_type is natural range 0 to 7; - signal rx_sample_delay : rx_sample_delay_type; + subtype rx_sample_delay_type is natural range 0 to 7; + signal rx_sample_delay : rx_sample_delay_type; - -- UART transmit process signals: - type tx_state_type is (IDLE, TRANSMIT, STOPBIT); - signal tx_state : tx_state_type; - signal tx_byte : std_logic_vector(7 downto 0); - signal tx_current_bit : bitnumber; + -- UART transmit process signals: + type tx_state_type is (IDLE, TRANSMIT, STOPBIT); + signal tx_state : tx_state_type; + signal tx_byte : std_logic_vector(7 downto 0); + signal tx_current_bit : bitnumber; - -- UART transmit clock: - subtype uart_tx_counter_type is natural range 0 to 15; - signal uart_tx_counter : uart_tx_counter_type := 0; - signal uart_tx_clk : std_logic; + -- UART transmit clock: + subtype 
uart_tx_counter_type is natural range 0 to 15; + signal uart_tx_counter : uart_tx_counter_type := 0; + signal uart_tx_clk : std_logic; - -- Buffer signals: - signal send_buffer_full, send_buffer_empty : std_logic; - signal recv_buffer_full, recv_buffer_empty : std_logic; - signal send_buffer_input, send_buffer_output : std_logic_vector(7 downto 0); - signal recv_buffer_input, recv_buffer_output : std_logic_vector(7 downto 0); - signal send_buffer_push, send_buffer_pop : std_logic := '0'; - signal recv_buffer_push, recv_buffer_pop : std_logic := '0'; + -- Buffer signals: + signal send_buffer_full, send_buffer_empty : std_logic; + signal recv_buffer_full, recv_buffer_empty : std_logic; + signal send_buffer_input, send_buffer_output : std_logic_vector(7 downto 0); + signal recv_buffer_input, recv_buffer_output : std_logic_vector(7 downto 0); + signal send_buffer_push, send_buffer_pop : std_logic := '0'; + signal recv_buffer_push, recv_buffer_pop : std_logic := '0'; - -- IRQ enable signals: - signal irq_recv_enable, irq_tx_ready_enable : std_logic := '0'; + -- IRQ enable signals: + signal irq_recv_enable, irq_tx_ready_enable : std_logic := '0'; - -- Wishbone signals: - type wb_state_type is (IDLE, WRITE_ACK, READ_ACK); - signal wb_state : wb_state_type; + -- Wishbone signals: + type wb_state_type is (IDLE, WRITE_ACK, READ_ACK); + signal wb_state : wb_state_type; - signal wb_ack : std_logic; --! Wishbone acknowledge signal + signal wb_ack : std_logic; --! 
Wishbone acknowledge signal begin - irq <= (irq_recv_enable and (not recv_buffer_empty)) - or (irq_tx_ready_enable and send_buffer_empty); - - ---------- UART receive ---------- - - recv_buffer_input <= rx_byte; - - uart_receive: process(clk) - begin - if rising_edge(clk) then - if reset = '1' then - rx_state <= IDLE; - recv_buffer_push <= '0'; + irq <= (irq_recv_enable and (not recv_buffer_empty)) + or (irq_tx_ready_enable and send_buffer_empty); + + ---------- UART receive ---------- + + recv_buffer_input <= rx_byte; + + uart_receive: process(clk) + begin + if rising_edge(clk) then + if reset = '1' then + rx_state <= IDLE; + recv_buffer_push <= '0'; + else + case rx_state is + when IDLE => + if recv_buffer_push = '1' then + recv_buffer_push <= '0'; + end if; + + if sample_clk = '1' and rxd = '0' then + rx_sample_value <= rx_sample_counter; + rx_sample_delay <= 0; + rx_current_bit <= 0; + rx_state <= STARTBIT; + end if; + when STARTBIT => + if sample_clk = '1' then + if rx_sample_delay = 7 then + rx_state <= RECEIVE; + rx_sample_value <= rx_sample_counter; + rx_sample_delay <= 0; else - case rx_state is - when IDLE => - if recv_buffer_push = '1' then - recv_buffer_push <= '0'; - end if; - - if sample_clk = '1' and rxd = '0' then - rx_sample_value <= rx_sample_counter; - rx_sample_delay <= 0; - rx_current_bit <= 0; - rx_state <= STARTBIT; - end if; - when STARTBIT => - if sample_clk = '1' then - if rx_sample_delay = 7 then - rx_state <= RECEIVE; - rx_sample_value <= rx_sample_counter; - rx_sample_delay <= 0; - else - rx_sample_delay <= rx_sample_delay + 1; - end if; - end if; - when RECEIVE => - if sample_clk = '1' and rx_sample_counter = rx_sample_value then - if rx_current_bit /= 7 then - rx_byte(rx_current_bit) <= rxd; - rx_current_bit <= rx_current_bit + 1; - else - rx_byte(rx_current_bit) <= rxd; - rx_state <= STOPBIT; - end if; - end if; - when STOPBIT => - if sample_clk = '1' and rx_sample_counter = rx_sample_value then - rx_state <= IDLE; - - if 
recv_buffer_full = '0' then - recv_buffer_push <= '1'; - end if; - end if; - end case; + rx_sample_delay <= rx_sample_delay + 1; end if; - end if; - end process uart_receive; - - sample_counter: process(clk) - begin - if rising_edge(clk) then - if reset = '1' then - rx_sample_counter <= 0; - elsif sample_clk = '1' then - if rx_sample_counter = 15 then - rx_sample_counter <= 0; - else - rx_sample_counter <= rx_sample_counter + 1; - end if; + end if; + when RECEIVE => + if sample_clk = '1' and rx_sample_counter = rx_sample_value then + if rx_current_bit /= 7 then + rx_byte(rx_current_bit) <= rxd; + rx_current_bit <= rx_current_bit + 1; + else + rx_byte(rx_current_bit) <= rxd; + rx_state <= STOPBIT; end if; - end if; - end process sample_counter; - - ---------- UART transmit ---------- + end if; + when STOPBIT => + if sample_clk = '1' and rx_sample_counter = rx_sample_value then + rx_state <= IDLE; - tx_byte <= send_buffer_output; - - uart_transmit: process(clk) - begin - if rising_edge(clk) then - if reset = '1' then - txd <= '1'; - tx_state <= IDLE; - send_buffer_pop <= '0'; - tx_current_bit <= 0; - else - case tx_state is - when IDLE => - if send_buffer_empty = '0' and uart_tx_clk = '1' then - txd <= '0'; - send_buffer_pop <= '1'; - tx_current_bit <= 0; - tx_state <= TRANSMIT; - elsif uart_tx_clk = '1' then - txd <= '1'; - end if; - when TRANSMIT => - if send_buffer_pop = '1' then - send_buffer_pop <= '0'; - elsif uart_tx_clk = '1' and tx_current_bit = 7 then - txd <= tx_byte(tx_current_bit); - tx_state <= STOPBIT; - elsif uart_tx_clk = '1' then - txd <= tx_byte(tx_current_bit); - tx_current_bit <= tx_current_bit + 1; - end if; - when STOPBIT => - if uart_tx_clk = '1' then - txd <= '1'; - tx_state <= IDLE; - end if; - end case; + if recv_buffer_full = '0' then + recv_buffer_push <= '1'; end if; + end if; + end case; + end if; + end if; + end process uart_receive; + + sample_counter: process(clk) + begin + if rising_edge(clk) then + if reset = '1' then + 
rx_sample_counter <= 0; + elsif sample_clk = '1' then + if rx_sample_counter = 15 then + rx_sample_counter <= 0; + else + rx_sample_counter <= rx_sample_counter + 1; end if; - end process uart_transmit; - - uart_tx_clock_generator: process(clk) - begin - if rising_edge(clk) then - if reset = '1' then - uart_tx_counter <= 0; - uart_tx_clk <= '0'; - else - if sample_clk = '1' then - if uart_tx_counter = 15 then - uart_tx_counter <= 0; - uart_tx_clk <= '1'; - else - uart_tx_counter <= uart_tx_counter + 1; - uart_tx_clk <= '0'; - end if; - else - uart_tx_clk <= '0'; - end if; - end if; + end if; + end if; + end process sample_counter; + + ---------- UART transmit ---------- + + tx_byte <= send_buffer_output; + + uart_transmit: process(clk) + begin + if rising_edge(clk) then + if reset = '1' then + txd <= '1'; + tx_state <= IDLE; + send_buffer_pop <= '0'; + tx_current_bit <= 0; + else + case tx_state is + when IDLE => + if send_buffer_empty = '0' and uart_tx_clk = '1' then + txd <= '0'; + send_buffer_pop <= '1'; + tx_current_bit <= 0; + tx_state <= TRANSMIT; + elsif uart_tx_clk = '1' then + txd <= '1'; + end if; + when TRANSMIT => + if send_buffer_pop = '1' then + send_buffer_pop <= '0'; + elsif uart_tx_clk = '1' and tx_current_bit = 7 then + txd <= tx_byte(tx_current_bit); + tx_state <= STOPBIT; + elsif uart_tx_clk = '1' then + txd <= tx_byte(tx_current_bit); + tx_current_bit <= tx_current_bit + 1; + end if; + when STOPBIT => + if uart_tx_clk = '1' then + txd <= '1'; + tx_state <= IDLE; + end if; + end case; + end if; + end if; + end process uart_transmit; + + uart_tx_clock_generator: process(clk) + begin + if rising_edge(clk) then + if reset = '1' then + uart_tx_counter <= 0; + uart_tx_clk <= '0'; + else + if sample_clk = '1' then + if uart_tx_counter = 15 then + uart_tx_counter <= 0; + uart_tx_clk <= '1'; + else + uart_tx_counter <= uart_tx_counter + 1; + uart_tx_clk <= '0'; + end if; + else + uart_tx_clk <= '0'; end if; - end process uart_tx_clock_generator; - - 
---------- Sample clock generator ---------- - - sample_clock_generator: process(clk) - begin - if rising_edge(clk) then - if reset = '1' then - sample_clk_counter <= (others => '0'); - sample_clk <= '0'; - else - if sample_clk_divisor /= x"00" then - if sample_clk_counter = sample_clk_divisor then - sample_clk_counter <= (others => '0'); - sample_clk <= '1'; - else - sample_clk_counter <= std_logic_vector(unsigned(sample_clk_counter) + 1); - sample_clk <= '0'; - end if; - end if; - end if; + end if; + end if; + end process uart_tx_clock_generator; + + ---------- Sample clock generator ---------- + + sample_clock_generator: process(clk) + begin + if rising_edge(clk) then + if reset = '1' then + sample_clk_counter <= (others => '0'); + sample_clk <= '0'; + else + if sample_clk_divisor /= x"00" then + if sample_clk_counter = sample_clk_divisor then + sample_clk_counter <= (others => '0'); + sample_clk <= '1'; + else + sample_clk_counter <= std_logic_vector(unsigned(sample_clk_counter) + 1); + sample_clk <= '0'; + end if; end if; - end process sample_clock_generator; - - ---------- Data Buffers ---------- - - send_buffer: entity work.pp_fifo - generic map( - DEPTH => FIFO_DEPTH, - WIDTH => 8 - ) port map( - clk => clk, - reset => reset, - full => send_buffer_full, - empty => send_buffer_empty, - data_in => send_buffer_input, - data_out => send_buffer_output, - push => send_buffer_push, - pop => send_buffer_pop + end if; + end if; + end process sample_clock_generator; + + ---------- Data Buffers ---------- + + send_buffer: entity work.pp_fifo + generic map( + DEPTH => FIFO_DEPTH, + WIDTH => 8 + ) port map( + clk => clk, + reset => reset, + full => send_buffer_full, + empty => send_buffer_empty, + data_in => send_buffer_input, + data_out => send_buffer_output, + push => send_buffer_push, + pop => send_buffer_pop ); - recv_buffer: entity work.pp_fifo - generic map( - DEPTH => FIFO_DEPTH, - WIDTH => 8 - ) port map( - clk => clk, - reset => reset, - full => 
recv_buffer_full, - empty => recv_buffer_empty, - data_in => recv_buffer_input, - data_out => recv_buffer_output, - push => recv_buffer_push, - pop => recv_buffer_pop + recv_buffer: entity work.pp_fifo + generic map( + DEPTH => FIFO_DEPTH, + WIDTH => 8 + ) port map( + clk => clk, + reset => reset, + full => recv_buffer_full, + empty => recv_buffer_empty, + data_in => recv_buffer_input, + data_out => recv_buffer_output, + push => recv_buffer_push, + pop => recv_buffer_pop ); - ---------- Wishbone Interface ---------- - - wb_ack_out <= wb_ack and wb_cyc_in and wb_stb_in; - - wishbone: process(clk) - begin - if rising_edge(clk) then - if reset = '1' then - wb_ack <= '0'; - wb_state <= IDLE; - send_buffer_push <= '0'; - recv_buffer_pop <= '0'; - sample_clk_divisor <= (others => '0'); - irq_recv_enable <= '0'; - irq_tx_ready_enable <= '0'; - else - case wb_state is - when IDLE => - if wb_cyc_in = '1' and wb_stb_in = '1' then - if wb_we_in = '1' then -- Write to register - if wb_adr_in = x"000" then - send_buffer_input <= wb_dat_in; - send_buffer_push <= '1'; - elsif wb_adr_in = x"018" then - sample_clk_divisor <= wb_dat_in; - elsif wb_adr_in = x"020" then - irq_recv_enable <= wb_dat_in(0); - irq_tx_ready_enable <= wb_dat_in(1); - end if; - - -- Invalid writes are acked and ignored. 
- - wb_ack <= '1'; - wb_state <= WRITE_ACK; - else -- Read from register - if wb_adr_in = x"008" then - recv_buffer_pop <= '1'; - elsif wb_adr_in = x"010" then - wb_dat_out <= x"0" & send_buffer_full & recv_buffer_full & send_buffer_empty & recv_buffer_empty; - wb_ack <= '1'; - elsif wb_adr_in = x"018" then - wb_dat_out <= sample_clk_divisor; - wb_ack <= '1'; - elsif wb_adr_in = x"020" then - wb_dat_out <= (0 => irq_recv_enable, 1 => irq_tx_ready_enable, others => '0'); - wb_ack <= '1'; - else - wb_dat_out <= (others => '0'); - wb_ack <= '1'; - end if; - wb_state <= READ_ACK; - end if; - end if; - when WRITE_ACK => - send_buffer_push <= '0'; - - if wb_stb_in = '0' then - wb_ack <= '0'; - wb_state <= IDLE; - end if; - when READ_ACK => - if recv_buffer_pop = '1' then - recv_buffer_pop <= '0'; - else - wb_dat_out <= recv_buffer_output; - wb_ack <= '1'; - end if; - - if wb_stb_in = '0' then - wb_ack <= '0'; - wb_state <= IDLE; - end if; - end case; + ---------- Wishbone Interface ---------- + + wb_ack_out <= wb_ack and wb_cyc_in and wb_stb_in; + + wishbone: process(clk) + begin + if rising_edge(clk) then + if reset = '1' then + wb_ack <= '0'; + wb_state <= IDLE; + send_buffer_push <= '0'; + recv_buffer_pop <= '0'; + sample_clk_divisor <= (others => '0'); + irq_recv_enable <= '0'; + irq_tx_ready_enable <= '0'; + else + case wb_state is + when IDLE => + if wb_cyc_in = '1' and wb_stb_in = '1' then + if wb_we_in = '1' then -- Write to register + if wb_adr_in = x"000" then + send_buffer_input <= wb_dat_in; + send_buffer_push <= '1'; + elsif wb_adr_in = x"018" then + sample_clk_divisor <= wb_dat_in; + elsif wb_adr_in = x"020" then + irq_recv_enable <= wb_dat_in(0); + irq_tx_ready_enable <= wb_dat_in(1); + end if; + + -- Invalid writes are acked and ignored. 
+ wb_ack <= '1'; + wb_state <= WRITE_ACK; + else -- Read from register + if wb_adr_in = x"008" then + recv_buffer_pop <= '1'; + elsif wb_adr_in = x"010" then + wb_dat_out <= x"0" & send_buffer_full & recv_buffer_full & + send_buffer_empty & recv_buffer_empty; + wb_ack <= '1'; + elsif wb_adr_in = x"018" then + wb_dat_out <= sample_clk_divisor; + wb_ack <= '1'; + elsif wb_adr_in = x"020" then + wb_dat_out <= (0 => irq_recv_enable, + 1 => irq_tx_ready_enable, + others => '0'); + wb_ack <= '1'; + else + wb_dat_out <= (others => '0'); + wb_ack <= '1'; + end if; + wb_state <= READ_ACK; end if; - end if; - end process wishbone; + end if; + when WRITE_ACK => + send_buffer_push <= '0'; + + if wb_stb_in = '0' then + wb_ack <= '0'; + wb_state <= IDLE; + end if; + when READ_ACK => + if recv_buffer_pop = '1' then + recv_buffer_pop <= '0'; + else + wb_dat_out <= recv_buffer_output; + wb_ack <= '1'; + end if; + + if wb_stb_in = '0' then + wb_ack <= '0'; + wb_state <= IDLE; + end if; + end case; + end if; + end if; + end process wishbone; end architecture behaviour; diff --git a/fpga/toplevel.vhdl b/fpga/toplevel.vhdl index d73c802..38af730 100644 --- a/fpga/toplevel.vhdl +++ b/fpga/toplevel.vhdl @@ -7,7 +7,8 @@ entity toplevel is RAM_INIT_FILE : string := "firmware.hex"; RESET_LOW : boolean := true; CLK_INPUT : positive := 100000000; - CLK_FREQUENCY : positive := 100000000 + CLK_FREQUENCY : positive := 100000000; + DISABLE_FLATTEN_CORE : boolean := false ); port( ext_clk : in std_ulogic; @@ -62,7 +63,8 @@ begin MEMORY_SIZE => MEMORY_SIZE, RAM_INIT_FILE => RAM_INIT_FILE, RESET_LOW => RESET_LOW, - SIM => false + SIM => false, + DISABLE_FLATTEN_CORE => DISABLE_FLATTEN_CORE ) port map ( system_clk => system_clk, diff --git a/icache.vhdl b/icache.vhdl index fccff9a..20d5724 100644 --- a/icache.vhdl +++ b/icache.vhdl @@ -21,6 +21,7 @@ use ieee.std_logic_1164.all; use ieee.numeric_std.all; library work; +use work.utils.all; use work.common.all; use work.wishbone_types.all; @@ -51,26 
+52,6 @@ entity icache is end entity icache; architecture rtl of icache is - function log2(i : natural) return integer is - variable tmp : integer := i; - variable ret : integer := 0; - begin - while tmp > 1 loop - ret := ret + 1; - tmp := tmp / 2; - end loop; - return ret; - end function; - - function ispow2(i : integer) return boolean is - begin - if to_integer(to_unsigned(i, 32) and to_unsigned(i - 1, 32)) = 0 then - return true; - else - return false; - end if; - end function; - -- BRAM organisation: We never access more than wishbone_data_bits at -- a time so to save resources we make the array only that wide, and -- use consecutive indices for to make a cache "line" @@ -159,6 +140,7 @@ architecture rtl of icache is wb : wishbone_master_out; store_way : way_t; store_index : index_t; + store_row : row_t; end record; signal r : reg_internal_t; @@ -170,6 +152,7 @@ architecture rtl of icache is signal req_tag : cache_tag_t; signal req_is_hit : std_ulogic; signal req_is_miss : std_ulogic; + signal req_laddr : std_ulogic_vector(63 downto 0); -- Cache RAM interface type cache_ram_out_t is array(way_t) of cache_row_t; @@ -193,12 +176,21 @@ architecture rtl of icache is end; -- Returns whether this is the last row of a line - function is_last_row(addr: wishbone_addr_type) return boolean is + function is_last_row_addr(addr: wishbone_addr_type) return boolean is constant ones : std_ulogic_vector(ROW_LINEBITS-1 downto 0) := (others => '1'); begin return addr(LINE_OFF_BITS-1 downto ROW_OFF_BITS) = ones; end; + -- Returns whether this is the last row of a line + function is_last_row(row: row_t) return boolean is + variable row_v : std_ulogic_vector(ROW_BITS-1 downto 0); + constant ones : std_ulogic_vector(ROW_LINEBITS-1 downto 0) := (others => '1'); + begin + row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS)); + return row_v(ROW_LINEBITS-1 downto 0) = ones; + end; + -- Return the address of the next row in the current cache line function next_row_addr(addr: 
wishbone_addr_type) return std_ulogic_vector is @@ -213,6 +205,21 @@ architecture rtl of icache is return result; end; + -- Return the next row in the current cache line. We use a dedicated + -- function in order to limit the size of the generated adder to be + -- only the bits within a cache line (3 bits with default settings) + -- + function next_row(row: row_t) return row_t is + variable row_v : std_ulogic_vector(ROW_BITS-1 downto 0); + variable row_idx : std_ulogic_vector(ROW_LINEBITS-1 downto 0); + variable result : std_ulogic_vector(ROW_BITS-1 downto 0); + begin + row_v := std_ulogic_vector(to_unsigned(row, ROW_BITS)); + row_idx := row_v(ROW_LINEBITS-1 downto 0); + row_v(ROW_LINEBITS-1 downto 0) := std_ulogic_vector(unsigned(row_idx) + 1); + return to_integer(unsigned(row_v)); + end; + -- Read the instruction word for the given address in the current cache row function read_insn_word(addr: std_ulogic_vector(63 downto 0); data: cache_row_t) return std_ulogic_vector is @@ -298,7 +305,6 @@ begin wr_data => wishbone_in.dat ); process(all) - variable tmp_adr : std_ulogic_vector(63 downto 0); begin do_read <= '1'; do_write <= '0'; @@ -307,8 +313,7 @@ begin end if; cache_out(i) <= dout; rd_addr <= std_ulogic_vector(to_unsigned(req_row, ROW_BITS)); - tmp_adr := (r.wb.adr'left downto 0 => r.wb.adr, others => '0'); - wr_addr <= std_ulogic_vector(to_unsigned(get_row(tmp_adr), ROW_BITS)); + wr_addr <= std_ulogic_vector(to_unsigned(r.store_row, ROW_BITS)); end process; end generate; @@ -358,6 +363,12 @@ begin req_row <= get_row(i_in.nia); req_tag <= get_tag(i_in.nia); + -- Calculate address of beginning of cache line, will be + -- used for cache miss processing if needed + -- + req_laddr <= i_in.nia(63 downto LINE_OFF_BITS) & + (LINE_OFF_BITS-1 downto 0 => '0'); + -- Test if pending request is a hit on any way hit_way := 0; is_hit := '0'; @@ -427,7 +438,8 @@ begin -- Cache miss/reload synchronous machine icache_miss : process(clk) - variable tagset : cache_tags_set_t; + 
variable tagset : cache_tags_set_t; + variable stbs_done : boolean; begin if rising_edge(clk) then -- On reset, clear all valid bits to force misses @@ -473,29 +485,54 @@ begin -- Keep track of our index and way for subsequent stores r.store_index <= req_index; r.store_way <= replace_way; + r.store_row <= get_row(req_laddr); -- Prep for first wishbone read. We calculate the address of - -- the start of the cache line + -- the start of the cache line and start the WB cycle. -- - r.wb.adr <= i_in.nia(r.wb.adr'left downto LINE_OFF_BITS) & - (LINE_OFF_BITS-1 downto 0 => '0'); + r.wb.adr <= req_laddr(r.wb.adr'left downto 0); r.wb.cyc <= '1'; r.wb.stb <= '1'; + -- Track that we had one request sent r.state <= WAIT_ACK; end if; + when WAIT_ACK => + -- Requests are all sent if stb is 0 + stbs_done := r.wb.stb = '0'; + + -- If we are still sending requests, was one accepted ? + if wishbone_in.stall = '0' and not stbs_done then + -- That was the last word ? We are done sending. Clear + -- stb and set stbs_done so we can handle an eventual last + -- ack on the same cycle. + -- + if is_last_row_addr(r.wb.adr) then + r.wb.stb <= '0'; + stbs_done := true; + end if; + + -- Calculate the next row address + r.wb.adr <= next_row_addr(r.wb.adr); + end if; + + -- Incoming acks processing if wishbone_in.ack = '1' then - -- That was the last word ? 
We are done - if is_last_row(r.wb.adr) then - cache_valids(r.store_index)(r.store_way) <= '1'; + -- Check for completion + if stbs_done and is_last_row(r.store_row) then + -- Complete wishbone cycle r.wb.cyc <= '0'; - r.wb.stb <= '0'; + + -- Cache line is now valid + cache_valids(r.store_index)(r.store_way) <= '1'; + + -- We are done r.state <= IDLE; - else - -- Otherwise, calculate the next row address - r.wb.adr <= next_row_addr(r.wb.adr); end if; + + -- Increment store row counter + r.store_row <= next_row(r.store_row); end if; end case; end if; diff --git a/icache_tb.vhdl b/icache_tb.vhdl index a82912e..ea5cf3a 100644 --- a/icache_tb.vhdl +++ b/icache_tb.vhdl @@ -36,9 +36,9 @@ begin ); -- BRAM Memory slave - bram0: entity work.mw_soc_memory + bram0: entity work.wishbone_bram_wrapper generic map( - MEMORY_SIZE => 128, + MEMORY_SIZE => 1024, RAM_INIT_FILE => "icache_test.bin" ) port map( @@ -68,15 +68,20 @@ begin begin i_out.req <= '0'; i_out.nia <= (others => '0'); + i_out.stop_mark <= '0'; - wait for 4*clk_period; + wait until rising_edge(clk); + wait until rising_edge(clk); + wait until rising_edge(clk); + wait until rising_edge(clk); i_out.req <= '1'; i_out.nia <= x"0000000000000004"; wait for 30*clk_period; + wait until rising_edge(clk); - assert i_in.valid = '1'; + assert i_in.valid = '1' severity failure; assert i_in.insn = x"00000001" report "insn @" & to_hstring(i_out.nia) & "=" & to_hstring(i_in.insn) & @@ -85,27 +90,29 @@ begin i_out.req <= '0'; - wait for clk_period; + wait until rising_edge(clk); -- hit i_out.req <= '1'; i_out.nia <= x"0000000000000008"; - wait for clk_period; - assert i_in.valid = '1'; + wait until rising_edge(clk); + wait until rising_edge(clk); + assert i_in.valid = '1' severity failure; assert i_in.insn = x"00000002" report "insn @" & to_hstring(i_out.nia) & "=" & to_hstring(i_in.insn) & " expected 00000002" severity failure; - wait for clk_period; + wait until rising_edge(clk); -- another miss i_out.req <= '1'; i_out.nia <= 
x"0000000000000040"; wait for 30*clk_period; + wait until rising_edge(clk); - assert i_in.valid = '1'; + assert i_in.valid = '1' severity failure; assert i_in.insn = x"00000010" report "insn @" & to_hstring(i_out.nia) & "=" & to_hstring(i_in.insn) & @@ -115,13 +122,15 @@ begin -- test something that aliases i_out.req <= '1'; i_out.nia <= x"0000000000000100"; - wait for clk_period; - assert i_in.valid = '0'; - wait for clk_period; + wait until rising_edge(clk); + wait until rising_edge(clk); + assert i_in.valid = '0' severity failure; + wait until rising_edge(clk); wait for 30*clk_period; + wait until rising_edge(clk); - assert i_in.valid = '1'; + assert i_in.valid = '1' severity failure; assert i_in.insn = x"00000040" report "insn @" & to_hstring(i_out.nia) & "=" & to_hstring(i_in.insn) & diff --git a/microwatt.core b/microwatt.core index 5fb9a7a..ee48376 100644 --- a/microwatt.core +++ b/microwatt.core @@ -36,20 +36,22 @@ filesets: - plru.vhdl - cache_ram.vhdl - core_debug.vhdl + - utils.vhdl file_type : vhdlSource-2008 soc: files: - wishbone_arbiter.vhdl - wishbone_debug_master.vhdl + - wishbone_bram_wrapper.vhdl - soc.vhdl file_type : vhdlSource-2008 fpga: files: - - fpga/pp_fifo.vhd - - fpga/mw_soc_memory.vhdl + - fpga/main_bram.vhdl - fpga/soc_reset.vhdl + - fpga/pp_fifo.vhd - fpga/pp_soc_uart.vhd - fpga/pp_utilities.vhd - fpga/toplevel.vhdl @@ -93,6 +95,7 @@ targets: - ram_init_file - clk_input - clk_frequency + - disable_flatten_core tools: vivado: {part : xc7a100tcsg324-1} toplevel : toplevel @@ -105,6 +108,7 @@ targets: - ram_init_file - clk_input - clk_frequency + - disable_flatten_core tools: vivado: {part : xc7a200tsbg484-1} toplevel : toplevel @@ -117,6 +121,7 @@ targets: - ram_init_file - clk_input - clk_frequency + - disable_flatten_core tools: vivado: {part : xc7a35ticsg324-1L} toplevel : toplevel @@ -129,6 +134,7 @@ targets: - ram_init_file - clk_input - clk_frequency + - disable_flatten_core tools: vivado: {part : xc7a100ticsg324-1L} toplevel : 
toplevel @@ -142,6 +148,7 @@ targets: - reset_low=false - clk_input=12000000 - clk_frequency + - disable_flatten_core tools: vivado: {part : xc7a35tcpg236-1} toplevel : toplevel @@ -179,3 +186,9 @@ parameters: description : Generated system clock frequency in HZ (for top-generic based boards) paramtype : generic default : 50000000 + + disable_flatten_core: + datatype : bool + description : Prevent Vivado from flattening the main core components + paramtype : generic + default : false diff --git a/scripts/run_test.sh b/scripts/run_test.sh index b6f2ee6..ef737fe 100755 --- a/scripts/run_test.sh +++ b/scripts/run_test.sh @@ -21,7 +21,7 @@ Y=$(${MICROWATT_DIR}/scripts/hash.py tests/${TEST}.out) cd $TMPDIR -cp ${MICROWATT_DIR}/tests/${TEST}.bin simple_ram_behavioural.bin +cp ${MICROWATT_DIR}/tests/${TEST}.bin main_ram.bin X=$( ${MICROWATT_DIR}/core_tb | ${MICROWATT_DIR}/scripts/hash.py ) diff --git a/scripts/test_micropython.py b/scripts/test_micropython.py index d7ffb2d..edc076f 100755 --- a/scripts/test_micropython.py +++ b/scripts/test_micropython.py @@ -13,7 +13,7 @@ cwd = os.getcwd() os.chdir(tempdir.name) copyfile(os.path.join(cwd, 'tests/micropython.bin'), - os.path.join(tempdir.name, 'simple_ram_behavioural.bin')) + os.path.join(tempdir.name, 'main_ram.bin')) cmd = [ os.path.join(cwd, './core_tb') ] diff --git a/scripts/test_micropython_long.py b/scripts/test_micropython_long.py index 805c6b2..6dea3a4 100755 --- a/scripts/test_micropython_long.py +++ b/scripts/test_micropython_long.py @@ -13,7 +13,7 @@ cwd = os.getcwd() os.chdir(tempdir.name) copyfile(os.path.join(cwd, 'tests/micropython.bin'), - os.path.join(tempdir.name, 'simple_ram_behavioural.bin')) + os.path.join(tempdir.name, 'main_ram.bin')) cmd = [ os.path.join(cwd, './core_tb') ] diff --git a/sim_bram.vhdl b/sim_bram.vhdl new file mode 100644 index 0000000..d2d4f1b --- /dev/null +++ b/sim_bram.vhdl @@ -0,0 +1,67 @@ +-- Single port Block RAM with one cycle output buffer +-- +-- Simulated via C helpers + 
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.numeric_std.all;
+use std.textio.all;
+
+library work;
+use work.utils.all;
+use work.sim_bram_helpers.all;
+
+entity main_bram is
+    generic(
+        WIDTH         : natural := 64;    -- data width in bits; the C helpers exchange 64-bit values
+        HEIGHT_BITS   : natural := 13;    -- word-address width; 13 fits the MEMORY_SIZE default (assuming it counts bytes -- TODO confirm)
+        MEMORY_SIZE   : natural := 65536; -- size passed to behavioural_initialize
+        RAM_INIT_FILE : string            -- file name passed to behavioural_initialize
+        );
+    port(
+        clk  : in std_logic;
+        addr : in std_logic_vector(HEIGHT_BITS - 1 downto 0) ; -- word address (byte address without the low lane bits)
+        di   : in std_logic_vector(WIDTH-1 downto 0);          -- write data
+        do   : out std_logic_vector(WIDTH-1 downto 0);         -- read data, registered copy of obuf
+        sel  : in std_logic_vector((WIDTH/8)-1 downto 0);      -- forwarded to the helpers as their integer "length" argument
+        re   : in std_ulogic;                                  -- read enable
+        we   : in std_ulogic                                   -- write enable
+        );
+end entity main_bram;
+
+architecture sim of main_bram is
+
+    constant WIDTH_BYTES : natural := WIDTH / 8;
+    constant pad_zeros   : std_ulogic_vector(log2(WIDTH_BYTES)-1 downto 0)
+        := (others => '0'); -- low byte-address bits, only used to print full addresses in reports
+
+    signal identifier : integer := behavioural_initialize(filename => RAM_INIT_FILE,
+                                                          size => MEMORY_SIZE);
+    -- Read data captured from the helper; "do" follows it one clock later
+    signal obuf : std_logic_vector(WIDTH-1 downto 0);
+begin
+
+    -- Clocked access process: every access is forwarded to the C helpers
+    memory_0: process(clk)
+        variable ret_dat_v : std_ulogic_vector(63 downto 0);
+        variable addr64    : std_ulogic_vector(63 downto 0);
+    begin
+        if rising_edge(clk) then
+            addr64 := (others => '0');
+            addr64(HEIGHT_BITS + log2(WIDTH_BYTES) - 1 downto log2(WIDTH_BYTES)) := addr; -- same shift as pad_zeros (3 bits for WIDTH = 64)
+            if we = '1' then
+                report "RAM writing " & to_hstring(di) & " to " &
+                    to_hstring(addr & pad_zeros) & " sel:" & to_hstring(sel);
+                behavioural_write(di, addr64, to_integer(unsigned(sel)), identifier);
+            end if;
+            if re = '1' then
+                behavioural_read(ret_dat_v, addr64, to_integer(unsigned(sel)), identifier);
+                report "RAM reading from " & to_hstring(addr & pad_zeros) &
+                    " returns " & to_hstring(ret_dat_v);
+                obuf <= ret_dat_v(obuf'left downto 0); -- keep only the low WIDTH bits of the 64-bit result
+            end if;
+            do <= obuf; -- signal semantics: this is the obuf value from before this edge
+        end if;
+    end process;
+
+end architecture sim;
diff --git a/simple_ram_behavioural_helpers.vhdl b/sim_bram_helpers.vhdl
similarity index 84%
rename from simple_ram_behavioural_helpers.vhdl
rename to sim_bram_helpers.vhdl
index 507594f..c511a6e 100644
--- 
a/simple_ram_behavioural_helpers.vhdl +++ b/sim_bram_helpers.vhdl @@ -1,24 +1,24 @@ library ieee; use ieee.std_logic_1164.all; -package simple_ram_behavioural_helpers is +package sim_bram_helpers is function behavioural_initialize (filename: String; size: integer) return integer; attribute foreign of behavioural_initialize : function is "VHPIDIRECT behavioural_initialize"; - procedure behavioural_read (val: out std_ulogic_vector(63 downto 0); addr: std_ulogic_vector(63 downto 0); length: integer; identifier: integer; reload: integer); + procedure behavioural_read (val: out std_ulogic_vector(63 downto 0); addr: std_ulogic_vector(63 downto 0); length: integer; identifier:integer); attribute foreign of behavioural_read : procedure is "VHPIDIRECT behavioural_read"; procedure behavioural_write (val: std_ulogic_vector(63 downto 0); addr: std_ulogic_vector(63 downto 0); length: integer; identifier: integer); attribute foreign of behavioural_write : procedure is "VHPIDIRECT behavioural_write"; -end simple_ram_behavioural_helpers; +end sim_bram_helpers; -package body simple_ram_behavioural_helpers is +package body sim_bram_helpers is function behavioural_initialize (filename: String; size: integer) return integer is begin assert false report "VHPI" severity failure; end behavioural_initialize; - procedure behavioural_read (val: out std_ulogic_vector(63 downto 0); addr: std_ulogic_vector(63 downto 0); length: integer; identifier: integer; reload: integer) is + procedure behavioural_read (val: out std_ulogic_vector(63 downto 0); addr: std_ulogic_vector(63 downto 0); length: integer; identifier: integer) is begin assert false report "VHPI" severity failure; end behavioural_read; @@ -27,4 +27,4 @@ package body simple_ram_behavioural_helpers is begin assert false report "VHPI" severity failure; end behavioural_write; -end simple_ram_behavioural_helpers; +end sim_bram_helpers; diff --git a/simple_ram_behavioural_helpers_c.c b/sim_bram_helpers_c.c similarity index 100% rename from 
simple_ram_behavioural_helpers_c.c rename to sim_bram_helpers_c.c diff --git a/simple_ram_behavioural.vhdl b/simple_ram_behavioural.vhdl deleted file mode 100644 index 0f6a90a..0000000 --- a/simple_ram_behavioural.vhdl +++ /dev/null @@ -1,79 +0,0 @@ -library ieee; -use ieee.std_logic_1164.all; -use ieee.numeric_std.all; -use std.textio.all; - -library work; -use work.wishbone_types.all; -use work.simple_ram_behavioural_helpers.all; - -entity mw_soc_memory is - generic ( - RAM_INIT_FILE : string; - MEMORY_SIZE : integer - ); - - port ( - clk : in std_ulogic; - rst : in std_ulogic; - - wishbone_in : in wishbone_master_out; - wishbone_out : out wishbone_slave_out - ); -end mw_soc_memory; - -architecture behave of mw_soc_memory is - type wishbone_state_t is (IDLE, ACK); - - signal state : wishbone_state_t := IDLE; - signal ret_ack : std_ulogic := '0'; - signal identifier : integer := behavioural_initialize(filename => RAM_INIT_FILE, size => MEMORY_SIZE); - signal reload : integer := 0; -begin - wishbone_process: process(clk) - variable ret_dat: std_ulogic_vector(63 downto 0) := (others => '0'); - variable adr: std_ulogic_vector(63 downto 0); - begin - wishbone_out.ack <= ret_ack and wishbone_in.cyc and wishbone_in.stb; - wishbone_out.dat <= ret_dat; - - if rising_edge(clk) then - if rst = '1' then - state <= IDLE; - ret_ack <= '0'; - else - ret_dat := x"FFFFFFFFFFFFFFFF"; - - -- Active - if wishbone_in.cyc = '1' then - case state is - when IDLE => - if wishbone_in.stb = '1' then - -- write - adr := (wishbone_in.adr'left downto 0 => wishbone_in.adr, others => '0'); - if wishbone_in.we = '1' then - assert not(is_x(wishbone_in.dat)) and not(is_x(wishbone_in.adr)) severity failure; - report "RAM writing " & to_hstring(wishbone_in.dat) & " to " & to_hstring(wishbone_in.adr); - behavioural_write(wishbone_in.dat, adr, to_integer(unsigned(wishbone_in.sel)), identifier); - reload <= reload + 1; - ret_ack <= '1'; - state <= ACK; - else - behavioural_read(ret_dat, adr, 
to_integer(unsigned(wishbone_in.sel)), identifier, reload); - report "RAM reading from " & to_hstring(wishbone_in.adr) & " returns " & to_hstring(ret_dat); - ret_ack <= '1'; - state <= ACK; - end if; - end if; - when ACK => - ret_ack <= '0'; - state <= IDLE; - end case; - else - ret_ack <= '0'; - state <= IDLE; - end if; - end if; - end if; - end process; -end behave; diff --git a/simple_ram_behavioural_tb.vhdl b/simple_ram_behavioural_tb.vhdl deleted file mode 100644 index bee7d2e..0000000 --- a/simple_ram_behavioural_tb.vhdl +++ /dev/null @@ -1,246 +0,0 @@ -library ieee; -use ieee.std_logic_1164.all; -use ieee.numeric_std.all; - -library work; -use work.wishbone_types.all; - -entity simple_ram_behavioural_tb is -end simple_ram_behavioural_tb; - -architecture behave of simple_ram_behavioural_tb is - signal clk : std_ulogic; - signal rst : std_ulogic := '1'; - - constant clk_period : time := 10 ns; - - signal w_in : wishbone_slave_out; - signal w_out : wishbone_master_out; - - impure function to_adr(a: integer) return std_ulogic_vector is - begin - return std_ulogic_vector(to_unsigned(a, w_out.adr'length)); - end; -begin - simple_ram_0: entity work.mw_soc_memory - generic map ( - RAM_INIT_FILE => "simple_ram_behavioural_tb.bin", - MEMORY_SIZE => 16 - ) - port map ( - clk => clk, - rst => rst, - wishbone_out => w_in, - wishbone_in => w_out - ); - - clock: process - begin - clk <= '1'; - wait for clk_period / 2; - clk <= '0'; - wait for clk_period / 2; - end process clock; - - stim: process - begin - w_out.adr <= (others => '0'); - w_out.dat <= (others => '0'); - w_out.cyc <= '0'; - w_out.stb <= '0'; - w_out.sel <= (others => '0'); - w_out.we <= '0'; - - wait for clk_period; - rst <= '0'; - - wait for clk_period; - - w_out.cyc <= '1'; - - -- test various read lengths and alignments - w_out.stb <= '1'; - w_out.sel <= "00000001"; - w_out.adr <= to_adr(0); - assert w_in.ack = '0'; - wait for clk_period; - assert w_in.ack = '1'; - assert w_in.dat(7 downto 0) = x"00" 
report to_hstring(w_in.dat); - w_out.stb <= '0'; - wait for clk_period; - assert w_in.ack = '0'; - - w_out.stb <= '1'; - w_out.sel <= "00000001"; - w_out.adr <= to_adr(1); - assert w_in.ack = '0'; - wait for clk_period; - assert w_in.ack = '1'; - assert w_in.dat(7 downto 0) = x"01" report to_hstring(w_in.dat); - w_out.stb <= '0'; - wait for clk_period; - assert w_in.ack = '0'; - - w_out.stb <= '1'; - w_out.sel <= "00000001"; - w_out.adr <= to_adr(7); - assert w_in.ack = '0'; - wait for clk_period; - assert w_in.ack = '1'; - assert w_in.dat(7 downto 0) = x"07" report to_hstring(w_in.dat); - w_out.stb <= '0'; - wait for clk_period; - assert w_in.ack = '0'; - - w_out.stb <= '1'; - w_out.sel <= "00000011"; - w_out.adr <= to_adr(0); - assert w_in.ack = '0'; - wait for clk_period; - assert w_in.ack = '1'; - assert w_in.dat(15 downto 0) = x"0100" report to_hstring(w_in.dat); - w_out.stb <= '0'; - wait for clk_period; - assert w_in.ack = '0'; - - w_out.stb <= '1'; - w_out.sel <= "00000011"; - w_out.adr <= to_adr(1); - assert w_in.ack = '0'; - wait for clk_period; - assert w_in.ack = '1'; - assert w_in.dat(15 downto 0) = x"0201" report to_hstring(w_in.dat); - w_out.stb <= '0'; - wait for clk_period; - assert w_in.ack = '0'; - - w_out.stb <= '1'; - w_out.sel <= "00000011"; - w_out.adr <= to_adr(7); - assert w_in.ack = '0'; - wait for clk_period; - assert w_in.ack = '1'; - assert w_in.dat(15 downto 0) = x"0807" report to_hstring(w_in.dat); - w_out.stb <= '0'; - wait for clk_period; - assert w_in.ack = '0'; - - w_out.stb <= '1'; - w_out.sel <= "00001111"; - w_out.adr <= to_adr(0); - assert w_in.ack = '0'; - wait for clk_period; - assert w_in.ack = '1'; - assert w_in.dat(31 downto 0) = x"03020100" report to_hstring(w_in.dat); - w_out.stb <= '0'; - wait for clk_period; - assert w_in.ack = '0'; - - w_out.stb <= '1'; - w_out.sel <= "00001111"; - w_out.adr <= to_adr(1); - assert w_in.ack = '0'; - wait for clk_period; - assert w_in.ack = '1'; - assert w_in.dat(31 downto 0) = 
x"04030201" report to_hstring(w_in.dat); - w_out.stb <= '0'; - wait for clk_period; - assert w_in.ack = '0'; - - w_out.stb <= '1'; - w_out.sel <= "00001111"; - w_out.adr <= to_adr(7); - assert w_in.ack = '0'; - wait for clk_period; - assert w_in.ack = '1'; - assert w_in.dat(31 downto 0) = x"0A090807" report to_hstring(w_in.dat); - w_out.stb <= '0'; - wait for clk_period; - assert w_in.ack = '0'; - - w_out.stb <= '1'; - w_out.sel <= "11111111"; - w_out.adr <= to_adr(0); - assert w_in.ack = '0'; - wait for clk_period; - assert w_in.ack = '1'; - assert w_in.dat(63 downto 0) = x"0706050403020100" report to_hstring(w_in.dat); - w_out.stb <= '0'; - wait for clk_period; - assert w_in.ack = '0'; - - w_out.stb <= '1'; - w_out.sel <= "11111111"; - w_out.adr <= to_adr(1); - assert w_in.ack = '0'; - wait for clk_period; - assert w_in.ack = '1'; - assert w_in.dat(63 downto 0) = x"0807060504030201" report to_hstring(w_in.dat); - w_out.stb <= '0'; - wait for clk_period; - assert w_in.ack = '0'; - - w_out.stb <= '1'; - w_out.sel <= "11111111"; - w_out.adr <= to_adr(7); - assert w_in.ack = '0'; - wait for clk_period; - assert w_in.ack = '1'; - assert w_in.dat(63 downto 0) = x"0E0D0C0B0A090807" report to_hstring(w_in.dat); - w_out.stb <= '0'; - wait for clk_period; - assert w_in.ack = '0'; - - -- test various write lengths and alignments - w_out.stb <= '1'; - w_out.sel <= "00000001"; - w_out.adr <= to_adr(0); - w_out.we <= '1'; - w_out.dat(7 downto 0) <= x"0F"; - assert w_in.ack = '0'; - wait for clk_period; - assert w_in.ack = '1'; - w_out.stb <= '0'; - wait for clk_period; - assert w_in.ack = '0'; - - w_out.stb <= '1'; - w_out.sel <= "00000001"; - w_out.adr <= to_adr(0); - w_out.we <= '0'; - assert w_in.ack = '0'; - wait for clk_period; - assert w_in.ack = '1'; - assert w_in.dat(7 downto 0) = x"0F" report to_hstring(w_in.dat); - w_out.stb <= '0'; - wait for clk_period; - assert w_in.ack = '0'; - - w_out.stb <= '1'; - w_out.sel <= "11111111"; - w_out.adr <= to_adr(7); - w_out.we <= 
'1'; - w_out.dat <= x"BADC0FFEBADC0FFE"; - assert w_in.ack = '0'; - wait for clk_period; - assert w_in.ack = '1'; - w_out.stb <= '0'; - wait for clk_period; - assert w_in.ack = '0'; - - w_out.stb <= '1'; - w_out.sel <= "11111111"; - w_out.adr <= to_adr(7); - w_out.we <= '0'; - assert w_in.ack = '0'; - wait for clk_period; - assert w_in.ack = '1'; - assert w_in.dat = x"BADC0FFEBADC0FFE" report to_hstring(w_in.dat); - w_out.stb <= '0'; - wait for clk_period; - assert w_in.ack = '0'; - - assert false report "end of test" severity failure; - wait; - end process; -end behave; diff --git a/soc.vhdl b/soc.vhdl index 458a751..fb8a36d 100644 --- a/soc.vhdl +++ b/soc.vhdl @@ -17,7 +17,8 @@ entity soc is MEMORY_SIZE : positive; RAM_INIT_FILE : string; RESET_LOW : boolean; - SIM : boolean + SIM : boolean; + DISABLE_FLATTEN_CORE : boolean := false ); port( rst : in std_ulogic; @@ -42,6 +43,12 @@ architecture behaviour of soc is signal wishbone_debug_in : wishbone_slave_out; signal wishbone_debug_out : wishbone_master_out; + -- Arbiter array (ghdl doesn't support assigning the array + -- elements in the entity instantiation) + constant NUM_WB_MASTERS : positive := 3; + signal wb_masters_out : wishbone_master_out_vector(0 to NUM_WB_MASTERS-1); + signal wb_masters_in : wishbone_slave_out_vector(0 to NUM_WB_MASTERS-1); + -- Wishbone master (output of arbiter): signal wb_master_in : wishbone_slave_out; signal wb_master_out : wishbone_master_out; @@ -76,7 +83,8 @@ begin -- Processor core processor: entity work.core generic map( - SIM => SIM + SIM => SIM, + DISABLE_FLATTEN => DISABLE_FLATTEN_CORE ) port map( clk => system_clk, @@ -94,13 +102,22 @@ begin ); -- Wishbone bus master arbiter & mux + wb_masters_out <= (0 => wishbone_dcore_out, + 1 => wishbone_icore_out, + 2 => wishbone_debug_out); + wishbone_dcore_in <= wb_masters_in(0); + wishbone_icore_in <= wb_masters_in(1); + wishbone_debug_in <= wb_masters_in(2); wishbone_arbiter_0: entity work.wishbone_arbiter + generic map( +
NUM_MASTERS => NUM_WB_MASTERS + ) port map( clk => system_clk, rst => rst, - wb1_in => wishbone_dcore_out, wb1_out => wishbone_dcore_in, - wb2_in => wishbone_icore_out, wb2_out => wishbone_icore_in, - wb3_in => wishbone_debug_out, wb3_out => wishbone_debug_in, - wb_out => wb_master_out, wb_in => wb_master_in + wb_masters_in => wb_masters_out, + wb_masters_out => wb_masters_in, + wb_slave_out => wb_master_out, + wb_slave_in => wb_master_in ); -- Wishbone slaves address decoder & mux @@ -136,6 +153,7 @@ begin when others => wb_master_in.dat <= (others => '1'); wb_master_in.ack <= wb_master_out.stb and wb_master_out.cyc; + wb_master_in.stall <= '0'; end case; end process slave_intercon; @@ -164,9 +182,10 @@ begin wb_ack_out => wb_uart0_out.ack ); wb_uart0_out.dat <= x"00000000000000" & uart_dat8; + wb_uart0_out.stall <= '0' when wb_uart0_in.cyc = '0' else not wb_uart0_out.ack; -- BRAM Memory slave - bram0: entity work.mw_soc_memory + bram0: entity work.wishbone_bram_wrapper generic map( MEMORY_SIZE => MEMORY_SIZE, RAM_INIT_FILE => RAM_INIT_FILE diff --git a/utils.vhdl b/utils.vhdl new file mode 100644 index 0000000..7238641 --- /dev/null +++ b/utils.vhdl @@ -0,0 +1,35 @@ +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +package utils is + + function log2(i : natural) return integer; + function ispow2(i : integer) return boolean; + +end utils; + +package body utils is + + function log2(i : natural) return integer is + variable tmp : integer := i; + variable ret : integer := 0; + begin + while tmp > 1 loop + ret := ret + 1; + tmp := tmp / 2; + end loop; + return ret; + end function; + + function ispow2(i : integer) return boolean is + begin + if to_integer(to_unsigned(i, 32) and to_unsigned(i - 1, 32)) = 0 then + return true; + else + return false; + end if; + end function; + +end utils; + diff --git a/wishbone_arbiter.vhdl b/wishbone_arbiter.vhdl index d839b31..cb632bf 100644 --- a/wishbone_arbiter.vhdl +++ b/wishbone_arbiter.vhdl @@ -6,73 
+6,64 @@ use work.wishbone_types.all; -- TODO: Use an array of master/slaves with parametric size entity wishbone_arbiter is + generic( + NUM_MASTERS : positive := 3 + ); port (clk : in std_ulogic; rst : in std_ulogic; - wb1_in : in wishbone_master_out; - wb1_out : out wishbone_slave_out; + wb_masters_in : in wishbone_master_out_vector(0 to NUM_MASTERS-1); + wb_masters_out : out wishbone_slave_out_vector(0 to NUM_MASTERS-1); - wb2_in : in wishbone_master_out; - wb2_out : out wishbone_slave_out; - - wb3_in : in wishbone_master_out; - wb3_out : out wishbone_slave_out; - - wb_out : out wishbone_master_out; - wb_in : in wishbone_slave_out + wb_slave_out : out wishbone_master_out; + wb_slave_in : in wishbone_slave_out ); end wishbone_arbiter; architecture behave of wishbone_arbiter is - type wishbone_arbiter_state_t is (IDLE, WB1_BUSY, WB2_BUSY, WB3_BUSY); - signal state : wishbone_arbiter_state_t := IDLE; + subtype wb_arb_master_t is integer range 0 to NUM_MASTERS-1; + signal candidate, selected : wb_arb_master_t; + signal busy : std_ulogic; begin - wishbone_muxes: process(state, wb_in, wb1_in, wb2_in, wb3_in) + busy <= wb_masters_in(selected).cyc; + + wishbone_muxes: process(selected, candidate, busy, wb_slave_in, wb_masters_in) + variable early_sel : wb_arb_master_t; begin - -- Requests from masters are fully muxed - wb_out <= wb1_in when state = WB1_BUSY else - wb2_in when state = WB2_BUSY else - wb3_in when state = WB3_BUSY else - wishbone_master_out_init; + early_sel := selected; + if busy = '0' then + early_sel := candidate; + end if; + wb_slave_out <= wb_masters_in(early_sel); + for i in 0 to NUM_MASTERS-1 loop + wb_masters_out(i).dat <= wb_slave_in.dat; + wb_masters_out(i).ack <= wb_slave_in.ack when early_sel = i else '0'; + wb_masters_out(i).stall <= wb_slave_in.stall when early_sel = i else '1'; + end loop; + end process; - -- Responses from slave don't need to mux the data bus - wb1_out.dat <= wb_in.dat; - wb2_out.dat <= wb_in.dat; - wb3_out.dat <= 
wb_in.dat; - wb1_out.ack <= wb_in.ack when state = WB1_BUSY else '0'; - wb2_out.ack <= wb_in.ack when state = WB2_BUSY else '0'; - wb3_out.ack <= wb_in.ack when state = WB3_BUSY else '0'; + -- Candidate selection is dumb, priority order... we could + -- instead consider some form of fairness but it's not really + -- an issue at the moment. + -- + wishbone_candidate: process(all) + begin + candidate <= selected; + for i in NUM_MASTERS-1 downto 0 loop + if wb_masters_in(i).cyc = '1' then + candidate <= i; + end if; + end loop; end process; wishbone_arbiter_process: process(clk) begin if rising_edge(clk) then if rst = '1' then - state <= IDLE; - else - case state is - when IDLE => - if wb1_in.cyc = '1' then - state <= WB1_BUSY; - elsif wb2_in.cyc = '1' then - state <= WB2_BUSY; - elsif wb3_in.cyc = '1' then - state <= WB3_BUSY; - end if; - when WB1_BUSY => - if wb1_in.cyc = '0' then - state <= IDLE; - end if; - when WB2_BUSY => - if wb2_in.cyc = '0' then - state <= IDLE; - end if; - when WB3_BUSY => - if wb3_in.cyc = '0' then - state <= IDLE; - end if; - end case; + selected <= 0; + elsif busy = '0' then + selected <= candidate; end if; end if; end process; diff --git a/simple_ram_behavioural_tb.bin b/wishbone_bram_tb.bin similarity index 100% rename from simple_ram_behavioural_tb.bin rename to wishbone_bram_tb.bin diff --git a/wishbone_bram_tb.vhdl b/wishbone_bram_tb.vhdl new file mode 100644 index 0000000..be64db6 --- /dev/null +++ b/wishbone_bram_tb.vhdl @@ -0,0 +1,175 @@ +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +library work; +use work.wishbone_types.all; + +entity wishbone_bram_tb is +end wishbone_bram_tb; + +architecture behave of wishbone_bram_tb is + signal clk : std_ulogic; + signal rst : std_ulogic := '1'; + + constant clk_period : time := 10 ns; + + signal w_in : wishbone_slave_out; + signal w_out : wishbone_master_out; + + impure function to_adr(a: integer) return std_ulogic_vector is + begin + return 
std_ulogic_vector(to_unsigned(a, w_out.adr'length)); + end; +begin + simple_ram_0: entity work.wishbone_bram_wrapper + generic map ( + RAM_INIT_FILE => "wishbone_bram_tb.bin", + MEMORY_SIZE => 16 + ) + port map ( + clk => clk, + rst => rst, + wishbone_out => w_in, + wishbone_in => w_out + ); + + clock: process + begin + clk <= '1'; + wait for clk_period / 2; + clk <= '0'; + wait for clk_period / 2; + end process clock; + + stim: process + begin + w_out.adr <= (others => '0'); + w_out.dat <= (others => '0'); + w_out.cyc <= '0'; + w_out.stb <= '0'; + w_out.sel <= (others => '0'); + w_out.we <= '0'; + + wait until rising_edge(clk); + rst <= '0'; + wait until rising_edge(clk); + + w_out.cyc <= '1'; + + -- Test read 0 + w_out.stb <= '1'; + w_out.sel <= "11111111"; + w_out.adr <= to_adr(0); + assert w_in.ack = '0'; + wait until rising_edge(clk); + w_out.stb <= '0'; + wait until rising_edge(clk); + wait until rising_edge(clk); + assert w_in.ack = '1'; + assert w_in.dat(63 downto 0) = x"0706050403020100" report to_hstring(w_in.dat); + wait until rising_edge(clk); + assert w_in.ack = '0'; + + -- Test read 8 + w_out.stb <= '1'; + w_out.sel <= "11111111"; + w_out.adr <= to_adr(8); + assert w_in.ack = '0'; + wait until rising_edge(clk); + w_out.stb <= '0'; + wait until rising_edge(clk); + wait until rising_edge(clk); + assert w_in.ack = '1'; + assert w_in.dat(63 downto 0) = x"0F0E0D0C0B0A0908" report to_hstring(w_in.dat); + wait until rising_edge(clk); + assert w_in.ack = '0'; + + -- Test write byte at 0 + w_out.stb <= '1'; + w_out.sel <= "00000001"; + w_out.adr <= to_adr(0); + w_out.we <= '1'; + w_out.dat(7 downto 0) <= x"0F"; + assert w_in.ack = '0'; + wait until rising_edge(clk); + w_out.stb <= '0'; + wait until rising_edge(clk) and w_in.ack = '1'; + wait until rising_edge(clk); + assert w_in.ack = '0'; + + -- Test read back + w_out.stb <= '1'; + w_out.sel <= "11111111"; + w_out.adr <= to_adr(0); + w_out.we <= '0'; + assert w_in.ack = '0'; + wait until rising_edge(clk); + 
w_out.stb <= '0'; + wait until rising_edge(clk); + wait until rising_edge(clk); + assert w_in.ack = '1'; + assert w_in.dat(63 downto 0) = x"070605040302010F" report to_hstring(w_in.dat); + wait until rising_edge(clk); + assert w_in.ack = '0'; + + -- Test write dword at 4 + w_out.stb <= '1'; + w_out.sel <= "11110000"; + w_out.adr <= to_adr(0); + w_out.we <= '1'; + w_out.dat(63 downto 32) <= x"BAADFEED"; + assert w_in.ack = '0'; + wait until rising_edge(clk); + w_out.stb <= '0'; + wait until rising_edge(clk) and w_in.ack = '1'; + wait until rising_edge(clk); + assert w_in.ack = '0'; + + -- Test read back + w_out.stb <= '1'; + w_out.sel <= "11111111"; + w_out.adr <= to_adr(0); + w_out.we <= '0'; + assert w_in.ack = '0'; + wait until rising_edge(clk); + w_out.stb <= '0'; + wait until rising_edge(clk); + wait until rising_edge(clk); + assert w_in.ack = '1'; + assert w_in.dat(63 downto 0) = x"BAADFEED0302010F" report to_hstring(w_in.dat); + wait until rising_edge(clk); + assert w_in.ack = '0'; + + -- Test write qword at 8 + w_out.stb <= '1'; + w_out.sel <= "11111111"; + w_out.adr <= to_adr(8); + w_out.we <= '1'; + w_out.dat(63 downto 0) <= x"0001020304050607"; + assert w_in.ack = '0'; + wait until rising_edge(clk); + w_out.stb <= '0'; + wait until rising_edge(clk) and w_in.ack = '1'; + wait until rising_edge(clk); + assert w_in.ack = '0'; + + -- Test read back + w_out.stb <= '1'; + w_out.sel <= "11111111"; + w_out.adr <= to_adr(8); + w_out.we <= '0'; + assert w_in.ack = '0'; + wait until rising_edge(clk); + w_out.stb <= '0'; + wait until rising_edge(clk); + wait until rising_edge(clk); + assert w_in.ack = '1'; + assert w_in.dat(63 downto 0) = x"0001020304050607" report to_hstring(w_in.dat); + wait until rising_edge(clk); + assert w_in.ack = '0'; + + assert false report "end of test" severity failure; + wait; + end process; +end behave; diff --git a/wishbone_bram_wrapper.vhdl b/wishbone_bram_wrapper.vhdl new file mode 100644 index 0000000..14520b5 --- /dev/null +++ 
b/wishbone_bram_wrapper.vhdl @@ -0,0 +1,84 @@ +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; +use std.textio.all; + +library work; +use work.utils.all; +use work.wishbone_types.all; + +--! @brief Simple memory module for use in Wishbone-based systems. +entity wishbone_bram_wrapper is + generic( + MEMORY_SIZE : natural := 4096; --! Memory size in bytes. + RAM_INIT_FILE : string + ); + port( + clk : in std_logic; + rst : in std_logic; + + -- Wishbone interface: + wishbone_in : in wishbone_master_out; + wishbone_out : out wishbone_slave_out + ); +end entity wishbone_bram_wrapper; + +architecture behaviour of wishbone_bram_wrapper is + constant ram_addr_bits : integer := log2(MEMORY_SIZE) - 3; + + -- RAM interface + signal ram_addr : std_logic_vector(ram_addr_bits - 1 downto 0); + signal ram_we : std_ulogic; + signal ram_re : std_ulogic; + + -- Others + signal ack, ack_buf : std_ulogic; +begin + + -- Actual RAM template + ram_0: entity work.main_bram + generic map( + WIDTH => 64, + HEIGHT_BITS => ram_addr_bits, + MEMORY_SIZE => MEMORY_SIZE, + RAM_INIT_FILE => RAM_INIT_FILE + ) + port map( + clk => clk, + addr => ram_addr, + di => wishbone_in.dat, + do => wishbone_out.dat, + sel => wishbone_in.sel, + re => ram_re, + we => ram_we + ); + + -- Wishbone interface + ram_addr <= wishbone_in.adr(ram_addr_bits + 2 downto 3); + ram_we <= wishbone_in.stb and wishbone_in.cyc and wishbone_in.we; + ram_re <= wishbone_in.stb and wishbone_in.cyc and not wishbone_in.we; + wishbone_out.stall <= '0'; + wishbone_out.ack <= ack_buf; + + wb_0: process(clk) + begin + if rising_edge(clk) then + if rst = '1' or wishbone_in.cyc = '0' then + ack_buf <= '0'; + ack <= '0'; + else + -- On loads, we have a delay cycle due to BRAM buffering + -- but not on stores. So try to send an early ack on a + -- store if we aren't behind an existing load ack.
+ -- + if ram_we = '1' and ack = '0' then + ack_buf <= '1'; + else + ack <= wishbone_in.stb; + ack_buf <= ack; + end if; + end if; + end if; + end process; + +end architecture behaviour; diff --git a/wishbone_debug_master.vhdl b/wishbone_debug_master.vhdl index 3ba6b21..11b9ee3 100644 --- a/wishbone_debug_master.vhdl +++ b/wishbone_debug_master.vhdl @@ -124,7 +124,6 @@ begin -- We always move WB cyc and stb simultaneously (no pipelining yet...) wb_out.cyc <= '1' when state = WB_CYCLE else '0'; - wb_out.stb <= '1' when state = WB_CYCLE else '0'; -- Data latch. WB will take the read data away as soon as the cycle -- terminates but we must maintain it on DMI until req goes down, so @@ -145,14 +144,23 @@ begin if rising_edge(clk) then if (rst) then state <= IDLE; + wb_out.stb <= '0'; else case state is when IDLE => if dmi_req = '1' and dmi_addr = DBG_WB_DATA then state <= WB_CYCLE; + wb_out.stb <= '1'; end if; when WB_CYCLE => + if wb_in.stall = '0' then + wb_out.stb <= '0'; + end if; if wb_in.ack then + -- We shouldn't get the ack if we hadn't already cleared + -- stb above but if this happens, don't leave it dangling.
+ -- + wb_out.stb <= '0'; state <= DMI_WAIT; end if; when DMI_WAIT => diff --git a/wishbone_types.vhdl b/wishbone_types.vhdl index 12f0bc7..d1f2a45 100644 --- a/wishbone_types.vhdl +++ b/wishbone_types.vhdl @@ -21,9 +21,13 @@ package wishbone_types is constant wishbone_master_out_init : wishbone_master_out := (cyc => '0', stb => '0', we => '0', others => (others => '0')); type wishbone_slave_out is record - dat : wishbone_data_type; - ack : std_ulogic; + dat : wishbone_data_type; + ack : std_ulogic; + stall : std_ulogic; end record; - constant wishbone_slave_out_init : wishbone_slave_out := (ack => '0', others => (others => '0')); + constant wishbone_slave_out_init : wishbone_slave_out := (ack => '0', stall => '0', others => (others => '0')); + + type wishbone_master_out_vector is array (natural range <>) of wishbone_master_out; + type wishbone_slave_out_vector is array (natural range <>) of wishbone_slave_out; end package wishbone_types; diff --git a/writeback.vhdl b/writeback.vhdl index 0d9397c..e2b74f8 100644 --- a/writeback.vhdl +++ b/writeback.vhdl @@ -44,6 +44,7 @@ architecture behaviour of writeback is signal sign_extend : std_ulogic; signal negative : std_ulogic; signal second_word : std_ulogic; + signal zero : std_ulogic; begin writeback_0: process(clk) begin @@ -155,7 +156,9 @@ begin -- If the data can arrive split over two cycles, this will be correct -- provided we don't have both sign extension and byte reversal. 
- negative <= (data_len(2) and data_permuted(31)) or (data_len(1) and data_permuted(15)) or + negative <= (data_len(3) and data_permuted(63)) or + (data_len(2) and data_permuted(31)) or + (data_len(1) and data_permuted(15)) or (data_len(0) and data_permuted(7)); -- trim and sign-extend @@ -170,12 +173,16 @@ begin trim_ctl(i) <= '0' & (negative and sign_extend); end if; end loop; + zero <= not negative; for i in 0 to 7 loop case trim_ctl(i) is when "11" => data_trimmed(i * 8 + 7 downto i * 8) <= data_latched(i * 8 + 7 downto i * 8); when "10" => data_trimmed(i * 8 + 7 downto i * 8) <= data_permuted(i * 8 + 7 downto i * 8); + if or data_permuted(i * 8 + 7 downto i * 8) /= '0' then + zero <= '0'; + end if; when "01" => data_trimmed(i * 8 + 7 downto i * 8) <= x"FF"; when others => @@ -190,9 +197,9 @@ begin if rc = '1' then c_out.write_cr_enable <= '1'; c_out.write_cr_mask <= num_to_fxm(0); - if data_trimmed(63) = '1' then + if negative = '1' then c_out.write_cr_data <= x"80000000"; - elsif or (data_trimmed(62 downto 0)) = '1' then + elsif zero = '0' then c_out.write_cr_data <= x"40000000"; else c_out.write_cr_data <= x"20000000";