diff --git a/core_tb.vhdl b/core_tb.vhdl index 9c08919..a35d73d 100644 --- a/core_tb.vhdl +++ b/core_tb.vhdl @@ -18,6 +18,8 @@ architecture behave of core_tb is -- Dummy DRAM signal wb_dram_in : wishbone_master_out; signal wb_dram_out : wishbone_slave_out; + signal wb_dram_ctrl_in : wb_io_master_out; + signal wb_dram_ctrl_out : wb_io_slave_out; begin soc0: entity work.soc @@ -35,6 +37,8 @@ begin uart0_txd => open, wb_dram_in => wb_dram_in, wb_dram_out => wb_dram_out, + wb_dram_ctrl_in => wb_dram_ctrl_in, + wb_dram_ctrl_out => wb_dram_ctrl_out, alt_reset => '0' ); @@ -59,6 +63,9 @@ begin -- Dummy DRAM wb_dram_out.ack <= wb_dram_in.cyc and wb_dram_in.stb; wb_dram_out.dat <= x"FFFFFFFFFFFFFFFF"; - wb_dram_out.stall <= wb_dram_in.cyc and not wb_dram_out.ack; + wb_dram_out.stall <= '0'; + wb_dram_ctrl_out.ack <= wb_dram_ctrl_in.cyc and wb_dram_ctrl_in.stb; + wb_dram_ctrl_out.dat <= x"FFFFFFFF"; + wb_dram_ctrl_out.stall <= '0'; end; diff --git a/fpga/top-arty.vhdl b/fpga/top-arty.vhdl index f2da6c2..e3782ed 100644 --- a/fpga/top-arty.vhdl +++ b/fpga/top-arty.vhdl @@ -65,11 +65,15 @@ architecture behaviour of toplevel is signal system_clk : std_ulogic; signal system_clk_locked : std_ulogic; - -- DRAM wishbone connection - signal wb_dram_in : wishbone_master_out; - signal wb_dram_out : wishbone_slave_out; - signal wb_dram_ctrl : std_ulogic; - signal wb_dram_init : std_ulogic; + -- DRAM main data wishbone connection + signal wb_dram_in : wishbone_master_out; + signal wb_dram_out : wishbone_slave_out; + + -- DRAM control wishbone connection + signal wb_dram_ctrl_in : wb_io_master_out; + signal wb_dram_ctrl_out : wb_io_slave_out; + signal wb_dram_is_csr : std_ulogic; + signal wb_dram_is_init : std_ulogic; -- Control/status signal core_alt_reset : std_ulogic; @@ -104,8 +108,10 @@ begin uart0_rxd => uart_main_rx, wb_dram_in => wb_dram_in, wb_dram_out => wb_dram_out, - wb_dram_ctrl => wb_dram_ctrl, - wb_dram_init => wb_dram_init, + wb_dram_ctrl_in => wb_dram_ctrl_in, + 
wb_dram_ctrl_out => wb_dram_ctrl_out, + wb_dram_is_csr => wb_dram_is_csr, + wb_dram_is_init => wb_dram_is_init, alt_reset => core_alt_reset ); @@ -195,8 +201,10 @@ begin wb_in => wb_dram_in, wb_out => wb_dram_out, - wb_is_ctrl => wb_dram_ctrl, - wb_is_init => wb_dram_init, + wb_ctrl_in => wb_dram_ctrl_in, + wb_ctrl_out => wb_dram_ctrl_out, + wb_ctrl_is_csr => wb_dram_is_csr, + wb_ctrl_is_init => wb_dram_is_init, serial_tx => uart_pmod_tx, serial_rx => uart_pmod_rx, diff --git a/fpga/top-generic.vhdl b/fpga/top-generic.vhdl index daefeee..a4c4e73 100644 --- a/fpga/top-generic.vhdl +++ b/fpga/top-generic.vhdl @@ -33,9 +33,15 @@ architecture behaviour of toplevel is signal system_clk : std_ulogic; signal system_clk_locked : std_ulogic; - -- Dummy DRAM - signal wb_dram_in : wishbone_master_out; - signal wb_dram_out : wishbone_slave_out; + -- DRAM main data wishbone connection + signal wb_dram_in : wishbone_master_out; + signal wb_dram_out : wishbone_slave_out; + + -- DRAM control wishbone connection + signal wb_dram_ctrl_in : wb_io_master_out; + signal wb_dram_ctrl_out : wb_io_slave_out; + signal wb_dram_is_csr : std_ulogic; + signal wb_dram_is_init : std_ulogic; begin @@ -79,7 +85,12 @@ begin rst => soc_rst, uart0_txd => uart0_txd, uart0_rxd => uart0_rxd, + wb_dram_in => wb_dram_in, wb_dram_out => wb_dram_out, + wb_dram_ctrl_in => wb_dram_ctrl_in, + wb_dram_ctrl_out => wb_dram_ctrl_out, + wb_dram_is_csr => wb_dram_is_csr, + wb_dram_is_init => wb_dram_is_init, alt_reset => '0' ); @@ -87,5 +98,9 @@ begin wb_dram_out.ack <= wb_dram_in.cyc and wb_dram_in.stb; wb_dram_out.dat <= x"FFFFFFFFFFFFFFFF"; wb_dram_out.stall <= wb_dram_in.cyc and not wb_dram_out.ack; + -- Dummy DRAM control bus: driven from its own master signals, reads as all-ones + wb_dram_ctrl_out.ack <= wb_dram_ctrl_in.cyc and wb_dram_ctrl_in.stb; + wb_dram_ctrl_out.dat <= x"FFFFFFFF"; + wb_dram_ctrl_out.stall <= wb_dram_ctrl_in.cyc and not wb_dram_ctrl_out.ack; end architecture behaviour; diff --git a/fpga/top-nexys-video.vhdl b/fpga/top-nexys-video.vhdl index 0e4b097..9acbee1 100644 ---
a/fpga/top-nexys-video.vhdl +++ b/fpga/top-nexys-video.vhdl @@ -57,11 +57,15 @@ architecture behaviour of toplevel is signal system_clk : std_ulogic; signal system_clk_locked : std_ulogic; - -- DRAM wishbone connection - signal wb_dram_in : wishbone_master_out; - signal wb_dram_out : wishbone_slave_out; - signal wb_dram_ctrl : std_ulogic; - signal wb_dram_init : std_ulogic; + -- DRAM main data wishbone connection + signal wb_dram_in : wishbone_master_out; + signal wb_dram_out : wishbone_slave_out; + + -- DRAM control wishbone connection + signal wb_dram_ctrl_in : wb_io_master_out; + signal wb_dram_ctrl_out : wb_io_slave_out; + signal wb_dram_is_csr : std_ulogic; + signal wb_dram_is_init : std_ulogic; -- Control/status signal core_alt_reset : std_ulogic; @@ -87,8 +91,10 @@ begin uart0_rxd => uart_main_rx, wb_dram_in => wb_dram_in, wb_dram_out => wb_dram_out, - wb_dram_ctrl => wb_dram_ctrl, - wb_dram_init => wb_dram_init, + wb_dram_ctrl_in => wb_dram_ctrl_in, + wb_dram_ctrl_out => wb_dram_ctrl_out, + wb_dram_is_csr => wb_dram_is_csr, + wb_dram_is_init => wb_dram_is_init, alt_reset => core_alt_reset ); @@ -176,8 +182,10 @@ begin wb_in => wb_dram_in, wb_out => wb_dram_out, - wb_is_ctrl => wb_dram_ctrl, - wb_is_init => wb_dram_init, + wb_ctrl_in => wb_dram_ctrl_in, + wb_ctrl_out => wb_dram_ctrl_out, + wb_ctrl_is_csr => wb_dram_is_csr, + wb_ctrl_is_init => wb_dram_is_init, serial_tx => open, serial_rx => '0', diff --git a/include/microwatt_soc.h b/include/microwatt_soc.h index b0dab83..443a8ae 100644 --- a/include/microwatt_soc.h +++ b/include/microwatt_soc.h @@ -7,12 +7,13 @@ #define MEMORY_BASE 0x00000000 /* "Main" memory alias, either BRAM or DRAM */ #define DRAM_BASE 0x40000000 /* DRAM if present */ +#define BRAM_BASE 0x80000000 /* Internal BRAM */ + #define SYSCON_BASE 0xc0000000 /* System control regs */ #define UART_BASE 0xc0002000 /* UART */ #define XICS_BASE 0xc0004000 /* Interrupt controller */ #define DRAM_CTRL_BASE 0xc0100000 /* LiteDRAM control registers */ 
-#define BRAM_BASE 0xf0000000 /* Internal BRAM */ -#define DRAM_INIT_BASE 0xffff0000 /* Internal DRAM init firmware */ +#define DRAM_INIT_BASE 0xf0000000 /* Internal DRAM init firmware */ /* * Register definitions for the syscon registers diff --git a/litedram/gen-src/sdram_init/head.S b/litedram/gen-src/sdram_init/head.S index 2c750f9..235bf14 100644 --- a/litedram/gen-src/sdram_init/head.S +++ b/litedram/gen-src/sdram_init/head.S @@ -14,7 +14,7 @@ * limitations under the License. */ -#define STACK_TOP 0xffff4000 +#define STACK_TOP 0xf0004000 #define FIXUP_ENDIAN \ tdi 0,0,0x48; /* Reverse endian of b . + 8 */ \ diff --git a/litedram/gen-src/sdram_init/sdram_init.lds.S b/litedram/gen-src/sdram_init/sdram_init.lds.S index f1bc291..e6cf0bc 100644 --- a/litedram/gen-src/sdram_init/sdram_init.lds.S +++ b/litedram/gen-src/sdram_init/sdram_init.lds.S @@ -8,6 +8,7 @@ SECTIONS KEEP(*(.head)) } . = DRAM_INIT_BASE | 0x1000; + .text : { *(.text*) *(.sfpr) *(.rodata*) } .data : { *(.data*) } .bss : { *(.bss*) } diff --git a/litedram/gen-src/wrapper-mw-init.vhdl b/litedram/gen-src/wrapper-mw-init.vhdl index c4d702e..1595793 100644 --- a/litedram/gen-src/wrapper-mw-init.vhdl +++ b/litedram/gen-src/wrapper-mw-init.vhdl @@ -15,18 +15,20 @@ entity litedram_wrapper is port( -- LiteDRAM generates the system clock and reset -- from the input clkin - clk_in : in std_ulogic; - rst : in std_ulogic; - system_clk : out std_ulogic; - system_reset : out std_ulogic; - core_alt_reset : out std_ulogic; - pll_locked : out std_ulogic; + clk_in : in std_ulogic; + rst : in std_ulogic; + system_clk : out std_ulogic; + system_reset : out std_ulogic; + core_alt_reset : out std_ulogic; + pll_locked : out std_ulogic; -- Wishbone ports: - wb_in : in wishbone_master_out; - wb_out : out wishbone_slave_out; - wb_is_ctrl : in std_ulogic; - wb_is_init : in std_ulogic; + wb_in : in wishbone_master_out; + wb_out : out wishbone_slave_out; + wb_ctrl_in : in wb_io_master_out; + wb_ctrl_out : out wb_io_slave_out; 
+ wb_ctrl_is_csr : in std_ulogic; + wb_ctrl_is_init : in std_ulogic; -- Init core serial debug serial_tx : out std_ulogic; @@ -128,8 +130,8 @@ architecture behaviour of litedram_wrapper is signal wb_ctrl_ack : std_ulogic; signal wb_ctrl_we : std_ulogic; - signal wb_init_in : wishbone_master_out; - signal wb_init_out : wishbone_slave_out; + signal wb_init_in : wb_io_master_out; + signal wb_init_out : wb_io_slave_out; type state_t is (CMD, MWRITE, MREAD); signal state : state_t; @@ -138,7 +140,7 @@ architecture behaviour of litedram_wrapper is constant INIT_RAM_ABITS :integer := 14; constant INIT_RAM_FILE : string := "litedram_core.init"; - type ram_t is array(0 to (INIT_RAM_SIZE / 8) - 1) of std_logic_vector(63 downto 0); + type ram_t is array(0 to (INIT_RAM_SIZE / 4) - 1) of std_logic_vector(31 downto 0); impure function init_load_ram(name : string) return ram_t is file ram_file : text open read_mode is name; @@ -150,7 +152,8 @@ architecture behaviour of litedram_wrapper is exit when endfile(ram_file); readline(ram_file, ram_line); hread(ram_line, temp_word); - temp_ram(i) := temp_word; + temp_ram(i*2) := temp_word(31 downto 0); + temp_ram(i*2+1) := temp_word(63 downto 32); end loop; return temp_ram; end function; @@ -162,79 +165,93 @@ architecture behaviour of litedram_wrapper is begin - -- BRAM Memory slave + -- alternate core reset address set when DRAM is not initialized. + core_alt_reset <= not init_done; + + -- BRAM Memory slave. 
TODO: Pipeline it with an output buffer + -- to improve timing init_ram_0: process(system_clk) variable adr : integer; begin if rising_edge(system_clk) then wb_init_out.ack <= '0'; if (wb_init_in.cyc and wb_init_in.stb) = '1' then - adr := to_integer((unsigned(wb_init_in.adr(INIT_RAM_ABITS-1 downto 3)))); + adr := to_integer((unsigned(wb_init_in.adr(INIT_RAM_ABITS-1 downto 2)))); if wb_init_in.we = '0' then wb_init_out.dat <= init_ram(adr); else - for i in 0 to 7 loop + for i in 0 to 3 loop if wb_init_in.sel(i) = '1' then init_ram(adr)(((i + 1) * 8) - 1 downto i * 8) <= wb_init_in.dat(((i + 1) * 8) - 1 downto i * 8); end if; end loop; end if; - wb_init_out.ack <= not wb_init_out.ack; + wb_init_out.ack <= '1'; end if; end if; end process; - wb_init_in.adr <= wb_in.adr; - wb_init_in.dat <= wb_in.dat; - wb_init_in.sel <= wb_in.sel; - wb_init_in.we <= wb_in.we; - wb_init_in.stb <= wb_in.stb; - wb_init_in.cyc <= wb_in.cyc and wb_is_init; + -- + -- Control bus wishbone: This muxes the wishbone to the CSRs + -- and an internal small one to the init BRAM + -- + + -- Init DRAM wishbone IN signals + wb_init_in.adr <= wb_ctrl_in.adr; + wb_init_in.dat <= wb_ctrl_in.dat; + wb_init_in.sel <= wb_ctrl_in.sel; + wb_init_in.we <= wb_ctrl_in.we; + wb_init_in.stb <= wb_ctrl_in.stb; + wb_init_in.cyc <= wb_ctrl_in.cyc and wb_ctrl_is_init; + + -- DRAM CSR IN signals + wb_ctrl_adr <= x"0000" & wb_ctrl_in.adr(15 downto 2); + wb_ctrl_dat_w <= wb_ctrl_in.dat; + wb_ctrl_sel <= wb_ctrl_in.sel; + wb_ctrl_we <= wb_ctrl_in.we; + wb_ctrl_cyc <= wb_ctrl_in.cyc and wb_ctrl_is_csr; + wb_ctrl_stb <= wb_ctrl_in.stb and wb_ctrl_is_csr; - -- Address bit 3 selects the top or bottom half of the data + -- Ctrl bus wishbone OUT signals + wb_ctrl_out.ack <= wb_ctrl_ack when wb_ctrl_is_csr = '1' + else wb_init_out.ack; + wb_ctrl_out.dat <= wb_ctrl_dat_r when wb_ctrl_is_csr = '1' + else wb_init_out.dat; + wb_ctrl_out.stall <= wb_init_out.stall when wb_ctrl_is_init else + '0' when wb_ctrl_in.cyc = '0' else not 
wb_ctrl_ack; + + -- + -- Data bus wishbone to LiteDRAM native port + -- + -- Address bit 3 selects the top or bottom half of the data -- bus (64-bit wishbone vs. 128-bit DRAM interface) -- + -- XXX TODO: Figure out how to pipeline this + -- ad3 <= wb_in.adr(3); - -- DRAM data interface signals - user_port0_cmd_valid <= (wb_in.cyc and wb_in.stb and not wb_is_ctrl and not wb_is_init) - when state = CMD else '0'; - user_port0_cmd_we <= wb_in.we when state = CMD else '0'; + -- Wishbone port IN signals + user_port0_cmd_valid <= wb_in.cyc and wb_in.stb when state = CMD else '0'; + user_port0_cmd_we <= wb_in.we when state = CMD else '0'; user_port0_wdata_valid <= '1' when state = MWRITE else '0'; user_port0_rdata_ready <= '1' when state = MREAD else '0'; - user_port0_cmd_addr <= wb_in.adr(DRAM_ABITS+3 downto 4); - user_port0_wdata_data <= wb_in.dat & wb_in.dat; - user_port0_wdata_we <= wb_in.sel & "00000000" when ad3 = '1' else - "00000000" & wb_in.sel; - - -- DRAM ctrl interface signals - wb_ctrl_adr <= x"0000" & wb_in.adr(15 downto 2); - wb_ctrl_dat_w <= wb_in.dat(31 downto 0); - wb_ctrl_sel <= wb_in.sel(3 downto 0); - wb_ctrl_cyc <= wb_in.cyc and wb_is_ctrl; - wb_ctrl_stb <= wb_in.stb and wb_is_ctrl; - wb_ctrl_we <= wb_in.we; - - -- Wishbone out signals - wb_out.ack <= wb_ctrl_ack when wb_is_ctrl ='1' else - wb_init_out.ack when wb_is_init = '1' else - user_port0_wdata_ready when state = MWRITE else + user_port0_cmd_addr <= wb_in.adr(DRAM_ABITS+3 downto 4); + user_port0_wdata_data <= wb_in.dat & wb_in.dat; + user_port0_wdata_we <= wb_in.sel & "00000000" when ad3 = '1' else + "00000000" & wb_in.sel; + + -- Wishbone OUT signals + wb_out.ack <= user_port0_wdata_ready when state = MWRITE else user_port0_rdata_valid when state = MREAD else '0'; - wb_out.dat <= (x"00000000" & wb_ctrl_dat_r) when wb_is_ctrl = '1' else - wb_init_out.dat when wb_is_init = '1' else - user_port0_rdata_data(127 downto 64) when ad3 = '1' else + wb_out.dat <= user_port0_rdata_data(127 downto 64) 
when ad3 = '1' else user_port0_rdata_data(63 downto 0); + -- We don't do pipelining yet. wb_out.stall <= '0' when wb_in.cyc = '0' else not wb_out.ack; - -- Reset ignored, the reset controller use the pll lock signal, - -- and alternate core reset address set when DRAM is not initialized. - -- - core_alt_reset <= not init_done; - - -- State machine + -- DRAM user port State machine sm: process(system_clk) begin @@ -255,7 +272,7 @@ begin if user_port0_rdata_valid = '1' then state <= CMD; end if; - end case; + end case; end if; end if; end process; diff --git a/litedram/gen-src/wrapper-self-init.vhdl b/litedram/gen-src/wrapper-self-init.vhdl index 34e69e3..01acfd9 100644 --- a/litedram/gen-src/wrapper-self-init.vhdl +++ b/litedram/gen-src/wrapper-self-init.vhdl @@ -15,18 +15,20 @@ entity litedram_wrapper is port( -- LiteDRAM generates the system clock and reset -- from the input clkin - clk_in : in std_ulogic; - rst : in std_ulogic; - system_clk : out std_ulogic; - system_reset : out std_ulogic; - core_alt_reset : out std_ulogic; - pll_locked : out std_ulogic; + clk_in : in std_ulogic; + rst : in std_ulogic; + system_clk : out std_ulogic; + system_reset : out std_ulogic; + core_alt_reset : out std_ulogic; + pll_locked : out std_ulogic; -- Wishbone ports: - wb_in : in wishbone_master_out; - wb_out : out wishbone_slave_out; - wb_is_ctrl : in std_ulogic; - wb_is_init : in std_ulogic; + wb_in : in wishbone_master_out; + wb_out : out wishbone_slave_out; + wb_ctrl_in : in wb_io_master_out; + wb_ctrl_out : out wb_io_slave_out; + wb_ctrl_is_csr : in std_ulogic; + wb_ctrl_is_init : in std_ulogic; -- Init core serial debug serial_tx : out std_ulogic; @@ -52,7 +54,6 @@ entity litedram_wrapper is ddram_cke : out std_ulogic; ddram_odt : out std_ulogic; ddram_reset_n : out std_ulogic - ); end entity litedram_wrapper; architecture behaviour of litedram_wrapper is @@ -117,36 +118,47 @@ architecture behaviour of litedram_wrapper is begin + -- Reset, lift it when init done, no alt core 
reset + system_reset <= dram_user_reset or not init_done; + core_alt_reset <= '0'; + + -- Control bus is unused: there are no CSRs or init memory here, + -- so just ack every access to keep the master from hanging. + wb_ctrl_out.ack <= wb_ctrl_in.cyc and wb_ctrl_in.stb; + wb_ctrl_out.dat <= (others => '0'); + wb_ctrl_out.stall <= '0'; + + -- + -- Data bus wishbone to LiteDRAM native port + -- -- Address bit 3 selects the top or bottom half of the data -- bus (64-bit wishbone vs. 128-bit DRAM interface) -- + -- XXX TODO: Figure out how to pipeline this + -- ad3 <= wb_in.adr(3); - -- DRAM interface signals - user_port0_cmd_valid <= (wb_in.cyc and wb_in.stb and not wb_is_ctrl and not wb_is_init) - when state = CMD else '0'; - user_port0_cmd_we <= wb_in.we when state = CMD else '0'; + -- Wishbone port IN signals + user_port0_cmd_valid <= wb_in.cyc and wb_in.stb when state = CMD else '0'; + user_port0_cmd_we <= wb_in.we when state = CMD else '0'; user_port0_wdata_valid <= '1' when state = MWRITE else '0'; user_port0_rdata_ready <= '1' when state = MREAD else '0'; - user_port0_cmd_addr <= wb_in.adr(DRAM_ABITS+3 downto 4); - user_port0_wdata_data <= wb_in.dat & wb_in.dat; - user_port0_wdata_we <= wb_in.sel & "00000000" when ad3 = '1' else - "00000000" & wb_in.sel; - - -- Wishbone out signals.
CSR and init memory do nothing, just ack - wb_out.ack <= '1' when (wb_is_ctrl = '1' or wb_is_init = '1') else - user_port0_wdata_ready when state = MWRITE else + user_port0_cmd_addr <= wb_in.adr(DRAM_ABITS+3 downto 4); + user_port0_wdata_data <= wb_in.dat & wb_in.dat; + user_port0_wdata_we <= wb_in.sel & "00000000" when ad3 = '1' else + "00000000" & wb_in.sel; + + -- Wishbone OUT signals + wb_out.ack <= user_port0_wdata_ready when state = MWRITE else user_port0_rdata_valid when state = MREAD else '0'; - wb_out.dat <= (others => '0') when (wb_is_ctrl = '1' or wb_is_init = '1') else - user_port0_rdata_data(127 downto 64) when ad3 = '1' else + + wb_out.dat <= user_port0_rdata_data(127 downto 64) when ad3 = '1' else user_port0_rdata_data(63 downto 0); - wb_out.stall <= '0' when wb_in.cyc = '0' else not wb_out.ack; - -- Reset, lift it when init done, no alt core reset - system_reset <= dram_user_reset or not init_done; - core_alt_reset <= '0'; + -- We don't do pipelining yet. + wb_out.stall <= '0' when wb_in.cyc = '0' else not wb_out.ack; - -- State machine + -- DRAM user port State machine sm: process(system_clk) begin diff --git a/litedram/generated/arty/litedram-wrapper.vhdl b/litedram/generated/arty/litedram-wrapper.vhdl index c4d702e..1595793 100644 --- a/litedram/generated/arty/litedram-wrapper.vhdl +++ b/litedram/generated/arty/litedram-wrapper.vhdl @@ -15,18 +15,20 @@ entity litedram_wrapper is port( -- LiteDRAM generates the system clock and reset -- from the input clkin - clk_in : in std_ulogic; - rst : in std_ulogic; - system_clk : out std_ulogic; - system_reset : out std_ulogic; - core_alt_reset : out std_ulogic; - pll_locked : out std_ulogic; + clk_in : in std_ulogic; + rst : in std_ulogic; + system_clk : out std_ulogic; + system_reset : out std_ulogic; + core_alt_reset : out std_ulogic; + pll_locked : out std_ulogic; -- Wishbone ports: - wb_in : in wishbone_master_out; - wb_out : out wishbone_slave_out; - wb_is_ctrl : in std_ulogic; - wb_is_init : in 
std_ulogic; + wb_in : in wishbone_master_out; + wb_out : out wishbone_slave_out; + wb_ctrl_in : in wb_io_master_out; + wb_ctrl_out : out wb_io_slave_out; + wb_ctrl_is_csr : in std_ulogic; + wb_ctrl_is_init : in std_ulogic; -- Init core serial debug serial_tx : out std_ulogic; @@ -128,8 +130,8 @@ architecture behaviour of litedram_wrapper is signal wb_ctrl_ack : std_ulogic; signal wb_ctrl_we : std_ulogic; - signal wb_init_in : wishbone_master_out; - signal wb_init_out : wishbone_slave_out; + signal wb_init_in : wb_io_master_out; + signal wb_init_out : wb_io_slave_out; type state_t is (CMD, MWRITE, MREAD); signal state : state_t; @@ -138,7 +140,7 @@ architecture behaviour of litedram_wrapper is constant INIT_RAM_ABITS :integer := 14; constant INIT_RAM_FILE : string := "litedram_core.init"; - type ram_t is array(0 to (INIT_RAM_SIZE / 8) - 1) of std_logic_vector(63 downto 0); + type ram_t is array(0 to (INIT_RAM_SIZE / 4) - 1) of std_logic_vector(31 downto 0); impure function init_load_ram(name : string) return ram_t is file ram_file : text open read_mode is name; @@ -150,7 +152,8 @@ architecture behaviour of litedram_wrapper is exit when endfile(ram_file); readline(ram_file, ram_line); hread(ram_line, temp_word); - temp_ram(i) := temp_word; + temp_ram(i*2) := temp_word(31 downto 0); + temp_ram(i*2+1) := temp_word(63 downto 32); end loop; return temp_ram; end function; @@ -162,79 +165,93 @@ architecture behaviour of litedram_wrapper is begin - -- BRAM Memory slave + -- alternate core reset address set when DRAM is not initialized. + core_alt_reset <= not init_done; + + -- BRAM Memory slave. 
TODO: Pipeline it with an output buffer + -- to improve timing init_ram_0: process(system_clk) variable adr : integer; begin if rising_edge(system_clk) then wb_init_out.ack <= '0'; if (wb_init_in.cyc and wb_init_in.stb) = '1' then - adr := to_integer((unsigned(wb_init_in.adr(INIT_RAM_ABITS-1 downto 3)))); + adr := to_integer((unsigned(wb_init_in.adr(INIT_RAM_ABITS-1 downto 2)))); if wb_init_in.we = '0' then wb_init_out.dat <= init_ram(adr); else - for i in 0 to 7 loop + for i in 0 to 3 loop if wb_init_in.sel(i) = '1' then init_ram(adr)(((i + 1) * 8) - 1 downto i * 8) <= wb_init_in.dat(((i + 1) * 8) - 1 downto i * 8); end if; end loop; end if; - wb_init_out.ack <= not wb_init_out.ack; + wb_init_out.ack <= '1'; end if; end if; end process; - wb_init_in.adr <= wb_in.adr; - wb_init_in.dat <= wb_in.dat; - wb_init_in.sel <= wb_in.sel; - wb_init_in.we <= wb_in.we; - wb_init_in.stb <= wb_in.stb; - wb_init_in.cyc <= wb_in.cyc and wb_is_init; + -- + -- Control bus wishbone: This muxes the wishbone to the CSRs + -- and an internal small one to the init BRAM + -- + + -- Init DRAM wishbone IN signals + wb_init_in.adr <= wb_ctrl_in.adr; + wb_init_in.dat <= wb_ctrl_in.dat; + wb_init_in.sel <= wb_ctrl_in.sel; + wb_init_in.we <= wb_ctrl_in.we; + wb_init_in.stb <= wb_ctrl_in.stb; + wb_init_in.cyc <= wb_ctrl_in.cyc and wb_ctrl_is_init; + + -- DRAM CSR IN signals + wb_ctrl_adr <= x"0000" & wb_ctrl_in.adr(15 downto 2); + wb_ctrl_dat_w <= wb_ctrl_in.dat; + wb_ctrl_sel <= wb_ctrl_in.sel; + wb_ctrl_we <= wb_ctrl_in.we; + wb_ctrl_cyc <= wb_ctrl_in.cyc and wb_ctrl_is_csr; + wb_ctrl_stb <= wb_ctrl_in.stb and wb_ctrl_is_csr; - -- Address bit 3 selects the top or bottom half of the data + -- Ctrl bus wishbone OUT signals + wb_ctrl_out.ack <= wb_ctrl_ack when wb_ctrl_is_csr = '1' + else wb_init_out.ack; + wb_ctrl_out.dat <= wb_ctrl_dat_r when wb_ctrl_is_csr = '1' + else wb_init_out.dat; + wb_ctrl_out.stall <= wb_init_out.stall when wb_ctrl_is_init else + '0' when wb_ctrl_in.cyc = '0' else not 
wb_ctrl_ack; + + -- + -- Data bus wishbone to LiteDRAM native port + -- + -- Address bit 3 selects the top or bottom half of the data -- bus (64-bit wishbone vs. 128-bit DRAM interface) -- + -- XXX TODO: Figure out how to pipeline this + -- ad3 <= wb_in.adr(3); - -- DRAM data interface signals - user_port0_cmd_valid <= (wb_in.cyc and wb_in.stb and not wb_is_ctrl and not wb_is_init) - when state = CMD else '0'; - user_port0_cmd_we <= wb_in.we when state = CMD else '0'; + -- Wishbone port IN signals + user_port0_cmd_valid <= wb_in.cyc and wb_in.stb when state = CMD else '0'; + user_port0_cmd_we <= wb_in.we when state = CMD else '0'; user_port0_wdata_valid <= '1' when state = MWRITE else '0'; user_port0_rdata_ready <= '1' when state = MREAD else '0'; - user_port0_cmd_addr <= wb_in.adr(DRAM_ABITS+3 downto 4); - user_port0_wdata_data <= wb_in.dat & wb_in.dat; - user_port0_wdata_we <= wb_in.sel & "00000000" when ad3 = '1' else - "00000000" & wb_in.sel; - - -- DRAM ctrl interface signals - wb_ctrl_adr <= x"0000" & wb_in.adr(15 downto 2); - wb_ctrl_dat_w <= wb_in.dat(31 downto 0); - wb_ctrl_sel <= wb_in.sel(3 downto 0); - wb_ctrl_cyc <= wb_in.cyc and wb_is_ctrl; - wb_ctrl_stb <= wb_in.stb and wb_is_ctrl; - wb_ctrl_we <= wb_in.we; - - -- Wishbone out signals - wb_out.ack <= wb_ctrl_ack when wb_is_ctrl ='1' else - wb_init_out.ack when wb_is_init = '1' else - user_port0_wdata_ready when state = MWRITE else + user_port0_cmd_addr <= wb_in.adr(DRAM_ABITS+3 downto 4); + user_port0_wdata_data <= wb_in.dat & wb_in.dat; + user_port0_wdata_we <= wb_in.sel & "00000000" when ad3 = '1' else + "00000000" & wb_in.sel; + + -- Wishbone OUT signals + wb_out.ack <= user_port0_wdata_ready when state = MWRITE else user_port0_rdata_valid when state = MREAD else '0'; - wb_out.dat <= (x"00000000" & wb_ctrl_dat_r) when wb_is_ctrl = '1' else - wb_init_out.dat when wb_is_init = '1' else - user_port0_rdata_data(127 downto 64) when ad3 = '1' else + wb_out.dat <= user_port0_rdata_data(127 downto 64) 
when ad3 = '1' else user_port0_rdata_data(63 downto 0); + -- We don't do pipelining yet. wb_out.stall <= '0' when wb_in.cyc = '0' else not wb_out.ack; - -- Reset ignored, the reset controller use the pll lock signal, - -- and alternate core reset address set when DRAM is not initialized. - -- - core_alt_reset <= not init_done; - - -- State machine + -- DRAM user port State machine sm: process(system_clk) begin @@ -255,7 +272,7 @@ begin if user_port0_rdata_valid = '1' then state <= CMD; end if; - end case; + end case; end if; end if; end process; diff --git a/litedram/generated/arty/litedram_core.init b/litedram/generated/arty/litedram_core.init index b1a75f8..22485ac 100644 --- a/litedram/generated/arty/litedram_core.init +++ b/litedram/generated/arty/litedram_core.init @@ -4,10 +4,10 @@ a602487d05009f42 a64b5a7d14004a39 2402004ca64b7b7d 602100003c200000 -6421ffff782107c6 +6421f000782107c6 3d80000060213f00 798c07c6618c0000 -618c108c658cffff +618c108c658cf000 4e8004217d8903a6 0000000048000002 0000000000000000 diff --git a/litedram/generated/arty/litedram_core.v b/litedram/generated/arty/litedram_core.v index 9208d87..bb0671b 100644 --- a/litedram/generated/arty/litedram_core.v +++ b/litedram/generated/arty/litedram_core.v @@ -1,5 +1,5 @@ //-------------------------------------------------------------------------------- -// Auto-generated by Migen (0d16e03) & LiteX (3391398a) on 2020-05-15 13:30:46 +// Auto-generated by Migen (0d16e03) & LiteX (3391398a) on 2020-05-16 19:06:01 //-------------------------------------------------------------------------------- module litedram_core( input wire clk, diff --git a/litedram/generated/nexys-video/litedram-wrapper.vhdl b/litedram/generated/nexys-video/litedram-wrapper.vhdl index c4d702e..1595793 100644 --- a/litedram/generated/nexys-video/litedram-wrapper.vhdl +++ b/litedram/generated/nexys-video/litedram-wrapper.vhdl @@ -15,18 +15,20 @@ entity litedram_wrapper is port( -- LiteDRAM generates the system clock and reset -- from 
the input clkin - clk_in : in std_ulogic; - rst : in std_ulogic; - system_clk : out std_ulogic; - system_reset : out std_ulogic; - core_alt_reset : out std_ulogic; - pll_locked : out std_ulogic; + clk_in : in std_ulogic; + rst : in std_ulogic; + system_clk : out std_ulogic; + system_reset : out std_ulogic; + core_alt_reset : out std_ulogic; + pll_locked : out std_ulogic; -- Wishbone ports: - wb_in : in wishbone_master_out; - wb_out : out wishbone_slave_out; - wb_is_ctrl : in std_ulogic; - wb_is_init : in std_ulogic; + wb_in : in wishbone_master_out; + wb_out : out wishbone_slave_out; + wb_ctrl_in : in wb_io_master_out; + wb_ctrl_out : out wb_io_slave_out; + wb_ctrl_is_csr : in std_ulogic; + wb_ctrl_is_init : in std_ulogic; -- Init core serial debug serial_tx : out std_ulogic; @@ -128,8 +130,8 @@ architecture behaviour of litedram_wrapper is signal wb_ctrl_ack : std_ulogic; signal wb_ctrl_we : std_ulogic; - signal wb_init_in : wishbone_master_out; - signal wb_init_out : wishbone_slave_out; + signal wb_init_in : wb_io_master_out; + signal wb_init_out : wb_io_slave_out; type state_t is (CMD, MWRITE, MREAD); signal state : state_t; @@ -138,7 +140,7 @@ architecture behaviour of litedram_wrapper is constant INIT_RAM_ABITS :integer := 14; constant INIT_RAM_FILE : string := "litedram_core.init"; - type ram_t is array(0 to (INIT_RAM_SIZE / 8) - 1) of std_logic_vector(63 downto 0); + type ram_t is array(0 to (INIT_RAM_SIZE / 4) - 1) of std_logic_vector(31 downto 0); impure function init_load_ram(name : string) return ram_t is file ram_file : text open read_mode is name; @@ -150,7 +152,8 @@ architecture behaviour of litedram_wrapper is exit when endfile(ram_file); readline(ram_file, ram_line); hread(ram_line, temp_word); - temp_ram(i) := temp_word; + temp_ram(i*2) := temp_word(31 downto 0); + temp_ram(i*2+1) := temp_word(63 downto 32); end loop; return temp_ram; end function; @@ -162,79 +165,93 @@ architecture behaviour of litedram_wrapper is begin - -- BRAM Memory slave + -- 
alternate core reset address set when DRAM is not initialized. + core_alt_reset <= not init_done; + + -- BRAM Memory slave. TODO: Pipeline it with an output buffer + -- to improve timing init_ram_0: process(system_clk) variable adr : integer; begin if rising_edge(system_clk) then wb_init_out.ack <= '0'; if (wb_init_in.cyc and wb_init_in.stb) = '1' then - adr := to_integer((unsigned(wb_init_in.adr(INIT_RAM_ABITS-1 downto 3)))); + adr := to_integer((unsigned(wb_init_in.adr(INIT_RAM_ABITS-1 downto 2)))); if wb_init_in.we = '0' then wb_init_out.dat <= init_ram(adr); else - for i in 0 to 7 loop + for i in 0 to 3 loop if wb_init_in.sel(i) = '1' then init_ram(adr)(((i + 1) * 8) - 1 downto i * 8) <= wb_init_in.dat(((i + 1) * 8) - 1 downto i * 8); end if; end loop; end if; - wb_init_out.ack <= not wb_init_out.ack; + wb_init_out.ack <= '1'; end if; end if; end process; - wb_init_in.adr <= wb_in.adr; - wb_init_in.dat <= wb_in.dat; - wb_init_in.sel <= wb_in.sel; - wb_init_in.we <= wb_in.we; - wb_init_in.stb <= wb_in.stb; - wb_init_in.cyc <= wb_in.cyc and wb_is_init; + -- + -- Control bus wishbone: This muxes the wishbone to the CSRs + -- and an internal small one to the init BRAM + -- + + -- Init DRAM wishbone IN signals + wb_init_in.adr <= wb_ctrl_in.adr; + wb_init_in.dat <= wb_ctrl_in.dat; + wb_init_in.sel <= wb_ctrl_in.sel; + wb_init_in.we <= wb_ctrl_in.we; + wb_init_in.stb <= wb_ctrl_in.stb; + wb_init_in.cyc <= wb_ctrl_in.cyc and wb_ctrl_is_init; + + -- DRAM CSR IN signals + wb_ctrl_adr <= x"0000" & wb_ctrl_in.adr(15 downto 2); + wb_ctrl_dat_w <= wb_ctrl_in.dat; + wb_ctrl_sel <= wb_ctrl_in.sel; + wb_ctrl_we <= wb_ctrl_in.we; + wb_ctrl_cyc <= wb_ctrl_in.cyc and wb_ctrl_is_csr; + wb_ctrl_stb <= wb_ctrl_in.stb and wb_ctrl_is_csr; - -- Address bit 3 selects the top or bottom half of the data + -- Ctrl bus wishbone OUT signals + wb_ctrl_out.ack <= wb_ctrl_ack when wb_ctrl_is_csr = '1' + else wb_init_out.ack; + wb_ctrl_out.dat <= wb_ctrl_dat_r when wb_ctrl_is_csr = '1' + else 
wb_init_out.dat; + wb_ctrl_out.stall <= wb_init_out.stall when wb_ctrl_is_init else + '0' when wb_ctrl_in.cyc = '0' else not wb_ctrl_ack; + + -- + -- Data bus wishbone to LiteDRAM native port + -- + -- Address bit 3 selects the top or bottom half of the data -- bus (64-bit wishbone vs. 128-bit DRAM interface) -- + -- XXX TODO: Figure out how to pipeline this + -- ad3 <= wb_in.adr(3); - -- DRAM data interface signals - user_port0_cmd_valid <= (wb_in.cyc and wb_in.stb and not wb_is_ctrl and not wb_is_init) - when state = CMD else '0'; - user_port0_cmd_we <= wb_in.we when state = CMD else '0'; + -- Wishbone port IN signals + user_port0_cmd_valid <= wb_in.cyc and wb_in.stb when state = CMD else '0'; + user_port0_cmd_we <= wb_in.we when state = CMD else '0'; user_port0_wdata_valid <= '1' when state = MWRITE else '0'; user_port0_rdata_ready <= '1' when state = MREAD else '0'; - user_port0_cmd_addr <= wb_in.adr(DRAM_ABITS+3 downto 4); - user_port0_wdata_data <= wb_in.dat & wb_in.dat; - user_port0_wdata_we <= wb_in.sel & "00000000" when ad3 = '1' else - "00000000" & wb_in.sel; - - -- DRAM ctrl interface signals - wb_ctrl_adr <= x"0000" & wb_in.adr(15 downto 2); - wb_ctrl_dat_w <= wb_in.dat(31 downto 0); - wb_ctrl_sel <= wb_in.sel(3 downto 0); - wb_ctrl_cyc <= wb_in.cyc and wb_is_ctrl; - wb_ctrl_stb <= wb_in.stb and wb_is_ctrl; - wb_ctrl_we <= wb_in.we; - - -- Wishbone out signals - wb_out.ack <= wb_ctrl_ack when wb_is_ctrl ='1' else - wb_init_out.ack when wb_is_init = '1' else - user_port0_wdata_ready when state = MWRITE else + user_port0_cmd_addr <= wb_in.adr(DRAM_ABITS+3 downto 4); + user_port0_wdata_data <= wb_in.dat & wb_in.dat; + user_port0_wdata_we <= wb_in.sel & "00000000" when ad3 = '1' else + "00000000" & wb_in.sel; + + -- Wishbone OUT signals + wb_out.ack <= user_port0_wdata_ready when state = MWRITE else user_port0_rdata_valid when state = MREAD else '0'; - wb_out.dat <= (x"00000000" & wb_ctrl_dat_r) when wb_is_ctrl = '1' else - wb_init_out.dat when wb_is_init = 
'1' else - user_port0_rdata_data(127 downto 64) when ad3 = '1' else + wb_out.dat <= user_port0_rdata_data(127 downto 64) when ad3 = '1' else user_port0_rdata_data(63 downto 0); + -- We don't do pipelining yet. wb_out.stall <= '0' when wb_in.cyc = '0' else not wb_out.ack; - -- Reset ignored, the reset controller use the pll lock signal, - -- and alternate core reset address set when DRAM is not initialized. - -- - core_alt_reset <= not init_done; - - -- State machine + -- DRAM user port State machine sm: process(system_clk) begin @@ -255,7 +272,7 @@ begin if user_port0_rdata_valid = '1' then state <= CMD; end if; - end case; + end case; end if; end if; end process; diff --git a/litedram/generated/nexys-video/litedram_core.init b/litedram/generated/nexys-video/litedram_core.init index b1a75f8..22485ac 100644 --- a/litedram/generated/nexys-video/litedram_core.init +++ b/litedram/generated/nexys-video/litedram_core.init @@ -4,10 +4,10 @@ a602487d05009f42 a64b5a7d14004a39 2402004ca64b7b7d 602100003c200000 -6421ffff782107c6 +6421f000782107c6 3d80000060213f00 798c07c6618c0000 -618c108c658cffff +618c108c658cf000 4e8004217d8903a6 0000000048000002 0000000000000000 diff --git a/litedram/generated/nexys-video/litedram_core.v b/litedram/generated/nexys-video/litedram_core.v index dd29267..4afac81 100644 --- a/litedram/generated/nexys-video/litedram_core.v +++ b/litedram/generated/nexys-video/litedram_core.v @@ -1,5 +1,5 @@ //-------------------------------------------------------------------------------- -// Auto-generated by Migen (0d16e03) & LiteX (3391398a) on 2020-05-15 13:30:49 +// Auto-generated by Migen (0d16e03) & LiteX (3391398a) on 2020-05-16 19:06:03 //-------------------------------------------------------------------------------- module litedram_core( input wire clk, diff --git a/soc.vhdl b/soc.vhdl index a42e071..a9f46fd 100644 --- a/soc.vhdl +++ b/soc.vhdl @@ -12,14 +12,17 @@ use work.wishbone_types.all; -- Memory map. 
*** Keep include/microwatt_soc.h updated on changes *** -- +-- Main bus: -- 0x00000000: Block RAM (MEMORY_SIZE) or DRAM depending on syscon -- 0x40000000: DRAM (when present) +-- 0x80000000: Block RAM (aliased & repeated) + +-- IO Bus: -- 0xc0000000: SYSCON -- 0xc0002000: UART0 -- 0xc0004000: XICS ICP -- 0xc0100000: LiteDRAM control (CSRs) --- 0xf0000000: Block RAM (aliased & repeated) --- 0xffff0000: DRAM init code (if any) +-- 0xf0000000: DRAM init code (if any) entity soc is generic ( @@ -37,10 +40,12 @@ entity soc is system_clk : in std_ulogic; -- DRAM controller signals - wb_dram_in : out wishbone_master_out; - wb_dram_out : in wishbone_slave_out; - wb_dram_ctrl : out std_ulogic; - wb_dram_init : out std_ulogic; + wb_dram_in : out wishbone_master_out; + wb_dram_out : in wishbone_slave_out; + wb_dram_ctrl_in : out wb_io_master_out; + wb_dram_ctrl_out : in wb_io_slave_out; + wb_dram_is_csr : out std_ulogic; + wb_dram_is_init : out std_ulogic; -- UART0 signals: uart0_txd : out std_ulogic; @@ -71,20 +76,28 @@ architecture behaviour of soc is signal wb_master_in : wishbone_slave_out; signal wb_master_out : wishbone_master_out; + -- Main "IO" bus, from main slave decoder to the latch + signal wb_io_in : wishbone_master_out; + signal wb_io_out : wishbone_slave_out; + + -- Secondary (smaller) IO bus after the IO bus latch + signal wb_sio_out : wb_io_master_out; + signal wb_sio_in : wb_io_slave_out; + -- Syscon signals signal dram_at_0 : std_ulogic; - signal do_core_reset : std_ulogic; - signal wb_syscon_in : wishbone_master_out; - signal wb_syscon_out : wishbone_slave_out; + signal do_core_reset : std_ulogic; + signal wb_syscon_in : wb_io_master_out; + signal wb_syscon_out : wb_io_slave_out; -- UART0 signals: - signal wb_uart0_in : wishbone_master_out; - signal wb_uart0_out : wishbone_slave_out; + signal wb_uart0_in : wb_io_master_out; + signal wb_uart0_out : wb_io_slave_out; signal uart_dat8 : std_ulogic_vector(7 downto 0); -- XICS0 signals: - signal wb_xics0_in : 
wishbone_master_out; - signal wb_xics0_out : wishbone_slave_out; + signal wb_xics0_in : wb_io_master_out; + signal wb_xics0_out : wb_io_slave_out; signal int_level_in : std_ulogic_vector(15 downto 0); signal xics_to_execute1 : XicsToExecute1Type; @@ -141,7 +154,7 @@ begin generic map( SIM => SIM, DISABLE_FLATTEN => DISABLE_FLATTEN_CORE, - ALT_RESET_ADDRESS => (15 downto 0 => '0', others => '1') + ALT_RESET_ADDRESS => (27 downto 0 => '0', others => '1') ) port map( clk => system_clk, @@ -180,90 +193,271 @@ begin wb_slave_in => wb_master_in ); - -- Wishbone slaves address decoder & mux - slave_intercon: process(wb_master_out, wb_bram_out, wb_uart0_out, wb_dram_out, wb_syscon_out) - -- Selected slave - type slave_type is (SLAVE_SYSCON, - SLAVE_UART, - SLAVE_BRAM, - SLAVE_DRAM, - SLAVE_DRAM_INIT, - SLAVE_DRAM_CTRL, - SLAVE_ICP_0, - SLAVE_NONE); - variable slave : slave_type; + -- Top level Wishbone slaves address decoder & mux + -- + -- From CPU to BRAM, DRAM, IO, selected on top 3 bits and dram_at_0 + -- 0000 - BRAM + -- 0001 - DRAM + -- 01xx - DRAM + -- 10xx - BRAM + -- 11xx - IO + -- + slave_top_intercon: process(wb_master_out, wb_bram_out, wb_dram_out, wb_io_out, dram_at_0) + type slave_top_type is (SLAVE_TOP_BRAM, + SLAVE_TOP_DRAM, + SLAVE_TOP_IO); + variable slave_top : slave_top_type; + variable top_decode : std_ulogic_vector(3 downto 0); begin - -- Simple address decoder. - slave := SLAVE_NONE; - -- Simple address decoder. 
Ignore top bits to save silicon for now - slave := SLAVE_NONE; - if std_match(wb_master_out.adr, x"0-------") then - slave := SLAVE_DRAM when HAS_DRAM and dram_at_0 = '1' else - SLAVE_BRAM; - elsif std_match(wb_master_out.adr, x"FFFF----") then - slave := SLAVE_DRAM_INIT; - elsif std_match(wb_master_out.adr, x"F-------") then - slave := SLAVE_BRAM; - elsif std_match(wb_master_out.adr, x"4-------") and HAS_DRAM then - slave := SLAVE_DRAM; - elsif std_match(wb_master_out.adr, x"C0000---") then - slave := SLAVE_SYSCON; - elsif std_match(wb_master_out.adr, x"C0002---") then - slave := SLAVE_UART; - elsif std_match(wb_master_out.adr, x"C01-----") then - slave := SLAVE_DRAM_CTRL; - elsif std_match(wb_master_out.adr, x"C0004---") then - slave := SLAVE_ICP_0; + -- Top-level address decoder + top_decode := wb_master_out.adr(31 downto 29) & dram_at_0; + slave_top := SLAVE_TOP_BRAM; + if std_match(top_decode, "0000") then + slave_top := SLAVE_TOP_BRAM; + elsif std_match(top_decode, "0001") then + slave_top := SLAVE_TOP_DRAM; + elsif std_match(top_decode, "01--") then + slave_top := SLAVE_TOP_DRAM; + elsif std_match(top_decode, "10--") then + slave_top := SLAVE_TOP_BRAM; + elsif std_match(top_decode, "11--") then + slave_top := SLAVE_TOP_IO; end if; - -- Wishbone muxing. Defaults: + -- Top level wishbone muxing. wb_bram_in <= wb_master_out; wb_bram_in.cyc <= '0'; - wb_uart0_in <= wb_master_out; + wb_dram_in <= wb_master_out; + wb_dram_in.cyc <= '0'; + wb_io_in <= wb_master_out; + wb_io_in.cyc <= '0'; + case slave_top is + when SLAVE_TOP_BRAM => + wb_bram_in.cyc <= wb_master_out.cyc; + wb_master_in <= wb_bram_out; + when SLAVE_TOP_DRAM => + wb_dram_in.cyc <= wb_master_out.cyc; + wb_master_in <= wb_dram_out; + when SLAVE_TOP_IO => + wb_io_in.cyc <= wb_master_out.cyc; + wb_master_in <= wb_io_out; + end case; + end process slave_top_intercon; + + -- IO wishbone slave 64->32 bits converter + -- + -- For timing reasons, this adds a one cycle latch on the way both + -- in and out. 
This relaxes timing and routing pressure on the "main" + -- memory bus by moving all simple IOs to a slower 32-bit bus. + -- + -- This implementation is rather dumb at the moment, no stash buffer, + -- so we stall whenever that latch is busy. This can be improved. + -- + slave_io_latch: process(system_clk) + -- State + type state_t is (IDLE, WAIT_ACK_BOT, WAIT_ACK_TOP); + variable state : state_t; + + -- Misc + variable has_top : boolean; + variable has_bot : boolean; + begin + if rising_edge(system_clk) then + if (rst) then + state := IDLE; + wb_io_out.ack <= '0'; + wb_io_out.stall <= '0'; + wb_sio_out.cyc <= '0'; + wb_sio_out.stb <= '0'; + has_top := false; + has_bot := false; + else + case state is + when IDLE => + -- Clear ACK in case it was set + wb_io_out.ack <= '0'; + + -- Do we have a cycle ? + if wb_io_in.cyc = '1' and wb_io_in.stb = '1' then + -- Stall master until we are done, we aren't (yet) pipelining + -- this, it's all slow IOs. + wb_io_out.stall <= '1'; + + -- Start cycle downstream + wb_sio_out.cyc <= '1'; + wb_sio_out.stb <= '1'; + + -- Copy write enable to IO out, copy address as well + wb_sio_out.we <= wb_io_in.we; + wb_sio_out.adr <= wb_io_in.adr(wb_sio_out.adr'left downto 3) & "000"; + + -- Do we have a top word and/or a bottom word ? + has_top := wb_io_in.sel(7 downto 4) /= "0000"; + has_bot := wb_io_in.sel(3 downto 0) /= "0000"; + + -- If we have a bottom word, handle it first, otherwise + -- send the top word down. XXX Split the actual mux out + -- and only generate a control signal.
+ if has_bot then + if wb_io_in.we = '1' then + wb_sio_out.dat <= wb_io_in.dat(31 downto 0); + end if; + wb_sio_out.sel <= wb_io_in.sel(3 downto 0); + + -- Wait for ack + state := WAIT_ACK_BOT; + else + if wb_io_in.we = '1' then + wb_sio_out.dat <= wb_io_in.dat(63 downto 32); + end if; + wb_sio_out.sel <= wb_io_in.sel(7 downto 4); + + -- Bump address + wb_sio_out.adr(2) <= '1'; + + -- Wait for ack + state := WAIT_ACK_TOP; + end if; + end if; + when WAIT_ACK_BOT => + -- If we aren't stalled by the device, clear stb + if wb_sio_in.stall = '0' then + wb_sio_out.stb <= '0'; + end if; + + -- Handle ack + if wb_sio_in.ack = '1' then + -- If it's a read, latch the data + if wb_sio_out.we = '0' then + wb_io_out.dat(31 downto 0) <= wb_sio_in.dat; + end if; + + -- Do we have a "top" part as well ? + if has_top then + -- Latch data & sel + if wb_io_in.we = '1' then + wb_sio_out.dat <= wb_io_in.dat(63 downto 32); + end if; + wb_sio_out.sel <= wb_io_in.sel(7 downto 4); + + -- Bump address and set STB + wb_sio_out.adr(2) <= '1'; + wb_sio_out.stb <= '1'; + + -- Wait for new ack + state := WAIT_ACK_TOP; + else + -- We are done, ack up, clear cyc downstream + wb_sio_out.cyc <= '0'; + + -- And ack & unstall upstream + wb_io_out.ack <= '1'; + wb_io_out.stall <= '0'; + + -- Wait for next one + state := IDLE; + end if; + end if; + when WAIT_ACK_TOP => + -- If we aren't stalled by the device, clear stb + if wb_sio_in.stall = '0' then + wb_sio_out.stb <= '0'; + end if; + + -- Handle ack + if wb_sio_in.ack = '1' then + -- If it's a read, latch the data + if wb_sio_out.we = '0' then + wb_io_out.dat(63 downto 32) <= wb_sio_in.dat; + end if; + + -- We are done, ack up, clear cyc downstream + wb_sio_out.cyc <= '0'; + + -- And ack & unstall upstream + wb_io_out.ack <= '1'; + wb_io_out.stall <= '0'; + + -- Wait for next one + state := IDLE; + end if; + end case; + end if; + end if; + end process; + + -- IO wishbone slave intercon.
+ -- + slave_io_intercon: process(wb_sio_out, wb_syscon_out, wb_uart0_out, + wb_dram_ctrl_out, wb_xics0_out) + -- IO branch split: + type slave_io_type is (SLAVE_IO_SYSCON, + SLAVE_IO_UART, + SLAVE_IO_DRAM_INIT, + SLAVE_IO_DRAM_CSR, + SLAVE_IO_ICP_0, + SLAVE_IO_NONE); + variable slave_io : slave_io_type; + + variable match : std_ulogic_vector(31 downto 12); + begin + + -- Simple address decoder. + slave_io := SLAVE_IO_NONE; + match := "11" & wb_sio_out.adr(29 downto 12); + if std_match(match, x"F----") then + slave_io := SLAVE_IO_DRAM_INIT; + elsif std_match(match, x"C0000") then + slave_io := SLAVE_IO_SYSCON; + elsif std_match(match, x"C0002") then + slave_io := SLAVE_IO_UART; + elsif std_match(match, x"C01--") then + slave_io := SLAVE_IO_DRAM_CSR; + elsif std_match(match, x"C0004") then + slave_io := SLAVE_IO_ICP_0; + end if; + wb_uart0_in <= wb_sio_out; wb_uart0_in.cyc <= '0'; -- Only give xics 8 bits of wb addr - wb_xics0_in <= wb_master_out; + wb_xics0_in <= wb_sio_out; wb_xics0_in.adr <= (others => '0'); - wb_xics0_in.adr(7 downto 0) <= wb_master_out.adr(7 downto 0); + wb_xics0_in.adr(7 downto 0) <= wb_sio_out.adr(7 downto 0); wb_xics0_in.cyc <= '0'; - wb_dram_in <= wb_master_out; - wb_dram_in.cyc <= '0'; - wb_dram_ctrl <= '0'; - wb_dram_init <= '0'; - wb_syscon_in <= wb_master_out; + wb_dram_ctrl_in <= wb_sio_out; + wb_dram_ctrl_in.cyc <= '0'; + wb_dram_is_csr <= '0'; + wb_dram_is_init <= '0'; + + wb_syscon_in <= wb_sio_out; wb_syscon_in.cyc <= '0'; - case slave is - when SLAVE_BRAM => - wb_bram_in.cyc <= wb_master_out.cyc; - wb_master_in <= wb_bram_out; - when SLAVE_DRAM => - wb_dram_in.cyc <= wb_master_out.cyc; - wb_master_in <= wb_dram_out; - when SLAVE_DRAM_INIT => - wb_dram_in.cyc <= wb_master_out.cyc; - wb_master_in <= wb_dram_out; - wb_dram_init <= '1'; - when SLAVE_DRAM_CTRL => - wb_dram_in.cyc <= wb_master_out.cyc; - wb_master_in <= wb_dram_out; - wb_dram_ctrl <= '1'; - when SLAVE_SYSCON => - wb_syscon_in.cyc <= wb_master_out.cyc; - wb_master_in <= 
wb_syscon_out; - when SLAVE_UART => - wb_uart0_in.cyc <= wb_master_out.cyc; - wb_master_in <= wb_uart0_out; - when SLAVE_ICP_0 => - wb_xics0_in.cyc <= wb_master_out.cyc; - wb_master_in <= wb_xics0_out; + + case slave_io is + when SLAVE_IO_DRAM_INIT => + wb_dram_ctrl_in.cyc <= wb_sio_out.cyc; + wb_sio_in <= wb_dram_ctrl_out; + wb_dram_is_init <= '1'; + when SLAVE_IO_DRAM_CSR => + wb_dram_ctrl_in.cyc <= wb_sio_out.cyc; + wb_sio_in <= wb_dram_ctrl_out; + wb_dram_is_csr <= '1'; + when SLAVE_IO_SYSCON => + wb_syscon_in.cyc <= wb_sio_out.cyc; + wb_sio_in <= wb_syscon_out; + when SLAVE_IO_UART => + wb_uart0_in.cyc <= wb_sio_out.cyc; + wb_sio_in <= wb_uart0_out; + when SLAVE_IO_ICP_0 => + wb_xics0_in.cyc <= wb_sio_out.cyc; + wb_sio_in <= wb_xics0_out; when others => - wb_master_in.dat <= (others => '1'); - wb_master_in.ack <= wb_master_out.stb and wb_master_out.cyc; - wb_master_in.stall <= '0'; + wb_sio_in.dat <= (others => '1'); + wb_sio_in.ack <= wb_sio_out.stb and wb_sio_out.cyc; + wb_sio_in.stall <= '0'; end case; - end process slave_intercon; + + end process; -- Syscon slave syscon0: entity work.syscon @@ -287,10 +481,6 @@ begin -- Simulated memory and UART -- UART0 wishbone slave - -- XXX FIXME: Need a proper wb64->wb8 adapter that - -- converts SELs into low address bits and muxes - -- data accordingly (either that or rejects large - -- cycles). 
uart0: entity work.pp_soc_uart generic map( FIFO_DEPTH => 32 @@ -309,7 +499,7 @@ begin wb_we_in => wb_uart0_in.we, wb_ack_out => wb_uart0_out.ack ); - wb_uart0_out.dat <= x"00000000000000" & uart_dat8; + wb_uart0_out.dat <= x"000000" & uart_dat8; wb_uart0_out.stall <= '0' when wb_uart0_in.cyc = '0' else not wb_uart0_out.ack; xics0: entity work.xics diff --git a/syscon.vhdl b/syscon.vhdl index a5b569b..a9dd1cc 100644 --- a/syscon.vhdl +++ b/syscon.vhdl @@ -20,8 +20,8 @@ entity syscon is rst : in std_ulogic; -- Wishbone ports: - wishbone_in : in wishbone_master_out; - wishbone_out : out wishbone_slave_out; + wishbone_in : in wb_io_master_out; + wishbone_out : out wb_io_slave_out; -- System control ports dram_at_0 : out std_ulogic; @@ -43,6 +43,9 @@ architecture behaviour of syscon is constant SYS_REG_CLKINFO : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "100"; constant SYS_REG_CTRL : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "101"; + -- Muxed reg read signal + signal reg_out : std_ulogic_vector(63 downto 0); + -- INFO register bits constant SYS_REG_INFO_HAS_UART : integer := 0; constant SYS_REG_INFO_HAS_DRAM : integer := 1; @@ -99,7 +102,7 @@ begin SYS_REG_CTRL_BITS-1 downto 0 => reg_ctrl); -- Register read mux - with wishbone_in.adr(SYS_REG_BITS+2 downto 3) select wishbone_out.dat <= + with wishbone_in.adr(SYS_REG_BITS+2 downto 3) select reg_out <= SIG_VALUE when SYS_REG_SIG, reg_info when SYS_REG_INFO, reg_braminfo when SYS_REG_BRAMINFO, @@ -107,6 +110,8 @@ begin reg_clkinfo when SYS_REG_CLKINFO, reg_ctrl_out when SYS_REG_CTRL, (others => '0') when others; + wishbone_out.dat <= reg_out(63 downto 32) when wishbone_in.adr(2) = '1' else + reg_out(31 downto 0); -- Register writes regs_write: process(clk) @@ -116,7 +121,9 @@ begin reg_ctrl <= (others => '0'); else if wishbone_in.cyc and wishbone_in.stb and wishbone_in.we then - if wishbone_in.adr(SYS_REG_BITS+2 downto 3) = SYS_REG_CTRL then + -- Change this if CTRL ever has more than 32 bits + if 
wishbone_in.adr(SYS_REG_BITS+2 downto 3) = SYS_REG_CTRL and + wishbone_in.adr(2) = '0' then reg_ctrl(SYS_REG_CTRL_BITS-1 downto 0) <= wishbone_in.dat(SYS_REG_CTRL_BITS-1 downto 0); end if; diff --git a/tests/xics/xics.h b/tests/xics/xics.h index 09238cc..bbb1f99 100644 --- a/tests/xics/xics.h +++ b/tests/xics/xics.h @@ -1,8 +1,7 @@ #include -#define XICS_BASE 0xc0004000 - -static uint64_t xics_base = XICS_BASE; +#include "microwatt_soc.h" +#include "io.h" #define XICS_XIRR_POLL 0x0 #define XICS_XIRR 0x4 @@ -11,26 +10,20 @@ static uint64_t xics_base = XICS_BASE; uint8_t xics_read8(int offset) { - uint32_t val; - - __asm__ volatile("lbzcix %0,%1,%2" : "=r" (val) : "b" (xics_base), "r" (offset)); - return val; + return readb(XICS_BASE + offset); } void xics_write8(int offset, uint8_t val) { - __asm__ volatile("stbcix %0,%1,%2" : : "r" (val), "b" (xics_base), "r" (offset)); + writeb(val, XICS_BASE + offset); } uint32_t xics_read32(int offset) { - uint32_t val; - - __asm__ volatile("lwzcix %0,%1,%2" : "=r" (val) : "b" (xics_base), "r" (offset)); - return val; + return readl(XICS_BASE + offset); } void xics_write32(int offset, uint32_t val) { - __asm__ volatile("stwcix %0,%1,%2" : : "r" (val), "b" (xics_base), "r" (offset)); + writel(val, XICS_BASE + offset); } diff --git a/wishbone_types.vhdl b/wishbone_types.vhdl index c628ca2..693deac 100644 --- a/wishbone_types.vhdl +++ b/wishbone_types.vhdl @@ -2,6 +2,9 @@ library ieee; use ieee.std_logic_1164.all; package wishbone_types is + -- + -- Main CPU bus. 
32-bit address, 64-bit data + -- constant wishbone_addr_bits : integer := 32; constant wishbone_data_bits : integer := 64; constant wishbone_sel_bits : integer := wishbone_data_bits/8; @@ -30,4 +33,22 @@ package wishbone_types is type wishbone_master_out_vector is array (natural range <>) of wishbone_master_out; type wishbone_slave_out_vector is array (natural range <>) of wishbone_slave_out; + -- + -- IO Bus to a device, 30-bit address, 32-bits data + -- + type wb_io_master_out is record + adr : std_ulogic_vector(29 downto 0); + dat : std_ulogic_vector(31 downto 0); + sel : std_ulogic_vector(3 downto 0); + cyc : std_ulogic; + stb : std_ulogic; + we : std_ulogic; + end record; + + type wb_io_slave_out is record + dat : std_ulogic_vector(31 downto 0); + ack : std_ulogic; + stall : std_ulogic; + end record; + end package wishbone_types; diff --git a/xics.vhdl b/xics.vhdl index 09a1ba6..7d49433 100644 --- a/xics.vhdl +++ b/xics.vhdl @@ -30,8 +30,8 @@ entity xics is clk : in std_logic; rst : in std_logic; - wb_in : in wishbone_master_out; - wb_out : out wishbone_slave_out; + wb_in : in wb_io_master_out; + wb_out : out wb_io_slave_out; int_level_in : in std_ulogic_vector(LEVEL_NUM - 1 downto 0); @@ -47,7 +47,7 @@ architecture behaviour of xics is mfrr : std_ulogic_vector(7 downto 0); mfrr_pending : std_ulogic; irq : std_ulogic; - wb_rd_data : wishbone_data_type; + wb_rd_data : std_ulogic_vector(31 downto 0); wb_ack : std_ulogic; end record; constant reg_internal_init : reg_internal_t := @@ -62,11 +62,11 @@ architecture behaviour of xics is -- hardwire the hardware IRQ priority constant HW_PRIORITY : std_ulogic_vector(7 downto 0) := x"80"; - -- 32 bit offsets for each presentation - constant XIRR_POLL : std_ulogic_vector(31 downto 0) := x"00000000"; - constant XIRR : std_ulogic_vector(31 downto 0) := x"00000004"; - constant RESV0 : std_ulogic_vector(31 downto 0) := x"00000008"; - constant MFRR : std_ulogic_vector(31 downto 0) := x"0000000c"; + -- 8 bit offsets for each 
presentation + constant XIRR_POLL : std_ulogic_vector(7 downto 0) := x"00"; + constant XIRR : std_ulogic_vector(7 downto 0) := x"04"; + constant RESV0 : std_ulogic_vector(7 downto 0) := x"08"; + constant MFRR : std_ulogic_vector(7 downto 0) := x"0c"; begin @@ -95,62 +95,73 @@ begin irq_eoi := '0'; if wb_in.cyc = '1' and wb_in.stb = '1' then - -- wishbone addresses we get are 64 bit alligned, so we - -- need to use the sel bits to get 32 bit chunks. v.wb_ack := '1'; -- always ack if wb_in.we = '1' then -- write -- writes to both XIRR are the same - if wb_in.adr = XIRR_POLL then - report "XICS XIRR_POLL/XIRR write"; - if wb_in.sel = x"0f" then -- 4 bytes + case wb_in.adr(7 downto 0) is + when XIRR_POLL => + report "XICS XIRR_POLL write"; + if wb_in.sel = x"f" then -- 4 bytes + v.cppr := wb_in.dat(31 downto 24); + elsif wb_in.sel = x"1" then -- 1 byte + v.cppr := wb_in.dat(7 downto 0); + end if; + when XIRR => + if wb_in.sel = x"f" then -- 4 byte + report "XICS XIRR write word:" & to_hstring(wb_in.dat); v.cppr := wb_in.dat(31 downto 24); - elsif wb_in.sel = x"f0" then -- 4 byte - v.cppr := wb_in.dat(63 downto 56); irq_eoi := '1'; - elsif wb_in.sel = x"01" then -- 1 byte + elsif wb_in.sel = x"1" then -- 1 byte + report "XICS XIRR write byte:" & to_hstring(wb_in.dat(7 downto 0)); v.cppr := wb_in.dat(7 downto 0); - elsif wb_in.sel = x"10" then -- 1 byte - v.cppr := wb_in.dat(39 downto 32); + else + report "XICS XIRR UNSUPPORTED write ! 
sel=" & to_hstring(wb_in.sel); end if; - - elsif wb_in.adr = RESV0 then - report "XICS MFRR write"; - if wb_in.sel = x"f0" then -- 4 bytes + when MFRR => + if wb_in.sel = x"f" then -- 4 bytes + report "XICS MFRR write word:" & to_hstring(wb_in.dat); v.mfrr_pending := '1'; - v.mfrr := wb_in.dat(63 downto 56); - elsif wb_in.sel = x"10" then -- 1 byte + v.mfrr := wb_in.dat(31 downto 24); + elsif wb_in.sel = x"1" then -- 1 byte + report "XICS MFRR write byte:" & to_hstring(wb_in.dat(7 downto 0)); v.mfrr_pending := '1'; - v.mfrr := wb_in.dat(39 downto 32); + v.mfrr := wb_in.dat(7 downto 0); + else + report "XICS MFRR UNSUPPORTED write ! sel=" & to_hstring(wb_in.sel); end if; - - end if; + when others => + end case; else -- read v.wb_rd_data := (others => '0'); - if wb_in.adr = XIRR_POLL then - report "XICS XIRR_POLL/XIRR read"; - if wb_in.sel = x"0f" then + case wb_in.adr(7 downto 0) is + when XIRR_POLL => + report "XICS XIRR_POLL read"; + if wb_in.sel = x"f" then v.wb_rd_data(23 downto 0) := r.xisr; v.wb_rd_data(31 downto 24) := r.cppr; - elsif wb_in.sel = x"f0" then - v.wb_rd_data(55 downto 32) := r.xisr; - v.wb_rd_data(63 downto 56) := r.cppr; - xirr_accept_rd := '1'; - elsif wb_in.sel = x"01" then + elsif wb_in.sel = x"1" then v.wb_rd_data(7 downto 0) := r.cppr; - elsif wb_in.sel = x"10" then - v.wb_rd_data(39 downto 32) := r.cppr; + end if; + when XIRR => + report "XICS XIRR read"; + if wb_in.sel = x"f" then + v.wb_rd_data(23 downto 0) := r.xisr; + v.wb_rd_data(31 downto 24) := r.cppr; + xirr_accept_rd := '1'; + elsif wb_in.sel = x"1" then + v.wb_rd_data(7 downto 0) := r.cppr; end if; - - elsif wb_in.adr = RESV0 then + when MFRR => report "XICS MFRR read"; - if wb_in.sel = x"f0" then -- 4 bytes - v.wb_rd_data(63 downto 56) := r.mfrr; - elsif wb_in.sel = x"10" then -- 1 byte + if wb_in.sel = x"f" then -- 4 bytes + v.wb_rd_data(31 downto 24) := r.mfrr; + elsif wb_in.sel = x"1" then -- 1 byte v.wb_rd_data( 7 downto 0) := r.mfrr; end if; - end if; + when others => + 
end case; end if; end if;