From 573b6b4bc4ce7410904738c9b192b07b8e481daf Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 11 May 2020 21:22:07 +1000 Subject: [PATCH] soc: Rework interconnect This changes the SoC interconnect such that the main 64-bit wishbone out of the processor is first split between only 3 slaves (BRAM, DRAM and a general "IO" bus) instead of all the slaves in the SoC. The IO bus leg is then latched and down-converted to a 32-bit data width, before going through a second address decoder for the various IO devices. This significantly reduces routing and timing pressure on the main bus, making it possible to get rid of frequent timing violations when synthesizing for smallish FPGAs such as the Artix-7 35T found on the original Arty board. Signed-off-by: Benjamin Herrenschmidt --- core_tb.vhdl | 9 +- fpga/top-arty.vhdl | 26 +- fpga/top-generic.vhdl | 20 +- fpga/top-nexys-video.vhdl | 26 +- include/microwatt_soc.h | 5 +- litedram/gen-src/sdram_init/head.S | 2 +- litedram/gen-src/sdram_init/sdram_init.lds.S | 1 + litedram/gen-src/wrapper-mw-init.vhdl | 129 +++--- litedram/gen-src/wrapper-self-init.vhdl | 72 ++-- litedram/generated/arty/litedram-wrapper.vhdl | 129 +++--- litedram/generated/arty/litedram_core.init | 4 +- litedram/generated/arty/litedram_core.v | 2 +- .../nexys-video/litedram-wrapper.vhdl | 129 +++--- .../generated/nexys-video/litedram_core.init | 4 +- .../generated/nexys-video/litedram_core.v | 2 +- soc.vhdl | 368 +++++++++++++----- syscon.vhdl | 15 +- tests/xics/xics.h | 19 +- wishbone_types.vhdl | 21 + xics.vhdl | 97 +++-- 20 files changed, 702 insertions(+), 378 deletions(-) diff --git a/core_tb.vhdl b/core_tb.vhdl index 9c08919..a35d73d 100644 --- a/core_tb.vhdl +++ b/core_tb.vhdl @@ -18,6 +18,8 @@ architecture behave of core_tb is -- Dummy DRAM signal wb_dram_in : wishbone_master_out; signal wb_dram_out : wishbone_slave_out; + signal wb_dram_ctrl_in : wb_io_master_out; + signal wb_dram_ctrl_out : wb_io_slave_out; begin soc0: entity work.soc @@ -35,6 
+37,8 @@ begin uart0_txd => open, wb_dram_in => wb_dram_in, wb_dram_out => wb_dram_out, + wb_dram_ctrl_in => wb_dram_ctrl_in, + wb_dram_ctrl_out => wb_dram_ctrl_out, alt_reset => '0' ); @@ -59,6 +63,9 @@ begin -- Dummy DRAM wb_dram_out.ack <= wb_dram_in.cyc and wb_dram_in.stb; wb_dram_out.dat <= x"FFFFFFFFFFFFFFFF"; - wb_dram_out.stall <= wb_dram_in.cyc and not wb_dram_out.ack; + wb_dram_out.stall <= '0'; + wb_dram_ctrl_out.ack <= wb_dram_ctrl_in.cyc and wb_dram_ctrl_in.stb; + wb_dram_ctrl_out.dat <= x"FFFFFFFF"; + wb_dram_ctrl_out.stall <= '0'; end; diff --git a/fpga/top-arty.vhdl b/fpga/top-arty.vhdl index f2da6c2..e3782ed 100644 --- a/fpga/top-arty.vhdl +++ b/fpga/top-arty.vhdl @@ -65,11 +65,15 @@ architecture behaviour of toplevel is signal system_clk : std_ulogic; signal system_clk_locked : std_ulogic; - -- DRAM wishbone connection - signal wb_dram_in : wishbone_master_out; - signal wb_dram_out : wishbone_slave_out; - signal wb_dram_ctrl : std_ulogic; - signal wb_dram_init : std_ulogic; + -- DRAM main data wishbone connection + signal wb_dram_in : wishbone_master_out; + signal wb_dram_out : wishbone_slave_out; + + -- DRAM control wishbone connection + signal wb_dram_ctrl_in : wb_io_master_out; + signal wb_dram_ctrl_out : wb_io_slave_out; + signal wb_dram_is_csr : std_ulogic; + signal wb_dram_is_init : std_ulogic; -- Control/status signal core_alt_reset : std_ulogic; @@ -104,8 +108,10 @@ begin uart0_rxd => uart_main_rx, wb_dram_in => wb_dram_in, wb_dram_out => wb_dram_out, - wb_dram_ctrl => wb_dram_ctrl, - wb_dram_init => wb_dram_init, + wb_dram_ctrl_in => wb_dram_ctrl_in, + wb_dram_ctrl_out => wb_dram_ctrl_out, + wb_dram_is_csr => wb_dram_is_csr, + wb_dram_is_init => wb_dram_is_init, alt_reset => core_alt_reset ); @@ -195,8 +201,10 @@ begin wb_in => wb_dram_in, wb_out => wb_dram_out, - wb_is_ctrl => wb_dram_ctrl, - wb_is_init => wb_dram_init, + wb_ctrl_in => wb_dram_ctrl_in, + wb_ctrl_out => wb_dram_ctrl_out, + wb_ctrl_is_csr => wb_dram_is_csr, + 
wb_ctrl_is_init => wb_dram_is_init, serial_tx => uart_pmod_tx, serial_rx => uart_pmod_rx, diff --git a/fpga/top-generic.vhdl b/fpga/top-generic.vhdl index daefeee..a4c4e73 100644 --- a/fpga/top-generic.vhdl +++ b/fpga/top-generic.vhdl @@ -33,9 +33,15 @@ architecture behaviour of toplevel is signal system_clk : std_ulogic; signal system_clk_locked : std_ulogic; - -- Dummy DRAM - signal wb_dram_in : wishbone_master_out; - signal wb_dram_out : wishbone_slave_out; + -- DRAM main data wishbone connection + signal wb_dram_in : wishbone_master_out; + signal wb_dram_out : wishbone_slave_out; + + -- DRAM control wishbone connection + signal wb_dram_ctrl_in : wb_io_master_out; + signal wb_dram_ctrl_out : wb_io_slave_out; + signal wb_dram_is_csr : std_ulogic; + signal wb_dram_is_init : std_ulogic; begin @@ -79,7 +85,12 @@ begin rst => soc_rst, uart0_txd => uart0_txd, uart0_rxd => uart0_rxd, + wb_dram_in => wb_dram_in, wb_dram_out => wb_dram_out, + wb_dram_ctrl_in => wb_dram_ctrl_in, + wb_dram_ctrl_out => wb_dram_ctrl_out, + wb_dram_is_csr => wb_dram_is_csr, + wb_dram_is_init => wb_dram_is_init, alt_reset => '0' ); @@ -87,5 +98,8 @@ begin wb_dram_out.ack <= wb_dram_in.cyc and wb_dram_in.stb; wb_dram_out.dat <= x"FFFFFFFFFFFFFFFF"; wb_dram_out.stall <= wb_dram_in.cyc and not wb_dram_out.ack; + wb_dram_ctrl_out.ack <= wb_dram_ctrl_in.cyc and wb_dram_ctrl_in.stb; + wb_dram_ctrl_out.dat <= x"FFFFFFFF"; + wb_dram_ctrl_out.stall <= wb_dram_ctrl_in.cyc and not wb_dram_ctrl_out.ack; end architecture behaviour; diff --git a/fpga/top-nexys-video.vhdl b/fpga/top-nexys-video.vhdl index 0e4b097..9acbee1 100644 --- a/fpga/top-nexys-video.vhdl +++ b/fpga/top-nexys-video.vhdl @@ -57,11 +57,15 @@ architecture behaviour of toplevel is signal system_clk : std_ulogic; signal system_clk_locked : std_ulogic; - -- DRAM wishbone connection - signal wb_dram_in : wishbone_master_out; - signal wb_dram_out : wishbone_slave_out; - signal wb_dram_ctrl : std_ulogic; - signal wb_dram_init : std_ulogic; + -- DRAM main data 
wishbone connection + signal wb_dram_in : wishbone_master_out; + signal wb_dram_out : wishbone_slave_out; + + -- DRAM control wishbone connection + signal wb_dram_ctrl_in : wb_io_master_out; + signal wb_dram_ctrl_out : wb_io_slave_out; + signal wb_dram_is_csr : std_ulogic; + signal wb_dram_is_init : std_ulogic; -- Control/status signal core_alt_reset : std_ulogic; @@ -87,8 +91,10 @@ begin uart0_rxd => uart_main_rx, wb_dram_in => wb_dram_in, wb_dram_out => wb_dram_out, - wb_dram_ctrl => wb_dram_ctrl, - wb_dram_init => wb_dram_init, + wb_dram_ctrl_in => wb_dram_ctrl_in, + wb_dram_ctrl_out => wb_dram_ctrl_out, + wb_dram_is_csr => wb_dram_is_csr, + wb_dram_is_init => wb_dram_is_init, alt_reset => core_alt_reset ); @@ -176,8 +182,10 @@ begin wb_in => wb_dram_in, wb_out => wb_dram_out, - wb_is_ctrl => wb_dram_ctrl, - wb_is_init => wb_dram_init, + wb_ctrl_in => wb_dram_ctrl_in, + wb_ctrl_out => wb_dram_ctrl_out, + wb_ctrl_is_csr => wb_dram_is_csr, + wb_ctrl_is_init => wb_dram_is_init, serial_tx => open, serial_rx => '0', diff --git a/include/microwatt_soc.h b/include/microwatt_soc.h index b0dab83..443a8ae 100644 --- a/include/microwatt_soc.h +++ b/include/microwatt_soc.h @@ -7,12 +7,13 @@ #define MEMORY_BASE 0x00000000 /* "Main" memory alias, either BRAM or DRAM */ #define DRAM_BASE 0x40000000 /* DRAM if present */ +#define BRAM_BASE 0x80000000 /* Internal BRAM */ + #define SYSCON_BASE 0xc0000000 /* System control regs */ #define UART_BASE 0xc0002000 /* UART */ #define XICS_BASE 0xc0004000 /* Interrupt controller */ #define DRAM_CTRL_BASE 0xc0100000 /* LiteDRAM control registers */ -#define BRAM_BASE 0xf0000000 /* Internal BRAM */ -#define DRAM_INIT_BASE 0xffff0000 /* Internal DRAM init firmware */ +#define DRAM_INIT_BASE 0xf0000000 /* Internal DRAM init firmware */ /* * Register definitions for the syscon registers diff --git a/litedram/gen-src/sdram_init/head.S b/litedram/gen-src/sdram_init/head.S index 2c750f9..235bf14 100644 --- a/litedram/gen-src/sdram_init/head.S 
+++ b/litedram/gen-src/sdram_init/head.S @@ -14,7 +14,7 @@ * limitations under the License. */ -#define STACK_TOP 0xffff4000 +#define STACK_TOP 0xf0004000 #define FIXUP_ENDIAN \ tdi 0,0,0x48; /* Reverse endian of b . + 8 */ \ diff --git a/litedram/gen-src/sdram_init/sdram_init.lds.S b/litedram/gen-src/sdram_init/sdram_init.lds.S index f1bc291..e6cf0bc 100644 --- a/litedram/gen-src/sdram_init/sdram_init.lds.S +++ b/litedram/gen-src/sdram_init/sdram_init.lds.S @@ -8,6 +8,7 @@ SECTIONS KEEP(*(.head)) } . = DRAM_INIT_BASE | 0x1000; + .text : { *(.text*) *(.sfpr) *(.rodata*) } .data : { *(.data*) } .bss : { *(.bss*) } diff --git a/litedram/gen-src/wrapper-mw-init.vhdl b/litedram/gen-src/wrapper-mw-init.vhdl index c4d702e..1595793 100644 --- a/litedram/gen-src/wrapper-mw-init.vhdl +++ b/litedram/gen-src/wrapper-mw-init.vhdl @@ -15,18 +15,20 @@ entity litedram_wrapper is port( -- LiteDRAM generates the system clock and reset -- from the input clkin - clk_in : in std_ulogic; - rst : in std_ulogic; - system_clk : out std_ulogic; - system_reset : out std_ulogic; - core_alt_reset : out std_ulogic; - pll_locked : out std_ulogic; + clk_in : in std_ulogic; + rst : in std_ulogic; + system_clk : out std_ulogic; + system_reset : out std_ulogic; + core_alt_reset : out std_ulogic; + pll_locked : out std_ulogic; -- Wishbone ports: - wb_in : in wishbone_master_out; - wb_out : out wishbone_slave_out; - wb_is_ctrl : in std_ulogic; - wb_is_init : in std_ulogic; + wb_in : in wishbone_master_out; + wb_out : out wishbone_slave_out; + wb_ctrl_in : in wb_io_master_out; + wb_ctrl_out : out wb_io_slave_out; + wb_ctrl_is_csr : in std_ulogic; + wb_ctrl_is_init : in std_ulogic; -- Init core serial debug serial_tx : out std_ulogic; @@ -128,8 +130,8 @@ architecture behaviour of litedram_wrapper is signal wb_ctrl_ack : std_ulogic; signal wb_ctrl_we : std_ulogic; - signal wb_init_in : wishbone_master_out; - signal wb_init_out : wishbone_slave_out; + signal wb_init_in : wb_io_master_out; + signal 
wb_init_out : wb_io_slave_out; type state_t is (CMD, MWRITE, MREAD); signal state : state_t; @@ -138,7 +140,7 @@ architecture behaviour of litedram_wrapper is constant INIT_RAM_ABITS :integer := 14; constant INIT_RAM_FILE : string := "litedram_core.init"; - type ram_t is array(0 to (INIT_RAM_SIZE / 8) - 1) of std_logic_vector(63 downto 0); + type ram_t is array(0 to (INIT_RAM_SIZE / 4) - 1) of std_logic_vector(31 downto 0); impure function init_load_ram(name : string) return ram_t is file ram_file : text open read_mode is name; @@ -150,7 +152,8 @@ architecture behaviour of litedram_wrapper is exit when endfile(ram_file); readline(ram_file, ram_line); hread(ram_line, temp_word); - temp_ram(i) := temp_word; + temp_ram(i*2) := temp_word(31 downto 0); + temp_ram(i*2+1) := temp_word(63 downto 32); end loop; return temp_ram; end function; @@ -162,79 +165,93 @@ architecture behaviour of litedram_wrapper is begin - -- BRAM Memory slave + -- alternate core reset address set when DRAM is not initialized. + core_alt_reset <= not init_done; + + -- BRAM Memory slave. 
TODO: Pipeline it with an output buffer + -- to improve timing init_ram_0: process(system_clk) variable adr : integer; begin if rising_edge(system_clk) then wb_init_out.ack <= '0'; if (wb_init_in.cyc and wb_init_in.stb) = '1' then - adr := to_integer((unsigned(wb_init_in.adr(INIT_RAM_ABITS-1 downto 3)))); + adr := to_integer((unsigned(wb_init_in.adr(INIT_RAM_ABITS-1 downto 2)))); if wb_init_in.we = '0' then wb_init_out.dat <= init_ram(adr); else - for i in 0 to 7 loop + for i in 0 to 3 loop if wb_init_in.sel(i) = '1' then init_ram(adr)(((i + 1) * 8) - 1 downto i * 8) <= wb_init_in.dat(((i + 1) * 8) - 1 downto i * 8); end if; end loop; end if; - wb_init_out.ack <= not wb_init_out.ack; + wb_init_out.ack <= '1'; end if; end if; end process; - wb_init_in.adr <= wb_in.adr; - wb_init_in.dat <= wb_in.dat; - wb_init_in.sel <= wb_in.sel; - wb_init_in.we <= wb_in.we; - wb_init_in.stb <= wb_in.stb; - wb_init_in.cyc <= wb_in.cyc and wb_is_init; + -- + -- Control bus wishbone: This muxes the wishbone to the CSRs + -- and an internal small one to the init BRAM + -- + + -- Init DRAM wishbone IN signals + wb_init_in.adr <= wb_ctrl_in.adr; + wb_init_in.dat <= wb_ctrl_in.dat; + wb_init_in.sel <= wb_ctrl_in.sel; + wb_init_in.we <= wb_ctrl_in.we; + wb_init_in.stb <= wb_ctrl_in.stb; + wb_init_in.cyc <= wb_ctrl_in.cyc and wb_ctrl_is_init; + + -- DRAM CSR IN signals + wb_ctrl_adr <= x"0000" & wb_ctrl_in.adr(15 downto 2); + wb_ctrl_dat_w <= wb_ctrl_in.dat; + wb_ctrl_sel <= wb_ctrl_in.sel; + wb_ctrl_we <= wb_ctrl_in.we; + wb_ctrl_cyc <= wb_ctrl_in.cyc and wb_ctrl_is_csr; + wb_ctrl_stb <= wb_ctrl_in.stb and wb_ctrl_is_csr; - -- Address bit 3 selects the top or bottom half of the data + -- Ctrl bus wishbone OUT signals + wb_ctrl_out.ack <= wb_ctrl_ack when wb_ctrl_is_csr = '1' + else wb_init_out.ack; + wb_ctrl_out.dat <= wb_ctrl_dat_r when wb_ctrl_is_csr = '1' + else wb_init_out.dat; + wb_ctrl_out.stall <= wb_init_out.stall when wb_ctrl_is_init else + '0' when wb_ctrl_in.cyc = '0' else not 
wb_ctrl_ack; + + -- + -- Data bus wishbone to LiteDRAM native port + -- + -- Address bit 3 selects the top or bottom half of the data -- bus (64-bit wishbone vs. 128-bit DRAM interface) -- + -- XXX TODO: Figure out how to pipeline this + -- ad3 <= wb_in.adr(3); - -- DRAM data interface signals - user_port0_cmd_valid <= (wb_in.cyc and wb_in.stb and not wb_is_ctrl and not wb_is_init) - when state = CMD else '0'; - user_port0_cmd_we <= wb_in.we when state = CMD else '0'; + -- Wishbone port IN signals + user_port0_cmd_valid <= wb_in.cyc and wb_in.stb when state = CMD else '0'; + user_port0_cmd_we <= wb_in.we when state = CMD else '0'; user_port0_wdata_valid <= '1' when state = MWRITE else '0'; user_port0_rdata_ready <= '1' when state = MREAD else '0'; - user_port0_cmd_addr <= wb_in.adr(DRAM_ABITS+3 downto 4); - user_port0_wdata_data <= wb_in.dat & wb_in.dat; - user_port0_wdata_we <= wb_in.sel & "00000000" when ad3 = '1' else - "00000000" & wb_in.sel; - - -- DRAM ctrl interface signals - wb_ctrl_adr <= x"0000" & wb_in.adr(15 downto 2); - wb_ctrl_dat_w <= wb_in.dat(31 downto 0); - wb_ctrl_sel <= wb_in.sel(3 downto 0); - wb_ctrl_cyc <= wb_in.cyc and wb_is_ctrl; - wb_ctrl_stb <= wb_in.stb and wb_is_ctrl; - wb_ctrl_we <= wb_in.we; - - -- Wishbone out signals - wb_out.ack <= wb_ctrl_ack when wb_is_ctrl ='1' else - wb_init_out.ack when wb_is_init = '1' else - user_port0_wdata_ready when state = MWRITE else + user_port0_cmd_addr <= wb_in.adr(DRAM_ABITS+3 downto 4); + user_port0_wdata_data <= wb_in.dat & wb_in.dat; + user_port0_wdata_we <= wb_in.sel & "00000000" when ad3 = '1' else + "00000000" & wb_in.sel; + + -- Wishbone OUT signals + wb_out.ack <= user_port0_wdata_ready when state = MWRITE else user_port0_rdata_valid when state = MREAD else '0'; - wb_out.dat <= (x"00000000" & wb_ctrl_dat_r) when wb_is_ctrl = '1' else - wb_init_out.dat when wb_is_init = '1' else - user_port0_rdata_data(127 downto 64) when ad3 = '1' else + wb_out.dat <= user_port0_rdata_data(127 downto 64) 
when ad3 = '1' else user_port0_rdata_data(63 downto 0); + -- We don't do pipelining yet. wb_out.stall <= '0' when wb_in.cyc = '0' else not wb_out.ack; - -- Reset ignored, the reset controller use the pll lock signal, - -- and alternate core reset address set when DRAM is not initialized. - -- - core_alt_reset <= not init_done; - - -- State machine + -- DRAM user port State machine sm: process(system_clk) begin @@ -255,7 +272,7 @@ begin if user_port0_rdata_valid = '1' then state <= CMD; end if; - end case; + end case; end if; end if; end process; diff --git a/litedram/gen-src/wrapper-self-init.vhdl b/litedram/gen-src/wrapper-self-init.vhdl index 34e69e3..01acfd9 100644 --- a/litedram/gen-src/wrapper-self-init.vhdl +++ b/litedram/gen-src/wrapper-self-init.vhdl @@ -15,18 +15,20 @@ entity litedram_wrapper is port( -- LiteDRAM generates the system clock and reset -- from the input clkin - clk_in : in std_ulogic; - rst : in std_ulogic; - system_clk : out std_ulogic; - system_reset : out std_ulogic; - core_alt_reset : out std_ulogic; - pll_locked : out std_ulogic; + clk_in : in std_ulogic; + rst : in std_ulogic; + system_clk : out std_ulogic; + system_reset : out std_ulogic; + core_alt_reset : out std_ulogic; + pll_locked : out std_ulogic; -- Wishbone ports: - wb_in : in wishbone_master_out; - wb_out : out wishbone_slave_out; - wb_is_ctrl : in std_ulogic; - wb_is_init : in std_ulogic; + wb_in : in wishbone_master_out; + wb_out : out wishbone_slave_out; + wb_ctrl_in : in wb_io_master_out; + wb_ctrl_out : out wb_io_slave_out; + wb_ctrl_is_csr : in std_ulogic; + wb_ctrl_is_init : in std_ulogic; -- Init core serial debug serial_tx : out std_ulogic; @@ -52,7 +54,6 @@ entity litedram_wrapper is ddram_cke : out std_ulogic; ddram_odt : out std_ulogic; ddram_reset_n : out std_ulogic - ); end entity litedram_wrapper; architecture behaviour of litedram_wrapper is @@ -117,36 +118,47 @@ architecture behaviour of litedram_wrapper is begin + -- Reset, lift it when init done, no alt core 
reset + system_reset <= dram_user_reset or not init_done; + core_alt_reset <= '0'; + + -- Control bus is unused: CSR and init accesses do nothing, just ack + wb_ctrl_out.ack <= wb_ctrl_in.cyc and wb_ctrl_in.stb + and (wb_ctrl_is_csr or wb_ctrl_is_init); + wb_ctrl_out.dat <= (others => '0'); + wb_ctrl_out.stall <= '0'; + + -- + -- Data bus wishbone to LiteDRAM native port + -- -- Address bit 3 selects the top or bottom half of the data -- bus (64-bit wishbone vs. 128-bit DRAM interface) -- + -- XXX TODO: Figure out how to pipeline this + -- ad3 <= wb_in.adr(3); - -- DRAM interface signals - user_port0_cmd_valid <= (wb_in.cyc and wb_in.stb and not wb_is_ctrl and not wb_is_init) - when state = CMD else '0'; - user_port0_cmd_we <= wb_in.we when state = CMD else '0'; + -- Wishbone port IN signals + user_port0_cmd_valid <= wb_in.cyc and wb_in.stb when state = CMD else '0'; + user_port0_cmd_we <= wb_in.we when state = CMD else '0'; user_port0_wdata_valid <= '1' when state = MWRITE else '0'; user_port0_rdata_ready <= '1' when state = MREAD else '0'; - user_port0_cmd_addr <= wb_in.adr(DRAM_ABITS+3 downto 4); - user_port0_wdata_data <= wb_in.dat & wb_in.dat; - user_port0_wdata_we <= wb_in.sel & "00000000" when ad3 = '1' else - "00000000" & wb_in.sel; - - -- Wishbone out signals. 
CSR and init memory do nothing, just ack - wb_out.ack <= '1' when (wb_is_ctrl = '1' or wb_is_init = '1') else - user_port0_wdata_ready when state = MWRITE else + user_port0_cmd_addr <= wb_in.adr(DRAM_ABITS+3 downto 4); + user_port0_wdata_data <= wb_in.dat & wb_in.dat; + user_port0_wdata_we <= wb_in.sel & "00000000" when ad3 = '1' else + "00000000" & wb_in.sel; + + -- Wishbone OUT signals + wb_out.ack <= user_port0_wdata_ready when state = MWRITE else user_port0_rdata_valid when state = MREAD else '0'; - wb_out.dat <= (others => '0') when (wb_is_ctrl = '1' or wb_is_init = '1') else - user_port0_rdata_data(127 downto 64) when ad3 = '1' else + + wb_out.dat <= user_port0_rdata_data(127 downto 64) when ad3 = '1' else user_port0_rdata_data(63 downto 0); - wb_out.stall <= '0' when wb_in.cyc = '0' else not wb_out.ack; - -- Reset, lift it when init done, no alt core reset - system_reset <= dram_user_reset or not init_done; - core_alt_reset <= '0'; + -- We don't do pipelining yet. + wb_out.stall <= '0' when wb_in.cyc = '0' else not wb_out.ack; - -- State machine + -- DRAM user port State machine sm: process(system_clk) begin diff --git a/litedram/generated/arty/litedram-wrapper.vhdl b/litedram/generated/arty/litedram-wrapper.vhdl index c4d702e..1595793 100644 --- a/litedram/generated/arty/litedram-wrapper.vhdl +++ b/litedram/generated/arty/litedram-wrapper.vhdl @@ -15,18 +15,20 @@ entity litedram_wrapper is port( -- LiteDRAM generates the system clock and reset -- from the input clkin - clk_in : in std_ulogic; - rst : in std_ulogic; - system_clk : out std_ulogic; - system_reset : out std_ulogic; - core_alt_reset : out std_ulogic; - pll_locked : out std_ulogic; + clk_in : in std_ulogic; + rst : in std_ulogic; + system_clk : out std_ulogic; + system_reset : out std_ulogic; + core_alt_reset : out std_ulogic; + pll_locked : out std_ulogic; -- Wishbone ports: - wb_in : in wishbone_master_out; - wb_out : out wishbone_slave_out; - wb_is_ctrl : in std_ulogic; - wb_is_init : in 
std_ulogic; + wb_in : in wishbone_master_out; + wb_out : out wishbone_slave_out; + wb_ctrl_in : in wb_io_master_out; + wb_ctrl_out : out wb_io_slave_out; + wb_ctrl_is_csr : in std_ulogic; + wb_ctrl_is_init : in std_ulogic; -- Init core serial debug serial_tx : out std_ulogic; @@ -128,8 +130,8 @@ architecture behaviour of litedram_wrapper is signal wb_ctrl_ack : std_ulogic; signal wb_ctrl_we : std_ulogic; - signal wb_init_in : wishbone_master_out; - signal wb_init_out : wishbone_slave_out; + signal wb_init_in : wb_io_master_out; + signal wb_init_out : wb_io_slave_out; type state_t is (CMD, MWRITE, MREAD); signal state : state_t; @@ -138,7 +140,7 @@ architecture behaviour of litedram_wrapper is constant INIT_RAM_ABITS :integer := 14; constant INIT_RAM_FILE : string := "litedram_core.init"; - type ram_t is array(0 to (INIT_RAM_SIZE / 8) - 1) of std_logic_vector(63 downto 0); + type ram_t is array(0 to (INIT_RAM_SIZE / 4) - 1) of std_logic_vector(31 downto 0); impure function init_load_ram(name : string) return ram_t is file ram_file : text open read_mode is name; @@ -150,7 +152,8 @@ architecture behaviour of litedram_wrapper is exit when endfile(ram_file); readline(ram_file, ram_line); hread(ram_line, temp_word); - temp_ram(i) := temp_word; + temp_ram(i*2) := temp_word(31 downto 0); + temp_ram(i*2+1) := temp_word(63 downto 32); end loop; return temp_ram; end function; @@ -162,79 +165,93 @@ architecture behaviour of litedram_wrapper is begin - -- BRAM Memory slave + -- alternate core reset address set when DRAM is not initialized. + core_alt_reset <= not init_done; + + -- BRAM Memory slave. 
TODO: Pipeline it with an output buffer + -- to improve timing init_ram_0: process(system_clk) variable adr : integer; begin if rising_edge(system_clk) then wb_init_out.ack <= '0'; if (wb_init_in.cyc and wb_init_in.stb) = '1' then - adr := to_integer((unsigned(wb_init_in.adr(INIT_RAM_ABITS-1 downto 3)))); + adr := to_integer((unsigned(wb_init_in.adr(INIT_RAM_ABITS-1 downto 2)))); if wb_init_in.we = '0' then wb_init_out.dat <= init_ram(adr); else - for i in 0 to 7 loop + for i in 0 to 3 loop if wb_init_in.sel(i) = '1' then init_ram(adr)(((i + 1) * 8) - 1 downto i * 8) <= wb_init_in.dat(((i + 1) * 8) - 1 downto i * 8); end if; end loop; end if; - wb_init_out.ack <= not wb_init_out.ack; + wb_init_out.ack <= '1'; end if; end if; end process; - wb_init_in.adr <= wb_in.adr; - wb_init_in.dat <= wb_in.dat; - wb_init_in.sel <= wb_in.sel; - wb_init_in.we <= wb_in.we; - wb_init_in.stb <= wb_in.stb; - wb_init_in.cyc <= wb_in.cyc and wb_is_init; + -- + -- Control bus wishbone: This muxes the wishbone to the CSRs + -- and an internal small one to the init BRAM + -- + + -- Init DRAM wishbone IN signals + wb_init_in.adr <= wb_ctrl_in.adr; + wb_init_in.dat <= wb_ctrl_in.dat; + wb_init_in.sel <= wb_ctrl_in.sel; + wb_init_in.we <= wb_ctrl_in.we; + wb_init_in.stb <= wb_ctrl_in.stb; + wb_init_in.cyc <= wb_ctrl_in.cyc and wb_ctrl_is_init; + + -- DRAM CSR IN signals + wb_ctrl_adr <= x"0000" & wb_ctrl_in.adr(15 downto 2); + wb_ctrl_dat_w <= wb_ctrl_in.dat; + wb_ctrl_sel <= wb_ctrl_in.sel; + wb_ctrl_we <= wb_ctrl_in.we; + wb_ctrl_cyc <= wb_ctrl_in.cyc and wb_ctrl_is_csr; + wb_ctrl_stb <= wb_ctrl_in.stb and wb_ctrl_is_csr; - -- Address bit 3 selects the top or bottom half of the data + -- Ctrl bus wishbone OUT signals + wb_ctrl_out.ack <= wb_ctrl_ack when wb_ctrl_is_csr = '1' + else wb_init_out.ack; + wb_ctrl_out.dat <= wb_ctrl_dat_r when wb_ctrl_is_csr = '1' + else wb_init_out.dat; + wb_ctrl_out.stall <= wb_init_out.stall when wb_ctrl_is_init else + '0' when wb_ctrl_in.cyc = '0' else not 
wb_ctrl_ack; + + -- + -- Data bus wishbone to LiteDRAM native port + -- + -- Address bit 3 selects the top or bottom half of the data -- bus (64-bit wishbone vs. 128-bit DRAM interface) -- + -- XXX TODO: Figure out how to pipeline this + -- ad3 <= wb_in.adr(3); - -- DRAM data interface signals - user_port0_cmd_valid <= (wb_in.cyc and wb_in.stb and not wb_is_ctrl and not wb_is_init) - when state = CMD else '0'; - user_port0_cmd_we <= wb_in.we when state = CMD else '0'; + -- Wishbone port IN signals + user_port0_cmd_valid <= wb_in.cyc and wb_in.stb when state = CMD else '0'; + user_port0_cmd_we <= wb_in.we when state = CMD else '0'; user_port0_wdata_valid <= '1' when state = MWRITE else '0'; user_port0_rdata_ready <= '1' when state = MREAD else '0'; - user_port0_cmd_addr <= wb_in.adr(DRAM_ABITS+3 downto 4); - user_port0_wdata_data <= wb_in.dat & wb_in.dat; - user_port0_wdata_we <= wb_in.sel & "00000000" when ad3 = '1' else - "00000000" & wb_in.sel; - - -- DRAM ctrl interface signals - wb_ctrl_adr <= x"0000" & wb_in.adr(15 downto 2); - wb_ctrl_dat_w <= wb_in.dat(31 downto 0); - wb_ctrl_sel <= wb_in.sel(3 downto 0); - wb_ctrl_cyc <= wb_in.cyc and wb_is_ctrl; - wb_ctrl_stb <= wb_in.stb and wb_is_ctrl; - wb_ctrl_we <= wb_in.we; - - -- Wishbone out signals - wb_out.ack <= wb_ctrl_ack when wb_is_ctrl ='1' else - wb_init_out.ack when wb_is_init = '1' else - user_port0_wdata_ready when state = MWRITE else + user_port0_cmd_addr <= wb_in.adr(DRAM_ABITS+3 downto 4); + user_port0_wdata_data <= wb_in.dat & wb_in.dat; + user_port0_wdata_we <= wb_in.sel & "00000000" when ad3 = '1' else + "00000000" & wb_in.sel; + + -- Wishbone OUT signals + wb_out.ack <= user_port0_wdata_ready when state = MWRITE else user_port0_rdata_valid when state = MREAD else '0'; - wb_out.dat <= (x"00000000" & wb_ctrl_dat_r) when wb_is_ctrl = '1' else - wb_init_out.dat when wb_is_init = '1' else - user_port0_rdata_data(127 downto 64) when ad3 = '1' else + wb_out.dat <= user_port0_rdata_data(127 downto 64) 
when ad3 = '1' else user_port0_rdata_data(63 downto 0); + -- We don't do pipelining yet. wb_out.stall <= '0' when wb_in.cyc = '0' else not wb_out.ack; - -- Reset ignored, the reset controller use the pll lock signal, - -- and alternate core reset address set when DRAM is not initialized. - -- - core_alt_reset <= not init_done; - - -- State machine + -- DRAM user port State machine sm: process(system_clk) begin @@ -255,7 +272,7 @@ begin if user_port0_rdata_valid = '1' then state <= CMD; end if; - end case; + end case; end if; end if; end process; diff --git a/litedram/generated/arty/litedram_core.init b/litedram/generated/arty/litedram_core.init index b1a75f8..22485ac 100644 --- a/litedram/generated/arty/litedram_core.init +++ b/litedram/generated/arty/litedram_core.init @@ -4,10 +4,10 @@ a602487d05009f42 a64b5a7d14004a39 2402004ca64b7b7d 602100003c200000 -6421ffff782107c6 +6421f000782107c6 3d80000060213f00 798c07c6618c0000 -618c108c658cffff +618c108c658cf000 4e8004217d8903a6 0000000048000002 0000000000000000 diff --git a/litedram/generated/arty/litedram_core.v b/litedram/generated/arty/litedram_core.v index 9208d87..bb0671b 100644 --- a/litedram/generated/arty/litedram_core.v +++ b/litedram/generated/arty/litedram_core.v @@ -1,5 +1,5 @@ //-------------------------------------------------------------------------------- -// Auto-generated by Migen (0d16e03) & LiteX (3391398a) on 2020-05-15 13:30:46 +// Auto-generated by Migen (0d16e03) & LiteX (3391398a) on 2020-05-16 19:06:01 //-------------------------------------------------------------------------------- module litedram_core( input wire clk, diff --git a/litedram/generated/nexys-video/litedram-wrapper.vhdl b/litedram/generated/nexys-video/litedram-wrapper.vhdl index c4d702e..1595793 100644 --- a/litedram/generated/nexys-video/litedram-wrapper.vhdl +++ b/litedram/generated/nexys-video/litedram-wrapper.vhdl @@ -15,18 +15,20 @@ entity litedram_wrapper is port( -- LiteDRAM generates the system clock and reset -- from 
the input clkin - clk_in : in std_ulogic; - rst : in std_ulogic; - system_clk : out std_ulogic; - system_reset : out std_ulogic; - core_alt_reset : out std_ulogic; - pll_locked : out std_ulogic; + clk_in : in std_ulogic; + rst : in std_ulogic; + system_clk : out std_ulogic; + system_reset : out std_ulogic; + core_alt_reset : out std_ulogic; + pll_locked : out std_ulogic; -- Wishbone ports: - wb_in : in wishbone_master_out; - wb_out : out wishbone_slave_out; - wb_is_ctrl : in std_ulogic; - wb_is_init : in std_ulogic; + wb_in : in wishbone_master_out; + wb_out : out wishbone_slave_out; + wb_ctrl_in : in wb_io_master_out; + wb_ctrl_out : out wb_io_slave_out; + wb_ctrl_is_csr : in std_ulogic; + wb_ctrl_is_init : in std_ulogic; -- Init core serial debug serial_tx : out std_ulogic; @@ -128,8 +130,8 @@ architecture behaviour of litedram_wrapper is signal wb_ctrl_ack : std_ulogic; signal wb_ctrl_we : std_ulogic; - signal wb_init_in : wishbone_master_out; - signal wb_init_out : wishbone_slave_out; + signal wb_init_in : wb_io_master_out; + signal wb_init_out : wb_io_slave_out; type state_t is (CMD, MWRITE, MREAD); signal state : state_t; @@ -138,7 +140,7 @@ architecture behaviour of litedram_wrapper is constant INIT_RAM_ABITS :integer := 14; constant INIT_RAM_FILE : string := "litedram_core.init"; - type ram_t is array(0 to (INIT_RAM_SIZE / 8) - 1) of std_logic_vector(63 downto 0); + type ram_t is array(0 to (INIT_RAM_SIZE / 4) - 1) of std_logic_vector(31 downto 0); impure function init_load_ram(name : string) return ram_t is file ram_file : text open read_mode is name; @@ -150,7 +152,8 @@ architecture behaviour of litedram_wrapper is exit when endfile(ram_file); readline(ram_file, ram_line); hread(ram_line, temp_word); - temp_ram(i) := temp_word; + temp_ram(i*2) := temp_word(31 downto 0); + temp_ram(i*2+1) := temp_word(63 downto 32); end loop; return temp_ram; end function; @@ -162,79 +165,93 @@ architecture behaviour of litedram_wrapper is begin - -- BRAM Memory slave + -- 
alternate core reset address set when DRAM is not initialized. + core_alt_reset <= not init_done; + + -- BRAM Memory slave. TODO: Pipeline it with an output buffer + -- to improve timing init_ram_0: process(system_clk) variable adr : integer; begin if rising_edge(system_clk) then wb_init_out.ack <= '0'; if (wb_init_in.cyc and wb_init_in.stb) = '1' then - adr := to_integer((unsigned(wb_init_in.adr(INIT_RAM_ABITS-1 downto 3)))); + adr := to_integer((unsigned(wb_init_in.adr(INIT_RAM_ABITS-1 downto 2)))); if wb_init_in.we = '0' then wb_init_out.dat <= init_ram(adr); else - for i in 0 to 7 loop + for i in 0 to 3 loop if wb_init_in.sel(i) = '1' then init_ram(adr)(((i + 1) * 8) - 1 downto i * 8) <= wb_init_in.dat(((i + 1) * 8) - 1 downto i * 8); end if; end loop; end if; - wb_init_out.ack <= not wb_init_out.ack; + wb_init_out.ack <= '1'; end if; end if; end process; - wb_init_in.adr <= wb_in.adr; - wb_init_in.dat <= wb_in.dat; - wb_init_in.sel <= wb_in.sel; - wb_init_in.we <= wb_in.we; - wb_init_in.stb <= wb_in.stb; - wb_init_in.cyc <= wb_in.cyc and wb_is_init; + -- + -- Control bus wishbone: This muxes the wishbone to the CSRs + -- and an internal small one to the init BRAM + -- + + -- Init DRAM wishbone IN signals + wb_init_in.adr <= wb_ctrl_in.adr; + wb_init_in.dat <= wb_ctrl_in.dat; + wb_init_in.sel <= wb_ctrl_in.sel; + wb_init_in.we <= wb_ctrl_in.we; + wb_init_in.stb <= wb_ctrl_in.stb; + wb_init_in.cyc <= wb_ctrl_in.cyc and wb_ctrl_is_init; + + -- DRAM CSR IN signals + wb_ctrl_adr <= x"0000" & wb_ctrl_in.adr(15 downto 2); + wb_ctrl_dat_w <= wb_ctrl_in.dat; + wb_ctrl_sel <= wb_ctrl_in.sel; + wb_ctrl_we <= wb_ctrl_in.we; + wb_ctrl_cyc <= wb_ctrl_in.cyc and wb_ctrl_is_csr; + wb_ctrl_stb <= wb_ctrl_in.stb and wb_ctrl_is_csr; - -- Address bit 3 selects the top or bottom half of the data + -- Ctrl bus wishbone OUT signals + wb_ctrl_out.ack <= wb_ctrl_ack when wb_ctrl_is_csr = '1' + else wb_init_out.ack; + wb_ctrl_out.dat <= wb_ctrl_dat_r when wb_ctrl_is_csr = '1' + else 
wb_init_out.dat; + wb_ctrl_out.stall <= wb_init_out.stall when wb_ctrl_is_init else + '0' when wb_ctrl_in.cyc = '0' else not wb_ctrl_ack; + + -- + -- Data bus wishbone to LiteDRAM native port + -- + -- Address bit 3 selects the top or bottom half of the data -- bus (64-bit wishbone vs. 128-bit DRAM interface) -- + -- XXX TODO: Figure out how to pipeline this + -- ad3 <= wb_in.adr(3); - -- DRAM data interface signals - user_port0_cmd_valid <= (wb_in.cyc and wb_in.stb and not wb_is_ctrl and not wb_is_init) - when state = CMD else '0'; - user_port0_cmd_we <= wb_in.we when state = CMD else '0'; + -- Wishbone port IN signals + user_port0_cmd_valid <= wb_in.cyc and wb_in.stb when state = CMD else '0'; + user_port0_cmd_we <= wb_in.we when state = CMD else '0'; user_port0_wdata_valid <= '1' when state = MWRITE else '0'; user_port0_rdata_ready <= '1' when state = MREAD else '0'; - user_port0_cmd_addr <= wb_in.adr(DRAM_ABITS+3 downto 4); - user_port0_wdata_data <= wb_in.dat & wb_in.dat; - user_port0_wdata_we <= wb_in.sel & "00000000" when ad3 = '1' else - "00000000" & wb_in.sel; - - -- DRAM ctrl interface signals - wb_ctrl_adr <= x"0000" & wb_in.adr(15 downto 2); - wb_ctrl_dat_w <= wb_in.dat(31 downto 0); - wb_ctrl_sel <= wb_in.sel(3 downto 0); - wb_ctrl_cyc <= wb_in.cyc and wb_is_ctrl; - wb_ctrl_stb <= wb_in.stb and wb_is_ctrl; - wb_ctrl_we <= wb_in.we; - - -- Wishbone out signals - wb_out.ack <= wb_ctrl_ack when wb_is_ctrl ='1' else - wb_init_out.ack when wb_is_init = '1' else - user_port0_wdata_ready when state = MWRITE else + user_port0_cmd_addr <= wb_in.adr(DRAM_ABITS+3 downto 4); + user_port0_wdata_data <= wb_in.dat & wb_in.dat; + user_port0_wdata_we <= wb_in.sel & "00000000" when ad3 = '1' else + "00000000" & wb_in.sel; + + -- Wishbone OUT signals + wb_out.ack <= user_port0_wdata_ready when state = MWRITE else user_port0_rdata_valid when state = MREAD else '0'; - wb_out.dat <= (x"00000000" & wb_ctrl_dat_r) when wb_is_ctrl = '1' else - wb_init_out.dat when wb_is_init = 
'1' else - user_port0_rdata_data(127 downto 64) when ad3 = '1' else + wb_out.dat <= user_port0_rdata_data(127 downto 64) when ad3 = '1' else user_port0_rdata_data(63 downto 0); + -- We don't do pipelining yet. wb_out.stall <= '0' when wb_in.cyc = '0' else not wb_out.ack; - -- Reset ignored, the reset controller use the pll lock signal, - -- and alternate core reset address set when DRAM is not initialized. - -- - core_alt_reset <= not init_done; - - -- State machine + -- DRAM user port State machine sm: process(system_clk) begin @@ -255,7 +272,7 @@ begin if user_port0_rdata_valid = '1' then state <= CMD; end if; - end case; + end case; end if; end if; end process; diff --git a/litedram/generated/nexys-video/litedram_core.init b/litedram/generated/nexys-video/litedram_core.init index b1a75f8..22485ac 100644 --- a/litedram/generated/nexys-video/litedram_core.init +++ b/litedram/generated/nexys-video/litedram_core.init @@ -4,10 +4,10 @@ a602487d05009f42 a64b5a7d14004a39 2402004ca64b7b7d 602100003c200000 -6421ffff782107c6 +6421f000782107c6 3d80000060213f00 798c07c6618c0000 -618c108c658cffff +618c108c658cf000 4e8004217d8903a6 0000000048000002 0000000000000000 diff --git a/litedram/generated/nexys-video/litedram_core.v b/litedram/generated/nexys-video/litedram_core.v index dd29267..4afac81 100644 --- a/litedram/generated/nexys-video/litedram_core.v +++ b/litedram/generated/nexys-video/litedram_core.v @@ -1,5 +1,5 @@ //-------------------------------------------------------------------------------- -// Auto-generated by Migen (0d16e03) & LiteX (3391398a) on 2020-05-15 13:30:49 +// Auto-generated by Migen (0d16e03) & LiteX (3391398a) on 2020-05-16 19:06:03 //-------------------------------------------------------------------------------- module litedram_core( input wire clk, diff --git a/soc.vhdl b/soc.vhdl index a42e071..a9f46fd 100644 --- a/soc.vhdl +++ b/soc.vhdl @@ -12,14 +12,17 @@ use work.wishbone_types.all; -- Memory map. 
*** Keep include/microwatt_soc.h updated on changes *** -- +-- Main bus: -- 0x00000000: Block RAM (MEMORY_SIZE) or DRAM depending on syscon -- 0x40000000: DRAM (when present) +-- 0x80000000: Block RAM (aliased & repeated) + +-- IO Bus: -- 0xc0000000: SYSCON -- 0xc0002000: UART0 -- 0xc0004000: XICS ICP -- 0xc0100000: LiteDRAM control (CSRs) --- 0xf0000000: Block RAM (aliased & repeated) --- 0xffff0000: DRAM init code (if any) +-- 0xf0000000: DRAM init code (if any) entity soc is generic ( @@ -37,10 +40,12 @@ entity soc is system_clk : in std_ulogic; -- DRAM controller signals - wb_dram_in : out wishbone_master_out; - wb_dram_out : in wishbone_slave_out; - wb_dram_ctrl : out std_ulogic; - wb_dram_init : out std_ulogic; + wb_dram_in : out wishbone_master_out; + wb_dram_out : in wishbone_slave_out; + wb_dram_ctrl_in : out wb_io_master_out; + wb_dram_ctrl_out : in wb_io_slave_out; + wb_dram_is_csr : out std_ulogic; + wb_dram_is_init : out std_ulogic; -- UART0 signals: uart0_txd : out std_ulogic; @@ -71,20 +76,28 @@ architecture behaviour of soc is signal wb_master_in : wishbone_slave_out; signal wb_master_out : wishbone_master_out; + -- Main "IO" bus, from main slave decoder to the latch + signal wb_io_in : wishbone_master_out; + signal wb_io_out : wishbone_slave_out; + + -- Secondary (smaller) IO bus after the IO bus latch + signal wb_sio_out : wb_io_master_out; + signal wb_sio_in : wb_io_slave_out; + -- Syscon signals signal dram_at_0 : std_ulogic; - signal do_core_reset : std_ulogic; - signal wb_syscon_in : wishbone_master_out; - signal wb_syscon_out : wishbone_slave_out; + signal do_core_reset : std_ulogic; + signal wb_syscon_in : wb_io_master_out; + signal wb_syscon_out : wb_io_slave_out; -- UART0 signals: - signal wb_uart0_in : wishbone_master_out; - signal wb_uart0_out : wishbone_slave_out; + signal wb_uart0_in : wb_io_master_out; + signal wb_uart0_out : wb_io_slave_out; signal uart_dat8 : std_ulogic_vector(7 downto 0); -- XICS0 signals: - signal wb_xics0_in : 
wishbone_master_out; - signal wb_xics0_out : wishbone_slave_out; + signal wb_xics0_in : wb_io_master_out; + signal wb_xics0_out : wb_io_slave_out; signal int_level_in : std_ulogic_vector(15 downto 0); signal xics_to_execute1 : XicsToExecute1Type; @@ -141,7 +154,7 @@ begin generic map( SIM => SIM, DISABLE_FLATTEN => DISABLE_FLATTEN_CORE, - ALT_RESET_ADDRESS => (15 downto 0 => '0', others => '1') + ALT_RESET_ADDRESS => (27 downto 0 => '0', others => '1') ) port map( clk => system_clk, @@ -180,90 +193,271 @@ begin wb_slave_in => wb_master_in ); - -- Wishbone slaves address decoder & mux - slave_intercon: process(wb_master_out, wb_bram_out, wb_uart0_out, wb_dram_out, wb_syscon_out) - -- Selected slave - type slave_type is (SLAVE_SYSCON, - SLAVE_UART, - SLAVE_BRAM, - SLAVE_DRAM, - SLAVE_DRAM_INIT, - SLAVE_DRAM_CTRL, - SLAVE_ICP_0, - SLAVE_NONE); - variable slave : slave_type; + -- Top level Wishbone slaves address decoder & mux + -- + -- From CPU to BRAM, DRAM, IO, selected on top 3 bits and dram_at_0 + -- 0000 - BRAM + -- 0001 - DRAM + -- 01xx - DRAM + -- 10xx - BRAM + -- 11xx - IO + -- + slave_top_intercon: process(wb_master_out, wb_bram_out, wb_dram_out, wb_io_out, dram_at_0) + type slave_top_type is (SLAVE_TOP_BRAM, + SLAVE_TOP_DRAM, + SLAVE_TOP_IO); + variable slave_top : slave_top_type; + variable top_decode : std_ulogic_vector(3 downto 0); begin - -- Simple address decoder. - slave := SLAVE_NONE; - -- Simple address decoder. 
Ignore top bits to save silicon for now - slave := SLAVE_NONE; - if std_match(wb_master_out.adr, x"0-------") then - slave := SLAVE_DRAM when HAS_DRAM and dram_at_0 = '1' else - SLAVE_BRAM; - elsif std_match(wb_master_out.adr, x"FFFF----") then - slave := SLAVE_DRAM_INIT; - elsif std_match(wb_master_out.adr, x"F-------") then - slave := SLAVE_BRAM; - elsif std_match(wb_master_out.adr, x"4-------") and HAS_DRAM then - slave := SLAVE_DRAM; - elsif std_match(wb_master_out.adr, x"C0000---") then - slave := SLAVE_SYSCON; - elsif std_match(wb_master_out.adr, x"C0002---") then - slave := SLAVE_UART; - elsif std_match(wb_master_out.adr, x"C01-----") then - slave := SLAVE_DRAM_CTRL; - elsif std_match(wb_master_out.adr, x"C0004---") then - slave := SLAVE_ICP_0; + -- Top-level address decoder + top_decode := wb_master_out.adr(31 downto 29) & dram_at_0; + slave_top := SLAVE_TOP_BRAM; + if std_match(top_decode, "0000") then + slave_top := SLAVE_TOP_BRAM; + elsif std_match(top_decode, "0001") then + slave_top := SLAVE_TOP_DRAM; + elsif std_match(top_decode, "01--") then + slave_top := SLAVE_TOP_DRAM; + elsif std_match(top_decode, "10--") then + slave_top := SLAVE_TOP_BRAM; + elsif std_match(top_decode, "11--") then + slave_top := SLAVE_TOP_IO; end if; - -- Wishbone muxing. Defaults: + -- Top level wishbone muxing. wb_bram_in <= wb_master_out; wb_bram_in.cyc <= '0'; - wb_uart0_in <= wb_master_out; + wb_dram_in <= wb_master_out; + wb_dram_in.cyc <= '0'; + wb_io_in <= wb_master_out; + wb_io_in.cyc <= '0'; + case slave_top is + when SLAVE_TOP_BRAM => + wb_bram_in.cyc <= wb_master_out.cyc; + wb_master_in <= wb_bram_out; + when SLAVE_TOP_DRAM => + wb_dram_in.cyc <= wb_master_out.cyc; + wb_master_in <= wb_dram_out; + when SLAVE_TOP_IO => + wb_io_in.cyc <= wb_master_out.cyc; + wb_master_in <= wb_io_out; + end case; + end process slave_top_intercon; + + -- IO wishbone slave 64->32 bits converter + -- + -- For timing reasons, this adds a one cycle latch on the way both + -- in and out. 
This relaxes timing and routing pressure on the "main" + -- memory bus by moving all simple IOs to a slower 32-bit bus. + -- + -- This implementation is rather dumb at the moment, no stash buffer, + -- so we stall whenever that latch is busy. This can be improved. + -- + slave_io_latch: process(system_clk) + -- State + type state_t is (IDLE, WAIT_ACK_BOT, WAIT_ACK_TOP); + variable state : state_t; + + -- Misc + variable has_top : boolean; + variable has_bot : boolean; + begin + if rising_edge(system_clk) then + if (rst) then + state := IDLE; + wb_io_out.ack <= '0'; + wb_io_out.stall <= '0'; + wb_sio_out.cyc <= '0'; + wb_sio_out.stb <= '0'; + has_top := false; + has_bot := false; + else + case state is + when IDLE => + -- Clear ACK in case it was set + wb_io_out.ack <= '0'; + + -- Do we have a cycle ? + if wb_io_in.cyc = '1' and wb_io_in.stb = '1' then + -- Stall master until we are done, we aren't (yet) pipelining + -- this, it's all slow IOs. + wb_io_out.stall <= '1'; + + -- Start cycle downstream + wb_sio_out.cyc <= '1'; + wb_sio_out.stb <= '1'; + + -- Copy write enable to IO out, copy address as well + wb_sio_out.we <= wb_io_in.we; + wb_sio_out.adr <= wb_io_in.adr(wb_sio_out.adr'left downto 3) & "000"; + + -- Do we have a top word and/or a bottom word ? + has_top := wb_io_in.sel(7 downto 4) /= "0000"; + has_bot := wb_io_in.sel(3 downto 0) /= "0000"; + + -- If we have a bottom word, handle it first, otherwise + -- send the top word down. XXX Split the actual mux out + -- and only generate a control signal.
+ if has_bot then + if wb_io_in.we = '1' then + wb_sio_out.dat <= wb_io_in.dat(31 downto 0); + end if; + wb_sio_out.sel <= wb_io_in.sel(3 downto 0); + + -- Wait for ack + state := WAIT_ACK_BOT; + else + if wb_io_in.we = '1' then + wb_sio_out.dat <= wb_io_in.dat(63 downto 32); + end if; + wb_sio_out.sel <= wb_io_in.sel(7 downto 4); + + -- Bump address + wb_sio_out.adr(2) <= '1'; + + -- Wait for ack + state := WAIT_ACK_TOP; + end if; + end if; + when WAIT_ACK_BOT => + -- If we aren't stalled by the device, clear stb + if wb_sio_in.stall = '0' then + wb_sio_out.stb <= '0'; + end if; + + -- Handle ack + if wb_sio_in.ack = '1' then + -- If it's a read, latch the data + if wb_sio_out.we = '0' then + wb_io_out.dat(31 downto 0) <= wb_sio_in.dat; + end if; + + -- Do we have a "top" part as well ? + if has_top then + -- Latch data & sel + if wb_io_in.we = '1' then + wb_sio_out.dat <= wb_io_in.dat(63 downto 32); + end if; + wb_sio_out.sel <= wb_io_in.sel(7 downto 4); + + -- Bump address and set STB + wb_sio_out.adr(2) <= '1'; + wb_sio_out.stb <= '1'; + + -- Wait for new ack + state := WAIT_ACK_TOP; + else + -- We are done, ack up, clear cyc downstream + wb_sio_out.cyc <= '0'; + + -- And ack & unstall upstream + wb_io_out.ack <= '1'; + wb_io_out.stall <= '0'; + + -- Wait for next one + state := IDLE; + end if; + end if; + when WAIT_ACK_TOP => + -- If we aren't stalled by the device, clear stb + if wb_sio_in.stall = '0' then + wb_sio_out.stb <= '0'; + end if; + + -- Handle ack + if wb_sio_in.ack = '1' then + -- If it's a read, latch the data + if wb_sio_out.we = '0' then + wb_io_out.dat(63 downto 32) <= wb_sio_in.dat; + end if; + + -- We are done, ack up, clear cyc downstream + wb_sio_out.cyc <= '0'; + + -- And ack & unstall upstream + wb_io_out.ack <= '1'; + wb_io_out.stall <= '0'; + + -- Wait for next one + state := IDLE; + end if; + end case; + end if; + end if; + end process; + + -- IO wishbone slave intercon.
+ -- + slave_io_intercon: process(wb_sio_out, wb_syscon_out, wb_uart0_out, + wb_dram_ctrl_out, wb_xics0_out) + -- IO branch split: + type slave_io_type is (SLAVE_IO_SYSCON, + SLAVE_IO_UART, + SLAVE_IO_DRAM_INIT, + SLAVE_IO_DRAM_CSR, + SLAVE_IO_ICP_0, + SLAVE_IO_NONE); + variable slave_io : slave_io_type; + + variable match : std_ulogic_vector(31 downto 12); + begin + + -- Simple address decoder. + slave_io := SLAVE_IO_NONE; + match := "11" & wb_sio_out.adr(29 downto 12); + if std_match(match, x"F----") then + slave_io := SLAVE_IO_DRAM_INIT; + elsif std_match(match, x"C0000") then + slave_io := SLAVE_IO_SYSCON; + elsif std_match(match, x"C0002") then + slave_io := SLAVE_IO_UART; + elsif std_match(match, x"C01--") then + slave_io := SLAVE_IO_DRAM_CSR; + elsif std_match(match, x"C0004") then + slave_io := SLAVE_IO_ICP_0; + end if; + wb_uart0_in <= wb_sio_out; wb_uart0_in.cyc <= '0'; -- Only give xics 8 bits of wb addr - wb_xics0_in <= wb_master_out; + wb_xics0_in <= wb_sio_out; wb_xics0_in.adr <= (others => '0'); - wb_xics0_in.adr(7 downto 0) <= wb_master_out.adr(7 downto 0); + wb_xics0_in.adr(7 downto 0) <= wb_sio_out.adr(7 downto 0); wb_xics0_in.cyc <= '0'; - wb_dram_in <= wb_master_out; - wb_dram_in.cyc <= '0'; - wb_dram_ctrl <= '0'; - wb_dram_init <= '0'; - wb_syscon_in <= wb_master_out; + wb_dram_ctrl_in <= wb_sio_out; + wb_dram_ctrl_in.cyc <= '0'; + wb_dram_is_csr <= '0'; + wb_dram_is_init <= '0'; + + wb_syscon_in <= wb_sio_out; wb_syscon_in.cyc <= '0'; - case slave is - when SLAVE_BRAM => - wb_bram_in.cyc <= wb_master_out.cyc; - wb_master_in <= wb_bram_out; - when SLAVE_DRAM => - wb_dram_in.cyc <= wb_master_out.cyc; - wb_master_in <= wb_dram_out; - when SLAVE_DRAM_INIT => - wb_dram_in.cyc <= wb_master_out.cyc; - wb_master_in <= wb_dram_out; - wb_dram_init <= '1'; - when SLAVE_DRAM_CTRL => - wb_dram_in.cyc <= wb_master_out.cyc; - wb_master_in <= wb_dram_out; - wb_dram_ctrl <= '1'; - when SLAVE_SYSCON => - wb_syscon_in.cyc <= wb_master_out.cyc; - wb_master_in <= 
wb_syscon_out; - when SLAVE_UART => - wb_uart0_in.cyc <= wb_master_out.cyc; - wb_master_in <= wb_uart0_out; - when SLAVE_ICP_0 => - wb_xics0_in.cyc <= wb_master_out.cyc; - wb_master_in <= wb_xics0_out; + + case slave_io is + when SLAVE_IO_DRAM_INIT => + wb_dram_ctrl_in.cyc <= wb_sio_out.cyc; + wb_sio_in <= wb_dram_ctrl_out; + wb_dram_is_init <= '1'; + when SLAVE_IO_DRAM_CSR => + wb_dram_ctrl_in.cyc <= wb_sio_out.cyc; + wb_sio_in <= wb_dram_ctrl_out; + wb_dram_is_csr <= '1'; + when SLAVE_IO_SYSCON => + wb_syscon_in.cyc <= wb_sio_out.cyc; + wb_sio_in <= wb_syscon_out; + when SLAVE_IO_UART => + wb_uart0_in.cyc <= wb_sio_out.cyc; + wb_sio_in <= wb_uart0_out; + when SLAVE_IO_ICP_0 => + wb_xics0_in.cyc <= wb_sio_out.cyc; + wb_sio_in <= wb_xics0_out; when others => - wb_master_in.dat <= (others => '1'); - wb_master_in.ack <= wb_master_out.stb and wb_master_out.cyc; - wb_master_in.stall <= '0'; + wb_sio_in.dat <= (others => '1'); + wb_sio_in.ack <= wb_sio_out.stb and wb_sio_out.cyc; + wb_sio_in.stall <= '0'; end case; - end process slave_intercon; + + end process; -- Syscon slave syscon0: entity work.syscon @@ -287,10 +481,6 @@ begin -- Simulated memory and UART -- UART0 wishbone slave - -- XXX FIXME: Need a proper wb64->wb8 adapter that - -- converts SELs into low address bits and muxes - -- data accordingly (either that or rejects large - -- cycles). 
uart0: entity work.pp_soc_uart generic map( FIFO_DEPTH => 32 @@ -309,7 +499,7 @@ begin wb_we_in => wb_uart0_in.we, wb_ack_out => wb_uart0_out.ack ); - wb_uart0_out.dat <= x"00000000000000" & uart_dat8; + wb_uart0_out.dat <= x"000000" & uart_dat8; wb_uart0_out.stall <= '0' when wb_uart0_in.cyc = '0' else not wb_uart0_out.ack; xics0: entity work.xics diff --git a/syscon.vhdl b/syscon.vhdl index a5b569b..a9dd1cc 100644 --- a/syscon.vhdl +++ b/syscon.vhdl @@ -20,8 +20,8 @@ entity syscon is rst : in std_ulogic; -- Wishbone ports: - wishbone_in : in wishbone_master_out; - wishbone_out : out wishbone_slave_out; + wishbone_in : in wb_io_master_out; + wishbone_out : out wb_io_slave_out; -- System control ports dram_at_0 : out std_ulogic; @@ -43,6 +43,9 @@ architecture behaviour of syscon is constant SYS_REG_CLKINFO : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "100"; constant SYS_REG_CTRL : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "101"; + -- Muxed reg read signal + signal reg_out : std_ulogic_vector(63 downto 0); + -- INFO register bits constant SYS_REG_INFO_HAS_UART : integer := 0; constant SYS_REG_INFO_HAS_DRAM : integer := 1; @@ -99,7 +102,7 @@ begin SYS_REG_CTRL_BITS-1 downto 0 => reg_ctrl); -- Register read mux - with wishbone_in.adr(SYS_REG_BITS+2 downto 3) select wishbone_out.dat <= + with wishbone_in.adr(SYS_REG_BITS+2 downto 3) select reg_out <= SIG_VALUE when SYS_REG_SIG, reg_info when SYS_REG_INFO, reg_braminfo when SYS_REG_BRAMINFO, @@ -107,6 +110,8 @@ begin reg_clkinfo when SYS_REG_CLKINFO, reg_ctrl_out when SYS_REG_CTRL, (others => '0') when others; + wishbone_out.dat <= reg_out(63 downto 32) when wishbone_in.adr(2) = '1' else + reg_out(31 downto 0); -- Register writes regs_write: process(clk) @@ -116,7 +121,9 @@ begin reg_ctrl <= (others => '0'); else if wishbone_in.cyc and wishbone_in.stb and wishbone_in.we then - if wishbone_in.adr(SYS_REG_BITS+2 downto 3) = SYS_REG_CTRL then + -- Change this if CTRL ever has more than 32 bits + if 
wishbone_in.adr(SYS_REG_BITS+2 downto 3) = SYS_REG_CTRL and + wishbone_in.adr(2) = '0' then reg_ctrl(SYS_REG_CTRL_BITS-1 downto 0) <= wishbone_in.dat(SYS_REG_CTRL_BITS-1 downto 0); end if; diff --git a/tests/xics/xics.h b/tests/xics/xics.h index 09238cc..bbb1f99 100644 --- a/tests/xics/xics.h +++ b/tests/xics/xics.h @@ -1,8 +1,7 @@ #include -#define XICS_BASE 0xc0004000 - -static uint64_t xics_base = XICS_BASE; +#include "microwatt_soc.h" +#include "io.h" #define XICS_XIRR_POLL 0x0 #define XICS_XIRR 0x4 @@ -11,26 +10,20 @@ static uint64_t xics_base = XICS_BASE; uint8_t xics_read8(int offset) { - uint32_t val; - - __asm__ volatile("lbzcix %0,%1,%2" : "=r" (val) : "b" (xics_base), "r" (offset)); - return val; + return readb(XICS_BASE + offset); } void xics_write8(int offset, uint8_t val) { - __asm__ volatile("stbcix %0,%1,%2" : : "r" (val), "b" (xics_base), "r" (offset)); + writeb(val, XICS_BASE + offset); } uint32_t xics_read32(int offset) { - uint32_t val; - - __asm__ volatile("lwzcix %0,%1,%2" : "=r" (val) : "b" (xics_base), "r" (offset)); - return val; + return readl(XICS_BASE + offset); } void xics_write32(int offset, uint32_t val) { - __asm__ volatile("stwcix %0,%1,%2" : : "r" (val), "b" (xics_base), "r" (offset)); + writel(val, XICS_BASE + offset); } diff --git a/wishbone_types.vhdl b/wishbone_types.vhdl index c628ca2..693deac 100644 --- a/wishbone_types.vhdl +++ b/wishbone_types.vhdl @@ -2,6 +2,9 @@ library ieee; use ieee.std_logic_1164.all; package wishbone_types is + -- + -- Main CPU bus. 
32-bit address, 64-bit data + -- constant wishbone_addr_bits : integer := 32; constant wishbone_data_bits : integer := 64; constant wishbone_sel_bits : integer := wishbone_data_bits/8; @@ -30,4 +33,22 @@ package wishbone_types is type wishbone_master_out_vector is array (natural range <>) of wishbone_master_out; type wishbone_slave_out_vector is array (natural range <>) of wishbone_slave_out; + -- + -- IO Bus to a device, 30-bit address, 32-bits data + -- + type wb_io_master_out is record + adr : std_ulogic_vector(29 downto 0); + dat : std_ulogic_vector(31 downto 0); + sel : std_ulogic_vector(3 downto 0); + cyc : std_ulogic; + stb : std_ulogic; + we : std_ulogic; + end record; + + type wb_io_slave_out is record + dat : std_ulogic_vector(31 downto 0); + ack : std_ulogic; + stall : std_ulogic; + end record; + end package wishbone_types; diff --git a/xics.vhdl b/xics.vhdl index 09a1ba6..7d49433 100644 --- a/xics.vhdl +++ b/xics.vhdl @@ -30,8 +30,8 @@ entity xics is clk : in std_logic; rst : in std_logic; - wb_in : in wishbone_master_out; - wb_out : out wishbone_slave_out; + wb_in : in wb_io_master_out; + wb_out : out wb_io_slave_out; int_level_in : in std_ulogic_vector(LEVEL_NUM - 1 downto 0); @@ -47,7 +47,7 @@ architecture behaviour of xics is mfrr : std_ulogic_vector(7 downto 0); mfrr_pending : std_ulogic; irq : std_ulogic; - wb_rd_data : wishbone_data_type; + wb_rd_data : std_ulogic_vector(31 downto 0); wb_ack : std_ulogic; end record; constant reg_internal_init : reg_internal_t := @@ -62,11 +62,11 @@ architecture behaviour of xics is -- hardwire the hardware IRQ priority constant HW_PRIORITY : std_ulogic_vector(7 downto 0) := x"80"; - -- 32 bit offsets for each presentation - constant XIRR_POLL : std_ulogic_vector(31 downto 0) := x"00000000"; - constant XIRR : std_ulogic_vector(31 downto 0) := x"00000004"; - constant RESV0 : std_ulogic_vector(31 downto 0) := x"00000008"; - constant MFRR : std_ulogic_vector(31 downto 0) := x"0000000c"; + -- 8 bit offsets for each 
presentation + constant XIRR_POLL : std_ulogic_vector(7 downto 0) := x"00"; + constant XIRR : std_ulogic_vector(7 downto 0) := x"04"; + constant RESV0 : std_ulogic_vector(7 downto 0) := x"08"; + constant MFRR : std_ulogic_vector(7 downto 0) := x"0c"; begin @@ -95,62 +95,73 @@ begin irq_eoi := '0'; if wb_in.cyc = '1' and wb_in.stb = '1' then - -- wishbone addresses we get are 64 bit alligned, so we - -- need to use the sel bits to get 32 bit chunks. v.wb_ack := '1'; -- always ack if wb_in.we = '1' then -- write -- writes to both XIRR are the same - if wb_in.adr = XIRR_POLL then - report "XICS XIRR_POLL/XIRR write"; - if wb_in.sel = x"0f" then -- 4 bytes + case wb_in.adr(7 downto 0) is + when XIRR_POLL => + report "XICS XIRR_POLL write"; + if wb_in.sel = x"f" then -- 4 bytes + v.cppr := wb_in.dat(31 downto 24); + elsif wb_in.sel = x"1" then -- 1 byte + v.cppr := wb_in.dat(7 downto 0); + end if; + when XIRR => + if wb_in.sel = x"f" then -- 4 byte + report "XICS XIRR write word:" & to_hstring(wb_in.dat); v.cppr := wb_in.dat(31 downto 24); - elsif wb_in.sel = x"f0" then -- 4 byte - v.cppr := wb_in.dat(63 downto 56); irq_eoi := '1'; - elsif wb_in.sel = x"01" then -- 1 byte + elsif wb_in.sel = x"1" then -- 1 byte + report "XICS XIRR write byte:" & to_hstring(wb_in.dat(7 downto 0)); v.cppr := wb_in.dat(7 downto 0); - elsif wb_in.sel = x"10" then -- 1 byte - v.cppr := wb_in.dat(39 downto 32); + else + report "XICS XIRR UNSUPPORTED write ! 
sel=" & to_hstring(wb_in.sel); end if; - - elsif wb_in.adr = RESV0 then - report "XICS MFRR write"; - if wb_in.sel = x"f0" then -- 4 bytes + when MFRR => + if wb_in.sel = x"f" then -- 4 bytes + report "XICS MFRR write word:" & to_hstring(wb_in.dat); v.mfrr_pending := '1'; - v.mfrr := wb_in.dat(63 downto 56); - elsif wb_in.sel = x"10" then -- 1 byte + v.mfrr := wb_in.dat(31 downto 24); + elsif wb_in.sel = x"1" then -- 1 byte + report "XICS MFRR write byte:" & to_hstring(wb_in.dat(7 downto 0)); v.mfrr_pending := '1'; - v.mfrr := wb_in.dat(39 downto 32); + v.mfrr := wb_in.dat(7 downto 0); + else + report "XICS MFRR UNSUPPORTED write ! sel=" & to_hstring(wb_in.sel); end if; - - end if; + when others => + end case; else -- read v.wb_rd_data := (others => '0'); - if wb_in.adr = XIRR_POLL then - report "XICS XIRR_POLL/XIRR read"; - if wb_in.sel = x"0f" then + case wb_in.adr(7 downto 0) is + when XIRR_POLL => + report "XICS XIRR_POLL read"; + if wb_in.sel = x"f" then v.wb_rd_data(23 downto 0) := r.xisr; v.wb_rd_data(31 downto 24) := r.cppr; - elsif wb_in.sel = x"f0" then - v.wb_rd_data(55 downto 32) := r.xisr; - v.wb_rd_data(63 downto 56) := r.cppr; - xirr_accept_rd := '1'; - elsif wb_in.sel = x"01" then + elsif wb_in.sel = x"1" then v.wb_rd_data(7 downto 0) := r.cppr; - elsif wb_in.sel = x"10" then - v.wb_rd_data(39 downto 32) := r.cppr; + end if; + when XIRR => + report "XICS XIRR read"; + if wb_in.sel = x"f" then + v.wb_rd_data(23 downto 0) := r.xisr; + v.wb_rd_data(31 downto 24) := r.cppr; + xirr_accept_rd := '1'; + elsif wb_in.sel = x"1" then + v.wb_rd_data(7 downto 0) := r.cppr; end if; - - elsif wb_in.adr = RESV0 then + when MFRR => report "XICS MFRR read"; - if wb_in.sel = x"f0" then -- 4 bytes - v.wb_rd_data(63 downto 56) := r.mfrr; - elsif wb_in.sel = x"10" then -- 1 byte + if wb_in.sel = x"f" then -- 4 bytes + v.wb_rd_data(31 downto 24) := r.mfrr; + elsif wb_in.sel = x"1" then -- 1 byte v.wb_rd_data( 7 downto 0) := r.mfrr; end if; - end if; + when others => + 
end case; end if; end if;