From cc4dcb3597914bcf6730cdbabc7fbe3605c80c94 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 5 Jun 2020 11:32:08 +1000 Subject: [PATCH] spi: Add SPI Flash controller This adds an SPI flash controller which supports direct memory-mapped access to the flash along with a manual mode to send commands. The direct mode can be set via generic to default to single wire or quad mode. The controller supports normal, dual and quad accesses with configurable commands, clock divider, dummy clocks etc... The SPI clock can be an even divider of sys_clk starting at 2 (so max 50MHz with our typical Arty designs). A flash offset is carried via generics to syscon to tell SW about which portion of the flash is reserved for the FPGA bitfile. There is currently no plumbing to make the CPU reset past that address (TBD). Note: Operating at 50MHz has proven unreliable without adding some delay to the sampling of the input data. I'm working on improving this; in the meantime, I'm leaving the default set at 25 MHz. 
Signed-off-by: Benjamin Herrenschmidt --- core_tb.vhdl | 12 +- fpga/arty_a7.xdc | 16 + fpga/nexys-video.xdc | 10 + fpga/top-arty.vhdl | 397 +++++++++++++++---------- fpga/top-generic.vhdl | 8 +- fpga/top-nexys-video.vhdl | 66 ++++- include/microwatt_soc.h | 34 ++- microwatt.core | 13 + soc.vhdl | 115 ++++++-- spi_flash_ctrl.vhdl | 601 ++++++++++++++++++++++++++++++++++++++ spi_rxtx.vhdl | 386 ++++++++++++++++++++++++ syscon.vhdl | 39 ++- 12 files changed, 1499 insertions(+), 198 deletions(-) create mode 100644 spi_flash_ctrl.vhdl create mode 100644 spi_rxtx.vhdl diff --git a/core_tb.vhdl b/core_tb.vhdl index a35d73d..ed147a3 100644 --- a/core_tb.vhdl +++ b/core_tb.vhdl @@ -20,6 +20,9 @@ architecture behave of core_tb is signal wb_dram_out : wishbone_slave_out; signal wb_dram_ctrl_in : wb_io_master_out; signal wb_dram_ctrl_out : wb_io_slave_out; + + -- Dummy SPI + signal spi_sdat_i : std_ulogic_vector(0 downto 0); begin soc0: entity work.soc @@ -28,19 +31,26 @@ begin MEMORY_SIZE => (384*1024), RAM_INIT_FILE => "main_ram.bin", RESET_LOW => false, - CLK_FREQ => 100000000 + CLK_FREQ => 100000000, + HAS_SPI_FLASH => false ) port map( rst => rst, system_clk => clk, uart0_rxd => '0', uart0_txd => open, + spi_flash_sck => open, + spi_flash_cs_n => open, + spi_flash_sdat_o => open, + spi_flash_sdat_oe => open, + spi_flash_sdat_i => spi_sdat_i, wb_dram_in => wb_dram_in, wb_dram_out => wb_dram_out, wb_dram_ctrl_in => wb_dram_ctrl_in, wb_dram_ctrl_out => wb_dram_ctrl_out, alt_reset => '0' ); + spi_sdat_i(0) <= '1'; clk_process: process begin diff --git a/fpga/arty_a7.xdc b/fpga/arty_a7.xdc index 71d691a..65832c7 100644 --- a/fpga/arty_a7.xdc +++ b/fpga/arty_a7.xdc @@ -26,6 +26,22 @@ set_property -dict { PACKAGE_PIN E1 IOSTANDARD LVCMOS33 } [get_ports { led0_b } set_property -dict { PACKAGE_PIN F6 IOSTANDARD LVCMOS33 } [get_ports { led0_g }]; set_property -dict { PACKAGE_PIN G6 IOSTANDARD LVCMOS33 } [get_ports { led0_r }]; 
+################################################################################ +# SPI Flash +################################################################################ + +set_property -dict { PACKAGE_PIN L13 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_cs_n }]; +set_property -dict { PACKAGE_PIN L16 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_clk }]; +set_property -dict { PACKAGE_PIN K17 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_mosi }]; +set_property -dict { PACKAGE_PIN K18 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_miso }]; +set_property -dict { PACKAGE_PIN L14 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_wp_n }]; +set_property -dict { PACKAGE_PIN M14 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_hold_n }]; + +# Put registers into IOBs to improve timing +set_property IOB true [get_cells -hierarchical -filter {NAME =~*/spi_rxtx/*sck_1*}] +set_property IOB true [get_cells -hierarchical -filter {NAME =~*/spi_rxtx/dat_i_l*}] + + ################################################################################ # DRAM (generated by LiteX) ################################################################################ diff --git a/fpga/nexys-video.xdc b/fpga/nexys-video.xdc index 358c382..85c7331 100644 --- a/fpga/nexys-video.xdc +++ b/fpga/nexys-video.xdc @@ -25,6 +25,16 @@ set_property -dict {PACKAGE_PIN V18 IOSTANDARD LVCMOS33} [get_ports uart_main_rx set_property -dict { PACKAGE_PIN T14 IOSTANDARD LVCMOS33 } [get_ports { led0 }]; set_property -dict { PACKAGE_PIN T15 IOSTANDARD LVCMOS33 } [get_ports { led1 }]; +################################################################################ +# SPI Flash +################################################################################ + +set_property -dict { PACKAGE_PIN T19 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_cs_n }]; +set_property -dict { PACKAGE_PIN P22 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_mosi }]; +set_property -dict { PACKAGE_PIN R22 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_miso 
}]; +set_property -dict { PACKAGE_PIN P21 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_wp_n }]; +set_property -dict { PACKAGE_PIN R21 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_hold_n }]; + ################################################################################ # DRAM (generated by LiteX) ################################################################################ diff --git a/fpga/top-arty.vhdl b/fpga/top-arty.vhdl index ee77d93..b13ed34 100644 --- a/fpga/top-arty.vhdl +++ b/fpga/top-arty.vhdl @@ -10,44 +10,56 @@ use work.wishbone_types.all; entity toplevel is generic ( - MEMORY_SIZE : integer := 16384; - RAM_INIT_FILE : string := "firmware.hex"; - RESET_LOW : boolean := true; - CLK_FREQUENCY : positive := 100000000; - USE_LITEDRAM : boolean := false; - NO_BRAM : boolean := false; - DISABLE_FLATTEN_CORE : boolean := false - ); + MEMORY_SIZE : integer := 16384; + RAM_INIT_FILE : string := "firmware.hex"; + RESET_LOW : boolean := true; + CLK_FREQUENCY : positive := 100000000; + USE_LITEDRAM : boolean := false; + NO_BRAM : boolean := false; + DISABLE_FLATTEN_CORE : boolean := false; + SCLK_STARTUPE2 : boolean := false; + SPI_FLASH_OFFSET : integer := 4194304; + SPI_FLASH_DEF_CKDV : natural := 1; + SPI_FLASH_DEF_QUAD : boolean := true + ); port( - ext_clk : in std_ulogic; - ext_rst : in std_ulogic; - - -- UART0 signals: - uart_main_tx : out std_ulogic; - uart_main_rx : in std_ulogic; - - -- LEDs - led0_b : out std_ulogic; - led0_g : out std_ulogic; - led0_r : out std_ulogic; - - -- DRAM wires - ddram_a : out std_ulogic_vector(13 downto 0); - ddram_ba : out std_ulogic_vector(2 downto 0); - ddram_ras_n : out std_ulogic; - ddram_cas_n : out std_ulogic; - ddram_we_n : out std_ulogic; - ddram_cs_n : out std_ulogic; - ddram_dm : out std_ulogic_vector(1 downto 0); - ddram_dq : inout std_ulogic_vector(15 downto 0); - ddram_dqs_p : inout std_ulogic_vector(1 downto 0); - ddram_dqs_n : inout std_ulogic_vector(1 downto 0); - ddram_clk_p : out std_ulogic; - 
ddram_clk_n : out std_ulogic; - ddram_cke : out std_ulogic; - ddram_odt : out std_ulogic; - ddram_reset_n : out std_ulogic - ); + ext_clk : in std_ulogic; + ext_rst : in std_ulogic; + + -- UART0 signals: + uart_main_tx : out std_ulogic; + uart_main_rx : in std_ulogic; + + -- LEDs + led0_b : out std_ulogic; + led0_g : out std_ulogic; + led0_r : out std_ulogic; + + -- SPI + spi_flash_cs_n : out std_ulogic; + spi_flash_clk : out std_ulogic; + spi_flash_mosi : inout std_ulogic; + spi_flash_miso : inout std_ulogic; + spi_flash_wp_n : inout std_ulogic; + spi_flash_hold_n : inout std_ulogic; + + -- DRAM wires + ddram_a : out std_ulogic_vector(13 downto 0); + ddram_ba : out std_ulogic_vector(2 downto 0); + ddram_ras_n : out std_ulogic; + ddram_cas_n : out std_ulogic; + ddram_we_n : out std_ulogic; + ddram_cs_n : out std_ulogic; + ddram_dm : out std_ulogic_vector(1 downto 0); + ddram_dq : inout std_ulogic_vector(15 downto 0); + ddram_dqs_p : inout std_ulogic_vector(1 downto 0); + ddram_dqs_n : inout std_ulogic_vector(1 downto 0); + ddram_clk_p : out std_ulogic; + ddram_clk_n : out std_ulogic; + ddram_cke : out std_ulogic; + ddram_odt : out std_ulogic; + ddram_reset_n : out std_ulogic + ); end entity toplevel; architecture behaviour of toplevel is @@ -81,6 +93,13 @@ architecture behaviour of toplevel is -- Dumb PWM for the LEDs, those RGB LEDs are too bright otherwise signal pwm_counter : std_ulogic_vector(8 downto 0); + -- SPI flash + signal spi_sck : std_ulogic; + signal spi_cs_n : std_ulogic; + signal spi_sdat_o : std_ulogic_vector(3 downto 0); + signal spi_sdat_oe : std_ulogic_vector(3 downto 0); + signal spi_sdat_i : std_ulogic_vector(3 downto 0); + -- Fixup various memory sizes based on generics function get_bram_size return natural is begin @@ -106,62 +125,116 @@ begin -- Main SoC soc0: entity work.soc - generic map( - MEMORY_SIZE => BRAM_SIZE, - RAM_INIT_FILE => RAM_INIT_FILE, - RESET_LOW => RESET_LOW, - SIM => false, - CLK_FREQ => CLK_FREQUENCY, - HAS_DRAM => 
USE_LITEDRAM, - DRAM_SIZE => 256 * 1024 * 1024, - DRAM_INIT_SIZE => PAYLOAD_SIZE, - DISABLE_FLATTEN_CORE => DISABLE_FLATTEN_CORE - ) - port map ( - system_clk => system_clk, - rst => soc_rst, - uart0_txd => uart_main_tx, - uart0_rxd => uart_main_rx, - wb_dram_in => wb_dram_in, - wb_dram_out => wb_dram_out, - wb_dram_ctrl_in => wb_dram_ctrl_in, - wb_dram_ctrl_out => wb_dram_ctrl_out, - wb_dram_is_csr => wb_dram_is_csr, - wb_dram_is_init => wb_dram_is_init, - alt_reset => core_alt_reset - ); + generic map( + MEMORY_SIZE => BRAM_SIZE, + RAM_INIT_FILE => RAM_INIT_FILE, + RESET_LOW => RESET_LOW, + SIM => false, + CLK_FREQ => CLK_FREQUENCY, + HAS_DRAM => USE_LITEDRAM, + DRAM_SIZE => 256 * 1024 * 1024, + DRAM_INIT_SIZE => PAYLOAD_SIZE, + DISABLE_FLATTEN_CORE => DISABLE_FLATTEN_CORE, + HAS_SPI_FLASH => true, + SPI_FLASH_DLINES => 4, + SPI_FLASH_OFFSET => SPI_FLASH_OFFSET, + SPI_FLASH_DEF_CKDV => SPI_FLASH_DEF_CKDV, + SPI_FLASH_DEF_QUAD => SPI_FLASH_DEF_QUAD + ) + port map ( + -- System signals + system_clk => system_clk, + rst => soc_rst, + + -- UART signals + uart0_txd => uart_main_tx, + uart0_rxd => uart_main_rx, + + -- SPI signals + spi_flash_sck => spi_sck, + spi_flash_cs_n => spi_cs_n, + spi_flash_sdat_o => spi_sdat_o, + spi_flash_sdat_oe => spi_sdat_oe, + spi_flash_sdat_i => spi_sdat_i, + + -- DRAM wishbone + wb_dram_in => wb_dram_in, + wb_dram_out => wb_dram_out, + wb_dram_ctrl_in => wb_dram_ctrl_in, + wb_dram_ctrl_out => wb_dram_ctrl_out, + wb_dram_is_csr => wb_dram_is_csr, + wb_dram_is_init => wb_dram_is_init, + alt_reset => core_alt_reset + ); + + -- SPI Flash + -- + -- Note: Unlike many other boards, the SPI flash on the Arty has + -- an actual pin to generate the clock and doesn't require to use + -- the STARTUPE2 primitive. 
+ -- + spi_flash_cs_n <= spi_cs_n; + spi_flash_mosi <= spi_sdat_o(0) when spi_sdat_oe(0) = '1' else 'Z'; + spi_flash_miso <= spi_sdat_o(1) when spi_sdat_oe(1) = '1' else 'Z'; + spi_flash_wp_n <= spi_sdat_o(2) when spi_sdat_oe(2) = '1' else 'Z'; + spi_flash_hold_n <= spi_sdat_o(3) when spi_sdat_oe(3) = '1' else 'Z'; + spi_sdat_i(0) <= spi_flash_mosi; + spi_sdat_i(1) <= spi_flash_miso; + spi_sdat_i(2) <= spi_flash_wp_n; + spi_sdat_i(3) <= spi_flash_hold_n; + + spi_sclk_startupe2: if SCLK_STARTUPE2 generate + spi_flash_clk <= 'Z'; + + STARTUPE2_INST: STARTUPE2 + port map ( + CLK => '0', + GSR => '0', + GTS => '0', + KEYCLEARB => '0', + PACK => '0', + USRCCLKO => spi_sck, + USRCCLKTS => '0', + USRDONEO => '1', + USRDONETS => '0' + ); + end generate; + + spi_direct_sclk: if not SCLK_STARTUPE2 generate + spi_flash_clk <= spi_sck; + end generate; nodram: if not USE_LITEDRAM generate signal ddram_clk_dummy : std_ulogic; begin - reset_controller: entity work.soc_reset - generic map( - RESET_LOW => RESET_LOW - ) - port map( - ext_clk => ext_clk, - pll_clk => system_clk, - pll_locked_in => system_clk_locked, - ext_rst_in => ext_rst, - pll_rst_out => pll_rst, - rst_out => soc_rst - ); - - clkgen: entity work.clock_generator - generic map( - CLK_INPUT_HZ => 100000000, - CLK_OUTPUT_HZ => CLK_FREQUENCY - ) - port map( - ext_clk => ext_clk, - pll_rst_in => pll_rst, - pll_clk_out => system_clk, - pll_locked_out => system_clk_locked - ); - - led0_b_pwm <= '1'; - led0_r_pwm <= '1'; - led0_g_pwm <= '0'; + reset_controller: entity work.soc_reset + generic map( + RESET_LOW => RESET_LOW + ) + port map( + ext_clk => ext_clk, + pll_clk => system_clk, + pll_locked_in => system_clk_locked, + ext_rst_in => ext_rst, + pll_rst_out => pll_rst, + rst_out => soc_rst + ); + + clkgen: entity work.clock_generator + generic map( + CLK_INPUT_HZ => 100000000, + CLK_OUTPUT_HZ => CLK_FREQUENCY + ) + port map( + ext_clk => ext_clk, + pll_rst_in => pll_rst, + pll_clk_out => system_clk, + pll_locked_out => 
system_clk_locked + ); + + led0_b_pwm <= '1'; + led0_r_pwm <= '1'; + led0_g_pwm <= '0'; core_alt_reset <= '0'; -- Vivado barfs on those differential signals if left @@ -179,91 +252,91 @@ begin has_dram: if USE_LITEDRAM generate signal dram_init_done : std_ulogic; - signal dram_init_error : std_ulogic; - signal dram_sys_rst : std_ulogic; + signal dram_init_error : std_ulogic; + signal dram_sys_rst : std_ulogic; begin - -- Eventually dig out the frequency from the generator - -- but for now, assert it's 100Mhz - assert CLK_FREQUENCY = 100000000; + -- Eventually dig out the frequency from the generator + -- but for now, assert it's 100Mhz + assert CLK_FREQUENCY = 100000000; - reset_controller: entity work.soc_reset - generic map( - RESET_LOW => RESET_LOW, + reset_controller: entity work.soc_reset + generic map( + RESET_LOW => RESET_LOW, PLL_RESET_BITS => 18, SOC_RESET_BITS => 1 - ) - port map( - ext_clk => ext_clk, - pll_clk => system_clk, - pll_locked_in => '1', - ext_rst_in => ext_rst, - pll_rst_out => pll_rst, - rst_out => open - ); - - dram: entity work.litedram_wrapper - generic map( - DRAM_ABITS => 24, - DRAM_ALINES => 14, + ) + port map( + ext_clk => ext_clk, + pll_clk => system_clk, + pll_locked_in => '1', + ext_rst_in => ext_rst, + pll_rst_out => pll_rst, + rst_out => open + ); + + dram: entity work.litedram_wrapper + generic map( + DRAM_ABITS => 24, + DRAM_ALINES => 14, PAYLOAD_FILE => RAM_INIT_FILE, PAYLOAD_SIZE => PAYLOAD_SIZE - ) - port map( - clk_in => ext_clk, - rst => pll_rst, - system_clk => system_clk, - system_reset => soc_rst, - core_alt_reset => core_alt_reset, - pll_locked => system_clk_locked, - - wb_in => wb_dram_in, - wb_out => wb_dram_out, - wb_ctrl_in => wb_dram_ctrl_in, - wb_ctrl_out => wb_dram_ctrl_out, - wb_ctrl_is_csr => wb_dram_is_csr, - wb_ctrl_is_init => wb_dram_is_init, - - init_done => dram_init_done, - init_error => dram_init_error, - - ddram_a => ddram_a, - ddram_ba => ddram_ba, - ddram_ras_n => ddram_ras_n, - ddram_cas_n => 
ddram_cas_n, - ddram_we_n => ddram_we_n, - ddram_cs_n => ddram_cs_n, - ddram_dm => ddram_dm, - ddram_dq => ddram_dq, - ddram_dqs_p => ddram_dqs_p, - ddram_dqs_n => ddram_dqs_n, - ddram_clk_p => ddram_clk_p, - ddram_clk_n => ddram_clk_n, - ddram_cke => ddram_cke, - ddram_odt => ddram_odt, - ddram_reset_n => ddram_reset_n - ); - - led0_b_pwm <= not dram_init_done; - led0_r_pwm <= dram_init_error; - led0_g_pwm <= dram_init_done and not dram_init_error; + ) + port map( + clk_in => ext_clk, + rst => pll_rst, + system_clk => system_clk, + system_reset => soc_rst, + core_alt_reset => core_alt_reset, + pll_locked => system_clk_locked, + + wb_in => wb_dram_in, + wb_out => wb_dram_out, + wb_ctrl_in => wb_dram_ctrl_in, + wb_ctrl_out => wb_dram_ctrl_out, + wb_ctrl_is_csr => wb_dram_is_csr, + wb_ctrl_is_init => wb_dram_is_init, + + init_done => dram_init_done, + init_error => dram_init_error, + + ddram_a => ddram_a, + ddram_ba => ddram_ba, + ddram_ras_n => ddram_ras_n, + ddram_cas_n => ddram_cas_n, + ddram_we_n => ddram_we_n, + ddram_cs_n => ddram_cs_n, + ddram_dm => ddram_dm, + ddram_dq => ddram_dq, + ddram_dqs_p => ddram_dqs_p, + ddram_dqs_n => ddram_dqs_n, + ddram_clk_p => ddram_clk_p, + ddram_clk_n => ddram_clk_n, + ddram_cke => ddram_cke, + ddram_odt => ddram_odt, + ddram_reset_n => ddram_reset_n + ); + + led0_b_pwm <= not dram_init_done; + led0_r_pwm <= dram_init_error; + led0_g_pwm <= dram_init_done and not dram_init_error; end generate; leds_pwm : process(system_clk) begin - if rising_edge(system_clk) then - pwm_counter <= std_ulogic_vector(signed(pwm_counter) + 1); - if pwm_counter(8 downto 4) = "00000" then - led0_b <= led0_b_pwm; - led0_r <= led0_r_pwm; - led0_g <= led0_g_pwm; - else - led0_b <= '0'; - led0_r <= '0'; - led0_g <= '0'; - end if; - end if; + if rising_edge(system_clk) then + pwm_counter <= std_ulogic_vector(signed(pwm_counter) + 1); + if pwm_counter(8 downto 4) = "00000" then + led0_b <= led0_b_pwm; + led0_r <= led0_r_pwm; + led0_g <= led0_g_pwm; + else 
+ led0_b <= '0'; + led0_r <= '0'; + led0_g <= '0'; + end if; + end if; end process; end architecture behaviour; diff --git a/fpga/top-generic.vhdl b/fpga/top-generic.vhdl index a4c4e73..99e7253 100644 --- a/fpga/top-generic.vhdl +++ b/fpga/top-generic.vhdl @@ -78,13 +78,19 @@ begin RESET_LOW => RESET_LOW, SIM => false, CLK_FREQ => CLK_FREQUENCY, - DISABLE_FLATTEN_CORE => DISABLE_FLATTEN_CORE + DISABLE_FLATTEN_CORE => DISABLE_FLATTEN_CORE, + HAS_SPI_FLASH => false ) port map ( system_clk => system_clk, rst => soc_rst, uart0_txd => uart0_txd, uart0_rxd => uart0_rxd, + spi_flash_sck => open, + spi_flash_cs_n => open, + spi_flash_sdat_o => open, + spi_flash_sdat_oe => open, + spi_flash_sdat_i => "1", wb_dram_in => wb_dram_in, wb_dram_out => wb_dram_out, wb_dram_ctrl_in => wb_dram_ctrl_in, diff --git a/fpga/top-nexys-video.vhdl b/fpga/top-nexys-video.vhdl index 45c2f39..ec78c3e 100644 --- a/fpga/top-nexys-video.vhdl +++ b/fpga/top-nexys-video.vhdl @@ -16,7 +16,10 @@ entity toplevel is CLK_FREQUENCY : positive := 100000000; USE_LITEDRAM : boolean := false; NO_BRAM : boolean := false; - DISABLE_FLATTEN_CORE : boolean := false + DISABLE_FLATTEN_CORE : boolean := false; + SPI_FLASH_OFFSET : integer := 10485760; + SPI_FLASH_DEF_CKDV : natural := 1; + SPI_FLASH_DEF_QUAD : boolean := true ); port( ext_clk : in std_ulogic; @@ -30,6 +33,13 @@ entity toplevel is led0 : out std_logic; led1 : out std_logic; + -- SPI + spi_flash_cs_n : out std_ulogic; + spi_flash_mosi : inout std_ulogic; + spi_flash_miso : inout std_ulogic; + spi_flash_wp_n : inout std_ulogic; + spi_flash_hold_n : inout std_ulogic; + -- DRAM wires ddram_a : out std_logic_vector(14 downto 0); ddram_ba : out std_logic_vector(2 downto 0); @@ -71,6 +81,13 @@ architecture behaviour of toplevel is -- Control/status signal core_alt_reset : std_ulogic; + -- SPI flash + signal spi_sck : std_ulogic; + signal spi_cs_n : std_ulogic; + signal spi_sdat_o : std_ulogic_vector(3 downto 0); + signal spi_sdat_oe : std_ulogic_vector(3 downto 0); + signal spi_sdat_i 
: std_ulogic_vector(3 downto 0); + -- Fixup various memory sizes based on generics function get_bram_size return natural is begin @@ -105,13 +122,30 @@ begin HAS_DRAM => USE_LITEDRAM, DRAM_SIZE => 512 * 1024 * 1024, DRAM_INIT_SIZE => PAYLOAD_SIZE, - DISABLE_FLATTEN_CORE => DISABLE_FLATTEN_CORE + DISABLE_FLATTEN_CORE => DISABLE_FLATTEN_CORE, + HAS_SPI_FLASH => true, + SPI_FLASH_DLINES => 4, + SPI_FLASH_OFFSET => SPI_FLASH_OFFSET, + SPI_FLASH_DEF_CKDV => SPI_FLASH_DEF_CKDV, + SPI_FLASH_DEF_QUAD => SPI_FLASH_DEF_QUAD ) port map ( + -- System signals system_clk => system_clk, rst => soc_rst, - uart0_txd => uart_main_tx, + + -- UART signals + uart0_txd => uart_main_tx, uart0_rxd => uart_main_rx, + + -- SPI signals + spi_flash_sck => spi_sck, + spi_flash_cs_n => spi_cs_n, + spi_flash_sdat_o => spi_sdat_o, + spi_flash_sdat_oe => spi_sdat_oe, + spi_flash_sdat_i => spi_sdat_i, + + -- DRAM wishbone wb_dram_in => wb_dram_in, wb_dram_out => wb_dram_out, wb_dram_ctrl_in => wb_dram_ctrl_in, @@ -121,6 +155,32 @@ begin alt_reset => core_alt_reset ); + -- SPI Flash. 
The SPI clk needs to be fed through the STARTUPE2 + -- primitive of the FPGA as it's not a normal pin + -- + spi_flash_cs_n <= spi_cs_n; + spi_flash_mosi <= spi_sdat_o(0) when spi_sdat_oe(0) = '1' else 'Z'; + spi_flash_miso <= spi_sdat_o(1) when spi_sdat_oe(1) = '1' else 'Z'; + spi_flash_wp_n <= spi_sdat_o(2) when spi_sdat_oe(2) = '1' else 'Z'; + spi_flash_hold_n <= spi_sdat_o(3) when spi_sdat_oe(3) = '1' else 'Z'; + spi_sdat_i(0) <= spi_flash_mosi; + spi_sdat_i(1) <= spi_flash_miso; + spi_sdat_i(2) <= spi_flash_wp_n; + spi_sdat_i(3) <= spi_flash_hold_n; + + STARTUPE2_INST: STARTUPE2 + port map ( + CLK => '0', + GSR => '0', + GTS => '0', + KEYCLEARB => '0', + PACK => '0', + USRCCLKO => spi_sck, + USRCCLKTS => '0', + USRDONEO => '1', + USRDONETS => '0' + ); + nodram: if not USE_LITEDRAM generate signal ddram_clk_dummy : std_ulogic; begin diff --git a/include/microwatt_soc.h b/include/microwatt_soc.h index 51c3266..037bcc2 100644 --- a/include/microwatt_soc.h +++ b/include/microwatt_soc.h @@ -12,8 +12,10 @@ #define SYSCON_BASE 0xc0000000 /* System control regs */ #define UART_BASE 0xc0002000 /* UART */ #define XICS_BASE 0xc0004000 /* Interrupt controller */ +#define SPI_FCTRL_BASE 0xc0006000 /* SPI flash controller registers */ #define DRAM_CTRL_BASE 0xc0100000 /* LiteDRAM control registers */ -#define DRAM_INIT_BASE 0xf0000000 /* Internal DRAM init firmware */ +#define SPI_FLASH_BASE 0xf0000000 /* SPI Flash memory map */ +#define DRAM_INIT_BASE 0xff000000 /* Internal DRAM init firmware */ /* * Register definitions for the syscon registers @@ -24,6 +26,7 @@ #define SYS_REG_INFO_HAS_UART (1ull << 0) #define SYS_REG_INFO_HAS_DRAM (1ull << 1) #define SYS_REG_INFO_HAS_BRAM (1ull << 2) +#define SYS_REG_INFO_HAS_SPI_FLASH (1ull << 3) #define SYS_REG_BRAMINFO 0x10 #define SYS_REG_BRAMINFO_SIZE_MASK 0xfffffffffffffull #define SYS_REG_DRAMINFO 0x18 @@ -35,6 +38,10 @@ #define SYS_REG_CTRL_CORE_RESET (1ull << 1) #define SYS_REG_CTRL_SOC_RESET (1ull << 2) #define 
SYS_REG_DRAMINITINFO 0x30 +#define SYS_REG_SPI_INFO 0x38 +#define SYS_REG_SPI_INFO_FLASH_OFF_MASK 0xffffffff + + /* * Register definitions for the potato UART @@ -49,5 +56,30 @@ #define POTATO_CONSOLE_CLOCK_DIV 0x18 #define POTATO_CONSOLE_IRQ_EN 0x20 +/* + * Register definitions for the SPI controller + */ +#define SPI_REG_DATA 0x00 /* Byte access: single wire transfer */ +#define SPI_REG_DATA_DUAL 0x01 /* Byte access: dual wire transfer */ +#define SPI_REG_DATA_QUAD 0x02 /* Byte access: quad wire transfer */ +#define SPI_REG_CTRL 0x04 /* Reset and manual mode control */ +#define SPI_REG_CTRL_RESET 0x01 /* reset all registers */ +#define SPI_REG_CTRL_MANUAL_CS 0x02 /* assert CS, enable manual mode */ +#define SPI_REG_CTRL_CKDIV_SHIFT 8 /* clock div */ +#define SPI_REG_CTRL_CKDIV_MASK 0xff +#define SPI_REG_AUTO_CFG 0x08 /* Automatic map configuration */ +#define SPI_REG_AUTO_CFG_CMD_SHIFT 0 /* Command to use for reads */ +#define SPI_REG_AUTO_CFG_CMD_MASK 0xff +#define SPI_REG_AUTO_CFG_DUMMIES_SHIFT 8 /* # dummy cycles */ +#define SPI_REG_AUTO_CFG_DUMMIES_MASK 0x7 +#define SPI_REG_AUTO_CFG_MODE_SHIFT 11 /* SPI wire mode */ +#define SPI_REG_AUTO_CFG_MODE_MASK 0x3 +#define SPI_REG_AUT_CFG_MODE_SINGLE (0 << 11) +#define SPI_REG_AUT_CFG_MODE_DUAL (2 << 11) +#define SPI_REG_AUT_CFG_MODE_QUAD (3 << 11) +#define SPI_REG_AUTO_CFG_ADDR4 (1u << 13) /* 3 or 4 addr bytes */ +#define SPI_REG_AUTO_CFG_CKDIV_SHIFT 16 /* clock div */ +#define SPI_REG_AUTO_CFG_CKDIV_MASK 0xff + #endif /* __MICROWATT_SOC_H */ diff --git a/microwatt.core b/microwatt.core index fb26f63..87ef39d 100644 --- a/microwatt.core +++ b/microwatt.core @@ -49,6 +49,8 @@ filesets: - xics.vhdl - syscon.vhdl - sync_fifo.vhdl + - spi_rxtx.vhdl + - spi_flash_ctrl.vhdl file_type : vhdlSource-2008 fpga: @@ -119,6 +121,7 @@ targets: - clk_input - clk_frequency - disable_flatten_core + - spi_flash_offset=10485760 tools: vivado: {part : xc7a200tsbg484-1} toplevel : toplevel @@ -132,6 +135,7 @@ targets: - use_litedram=true 
- disable_flatten_core - no_bram + - spi_flash_offset=10485760 generate: [dram_nexys_video] tools: vivado: {part : xc7a200tsbg484-1} @@ -146,6 +150,7 @@ targets: - clk_input - clk_frequency - disable_flatten_core + - spi_flash_offset=3145728 tools: vivado: {part : xc7a35ticsg324-1L} toplevel : toplevel @@ -159,6 +164,7 @@ targets: - use_litedram=true - disable_flatten_core - no_bram + - spi_flash_offset=3145728 generate: [dram_arty] tools: vivado: {part : xc7a35ticsg324-1L} @@ -173,6 +179,7 @@ targets: - clk_input - clk_frequency - disable_flatten_core + - spi_flash_offset=4194304 tools: vivado: {part : xc7a100ticsg324-1L} toplevel : toplevel @@ -186,6 +193,7 @@ targets: - use_litedram=true - disable_flatten_core - no_bram + - spi_flash_offset=4194304 generate: [dram_arty] tools: vivado: {part : xc7a100ticsg324-1L} @@ -266,3 +274,8 @@ parameters: description : No internal block RAM (only DRAM and init code carrying payload) paramtype : generic default : false + + spi_flash_offset: + datatype : int + description : Offset (in bytes) in the SPI flash of the code payload to run + paramtype : generic diff --git a/soc.vhdl b/soc.vhdl index 62d6ac4..047be96 100644 --- a/soc.vhdl +++ b/soc.vhdl @@ -21,20 +21,27 @@ use work.wishbone_types.all; -- 0xc0000000: SYSCON -- 0xc0002000: UART0 -- 0xc0004000: XICS ICP +-- 0xc0006000: SPI Flash controller -- 0xc0100000: LiteDRAM control (CSRs) --- 0xf0000000: DRAM init code (if any) +-- 0xf0000000: Flash "ROM" mapping +-- 0xff000000: DRAM init code (if any) or flash ROM entity soc is generic ( - MEMORY_SIZE : natural; - RAM_INIT_FILE : string; - RESET_LOW : boolean; - CLK_FREQ : positive; - SIM : boolean; + MEMORY_SIZE : natural; + RAM_INIT_FILE : string; + RESET_LOW : boolean; + CLK_FREQ : positive; + SIM : boolean; DISABLE_FLATTEN_CORE : boolean := false; - HAS_DRAM : boolean := false; - DRAM_SIZE : integer := 0; - DRAM_INIT_SIZE : integer := 0 + HAS_DRAM : boolean := false; + DRAM_SIZE : integer := 0; + DRAM_INIT_SIZE : integer := 
0; + HAS_SPI_FLASH : boolean := false; + SPI_FLASH_DLINES : positive := 1; + SPI_FLASH_OFFSET : integer := 0; + SPI_FLASH_DEF_CKDV : natural := 2; + SPI_FLASH_DEF_QUAD : boolean := false ); port( rst : in std_ulogic; @@ -52,6 +59,13 @@ entity soc is uart0_txd : out std_ulogic; uart0_rxd : in std_ulogic; + -- SPI Flash signals + spi_flash_sck : out std_ulogic; + spi_flash_cs_n : out std_ulogic; + spi_flash_sdat_o : out std_ulogic_vector(SPI_FLASH_DLINES-1 downto 0); + spi_flash_sdat_oe : out std_ulogic_vector(SPI_FLASH_DLINES-1 downto 0); + spi_flash_sdat_i : in std_ulogic_vector(SPI_FLASH_DLINES-1 downto 0); + -- DRAM controller signals alt_reset : in std_ulogic ); @@ -96,6 +110,12 @@ architecture behaviour of soc is signal wb_uart0_out : wb_io_slave_out; signal uart_dat8 : std_ulogic_vector(7 downto 0); + -- SPI Flash controller signals: + signal wb_spiflash_in : wb_io_master_out; + signal wb_spiflash_out : wb_io_slave_out; + signal wb_spiflash_is_reg : std_ulogic; + signal wb_spiflash_is_map : std_ulogic; + -- XICS0 signals: signal wb_xics0_in : wb_io_master_out; signal wb_xics0_out : wb_io_slave_out; @@ -127,12 +147,23 @@ architecture behaviour of soc is signal rst_core : std_ulogic := '1'; signal rst_uart : std_ulogic := '1'; signal rst_xics : std_ulogic := '1'; + signal rst_spi : std_ulogic := '1'; signal rst_bram : std_ulogic := '1'; signal rst_dtm : std_ulogic := '1'; signal rst_wbar : std_ulogic := '1'; signal rst_wbdb : std_ulogic := '1'; signal alt_reset_d : std_ulogic; + -- IO branch split: + type slave_io_type is (SLAVE_IO_SYSCON, + SLAVE_IO_UART, + SLAVE_IO_DRAM_INIT, + SLAVE_IO_DRAM_CSR, + SLAVE_IO_ICP_0, + SLAVE_IO_SPI_FLASH_REG, + SLAVE_IO_SPI_FLASH_MAP, + SLAVE_IO_NONE); + signal slave_io_dbg : slave_io_type; begin resets: process(system_clk) @@ -140,6 +171,7 @@ begin if rising_edge(system_clk) then rst_core <= rst or do_core_reset; rst_uart <= rst; + rst_spi <= rst; rst_xics <= rst; rst_bram <= rst; rst_dtm <= rst; @@ -154,7 +186,7 @@ begin 
generic map( SIM => SIM, DISABLE_FLATTEN => DISABLE_FLATTEN_CORE, - ALT_RESET_ADDRESS => (27 downto 0 => '0', others => '1') + ALT_RESET_ADDRESS => (23 downto 0 => '0', others => '1') ) port map( clk => system_clk, @@ -389,14 +421,7 @@ begin -- IO wishbone slave intercon. -- slave_io_intercon: process(wb_sio_out, wb_syscon_out, wb_uart0_out, - wb_dram_ctrl_out, wb_xics0_out) - -- IO branch split: - type slave_io_type is (SLAVE_IO_SYSCON, - SLAVE_IO_UART, - SLAVE_IO_DRAM_INIT, - SLAVE_IO_DRAM_CSR, - SLAVE_IO_ICP_0, - SLAVE_IO_NONE); + wb_dram_ctrl_out, wb_xics0_out, wb_spiflash_out) variable slave_io : slave_io_type; variable match : std_ulogic_vector(31 downto 12); @@ -405,8 +430,10 @@ begin -- Simple address decoder. slave_io := SLAVE_IO_NONE; match := "11" & wb_sio_out.adr(29 downto 12); - if std_match(match, x"F----") then + if std_match(match, x"FF---") and HAS_DRAM then slave_io := SLAVE_IO_DRAM_INIT; + elsif std_match(match, x"F----") then + slave_io := SLAVE_IO_SPI_FLASH_MAP; elsif std_match(match, x"C0000") then slave_io := SLAVE_IO_SYSCON; elsif std_match(match, x"C0002") then @@ -415,9 +442,16 @@ begin slave_io := SLAVE_IO_DRAM_CSR; elsif std_match(match, x"C0004") then slave_io := SLAVE_IO_ICP_0; + elsif std_match(match, x"C0006") then + slave_io := SLAVE_IO_SPI_FLASH_REG; end if; + slave_io_dbg <= slave_io; wb_uart0_in <= wb_sio_out; wb_uart0_in.cyc <= '0'; + wb_spiflash_in <= wb_sio_out; + wb_spiflash_in.cyc <= '0'; + wb_spiflash_is_reg <= '0'; + wb_spiflash_is_map <= '0'; -- Only give xics 8 bits of wb addr wb_xics0_in <= wb_sio_out; @@ -451,6 +485,17 @@ begin when SLAVE_IO_ICP_0 => wb_xics0_in.cyc <= wb_sio_out.cyc; wb_sio_in <= wb_xics0_out; + when SLAVE_IO_SPI_FLASH_MAP => + -- Clear top bits so they don't make their way to the + -- flash chip. 
+ wb_spiflash_in.adr(29 downto 28) <= "00"; + wb_spiflash_in.cyc <= wb_sio_out.cyc; + wb_sio_in <= wb_spiflash_out; + wb_spiflash_is_map <= '1'; + when SLAVE_IO_SPI_FLASH_REG => + wb_spiflash_in.cyc <= wb_sio_out.cyc; + wb_sio_in <= wb_spiflash_out; + wb_spiflash_is_reg <= '1'; when others => wb_sio_in.dat <= (others => '1'); wb_sio_in.ack <= wb_sio_out.stb and wb_sio_out.cyc; @@ -467,7 +512,9 @@ begin BRAM_SIZE => MEMORY_SIZE, DRAM_SIZE => DRAM_SIZE, DRAM_INIT_SIZE => DRAM_INIT_SIZE, - CLK_FREQ => CLK_FREQ + CLK_FREQ => CLK_FREQ, + HAS_SPI_FLASH => HAS_SPI_FLASH, + SPI_FLASH_OFFSET => SPI_FLASH_OFFSET ) port map( clk => system_clk, @@ -503,6 +550,34 @@ begin wb_uart0_out.dat <= x"000000" & uart_dat8; wb_uart0_out.stall <= '0' when wb_uart0_in.cyc = '0' else not wb_uart0_out.ack; + spiflash_gen: if HAS_SPI_FLASH generate + spiflash: entity work.spi_flash_ctrl + generic map ( + DATA_LINES => SPI_FLASH_DLINES, + DEF_CLK_DIV => SPI_FLASH_DEF_CKDV, + DEF_QUAD_READ => SPI_FLASH_DEF_QUAD + ) + port map( + rst => rst_spi, + clk => system_clk, + wb_in => wb_spiflash_in, + wb_out => wb_spiflash_out, + wb_sel_reg => wb_spiflash_is_reg, + wb_sel_map => wb_spiflash_is_map, + sck => spi_flash_sck, + cs_n => spi_flash_cs_n, + sdat_o => spi_flash_sdat_o, + sdat_oe => spi_flash_sdat_oe, + sdat_i => spi_flash_sdat_i + ); + end generate; + + no_spi0_gen: if not HAS_SPI_FLASH generate + wb_spiflash_out.dat <= (others => '1'); + wb_spiflash_out.ack <= wb_spiflash_in.cyc and wb_spiflash_in.stb; + wb_spiflash_out.stall <= wb_spiflash_in.cyc and not wb_spiflash_out.ack; + end generate; + xics0: entity work.xics generic map( LEVEL_NUM => 16 diff --git a/spi_flash_ctrl.vhdl b/spi_flash_ctrl.vhdl new file mode 100644 index 0000000..bc41d08 --- /dev/null +++ b/spi_flash_ctrl.vhdl @@ -0,0 +1,601 @@ +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +library work; +use work.wishbone_types.all; + +entity spi_flash_ctrl is + generic ( + -- Default config for auto-mode + 
DEF_CLK_DIV   : natural := 2;          -- Reset value of both clock dividers: SCK = CLK/((CLK_DIV+1)*2)
+        DEF_QUAD_READ : boolean := false;      -- Reset auto mode to quad read (cmd 0x6b, 8 clk dummy) instead of cmd 0x03
+
+        -- Number of data lines (1=MISO/MOSI, otherwise 2 or 4)
+        DATA_LINES    : positive := 1
+        );
+    port (
+        clk : in std_ulogic;
+        rst : in std_ulogic;
+
+        -- Wishbone ports:
+        wb_in  : in wb_io_master_out;
+        wb_out : out wb_io_slave_out;
+
+        -- Wishbone extra selects: wb_sel_reg qualifies register accesses, wb_sel_map flash map accesses
+        wb_sel_reg : in std_ulogic;
+        wb_sel_map : in std_ulogic;
+
+        -- SPI port (cs_n is the inverse of the manual/automatic CS; sdat_oe is the per-line output enable)
+        sck     : out std_ulogic;
+        cs_n    : out std_ulogic;
+        sdat_o  : out std_ulogic_vector(DATA_LINES-1 downto 0);
+        sdat_oe : out std_ulogic_vector(DATA_LINES-1 downto 0);
+        sdat_i  : in  std_ulogic_vector(DATA_LINES-1 downto 0)
+        );
+end entity spi_flash_ctrl;
+
+architecture rtl of spi_flash_ctrl is
+
+    -- Number of wishbone address bits used to select a register
+    constant SPI_REG_BITS : positive := 3;
+
+    -- Register addresses (matches wishbone addr downto 2, ie, 4 bytes per reg)
+    constant SPI_REG_DATA     : std_ulogic_vector(SPI_REG_BITS-1 downto 0) := "000";
+    constant SPI_REG_CTRL     : std_ulogic_vector(SPI_REG_BITS-1 downto 0) := "001";
+    constant SPI_REG_AUTO_CFG : std_ulogic_vector(SPI_REG_BITS-1 downto 0) := "010";
+    constant SPI_REG_INVALID  : std_ulogic_vector(SPI_REG_BITS-1 downto 0) := "111"; -- decode result for non-register accesses
+
+    -- Control register
+    signal ctrl_reg   : std_ulogic_vector(15 downto 0) := (others => '0');
+    alias  ctrl_reset : std_ulogic is ctrl_reg(0); -- self-clearing: reloads register defaults and idles the auto machine
+    alias  ctrl_cs    : std_ulogic is ctrl_reg(1); -- manual mode: asserts CS and routes data register accesses to rxtx
+    alias  ctrl_rsrv1 : std_ulogic is ctrl_reg(2);
+    alias  ctrl_rsrv2 : std_ulogic is ctrl_reg(3);
+    alias  ctrl_div   : std_ulogic_vector(7 downto 0) is ctrl_reg(15 downto 8); -- clock divider used in manual mode
+
+    -- Auto mode config register
+    signal auto_cfg_reg     : std_ulogic_vector(29 downto 0) := (others => '0');
+    alias  auto_cfg_cmd     : std_ulogic_vector(7 downto 0) is auto_cfg_reg(7 downto 0);   -- command byte sent first
+    alias  auto_cfg_dummies : std_ulogic_vector(2 downto 0) is auto_cfg_reg(10 downto 8);  -- dummy clocks-1, "000" skips the dummy phase
+    alias  auto_cfg_mode    : std_ulogic_vector(1 downto 0) is auto_cfg_reg(12 downto 11); -- data phase mode, see constants below
+    alias  auto_cfg_addr4   : std_ulogic                    is auto_cfg_reg(13);           -- '1' sends 4 address bytes, else 3
+    alias  auto_cfg_rsrv1   : std_ulogic                    is auto_cfg_reg(14);
+    alias  auto_cfg_rsrv2   : std_ulogic                    is auto_cfg_reg(15);
+    alias  auto_cfg_div     : std_ulogic_vector(7 downto 0) is auto_cfg_reg(23 downto 16); -- clock divider used in auto mode
+    alias  auto_cfg_cstout  : std_ulogic_vector(5 downto 0) is auto_cfg_reg(29 downto 24); -- clocks CS is held waiting for a sequential read
+
+    -- Constants below match top 2 bits of rxtx "mode"
+    constant SPI_AUTO_CFG_MODE_SINGLE : std_ulogic_vector(1 downto 0) := "00";
+    constant SPI_AUTO_CFG_MODE_DUAL   : std_ulogic_vector(1 downto 0) := "10";
+    constant SPI_AUTO_CFG_MODE_QUAD   : std_ulogic_vector(1 downto 0) := "11";
+
+    -- Signals to rxtx
+    signal cmd_valid   : std_ulogic;
+    signal cmd_clk_div : natural range 0 to 255;
+    signal cmd_mode    : std_ulogic_vector(2 downto 0);
+    signal cmd_ready   : std_ulogic;
+    signal d_clks      : std_ulogic_vector(2 downto 0); -- clocks-1 of the command
+    signal d_rx        : std_ulogic_vector(7 downto 0);
+    signal d_tx        : std_ulogic_vector(7 downto 0);
+    signal d_ack       : std_ulogic;
+    signal bus_idle    : std_ulogic;
+
+    -- Latch to track that we have a pending read
+    signal pending_read : std_ulogic;
+
+    -- Wishbone latches: current request, one-entry stash used while stalled, response
+    signal wb_req   : wb_io_master_out;
+    signal wb_stash : wb_io_master_out;
+    signal wb_rsp   : wb_io_slave_out;
+
+    -- Wishbone decode
+    signal wb_valid     : std_ulogic;
+    signal wb_reg_valid : std_ulogic;
+    signal wb_reg_dat_v : std_ulogic;
+    signal wb_map_valid : std_ulogic;
+    signal wb_reg       : std_ulogic_vector(SPI_REG_BITS-1 downto 0);
+
+    -- Auto mode clock counts XXX FIXME: Look at reasonable values based
+    -- on system clock maybe ? Or make them programmable.
+    constant CS_DELAY_ASSERT    : integer := 1;  -- CS low to cmd
+    constant CS_DELAY_RECOVERY  : integer := 10; -- CS high to CS low
+    constant DEFAULT_CS_TIMEOUT : integer := 32; -- Reset value of auto_cfg_cstout
+
+    -- Automatic mode state
+    type auto_state_t is (AUTO_IDLE, AUTO_CS_ON, AUTO_CMD,
+                          AUTO_ADR0, AUTO_ADR1, AUTO_ADR2, AUTO_ADR3,
+                          AUTO_DUMMY,
+                          AUTO_DAT0, AUTO_DAT1, AUTO_DAT2, AUTO_DAT3,
+                          AUTO_DAT0_DATA, AUTO_DAT1_DATA, AUTO_DAT2_DATA, AUTO_DAT3_DATA,
+                          AUTO_SEND_ACK, AUTO_WAIT_REQ, AUTO_RECOVERY);
+    -- Automatic mode signals (combinational outputs of auto_comb)
+    signal auto_cs        : std_ulogic;
+    signal auto_cmd_valid : std_ulogic;
+    signal auto_cmd_mode  : std_ulogic_vector(2 downto 0);
+    signal auto_d_txd     : std_ulogic_vector(7 downto 0);
+    signal auto_d_clks    : std_ulogic_vector(2 downto 0);
+    signal auto_data_next : std_ulogic_vector(wb_out.dat'left downto 0);
+    signal auto_cnt_next  : integer range 0 to 63;
+    signal auto_ack       : std_ulogic;
+    signal auto_next      : auto_state_t;
+    signal auto_lad_next  : std_ulogic_vector(31 downto 0);
+    signal auto_latch_adr : std_ulogic;
+
+    -- Automatic mode latches (registered in auto_sync)
+    signal auto_data      : std_ulogic_vector(wb_out.dat'left downto 0) := (others => '0');
+    signal auto_cnt       : integer range 0 to 63 := 0;
+    signal auto_state     : auto_state_t := AUTO_IDLE;
+    signal auto_last_addr : std_ulogic_vector(31 downto 0);
+
+begin
+
+    -- Instantiate low level shifter
+    spi_rxtx: entity work.spi_rxtx
+        generic map (
+            DATA_LINES => DATA_LINES
+        )
+        port map(
+            rst         => rst,
+            clk         => clk,
+            clk_div_i   => cmd_clk_div,
+            cmd_valid_i => cmd_valid,
+            cmd_ready_o => cmd_ready,
+            cmd_mode_i  => cmd_mode,
+            cmd_clks_i  => d_clks,
+            cmd_txd_i   => d_tx,
+            d_rxd_o     => d_rx,
+            d_ack_o     => d_ack,
+            bus_idle_o  => bus_idle,
+            sck         => sck,
+            sdat_o      => sdat_o,
+            sdat_oe     => sdat_oe,
+            sdat_i      => sdat_i
+        );
+
+    -- Valid wb command
+    wb_valid     <= wb_req.stb and wb_req.cyc;
+    wb_reg_valid <= wb_valid and wb_sel_reg;
+    wb_map_valid <= wb_valid and wb_sel_map;
+
+    -- Register decode. For map accesses, make it look like "invalid"
+    wb_reg <= wb_req.adr(SPI_REG_BITS+1 downto 2) when wb_reg_valid else SPI_REG_INVALID;
+
+    -- Shortcut because we test that a lot: data register access
+    wb_reg_dat_v <= '1' when wb_reg = SPI_REG_DATA else '0';
+
+    -- Wishbone request -> SPI request
+    wb_request_sync: process(clk)
+    begin
+        if rising_edge(clk) then
+            -- Latch whether a *read* is in progress so that a subsequent
+            -- store can be blocked, otherwise the acks would collide.
+            --
+            -- Only loads set the latch (not wb_req.we): stores are acked as
+            -- soon as rxtx accepts them, so they must not hold back the
+            -- stores queued behind them. The latch clears on an idle bus.
+            --
+            if cmd_valid = '1' and cmd_ready = '1' then
+                pending_read <= not wb_req.we;
+            elsif bus_idle = '1' then
+                pending_read <= '0';
+            end if;
+        end if;
+    end process;
+
+    wb_request_comb: process(all)
+    begin
+        if ctrl_cs = '1' then
+            -- Data register access (see wb_request_sync)
+            cmd_valid <= wb_reg_dat_v and not (pending_read and wb_req.we);
+
+            -- Clock divider from control reg
+            cmd_clk_div <= to_integer(unsigned(ctrl_div));
+
+            -- Mode based on sel
+            if wb_req.sel = "0010" then
+                -- dual mode
+                cmd_mode <= "10" & wb_req.we;
+                d_clks   <= "011";
+            elsif wb_req.sel = "0100" then
+                -- quad mode
+                cmd_mode <= "11" & wb_req.we;
+                d_clks   <= "001";
+            else
+                -- single bit
+                cmd_mode <= "01" & wb_req.we;
+                d_clks   <= "111";
+            end if;
+            d_tx <= wb_req.dat(7 downto 0);
+            cs_n <= not ctrl_cs;
+        else
+            cmd_valid   <= auto_cmd_valid;
+            cmd_mode    <= auto_cmd_mode;
+            cmd_clk_div <= to_integer(unsigned(auto_cfg_div));
+            d_tx        <= auto_d_txd;
+            d_clks      <= auto_d_clks;
+            cs_n        <= not auto_cs;
+        end if;
+    end process;
+
+    -- Generate wishbone responses
+    --
+    -- Note: wb_out and wb_in should only appear in this synchronous process
+    --
+    -- Everything else should work on wb_req and wb_rsp
+    wb_response_sync: process(clk)
+    begin
+        if rising_edge(clk) then
+            if rst = '1' then
+                wb_out.ack   <= '0';
+                wb_out.stall <= '0';
+            else
+                -- Latch wb responses as well for 1 cycle. Stall is updated
+                -- below
+                wb_out <= wb_rsp;
+
+                -- Implement a stash buffer. If we are stalled and stash is
+                -- free, fill it up. This will generate a WB stall on the
+                -- next cycle.
+                if wb_rsp.stall = '1' and wb_out.stall = '0' and
+                    wb_in.cyc = '1' and wb_in.stb = '1' then
+                    wb_stash     <= wb_in;
+                    wb_out.stall <= '1';
+                end if;
+
+                -- We aren't stalled, see what we can do
+                if wb_rsp.stall = '0' then
+                    if wb_out.stall = '1' then
+                        -- Something in stash ! use it and clear stash
+                        wb_req       <= wb_stash;
+                        wb_out.stall <= '0';
+                    else
+                        -- Nothing in stash, grab request from WB
+                        if wb_in.cyc = '1' then
+                            wb_req <= wb_in;
+                        else
+                            wb_req.cyc <= wb_in.cyc;
+                            wb_req.stb <= wb_in.stb;
+                        end if;
+                    end if;
+                end if;
+            end if;
+        end if;
+    end process;
+
+    wb_response_comb: process(all)
+    begin
+        -- Defaults
+        wb_rsp.ack   <= '0';
+        wb_rsp.dat   <= x"00" & d_rx & d_rx & d_rx;
+        wb_rsp.stall <= '0';
+
+        -- Depending on the access type...
+        if wb_map_valid = '1' then
+
+            -- Memory map access
+            wb_rsp.stall <= not auto_ack; -- XXX FIXME: Allow pipelining
+            wb_rsp.ack   <= auto_ack;
+            wb_rsp.dat   <= auto_data;
+
+        elsif ctrl_cs = '1' and wb_reg = SPI_REG_DATA then
+
+            -- Data register in manual mode
+            --
+            -- Stall stores if there's a pending read to avoid
+            -- acks colliding. Otherwise accept all accesses
+            -- immediately if rxtx is ready.
+            --
+            -- Note: This must match the logic setting cmd_valid
+            -- in wb_request_comb.
+            --
+            -- We also ack stores immediately when accepted. Loads
+            -- are handled separately further down.
+            --
+            if wb_req.we = '1' and pending_read = '1' then
+                wb_rsp.stall <= '1';
+            else
+                wb_rsp.ack   <= wb_req.we and cmd_ready;
+                wb_rsp.stall <= not cmd_ready;
+            end if;
+
+            -- Note: loads acks are handled elsewhere
+        elsif wb_reg_valid = '1' then
+
+            -- Normal register access
+            --
+            -- Normally single cycle but ensure any auto-mode or manual
+            -- operation is complete first
+            --
+            if auto_state = AUTO_IDLE and bus_idle = '1' then
+                wb_rsp.ack   <= '1';
+                wb_rsp.stall <= '0';
+
+                case wb_reg is
+                when SPI_REG_CTRL =>
+                    wb_rsp.dat <= (ctrl_reg'range => ctrl_reg, others => '0');
+                when SPI_REG_AUTO_CFG =>
+                    wb_rsp.dat <= (auto_cfg_reg'range => auto_cfg_reg, others => '0');
+                when others => null;
+                end case;
+            else
+                wb_rsp.stall <= '1';
+            end if;
+        end if;
+
+        -- For loads in manual mode, we've accepted the command early
+        -- so none of the above conditions might be true. We thus need
+        -- to send the ack whenever we are getting it from rxtx.
+        --
+        -- This shouldn't collide with any of the above acks because we hold
+        -- normal register accesses and stores when there is a pending
+        -- load or the bus is busy.
+        --
+        if ctrl_cs = '1' and d_ack = '1' then
+            assert pending_read = '1' report "d_ack without pending read !" severity failure;
+            wb_rsp.ack <= '1';
+        end if;
+    end process;
+
+    -- Automatic mode state machine
+    auto_sync: process(clk)
+    begin
+        if rising_edge(clk) then
+            auto_state <= auto_next;
+            auto_cnt   <= auto_cnt_next;
+            auto_data  <= auto_data_next;
+            if auto_latch_adr = '1' then
+                auto_last_addr <= auto_lad_next;
+            end if;
+        end if;
+    end process;
+
+    auto_comb: process(all)
+        variable addr        : std_ulogic_vector(31 downto 0);
+        variable req_is_next : boolean;
+
+        function mode_to_clks(mode: std_ulogic_vector(1 downto 0)) return std_ulogic_vector is
+        begin
+            if mode = SPI_AUTO_CFG_MODE_QUAD then
+                return "001";
+            elsif mode = SPI_AUTO_CFG_MODE_DUAL then
+                return "011";
+            else
+                return "111";
+            end if;
+        end function;
+    begin
+        -- Default outputs
+        auto_ack       <= '0';
+        auto_cs        <= '0';
+        auto_cmd_valid <= '0';
+        auto_d_txd     <= x"00";
+        auto_cmd_mode  <= "001";
+        auto_d_clks    <= "111";
+        auto_latch_adr <= '0';
+
+        -- Default next state
+        auto_next      <= auto_state;
+        auto_cnt_next  <= auto_cnt;
+        auto_data_next <= auto_data;
+
+        -- Convert wishbone address into a flash address. We mask
+        -- off the 4 top address bits to get rid of the "f" there.
+        addr := "00" & wb_req.adr(29 downto 2) & "00";
+
+        -- Calculate the next address for store & compare later
+        auto_lad_next <= std_ulogic_vector(unsigned(addr) + 4);
+
+        -- Match incoming request address with next address
+        req_is_next := addr = auto_last_addr;
+
+        -- XXX TODO:
+        --   - Support < 32-bit accesses
+
+        -- Reset
+        if rst = '1' or ctrl_reset = '1' then
+            auto_cs       <= '0';
+            auto_cnt_next <= 0;
+            auto_next     <= AUTO_IDLE;
+        else
+            -- Run counter
+            if auto_cnt /= 0 then
+                auto_cnt_next <= auto_cnt - 1;
+            end if;
+
+            -- Automatic CS is set whenever state isn't IDLE or RECOVERY
+            if auto_state /= AUTO_IDLE and
+                auto_state /= AUTO_RECOVERY then
+                auto_cs <= '1';
+            end if;
+
+            -- State machine
+            case auto_state is
+            when AUTO_IDLE =>
+                -- Access to the memory map only when manual CS isn't set
+                if wb_map_valid = '1' and ctrl_cs = '0' then
+                    -- Ignore writes, we don't support them yet
+                    if wb_req.we = '1' then
+                        auto_ack <= '1';
+                    else
+                        -- Start machine with CS assertion delay
+                        auto_next     <= AUTO_CS_ON;
+                        auto_cnt_next <= CS_DELAY_ASSERT;
+                    end if;
+                end if;
+            when AUTO_CS_ON =>
+                if auto_cnt = 0 then
+                    -- CS asserted long enough, send command
+                    auto_next <= AUTO_CMD;
+                end if;
+            when AUTO_CMD =>
+                auto_d_txd     <= auto_cfg_cmd;
+                auto_cmd_valid <= '1';
+                if cmd_ready = '1' then
+                    if auto_cfg_addr4 = '1' then
+                        auto_next <= AUTO_ADR3;
+                    else
+                        auto_next <= AUTO_ADR2;
+                    end if;
+                end if;
+            when AUTO_ADR3 =>
+                auto_d_txd     <= addr(31 downto 24);
+                auto_cmd_valid <= '1';
+                if cmd_ready = '1' then
+                    auto_next <= AUTO_ADR2;
+                end if;
+            when AUTO_ADR2 =>
+                auto_d_txd     <= addr(23 downto 16);
+                auto_cmd_valid <= '1';
+                if cmd_ready = '1' then
+                    auto_next <= AUTO_ADR1;
+                end if;
+            when AUTO_ADR1 =>
+                auto_d_txd     <= addr(15 downto 8);
+                auto_cmd_valid <= '1';
+                if cmd_ready = '1' then
+                    auto_next <= AUTO_ADR0;
+                end if;
+            when AUTO_ADR0 =>
+                auto_d_txd     <= addr(7 downto 0);
+                auto_cmd_valid <= '1';
+                if cmd_ready = '1' then
+                    if auto_cfg_dummies = "000" then
+                        auto_next <= AUTO_DAT0;
+                    else
+                        auto_next <= AUTO_DUMMY;
+                    end if;
+                end if;
+            when AUTO_DUMMY =>
+                auto_cmd_valid <= '1';
+                auto_d_clks    <= auto_cfg_dummies;
+                if cmd_ready = '1' then
+                    auto_next <= AUTO_DAT0;
+                end if;
+            when AUTO_DAT0 =>
+                auto_cmd_valid <= '1';
+                auto_cmd_mode  <= auto_cfg_mode & "0";
+                auto_d_clks    <= mode_to_clks(auto_cfg_mode);
+                if cmd_ready = '1' then
+                    auto_next <= AUTO_DAT0_DATA;
+                end if;
+            when AUTO_DAT0_DATA =>
+                if d_ack = '1' then
+                    auto_data_next(7 downto 0) <= d_rx;
+                    auto_next <= AUTO_DAT1;
+                end if;
+            when AUTO_DAT1 =>
+                auto_cmd_valid <= '1';
+                auto_cmd_mode  <= auto_cfg_mode & "0";
+                auto_d_clks    <= mode_to_clks(auto_cfg_mode);
+                if cmd_ready = '1' then
+                    auto_next <= AUTO_DAT1_DATA;
+                end if;
+            when AUTO_DAT1_DATA =>
+                if d_ack = '1' then
+                    auto_data_next(15 downto 8) <= d_rx;
+                    auto_next <= AUTO_DAT2;
+                end if;
+            when AUTO_DAT2 =>
+                auto_cmd_valid <= '1';
+                auto_cmd_mode  <= auto_cfg_mode & "0";
+                auto_d_clks    <= mode_to_clks(auto_cfg_mode);
+                if cmd_ready = '1' then
+                    auto_next <= AUTO_DAT2_DATA;
+                end if;
+            when AUTO_DAT2_DATA =>
+                if d_ack = '1' then
+                    auto_data_next(23 downto 16) <= d_rx;
+                    auto_next <= AUTO_DAT3;
+                end if;
+            when AUTO_DAT3 =>
+                auto_cmd_valid <= '1';
+                auto_cmd_mode  <= auto_cfg_mode & "0";
+                auto_d_clks    <= mode_to_clks(auto_cfg_mode);
+                if cmd_ready = '1' then
+                    auto_next <= AUTO_DAT3_DATA;
+                end if;
+            when AUTO_DAT3_DATA =>
+                if d_ack = '1' then
+                    auto_data_next(31 downto 24) <= d_rx;
+                    auto_next      <= AUTO_SEND_ACK;
+                    auto_latch_adr <= '1';
+                end if;
+            when AUTO_SEND_ACK =>
+                auto_ack      <= '1';
+                auto_cnt_next <= to_integer(unsigned(auto_cfg_cstout));
+                auto_next     <= AUTO_WAIT_REQ;
+            when AUTO_WAIT_REQ =>
+                -- Incoming bus request we can take ? Otherwise do we need
+                -- to cancel the wait ?
+                if wb_map_valid = '1' and req_is_next and wb_req.we = '0' then
+                    auto_next <= AUTO_DAT0;
+                elsif wb_map_valid = '1' or wb_reg_valid = '1' or auto_cnt = 0 then
+                    -- This means we can drop the CS right on the next clock.
+                    -- We make the assumption here that the two cycles min
+                    -- spent in AUTO_SEND_ACK and AUTO_WAIT_REQ are long enough
+                    -- to deassert CS. If that doesn't hold true in the future,
+                    -- add another state.
+                    auto_cnt_next <= CS_DELAY_RECOVERY;
+                    auto_next     <= AUTO_RECOVERY;
+                end if;
+            when AUTO_RECOVERY =>
+                if auto_cnt = 0 then
+                    auto_next <= AUTO_IDLE;
+                end if;
+            end case;
+        end if;
+    end process;
+
+    -- Register write sync machine
+    reg_write: process(clk)
+        function reg_wr(r : in std_ulogic_vector;
+                        w : in wb_io_master_out) return std_ulogic_vector is
+            -- Returns r with every bit whose byte lane is selected in w.sel replaced by w.dat
+            variable t : std_ulogic_vector(r'range);
+        begin
+            t := r;
+            for i in r'range loop
+                if w.sel(i/8) = '1' then
+                    t(i) := w.dat(i);
+                end if;
+            end loop;
+            return t;
+        end function;
+    begin
+        if rising_edge(clk) then
+            -- Reset auto-clear
+            if rst = '1' or ctrl_reset = '1' then
+                ctrl_reset <= '0';
+                ctrl_cs    <= '0';
+                ctrl_rsrv1 <= '0';
+                ctrl_rsrv2 <= '0';
+                ctrl_div   <= std_ulogic_vector(to_unsigned(DEF_CLK_DIV, 8));
+                if DEF_QUAD_READ then
+                    auto_cfg_cmd     <= x"6b";
+                    auto_cfg_dummies <= "111";
+                    auto_cfg_mode    <= SPI_AUTO_CFG_MODE_QUAD;
+                else
+                    auto_cfg_cmd     <= x"03";
+                    auto_cfg_dummies <= "000";
+                    auto_cfg_mode    <= SPI_AUTO_CFG_MODE_SINGLE;
+                end if;
+                auto_cfg_addr4  <= '0';
+                auto_cfg_rsrv1  <= '0';
+                auto_cfg_rsrv2  <= '0';
+                auto_cfg_div    <= std_ulogic_vector(to_unsigned(DEF_CLK_DIV, 8));
+                auto_cfg_cstout <= std_ulogic_vector(to_unsigned(DEFAULT_CS_TIMEOUT, 6));
+            end if;
+
+            if wb_reg_valid = '1' and wb_req.we = '1' and auto_state = AUTO_IDLE and bus_idle = '1' then
+                if wb_reg = SPI_REG_CTRL then
+                    ctrl_reg <= reg_wr(ctrl_reg, wb_req);
+                end if;
+                if wb_reg = SPI_REG_AUTO_CFG then
+                    auto_cfg_reg <= reg_wr(auto_cfg_reg, wb_req);
+                end if;
+            end if;
+        end if;
+    end process;
+
+end architecture;
diff --git a/spi_rxtx.vhdl b/spi_rxtx.vhdl
new file mode 100644
index 0000000..acb5bec
--- /dev/null
+++ b/spi_rxtx.vhdl
@@ -0,0 +1,387 @@
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.numeric_std.all;
+
+library work;
+use work.wishbone_types.all;
+
+entity spi_rxtx is
+    generic (
+        DATA_LINES    : positive := 1;          -- Number of data lines
+                                                -- 1=MISO/MOSI, otherwise 2 or 4
+        INPUT_DELAY   : natural range 0 to 1 := 1 -- Delay latching of SPI input:
+                                                -- 0=no delay, 1=clk/2
+        );
+    port (
+        clk : in std_ulogic;
+        rst : in std_ulogic;
+
+        --
+        -- Clock divider
+        -- SCK = CLK/((CLK_DIV+1)*2) : 0=CLK/2, 1=CLK/4, 2=CLK/6....
+        --
+        -- This needs to be changed before a command.
+        -- XX TODO add handshake
+        clk_div_i : in natural range 0 to 255;
+
+        --
+        -- Command port (includes write data)
+        --
+
+        -- Valid & ready: command sampled when valid=1 and ready=1
+        cmd_valid_i : in std_ulogic;
+        cmd_ready_o : out std_ulogic;
+
+        -- Command modes:
+        --  000 : Single bit read+write
+        --  010 : Single bit read
+        --  011 : Single bit write
+        --  100 : Dual read
+        --  101 : Dual write
+        --  110 : Quad read
+        --  111 : Quad write
+        cmd_mode_i : in std_ulogic_vector(2 downto 0);
+
+        -- # clocks-1 in a command (#bits-1)
+        cmd_clks_i : in std_ulogic_vector(2 downto 0);
+
+        -- Write data (sampled with command)
+        cmd_txd_i : in std_ulogic_vector(7 downto 0);
+
+        --
+        -- Read data port. Data valid when d_ack=1, no ready
+        -- signal, receiver must be ready
+        --
+        d_rxd_o : out std_ulogic_vector(7 downto 0);
+        d_ack_o : out std_ulogic := '0';
+
+        -- Set when all commands are done. Needed for callers to know when
+        -- to release CS#
+        bus_idle_o : out std_ulogic;
+
+        --
+        -- SPI port. These might need to go into special IOBUFs or STARTUPE2 on
+        -- Xilinx.
+        --
+        -- Data lines are organized as follows:
+        --
+        -- DATA_LINES = 1
+        --
+        --   sdat_o(0) is MOSI (master output slave input)
+        --   sdat_i(0) is MISO (master input slave output)
+        --
+        -- DATA_LINES > 1
+        --
+        --   sdat_o(0..n) are DQ(0..n)
+        --   sdat_i(0..n) are DQ(0..n)
+        --
+        --   as such, beware that:
+        --
+        --   sdat_o(0) is MOSI (master output slave input)
+        --   sdat_i(1) is MISO (master input slave output)
+        --
+        -- In order to leave dealing with the details of how to wire the tristate
+        -- and bidirectional pins to the system specific toplevel, we separate
+        -- the input and output signals, and provide a "sdat_oe" signal which
+        -- is the "output enable" of each line.
+        --
+        sck     : out std_ulogic;
+        sdat_o  : out std_ulogic_vector(DATA_LINES-1 downto 0);
+        sdat_oe : out std_ulogic_vector(DATA_LINES-1 downto 0);
+        sdat_i  : in  std_ulogic_vector(DATA_LINES-1 downto 0)
+        );
+end entity spi_rxtx;
+
+architecture rtl of spi_rxtx is
+
+    -- Internal clock signals: sck_0 free-runs, sck_1 is the gated copy driven onto sck
+    signal sck_0    : std_ulogic;
+    signal sck_1    : std_ulogic;
+
+    -- Clock divider latch
+    signal clk_div  : natural range 0 to 255;
+
+    -- 1 clk pulses indicating when to send and when to latch
+    --
+    -- Typically for CPOL=CPHA
+    --  sck_send is sck falling edge
+    --  sck_recv is sck rising edge
+    --
+    -- Those pulses are generated "ahead" of the corresponding
+    -- edge so they are "seen" at the rising sysclk edge matching
+    -- the corresponding sck edge.
+    signal sck_send : std_ulogic;
+    signal sck_recv : std_ulogic;
+
+    -- Command mode latch
+    signal cmd_mode : std_ulogic_vector(2 downto 0);
+
+    -- Output shift register (use fifo ?)
+    signal oreg     : std_ulogic_vector(7 downto 0);
+
+    -- Input latch
+    signal dat_i_l  : std_ulogic_vector(DATA_LINES-1 downto 0);
+
+    -- Data ack latch
+    signal dat_ack_l : std_ulogic;
+
+    -- Delayed recv signal for the read machine
+    signal sck_recv_d : std_ulogic := '0';
+
+    -- Input shift register (use fifo ?)
+    signal ireg : std_ulogic_vector(7 downto 0) := (others => '0');
+
+    -- Bit counter
+    signal bit_count : std_ulogic_vector(2 downto 0);
+
+    -- Next/start/stop command signals. Set when counter goes negative
+    signal next_cmd  : std_ulogic;
+    signal start_cmd : std_ulogic;
+    signal end_cmd   : std_ulogic;
+
+    function data_single(mode : std_ulogic_vector(2 downto 0)) return boolean is
+    begin
+        return mode(2) = '0';
+    end;
+    function data_dual(mode : std_ulogic_vector(2 downto 0)) return boolean is
+    begin
+        return mode(2 downto 1) = "10";
+    end;
+    function data_quad(mode : std_ulogic_vector(2 downto 0)) return boolean is
+    begin
+        return mode(2 downto 1) = "11";
+    end;
+    function data_write(mode : std_ulogic_vector(2 downto 0)) return boolean is
+    begin
+        return mode(0) = '1';
+    end;
+
+    type state_t is (STANDBY, DATA);
+    signal state : state_t := STANDBY;
+begin
+
+    -- Only 1, 2 or 4 data lines are supported
+    assert DATA_LINES = 1 or DATA_LINES = 2 or DATA_LINES = 4
+        report "Unsupported DATA_LINES configuration !" severity failure;
+
+    -- Clock generation
+    --
+    -- XX HARD WIRE CPOL=1 CPHA=1 for now
+    sck_gen: process(clk)
+        variable counter : integer range 0 to 255;
+    begin
+        if rising_edge(clk) then
+            if rst = '1' then
+                sck_0    <= '1';
+                sck_1    <= '1';
+                sck_send <= '0';
+                sck_recv <= '0';
+                clk_div  <= 0;
+                counter  := 0; -- restart the divider: a stale count above clk_div would never match
+            elsif counter = clk_div then
+                counter := 0;
+
+                -- Latch new divider
+                clk_div <= clk_div_i;
+
+                -- Internal version of the clock
+                sck_0 <= not sck_0;
+
+                -- Generate send/receive pulses to run our state machine
+                sck_recv <= not sck_0;
+                sck_send <= sck_0;
+            else
+                counter := counter + 1;
+                sck_recv <= '0';
+                sck_send <= '0';
+            end if;
+
+            -- Delayed version of the clock to line up with
+            -- the up/down signals
+            --
+            -- XXX Figure out a better way
+            if (state = DATA and end_cmd = '0') or (next_cmd = '1' and cmd_valid_i = '1') then
+                sck_1 <= sck_0;
+            else
+                sck_1 <= '1';
+            end if;
+        end if;
+    end process;
+
+    -- SPI clock
+    sck <= sck_1;
+
+    -- Ready to start the next command. This is set on the clock down
+    -- after the counter goes negative.
+    -- Note: in addition to latching a new command, this will cause
+    -- the counter to be reloaded.
+    next_cmd <= '1' when sck_send = '1' and bit_count = "111" else '0';
+
+    -- We start a command when we have a valid request at that time.
+    start_cmd <= next_cmd and cmd_valid_i;
+
+    -- We end commands if we get next_cmd and there's nothing to
+    -- start. This sends us to standby holding CLK high
+    end_cmd <= next_cmd and not cmd_valid_i;
+
+    -- Generate cmd_ready. It will go up and down with sck, we could
+    -- gate it with cmd_valid to make it look cleaner but that would
+    -- add yet another combinational loop on the wishbone that I'm
+    -- trying to avoid.
+    cmd_ready_o <= next_cmd;
+
+    -- Generate bus_idle_o
+    bus_idle_o <= '1' when state = STANDBY else '0';
+
+    -- Main state machine: tracks whether a command is in flight and latches its mode
+    machine: process(clk)
+    begin
+        if rising_edge(clk) then
+            if rst = '1' then
+                state    <= STANDBY;
+                cmd_mode <= "000";
+            else
+                -- First clk down of a new cycle. Latch a request if any
+                -- or get out.
+                if start_cmd = '1' then
+                    state    <= DATA;
+                    cmd_mode <= cmd_mode_i;
+                elsif end_cmd = '1' then
+                    state <= STANDBY;
+                end if;
+            end if;
+        end if;
+    end process;
+
+    -- Run the bit counter in DATA state. It will update on rising
+    -- SCK edges. It starts at cmd_clks_i on command latch
+    count_bit: process(clk)
+    begin
+        if rising_edge(clk) then
+            if start_cmd = '1' then
+                bit_count <= cmd_clks_i;
+            elsif state /= DATA then
+                bit_count <= (others => '1');
+            elsif sck_recv = '1' then
+                bit_count <= std_ulogic_vector(unsigned(bit_count) - 1);
+            end if;
+        end if;
+    end process;
+
+    -- Shift output data
+    shift_out: process(clk)
+    begin
+        if rising_edge(clk) then
+            -- Starting a command
+            if start_cmd = '1' then
+                oreg <= cmd_txd_i(7 downto 0);
+            elsif sck_send = '1' then
+                -- Get shift amount
+                if data_single(cmd_mode) then
+                    oreg <= oreg(6 downto 0) & '0';
+                elsif data_dual(cmd_mode) then
+                    oreg <= oreg(5 downto 0) & "00";
+                else
+                    oreg <= oreg(3 downto 0) & "0000";
+                end if;
+            end if;
+        end if;
+    end process;
+
+    -- Data out. NOTE(review): MSB goes out on line 0 while shift_in takes its MSB from the top line; confirm dual/quad writes
+    sdat_o(0) <= oreg(7);
+    dl2: if DATA_LINES > 1 generate
+        sdat_o(1) <= oreg(6);
+    end generate;
+    dl4: if DATA_LINES > 2 generate
+        sdat_o(2) <= oreg(5);
+        sdat_o(3) <= oreg(4);
+    end generate;
+
+    -- Data lines direction
+    dlines: process(all)
+    begin
+        for i in DATA_LINES-1 downto 0 loop
+            sdat_oe(i) <= '0';
+            if state = DATA then
+                -- In single mode, we always enable MOSI, otherwise
+                -- we control the output enable based on the direction
+                -- of transfer.
+                --
+                if i = 0 and (data_single(cmd_mode) or data_write(cmd_mode)) then
+                    sdat_oe(i) <= '1';
+                end if;
+                if i = 1 and data_dual(cmd_mode) and data_write(cmd_mode) then
+                    sdat_oe(i) <= '1';
+                end if;
+                if i > 0 and data_quad(cmd_mode) and data_write(cmd_mode) then
+                    sdat_oe(i) <= '1';
+                end if;
+            end if;
+        end loop;
+    end process;
+
+    -- Latch input data no delay
+    input_delay_0: if INPUT_DELAY = 0 generate
+        process(clk)
+        begin
+            if rising_edge(clk) then
+                dat_i_l <= sdat_i;
+            end if;
+        end process;
+    end generate;
+
+    -- Latch input data half clock delay
+    input_delay_1: if INPUT_DELAY = 1 generate
+        process(clk)
+        begin
+            if falling_edge(clk) then
+                dat_i_l <= sdat_i;
+            end if;
+        end process;
+    end generate;
+
+    -- Shift input data
+    shift_in: process(clk)
+    begin
+        if rising_edge(clk) then
+
+            -- Delay the receive signal to match the input latch
+            if state = DATA then
+                sck_recv_d <= sck_recv;
+            else
+                sck_recv_d <= '0';
+            end if;
+
+            -- Generate read data acks
+            if bit_count = "000" and sck_recv = '1' then
+                dat_ack_l <= not cmd_mode(0);
+            else
+                dat_ack_l <= '0';
+            end if;
+
+            -- And delay them as well
+            d_ack_o <= dat_ack_l;
+
+            -- Shift register on delayed data & receive signal
+            if sck_recv_d = '1' then
+                if DATA_LINES = 1 then
+                    ireg <= ireg(6 downto 0) & dat_i_l(0);
+                else
+                    if data_dual(cmd_mode) then
+                        ireg <= ireg(5 downto 0) & dat_i_l(1) & dat_i_l(0);
+                    elsif data_quad(cmd_mode) then
+                        ireg <= ireg(3 downto 0) & dat_i_l(3) & dat_i_l(2) & dat_i_l(1) & dat_i_l(0);
+                    else
+                        assert(data_single(cmd_mode));
+                        ireg <= ireg(6 downto 0) & dat_i_l(1);
+                    end if;
+                end if;
+            end if;
+        end if;
+    end process;
+
+    -- Data receive register
+    d_rxd_o <= ireg;
+
+end architecture;
diff --git a/syscon.vhdl b/syscon.vhdl
index 79d9531..e319f02 100644
--- a/syscon.vhdl
+++ b/syscon.vhdl
@@ -8,13 +8,15 @@ use work.wishbone_types.all;
 
 entity syscon is
     generic (
-        SIG_VALUE      : std_ulogic_vector(63 downto 0) := x"f00daa5500010001";
-        CLK_FREQ       : integer;
-        HAS_UART       : boolean;
-        HAS_DRAM       : boolean;
-        BRAM_SIZE      : integer;
-        DRAM_SIZE      : integer;
-        DRAM_INIT_SIZE : integer
+        SIG_VALUE        : std_ulogic_vector(63 downto 0) := x"f00daa5500010001";
+        CLK_FREQ         : integer;
+        HAS_UART         : boolean;
+        HAS_DRAM         : boolean;
+        BRAM_SIZE        : integer;
+        DRAM_SIZE        : integer;
+        DRAM_INIT_SIZE   : integer;
+        HAS_SPI_FLASH    : boolean;
+        SPI_FLASH_OFFSET : integer
         );
     port (
         clk : in std_ulogic;
@@ -44,6 +46,7 @@ architecture behaviour of syscon is
     constant SYS_REG_CLKINFO      : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "100";
     constant SYS_REG_CTRL         : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "101";
     constant SYS_REG_DRAMINITINFO : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "110";
+    constant SYS_REG_SPIFLASHINFO : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "111";
 
     -- Muxed reg read signal
     signal reg_out : std_ulogic_vector(63 downto 0);
@@ -52,6 +55,7 @@ architecture behaviour of syscon is
     constant SYS_REG_INFO_HAS_UART : integer := 0;
     constant SYS_REG_INFO_HAS_DRAM : integer := 1;
     constant SYS_REG_INFO_HAS_BRAM : integer := 2;
+    constant SYS_REG_INFO_HAS_SPIF : integer := 3;
 
     -- BRAMINFO contains the BRAM size in the bottom 52 bits
     -- DRAMINFO contains the DRAM size if any in the bottom 52 bits
@@ -64,6 +68,12 @@ architecture behaviour of syscon is
     constant SYS_REG_CTRL_CORE_RESET : integer := 1;
     constant SYS_REG_CTRL_SOC_RESET  : integer := 2;
 
+    -- SPI Info register bits
+    --
+    -- Bottom 32 bits hold the flash offset, i.e. the amount of flash reserved
+    -- for the FPGA bitfile if any (see reg_spiinfo). NOTE(review): IS_FLASH is not used in these hunks
+    constant SYS_REG_SPI_INFO_IS_FLASH : integer := 0;
+
     -- Ctrl register
     signal reg_ctrl     : std_ulogic_vector(SYS_REG_CTRL_BITS-1 downto 0);
     signal reg_ctrl_out : std_ulogic_vector(63 downto 0);
@@ -74,10 +84,13 @@ architecture behaviour of syscon is
     signal reg_draminfo  : std_ulogic_vector(63 downto 0);
     signal reg_dramiinfo : std_ulogic_vector(63 downto 0);
     signal reg_clkinfo   : std_ulogic_vector(63 downto 0);
+    signal reg_spiinfo   : std_ulogic_vector(63 downto 0);
     signal info_has_dram : std_ulogic;
     signal info_has_bram : std_ulogic;
     signal info_has_uart : std_ulogic;
+    signal info_has_spif : std_ulogic;
     signal info_clk      : std_ulogic_vector(39 downto 0);
+    signal info_fl_off   : std_ulogic_vector(31 downto 0);
 begin
 
     -- Generated output signals
@@ -93,10 +106,12 @@ begin
     info_has_uart <= '1' when HAS_UART else '0';
     info_has_dram <= '1' when HAS_DRAM else '0';
     info_has_bram <= '1' when BRAM_SIZE /= 0 else '0';
+    info_has_spif <= '1' when HAS_SPI_FLASH else '0';
     info_clk <= std_ulogic_vector(to_unsigned(CLK_FREQ, 40));
-    reg_info <= (0 => info_has_uart,
-                 1 => info_has_dram,
-                 2 => info_has_bram,
+    reg_info <= (SYS_REG_INFO_HAS_UART => info_has_uart,
+                 SYS_REG_INFO_HAS_DRAM => info_has_dram,
+                 SYS_REG_INFO_HAS_BRAM => info_has_bram,
+                 SYS_REG_INFO_HAS_SPIF => info_has_spif,
                  others => '0');
     reg_braminfo <= x"000" & std_ulogic_vector(to_unsigned(BRAM_SIZE, 52));
     reg_draminfo <= x"000" & std_ulogic_vector(to_unsigned(DRAM_SIZE, 52)) when HAS_DRAM
@@ -105,6 +120,9 @@ begin
                      else (others => '0');
     reg_clkinfo <= (39 downto 0 => info_clk,
                     others => '0');
+    info_fl_off <= std_ulogic_vector(to_unsigned(SPI_FLASH_OFFSET, 32));
+    reg_spiinfo <= (31 downto 0 => info_fl_off,
+                    others => '0');
 
     -- Control register read composition
     reg_ctrl_out <= (63 downto SYS_REG_CTRL_BITS => '0',
@@ -119,6 +137,7 @@ begin
         reg_dramiinfo when SYS_REG_DRAMINITINFO,
         reg_clkinfo   when SYS_REG_CLKINFO,
         reg_ctrl_out  when SYS_REG_CTRL,
+        reg_spiinfo   when SYS_REG_SPIFLASHINFO,
         (others => '0') when others;
     wishbone_out.dat <= reg_out(63 downto 32) when wishbone_in.adr(2) = '1' else
                         reg_out(31 downto 0);