Add multi-core support: an NCPUS generic on soc and syscon, per-core reset,
DMI and wishbone master wiring in soc.vhdl, and a CPU_CTRL register
(SYS_REG_CPU_CTRL, offset 0x58) in syscon whose low 8 bits enable cores and
whose upper bits read back NCPUS.

Change relative to the patch as originally captured: syscon.vhdl now asserts
NCPUS <= 8 at elaboration. core_reset slices reg_cpuctrl(NCPUS-1 downto 0),
but only bits 7 downto 0 are writable enables; bits 63 downto 8 carry the
NCPUS count, so a larger NCPUS would take core resets from the count bits.
The hunk line counts after that point are adjusted for the two added lines,
and the syscon.vhdl blob-id line is omitted because its post-image id no
longer applies.

NOTE(review): the hunks were re-flowed from a whitespace-collapsed copy.
Indentation, column alignment and the exact position of blank context lines
are best-effort (the number of blank lines per hunk was derived from the hunk
header counts). Apply with whitespace tolerance, e.g.
"git apply --ignore-whitespace", and confirm against the base tree.

diff --git a/include/microwatt_soc.h b/include/microwatt_soc.h
index 6717b4b..67ea13d 100644
--- a/include/microwatt_soc.h
+++ b/include/microwatt_soc.h
@@ -65,7 +65,8 @@
 #define SYS_REG_UART_IS_16550 (1ull << 32)
 #define SYS_REG_GIT_INFO 0x50
 #define SYS_REG_GIT_IS_DIRTY (1ull << 63)
-
+#define SYS_REG_CPU_CTRL 0x58
+#define SYS_REG_CPU_CTRL_ENABLE 0xff
 
 /*
  * Register definitions for the potato UART
diff --git a/soc.vhdl b/soc.vhdl
index 3e3b438..0ed234d 100644
--- a/soc.vhdl
+++ b/soc.vhdl
@@ -67,6 +67,7 @@ entity soc is
         RAM_INIT_FILE : string;
         CLK_FREQ : positive;
         SIM : boolean;
+        NCPUS : positive := 1;
         HAS_FPU : boolean := true;
         HAS_BTC : boolean := true;
         DISABLE_FLATTEN_CORE : boolean := false;
@@ -148,20 +149,18 @@ end entity soc;
 
 architecture behaviour of soc is
 
+    subtype cpu_index_t is natural range 0 to NCPUS-1;
+    type dword_percpu_array is array(cpu_index_t) of std_ulogic_vector(63 downto 0);
+
     -- internal reset
     signal soc_reset : std_ulogic;
 
     -- Wishbone master signals:
-    signal wishbone_dcore_in : wishbone_slave_out;
-    signal wishbone_dcore_out : wishbone_master_out;
-    signal wishbone_icore_in : wishbone_slave_out;
-    signal wishbone_icore_out : wishbone_master_out;
-    signal wishbone_debug_in : wishbone_slave_out;
-    signal wishbone_debug_out : wishbone_master_out;
-
-    -- Arbiter array (ghdl doesnt' support assigning the array
-    -- elements in the entity instantiation)
-    constant NUM_WB_MASTERS : positive := 4;
+    signal wishbone_debug_in : wishbone_slave_out;
+    signal wishbone_debug_out : wishbone_master_out;
+
+    -- Arbiter array
+    constant NUM_WB_MASTERS : positive := NCPUS * 2 + 2;
     signal wb_masters_out : wishbone_master_out_vector(0 to NUM_WB_MASTERS-1);
     signal wb_masters_in : wishbone_slave_out_vector(0 to NUM_WB_MASTERS-1);
 
@@ -180,7 +179,7 @@ architecture behaviour of soc is
 
     -- Syscon signals
     signal dram_at_0 : std_ulogic;
-    signal do_core_reset : std_ulogic;
+    signal do_core_reset : std_ulogic_vector(NCPUS-1 downto 0);
     signal alt_reset : std_ulogic;
     signal wb_syscon_in : wb_io_master_out;
     signal wb_syscon_out : wb_io_slave_out;
@@ -210,7 +209,7 @@ architecture behaviour of soc is
     signal wb_xics_ics_out : wb_io_slave_out;
     signal int_level_in : std_ulogic_vector(15 downto 0);
     signal ics_to_icp : ics_to_icp_t;
-    signal core_ext_irq : std_ulogic;
+    signal core_ext_irq : std_ulogic_vector(NCPUS-1 downto 0) := (others => '0');
 
     -- GPIO signals:
     signal wb_gpio_in : wb_io_master_out;
@@ -233,12 +232,12 @@ architecture behaviour of soc is
     signal dmi_wb_dout : std_ulogic_vector(63 downto 0);
     signal dmi_wb_req : std_ulogic;
     signal dmi_wb_ack : std_ulogic;
-    signal dmi_core_dout : std_ulogic_vector(63 downto 0);
-    signal dmi_core_req : std_ulogic;
-    signal dmi_core_ack : std_ulogic;
+    signal dmi_core_dout : dword_percpu_array;
+    signal dmi_core_req : std_ulogic_vector(NCPUS-1 downto 0);
+    signal dmi_core_ack : std_ulogic_vector(NCPUS-1 downto 0);
 
     -- Delayed/latched resets and alt_reset
-    signal rst_core : std_ulogic;
+    signal rst_core : std_ulogic_vector(NCPUS-1 downto 0);
     signal rst_uart : std_ulogic;
     signal rst_xics : std_ulogic;
     signal rst_spi : std_ulogic;
@@ -270,6 +269,8 @@ architecture behaviour of soc is
     signal io_cycle_gpio : std_ulogic;
     signal io_cycle_external : std_ulogic;
 
+    signal core_run_out : std_ulogic_vector(NCPUS-1 downto 0);
+
     function wishbone_widen_data(wb : wb_io_master_out) return wishbone_master_out is
         variable wwb : wishbone_master_out;
     begin
@@ -334,7 +335,9 @@ begin
     resets: process(system_clk)
     begin
         if rising_edge(system_clk) then
-            rst_core <= soc_reset or do_core_reset;
+            for i in 0 to NCPUS-1 loop
+                rst_core(i) <= soc_reset or do_core_reset(i);
+            end loop;
             rst_uart <= soc_reset;
             rst_spi <= soc_reset;
             rst_xics <= soc_reset;
@@ -347,11 +350,12 @@ begin
         end if;
     end process;
 
-    -- Processor core
-    processor: entity work.core
+    -- Processor cores
+    processors: for i in 0 to NCPUS-1 generate
+        core: entity work.core
         generic map(
             SIM => SIM,
-            CPU_INDEX => 0,
+            CPU_INDEX => i,
             HAS_FPU => HAS_FPU,
             HAS_BTC => HAS_BTC,
             DISABLE_FLATTEN => DISABLE_FLATTEN_CORE,
@@ -367,32 +371,31 @@ begin
             )
         port map(
             clk => system_clk,
-            rst => rst_core,
+            rst => rst_core(i),
             alt_reset => alt_reset_d,
-            run_out => run_out,
-            wishbone_insn_in => wishbone_icore_in,
-            wishbone_insn_out => wishbone_icore_out,
-            wishbone_data_in => wishbone_dcore_in,
-            wishbone_data_out => wishbone_dcore_out,
+            run_out => core_run_out(i),
+            wishbone_insn_in => wb_masters_in(i + NCPUS),
+            wishbone_insn_out => wb_masters_out(i + NCPUS),
+            wishbone_data_in => wb_masters_in(i),
+            wishbone_data_out => wb_masters_out(i),
             wb_snoop_in => wb_snoop,
             dmi_addr => dmi_addr(3 downto 0),
-            dmi_dout => dmi_core_dout,
+            dmi_dout => dmi_core_dout(i),
             dmi_din => dmi_dout,
             dmi_wr => dmi_wr,
-            dmi_ack => dmi_core_ack,
-            dmi_req => dmi_core_req,
-            ext_irq => core_ext_irq
+            dmi_ack => dmi_core_ack(i),
+            dmi_req => dmi_core_req(i),
+            ext_irq => core_ext_irq(i)
             );
+    end generate;
+
+    run_out <= or (core_run_out);
 
     -- Wishbone bus master arbiter & mux
-    wb_masters_out <= (0 => wishbone_dcore_out,
-                       1 => wishbone_icore_out,
-                       2 => wishbone_widen_data(wishbone_dma_out),
-                       3 => wishbone_debug_out);
-    wishbone_dcore_in <= wb_masters_in(0);
-    wishbone_icore_in <= wb_masters_in(1);
-    wishbone_dma_in <= wishbone_narrow_data(wb_masters_in(2), wishbone_dma_out.adr);
-    wishbone_debug_in <= wb_masters_in(3);
+    wb_masters_out(2*NCPUS) <= wishbone_widen_data(wishbone_dma_out);
+    wb_masters_out(2*NCPUS + 1) <= wishbone_debug_out;
+    wishbone_dma_in <= wishbone_narrow_data(wb_masters_in(2*NCPUS), wishbone_dma_out.adr);
+    wishbone_debug_in <= wb_masters_in(2*NCPUS + 1);
     wishbone_arbiter_0: entity work.wishbone_arbiter
         generic map(
             NUM_MASTERS => NUM_WB_MASTERS
@@ -780,6 +783,7 @@ begin
     -- Syscon slave
     syscon0: entity work.syscon
         generic map(
+            NCPUS => NCPUS,
             HAS_UART => true,
             HAS_DRAM => HAS_DRAM,
             BRAM_SIZE => MEMORY_SIZE,
@@ -950,7 +954,7 @@ begin
             wb_in => wb_xics_icp_in,
             wb_out => wb_xics_icp_out,
             ics_in => ics_to_icp,
-            core_irq_out => core_ext_irq
+            core_irq_out => core_ext_irq(0)
             );
 
     xics_ics: entity work.xics_ics
@@ -1034,15 +1038,15 @@ begin
             );
 
     -- DMI interconnect
-    dmi_intercon: process(dmi_addr, dmi_req,
-                          dmi_wb_ack, dmi_wb_dout,
-                          dmi_core_ack, dmi_core_dout)
+    dmi_intercon: process(all)
 
         -- DMI address map (each address is a full 64-bit register)
         --
         -- Offset: Size: Slave:
         -- 0 4 Wishbone
-        -- 10 16 Core
+        -- 10 16 Core 0
+        -- 20 16 Core 1
+        -- ... and so on for NCPUS cores
 
         type slave_type is (SLAVE_WB,
                             SLAVE_CORE,
@@ -1053,25 +1057,29 @@ begin
         slave := SLAVE_NONE;
         if std_match(dmi_addr, "000000--") then
             slave := SLAVE_WB;
-        elsif std_match(dmi_addr, "0001----") then
+        elsif not is_X(dmi_addr) and to_integer(unsigned(dmi_addr(7 downto 4))) <= NCPUS then
             slave := SLAVE_CORE;
         end if;
 
         -- DMI muxing
         dmi_wb_req <= '0';
-        dmi_core_req <= '0';
+        dmi_core_req <= (others => '0');
+        dmi_din <= (others => '1');
+        dmi_ack <= dmi_req;
         case slave is
             when SLAVE_WB =>
                 dmi_wb_req <= dmi_req;
                 dmi_ack <= dmi_wb_ack;
                 dmi_din <= dmi_wb_dout;
             when SLAVE_CORE =>
-                dmi_core_req <= dmi_req;
-                dmi_ack <= dmi_core_ack;
-                dmi_din <= dmi_core_dout;
+                for i in 0 to NCPUS-1 loop
+                    if not is_X(dmi_addr) and to_integer(unsigned(dmi_addr(7 downto 4))) = i + 1 then
+                        dmi_core_req(i) <= dmi_req;
+                        dmi_ack <= dmi_core_ack(i);
+                        dmi_din <= dmi_core_dout(i);
+                    end if;
+                end loop;
             when others =>
-                dmi_ack <= dmi_req;
-                dmi_din <= (others => '1');
         end case;
 
         -- SIM magic exit
diff --git a/syscon.vhdl b/syscon.vhdl
--- a/syscon.vhdl
+++ b/syscon.vhdl
@@ -9,6 +9,7 @@ use work.wishbone_types.all;
 
 entity syscon is
     generic (
+        NCPUS : positive := 1;
         SIG_VALUE : std_ulogic_vector(63 downto 0) := x"f00daa5500010001";
         CLK_FREQ : integer;
         HAS_UART : boolean;
@@ -33,7 +34,7 @@ entity syscon is
 
         -- System control ports
         dram_at_0 : out std_ulogic;
-        core_reset : out std_ulogic;
+        core_reset : out std_ulogic_vector(NCPUS-1 downto 0);
         soc_reset : out std_ulogic;
         alt_reset : out std_ulogic
         );
@@ -56,6 +57,7 @@ architecture behaviour of syscon is
     constant SYS_REG_UART0_INFO : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "001000";
     constant SYS_REG_UART1_INFO : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "001001";
     constant SYS_REG_GIT_INFO : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "001010";
+    constant SYS_REG_CPU_CTRL : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "001011";
 
     -- Muxed reg read signal
     signal reg_out : std_ulogic_vector(63 downto 0);
@@ -116,6 +118,7 @@ architecture behaviour of syscon is
     signal reg_uart0info : std_ulogic_vector(63 downto 0);
     signal reg_uart1info : std_ulogic_vector(63 downto 0);
     signal reg_gitinfo : std_ulogic_vector(63 downto 0);
+    signal reg_cpuctrl : std_ulogic_vector(63 downto 0);
     signal info_has_dram : std_ulogic;
     signal info_has_bram : std_ulogic;
     signal info_has_uart : std_ulogic;
@@ -134,7 +137,10 @@ begin
     -- Generated output signals
     dram_at_0 <= '1' when BRAM_SIZE = 0 else reg_ctrl(SYS_REG_CTRL_DRAM_AT_0);
     soc_reset <= reg_ctrl(SYS_REG_CTRL_SOC_RESET);
-    core_reset <= reg_ctrl(SYS_REG_CTRL_CORE_RESET);
+    core_reset <= not reg_cpuctrl(NCPUS-1 downto 0) when reg_ctrl(SYS_REG_CTRL_CORE_RESET) = '0'
+                  else (others => '1');
+    -- Only reg_cpuctrl(7 downto 0) holds per-core enables; bits 63 downto 8 report NCPUS.
+    assert NCPUS <= 8 report "syscon: NCPUS must not exceed 8 (CPU_CTRL enable field is 8 bits)" severity failure;
     alt_reset <= reg_ctrl(SYS_REG_CTRL_ALT_RESET);
 
 
@@ -187,6 +193,8 @@ begin
                     55 downto 0 => GIT_HASH,
                     others => '0');
 
+    reg_cpuctrl(63 downto 8) <= std_ulogic_vector(to_unsigned(NCPUS, 56));
+
     -- Wishbone response
     wb_rsp.ack <= wishbone_in.cyc and wishbone_in.stb;
     with wishbone_in.adr(SYS_REG_BITS downto 1) select reg_out <=
@@ -201,6 +209,7 @@ begin
         reg_uart0info when SYS_REG_UART0_INFO,
         reg_uart1info when SYS_REG_UART1_INFO,
         reg_gitinfo when SYS_REG_GIT_INFO,
+        reg_cpuctrl when SYS_REG_CPU_CTRL,
         (others => '0') when others;
     wb_rsp.dat <= reg_out(63 downto 32) when wishbone_in.adr(0) = '1' else
                   reg_out(31 downto 0);
@@ -225,6 +234,7 @@ begin
             if (rst) then
                 reg_ctrl <= (SYS_REG_CTRL_ALT_RESET => ctrl_init_alt_reset,
                              others => '0');
+                reg_cpuctrl(7 downto 0) <= x"01"; -- enable cpu 0 only
             else
                 if wishbone_in.cyc and wishbone_in.stb and wishbone_in.we then
                     -- Change this if CTRL ever has more than 32 bits
@@ -233,6 +243,10 @@ begin
                         reg_ctrl(SYS_REG_CTRL_BITS-1 downto 0) <=
                             wishbone_in.dat(SYS_REG_CTRL_BITS-1 downto 0);
                     end if;
+                    if wishbone_in.adr(SYS_REG_BITS downto 1) = SYS_REG_CPU_CTRL and
+                        wishbone_in.adr(0) = '0' and wishbone_in.sel(0) = '1' then
+                        reg_cpuctrl(7 downto 0) <= wishbone_in.dat(7 downto 0);
+                    end if;
                 end if;
 
                 -- Reset auto-clear