From 9a06b0c18295115365e310ab2df2fc73cc8cfa09 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 4 Jan 2025 16:24:41 +1100 Subject: [PATCH] soc: Implement multiple CPU cores This adds an 'NCPUS' generic parameter to the soc module, which then includes that many CPU cores. The cores have separate addresses on the DMI interconnect, meaning that external JTAG debug tools can view and control the state of each core individually. The syscon module has a new 'cpu_ctrl' register, where byte 0 contains individual enable bits for each core, and byte 1 indicates the number of cores. If a core's enable bit is clear, the core is held in reset. On system reset, the enable byte is set to 0x01, so only core 0 is active. Signed-off-by: Paul Mackerras --- include/microwatt_soc.h | 3 +- soc.vhdl | 108 +++++++++++++++++++++------------------- syscon.vhdl | 16 +++++- 3 files changed, 74 insertions(+), 53 deletions(-) diff --git a/include/microwatt_soc.h b/include/microwatt_soc.h index 6717b4b..67ea13d 100644 --- a/include/microwatt_soc.h +++ b/include/microwatt_soc.h @@ -65,7 +65,8 @@ #define SYS_REG_UART_IS_16550 (1ull << 32) #define SYS_REG_GIT_INFO 0x50 #define SYS_REG_GIT_IS_DIRTY (1ull << 63) - +#define SYS_REG_CPU_CTRL 0x58 +#define SYS_REG_CPU_CTRL_ENABLE 0xff /* * Register definitions for the potato UART diff --git a/soc.vhdl b/soc.vhdl index 3e3b438..0ed234d 100644 --- a/soc.vhdl +++ b/soc.vhdl @@ -67,6 +67,7 @@ entity soc is RAM_INIT_FILE : string; CLK_FREQ : positive; SIM : boolean; + NCPUS : positive := 1; HAS_FPU : boolean := true; HAS_BTC : boolean := true; DISABLE_FLATTEN_CORE : boolean := false; @@ -148,20 +149,18 @@ end entity soc; architecture behaviour of soc is + subtype cpu_index_t is natural range 0 to NCPUS-1; + type dword_percpu_array is array(cpu_index_t) of std_ulogic_vector(63 downto 0); + -- internal reset signal soc_reset : std_ulogic; -- Wishbone master signals: - signal wishbone_dcore_in : wishbone_slave_out; - signal wishbone_dcore_out : wishbone_master_out; - signal wishbone_icore_in : wishbone_slave_out; - signal wishbone_icore_out : wishbone_master_out; - signal wishbone_debug_in : wishbone_slave_out; - signal wishbone_debug_out : wishbone_master_out; - - -- Arbiter array (ghdl doesnt' support assigning the array - -- elements in the entity instantiation) - constant NUM_WB_MASTERS : positive := 4; + signal wishbone_debug_in : wishbone_slave_out; + signal wishbone_debug_out : wishbone_master_out; + + -- Arbiter array + constant NUM_WB_MASTERS : positive := NCPUS * 2 + 2; signal wb_masters_out : wishbone_master_out_vector(0 to NUM_WB_MASTERS-1); signal wb_masters_in : wishbone_slave_out_vector(0 to NUM_WB_MASTERS-1); @@ -180,7 +179,7 @@ architecture behaviour of soc is -- Syscon signals signal dram_at_0 : std_ulogic; - signal do_core_reset : std_ulogic; + signal do_core_reset : std_ulogic_vector(NCPUS-1 downto 0); signal alt_reset : std_ulogic; signal wb_syscon_in : wb_io_master_out; signal wb_syscon_out : wb_io_slave_out; @@ -210,7 +209,7 @@ architecture behaviour of soc is signal wb_xics_ics_out : wb_io_slave_out; signal int_level_in : std_ulogic_vector(15 downto 0); signal ics_to_icp : ics_to_icp_t; - signal core_ext_irq : std_ulogic; + signal core_ext_irq : std_ulogic_vector(NCPUS-1 downto 0) := (others => '0'); -- GPIO signals: signal wb_gpio_in : wb_io_master_out; @@ -233,12 +232,12 @@ architecture behaviour of soc is signal dmi_wb_dout : std_ulogic_vector(63 downto 0); signal dmi_wb_req : std_ulogic; signal dmi_wb_ack : std_ulogic; - signal dmi_core_dout : std_ulogic_vector(63 downto 0); - signal dmi_core_req : std_ulogic; - signal dmi_core_ack : std_ulogic; + signal dmi_core_dout : dword_percpu_array; + signal dmi_core_req : std_ulogic_vector(NCPUS-1 downto 0); + signal dmi_core_ack : std_ulogic_vector(NCPUS-1 downto 0); -- Delayed/latched resets and alt_reset - signal rst_core : std_ulogic; + signal rst_core : std_ulogic_vector(NCPUS-1 downto 0); signal rst_uart : std_ulogic; signal rst_xics : std_ulogic; signal rst_spi : std_ulogic; @@ -270,6 +269,8 @@ architecture behaviour of soc is signal io_cycle_gpio : std_ulogic; signal io_cycle_external : std_ulogic; + signal core_run_out : std_ulogic_vector(NCPUS-1 downto 0); + function wishbone_widen_data(wb : wb_io_master_out) return wishbone_master_out is variable wwb : wishbone_master_out; begin @@ -334,7 +335,9 @@ begin resets: process(system_clk) begin if rising_edge(system_clk) then - rst_core <= soc_reset or do_core_reset; + for i in 0 to NCPUS-1 loop + rst_core(i) <= soc_reset or do_core_reset(i); + end loop; rst_uart <= soc_reset; rst_spi <= soc_reset; rst_xics <= soc_reset; @@ -347,11 +350,12 @@ begin end if; end process; - -- Processor core - processor: entity work.core + -- Processor cores + processors: for i in 0 to NCPUS-1 generate + core: entity work.core generic map( SIM => SIM, - CPU_INDEX => 0, + CPU_INDEX => i, HAS_FPU => HAS_FPU, HAS_BTC => HAS_BTC, DISABLE_FLATTEN => DISABLE_FLATTEN_CORE, @@ -367,32 +371,31 @@ begin ) port map( clk => system_clk, - rst => rst_core, + rst => rst_core(i), alt_reset => alt_reset_d, - run_out => run_out, - wishbone_insn_in => wishbone_icore_in, - wishbone_insn_out => wishbone_icore_out, - wishbone_data_in => wishbone_dcore_in, - wishbone_data_out => wishbone_dcore_out, + run_out => core_run_out(i), + wishbone_insn_in => wb_masters_in(i + NCPUS), + wishbone_insn_out => wb_masters_out(i + NCPUS), + wishbone_data_in => wb_masters_in(i), + wishbone_data_out => wb_masters_out(i), wb_snoop_in => wb_snoop, dmi_addr => dmi_addr(3 downto 0), - dmi_dout => dmi_core_dout, + dmi_dout => dmi_core_dout(i), dmi_din => dmi_dout, dmi_wr => dmi_wr, - dmi_ack => dmi_core_ack, - dmi_req => dmi_core_req, - ext_irq => core_ext_irq + dmi_ack => dmi_core_ack(i), + dmi_req => dmi_core_req(i), + ext_irq => core_ext_irq(i) ); + end generate; + + run_out <= or (core_run_out); -- Wishbone bus master arbiter & mux - wb_masters_out <= (0 => wishbone_dcore_out, - 1 => wishbone_icore_out, - 2 => wishbone_widen_data(wishbone_dma_out), - 3 => wishbone_debug_out); - wishbone_dcore_in <= wb_masters_in(0); - wishbone_icore_in <= wb_masters_in(1); - wishbone_dma_in <= wishbone_narrow_data(wb_masters_in(2), wishbone_dma_out.adr); - wishbone_debug_in <= wb_masters_in(3); + wb_masters_out(2*NCPUS) <= wishbone_widen_data(wishbone_dma_out); + wb_masters_out(2*NCPUS + 1) <= wishbone_debug_out; + wishbone_dma_in <= wishbone_narrow_data(wb_masters_in(2*NCPUS), wishbone_dma_out.adr); + wishbone_debug_in <= wb_masters_in(2*NCPUS + 1); wishbone_arbiter_0: entity work.wishbone_arbiter generic map( NUM_MASTERS => NUM_WB_MASTERS @@ -780,6 +783,7 @@ begin -- Syscon slave syscon0: entity work.syscon generic map( + NCPUS => NCPUS, HAS_UART => true, HAS_DRAM => HAS_DRAM, BRAM_SIZE => MEMORY_SIZE, @@ -950,7 +954,7 @@ begin wb_in => wb_xics_icp_in, wb_out => wb_xics_icp_out, ics_in => ics_to_icp, - core_irq_out => core_ext_irq + core_irq_out => core_ext_irq(0) ); xics_ics: entity work.xics_ics @@ -1034,15 +1038,15 @@ begin ); -- DMI interconnect - dmi_intercon: process(dmi_addr, dmi_req, - dmi_wb_ack, dmi_wb_dout, - dmi_core_ack, dmi_core_dout) + dmi_intercon: process(all) -- DMI address map (each address is a full 64-bit register) -- -- Offset: Size: Slave: -- 0 4 Wishbone - -- 10 16 Core + -- 10 16 Core 0 + -- 20 16 Core 1 + -- ... and so on for NCPUS cores type slave_type is (SLAVE_WB, SLAVE_CORE, @@ -1053,25 +1057,29 @@ begin slave := SLAVE_NONE; if std_match(dmi_addr, "000000--") then slave := SLAVE_WB; - elsif std_match(dmi_addr, "0001----") then + elsif not is_X(dmi_addr) and to_integer(unsigned(dmi_addr(7 downto 4))) <= NCPUS then slave := SLAVE_CORE; end if; -- DMI muxing dmi_wb_req <= '0'; - dmi_core_req <= '0'; + dmi_core_req <= (others => '0'); + dmi_din <= (others => '1'); + dmi_ack <= dmi_req; case slave is when SLAVE_WB => dmi_wb_req <= dmi_req; dmi_ack <= dmi_wb_ack; dmi_din <= dmi_wb_dout; when SLAVE_CORE => - dmi_core_req <= dmi_req; - dmi_ack <= dmi_core_ack; - dmi_din <= dmi_core_dout; + for i in 0 to NCPUS-1 loop + if not is_X(dmi_addr) and to_integer(unsigned(dmi_addr(7 downto 4))) = i + 1 then + dmi_core_req(i) <= dmi_req; + dmi_ack <= dmi_core_ack(i); + dmi_din <= dmi_core_dout(i); + end if; + end loop; when others => - dmi_ack <= dmi_req; - dmi_din <= (others => '1'); end case; -- SIM magic exit diff --git a/syscon.vhdl b/syscon.vhdl index 99fa835..98990d1 100644 --- a/syscon.vhdl +++ b/syscon.vhdl @@ -9,6 +9,7 @@ use work.wishbone_types.all; entity syscon is generic ( + NCPUS : positive := 1; SIG_VALUE : std_ulogic_vector(63 downto 0) := x"f00daa5500010001"; CLK_FREQ : integer; HAS_UART : boolean; @@ -33,7 +34,7 @@ entity syscon is -- System control ports dram_at_0 : out std_ulogic; - core_reset : out std_ulogic; + core_reset : out std_ulogic_vector(NCPUS-1 downto 0); soc_reset : out std_ulogic; alt_reset : out std_ulogic ); @@ -56,6 +57,7 @@ architecture behaviour of syscon is constant SYS_REG_UART0_INFO : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "001000"; constant SYS_REG_UART1_INFO : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "001001"; constant SYS_REG_GIT_INFO : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "001010"; + constant SYS_REG_CPU_CTRL : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "001011"; -- Muxed reg read signal signal reg_out : std_ulogic_vector(63 downto 0); @@ -116,6 +118,7 @@ architecture behaviour of syscon is signal reg_uart0info : std_ulogic_vector(63 downto 0); signal reg_uart1info : std_ulogic_vector(63 downto 0); signal reg_gitinfo : std_ulogic_vector(63 downto 0); + signal reg_cpuctrl : std_ulogic_vector(63 downto 0); signal info_has_dram : std_ulogic; signal info_has_bram : std_ulogic; signal info_has_uart : std_ulogic; @@ -134,7 +137,8 @@ begin -- Generated output signals dram_at_0 <= '1' when BRAM_SIZE = 0 else reg_ctrl(SYS_REG_CTRL_DRAM_AT_0); soc_reset <= reg_ctrl(SYS_REG_CTRL_SOC_RESET); - core_reset <= reg_ctrl(SYS_REG_CTRL_CORE_RESET); + core_reset <= not reg_cpuctrl(NCPUS-1 downto 0) when reg_ctrl(SYS_REG_CTRL_CORE_RESET) = '0' + else (others => '1'); alt_reset <= reg_ctrl(SYS_REG_CTRL_ALT_RESET); @@ -187,6 +191,8 @@ begin 55 downto 0 => GIT_HASH, others => '0'); + reg_cpuctrl(63 downto 8) <= std_ulogic_vector(to_unsigned(NCPUS, 56)); + -- Wishbone response wb_rsp.ack <= wishbone_in.cyc and wishbone_in.stb; with wishbone_in.adr(SYS_REG_BITS downto 1) select reg_out <= @@ -201,6 +207,7 @@ begin reg_uart0info when SYS_REG_UART0_INFO, reg_uart1info when SYS_REG_UART1_INFO, reg_gitinfo when SYS_REG_GIT_INFO, + reg_cpuctrl when SYS_REG_CPU_CTRL, (others => '0') when others; wb_rsp.dat <= reg_out(63 downto 32) when wishbone_in.adr(0) = '1' else reg_out(31 downto 0); @@ -225,6 +232,7 @@ begin if (rst) then reg_ctrl <= (SYS_REG_CTRL_ALT_RESET => ctrl_init_alt_reset, others => '0'); + reg_cpuctrl(7 downto 0) <= x"01"; -- enable cpu 0 only else if wishbone_in.cyc and wishbone_in.stb and wishbone_in.we then -- Change this if CTRL ever has more than 32 bits @@ -233,6 +241,10 @@ begin reg_ctrl(SYS_REG_CTRL_BITS-1 downto 0) <= wishbone_in.dat(SYS_REG_CTRL_BITS-1 downto 0); end if; + if wishbone_in.adr(SYS_REG_BITS downto 1) = SYS_REG_CPU_CTRL and + wishbone_in.adr(0) = '0' and wishbone_in.sel(0) = '1' then + reg_cpuctrl(7 downto 0) <= wishbone_in.dat(7 downto 0); + end if; end if; -- Reset auto-clear