soc: Implement multiple CPU cores

This adds an 'NCPUS' generic parameter to the soc module, which then
includes that many CPU cores.

The cores have separate addresses on the DMI interconnect, meaning
that external JTAG debug tools can view and control the state of each
core individually.

The syscon module has a new 'cpu_ctrl' register, where byte 0 contains
individual enable bits for each core, and the bits above it (byte 1
upward) indicate the number of cores.  If a core's enable bit is clear,
the core is held in reset; the existing core-reset control bit still
forces all cores into reset regardless of the enable bits.
On system reset, the enable byte is set to 0x01, so only core 0 is
active.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/436/head
Paul Mackerras 1 month ago
parent 23ff954059
commit 9a06b0c182

@ -65,7 +65,8 @@
#define SYS_REG_UART_IS_16550 (1ull << 32)
#define SYS_REG_GIT_INFO 0x50
#define SYS_REG_GIT_IS_DIRTY (1ull << 63)

/* CPU control register: the low byte holds one enable bit per core */
#define SYS_REG_CPU_CTRL 0x58
#define SYS_REG_CPU_CTRL_ENABLE 0xff

/*
* Register definitions for the potato UART

@ -67,6 +67,7 @@ entity soc is
RAM_INIT_FILE : string;
CLK_FREQ : positive;
SIM : boolean;
NCPUS : positive := 1;
HAS_FPU : boolean := true;
HAS_BTC : boolean := true;
DISABLE_FLATTEN_CORE : boolean := false;
@ -148,20 +149,18 @@ end entity soc;

architecture behaviour of soc is

subtype cpu_index_t is natural range 0 to NCPUS-1;
type dword_percpu_array is array(cpu_index_t) of std_ulogic_vector(63 downto 0);

-- internal reset
signal soc_reset : std_ulogic;

-- Wishbone master signals:
signal wishbone_dcore_in : wishbone_slave_out;
signal wishbone_dcore_out : wishbone_master_out;
signal wishbone_icore_in : wishbone_slave_out;
signal wishbone_icore_out : wishbone_master_out;
signal wishbone_debug_in : wishbone_slave_out;
signal wishbone_debug_out : wishbone_master_out;

-- Arbiter array (ghdl doesn't support assigning the array
-- elements in the entity instantiation)
constant NUM_WB_MASTERS : positive := 4;
-- Arbiter array
constant NUM_WB_MASTERS : positive := NCPUS * 2 + 2;
signal wb_masters_out : wishbone_master_out_vector(0 to NUM_WB_MASTERS-1);
signal wb_masters_in : wishbone_slave_out_vector(0 to NUM_WB_MASTERS-1);

@ -180,7 +179,7 @@ architecture behaviour of soc is

-- Syscon signals
signal dram_at_0 : std_ulogic;
signal do_core_reset : std_ulogic;
signal do_core_reset : std_ulogic_vector(NCPUS-1 downto 0);
signal alt_reset : std_ulogic;
signal wb_syscon_in : wb_io_master_out;
signal wb_syscon_out : wb_io_slave_out;
@ -210,7 +209,7 @@ architecture behaviour of soc is
signal wb_xics_ics_out : wb_io_slave_out;
signal int_level_in : std_ulogic_vector(15 downto 0);
signal ics_to_icp : ics_to_icp_t;
signal core_ext_irq : std_ulogic;
signal core_ext_irq : std_ulogic_vector(NCPUS-1 downto 0) := (others => '0');

-- GPIO signals:
signal wb_gpio_in : wb_io_master_out;
@ -233,12 +232,12 @@ architecture behaviour of soc is
signal dmi_wb_dout : std_ulogic_vector(63 downto 0);
signal dmi_wb_req : std_ulogic;
signal dmi_wb_ack : std_ulogic;
signal dmi_core_dout : std_ulogic_vector(63 downto 0);
signal dmi_core_req : std_ulogic;
signal dmi_core_ack : std_ulogic;
signal dmi_core_dout : dword_percpu_array;
signal dmi_core_req : std_ulogic_vector(NCPUS-1 downto 0);
signal dmi_core_ack : std_ulogic_vector(NCPUS-1 downto 0);

-- Delayed/latched resets and alt_reset
signal rst_core : std_ulogic;
signal rst_core : std_ulogic_vector(NCPUS-1 downto 0);
signal rst_uart : std_ulogic;
signal rst_xics : std_ulogic;
signal rst_spi : std_ulogic;
@ -270,6 +269,8 @@ architecture behaviour of soc is
signal io_cycle_gpio : std_ulogic;
signal io_cycle_external : std_ulogic;

signal core_run_out : std_ulogic_vector(NCPUS-1 downto 0);

function wishbone_widen_data(wb : wb_io_master_out) return wishbone_master_out is
variable wwb : wishbone_master_out;
begin
@ -334,7 +335,9 @@ begin
resets: process(system_clk)
begin
if rising_edge(system_clk) then
rst_core <= soc_reset or do_core_reset;
for i in 0 to NCPUS-1 loop
rst_core(i) <= soc_reset or do_core_reset(i);
end loop;
rst_uart <= soc_reset;
rst_spi <= soc_reset;
rst_xics <= soc_reset;
@ -347,11 +350,12 @@ begin
end if;
end process;

-- Processor core
processor: entity work.core
-- Processor cores
processors: for i in 0 to NCPUS-1 generate
core: entity work.core
generic map(
SIM => SIM,
CPU_INDEX => 0,
CPU_INDEX => i,
HAS_FPU => HAS_FPU,
HAS_BTC => HAS_BTC,
DISABLE_FLATTEN => DISABLE_FLATTEN_CORE,
@ -367,32 +371,31 @@ begin
)
port map(
clk => system_clk,
rst => rst_core,
rst => rst_core(i),
alt_reset => alt_reset_d,
run_out => run_out,
wishbone_insn_in => wishbone_icore_in,
wishbone_insn_out => wishbone_icore_out,
wishbone_data_in => wishbone_dcore_in,
wishbone_data_out => wishbone_dcore_out,
run_out => core_run_out(i),
wishbone_insn_in => wb_masters_in(i + NCPUS),
wishbone_insn_out => wb_masters_out(i + NCPUS),
wishbone_data_in => wb_masters_in(i),
wishbone_data_out => wb_masters_out(i),
wb_snoop_in => wb_snoop,
dmi_addr => dmi_addr(3 downto 0),
dmi_dout => dmi_core_dout,
dmi_dout => dmi_core_dout(i),
dmi_din => dmi_dout,
dmi_wr => dmi_wr,
dmi_ack => dmi_core_ack,
dmi_req => dmi_core_req,
ext_irq => core_ext_irq
dmi_ack => dmi_core_ack(i),
dmi_req => dmi_core_req(i),
ext_irq => core_ext_irq(i)
);
end generate;

run_out <= or (core_run_out);

-- Wishbone bus master arbiter & mux
wb_masters_out <= (0 => wishbone_dcore_out,
1 => wishbone_icore_out,
2 => wishbone_widen_data(wishbone_dma_out),
3 => wishbone_debug_out);
wishbone_dcore_in <= wb_masters_in(0);
wishbone_icore_in <= wb_masters_in(1);
wishbone_dma_in <= wishbone_narrow_data(wb_masters_in(2), wishbone_dma_out.adr);
wishbone_debug_in <= wb_masters_in(3);
wb_masters_out(2*NCPUS) <= wishbone_widen_data(wishbone_dma_out);
wb_masters_out(2*NCPUS + 1) <= wishbone_debug_out;
wishbone_dma_in <= wishbone_narrow_data(wb_masters_in(2*NCPUS), wishbone_dma_out.adr);
wishbone_debug_in <= wb_masters_in(2*NCPUS + 1);
wishbone_arbiter_0: entity work.wishbone_arbiter
generic map(
NUM_MASTERS => NUM_WB_MASTERS
@ -780,6 +783,7 @@ begin
-- Syscon slave
syscon0: entity work.syscon
generic map(
NCPUS => NCPUS,
HAS_UART => true,
HAS_DRAM => HAS_DRAM,
BRAM_SIZE => MEMORY_SIZE,
@ -950,7 +954,7 @@ begin
wb_in => wb_xics_icp_in,
wb_out => wb_xics_icp_out,
ics_in => ics_to_icp,
core_irq_out => core_ext_irq
core_irq_out => core_ext_irq(0)
);

xics_ics: entity work.xics_ics
@ -1034,15 +1038,15 @@ begin
);

-- DMI interconnect
dmi_intercon: process(dmi_addr, dmi_req,
dmi_wb_ack, dmi_wb_dout,
dmi_core_ack, dmi_core_dout)
dmi_intercon: process(all)

-- DMI address map (each address is a full 64-bit register)
--
-- Offset: Size: Slave:
-- 0 4 Wishbone
-- 10 16 Core
-- 10 16 Core 0
-- 20 16 Core 1
-- ... and so on for NCPUS cores (core N is at hex offset (N + 1) * 10)

type slave_type is (SLAVE_WB,
SLAVE_CORE,
@ -1053,25 +1057,29 @@ begin
slave := SLAVE_NONE;
if std_match(dmi_addr, "000000--") then
slave := SLAVE_WB;
elsif std_match(dmi_addr, "0001----") then
elsif not is_X(dmi_addr) and to_integer(unsigned(dmi_addr(7 downto 4))) <= NCPUS then
slave := SLAVE_CORE;
end if;

-- DMI muxing
dmi_wb_req <= '0';
dmi_core_req <= '0';
dmi_core_req <= (others => '0');
dmi_din <= (others => '1');
dmi_ack <= dmi_req;
case slave is
when SLAVE_WB =>
dmi_wb_req <= dmi_req;
dmi_ack <= dmi_wb_ack;
dmi_din <= dmi_wb_dout;
when SLAVE_CORE =>
dmi_core_req <= dmi_req;
dmi_ack <= dmi_core_ack;
dmi_din <= dmi_core_dout;
for i in 0 to NCPUS-1 loop
if not is_X(dmi_addr) and to_integer(unsigned(dmi_addr(7 downto 4))) = i + 1 then
dmi_core_req(i) <= dmi_req;
dmi_ack <= dmi_core_ack(i);
dmi_din <= dmi_core_dout(i);
end if;
end loop;
when others =>
dmi_ack <= dmi_req;
dmi_din <= (others => '1');
end case;

-- SIM magic exit

@ -9,6 +9,7 @@ use work.wishbone_types.all;

entity syscon is
generic (
NCPUS : positive := 1;
SIG_VALUE : std_ulogic_vector(63 downto 0) := x"f00daa5500010001";
CLK_FREQ : integer;
HAS_UART : boolean;
@ -33,7 +34,7 @@ entity syscon is

-- System control ports
dram_at_0 : out std_ulogic;
core_reset : out std_ulogic;
core_reset : out std_ulogic_vector(NCPUS-1 downto 0);
soc_reset : out std_ulogic;
alt_reset : out std_ulogic
);
@ -56,6 +57,7 @@ architecture behaviour of syscon is
constant SYS_REG_UART0_INFO : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "001000";
constant SYS_REG_UART1_INFO : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "001001";
constant SYS_REG_GIT_INFO : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "001010";
constant SYS_REG_CPU_CTRL : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "001011";

-- Muxed reg read signal
signal reg_out : std_ulogic_vector(63 downto 0);
@ -116,6 +118,7 @@ architecture behaviour of syscon is
signal reg_uart0info : std_ulogic_vector(63 downto 0);
signal reg_uart1info : std_ulogic_vector(63 downto 0);
signal reg_gitinfo : std_ulogic_vector(63 downto 0);
signal reg_cpuctrl : std_ulogic_vector(63 downto 0);
signal info_has_dram : std_ulogic;
signal info_has_bram : std_ulogic;
signal info_has_uart : std_ulogic;
@ -134,7 +137,8 @@ begin
-- Generated output signals
dram_at_0 <= '1' when BRAM_SIZE = 0 else reg_ctrl(SYS_REG_CTRL_DRAM_AT_0);
soc_reset <= reg_ctrl(SYS_REG_CTRL_SOC_RESET);
core_reset <= reg_ctrl(SYS_REG_CTRL_CORE_RESET);
-- Per-core reset: a core is held in reset while its enable bit in
-- reg_cpuctrl is clear.  Setting the CORE_RESET bit in reg_ctrl
-- overrides the enables and resets every core.
core_reset <= not reg_cpuctrl(NCPUS-1 downto 0) when reg_ctrl(SYS_REG_CTRL_CORE_RESET) = '0'
else (others => '1');
alt_reset <= reg_ctrl(SYS_REG_CTRL_ALT_RESET);


@ -187,6 +191,8 @@ begin
55 downto 0 => GIT_HASH,
others => '0');

-- Bits 63:8 of the CPU control register are driven with the core
-- count (NCPUS); only bits 7:0 are written by the register process.
reg_cpuctrl(63 downto 8) <= std_ulogic_vector(to_unsigned(NCPUS, 56));

-- Wishbone response
wb_rsp.ack <= wishbone_in.cyc and wishbone_in.stb;
with wishbone_in.adr(SYS_REG_BITS downto 1) select reg_out <=
@ -201,6 +207,7 @@ begin
reg_uart0info when SYS_REG_UART0_INFO,
reg_uart1info when SYS_REG_UART1_INFO,
reg_gitinfo when SYS_REG_GIT_INFO,
reg_cpuctrl when SYS_REG_CPU_CTRL,
(others => '0') when others;
wb_rsp.dat <= reg_out(63 downto 32) when wishbone_in.adr(0) = '1' else
reg_out(31 downto 0);
@ -225,6 +232,7 @@ begin
if (rst) then
reg_ctrl <= (SYS_REG_CTRL_ALT_RESET => ctrl_init_alt_reset,
others => '0');
reg_cpuctrl(7 downto 0) <= x"01"; -- enable cpu 0 only
else
if wishbone_in.cyc and wishbone_in.stb and wishbone_in.we then
-- Change this if CTRL ever has more than 32 bits
@ -233,6 +241,10 @@ begin
reg_ctrl(SYS_REG_CTRL_BITS-1 downto 0) <=
wishbone_in.dat(SYS_REG_CTRL_BITS-1 downto 0);
end if;
if wishbone_in.adr(SYS_REG_BITS downto 1) = SYS_REG_CPU_CTRL and
wishbone_in.adr(0) = '0' and wishbone_in.sel(0) = '1' then
reg_cpuctrl(7 downto 0) <= wishbone_in.dat(7 downto 0);
end if;
end if;

-- Reset auto-clear

Loading…
Cancel
Save