From 413907e4bc8a679b415a317d94d4dbe8dcf009dd Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 3 Feb 2025 19:37:09 +1100 Subject: [PATCH] soc: Move timebase back into the core and enable writing to it Instead of a single global timebase register in the SoC, we now have a timebase counter in each core; however, now they are only reset by the soc reset, not the core reset. Thus they stay in sync even when some cores are disabled (via the syscon cpu_ctrl register). This implements mtspr to the TBLW and TBUW SPRs, which write the lower and upper 32 bits of this core's timebase, respectively. In order to fulfil the ISA's requirements that (a) some method for getting the timebases into sync and (b) some method for preventing userspace from reading the timebase be provided by the platform, this adds a syscon register TB_CTRL with two read/write bits implemented; bit 0 freezes all the timebases in the system when set, and bit 1 makes reading the timebase privileged (in all cores). Signed-off-by: Paul Mackerras --- common.vhdl | 8 +++++++ core.vhdl | 7 +++--- decode1.vhdl | 8 +++++++ decode2.vhdl | 7 ++++++ execute1.vhdl | 49 ++++++++++++++++++++++++++++++++++++++++- include/microwatt_soc.h | 3 +++ soc.vhdl | 25 +++++---------------- syscon.vhdl | 24 ++++++++++++++++++-- 8 files changed, 106 insertions(+), 25 deletions(-) diff --git a/common.vhdl b/common.vhdl index 16ba2b3..0207fe1 100644 --- a/common.vhdl +++ b/common.vhdl @@ -39,6 +39,8 @@ package common is constant SPR_DAR : spr_num_t := 19; constant SPR_TB : spr_num_t := 268; constant SPR_TBU : spr_num_t := 269; + constant SPR_TBLW : spr_num_t := 284; + constant SPR_TBUW : spr_num_t := 285; constant SPR_DEC : spr_num_t := 22; constant SPR_SRR0 : spr_num_t := 26; constant SPR_SRR1 : spr_num_t := 27; @@ -321,6 +323,12 @@ package common is hdexcr_hyp => aspect_bits_init, hdexcr_enf => aspect_bits_init, others => (others => '0')); + type timebase_ctrl is record + reset : std_ulogic; + rd_prot : std_ulogic; -- 
read-protect => userspace can't read TB + freeze : std_ulogic; + end record; + type Fetch1ToIcacheType is record req: std_ulogic; fetch_fail : std_ulogic; diff --git a/core.vhdl b/core.vhdl index bf0708e..c94db6f 100644 --- a/core.vhdl +++ b/core.vhdl @@ -31,8 +31,8 @@ entity core is -- Alternate reset (0xffff0000) for use by DRAM init fw alt_reset : in std_ulogic; - -- Global timebase - timebase : in std_ulogic_vector(63 downto 0); + -- Global timebase control + tb_ctrl : in timebase_ctrl; -- Wishbone interface wishbone_insn_in : in wishbone_slave_out; @@ -309,6 +309,7 @@ begin busy_in => decode2_busy_in, stall_out => decode2_stall_out, flush_in => flush, + tb_ctrl => tb_ctrl, complete_in => complete, stopped_out => dbg_core_is_stopped, d_in => decode1_to_decode2, @@ -376,7 +377,7 @@ begin port map ( clk => clk, rst => rst_ex1, - timebase => timebase, + tb_ctrl => tb_ctrl, flush_in => flush, busy_out => ex1_busy_out, e_in => decode2_to_execute1, diff --git a/decode1.vhdl b/decode1.vhdl index 1e59725..2fb1ad4 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -461,8 +461,16 @@ architecture behaviour of decode1 is case sprn is when SPR_TB => i.sel := SPRSEL_TB; + i.ronly := '1'; when SPR_TBU => i.sel := SPRSEL_TBU; + i.ronly := '1'; + when SPR_TBLW => + i.sel := SPRSEL_TB; + i.wonly := '1'; + when SPR_TBUW => + i.sel := SPRSEL_TBU; + i.wonly := '1'; when SPR_DEC => i.sel := SPRSEL_DEC; when SPR_PVR => diff --git a/decode2.vhdl b/decode2.vhdl index 711f5d8..e99432b 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -27,6 +27,8 @@ entity decode2 is flush_in: in std_ulogic; + tb_ctrl : timebase_ctrl; + d_in : in Decode1ToDecode2Type; e_out : out Decode2ToExecute1Type; @@ -708,6 +710,11 @@ begin if (op = OP_MFSPR or op = OP_MTSPR) and d_in.insn(20) = '1' then v.e.privileged := '1'; end if; + -- Reading TB is privileged if syscon_tb_ctrl.rd_protect is 1 + if tb_ctrl.rd_prot = '1' and op = OP_MFSPR and d_in.spr_info.valid = '1' and + (d_in.spr_info.sel = SPRSEL_TB or 
d_in.spr_info.sel = SPRSEL_TBU) then + v.e.privileged := '1'; + end if; v.e.prefixed := d_in.prefixed; v.e.prefix := d_in.prefix; v.e.illegal_suffix := d_in.illegal_suffix; diff --git a/execute1.vhdl b/execute1.vhdl index 121d004..ee38863 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -34,7 +34,7 @@ entity execute1 is ext_irq_in : std_ulogic; interrupt_in : WritebackToExecute1Type; - timebase : std_ulogic_vector(63 downto 0); + tb_ctrl : timebase_ctrl; -- asynchronous l_out : out Execute1ToLoadstore1Type; @@ -101,6 +101,8 @@ architecture behaviour of execute1 is write_ciabr : std_ulogic; enter_wait : std_ulogic; scv_trap : std_ulogic; + write_tbl : std_ulogic; + write_tbu : std_ulogic; end record; constant side_effect_init : side_effect_type := (others => '0'); @@ -279,6 +281,10 @@ architecture behaviour of execute1 is signal stage2_stall : std_ulogic; + signal timebase : std_ulogic_vector(63 downto 0); + signal tb_next : std_ulogic_vector(63 downto 0); + signal tb_carry : std_ulogic; + type privilege_level is (USER, SUPER); type op_privilege_array is array(insn_type_t) of privilege_level; constant op_privilege: op_privilege_array := ( @@ -553,6 +559,43 @@ begin p_out => pmu_to_x ); + -- Timebase just increments at the system clock frequency. + -- Ideally it would (appear to) run at 512MHz like IBM POWER systems, + -- but Linux seems to cope OK with it being 100MHz or whatever. 
+ tbase: process(clk) + begin + if rising_edge(clk) then + if tb_ctrl.reset = '1' then + timebase <= (others => '0'); + tb_carry <= '0'; + else + timebase <= tb_next; + tb_carry <= and(tb_next(31 downto 0)); + end if; + end if; + end process; + + tbase_comb: process(all) + variable thi, tlo : std_ulogic_vector(31 downto 0); + variable carry : std_ulogic; + begin + tlo := timebase(31 downto 0); + thi := timebase(63 downto 32); + carry := '0'; + if stage2_stall = '0' and ex1.se.write_tbl = '1' then + tlo := ex1.e.write_data(31 downto 0); + elsif tb_ctrl.freeze = '0' then + tlo := std_ulogic_vector(unsigned(tlo) + 1); + carry := tb_carry; + end if; + if stage2_stall = '0' and ex1.se.write_tbu = '1' then + thi := ex1.e.write_data(31 downto 0); + else + thi := std_ulogic_vector(unsigned(thi) + carry); + end if; + tb_next <= thi & tlo; + end process; + dbg_ctrl_out <= ctrl; log_rd_addr <= ex2.log_addr_spr; @@ -1424,6 +1467,10 @@ begin v.se.write_dscr := '1'; when SPRSEL_CIABR => v.se.write_ciabr := '1'; + when SPRSEL_TB => + v.se.write_tbl := '1'; + when SPRSEL_TBU => + v.se.write_tbu := '1'; when others => end case; end if; diff --git a/include/microwatt_soc.h b/include/microwatt_soc.h index 67ea13d..6e367b1 100644 --- a/include/microwatt_soc.h +++ b/include/microwatt_soc.h @@ -67,6 +67,9 @@ #define SYS_REG_GIT_IS_DIRTY (1ull << 63) #define SYS_REG_CPU_CTRL 0x58 #define SYS_REG_CPU_CTRL_ENABLE 0xff +#define SYS_REG_TB_CTRL 0x60 +#define SYS_REG_TB_CTRL_FREEZE 0x01 +#define SYS_REG_TB_CTRL_RD_PROTECT 0x02 /* * Register definitions for the potato UART diff --git a/soc.vhdl b/soc.vhdl index b3d03b7..bf58826 100644 --- a/soc.vhdl +++ b/soc.vhdl @@ -183,6 +183,7 @@ architecture behaviour of soc is signal alt_reset : std_ulogic; signal wb_syscon_in : wb_io_master_out; signal wb_syscon_out : wb_io_slave_out; + signal tb_ctrl : timebase_ctrl; -- UART0 signals: signal wb_uart0_in : wb_io_master_out; @@ -271,8 +272,6 @@ architecture behaviour of soc is signal core_run_out : 
std_ulogic_vector(NCPUS-1 downto 0); - signal timebase : std_ulogic_vector(63 downto 0); - function wishbone_widen_data(wb : wb_io_master_out) return wishbone_master_out is variable wwb : wishbone_master_out; begin @@ -333,6 +332,7 @@ begin -- either external reset, or from syscon soc_reset <= rst or sw_soc_reset; + tb_ctrl.reset <= soc_reset; resets: process(system_clk) begin @@ -352,21 +352,6 @@ begin end if; end process; - -- Timebase just increments at the system clock frequency. - -- There is currently no way to set it. - -- Ideally it would (appear to) run at 512MHz like IBM POWER systems, - -- but Linux seems to cope OK with it being 100MHz or whatever. - tbase: process(system_clk) - begin - if rising_edge(system_clk) then - if soc_reset = '1' then - timebase <= (others => '0'); - else - timebase <= std_ulogic_vector(unsigned(timebase) + 1); - end if; - end if; - end process; - -- Processor cores processors: for i in 0 to NCPUS-1 generate core: entity work.core @@ -391,7 +376,7 @@ begin rst => rst_core(i), alt_reset => alt_reset_d, run_out => core_run_out(i), - timebase => timebase, + tb_ctrl => tb_ctrl, wishbone_insn_in => wb_masters_in(i + NCPUS), wishbone_insn_out => wb_masters_out(i + NCPUS), wishbone_data_in => wb_masters_in(i), @@ -823,7 +808,9 @@ begin dram_at_0 => dram_at_0, core_reset => do_core_reset, soc_reset => sw_soc_reset, - alt_reset => alt_reset + alt_reset => alt_reset, + tb_rdp => tb_ctrl.rd_prot, + tb_frz => tb_ctrl.freeze ); -- diff --git a/syscon.vhdl b/syscon.vhdl index 98990d1..ad9ba2c 100644 --- a/syscon.vhdl +++ b/syscon.vhdl @@ -36,7 +36,9 @@ entity syscon is dram_at_0 : out std_ulogic; core_reset : out std_ulogic_vector(NCPUS-1 downto 0); soc_reset : out std_ulogic; - alt_reset : out std_ulogic + alt_reset : out std_ulogic; + tb_rdp : out std_ulogic; + tb_frz : out std_ulogic ); end entity syscon; @@ -58,6 +60,7 @@ architecture behaviour of syscon is constant SYS_REG_UART1_INFO : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := 
"001001"; constant SYS_REG_GIT_INFO : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "001010"; constant SYS_REG_CPU_CTRL : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "001011"; + constant SYS_REG_TB_CTRL : std_ulogic_vector(SYS_REG_BITS-1 downto 0) := "001100"; -- Muxed reg read signal signal reg_out : std_ulogic_vector(63 downto 0); @@ -119,6 +122,7 @@ architecture behaviour of syscon is signal reg_uart1info : std_ulogic_vector(63 downto 0); signal reg_gitinfo : std_ulogic_vector(63 downto 0); signal reg_cpuctrl : std_ulogic_vector(63 downto 0); + signal reg_tbctrl : std_ulogic_vector(63 downto 0); signal info_has_dram : std_ulogic; signal info_has_bram : std_ulogic; signal info_has_uart : std_ulogic; @@ -130,6 +134,8 @@ architecture behaviour of syscon is signal info_fl_off : std_ulogic_vector(31 downto 0); signal uinfo_16550 : std_ulogic; signal uinfo_freq : std_ulogic_vector(31 downto 0); + signal tb_rdprot : std_ulogic; + signal tb_freeze : std_ulogic; -- Wishbone response latch signal wb_rsp : wb_io_slave_out; @@ -193,6 +199,8 @@ begin reg_cpuctrl(63 downto 8) <= std_ulogic_vector(to_unsigned(NCPUS, 56)); + reg_tbctrl <= 62x"0" & tb_rdprot & tb_freeze; + -- Wishbone response wb_rsp.ack <= wishbone_in.cyc and wishbone_in.stb; with wishbone_in.adr(SYS_REG_BITS downto 1) select reg_out <= @@ -208,6 +216,7 @@ begin reg_uart1info when SYS_REG_UART1_INFO, reg_gitinfo when SYS_REG_GIT_INFO, reg_cpuctrl when SYS_REG_CPU_CTRL, + reg_tbctrl when SYS_REG_TB_CTRL, (others => '0') when others; wb_rsp.dat <= reg_out(63 downto 32) when wishbone_in.adr(0) = '1' else reg_out(31 downto 0); @@ -222,17 +231,23 @@ begin end if; end process; + -- Timebase control + tb_rdp <= tb_rdprot; + tb_frz <= tb_freeze; + -- Initial state ctrl_init_alt_reset <= '1' when HAS_DRAM else '0'; -- Register writes - regs_write: process(clk) + regs_write : process(clk) begin if rising_edge(clk) then if (rst) then reg_ctrl <= (SYS_REG_CTRL_ALT_RESET => ctrl_init_alt_reset, others => '0'); 
reg_cpuctrl(7 downto 0) <= x"01"; -- enable cpu 0 only + tb_rdprot <= '0'; + tb_freeze <= '0'; else if wishbone_in.cyc and wishbone_in.stb and wishbone_in.we then -- Change this if CTRL ever has more than 32 bits @@ -245,6 +260,11 @@ begin wishbone_in.adr(0) = '0' and wishbone_in.sel(0) = '1' then reg_cpuctrl(7 downto 0) <= wishbone_in.dat(7 downto 0); end if; + if wishbone_in.adr(SYS_REG_BITS downto 1) = SYS_REG_TB_CTRL and + wishbone_in.adr(0) = '0' and wishbone_in.sel(0) = '1' then + tb_rdprot <= wishbone_in.dat(1); + tb_freeze <= wishbone_in.dat(0); + end if; end if; -- Reset auto-clear