From 51dd7f578f4a09a1509e504c0199171e8ecdd765 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 15 Apr 2025 09:59:44 +1000 Subject: [PATCH 1/3] countbits: Move more popcount calculation before the clock edge Popcount takes two cycles to execute. The computation of the final popcount value in the second cycle has shown up as a critical path on the Artix-7, so move one stage of the summation back into the first cycle. Signed-off-by: Paul Mackerras --- countbits.vhdl | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/countbits.vhdl b/countbits.vhdl index 4754b03..c0ce93a 100644 --- a/countbits.vhdl +++ b/countbits.vhdl @@ -42,6 +42,10 @@ architecture behaviour of bit_counter is type fourbit8 is array(0 to 7) of fourbit; signal pc8 : fourbit8; signal pc8_r : fourbit8; + subtype fivebit is unsigned(4 downto 0); + type fivebit4 is array(0 to 3) of fivebit; + signal pc16 : fivebit4; + signal pc16_r : fivebit4; subtype sixbit is unsigned(5 downto 0); type sixbit2 is array(0 to 1) of sixbit; signal pc32 : sixbit2; @@ -96,6 +100,9 @@ begin for i in 0 to 7 loop pc8_r(i) <= pc8(i); end loop; + for i in 0 to 3 loop + pc16_r(i) <= pc16(i); + end loop; dlen_r <= datalen; pcnt_r <= do_popcnt; end if; @@ -113,11 +120,13 @@ begin for i in 0 to 7 loop pc8(i) <= ('0' & pc4(i * 2)) + ('0' & pc4(i * 2 + 1)); end loop; + for i in 0 to 3 loop + pc16(i) <= ('0' & pc8(i * 2)) + ('0' & pc8(i * 2 + 1)); + end loop; -- after a clock edge for i in 0 to 1 loop - pc32(i) <= ("00" & pc8_r(i * 4)) + ("00" & pc8_r(i * 4 + 1)) + - ("00" & pc8_r(i * 4 + 2)) + ("00" & pc8_r(i * 4 + 3)); + pc32(i) <= ('0' & pc16_r(i * 2)) + ('0' & pc16_r(i * 2 + 1)); end loop; popcnt <= (others => '0'); From b65dde1a951a44ceb194e888fe1a68a9ed026b53 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 10 Jan 2025 13:22:56 +1100 Subject: [PATCH 2/3] arty a7: Display run status of two CPUs on LEDs 6 and 7 The run status LED is off when the core is held in reset (e.g. 
when the second core hasn't been started yet). Signed-off-by: Paul Mackerras --- fpga/top-arty.vhdl | 8 +++++--- soc.vhdl | 2 ++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/fpga/top-arty.vhdl b/fpga/top-arty.vhdl index 6e524f0..e86cecd 100644 --- a/fpga/top-arty.vhdl +++ b/fpga/top-arty.vhdl @@ -145,6 +145,7 @@ architecture behaviour of toplevel is -- Status signal run_out : std_ulogic; + signal run_outs : std_ulogic_vector(CPUS-1 downto 0); -- Reset signals: signal soc_rst : std_ulogic; @@ -269,6 +270,7 @@ begin rst => soc_rst, sw_soc_reset => sw_rst, run_out => run_out, + run_outs => run_outs, -- UART signals uart0_txd => uart_main_tx, @@ -746,9 +748,9 @@ begin end process; led4 <= system_clk_locked; - led5 <= eth_clk_locked; - led6 <= not soc_rst; - led7 <= run_out; + led5 <= not soc_rst; + led6 <= run_outs(1) when CPUS > 1 else '0'; + led7 <= run_outs(0); -- GPIO gpio_in(10) <= btn0; diff --git a/soc.vhdl b/soc.vhdl index bf58826..7ae2cf2 100644 --- a/soc.vhdl +++ b/soc.vhdl @@ -101,6 +101,7 @@ entity soc is system_clk : in std_ulogic; run_out : out std_ulogic; + run_outs : out std_ulogic_vector(NCPUS-1 downto 0); -- "Large" (64-bit) DRAM wishbone wb_dram_in : out wishbone_master_out; @@ -393,6 +394,7 @@ begin end generate; run_out <= or (core_run_out); + run_outs <= core_run_out and not do_core_reset; -- Wishbone bus master arbiter & mux wb_masters_out(2*NCPUS) <= wishbone_widen_data(wishbone_dma_out); From de2e8f81ee1dcfd63486d988c2506415d9f6ad64 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 16 Apr 2025 11:49:15 +1000 Subject: [PATCH 3/3] decode: Execute cpabort as a no-op It seems that the Linux kernel executes cpabort on any CPU that implements ISA v3.1 or later, despite cpabort being optional. To cope with this, implement cpabort as a no-op. 
Signed-off-by: Paul Mackerras --- predecode.vhdl | 1 + 1 file changed, 1 insertion(+) diff --git a/predecode.vhdl b/predecode.vhdl index b3d956e..585626f 100644 --- a/predecode.vhdl +++ b/predecode.vhdl @@ -240,6 +240,7 @@ architecture behaviour of predecoder is 2#0_00000_11010# => INSN_cntlzw, 2#0_10001_11010# => INSN_cnttzd, 2#0_10000_11010# => INSN_cnttzw, + 2#0_11010_00110# => INSN_rnop, -- cpabort 2#0_10111_10011# => INSN_darn, 2#0_00010_10110# => INSN_dcbf, 2#0_00001_10110# => INSN_dcbst,