forked from cores/microwatt
Browse Source
This moves the calculation of the result for popcnt* into the countbits unit, renamed from countzero, so that we can take two cycles to get the result. The motivation for this is that the popcnt* calculation was showing up as a critical path.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>

fpu-constant
8 changed files with 153 additions and 120 deletions
@ -0,0 +1,130 @@
@@ -0,0 +1,130 @@
|
||||
library ieee; |
||||
use ieee.std_logic_1164.all; |
||||
use ieee.numeric_std.all; |
||||
|
||||
library work; |
||||
use work.helpers.all; |
||||
|
||||
-- bit_counter: combined zero-counting and population-counting unit.
--
-- The architecture registers its intermediate values on the rising edge of
-- clk, so `result` reflects the inputs that were sampled at the previous
-- rising edge.
entity bit_counter is
    port (
        -- Clock; intermediate results are captured on its rising edge.
        clk         : in std_logic;

        -- 64-bit source operand.
        rs          : in std_ulogic_vector(63 downto 0);

        -- Zero-count direction: when '0' the operand is bit-reversed before
        -- counting, otherwise it is used as-is.
        count_right : in std_ulogic;

        -- When '0' the zero-count value is returned, otherwise the
        -- population-count value is returned.
        do_popcnt   : in std_ulogic;

        -- When not '0' the zero counter only examines rs(31 downto 0).
        is_32bit    : in std_ulogic;

        -- Population-count granularity: bits 3..2 = "00" gives one count per
        -- byte, bit 3 = '0' gives one count per 32-bit word, anything else
        -- gives a single count over all 64 bits.
        datalen     : in std_ulogic_vector(3 downto 0);

        -- Selected count, zero-extended to 64 bits.
        result      : out std_ulogic_vector(63 downto 0)
    );
end entity bit_counter;
||||
|
||||
-- Two-stage implementation of bit_counter.
--
-- Stage 1 (before the clock edge) isolates the lowest set bit of the selected
-- operand and reduces rs to eight per-byte population counts.  Stage 2 (after
-- the clock edge) turns the registered values into the final 64-bit result.
architecture behaviour of bit_counter is
    -- Zero-count datapath ---------------------------------------------------
    signal operand      : std_ulogic_vector(63 downto 0);  -- selected / reversed input
    signal negated      : std_ulogic_vector(64 downto 0);  -- (not operand) + 1, with carry-out
    signal carry_r      : std_ulogic;                      -- registered negated(64)
    signal lowest_set   : std_ulogic_vector(63 downto 0);  -- operand with only its lowest '1' kept
    signal lowest_set_r : std_ulogic_vector(63 downto 0);  -- registered lowest_set
    signal bit_index    : std_ulogic_vector(5 downto 0);
    signal zero_count   : std_ulogic_vector(63 downto 0);

    -- Population-count datapath ----------------------------------------------
    signal datalen_r    : std_ulogic_vector(3 downto 0);   -- registered datalen
    signal popcnt_sel_r : std_ulogic;                      -- registered do_popcnt

    subtype pair_cnt_t   is unsigned(1 downto 0);          -- count over 2 bits
    subtype nibble_cnt_t is unsigned(2 downto 0);          -- count over 4 bits
    subtype byte_cnt_t   is unsigned(3 downto 0);          -- count over 8 bits
    subtype word_cnt_t   is unsigned(5 downto 0);          -- count over 32 bits

    type pair_cnt_vec   is array(0 to 31) of pair_cnt_t;
    type nibble_cnt_vec is array(0 to 15) of nibble_cnt_t;
    type byte_cnt_vec   is array(0 to 7)  of byte_cnt_t;
    type word_cnt_vec   is array(0 to 1)  of word_cnt_t;

    signal pair_cnt     : pair_cnt_vec;
    signal nibble_cnt   : nibble_cnt_vec;
    signal byte_cnt     : byte_cnt_vec;
    signal byte_cnt_r   : byte_cnt_vec;                    -- registered byte_cnt
    signal word_cnt     : word_cnt_vec;
    signal pop_result   : std_ulogic_vector(63 downto 0);

begin
    -- Pipeline registers between the two stages.
    stage_regs: process(clk)
    begin
        if rising_edge(clk) then
            carry_r      <= negated(64);
            lowest_set_r <= lowest_set;
            byte_cnt_r   <= byte_cnt;
            datalen_r    <= datalen;
            popcnt_sel_r <= do_popcnt;
        end if;
    end process;

    -- Stage 1, zero count: choose the operand.  In 32-bit mode the upper half
    -- is forced to all ones so a set bit is always found within 33 positions.
    select_operand: process(all)
    begin
        if is_32bit /= '0' then
            operand(63 downto 32) <= x"FFFFFFFF";
            if count_right /= '0' then
                operand(31 downto 0) <= rs(31 downto 0);
            else
                operand(31 downto 0) <= bit_reverse(rs(31 downto 0));
            end if;
        elsif count_right /= '0' then
            operand <= rs;
        else
            operand <= bit_reverse(rs);
        end if;
    end process;

    -- Two's-complement negate; bit 64 is the carry-out, which is only set
    -- when operand is all zeroes.
    negated <= std_ulogic_vector(unsigned('0' & not operand) + 1);

    -- x and (-x) keeps only the least-significant set bit of x.
    lowest_set <= negated(63 downto 0) and operand;

    -- Stage 2, zero count: encode the registered one-hot value.
    -- NOTE(review): bit_number comes from work.helpers; presumably it returns
    -- the index of the single set bit -- confirm against helpers.
    bit_index <= bit_number(lowest_set_r);

    -- Zero-extended 7-bit count: carry on top of the 6-bit index.
    pack_zero_count: process(all)
    begin
        zero_count             <= (others => '0');
        zero_count(6)          <= carry_r;
        zero_count(5 downto 0) <= bit_index;
    end process;

    -- Stage 1, popcount: adder tree from single bits up to per-byte counts.
    popcnt_tree: process(all)
    begin
        for k in pair_cnt'range loop
            pair_cnt(k) <= resize(unsigned(rs(2 * k downto 2 * k)), 2) +
                           resize(unsigned(rs(2 * k + 1 downto 2 * k + 1)), 2);
        end loop;
        for k in nibble_cnt'range loop
            nibble_cnt(k) <= resize(pair_cnt(2 * k), 3) + resize(pair_cnt(2 * k + 1), 3);
        end loop;
        for k in byte_cnt'range loop
            byte_cnt(k) <= resize(nibble_cnt(2 * k), 4) + resize(nibble_cnt(2 * k + 1), 4);
        end loop;
    end process;

    -- Stage 2, popcount: finish the reduction from the registered byte counts
    -- and lay the counts out according to the registered datalen.
    popcnt_pack: process(all)
    begin
        for w in word_cnt'range loop
            word_cnt(w) <= resize(byte_cnt_r(4 * w), 6) +
                           resize(byte_cnt_r(4 * w + 1), 6) +
                           resize(byte_cnt_r(4 * w + 2), 6) +
                           resize(byte_cnt_r(4 * w + 3), 6);
        end loop;

        -- Default every bit to zero; the branches below fill in the fields.
        pop_result <= (others => '0');
        if datalen_r(3) /= '0' then
            -- single count over all 64 bits
            pop_result(6 downto 0) <=
                std_ulogic_vector(resize(word_cnt(0), 7) + resize(word_cnt(1), 7));
        elsif datalen_r(2) /= '0' then
            -- popcntw: one 6-bit count at the bottom of each 32-bit word
            for w in word_cnt'range loop
                pop_result(32 * w + 5 downto 32 * w) <= std_ulogic_vector(word_cnt(w));
            end loop;
        else
            -- popcntb: one 4-bit count at the bottom of each byte
            for b in byte_cnt_r'range loop
                pop_result(8 * b + 3 downto 8 * b) <= std_ulogic_vector(byte_cnt_r(b));
            end loop;
        end if;
    end process;

    -- Output mux, steered by the registered do_popcnt.
    with popcnt_sel_r select
        result <= zero_count when '0',
                  pop_result when others;

end behaviour;
@ -1,60 +0,0 @@
@@ -1,60 +0,0 @@
|
||||
library ieee; |
||||
use ieee.std_logic_1164.all; |
||||
use ieee.numeric_std.all; |
||||
|
||||
library work; |
||||
use work.helpers.all; |
||||
|
||||
-- zero_counter: zero-counting unit.
--
-- The architecture registers its intermediate values on the rising edge of
-- clk, so `result` reflects the inputs that were sampled at the previous
-- rising edge.
entity zero_counter is
    port (
        -- Clock; intermediate results are captured on its rising edge.
        clk         : in std_logic;

        -- 64-bit source operand.
        rs          : in std_ulogic_vector(63 downto 0);

        -- Count direction: when '0' the operand is bit-reversed before
        -- counting, otherwise it is used as-is.
        count_right : in std_ulogic;

        -- When not '0' only rs(31 downto 0) is examined.
        is_32bit    : in std_ulogic;

        -- Zero count, zero-extended to 64 bits.
        result      : out std_ulogic_vector(63 downto 0)
    );
end entity zero_counter;
||||
|
||||
-- Two-stage implementation of zero_counter.
--
-- Before the clock edge the lowest set bit of the selected operand is
-- isolated; after the clock edge the registered one-hot value is encoded
-- into the final count.
architecture behaviour of zero_counter is
    signal operand      : std_ulogic_vector(63 downto 0);  -- selected / reversed input
    signal negated      : std_ulogic_vector(64 downto 0);  -- (not operand) + 1, with carry-out
    signal carry_r      : std_ulogic;                      -- registered negated(64)
    signal lowest_set   : std_ulogic_vector(63 downto 0);  -- operand with only its lowest '1' kept
    signal lowest_set_r : std_ulogic_vector(63 downto 0);  -- registered lowest_set
    signal bit_index    : std_ulogic_vector(5 downto 0);

begin
    -- Pipeline registers between the two stages.
    stage_regs: process(clk)
    begin
        if rising_edge(clk) then
            carry_r      <= negated(64);
            lowest_set_r <= lowest_set;
        end if;
    end process;

    -- Choose the operand.  In 32-bit mode the upper half is forced to all
    -- ones so a set bit is always found within 33 positions.
    select_operand: process(all)
    begin
        if is_32bit /= '0' then
            operand(63 downto 32) <= x"FFFFFFFF";
            if count_right /= '0' then
                operand(31 downto 0) <= rs(31 downto 0);
            else
                operand(31 downto 0) <= bit_reverse(rs(31 downto 0));
            end if;
        elsif count_right /= '0' then
            operand <= rs;
        else
            operand <= bit_reverse(rs);
        end if;
    end process;

    -- Two's-complement negate; bit 64 is the carry-out, which is only set
    -- when operand is all zeroes.
    negated <= std_ulogic_vector(unsigned('0' & not operand) + 1);

    -- x and (-x) keeps only the least-significant set bit of x.
    lowest_set <= negated(63 downto 0) and operand;

    -- The following occurs after a clock edge.
    -- NOTE(review): bit_number comes from work.helpers; presumably it returns
    -- the index of the single set bit -- confirm against helpers.
    bit_index <= bit_number(lowest_set_r);

    -- Zero-extended 7-bit count: carry on top of the 6-bit index.
    pack_result: process(all)
    begin
        result             <= (others => '0');
        result(6)          <= carry_r;
        result(5 downto 0) <= bit_index;
    end process;
end behaviour;
Loading…
Reference in new issue