Implement cfuged, pdepd and pextd
This implements the cfuged, pdepd and pextd instructions in a new unit called bit_sorter (so called because cfuged and pextd can be viewed as sorting the bits of the mask). The cnt* instructions and the popcnt* instructions now use the same OP_COUNTB insn_type so as to free up an insn_type value to use for the new instructions. The new instructions are implemented using a slow and simple algorithm that takes 64 cycles to compute the result. The ex1 stage is stalled while this happens, as for a 64-bit multiply, or for a divide when there is no FPU. Signed-off-by: Paul Mackerras <paulus@ozlabs.org>pull/434/head
parent
d7d7a3afd4
commit
fa9df33f7e
@ -0,0 +1,102 @@
|
||||
-- Implements instructions that involve sorting bits,
|
||||
-- that is, cfuged, pextd and pdepd.
|
||||
--
|
||||
-- cfuged: Sort the bits in the mask in RB into 0s at the left, 1s at the right
|
||||
-- and move the bits in RS in the same fashion to give the result
|
||||
-- pextd: Like cfuged but only use the bits of RS where the
|
||||
-- corresponding bit in RB is 1
|
||||
-- pdepd: Inverse of pextd; take the low-order bits of RS and spread them out
|
||||
-- to the bit positions which have a 1 in RB
|
||||
|
||||
-- NB opc is bits 7-6 of the instruction:
|
||||
-- 00 = pdepd, 01 = pextd, 10 = cfuged
|
||||
|
||||
library ieee;
|
||||
use ieee.std_logic_1164.all;
|
||||
use ieee.numeric_std.all;
|
||||
|
||||
library work;
|
||||
use work.helpers.all;
|
||||
|
||||
-- Multi-cycle bit sorting engine for cfuged, pextd and pdepd.
--
-- Ports:
--   clk    : clock; all state changes on its rising edge
--   rst    : synchronous reset, takes priority over go
--   rs     : value operand, captured when go = '1'
--   rb     : mask operand, captured when go = '1'
--   go     : starts (or restarts) a computation
--   opc    : operation select, captured when go = '1':
--              "00"      deposit (pdepd)
--              "01"      extract (pextd)
--              bit 1 set centrifuge (cfuged)
--   done   : pulses high for one clock after the 64th step
--   result : the result register; complete when done is high
entity bit_sorter is
    port (
        clk    : in  std_ulogic;
        rst    : in  std_ulogic;
        rs     : in  std_ulogic_vector(63 downto 0);
        rb     : in  std_ulogic_vector(63 downto 0);
        go     : in  std_ulogic;
        opc    : in  std_ulogic_vector(1 downto 0);
        done   : out std_ulogic;
        result : out std_ulogic_vector(63 downto 0)
    );
end entity bit_sorter;

architecture behaviour of bit_sorter is

    -- Latched operation code that selects the deposit behaviour.
    constant OP_DEPOSIT : std_ulogic_vector(1 downto 0) := "00";
    -- Value of the step counter during the last of the 64 steps.
    constant FINAL_STEP : unsigned(5 downto 0) := to_unsigned(63, 6);

    -- Control state
    signal busy     : std_ulogic;                       -- computation in progress
    signal finished : std_ulogic;                       -- one-clock completion pulse
    signal op_q     : std_ulogic_vector(1 downto 0);    -- opc captured at go
    signal step     : unsigned(5 downto 0);             -- counts steps 0 .. 63

    -- Write positions in the result register
    signal top_ix   : unsigned(5 downto 0);             -- next slot filled from bit 63 downwards
    signal bot_ix   : unsigned(5 downto 0);             -- next slot filled from bit 0 upwards

    -- Mask and value, each kept twice so they can be consumed from both ends
    signal mask_from_msb  : std_ulogic_vector(63 downto 0);
    signal mask_from_lsb  : std_ulogic_vector(63 downto 0);
    signal value_from_msb : std_ulogic_vector(63 downto 0);
    signal value_from_lsb : std_ulogic_vector(63 downto 0);

    -- Result being assembled
    signal acc      : std_ulogic_vector(63 downto 0);

begin

    bsort_r: process(clk)
    begin
        if rising_edge(clk) then
            -- The completion pulse is low unless the final step sets it below.
            finished <= '0';

            if rst = '1' then
                acc  <= (others => '0');
                op_q <= "00";
                busy <= '0';

            elsif go = '1' then
                -- Capture the operands and rewind all counters.
                op_q           <= opc;
                mask_from_msb  <= rb;
                mask_from_lsb  <= rb;
                value_from_msb <= rs;
                value_from_lsb <= rs;
                acc            <= (others => '0');
                step           <= to_unsigned(0, 6);
                top_ix         <= to_unsigned(63, 6);
                bot_ix         <= to_unsigned(0, 6);
                busy           <= '1';

            elsif busy = '1' then
                step <= step + 1;

                -- Both mask copies and the MSB-first value copy advance every step.
                mask_from_msb  <= mask_from_msb(62 downto 0) & '0';
                mask_from_lsb  <= '0' & mask_from_lsb(63 downto 1);
                value_from_msb <= value_from_msb(62 downto 0) & '0';

                -- The LSB-first value copy advances every step, except that for
                -- deposit it is held until a mask 1 consumes its low bit.
                if not (op_q = OP_DEPOSIT and mask_from_lsb(0) /= '1') then
                    value_from_lsb <= '0' & value_from_lsb(63 downto 1);
                end if;

                -- Centrifuge only: value bits under a mask 0, taken MSB first,
                -- are packed downwards from bit 63.
                if op_q(1) = '1' and mask_from_msb(63) = '0' then
                    acc(to_integer(top_ix)) <= value_from_msb(63);
                    top_ix <= top_ix - 1;
                end if;

                -- Mask 1 seen from the LSB end.
                if mask_from_lsb(0) = '1' then
                    bot_ix <= bot_ix + 1;
                    if op_q = OP_DEPOSIT then
                        -- Deposit: the next low value bit lands at the mask position.
                        acc(to_integer(step)) <= value_from_lsb(0);
                    else
                        -- Extract / centrifuge: pack upwards from bit 0.
                        acc(to_integer(bot_ix)) <= value_from_lsb(0);
                    end if;
                end if;

                -- After the 64th step, go idle and flag completion.
                if step = FINAL_STEP then
                    busy     <= '0';
                    finished <= '1';
                end if;
            end if;
        end if;
    end process bsort_r;

    result <= acc;
    done   <= finished;

end architecture behaviour;
|
Loading…
Reference in New Issue