From e527e3a9b72dbfa88d854d3b40fde6e7184614bc Mon Sep 17 00:00:00 2001
From: Paul Mackerras
Date: Fri, 11 Oct 2019 16:06:01 +1100
Subject: [PATCH] countzero: Reorganize to have fewer levels of logic and fewer LUTs

By using 4:1 multiplexers rather than 2:1, this cuts the number of
levels of multiplexing from 4 to 2 and also reduces the total number
of slice LUTs required.

Because we are now handling 4 bits at each level, including the bottom
level, the logic to do the priority encoding can be factored out into
a function that is used at each level.

This rearranges the logic so that the encoding and selection of bits
is done whether or not the input operand is zero, and the if statement
testing whether the input is zero only affects what is assigned to
result. With this we don't get the inferred latches and we can go
back to using signals rather than variables.

Also add some comments about what is being done.

Signed-off-by: Paul Mackerras
---
 countzero.vhdl | 167 +++++++++++++++++++++++++------------------------
 1 file changed, 85 insertions(+), 82 deletions(-)

diff --git a/countzero.vhdl b/countzero.vhdl
index 9b6d2db..d3960f0 100644
--- a/countzero.vhdl
+++ b/countzero.vhdl
@@ -14,97 +14,100 @@ entity zero_counter is
 end entity zero_counter;
 
 architecture behaviour of zero_counter is
-begin
-    zerocounter0: process(all)
-        variable l32, r32 : std_ulogic;
-        variable v32 : std_ulogic_vector(31 downto 0);
-        variable v16 : std_ulogic_vector(15 downto 0);
-        variable v8 : std_ulogic_vector(7 downto 0);
-        variable v4 : std_ulogic_vector(3 downto 0);
-        variable sel : std_ulogic_vector(5 downto 0);
-    begin
-        l32 := '0';
-        r32 := '0';
-        v32 := (others => '0');
-        v16 := (others => '0');
-        v8 := (others => '0');
-        v4 := (others => '0');
-        sel := (others => '0');
-
-        l32 := or (rs(63 downto 32));
-        r32 := or (rs(31 downto 0));
-        if (l32 = '0' or is_32bit = '1') and r32 = '0' then
-            -- operand is zero, return 32 for 32-bit, else 64
-            result <= x"00000000000000" & '0' & not is_32bit & is_32bit & "00000";
-        else
+    signal y, z : std_ulogic_vector(3 downto 0);
+    signal v16 : std_ulogic_vector(15 downto 0);
+    signal v4 : std_ulogic_vector(3 downto 0);
+    signal sel : std_ulogic_vector(5 downto 0);
 
-            if count_right = '0' then
-                sel(5) := l32 and (not is_32bit);
+    -- Return the index of the leftmost or rightmost 1 in a set of 4 bits.
+    -- Assumes v is not "0000"; if it is, return (right ? "11" : "00").
+    function encoder(v: std_ulogic_vector(3 downto 0); right: std_ulogic) return std_ulogic_vector is
+    begin
+        if right = '0' then
+            if v(3) = '1' then
+                return "11";
+            elsif v(2) = '1' then
+                return "10";
+            elsif v(1) = '1' then
+                return "01";
             else
-                sel(5) := (not r32) and (not is_32bit);
+                return "00";
             end if;
-            if sel(5) = '1' then
-                v32 := rs(63 downto 32);
+        else
+            if v(0) = '1' then
+                return "00";
+            elsif v(1) = '1' then
+                return "01";
+            elsif v(2) = '1' then
+                return "10";
             else
-                v32 := rs(31 downto 0);
+                return "11";
             end if;
+        end if;
+    end;
 
-            if count_right = '0' then
-                sel(4) := or (v32(31 downto 16));
-            else
-                sel(4) := not (or (v32(15 downto 0)));
-            end if;
-            if sel(4) = '1' then
-                v16 := v32(31 downto 16);
-            else
-                v16 := v32(15 downto 0);
-            end if;
+begin
+    zerocounter0: process(all)
+    begin
+        -- Test 4 groups of 16 bits each.
+        -- The top 2 groups are considered to be zero in 32-bit mode.
+        z(0) <= or (rs(15 downto 0));
+        z(1) <= or (rs(31 downto 16));
+        z(2) <= or (rs(47 downto 32));
+        z(3) <= or (rs(63 downto 48));
+        if is_32bit = '0' then
+            sel(5 downto 4) <= encoder(z, count_right);
+        else
+            sel(5) <= '0';
+            if count_right = '0' then
+                sel(4) <= z(1);
+            else
+                sel(4) <= not z(0);
+            end if;
+        end if;
 
-            if count_right = '0' then
-                sel(3) := or (v16(15 downto 8));
-            else
-                sel(3) := not (or (v16(7 downto 0)));
-            end if;
-            if sel(3) = '1' then
-                v8 := v16(15 downto 8);
-            else
-                v8 := v16(7 downto 0);
-            end if;
+        -- Select the leftmost/rightmost non-zero group of 16 bits
+        case sel(5 downto 4) is
+            when "00" =>
+                v16 <= rs(15 downto 0);
+            when "01" =>
+                v16 <= rs(31 downto 16);
+            when "10" =>
+                v16 <= rs(47 downto 32);
+            when others =>
+                v16 <= rs(63 downto 48);
+        end case;
 
-            if count_right = '0' then
-                sel(2) := or (v8(7 downto 4));
-            else
-                sel(2) := not (or (v8(3 downto 0)));
-            end if;
-            if sel(2) = '1' then
-                v4 := v8(7 downto 4);
-            else
-                v4 := v8(3 downto 0);
-            end if;
+        -- Test 4 groups of 4 bits
+        y(0) <= or (v16(3 downto 0));
+        y(1) <= or (v16(7 downto 4));
+        y(2) <= or (v16(11 downto 8));
+        y(3) <= or (v16(15 downto 12));
+        sel(3 downto 2) <= encoder(y, count_right);
 
-            if count_right = '0' then
-                if v4(3) = '1' then
-                    sel(1 downto 0) := "11";
-                elsif v4(2) = '1' then
-                    sel(1 downto 0) := "10";
-                elsif v4(1) = '1' then
-                    sel(1 downto 0) := "01";
-                else
-                    sel(1 downto 0) := "00";
-                end if;
-                result <= x"00000000000000" & "00" & (not sel(5) and not is_32bit) & not sel(4 downto 0);
-            else
-                if v4(0) = '1' then
-                    sel(1 downto 0) := "00";
-                elsif v4(1) = '1' then
-                    sel(1 downto 0) := "01";
-                elsif v4(2) = '1' then
-                    sel(1 downto 0) := "10";
-                else
-                    sel(1 downto 0) := "11";
-                end if;
-                result <= x"00000000000000" & "00" & sel;
-            end if;
+        -- Select the leftmost/rightmost non-zero group of 4 bits
+        case sel(3 downto 2) is
+            when "00" =>
+                v4 <= v16(3 downto 0);
+            when "01" =>
+                v4 <= v16(7 downto 4);
+            when "10" =>
+                v4 <= v16(11 downto 8);
+            when others =>
+                v4 <= v16(15 downto 12);
+        end case;
+
+        sel(1 downto 0) <= encoder(v4, count_right);
+
+        -- sel is now the index of the leftmost/rightmost 1 bit in rs
+        if v4 = "0000" then
+            -- operand is zero, return 32 for 32-bit, else 64
+            result <= x"00000000000000" & '0' & not is_32bit & is_32bit & "00000";
+        elsif count_right = '0' then
+            -- return (63 - sel), trimmed to 5 bits in 32-bit mode
+            result <= x"00000000000000" & "00" & (not sel(5) and not is_32bit) & not sel(4 downto 0);
+        else
+            result <= x"00000000000000" & "00" & sel;
         end if;
     end process;
 