divider: Do absolute-value ops in divider instead of decode

This moves the negation of negative operands for signed divide and
modulus operations out of the decode2 stage and into the divider.
If either of the operands for a signed divide or modulus operation
is negative, the divider now takes an extra cycle to negate the
operands that are negative.

The interface to the divider now has an 'is_signed' signal rather
than a 'neg_result' signal, and the dividend and divisor can be
negative, so divider_tb had to be updated for the new interface.

The reason for doing this is that one of the worst timing violations
on the Arty A7-100 at 100MHz involved the carry chain in the adders
that did the negation of the dividend and divisor in the decode stage.
Moving the negations to a separate cycle fixes that and also seems to
reduce the total number of slice LUTs used.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/74/head
Paul Mackerras 5 years ago
parent e6536d4b8b
commit 25b9450475

@ -81,13 +81,13 @@ package common is
write_reg: std_ulogic_vector(4 downto 0);
dividend: std_ulogic_vector(63 downto 0);
divisor: std_ulogic_vector(63 downto 0);
neg_result: std_ulogic;
is_signed: std_ulogic;
is_32bit: std_ulogic;
is_extended: std_ulogic;
is_modulus: std_ulogic;
rc: std_ulogic;
end record;
constant Decode2ToDividerInit: Decode2ToDividerType := (valid => '0', neg_result => '0', is_32bit => '0', is_extended => '0', is_modulus => '0', rc => '0', others => (others => '0'));
constant Decode2ToDividerInit: Decode2ToDividerType := (valid => '0', is_signed => '0', is_32bit => '0', is_extended => '0', is_modulus => '0', rc => '0', others => (others => '0'));

type Decode2ToRegisterFileType is record
read1_enable : std_ulogic;

@ -221,9 +221,6 @@ begin
variable v_int : reg_internal_type;
variable mul_a : std_ulogic_vector(63 downto 0);
variable mul_b : std_ulogic_vector(63 downto 0);
variable dividend : std_ulogic_vector(63 downto 0);
variable divisor : std_ulogic_vector(63 downto 0);
variable absdend : std_ulogic_vector(31 downto 0);
variable decoded_reg_a : decode_input_reg_t;
variable decoded_reg_b : decode_input_reg_t;
variable decoded_reg_c : decode_input_reg_t;
@ -308,60 +305,36 @@ begin
-- s = 1 for signed, 0 for unsigned (for div*)
-- t = 1 for 32-bit, 0 for 64-bit
-- r = RC bit (record condition code)
-- For signed division/modulus, we take absolute values and
-- tell the divider what the sign of the result should be,
-- which is the dividend sign for modulus, and the XOR of
-- the dividend and divisor signs for division.
dividend := decoded_reg_a.data;
divisor := decoded_reg_b.data;
v.d.write_reg := decode_output_reg(d_in.decode.output_reg_a, d_in.insn);
v.d.is_modulus := not d_in.insn(8);
v.d.is_32bit := not d_in.insn(2);
if d_in.insn(8) = '1' then
signed_division := d_in.insn(6);
else
signed_division := d_in.insn(10);
end if;
v.d.is_signed := signed_division;
if d_in.insn(2) = '0' then
-- 64-bit forms
v.d.is_32bit := '0';
if d_in.insn(8) = '1' and d_in.insn(7) = '0' then
v.d.is_extended := '1';
end if;
if signed_division = '1' and dividend(63) = '1' then
v.d.neg_result := '1';
v.d.dividend := std_ulogic_vector(- signed(dividend));
else
v.d.dividend := dividend;
end if;
if signed_division = '1' and divisor(63) = '1' then
if d_in.insn(8) = '1' then
v.d.neg_result := not v.d.neg_result;
end if;
v.d.divisor := std_ulogic_vector(- signed(divisor));
else
v.d.divisor := divisor;
end if;
v.d.dividend := decoded_reg_a.data;
v.d.divisor := decoded_reg_b.data;
else
-- 32-bit forms
v.d.is_32bit := '1';
if signed_division = '1' and dividend(31) = '1' then
v.d.neg_result := '1';
absdend := std_ulogic_vector(- signed(dividend(31 downto 0)));
else
absdend := dividend(31 downto 0);
end if;
if d_in.insn(8) = '1' and d_in.insn(7) = '0' then -- extended forms
v.d.dividend := absdend & x"00000000";
v.d.dividend := decoded_reg_a.data(31 downto 0) & x"00000000";
elsif signed_division = '1' and decoded_reg_a.data(31) = '1' then
-- sign extend to 64 bits
v.d.dividend := x"ffffffff" & decoded_reg_a.data(31 downto 0);
else
v.d.dividend := x"00000000" & absdend;
end if;
if signed_division = '1' and divisor(31) = '1' then
if d_in.insn(8) = '1' then
v.d.neg_result := not v.d.neg_result;
v.d.dividend := x"00000000" & decoded_reg_a.data(31 downto 0);
end if;
v.d.divisor := x"00000000" & std_ulogic_vector(- signed(divisor(31 downto 0)));
if signed_division = '1' and decoded_reg_b.data(31) = '1' then
v.d.divisor := x"ffffffff" & decoded_reg_b.data(31 downto 0);
else
v.d.divisor := x"00000000" & divisor(31 downto 0);
v.d.divisor := x"00000000" & decoded_reg_b.data(31 downto 0);
end if;
end if;
v.d.rc := decode_rc(d_in.decode.rc, d_in.insn);

@ -24,10 +24,12 @@ architecture behaviour of divider is
signal sresult : std_ulogic_vector(63 downto 0);
signal qbit : std_ulogic;
signal running : std_ulogic;
signal signcheck : std_ulogic;
signal count : unsigned(6 downto 0);
signal neg_result : std_ulogic;
signal is_modulus : std_ulogic;
signal is_32bit : std_ulogic;
signal extended : std_ulogic;
signal rc : std_ulogic;
signal write_reg : std_ulogic_vector(4 downto 0);

@ -64,7 +66,7 @@ begin
running <= '0';
count <= "0000000";
elsif d_in.valid = '1' then
if d_in.is_extended = '1' then
if d_in.is_extended = '1' and not (d_in.is_signed = '1' and d_in.dividend(63) = '1') then
dend <= d_in.dividend & x"0000000000000000";
else
dend <= x"0000000000000000" & d_in.dividend;
@ -72,12 +74,27 @@ begin
div <= unsigned(d_in.divisor);
quot <= (others => '0');
write_reg <= d_in.write_reg;
neg_result <= d_in.neg_result;
neg_result <= '0';
is_modulus <= d_in.is_modulus;
extended <= d_in.is_extended;
is_32bit <= d_in.is_32bit;
rc <= d_in.rc;
count <= "0000000";
running <= '1';
signcheck <= d_in.is_signed and (d_in.dividend(63) or d_in.divisor(63));
elsif signcheck = '1' then
signcheck <= '0';
neg_result <= dend(63) xor (div(63) and not is_modulus);
if dend(63) = '1' then
if extended = '1' then
dend <= std_ulogic_vector(- signed(dend(63 downto 0))) & x"0000000000000000";
else
dend <= x"0000000000000000" & std_ulogic_vector(- signed(dend(63 downto 0)));
end if;
end if;
if div(63) = '1' then
div <= unsigned(- signed(div));
end if;
elsif running = '1' then
if count = "0111111" then
running <= '0';

@ -44,7 +44,7 @@ begin
d1.write_reg <= "10001";
d1.dividend <= x"0000000010001000";
d1.divisor <= x"0000000000001111";
d1.neg_result <= '0';
d1.is_signed <= '0';
d1.is_32bit <= '0';
d1.is_extended <= '0';
d1.is_modulus <= '0';
@ -55,7 +55,7 @@ begin

d1.valid <= '0';

for j in 0 to 64 loop
for j in 0 to 65 loop
wait for clk_period;
if d2.valid = '1' then
exit;
@ -79,7 +79,7 @@ begin

d1.valid <= '0';

for j in 0 to 64 loop
for j in 0 to 65 loop
wait for clk_period;
if d2.valid = '1' then
exit;
@ -105,27 +105,15 @@ begin
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));

if ra(63) = '1' then
d1.dividend <= std_ulogic_vector(- signed(ra));
else
d1.dividend <= ra;
end if;
if rb(63) = '1' then
d1.divisor <= std_ulogic_vector(- signed(rb));
else
d1.divisor <= rb;
end if;
if ra(63) = rb(63) then
d1.neg_result <= '0';
else
d1.neg_result <= '1';
end if;
d1.is_signed <= '1';
d1.valid <= '1';

wait for clk_period;

d1.valid <= '0';
for j in 0 to 64 loop
for j in 0 to 65 loop
wait for clk_period;
if d2.valid = '1' then
exit;
@ -154,13 +142,13 @@ begin

d1.dividend <= ra;
d1.divisor <= rb;
d1.neg_result <= '0';
d1.is_signed <= '0';
d1.valid <= '1';

wait for clk_period;

d1.valid <= '0';
for j in 0 to 64 loop
for j in 0 to 65 loop
wait for clk_period;
if d2.valid = '1' then
exit;
@ -187,28 +175,16 @@ begin
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));

if ra(63) = '1' then
d1.dividend <= std_ulogic_vector(- signed(ra));
else
d1.dividend <= ra;
end if;
if rb(63) = '1' then
d1.divisor <= std_ulogic_vector(- signed(rb));
else
d1.divisor <= rb;
end if;
if ra(63) = rb(63) then
d1.neg_result <= '0';
else
d1.neg_result <= '1';
end if;
d1.is_signed <= '1';
d1.is_extended <= '1';
d1.valid <= '1';

wait for clk_period;

d1.valid <= '0';
for j in 0 to 64 loop
for j in 0 to 65 loop
wait for clk_period;
if d2.valid = '1' then
exit;
@ -216,15 +192,18 @@ begin
end loop;
assert d2.valid = '1';

if unsigned(d1.divisor) > unsigned(d1.dividend) then
if rb /= x"0000000000000000" then
d128 := ra & x"0000000000000000";
q128 := std_ulogic_vector(signed(d128) / signed(rb));
if q128(127 downto 63) = x"0000000000000000" & '0' or
q128(127 downto 63) = x"ffffffffffffffff" & '1' then
behave_rt := q128(63 downto 0);
assert to_hstring(behave_rt) = to_hstring(d2.write_reg_data)
report "bad divde expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data) & " for ra = " & to_hstring(ra) & " rb = " & to_hstring(rb);
assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
report "bad CR setting for divde";
end if;
end if;
end loop;
end loop;
end loop;
@ -239,14 +218,14 @@ begin

d1.dividend <= ra;
d1.divisor <= rb;
d1.neg_result <= '0';
d1.is_signed <= '0';
d1.is_extended <= '1';
d1.valid <= '1';

wait for clk_period;

d1.valid <= '0';
for j in 0 to 64 loop
for j in 0 to 65 loop
wait for clk_period;
if d2.valid = '1' then
exit;
@ -254,7 +233,7 @@ begin
end loop;
assert d2.valid = '1';

if unsigned(d1.divisor) > unsigned(d1.dividend) then
if unsigned(rb) > unsigned(ra) then
d128 := ra & x"0000000000000000";
q128 := std_ulogic_vector(unsigned(d128) / unsigned(rb));
behave_rt := q128(63 downto 0);
@ -275,21 +254,9 @@ begin
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));

if ra(63) = '1' then
d1.dividend <= std_ulogic_vector(- signed(ra));
else
d1.dividend <= ra;
end if;
if rb(63) = '1' then
d1.divisor <= std_ulogic_vector(- signed(rb));
else
d1.divisor <= rb;
end if;
if ra(63) = rb(63) then
d1.neg_result <= '0';
else
d1.neg_result <= '1';
end if;
d1.is_signed <= '1';
d1.is_extended <= '0';
d1.is_32bit <= '1';
d1.valid <= '1';
@ -297,7 +264,7 @@ begin
wait for clk_period;

d1.valid <= '0';
for j in 0 to 64 loop
for j in 0 to 65 loop
wait for clk_period;
if d2.valid = '1' then
exit;
@ -326,7 +293,7 @@ begin

d1.dividend <= ra;
d1.divisor <= rb;
d1.neg_result <= '0';
d1.is_signed <= '0';
d1.is_extended <= '0';
d1.is_32bit <= '1';
d1.valid <= '1';
@ -334,7 +301,7 @@ begin
wait for clk_period;

d1.valid <= '0';
for j in 0 to 64 loop
for j in 0 to 65 loop
wait for clk_period;
if d2.valid = '1' then
exit;
@ -361,21 +328,9 @@ begin
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 32)) & x"00000000";
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));

if ra(63) = '1' then
d1.dividend <= std_ulogic_vector(- signed(ra));
else
d1.dividend <= ra;
end if;
if rb(63) = '1' then
d1.divisor <= std_ulogic_vector(- signed(rb));
else
d1.divisor <= rb;
end if;
if ra(63) = rb(63) then
d1.neg_result <= '0';
else
d1.neg_result <= '1';
end if;
d1.is_signed <= '1';
d1.is_extended <= '0';
d1.is_32bit <= '1';
d1.valid <= '1';
@ -383,7 +338,7 @@ begin
wait for clk_period;

d1.valid <= '0';
for j in 0 to 64 loop
for j in 0 to 65 loop
wait for clk_period;
if d2.valid = '1' then
exit;
@ -391,13 +346,16 @@ begin
end loop;
assert d2.valid = '1';

if unsigned(d1.divisor(31 downto 0)) > unsigned(d1.dividend(63 downto 32)) then
if rb /= x"0000000000000000" then
behave_rt := std_ulogic_vector(signed(ra) / signed(rb));
if behave_rt(63 downto 31) = x"00000000" & '0' or
behave_rt(63 downto 31) = x"ffffffff" & '1' then
assert behave_rt(31 downto 0) = d2.write_reg_data(31 downto 0)
report "bad divwe expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data) & " for ra = " & to_hstring(ra) & " rb = " & to_hstring(rb);
assert ppc_cmpi('0', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
report "bad CR setting for divwe";
end if;
end if;
end loop;
end loop;
end loop;
@ -412,7 +370,7 @@ begin

d1.dividend <= ra;
d1.divisor <= rb;
d1.neg_result <= '0';
d1.is_signed <= '0';
d1.is_extended <= '0';
d1.is_32bit <= '1';
d1.valid <= '1';
@ -420,7 +378,7 @@ begin
wait for clk_period;

d1.valid <= '0';
for j in 0 to 64 loop
for j in 0 to 65 loop
wait for clk_period;
if d2.valid = '1' then
exit;
@ -428,7 +386,7 @@ begin
end loop;
assert d2.valid = '1';

if unsigned(d1.divisor(31 downto 0)) > unsigned(d1.dividend(63 downto 32)) then
if unsigned(rb(31 downto 0)) > unsigned(ra(63 downto 32)) then
behave_rt := std_ulogic_vector(unsigned(ra) / unsigned(rb));
assert behave_rt(31 downto 0) = d2.write_reg_data(31 downto 0)
report "bad divweu expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data) & " for ra = " & to_hstring(ra) & " rb = " & to_hstring(rb);
@ -447,17 +405,9 @@ begin
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));

if ra(63) = '1' then
d1.dividend <= std_ulogic_vector(- signed(ra));
else
d1.dividend <= ra;
end if;
if rb(63) = '1' then
d1.divisor <= std_ulogic_vector(- signed(rb));
else
d1.divisor <= rb;
end if;
d1.neg_result <= ra(63);
d1.is_signed <= '1';
d1.is_extended <= '0';
d1.is_32bit <= '0';
d1.is_modulus <= '1';
@ -466,7 +416,7 @@ begin
wait for clk_period;

d1.valid <= '0';
for j in 0 to 64 loop
for j in 0 to 65 loop
wait for clk_period;
if d2.valid = '1' then
exit;
@ -495,7 +445,7 @@ begin

d1.dividend <= ra;
d1.divisor <= rb;
d1.neg_result <= '0';
d1.is_signed <= '0';
d1.is_extended <= '0';
d1.is_32bit <= '0';
d1.is_modulus <= '1';
@ -504,7 +454,7 @@ begin
wait for clk_period;

d1.valid <= '0';
for j in 0 to 64 loop
for j in 0 to 65 loop
wait for clk_period;
if d2.valid = '1' then
exit;
@ -531,17 +481,9 @@ begin
ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));

if ra(63) = '1' then
d1.dividend <= std_ulogic_vector(- signed(ra));
else
d1.dividend <= ra;
end if;
if rb(63) = '1' then
d1.divisor <= std_ulogic_vector(- signed(rb));
else
d1.divisor <= rb;
end if;
d1.neg_result <= ra(63);
d1.is_signed <= '1';
d1.is_extended <= '0';
d1.is_32bit <= '1';
d1.is_modulus <= '1';
@ -550,7 +492,7 @@ begin
wait for clk_period;

d1.valid <= '0';
for j in 0 to 64 loop
for j in 0 to 65 loop
wait for clk_period;
if d2.valid = '1' then
exit;
@ -579,7 +521,7 @@ begin

d1.dividend <= ra;
d1.divisor <= rb;
d1.neg_result <= '0';
d1.is_signed <= '0';
d1.is_extended <= '0';
d1.is_32bit <= '1';
d1.is_modulus <= '1';
@ -588,7 +530,7 @@ begin
wait for clk_period;

d1.valid <= '0';
for j in 0 to 64 loop
for j in 0 to 65 loop
wait for clk_period;
if d2.valid = '1' then
exit;

Loading…
Cancel
Save