From e6536d4b8bf93cdb5456b6cd97cfbc4ebc41f34c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 25 Sep 2019 20:03:46 +1000 Subject: [PATCH 1/2] divider: Always compute result/sresult/d_out.write_reg_data These are intended to be combinatorial. The previous code was giving warnings in vivado about registers/latches with no clock defined. Signed-off-by: Paul Mackerras --- divider.vhdl | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/divider.vhdl b/divider.vhdl index 6b20576..c133522 100644 --- a/divider.vhdl +++ b/divider.vhdl @@ -108,20 +108,21 @@ begin d_out <= DividerToWritebackInit; d_out.write_reg_nr <= write_reg; + if is_modulus = '1' then + result <= dend(127 downto 64); + else + result <= quot; + end if; + if neg_result = '1' then + sresult <= std_ulogic_vector(- signed(result)); + else + sresult <= result; + end if; + d_out.write_reg_data <= sresult; + if count(6) = '1' then d_out.valid <= '1'; d_out.write_reg_enable <= '1'; - if is_modulus = '1' then - result <= dend(127 downto 64); - else - result <= quot; - end if; - if neg_result = '1' then - sresult <= std_ulogic_vector(- signed(result)); - else - sresult <= result; - end if; - d_out.write_reg_data <= sresult; if rc = '1' then d_out.write_cr_enable <= '1'; d_out.write_cr_mask <= num_to_fxm(0); From 25b9450475704ae58cddf44438248d3a3bb56f1e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 28 Sep 2019 08:55:08 +1000 Subject: [PATCH 2/2] divider: Do absolute-value ops in divider instead of decode This moves the negation of negative operands for signed divide and modulus operations out of the decode2 stage and into the divider. If either of the operands for a signed divide or modulus operation is negative, the divider now takes an extra cycle to negate the operands that are negative. The interface to the divider now has an 'is_signed' signal rather than a 'neg_result' signal, and the dividend and divisor can be negative, so divider_tb had to be updated for the new interface. The reason for doing this is that one of the worst timing violations on the Arty A7-100 at 100MHz involved the carry chain in the adders that did the negation of the dividend and divisor in the decode stage. Moving the negations to a separate cycle fixes that and also seems to reduce the total number of slice LUTs used. Signed-off-by: Paul Mackerras --- common.vhdl | 4 +- decode2.vhdl | 57 +++++----------- divider.vhdl | 21 +++++- divider_tb.vhdl | 174 ++++++++++++++++-------------------------------- 4 files changed, 94 insertions(+), 162 deletions(-) diff --git a/common.vhdl b/common.vhdl index bd9b7d0..d7252f6 100644 --- a/common.vhdl +++ b/common.vhdl @@ -81,13 +81,13 @@ package common is write_reg: std_ulogic_vector(4 downto 0); dividend: std_ulogic_vector(63 downto 0); divisor: std_ulogic_vector(63 downto 0); - neg_result: std_ulogic; + is_signed: std_ulogic; is_32bit: std_ulogic; is_extended: std_ulogic; is_modulus: std_ulogic; rc: std_ulogic; end record; - constant Decode2ToDividerInit: Decode2ToDividerType := (valid => '0', neg_result => '0', is_32bit => '0', is_extended => '0', is_modulus => '0', rc => '0', others => (others => '0')); + constant Decode2ToDividerInit: Decode2ToDividerType := (valid => '0', is_signed => '0', is_32bit => '0', is_extended => '0', is_modulus => '0', rc => '0', others => (others => '0')); type Decode2ToRegisterFileType is record read1_enable : std_ulogic; diff --git a/decode2.vhdl b/decode2.vhdl index deb16dd..2ace765 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -221,9 +221,6 @@ begin variable v_int : reg_internal_type; variable mul_a : std_ulogic_vector(63 downto 0); variable mul_b : std_ulogic_vector(63 downto 0); - variable dividend : std_ulogic_vector(63 downto 0); - variable divisor : std_ulogic_vector(63 downto 0); - variable absdend : std_ulogic_vector(31 downto 0); variable decoded_reg_a : decode_input_reg_t; variable decoded_reg_b : decode_input_reg_t; variable decoded_reg_c : decode_input_reg_t; @@ -308,60 +305,36 @@ begin -- s = 1 for signed, 0 for unsigned (for div*) -- t = 1 for 32-bit, 0 for 64-bit -- r = RC bit (record condition code) - -- For signed division/modulus, we take absolute values and - -- tell the divider what the sign of the result should be, - -- which is the dividend sign for modulus, and the XOR of - -- the dividend and divisor signs for division. - dividend := decoded_reg_a.data; - divisor := decoded_reg_b.data; v.d.write_reg := decode_output_reg(d_in.decode.output_reg_a, d_in.insn); v.d.is_modulus := not d_in.insn(8); + v.d.is_32bit := not d_in.insn(2); if d_in.insn(8) = '1' then - signed_division := d_in.insn(6); + signed_division := d_in.insn(6); else - signed_division := d_in.insn(10); + signed_division := d_in.insn(10); end if; + v.d.is_signed := signed_division; if d_in.insn(2) = '0' then - -- 64-bit forms - v.d.is_32bit := '0'; + -- 64-bit forms if d_in.insn(8) = '1' and d_in.insn(7) = '0' then v.d.is_extended := '1'; end if; - if signed_division = '1' and dividend(63) = '1' then - v.d.neg_result := '1'; - v.d.dividend := std_ulogic_vector(- signed(dividend)); - else - v.d.dividend := dividend; - end if; - if signed_division = '1' and divisor(63) = '1' then - if d_in.insn(8) = '1' then - v.d.neg_result := not v.d.neg_result; - end if; - v.d.divisor := std_ulogic_vector(- signed(divisor)); - else - v.d.divisor := divisor; - end if; + v.d.dividend := decoded_reg_a.data; + v.d.divisor := decoded_reg_b.data; else -- 32-bit forms - v.d.is_32bit := '1'; - if signed_division = '1' and dividend(31) = '1' then - v.d.neg_result := '1'; - absdend := std_ulogic_vector(- signed(dividend(31 downto 0))); - else - absdend := dividend(31 downto 0); - end if; if d_in.insn(8) = '1' and d_in.insn(7) = '0' then -- extended forms - v.d.dividend := absdend & x"00000000"; + v.d.dividend := decoded_reg_a.data(31 downto 0) & x"00000000"; + elsif signed_division = '1' and decoded_reg_a.data(31) = '1' then + -- sign extend to 64 bits + v.d.dividend := x"ffffffff" & decoded_reg_a.data(31 downto 0); else - v.d.dividend := x"00000000" & absdend; + v.d.dividend := x"00000000" & decoded_reg_a.data(31 downto 0); end if; - if signed_division = '1' and divisor(31) = '1' then - if d_in.insn(8) = '1' then - v.d.neg_result := not v.d.neg_result; - end if; - v.d.divisor := x"00000000" & std_ulogic_vector(- signed(divisor(31 downto 0))); + if signed_division = '1' and decoded_reg_b.data(31) = '1' then + v.d.divisor := x"ffffffff" & decoded_reg_b.data(31 downto 0); else - v.d.divisor := x"00000000" & divisor(31 downto 0); + v.d.divisor := x"00000000" & decoded_reg_b.data(31 downto 0); end if; end if; v.d.rc := decode_rc(d_in.decode.rc, d_in.insn); diff --git a/divider.vhdl b/divider.vhdl index c133522..8345623 100644 --- a/divider.vhdl +++ b/divider.vhdl @@ -24,10 +24,12 @@ architecture behaviour of divider is signal sresult : std_ulogic_vector(63 downto 0); signal qbit : std_ulogic; signal running : std_ulogic; + signal signcheck : std_ulogic; signal count : unsigned(6 downto 0); signal neg_result : std_ulogic; signal is_modulus : std_ulogic; signal is_32bit : std_ulogic; + signal extended : std_ulogic; signal rc : std_ulogic; signal write_reg : std_ulogic_vector(4 downto 0); @@ -64,7 +66,7 @@ begin running <= '0'; count <= "0000000"; elsif d_in.valid = '1' then - if d_in.is_extended = '1' then + if d_in.is_extended = '1' and not (d_in.is_signed = '1' and d_in.dividend(63) = '1') then dend <= d_in.dividend & x"0000000000000000"; else dend <= x"0000000000000000" & d_in.dividend; @@ -72,12 +74,27 @@ begin div <= unsigned(d_in.divisor); quot <= (others => '0'); write_reg <= d_in.write_reg; - neg_result <= d_in.neg_result; + neg_result <= '0'; is_modulus <= d_in.is_modulus; + extended <= d_in.is_extended; is_32bit <= d_in.is_32bit; rc <= d_in.rc; count <= "0000000"; running <= '1'; + signcheck <= d_in.is_signed and (d_in.dividend(63) or d_in.divisor(63)); + elsif signcheck = '1' then + signcheck <= '0'; + neg_result <= dend(63) xor (div(63) and not is_modulus); + if dend(63) = '1' then + if extended = '1' then + dend <= std_ulogic_vector(- signed(dend(63 downto 0))) & x"0000000000000000"; + else + dend <= x"0000000000000000" & std_ulogic_vector(- signed(dend(63 downto 0))); + end if; + end if; + if div(63) = '1' then + div <= unsigned(- signed(div)); + end if; elsif running = '1' then if count = "0111111" then running <= '0'; diff --git a/divider_tb.vhdl b/divider_tb.vhdl index 0fa7f05..3d8fc09 100644 --- a/divider_tb.vhdl +++ b/divider_tb.vhdl @@ -44,7 +44,7 @@ begin d1.write_reg <= "10001"; d1.dividend <= x"0000000010001000"; d1.divisor <= x"0000000000001111"; - d1.neg_result <= '0'; + d1.is_signed <= '0'; d1.is_32bit <= '0'; d1.is_extended <= '0'; d1.is_modulus <= '0'; @@ -55,7 +55,7 @@ begin d1.valid <= '0'; - for j in 0 to 64 loop + for j in 0 to 65 loop wait for clk_period; if d2.valid = '1' then exit; @@ -79,7 +79,7 @@ begin d1.valid <= '0'; - for j in 0 to 64 loop + for j in 0 to 65 loop wait for clk_period; if d2.valid = '1' then exit; @@ -105,27 +105,15 @@ begin ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64)); rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64)); - if ra(63) = '1' then - d1.dividend <= std_ulogic_vector(- signed(ra)); - else - d1.dividend <= ra; - end if; - if rb(63) = '1' then - d1.divisor <= std_ulogic_vector(- signed(rb)); - else - d1.divisor <= rb; - end if; - if ra(63) = rb(63) then - d1.neg_result <= '0'; - else - d1.neg_result <= '1'; - end if; + d1.dividend <= ra; + d1.divisor <= rb; + d1.is_signed <= '1'; d1.valid <= '1'; wait for clk_period; d1.valid <= '0'; - for j in 0 to 64 loop + for j in 0 to 65 loop wait for clk_period; if d2.valid = '1' then exit; @@ -154,13 +142,13 @@ begin d1.dividend <= ra; d1.divisor <= rb; - d1.neg_result <= '0'; + d1.is_signed <= '0'; d1.valid <= '1'; wait for clk_period; d1.valid <= '0'; - for j in 0 to 64 loop + for j in 0 to 65 loop wait for clk_period; if d2.valid = '1' then exit; @@ -187,28 +175,16 @@ begin ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64)); rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64)); - if ra(63) = '1' then - d1.dividend <= std_ulogic_vector(- signed(ra)); - else - d1.dividend <= ra; - end if; - if rb(63) = '1' then - d1.divisor <= std_ulogic_vector(- signed(rb)); - else - d1.divisor <= rb; - end if; - if ra(63) = rb(63) then - d1.neg_result <= '0'; - else - d1.neg_result <= '1'; - end if; + d1.dividend <= ra; + d1.divisor <= rb; + d1.is_signed <= '1'; d1.is_extended <= '1'; d1.valid <= '1'; wait for clk_period; d1.valid <= '0'; - for j in 0 to 64 loop + for j in 0 to 65 loop wait for clk_period; if d2.valid = '1' then exit; @@ -216,14 +192,17 @@ begin end loop; assert d2.valid = '1'; - if unsigned(d1.divisor) > unsigned(d1.dividend) then + if rb /= x"0000000000000000" then d128 := ra & x"0000000000000000"; q128 := std_ulogic_vector(signed(d128) / signed(rb)); - behave_rt := q128(63 downto 0); - assert to_hstring(behave_rt) = to_hstring(d2.write_reg_data) - report "bad divde expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data) & " for ra = " & to_hstring(ra) & " rb = " & to_hstring(rb); - assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data - report "bad CR setting for divde"; + if q128(127 downto 63) = x"0000000000000000" & '0' or + q128(127 downto 63) = x"ffffffffffffffff" & '1' then + behave_rt := q128(63 downto 0); + assert to_hstring(behave_rt) = to_hstring(d2.write_reg_data) + report "bad divde expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data) & " for ra = " & to_hstring(ra) & " rb = " & to_hstring(rb); + assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data + report "bad CR setting for divde"; + end if; end if; end loop; end loop; @@ -239,14 +218,14 @@ begin d1.dividend <= ra; d1.divisor <= rb; - d1.neg_result <= '0'; + d1.is_signed <= '0'; d1.is_extended <= '1'; d1.valid <= '1'; wait for clk_period; d1.valid <= '0'; - for j in 0 to 64 loop + for j in 0 to 65 loop wait for clk_period; if d2.valid = '1' then exit; @@ -254,7 +233,7 @@ begin end loop; assert d2.valid = '1'; - if unsigned(d1.divisor) > unsigned(d1.dividend) then + if unsigned(rb) > unsigned(ra) then d128 := ra & x"0000000000000000"; q128 := std_ulogic_vector(unsigned(d128) / unsigned(rb)); behave_rt := q128(63 downto 0); @@ -275,21 +254,9 @@ begin ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64)); rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64)); - if ra(63) = '1' then - d1.dividend <= std_ulogic_vector(- signed(ra)); - else - d1.dividend <= ra; - end if; - if rb(63) = '1' then - d1.divisor <= std_ulogic_vector(- signed(rb)); - else - d1.divisor <= rb; - end if; - if ra(63) = rb(63) then - d1.neg_result <= '0'; - else - d1.neg_result <= '1'; - end if; + d1.dividend <= ra; + d1.divisor <= rb; + d1.is_signed <= '1'; d1.is_extended <= '0'; d1.is_32bit <= '1'; d1.valid <= '1'; @@ -297,7 +264,7 @@ begin wait for clk_period; d1.valid <= '0'; - for j in 0 to 64 loop + for j in 0 to 65 loop wait for clk_period; if d2.valid = '1' then exit; @@ -326,7 +293,7 @@ begin d1.dividend <= ra; d1.divisor <= rb; - d1.neg_result <= '0'; + d1.is_signed <= '0'; d1.is_extended <= '0'; d1.is_32bit <= '1'; d1.valid <= '1'; @@ -334,7 +301,7 @@ begin wait for clk_period; d1.valid <= '0'; - for j in 0 to 64 loop + for j in 0 to 65 loop wait for clk_period; if d2.valid = '1' then exit; @@ -361,21 +328,9 @@ begin ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 32)) & x"00000000"; rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64)); - if ra(63) = '1' then - d1.dividend <= std_ulogic_vector(- signed(ra)); - else - d1.dividend <= ra; - end if; - if rb(63) = '1' then - d1.divisor <= std_ulogic_vector(- signed(rb)); - else - d1.divisor <= rb; - end if; - if ra(63) = rb(63) then - d1.neg_result <= '0'; - else - d1.neg_result <= '1'; - end if; + d1.dividend <= ra; + d1.divisor <= rb; + d1.is_signed <= '1'; d1.is_extended <= '0'; d1.is_32bit <= '1'; d1.valid <= '1'; @@ -383,7 +338,7 @@ begin wait for clk_period; d1.valid <= '0'; - for j in 0 to 64 loop + for j in 0 to 65 loop wait for clk_period; if d2.valid = '1' then exit; @@ -391,12 +346,15 @@ begin end loop; assert d2.valid = '1'; - if unsigned(d1.divisor(31 downto 0)) > unsigned(d1.dividend(63 downto 32)) then + if rb /= x"0000000000000000" then behave_rt := std_ulogic_vector(signed(ra) / signed(rb)); - assert behave_rt(31 downto 0) = d2.write_reg_data(31 downto 0) - report "bad divwe expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data) & " for ra = " & to_hstring(ra) & " rb = " & to_hstring(rb); - assert ppc_cmpi('0', behave_rt, x"0000") & x"0000000" = d2.write_cr_data - report "bad CR setting for divwe"; + if behave_rt(63 downto 31) = x"00000000" & '0' or + behave_rt(63 downto 31) = x"ffffffff" & '1' then + assert behave_rt(31 downto 0) = d2.write_reg_data(31 downto 0) + report "bad divwe expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data) & " for ra = " & to_hstring(ra) & " rb = " & to_hstring(rb); + assert ppc_cmpi('0', behave_rt, x"0000") & x"0000000" = d2.write_cr_data + report "bad CR setting for divwe"; + end if; end if; end loop; end loop; @@ -412,7 +370,7 @@ begin d1.dividend <= ra; d1.divisor <= rb; - d1.neg_result <= '0'; + d1.is_signed <= '0'; d1.is_extended <= '0'; d1.is_32bit <= '1'; d1.valid <= '1'; @@ -420,7 +378,7 @@ begin wait for clk_period; d1.valid <= '0'; - for j in 0 to 64 loop + for j in 0 to 65 loop wait for clk_period; if d2.valid = '1' then exit; @@ -428,7 +386,7 @@ begin end loop; assert d2.valid = '1'; - if unsigned(d1.divisor(31 downto 0)) > unsigned(d1.dividend(63 downto 32)) then + if unsigned(rb(31 downto 0)) > unsigned(ra(63 downto 32)) then behave_rt := std_ulogic_vector(unsigned(ra) / unsigned(rb)); assert behave_rt(31 downto 0) = d2.write_reg_data(31 downto 0) report "bad divweu expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data) & " for ra = " & to_hstring(ra) & " rb = " & to_hstring(rb); @@ -447,17 +405,9 @@ begin ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64)); rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64)); - if ra(63) = '1' then - d1.dividend <= std_ulogic_vector(- signed(ra)); - else - d1.dividend <= ra; - end if; - if rb(63) = '1' then - d1.divisor <= std_ulogic_vector(- signed(rb)); - else - d1.divisor <= rb; - end if; - d1.neg_result <= ra(63); + d1.dividend <= ra; + d1.divisor <= rb; + d1.is_signed <= '1'; d1.is_extended <= '0'; d1.is_32bit <= '0'; d1.is_modulus <= '1'; @@ -466,7 +416,7 @@ begin wait for clk_period; d1.valid <= '0'; - for j in 0 to 64 loop + for j in 0 to 65 loop wait for clk_period; if d2.valid = '1' then exit; @@ -495,7 +445,7 @@ begin d1.dividend <= ra; d1.divisor <= rb; - d1.neg_result <= '0'; + d1.is_signed <= '0'; d1.is_extended <= '0'; d1.is_32bit <= '0'; d1.is_modulus <= '1'; @@ -504,7 +454,7 @@ begin wait for clk_period; d1.valid <= '0'; - for j in 0 to 64 loop + for j in 0 to 65 loop wait for clk_period; if d2.valid = '1' then exit; @@ -531,17 +481,9 @@ begin ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64)); rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64)); - if ra(63) = '1' then - d1.dividend <= std_ulogic_vector(- signed(ra)); - else - d1.dividend <= ra; - end if; - if rb(63) = '1' then - d1.divisor <= std_ulogic_vector(- signed(rb)); - else - d1.divisor <= rb; - end if; - d1.neg_result <= ra(63); + d1.dividend <= ra; + d1.divisor <= rb; + d1.is_signed <= '1'; d1.is_extended <= '0'; d1.is_32bit <= '1'; d1.is_modulus <= '1'; @@ -550,7 +492,7 @@ begin wait for clk_period; d1.valid <= '0'; - for j in 0 to 64 loop + for j in 0 to 65 loop wait for clk_period; if d2.valid = '1' then exit; @@ -579,7 +521,7 @@ begin d1.dividend <= ra; d1.divisor <= rb; - d1.neg_result <= '0'; + d1.is_signed <= '0'; d1.is_extended <= '0'; d1.is_32bit <= '1'; d1.is_modulus <= '1'; @@ -588,7 +530,7 @@ begin wait for clk_period; d1.valid <= '0'; - for j in 0 to 64 loop + for j in 0 to 65 loop wait for clk_period; if d2.valid = '1' then exit;