From 535341961d1d4d5b6df98f4bf9c01ae0daf5d9bf Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 25 Jul 2020 18:23:11 +1000 Subject: [PATCH] multiplier: Generalize interface to the multiplier This makes the interface to the multiplier more general so an instance of it can be used in the FPU. It now has a 128-bit addend that is added on to the product. Instead of an input to negate the output, it now has a "not_result" input to complement the output. Execute1 uses not_result=1 and addend=-1 to get the effect of negating the output. The interface is defined this way because this is what can be done easily with the Xilinx DSP slices in xilinx-mult.vhdl. This also adds clock enable signals to the DSP slices, mostly for the sake of reducing power consumption. Signed-off-by: Paul Mackerras --- common.vhdl | 27 ++++++++++--------- execute1.vhdl | 9 ++++--- multiply.vhdl | 22 ++++++++-------- multiply_tb.vhdl | 31 +++++++++++++++------- xilinx-mult.vhdl | 68 ++++++++++++++++++++++++------------------------ 5 files changed, 86 insertions(+), 71 deletions(-) diff --git a/common.vhdl b/common.vhdl index 28b3434..e05720b 100644 --- a/common.vhdl +++ b/common.vhdl @@ -182,16 +182,25 @@ package common is is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', br_pred => '0', byte_reverse => '0', sign_extend => '0', update => '0', nia => (others => '0'), read_data1 => (others => '0'), read_data2 => (others => '0'), read_data3 => (others => '0'), cr => (others => '0'), insn => (others => '0'), data_len => (others => '0'), others => (others => '0')); - type Execute1ToMultiplyType is record + type MultiplyInputType is record valid: std_ulogic; data1: std_ulogic_vector(63 downto 0); data2: std_ulogic_vector(63 downto 0); + addend: std_ulogic_vector(127 downto 0); is_32bit: std_ulogic; - neg_result: std_ulogic; + not_result: std_ulogic; + end record; + constant MultiplyInputInit : MultiplyInputType := (valid => '0', + is_32bit => '0', not_result => '0', + others => (others => '0')); + + type MultiplyOutputType is record + valid: std_ulogic; + result: std_ulogic_vector(127 downto 0); + overflow : std_ulogic; end record; - constant Execute1ToMultiplyInit : Execute1ToMultiplyType := (valid => '0', - is_32bit => '0', neg_result => '0', - others => (others => '0')); + constant MultiplyOutputInit : MultiplyOutputType := (valid => '0', overflow => '0', + others => (others => '0')); type Execute1ToDividerType is record valid: std_ulogic; @@ -382,14 +391,6 @@ package common is write_cr_data => (others => '0'), write_reg => (others => '0'), exc_write_reg => (others => '0'), exc_write_data => (others => '0')); - type MultiplyToExecute1Type is record - valid: std_ulogic; - result: std_ulogic_vector(127 downto 0); - overflow : std_ulogic; - end record; - constant MultiplyToExecute1Init : MultiplyToExecute1Type := (valid => '0', overflow => '0', - others => (others => '0')); - type DividerToExecute1Type is record valid: std_ulogic; write_reg_data: std_ulogic_vector(63 downto 0); diff --git a/execute1.vhdl b/execute1.vhdl index 2722570..d48fee8 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -89,8 +89,8 @@ architecture behaviour of execute1 is signal countzero_result: std_ulogic_vector(63 downto 0); -- multiply signals - signal x_to_multiply: Execute1ToMultiplyType; - signal multiply_to_x: MultiplyToExecute1Type; + signal x_to_multiply: MultiplyInputType; + signal multiply_to_x: MultiplyOutputType; -- divider signals signal x_to_divider: Execute1ToDividerType; @@ -396,7 +396,7 @@ begin abs2 := - signed(b_in); end if; - x_to_multiply <= Execute1ToMultiplyInit; + x_to_multiply <= MultiplyInputInit; x_to_multiply.is_32bit <= e_in.is_32bit; x_to_divider <= Execute1ToDividerInit; @@ -406,7 +406,8 @@ begin x_to_divider.is_modulus <= '1'; end if; - x_to_multiply.neg_result <= sign1 xor sign2; + x_to_multiply.not_result <= sign1 xor sign2; + x_to_multiply.addend <= (others => sign1 xor sign2); x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus); if e_in.is_32bit = '0' then -- 64-bit forms diff --git a/multiply.vhdl b/multiply.vhdl index 7a4c81b..b737a46 100644 --- a/multiply.vhdl +++ b/multiply.vhdl @@ -12,22 +12,22 @@ entity multiply is port ( clk : in std_logic; - m_in : in Execute1ToMultiplyType; - m_out : out MultiplyToExecute1Type + m_in : in MultiplyInputType; + m_out : out MultiplyOutputType ); end entity multiply; architecture behaviour of multiply is - signal m: Execute1ToMultiplyType := Execute1ToMultiplyInit; + signal m: MultiplyInputType := MultiplyInputInit; type multiply_pipeline_stage is record valid : std_ulogic; data : unsigned(127 downto 0); is_32bit : std_ulogic; - neg_res : std_ulogic; + not_res : std_ulogic; end record; constant MultiplyPipelineStageInit : multiply_pipeline_stage := (valid => '0', - is_32bit => '0', neg_res => '0', + is_32bit => '0', not_res => '0', data => (others => '0')); type multiply_pipeline_type is array(0 to PIPELINE_DEPTH-1) of multiply_pipeline_stage; @@ -53,19 +53,19 @@ begin variable d2 : std_ulogic_vector(63 downto 0); variable ov : std_ulogic; begin + v := r; v.multiply_pipeline(0).valid := m.valid; - v.multiply_pipeline(0).data := unsigned(m.data1) * unsigned(m.data2); + v.multiply_pipeline(0).data := (unsigned(m.data1) * unsigned(m.data2)) + unsigned(m.addend); v.multiply_pipeline(0).is_32bit := m.is_32bit; - v.multiply_pipeline(0).neg_res := m.neg_result; + v.multiply_pipeline(0).not_res := m.not_result; loop_0: for i in 1 to PIPELINE_DEPTH-1 loop v.multiply_pipeline(i) := r.multiply_pipeline(i-1); end loop; - if v.multiply_pipeline(PIPELINE_DEPTH-1).neg_res = '0' then - d := std_ulogic_vector(v.multiply_pipeline(PIPELINE_DEPTH-1).data); - else - d := std_ulogic_vector(- signed(v.multiply_pipeline(PIPELINE_DEPTH-1).data)); + d := std_ulogic_vector(v.multiply_pipeline(PIPELINE_DEPTH-1).data); + if v.multiply_pipeline(PIPELINE_DEPTH-1).not_res = '1' then + d := not d; end if; ov := '0'; diff --git a/multiply_tb.vhdl b/multiply_tb.vhdl index 87f029d..884b828 100644 --- a/multiply_tb.vhdl +++ b/multiply_tb.vhdl @@ -17,8 +17,8 @@ architecture behave of multiply_tb is constant pipeline_depth : integer := 4; - signal m1 : Execute1ToMultiplyType := Execute1ToMultiplyInit; - signal m2 : MultiplyToExecute1Type; + signal m1 : MultiplyInputType := MultiplyInputInit; + signal m2 : MultiplyOutputType; function absval(x: std_ulogic_vector) return std_ulogic_vector is begin @@ -45,6 +45,7 @@ begin stim_process: process variable ra, rb, rt, behave_rt: std_ulogic_vector(63 downto 0); variable si: std_ulogic_vector(15 downto 0); + variable sign: std_ulogic; begin wait for clk_period; @@ -90,7 +91,9 @@ begin m1.data1 <= absval(ra); m1.data2 <= absval(rb); - m1.neg_result <= ra(63) xor rb(63); + sign := ra(63) xor rb(63); + m1.not_result <= sign; + m1.addend <= (others => sign); m1.valid <= '1'; wait for clk_period; @@ -114,7 +117,8 @@ begin m1.data1 <= ra; m1.data2 <= rb; - m1.neg_result <= '0'; + m1.not_result <= '0'; + m1.addend <= (others => '0'); m1.valid <= '1'; wait for clk_period; @@ -138,7 +142,9 @@ begin m1.data1 <= absval(ra); m1.data2 <= absval(rb); - m1.neg_result <= ra(63) xor rb(63); + sign := ra(63) xor rb(63); + m1.not_result <= sign; + m1.addend <= (others => sign); m1.valid <= '1'; wait for clk_period; @@ -164,7 +170,9 @@ begin m1.data1(31 downto 0) <= absval(ra(31 downto 0)); m1.data2 <= (others => '0'); m1.data2(31 downto 0) <= absval(rb(31 downto 0)); - m1.neg_result <= ra(31) xor rb(31); + sign := ra(31) xor rb(31); + m1.not_result <= sign; + m1.addend <= (others => sign); m1.valid <= '1'; wait for clk_period; @@ -190,7 +198,9 @@ begin m1.data1(31 downto 0) <= absval(ra(31 downto 0)); m1.data2 <= (others => '0'); m1.data2(31 downto 0) <= absval(rb(31 downto 0)); - m1.neg_result <= ra(31) xor rb(31); + sign := ra(31) xor rb(31); + m1.not_result <= sign; + m1.addend <= (others => sign); m1.valid <= '1'; wait for clk_period; @@ -217,7 +227,8 @@ begin m1.data1(31 downto 0) <= ra(31 downto 0); m1.data2 <= (others => '0'); m1.data2(31 downto 0) <= rb(31 downto 0); - m1.neg_result <= '0'; + m1.not_result <= '0'; + m1.addend <= (others => '0'); m1.valid <= '1'; wait for clk_period; @@ -243,7 +254,9 @@ begin m1.data1 <= absval(ra); m1.data2 <= (others => '0'); m1.data2(15 downto 0) <= absval(si); - m1.neg_result <= ra(63) xor si(15); + sign := ra(63) xor si(15); + m1.not_result <= sign; + m1.addend <= (others => sign); m1.valid <= '1'; wait for clk_period; diff --git a/xilinx-mult.vhdl b/xilinx-mult.vhdl index 46366d6..4c60775 100644 --- a/xilinx-mult.vhdl +++ b/xilinx-mult.vhdl @@ -12,8 +12,8 @@ entity multiply is port ( clk : in std_logic; - m_in : in Execute1ToMultiplyType; - m_out : out MultiplyToExecute1Type + m_in : in MultiplyInputType; + m_out : out MultiplyOutputType ); end entity multiply; @@ -33,11 +33,11 @@ architecture behaviour of multiply is signal p1_pat, p1_patb : std_ulogic; signal req_32bit, r32_1 : std_ulogic; - signal req_neg, rneg_1 : std_ulogic; + signal req_not, rnot_1 : std_ulogic; signal valid_1 : std_ulogic; begin - addend <= (others => m_in.neg_result); + addend <= m_in.addend; m00: DSP48E1 generic map ( @@ -73,7 +73,7 @@ begin CECTRL => '0', CED => '0', CEINMODE => '0', - CEM => '1', + CEM => m_in.valid, CEP => '0', CLK => clk, D => (others => '0'), @@ -129,7 +129,7 @@ begin CECTRL => '0', CED => '0', CEINMODE => '0', - CEM => '1', + CEM => m_in.valid, CEP => '0', CLK => clk, D => (others => '0'), @@ -184,7 +184,7 @@ begin CECTRL => '0', CED => '0', CEINMODE => '0', - CEM => '1', + CEM => m_in.valid, CEP => '0', CLK => clk, D => (others => '0'), @@ -239,7 +239,7 @@ begin CECTRL => '0', CED => '0', CEINMODE => '0', - CEM => '1', + CEM => m_in.valid, CEP => '0', CLK => clk, D => (others => '0'), @@ -295,7 +295,7 @@ begin CECTRL => '0', CED => '0', CEINMODE => '0', - CEM => '1', + CEM => m_in.valid, CEP => '0', CLK => clk, D => (others => '0'), @@ -351,7 +351,7 @@ begin CECTRL => '0', CED => '0', CEINMODE => '0', - CEM => '1', + CEM => m_in.valid, CEP => '0', CLK => clk, D => (others => '0'), @@ -408,7 +408,7 @@ begin CECTRL => '0', CED => '0', CEINMODE => '0', - CEM => '1', + CEM => m_in.valid, CEP => '0', CLK => clk, D => (others => '0'), @@ -464,7 +464,7 @@ begin CECTRL => '0', CED => '0', CEINMODE => '0', - CEM => '1', + CEM => m_in.valid, CEP => '0', CLK => clk, D => (others => '0'), @@ -520,7 +520,7 @@ begin CECTRL => '0', CED => '0', CEINMODE => '0', - CEM => '1', + CEM => m_in.valid, CEP => '0', CLK => clk, D => (others => '0'), @@ -575,7 +575,7 @@ begin CECTRL => '0', CED => '0', CEINMODE => '0', - CEM => '1', + CEM => m_in.valid, CEP => '0', CLK => clk, D => (others => '0'), @@ -630,7 +630,7 @@ begin CECTRL => '0', CED => '0', CEINMODE => '0', - CEM => '1', + CEM => m_in.valid, CEP => '0', CLK => clk, D => (others => '0'), @@ -685,7 +685,7 @@ begin CECTRL => '0', CED => '0', CEINMODE => '0', - CEM => '1', + CEM => m_in.valid, CEP => '0', CLK => clk, D => (others => '0'), @@ -734,12 +734,12 @@ begin CARRYINSEL => "000", CARRYOUT => s0_carry, CEA1 => '0', - CEA2 => '1', + CEA2 => valid_1, CEAD => '0', CEALUMODE => '0', CEB1 => '0', - CEB2 => '1', - CEC => '1', + CEB2 => valid_1, + CEC => valid_1, CECARRYIN => '0', CECTRL => '0', CED => '0', @@ -792,12 +792,12 @@ begin CARRYIN => s0_carry(3), CARRYINSEL => "000", CEA1 => '0', - CEA2 => '1', + CEA2 => valid_1, CEAD => '0', CEALUMODE => '0', CEB1 => '0', - CEB2 => '1', - CEC => '1', + CEB2 => valid_1, + CEC => valid_1, CECARRYIN => '0', CECTRL => '0', CED => '0', @@ -848,7 +848,7 @@ begin port map ( A => m21_p(22 downto 0) & m03_p(5 downto 0) & '0', ACIN => (others => '0'), - ALUMODE => "00" & rneg_1 & '0', + ALUMODE => "00" & rnot_1 & '0', B => (others => '0'), BCIN => (others => '0'), C => p0_mask, @@ -857,12 +857,12 @@ begin CARRYINSEL => "000", CARRYOUT => p0_carry, CEA1 => '0', - CEA2 => '1', + CEA2 => valid_1, CEAD => '0', - CEALUMODE => '1', + CEALUMODE => valid_1, CEB1 => '0', - CEB2 => '1', - CEC => '1', + CEB2 => valid_1, + CEC => valid_1, CECARRYIN => '0', CECTRL => '0', CED => '0', @@ -911,7 +911,7 @@ begin port map ( A => x"0000000" & '0' & m21_p(41), ACIN => (others => '0'), - ALUMODE => "00" & rneg_1 & '0', + ALUMODE => "00" & rnot_1 & '0', B => m21_p(40 downto 23), BCIN => (others => '0'), C => (others => '0'), @@ -919,11 +919,11 @@ begin CARRYIN => p0_carry(3), CARRYINSEL => "000", CEA1 => '0', - CEA2 => '1', + CEA2 => valid_1, CEAD => '0', - CEALUMODE => '1', + CEALUMODE => valid_1, CEB1 => '0', - CEB2 => '1', + CEB2 => valid_1, CEC => '0', CECARRYIN => '0', CECTRL => '0', @@ -952,7 +952,7 @@ begin RSTP => '0' ); - product(31 downto 0) <= product_lo xor (31 downto 0 => req_neg); + product(31 downto 0) <= product_lo xor (31 downto 0 => req_not); mult_out: process(all) variable ov : std_ulogic; @@ -977,8 +977,8 @@ begin valid_1 <= m_in.valid; req_32bit <= r32_1; r32_1 <= m_in.is_32bit; - req_neg <= rneg_1; - rneg_1 <= m_in.neg_result; + req_not <= rnot_1; + rnot_1 <= m_in.not_result; end if; end process;