multiplier: Generalize interface to the multiplier

This makes the interface to the multiplier more general so an instance of it can be used in the FPU. It now has a 128-bit addend that is added on to the product. Instead of an input to negate the output, it now has a "not_result" input to complement the output. Execute1 uses not_result=1 and addend=-1 to get the effect of negating the output, since in two's complement not(P - 1) = -P. The interface is defined this way because this is what can be done easily with the Xilinx DSP slices in xilinx-mult.vhdl. This also adds clock enable signals to the DSP slices, mostly for the sake of reducing power consumption.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
6 years ago · 535341961d
parent 178d7680af
commit 535341961d
5 changed files with 86 additions and 71 deletions
--- a/common.vhdl
+++ b/common.vhdl
@ -182,16 +182,25 @@ package common is
 	 is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', br_pred => '0',
         byte_reverse => '0', sign_extend => '0', update => '0', nia => (others => '0'), read_data1 => (others => '0'), read_data2 => (others => '0'), read_data3 => (others => '0'), cr => (others => '0'), insn => (others => '0'), data_len => (others => '0'), others => (others => '0'));

-    type Execute1ToMultiplyType is record
+    type MultiplyInputType is record
 	valid: std_ulogic;
 	data1: std_ulogic_vector(63 downto 0);
 	data2: std_ulogic_vector(63 downto 0);
+        addend: std_ulogic_vector(127 downto 0);
 	is_32bit: std_ulogic;
-        neg_result: std_ulogic;
+        not_result: std_ulogic;
+    end record;
+    constant MultiplyInputInit : MultiplyInputType := (valid => '0',
+                                                       is_32bit => '0', not_result => '0',
+                                                       others => (others => '0'));
+
+    type MultiplyOutputType is record
+	valid: std_ulogic;
+	result: std_ulogic_vector(127 downto 0);
+        overflow : std_ulogic;
    end record;
-    constant Execute1ToMultiplyInit : Execute1ToMultiplyType := (valid => '0',
-								 is_32bit => '0', neg_result => '0',
-								 others => (others => '0'));
+    constant MultiplyOutputInit : MultiplyOutputType := (valid => '0', overflow => '0',
+                                                         others => (others => '0'));

    type Execute1ToDividerType is record
 	valid: std_ulogic;
@ -382,14 +391,6 @@ package common is
                                   write_cr_data => (others => '0'), write_reg => (others => '0'),
                                   exc_write_reg => (others => '0'), exc_write_data => (others => '0'));

-    type MultiplyToExecute1Type is record
-	valid: std_ulogic;
-	result: std_ulogic_vector(127 downto 0);
-        overflow : std_ulogic;
-    end record;
-    constant MultiplyToExecute1Init : MultiplyToExecute1Type := (valid => '0', overflow => '0',
-								 others => (others => '0'));
-
    type DividerToExecute1Type is record
 	valid: std_ulogic;
 	write_reg_data: std_ulogic_vector(63 downto 0);
--- a/execute1.vhdl
+++ b/execute1.vhdl
@ -89,8 +89,8 @@ architecture behaviour of execute1 is
    signal countzero_result: std_ulogic_vector(63 downto 0);

    -- multiply signals
-    signal x_to_multiply: Execute1ToMultiplyType;
-    signal multiply_to_x: MultiplyToExecute1Type;
+    signal x_to_multiply: MultiplyInputType;
+    signal multiply_to_x: MultiplyOutputType;

    -- divider signals
    signal x_to_divider: Execute1ToDividerType;
@ -396,7 +396,7 @@ begin
            abs2 := - signed(b_in);
        end if;

-	x_to_multiply <= Execute1ToMultiplyInit;
+	x_to_multiply <= MultiplyInputInit;
 	x_to_multiply.is_32bit <= e_in.is_32bit;

        x_to_divider <= Execute1ToDividerInit;
@ -406,7 +406,8 @@ begin
            x_to_divider.is_modulus <= '1';
        end if;

-        x_to_multiply.neg_result <= sign1 xor sign2;
+        x_to_multiply.not_result <= sign1 xor sign2;
+        x_to_multiply.addend <= (others => sign1 xor sign2);
        x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus);
        if e_in.is_32bit = '0' then
            -- 64-bit forms
--- a/multiply.vhdl
+++ b/multiply.vhdl
@ -12,22 +12,22 @@ entity multiply is
    port (
        clk   : in std_logic;

-        m_in  : in Execute1ToMultiplyType;
-        m_out : out MultiplyToExecute1Type
+        m_in  : in MultiplyInputType;
+        m_out : out MultiplyOutputType
        );
 end entity multiply;

 architecture behaviour of multiply is
-    signal m: Execute1ToMultiplyType := Execute1ToMultiplyInit;
+    signal m: MultiplyInputType := MultiplyInputInit;

    type multiply_pipeline_stage is record
        valid     : std_ulogic;
        data      : unsigned(127 downto 0);
 	is_32bit  : std_ulogic;
-        neg_res   : std_ulogic;
+        not_res   : std_ulogic;
    end record;
    constant MultiplyPipelineStageInit : multiply_pipeline_stage := (valid => '0',
-								     is_32bit => '0', neg_res => '0',
+								     is_32bit => '0', not_res => '0',
 								     data => (others => '0'));

    type multiply_pipeline_type is array(0 to PIPELINE_DEPTH-1) of multiply_pipeline_stage;
@ -53,19 +53,19 @@ begin
        variable d2 : std_ulogic_vector(63 downto 0);
 	variable ov : std_ulogic;
    begin
+        v := r;
        v.multiply_pipeline(0).valid := m.valid;
-        v.multiply_pipeline(0).data := unsigned(m.data1) * unsigned(m.data2);
+        v.multiply_pipeline(0).data := (unsigned(m.data1) * unsigned(m.data2)) + unsigned(m.addend);
        v.multiply_pipeline(0).is_32bit := m.is_32bit;
-        v.multiply_pipeline(0).neg_res := m.neg_result;
+        v.multiply_pipeline(0).not_res := m.not_result;

        loop_0: for i in 1 to PIPELINE_DEPTH-1 loop
            v.multiply_pipeline(i) := r.multiply_pipeline(i-1);
        end loop;

-        if v.multiply_pipeline(PIPELINE_DEPTH-1).neg_res = '0' then
-            d := std_ulogic_vector(v.multiply_pipeline(PIPELINE_DEPTH-1).data);
-        else
-            d := std_ulogic_vector(- signed(v.multiply_pipeline(PIPELINE_DEPTH-1).data));
+        d := std_ulogic_vector(v.multiply_pipeline(PIPELINE_DEPTH-1).data);
+        if v.multiply_pipeline(PIPELINE_DEPTH-1).not_res = '1' then
+            d := not d;
        end if;

        ov := '0';
--- a/multiply_tb.vhdl
+++ b/multiply_tb.vhdl
@ -17,8 +17,8 @@ architecture behave of multiply_tb is

    constant pipeline_depth : integer := 4;

-    signal m1               : Execute1ToMultiplyType := Execute1ToMultiplyInit;
-    signal m2               : MultiplyToExecute1Type;
+    signal m1               : MultiplyInputType := MultiplyInputInit;
+    signal m2               : MultiplyOutputType;

    function absval(x: std_ulogic_vector) return std_ulogic_vector is
    begin
@ -45,6 +45,7 @@ begin
    stim_process: process
        variable ra, rb, rt, behave_rt: std_ulogic_vector(63 downto 0);
        variable si: std_ulogic_vector(15 downto 0);
+        variable sign: std_ulogic;
    begin
        wait for clk_period;

@ -90,7 +91,9 @@ begin

            m1.data1 <= absval(ra);
            m1.data2 <= absval(rb);
-            m1.neg_result <= ra(63) xor rb(63);
+            sign := ra(63) xor rb(63);
+            m1.not_result <= sign;
+            m1.addend <= (others => sign);
            m1.valid <= '1';

            wait for clk_period;
@ -114,7 +117,8 @@ begin

            m1.data1 <= ra;
            m1.data2 <= rb;
-            m1.neg_result <= '0';
+            m1.not_result <= '0';
+            m1.addend <= (others => '0');
            m1.valid <= '1';

            wait for clk_period;
@ -138,7 +142,9 @@ begin

            m1.data1 <= absval(ra);
            m1.data2 <= absval(rb);
-            m1.neg_result <= ra(63) xor rb(63);
+            sign := ra(63) xor rb(63);
+            m1.not_result <= sign;
+            m1.addend <= (others => sign);
            m1.valid <= '1';

            wait for clk_period;
@ -164,7 +170,9 @@ begin
            m1.data1(31 downto 0) <= absval(ra(31 downto 0));
            m1.data2 <= (others => '0');
            m1.data2(31 downto 0) <= absval(rb(31 downto 0));
-            m1.neg_result <= ra(31) xor rb(31);
+            sign := ra(31) xor rb(31);
+            m1.not_result <= sign;
+            m1.addend <= (others => sign);
            m1.valid <= '1';

            wait for clk_period;
@ -190,7 +198,9 @@ begin
            m1.data1(31 downto 0) <= absval(ra(31 downto 0));
            m1.data2 <= (others => '0');
            m1.data2(31 downto 0) <= absval(rb(31 downto 0));
-            m1.neg_result <= ra(31) xor rb(31);
+            sign := ra(31) xor rb(31);
+            m1.not_result <= sign;
+            m1.addend <= (others => sign);
            m1.valid <= '1';

            wait for clk_period;
@ -217,7 +227,8 @@ begin
            m1.data1(31 downto 0) <= ra(31 downto 0);
            m1.data2 <= (others => '0');
            m1.data2(31 downto 0) <= rb(31 downto 0);
-            m1.neg_result <= '0';
+            m1.not_result <= '0';
+            m1.addend <= (others => '0');
            m1.valid <= '1';

            wait for clk_period;
@ -243,7 +254,9 @@ begin
            m1.data1 <= absval(ra);
            m1.data2 <= (others => '0');
            m1.data2(15 downto 0) <= absval(si);
-            m1.neg_result <= ra(63) xor si(15);
+            sign := ra(63) xor si(15);
+            m1.not_result <= sign;
+            m1.addend <= (others => sign);
            m1.valid <= '1';

            wait for clk_period;
--- a/xilinx-mult.vhdl
+++ b/xilinx-mult.vhdl
@ -12,8 +12,8 @@ entity multiply is
    port (
        clk   : in std_logic;

-        m_in  : in Execute1ToMultiplyType;
-        m_out : out MultiplyToExecute1Type
+        m_in  : in MultiplyInputType;
+        m_out : out MultiplyOutputType
        );
 end entity multiply;

@ -33,11 +33,11 @@ architecture behaviour of multiply is
    signal p1_pat, p1_patb : std_ulogic;

    signal req_32bit, r32_1 : std_ulogic;
-    signal req_neg, rneg_1 : std_ulogic;
+    signal req_not, rnot_1 : std_ulogic;
    signal valid_1 : std_ulogic;

 begin
-    addend <= (others => m_in.neg_result);
+    addend <= m_in.addend;

    m00: DSP48E1
        generic map (
@ -73,7 +73,7 @@ begin
            CECTRL => '0',
            CED => '0',
            CEINMODE => '0',
-            CEM => '1',
+            CEM => m_in.valid,
            CEP => '0',
            CLK => clk,
            D => (others => '0'),
@ -129,7 +129,7 @@ begin
            CECTRL => '0',
            CED => '0',
            CEINMODE => '0',
-            CEM => '1',
+            CEM => m_in.valid,
            CEP => '0',
            CLK => clk,
            D => (others => '0'),
@ -184,7 +184,7 @@ begin
            CECTRL => '0',
            CED => '0',
            CEINMODE => '0',
-            CEM => '1',
+            CEM => m_in.valid,
            CEP => '0',
            CLK => clk,
            D => (others => '0'),
@ -239,7 +239,7 @@ begin
            CECTRL => '0',
            CED => '0',
            CEINMODE => '0',
-            CEM => '1',
+            CEM => m_in.valid,
            CEP => '0',
            CLK => clk,
            D => (others => '0'),
@ -295,7 +295,7 @@ begin
            CECTRL => '0',
            CED => '0',
            CEINMODE => '0',
-            CEM => '1',
+            CEM => m_in.valid,
            CEP => '0',
            CLK => clk,
            D => (others => '0'),
@ -351,7 +351,7 @@ begin
            CECTRL => '0',
            CED => '0',
            CEINMODE => '0',
-            CEM => '1',
+            CEM => m_in.valid,
            CEP => '0',
            CLK => clk,
            D => (others => '0'),
@ -408,7 +408,7 @@ begin
            CECTRL => '0',
            CED => '0',
            CEINMODE => '0',
-            CEM => '1',
+            CEM => m_in.valid,
            CEP => '0',
            CLK => clk,
            D => (others => '0'),
@ -464,7 +464,7 @@ begin
            CECTRL => '0',
            CED => '0',
            CEINMODE => '0',
-            CEM => '1',
+            CEM => m_in.valid,
            CEP => '0',
            CLK => clk,
            D => (others => '0'),
@ -520,7 +520,7 @@ begin
            CECTRL => '0',
            CED => '0',
            CEINMODE => '0',
-            CEM => '1',
+            CEM => m_in.valid,
            CEP => '0',
            CLK => clk,
            D => (others => '0'),
@ -575,7 +575,7 @@ begin
            CECTRL => '0',
            CED => '0',
            CEINMODE => '0',
-            CEM => '1',
+            CEM => m_in.valid,
            CEP => '0',
            CLK => clk,
            D => (others => '0'),
@ -630,7 +630,7 @@ begin
            CECTRL => '0',
            CED => '0',
            CEINMODE => '0',
-            CEM => '1',
+            CEM => m_in.valid,
            CEP => '0',
            CLK => clk,
            D => (others => '0'),
@ -685,7 +685,7 @@ begin
            CECTRL => '0',
            CED => '0',
            CEINMODE => '0',
-            CEM => '1',
+            CEM => m_in.valid,
            CEP => '0',
            CLK => clk,
            D => (others => '0'),
@ -734,12 +734,12 @@ begin
            CARRYINSEL => "000",
            CARRYOUT => s0_carry,
            CEA1 => '0',
-            CEA2 => '1',
+            CEA2 => valid_1,
            CEAD => '0',
            CEALUMODE => '0',
            CEB1 => '0',
-            CEB2 => '1',
-            CEC => '1',
+            CEB2 => valid_1,
+            CEC => valid_1,
            CECARRYIN => '0',
            CECTRL => '0',
            CED => '0',
@ -792,12 +792,12 @@ begin
            CARRYIN => s0_carry(3),
            CARRYINSEL => "000",
            CEA1 => '0',
-            CEA2 => '1',
+            CEA2 => valid_1,
            CEAD => '0',
            CEALUMODE => '0',
            CEB1 => '0',
-            CEB2 => '1',
-            CEC => '1',
+            CEB2 => valid_1,
+            CEC => valid_1,
            CECARRYIN => '0',
            CECTRL => '0',
            CED => '0',
@ -848,7 +848,7 @@ begin
        port map (
            A => m21_p(22 downto 0) & m03_p(5 downto 0) & '0',
            ACIN => (others => '0'),
-            ALUMODE => "00" & rneg_1 & '0',
+            ALUMODE => "00" & rnot_1 & '0',
            B => (others => '0'),
            BCIN => (others => '0'),
            C => p0_mask,
@ -857,12 +857,12 @@ begin
            CARRYINSEL => "000",
            CARRYOUT => p0_carry,
            CEA1 => '0',
-            CEA2 => '1',
+            CEA2 => valid_1,
            CEAD => '0',
-            CEALUMODE => '1',
+            CEALUMODE => valid_1,
            CEB1 => '0',
-            CEB2 => '1',
-            CEC => '1',
+            CEB2 => valid_1,
+            CEC => valid_1,
            CECARRYIN => '0',
            CECTRL => '0',
            CED => '0',
@ -911,7 +911,7 @@ begin
        port map (
            A => x"0000000" & '0' & m21_p(41),
            ACIN => (others => '0'),
-            ALUMODE => "00" & rneg_1 & '0',
+            ALUMODE => "00" & rnot_1 & '0',
            B => m21_p(40 downto 23),
            BCIN => (others => '0'),
            C => (others => '0'),
@ -919,11 +919,11 @@ begin
            CARRYIN => p0_carry(3),
            CARRYINSEL => "000",
            CEA1 => '0',
-            CEA2 => '1',
+            CEA2 => valid_1,
            CEAD => '0',
-            CEALUMODE => '1',
+            CEALUMODE => valid_1,
            CEB1 => '0',
-            CEB2 => '1',
+            CEB2 => valid_1,
            CEC => '0',
            CECARRYIN => '0',
            CECTRL => '0',
@ -952,7 +952,7 @@ begin
            RSTP => '0'
            );

-    product(31 downto 0) <= product_lo xor (31 downto 0 => req_neg);
+    product(31 downto 0) <= product_lo xor (31 downto 0 => req_not);

    mult_out: process(all)
        variable ov : std_ulogic;
@ -977,8 +977,8 @@ begin
            valid_1 <= m_in.valid;
            req_32bit <= r32_1;
            r32_1 <= m_in.is_32bit;
-            req_neg <= rneg_1;
-            rneg_1 <= m_in.neg_result;
+            req_not <= rnot_1;
+            rnot_1 <= m_in.not_result;
        end if;
    end process;