multiplier: Generalize interface to the multiplier

This makes the interface to the multiplier more general so an instance
of it can be used in the FPU.  It now has a 128-bit addend that is
added on to the product.  Instead of an input to negate the output,
it now has a "not_result" input to complement the output.  Execute1
uses not_result=1 and addend=-1 to get the effect of negating the
output, since in two's complement not(x - 1) = -x.  The interface is
defined this way because this is what can be done easily with the
Xilinx DSP slices in xilinx-mult.vhdl.

This also drives the clock enable inputs of the DSP slices that were
previously tied to '1' from the valid signals instead, mostly for the
sake of reducing power consumption.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/235/head
Paul Mackerras 4 years ago
parent 178d7680af
commit 535341961d

@ -182,15 +182,24 @@ package common is
is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', br_pred => '0', is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', br_pred => '0',
byte_reverse => '0', sign_extend => '0', update => '0', nia => (others => '0'), read_data1 => (others => '0'), read_data2 => (others => '0'), read_data3 => (others => '0'), cr => (others => '0'), insn => (others => '0'), data_len => (others => '0'), others => (others => '0')); byte_reverse => '0', sign_extend => '0', update => '0', nia => (others => '0'), read_data1 => (others => '0'), read_data2 => (others => '0'), read_data3 => (others => '0'), cr => (others => '0'), insn => (others => '0'), data_len => (others => '0'), others => (others => '0'));


type Execute1ToMultiplyType is record type MultiplyInputType is record
valid: std_ulogic; valid: std_ulogic;
data1: std_ulogic_vector(63 downto 0); data1: std_ulogic_vector(63 downto 0);
data2: std_ulogic_vector(63 downto 0); data2: std_ulogic_vector(63 downto 0);
addend: std_ulogic_vector(127 downto 0);
is_32bit: std_ulogic; is_32bit: std_ulogic;
neg_result: std_ulogic; not_result: std_ulogic;
end record;
constant MultiplyInputInit : MultiplyInputType := (valid => '0',
is_32bit => '0', not_result => '0',
others => (others => '0'));

type MultiplyOutputType is record
valid: std_ulogic;
result: std_ulogic_vector(127 downto 0);
overflow : std_ulogic;
end record; end record;
constant Execute1ToMultiplyInit : Execute1ToMultiplyType := (valid => '0', constant MultiplyOutputInit : MultiplyOutputType := (valid => '0', overflow => '0',
is_32bit => '0', neg_result => '0',
others => (others => '0')); others => (others => '0'));


type Execute1ToDividerType is record type Execute1ToDividerType is record
@ -382,14 +391,6 @@ package common is
write_cr_data => (others => '0'), write_reg => (others => '0'), write_cr_data => (others => '0'), write_reg => (others => '0'),
exc_write_reg => (others => '0'), exc_write_data => (others => '0')); exc_write_reg => (others => '0'), exc_write_data => (others => '0'));


type MultiplyToExecute1Type is record
valid: std_ulogic;
result: std_ulogic_vector(127 downto 0);
overflow : std_ulogic;
end record;
constant MultiplyToExecute1Init : MultiplyToExecute1Type := (valid => '0', overflow => '0',
others => (others => '0'));

type DividerToExecute1Type is record type DividerToExecute1Type is record
valid: std_ulogic; valid: std_ulogic;
write_reg_data: std_ulogic_vector(63 downto 0); write_reg_data: std_ulogic_vector(63 downto 0);

@ -89,8 +89,8 @@ architecture behaviour of execute1 is
signal countzero_result: std_ulogic_vector(63 downto 0); signal countzero_result: std_ulogic_vector(63 downto 0);


-- multiply signals -- multiply signals
signal x_to_multiply: Execute1ToMultiplyType; signal x_to_multiply: MultiplyInputType;
signal multiply_to_x: MultiplyToExecute1Type; signal multiply_to_x: MultiplyOutputType;


-- divider signals -- divider signals
signal x_to_divider: Execute1ToDividerType; signal x_to_divider: Execute1ToDividerType;
@ -396,7 +396,7 @@ begin
abs2 := - signed(b_in); abs2 := - signed(b_in);
end if; end if;


x_to_multiply <= Execute1ToMultiplyInit; x_to_multiply <= MultiplyInputInit;
x_to_multiply.is_32bit <= e_in.is_32bit; x_to_multiply.is_32bit <= e_in.is_32bit;


x_to_divider <= Execute1ToDividerInit; x_to_divider <= Execute1ToDividerInit;
@ -406,7 +406,8 @@ begin
x_to_divider.is_modulus <= '1'; x_to_divider.is_modulus <= '1';
end if; end if;


x_to_multiply.neg_result <= sign1 xor sign2; x_to_multiply.not_result <= sign1 xor sign2;
x_to_multiply.addend <= (others => sign1 xor sign2);
x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus); x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus);
if e_in.is_32bit = '0' then if e_in.is_32bit = '0' then
-- 64-bit forms -- 64-bit forms

@ -12,22 +12,22 @@ entity multiply is
port ( port (
clk : in std_logic; clk : in std_logic;


m_in : in Execute1ToMultiplyType; m_in : in MultiplyInputType;
m_out : out MultiplyToExecute1Type m_out : out MultiplyOutputType
); );
end entity multiply; end entity multiply;


architecture behaviour of multiply is architecture behaviour of multiply is
signal m: Execute1ToMultiplyType := Execute1ToMultiplyInit; signal m: MultiplyInputType := MultiplyInputInit;


type multiply_pipeline_stage is record type multiply_pipeline_stage is record
valid : std_ulogic; valid : std_ulogic;
data : unsigned(127 downto 0); data : unsigned(127 downto 0);
is_32bit : std_ulogic; is_32bit : std_ulogic;
neg_res : std_ulogic; not_res : std_ulogic;
end record; end record;
constant MultiplyPipelineStageInit : multiply_pipeline_stage := (valid => '0', constant MultiplyPipelineStageInit : multiply_pipeline_stage := (valid => '0',
is_32bit => '0', neg_res => '0', is_32bit => '0', not_res => '0',
data => (others => '0')); data => (others => '0'));


type multiply_pipeline_type is array(0 to PIPELINE_DEPTH-1) of multiply_pipeline_stage; type multiply_pipeline_type is array(0 to PIPELINE_DEPTH-1) of multiply_pipeline_stage;
@ -53,19 +53,19 @@ begin
variable d2 : std_ulogic_vector(63 downto 0); variable d2 : std_ulogic_vector(63 downto 0);
variable ov : std_ulogic; variable ov : std_ulogic;
begin begin
v := r;
v.multiply_pipeline(0).valid := m.valid; v.multiply_pipeline(0).valid := m.valid;
v.multiply_pipeline(0).data := unsigned(m.data1) * unsigned(m.data2); v.multiply_pipeline(0).data := (unsigned(m.data1) * unsigned(m.data2)) + unsigned(m.addend);
v.multiply_pipeline(0).is_32bit := m.is_32bit; v.multiply_pipeline(0).is_32bit := m.is_32bit;
v.multiply_pipeline(0).neg_res := m.neg_result; v.multiply_pipeline(0).not_res := m.not_result;


loop_0: for i in 1 to PIPELINE_DEPTH-1 loop loop_0: for i in 1 to PIPELINE_DEPTH-1 loop
v.multiply_pipeline(i) := r.multiply_pipeline(i-1); v.multiply_pipeline(i) := r.multiply_pipeline(i-1);
end loop; end loop;


if v.multiply_pipeline(PIPELINE_DEPTH-1).neg_res = '0' then
d := std_ulogic_vector(v.multiply_pipeline(PIPELINE_DEPTH-1).data); d := std_ulogic_vector(v.multiply_pipeline(PIPELINE_DEPTH-1).data);
else if v.multiply_pipeline(PIPELINE_DEPTH-1).not_res = '1' then
d := std_ulogic_vector(- signed(v.multiply_pipeline(PIPELINE_DEPTH-1).data)); d := not d;
end if; end if;


ov := '0'; ov := '0';

@ -17,8 +17,8 @@ architecture behave of multiply_tb is


constant pipeline_depth : integer := 4; constant pipeline_depth : integer := 4;


signal m1 : Execute1ToMultiplyType := Execute1ToMultiplyInit; signal m1 : MultiplyInputType := MultiplyInputInit;
signal m2 : MultiplyToExecute1Type; signal m2 : MultiplyOutputType;


function absval(x: std_ulogic_vector) return std_ulogic_vector is function absval(x: std_ulogic_vector) return std_ulogic_vector is
begin begin
@ -45,6 +45,7 @@ begin
stim_process: process stim_process: process
variable ra, rb, rt, behave_rt: std_ulogic_vector(63 downto 0); variable ra, rb, rt, behave_rt: std_ulogic_vector(63 downto 0);
variable si: std_ulogic_vector(15 downto 0); variable si: std_ulogic_vector(15 downto 0);
variable sign: std_ulogic;
begin begin
wait for clk_period; wait for clk_period;


@ -90,7 +91,9 @@ begin


m1.data1 <= absval(ra); m1.data1 <= absval(ra);
m1.data2 <= absval(rb); m1.data2 <= absval(rb);
m1.neg_result <= ra(63) xor rb(63); sign := ra(63) xor rb(63);
m1.not_result <= sign;
m1.addend <= (others => sign);
m1.valid <= '1'; m1.valid <= '1';


wait for clk_period; wait for clk_period;
@ -114,7 +117,8 @@ begin


m1.data1 <= ra; m1.data1 <= ra;
m1.data2 <= rb; m1.data2 <= rb;
m1.neg_result <= '0'; m1.not_result <= '0';
m1.addend <= (others => '0');
m1.valid <= '1'; m1.valid <= '1';


wait for clk_period; wait for clk_period;
@ -138,7 +142,9 @@ begin


m1.data1 <= absval(ra); m1.data1 <= absval(ra);
m1.data2 <= absval(rb); m1.data2 <= absval(rb);
m1.neg_result <= ra(63) xor rb(63); sign := ra(63) xor rb(63);
m1.not_result <= sign;
m1.addend <= (others => sign);
m1.valid <= '1'; m1.valid <= '1';


wait for clk_period; wait for clk_period;
@ -164,7 +170,9 @@ begin
m1.data1(31 downto 0) <= absval(ra(31 downto 0)); m1.data1(31 downto 0) <= absval(ra(31 downto 0));
m1.data2 <= (others => '0'); m1.data2 <= (others => '0');
m1.data2(31 downto 0) <= absval(rb(31 downto 0)); m1.data2(31 downto 0) <= absval(rb(31 downto 0));
m1.neg_result <= ra(31) xor rb(31); sign := ra(31) xor rb(31);
m1.not_result <= sign;
m1.addend <= (others => sign);
m1.valid <= '1'; m1.valid <= '1';


wait for clk_period; wait for clk_period;
@ -190,7 +198,9 @@ begin
m1.data1(31 downto 0) <= absval(ra(31 downto 0)); m1.data1(31 downto 0) <= absval(ra(31 downto 0));
m1.data2 <= (others => '0'); m1.data2 <= (others => '0');
m1.data2(31 downto 0) <= absval(rb(31 downto 0)); m1.data2(31 downto 0) <= absval(rb(31 downto 0));
m1.neg_result <= ra(31) xor rb(31); sign := ra(31) xor rb(31);
m1.not_result <= sign;
m1.addend <= (others => sign);
m1.valid <= '1'; m1.valid <= '1';


wait for clk_period; wait for clk_period;
@ -217,7 +227,8 @@ begin
m1.data1(31 downto 0) <= ra(31 downto 0); m1.data1(31 downto 0) <= ra(31 downto 0);
m1.data2 <= (others => '0'); m1.data2 <= (others => '0');
m1.data2(31 downto 0) <= rb(31 downto 0); m1.data2(31 downto 0) <= rb(31 downto 0);
m1.neg_result <= '0'; m1.not_result <= '0';
m1.addend <= (others => '0');
m1.valid <= '1'; m1.valid <= '1';


wait for clk_period; wait for clk_period;
@ -243,7 +254,9 @@ begin
m1.data1 <= absval(ra); m1.data1 <= absval(ra);
m1.data2 <= (others => '0'); m1.data2 <= (others => '0');
m1.data2(15 downto 0) <= absval(si); m1.data2(15 downto 0) <= absval(si);
m1.neg_result <= ra(63) xor si(15); sign := ra(63) xor si(15);
m1.not_result <= sign;
m1.addend <= (others => sign);
m1.valid <= '1'; m1.valid <= '1';


wait for clk_period; wait for clk_period;

@ -12,8 +12,8 @@ entity multiply is
port ( port (
clk : in std_logic; clk : in std_logic;


m_in : in Execute1ToMultiplyType; m_in : in MultiplyInputType;
m_out : out MultiplyToExecute1Type m_out : out MultiplyOutputType
); );
end entity multiply; end entity multiply;


@ -33,11 +33,11 @@ architecture behaviour of multiply is
signal p1_pat, p1_patb : std_ulogic; signal p1_pat, p1_patb : std_ulogic;


signal req_32bit, r32_1 : std_ulogic; signal req_32bit, r32_1 : std_ulogic;
signal req_neg, rneg_1 : std_ulogic; signal req_not, rnot_1 : std_ulogic;
signal valid_1 : std_ulogic; signal valid_1 : std_ulogic;


begin begin
addend <= (others => m_in.neg_result); addend <= m_in.addend;


m00: DSP48E1 m00: DSP48E1
generic map ( generic map (
@ -73,7 +73,7 @@ begin
CECTRL => '0', CECTRL => '0',
CED => '0', CED => '0',
CEINMODE => '0', CEINMODE => '0',
CEM => '1', CEM => m_in.valid,
CEP => '0', CEP => '0',
CLK => clk, CLK => clk,
D => (others => '0'), D => (others => '0'),
@ -129,7 +129,7 @@ begin
CECTRL => '0', CECTRL => '0',
CED => '0', CED => '0',
CEINMODE => '0', CEINMODE => '0',
CEM => '1', CEM => m_in.valid,
CEP => '0', CEP => '0',
CLK => clk, CLK => clk,
D => (others => '0'), D => (others => '0'),
@ -184,7 +184,7 @@ begin
CECTRL => '0', CECTRL => '0',
CED => '0', CED => '0',
CEINMODE => '0', CEINMODE => '0',
CEM => '1', CEM => m_in.valid,
CEP => '0', CEP => '0',
CLK => clk, CLK => clk,
D => (others => '0'), D => (others => '0'),
@ -239,7 +239,7 @@ begin
CECTRL => '0', CECTRL => '0',
CED => '0', CED => '0',
CEINMODE => '0', CEINMODE => '0',
CEM => '1', CEM => m_in.valid,
CEP => '0', CEP => '0',
CLK => clk, CLK => clk,
D => (others => '0'), D => (others => '0'),
@ -295,7 +295,7 @@ begin
CECTRL => '0', CECTRL => '0',
CED => '0', CED => '0',
CEINMODE => '0', CEINMODE => '0',
CEM => '1', CEM => m_in.valid,
CEP => '0', CEP => '0',
CLK => clk, CLK => clk,
D => (others => '0'), D => (others => '0'),
@ -351,7 +351,7 @@ begin
CECTRL => '0', CECTRL => '0',
CED => '0', CED => '0',
CEINMODE => '0', CEINMODE => '0',
CEM => '1', CEM => m_in.valid,
CEP => '0', CEP => '0',
CLK => clk, CLK => clk,
D => (others => '0'), D => (others => '0'),
@ -408,7 +408,7 @@ begin
CECTRL => '0', CECTRL => '0',
CED => '0', CED => '0',
CEINMODE => '0', CEINMODE => '0',
CEM => '1', CEM => m_in.valid,
CEP => '0', CEP => '0',
CLK => clk, CLK => clk,
D => (others => '0'), D => (others => '0'),
@ -464,7 +464,7 @@ begin
CECTRL => '0', CECTRL => '0',
CED => '0', CED => '0',
CEINMODE => '0', CEINMODE => '0',
CEM => '1', CEM => m_in.valid,
CEP => '0', CEP => '0',
CLK => clk, CLK => clk,
D => (others => '0'), D => (others => '0'),
@ -520,7 +520,7 @@ begin
CECTRL => '0', CECTRL => '0',
CED => '0', CED => '0',
CEINMODE => '0', CEINMODE => '0',
CEM => '1', CEM => m_in.valid,
CEP => '0', CEP => '0',
CLK => clk, CLK => clk,
D => (others => '0'), D => (others => '0'),
@ -575,7 +575,7 @@ begin
CECTRL => '0', CECTRL => '0',
CED => '0', CED => '0',
CEINMODE => '0', CEINMODE => '0',
CEM => '1', CEM => m_in.valid,
CEP => '0', CEP => '0',
CLK => clk, CLK => clk,
D => (others => '0'), D => (others => '0'),
@ -630,7 +630,7 @@ begin
CECTRL => '0', CECTRL => '0',
CED => '0', CED => '0',
CEINMODE => '0', CEINMODE => '0',
CEM => '1', CEM => m_in.valid,
CEP => '0', CEP => '0',
CLK => clk, CLK => clk,
D => (others => '0'), D => (others => '0'),
@ -685,7 +685,7 @@ begin
CECTRL => '0', CECTRL => '0',
CED => '0', CED => '0',
CEINMODE => '0', CEINMODE => '0',
CEM => '1', CEM => m_in.valid,
CEP => '0', CEP => '0',
CLK => clk, CLK => clk,
D => (others => '0'), D => (others => '0'),
@ -734,12 +734,12 @@ begin
CARRYINSEL => "000", CARRYINSEL => "000",
CARRYOUT => s0_carry, CARRYOUT => s0_carry,
CEA1 => '0', CEA1 => '0',
CEA2 => '1', CEA2 => valid_1,
CEAD => '0', CEAD => '0',
CEALUMODE => '0', CEALUMODE => '0',
CEB1 => '0', CEB1 => '0',
CEB2 => '1', CEB2 => valid_1,
CEC => '1', CEC => valid_1,
CECARRYIN => '0', CECARRYIN => '0',
CECTRL => '0', CECTRL => '0',
CED => '0', CED => '0',
@ -792,12 +792,12 @@ begin
CARRYIN => s0_carry(3), CARRYIN => s0_carry(3),
CARRYINSEL => "000", CARRYINSEL => "000",
CEA1 => '0', CEA1 => '0',
CEA2 => '1', CEA2 => valid_1,
CEAD => '0', CEAD => '0',
CEALUMODE => '0', CEALUMODE => '0',
CEB1 => '0', CEB1 => '0',
CEB2 => '1', CEB2 => valid_1,
CEC => '1', CEC => valid_1,
CECARRYIN => '0', CECARRYIN => '0',
CECTRL => '0', CECTRL => '0',
CED => '0', CED => '0',
@ -848,7 +848,7 @@ begin
port map ( port map (
A => m21_p(22 downto 0) & m03_p(5 downto 0) & '0', A => m21_p(22 downto 0) & m03_p(5 downto 0) & '0',
ACIN => (others => '0'), ACIN => (others => '0'),
ALUMODE => "00" & rneg_1 & '0', ALUMODE => "00" & rnot_1 & '0',
B => (others => '0'), B => (others => '0'),
BCIN => (others => '0'), BCIN => (others => '0'),
C => p0_mask, C => p0_mask,
@ -857,12 +857,12 @@ begin
CARRYINSEL => "000", CARRYINSEL => "000",
CARRYOUT => p0_carry, CARRYOUT => p0_carry,
CEA1 => '0', CEA1 => '0',
CEA2 => '1', CEA2 => valid_1,
CEAD => '0', CEAD => '0',
CEALUMODE => '1', CEALUMODE => valid_1,
CEB1 => '0', CEB1 => '0',
CEB2 => '1', CEB2 => valid_1,
CEC => '1', CEC => valid_1,
CECARRYIN => '0', CECARRYIN => '0',
CECTRL => '0', CECTRL => '0',
CED => '0', CED => '0',
@ -911,7 +911,7 @@ begin
port map ( port map (
A => x"0000000" & '0' & m21_p(41), A => x"0000000" & '0' & m21_p(41),
ACIN => (others => '0'), ACIN => (others => '0'),
ALUMODE => "00" & rneg_1 & '0', ALUMODE => "00" & rnot_1 & '0',
B => m21_p(40 downto 23), B => m21_p(40 downto 23),
BCIN => (others => '0'), BCIN => (others => '0'),
C => (others => '0'), C => (others => '0'),
@ -919,11 +919,11 @@ begin
CARRYIN => p0_carry(3), CARRYIN => p0_carry(3),
CARRYINSEL => "000", CARRYINSEL => "000",
CEA1 => '0', CEA1 => '0',
CEA2 => '1', CEA2 => valid_1,
CEAD => '0', CEAD => '0',
CEALUMODE => '1', CEALUMODE => valid_1,
CEB1 => '0', CEB1 => '0',
CEB2 => '1', CEB2 => valid_1,
CEC => '0', CEC => '0',
CECARRYIN => '0', CECARRYIN => '0',
CECTRL => '0', CECTRL => '0',
@ -952,7 +952,7 @@ begin
RSTP => '0' RSTP => '0'
); );


product(31 downto 0) <= product_lo xor (31 downto 0 => req_neg); product(31 downto 0) <= product_lo xor (31 downto 0 => req_not);


mult_out: process(all) mult_out: process(all)
variable ov : std_ulogic; variable ov : std_ulogic;
@ -977,8 +977,8 @@ begin
valid_1 <= m_in.valid; valid_1 <= m_in.valid;
req_32bit <= r32_1; req_32bit <= r32_1;
r32_1 <= m_in.is_32bit; r32_1 <= m_in.is_32bit;
req_neg <= rneg_1; req_not <= rnot_1;
rneg_1 <= m_in.neg_result; rnot_1 <= m_in.not_result;
end if; end if;
end process; end process;



Loading…
Cancel
Save