multiplier: Generalize interface to the multiplier

This makes the interface to the multiplier more general so an instance
of it can be used in the FPU.  It now has a 128-bit addend that is
added on to the product.  Instead of an input to negate the output,
it now has a "not_result" input to complement the output.  Execute1
uses not_result=1 and addend=-1 to get the effect of negating the
output, since in two's complement not(p - 1) = -p.  The interface is
defined this way because this is what can be done easily with the
Xilinx DSP slices in xilinx-mult.vhdl.

This also adds clock enable signals to the DSP slices, mostly for the
sake of reducing power consumption.

Signed-off-by: Paul Mackerras <>
Paul Mackerras 5 years ago
parent 178d7680af
commit 535341961d

@ -182,15 +182,24 @@ package common is
is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', br_pred => '0',
byte_reverse => '0', sign_extend => '0', update => '0', nia => (others => '0'), read_data1 => (others => '0'), read_data2 => (others => '0'), read_data3 => (others => '0'), cr => (others => '0'), insn => (others => '0'), data_len => (others => '0'), others => (others => '0'));

type Execute1ToMultiplyType is record
type MultiplyInputType is record
valid: std_ulogic;
data1: std_ulogic_vector(63 downto 0);
data2: std_ulogic_vector(63 downto 0);
addend: std_ulogic_vector(127 downto 0);
is_32bit: std_ulogic;
neg_result: std_ulogic;
not_result: std_ulogic;
end record;
constant MultiplyInputInit : MultiplyInputType := (valid => '0',
is_32bit => '0', not_result => '0',
others => (others => '0'));

type MultiplyOutputType is record
valid: std_ulogic;
result: std_ulogic_vector(127 downto 0);
overflow : std_ulogic;
end record;
constant Execute1ToMultiplyInit : Execute1ToMultiplyType := (valid => '0',
is_32bit => '0', neg_result => '0',
constant MultiplyOutputInit : MultiplyOutputType := (valid => '0', overflow => '0',
others => (others => '0'));

type Execute1ToDividerType is record
@ -382,14 +391,6 @@ package common is
write_cr_data => (others => '0'), write_reg => (others => '0'),
exc_write_reg => (others => '0'), exc_write_data => (others => '0'));

type MultiplyToExecute1Type is record
valid: std_ulogic;
result: std_ulogic_vector(127 downto 0);
overflow : std_ulogic;
end record;
constant MultiplyToExecute1Init : MultiplyToExecute1Type := (valid => '0', overflow => '0',
others => (others => '0'));

type DividerToExecute1Type is record
valid: std_ulogic;
write_reg_data: std_ulogic_vector(63 downto 0);

@ -89,8 +89,8 @@ architecture behaviour of execute1 is
signal countzero_result: std_ulogic_vector(63 downto 0);

-- multiply signals
signal x_to_multiply: Execute1ToMultiplyType;
signal multiply_to_x: MultiplyToExecute1Type;
signal x_to_multiply: MultiplyInputType;
signal multiply_to_x: MultiplyOutputType;

-- divider signals
signal x_to_divider: Execute1ToDividerType;
@ -396,7 +396,7 @@ begin
abs2 := - signed(b_in);
end if;

x_to_multiply <= Execute1ToMultiplyInit;
x_to_multiply <= MultiplyInputInit;
x_to_multiply.is_32bit <= e_in.is_32bit;

x_to_divider <= Execute1ToDividerInit;
@ -406,7 +406,8 @@ begin
x_to_divider.is_modulus <= '1';
end if;

x_to_multiply.neg_result <= sign1 xor sign2;
x_to_multiply.not_result <= sign1 xor sign2;
x_to_multiply.addend <= (others => sign1 xor sign2);
x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus);
if e_in.is_32bit = '0' then
-- 64-bit forms

@ -12,22 +12,22 @@ entity multiply is
port (
clk : in std_logic;

m_in : in Execute1ToMultiplyType;
m_out : out MultiplyToExecute1Type
m_in : in MultiplyInputType;
m_out : out MultiplyOutputType
end entity multiply;

architecture behaviour of multiply is
signal m: Execute1ToMultiplyType := Execute1ToMultiplyInit;
signal m: MultiplyInputType := MultiplyInputInit;

type multiply_pipeline_stage is record
valid : std_ulogic;
data : unsigned(127 downto 0);
is_32bit : std_ulogic;
neg_res : std_ulogic;
not_res : std_ulogic;
end record;
constant MultiplyPipelineStageInit : multiply_pipeline_stage := (valid => '0',
is_32bit => '0', neg_res => '0',
is_32bit => '0', not_res => '0',
data => (others => '0'));

type multiply_pipeline_type is array(0 to PIPELINE_DEPTH-1) of multiply_pipeline_stage;
@ -53,19 +53,19 @@ begin
variable d2 : std_ulogic_vector(63 downto 0);
variable ov : std_ulogic;
v := r;
v.multiply_pipeline(0).valid := m.valid;
v.multiply_pipeline(0).data := unsigned(m.data1) * unsigned(m.data2);
v.multiply_pipeline(0).data := (unsigned(m.data1) * unsigned(m.data2)) + unsigned(m.addend);
v.multiply_pipeline(0).is_32bit := m.is_32bit;
v.multiply_pipeline(0).neg_res := m.neg_result;
v.multiply_pipeline(0).not_res := m.not_result;

loop_0: for i in 1 to PIPELINE_DEPTH-1 loop
v.multiply_pipeline(i) := r.multiply_pipeline(i-1);
end loop;

if v.multiply_pipeline(PIPELINE_DEPTH-1).neg_res = '0' then
d := std_ulogic_vector(v.multiply_pipeline(PIPELINE_DEPTH-1).data);
d := std_ulogic_vector(- signed(v.multiply_pipeline(PIPELINE_DEPTH-1).data));
if v.multiply_pipeline(PIPELINE_DEPTH-1).not_res = '1' then
d := not d;
end if;

ov := '0';

@ -17,8 +17,8 @@ architecture behave of multiply_tb is

constant pipeline_depth : integer := 4;

signal m1 : Execute1ToMultiplyType := Execute1ToMultiplyInit;
signal m2 : MultiplyToExecute1Type;
signal m1 : MultiplyInputType := MultiplyInputInit;
signal m2 : MultiplyOutputType;

function absval(x: std_ulogic_vector) return std_ulogic_vector is
@ -45,6 +45,7 @@ begin
stim_process: process
variable ra, rb, rt, behave_rt: std_ulogic_vector(63 downto 0);
variable si: std_ulogic_vector(15 downto 0);
variable sign: std_ulogic;
wait for clk_period;

@ -90,7 +91,9 @@ begin

m1.data1 <= absval(ra);
m1.data2 <= absval(rb);
m1.neg_result <= ra(63) xor rb(63);
sign := ra(63) xor rb(63);
m1.not_result <= sign;
m1.addend <= (others => sign);
m1.valid <= '1';

wait for clk_period;
@ -114,7 +117,8 @@ begin

m1.data1 <= ra;
m1.data2 <= rb;
m1.neg_result <= '0';
m1.not_result <= '0';
m1.addend <= (others => '0');
m1.valid <= '1';

wait for clk_period;
@ -138,7 +142,9 @@ begin

m1.data1 <= absval(ra);
m1.data2 <= absval(rb);
m1.neg_result <= ra(63) xor rb(63);
sign := ra(63) xor rb(63);
m1.not_result <= sign;
m1.addend <= (others => sign);
m1.valid <= '1';

wait for clk_period;
@ -164,7 +170,9 @@ begin
m1.data1(31 downto 0) <= absval(ra(31 downto 0));
m1.data2 <= (others => '0');
m1.data2(31 downto 0) <= absval(rb(31 downto 0));
m1.neg_result <= ra(31) xor rb(31);
sign := ra(31) xor rb(31);
m1.not_result <= sign;
m1.addend <= (others => sign);
m1.valid <= '1';

wait for clk_period;
@ -190,7 +198,9 @@ begin
m1.data1(31 downto 0) <= absval(ra(31 downto 0));
m1.data2 <= (others => '0');
m1.data2(31 downto 0) <= absval(rb(31 downto 0));
m1.neg_result <= ra(31) xor rb(31);
sign := ra(31) xor rb(31);
m1.not_result <= sign;
m1.addend <= (others => sign);
m1.valid <= '1';

wait for clk_period;
@ -217,7 +227,8 @@ begin
m1.data1(31 downto 0) <= ra(31 downto 0);
m1.data2 <= (others => '0');
m1.data2(31 downto 0) <= rb(31 downto 0);
m1.neg_result <= '0';
m1.not_result <= '0';
m1.addend <= (others => '0');
m1.valid <= '1';

wait for clk_period;
@ -243,7 +254,9 @@ begin
m1.data1 <= absval(ra);
m1.data2 <= (others => '0');
m1.data2(15 downto 0) <= absval(si);
m1.neg_result <= ra(63) xor si(15);
sign := ra(63) xor si(15);
m1.not_result <= sign;
m1.addend <= (others => sign);
m1.valid <= '1';

wait for clk_period;

@ -12,8 +12,8 @@ entity multiply is
port (
clk : in std_logic;

m_in : in Execute1ToMultiplyType;
m_out : out MultiplyToExecute1Type
m_in : in MultiplyInputType;
m_out : out MultiplyOutputType
end entity multiply;

@ -33,11 +33,11 @@ architecture behaviour of multiply is
signal p1_pat, p1_patb : std_ulogic;

signal req_32bit, r32_1 : std_ulogic;
signal req_neg, rneg_1 : std_ulogic;
signal req_not, rnot_1 : std_ulogic;
signal valid_1 : std_ulogic;

addend <= (others => m_in.neg_result);
addend <= m_in.addend;

m00: DSP48E1
generic map (
@ -73,7 +73,7 @@ begin
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '1',
CEM => m_in.valid,
CEP => '0',
CLK => clk,
D => (others => '0'),
@ -129,7 +129,7 @@ begin
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '1',
CEM => m_in.valid,
CEP => '0',
CLK => clk,
D => (others => '0'),
@ -184,7 +184,7 @@ begin
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '1',
CEM => m_in.valid,
CEP => '0',
CLK => clk,
D => (others => '0'),
@ -239,7 +239,7 @@ begin
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '1',
CEM => m_in.valid,
CEP => '0',
CLK => clk,
D => (others => '0'),
@ -295,7 +295,7 @@ begin
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '1',
CEM => m_in.valid,
CEP => '0',
CLK => clk,
D => (others => '0'),
@ -351,7 +351,7 @@ begin
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '1',
CEM => m_in.valid,
CEP => '0',
CLK => clk,
D => (others => '0'),
@ -408,7 +408,7 @@ begin
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '1',
CEM => m_in.valid,
CEP => '0',
CLK => clk,
D => (others => '0'),
@ -464,7 +464,7 @@ begin
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '1',
CEM => m_in.valid,
CEP => '0',
CLK => clk,
D => (others => '0'),
@ -520,7 +520,7 @@ begin
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '1',
CEM => m_in.valid,
CEP => '0',
CLK => clk,
D => (others => '0'),
@ -575,7 +575,7 @@ begin
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '1',
CEM => m_in.valid,
CEP => '0',
CLK => clk,
D => (others => '0'),
@ -630,7 +630,7 @@ begin
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '1',
CEM => m_in.valid,
CEP => '0',
CLK => clk,
D => (others => '0'),
@ -685,7 +685,7 @@ begin
CECTRL => '0',
CED => '0',
CEINMODE => '0',
CEM => '1',
CEM => m_in.valid,
CEP => '0',
CLK => clk,
D => (others => '0'),
@ -734,12 +734,12 @@ begin
CARRYINSEL => "000",
CARRYOUT => s0_carry,
CEA1 => '0',
CEA2 => '1',
CEA2 => valid_1,
CEAD => '0',
CEB1 => '0',
CEB2 => '1',
CEC => '1',
CEB2 => valid_1,
CEC => valid_1,
CECTRL => '0',
CED => '0',
@ -792,12 +792,12 @@ begin
CARRYIN => s0_carry(3),
CARRYINSEL => "000",
CEA1 => '0',
CEA2 => '1',
CEA2 => valid_1,
CEAD => '0',
CEB1 => '0',
CEB2 => '1',
CEC => '1',
CEB2 => valid_1,
CEC => valid_1,
CECTRL => '0',
CED => '0',
@ -848,7 +848,7 @@ begin
port map (
A => m21_p(22 downto 0) & m03_p(5 downto 0) & '0',
ACIN => (others => '0'),
ALUMODE => "00" & rneg_1 & '0',
ALUMODE => "00" & rnot_1 & '0',
B => (others => '0'),
BCIN => (others => '0'),
C => p0_mask,
@ -857,12 +857,12 @@ begin
CARRYINSEL => "000",
CARRYOUT => p0_carry,
CEA1 => '0',
CEA2 => '1',
CEA2 => valid_1,
CEAD => '0',
CEALUMODE => valid_1,
CEB1 => '0',
CEB2 => '1',
CEC => '1',
CEB2 => valid_1,
CEC => valid_1,
CECTRL => '0',
CED => '0',
@ -911,7 +911,7 @@ begin
port map (
A => x"0000000" & '0' & m21_p(41),
ACIN => (others => '0'),
ALUMODE => "00" & rneg_1 & '0',
ALUMODE => "00" & rnot_1 & '0',
B => m21_p(40 downto 23),
BCIN => (others => '0'),
C => (others => '0'),
@ -919,11 +919,11 @@ begin
CARRYIN => p0_carry(3),
CARRYINSEL => "000",
CEA1 => '0',
CEA2 => '1',
CEA2 => valid_1,
CEAD => '0',
CEALUMODE => valid_1,
CEB1 => '0',
CEB2 => '1',
CEB2 => valid_1,
CEC => '0',
CECTRL => '0',
@ -952,7 +952,7 @@ begin
RSTP => '0'

product(31 downto 0) <= product_lo xor (31 downto 0 => req_neg);
product(31 downto 0) <= product_lo xor (31 downto 0 => req_not);

mult_out: process(all)
variable ov : std_ulogic;
@ -977,8 +977,8 @@ begin
valid_1 <= m_in.valid;
req_32bit <= r32_1;
r32_1 <= m_in.is_32bit;
req_neg <= rneg_1;
rneg_1 <= m_in.neg_result;
req_not <= rnot_1;
rnot_1 <= m_in.not_result;
end if;
end process;
