multiply: Move selection of result bits into execute1

This moves the logic that selects which bits of the multiplier result
get written into the destination GPR out of multiply and into
execute1.

The multiplier is now expected to do an unsigned multiplication of
64-bit operands, optionally negate the result, detect 32-bit
or 64-bit signed overflow of the result, and return a full 128-bit
result.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
pull/208/head
Paul Mackerras 4 years ago
parent f80da65799
commit 9880fc7435

@ -158,13 +158,13 @@ package common is


type Execute1ToMultiplyType is record type Execute1ToMultiplyType is record
valid: std_ulogic; valid: std_ulogic;
insn_type: insn_type_t; data1: std_ulogic_vector(63 downto 0);
data1: std_ulogic_vector(64 downto 0); data2: std_ulogic_vector(63 downto 0);
data2: std_ulogic_vector(64 downto 0);
is_32bit: std_ulogic; is_32bit: std_ulogic;
neg_result: std_ulogic;
end record; end record;
constant Execute1ToMultiplyInit : Execute1ToMultiplyType := (valid => '0', insn_type => OP_ILLEGAL, constant Execute1ToMultiplyInit : Execute1ToMultiplyType := (valid => '0',
is_32bit => '0', is_32bit => '0', neg_result => '0',
others => (others => '0')); others => (others => '0'));


type Execute1ToDividerType is record type Execute1ToDividerType is record
@ -356,7 +356,7 @@ package common is


type MultiplyToExecute1Type is record type MultiplyToExecute1Type is record
valid: std_ulogic; valid: std_ulogic;
write_reg_data: std_ulogic_vector(63 downto 0); result: std_ulogic_vector(127 downto 0);
overflow : std_ulogic; overflow : std_ulogic;
end record; end record;
constant MultiplyToExecute1Init : MultiplyToExecute1Type := (valid => '0', overflow => '0', constant MultiplyToExecute1Init : MultiplyToExecute1Type := (valid => '0', overflow => '0',

@ -53,6 +53,7 @@ architecture behaviour of execute1 is
mul_in_progress : std_ulogic; mul_in_progress : std_ulogic;
div_in_progress : std_ulogic; div_in_progress : std_ulogic;
cntz_in_progress : std_ulogic; cntz_in_progress : std_ulogic;
slow_op_insn : insn_type_t;
slow_op_dest : gpr_index_t; slow_op_dest : gpr_index_t;
slow_op_rc : std_ulogic; slow_op_rc : std_ulogic;
slow_op_oe : std_ulogic; slow_op_oe : std_ulogic;
@ -63,7 +64,7 @@ architecture behaviour of execute1 is
constant reg_type_init : reg_type := constant reg_type_init : reg_type :=
(e => Execute1ToWritebackInit, lr_update => '0', (e => Execute1ToWritebackInit, lr_update => '0',
mul_in_progress => '0', div_in_progress => '0', cntz_in_progress => '0', mul_in_progress => '0', div_in_progress => '0', cntz_in_progress => '0',
slow_op_rc => '0', slow_op_oe => '0', slow_op_xerc => xerc_init, slow_op_insn => OP_ILLEGAL, slow_op_rc => '0', slow_op_oe => '0', slow_op_xerc => xerc_init,
next_lr => (others => '0'), ldst_nia => (others => '0'), others => (others => '0')); next_lr => (others => '0'), ldst_nia => (others => '0'), others => (others => '0'));


signal r, rin : reg_type; signal r, rin : reg_type;
@ -346,32 +347,7 @@ begin
v.div_in_progress := '0'; v.div_in_progress := '0';
v.cntz_in_progress := '0'; v.cntz_in_progress := '0';


-- signals to multiply unit -- signals to multiply and divide units
x_to_multiply <= Execute1ToMultiplyInit;
x_to_multiply.insn_type <= e_in.insn_type;
x_to_multiply.is_32bit <= e_in.is_32bit;

if e_in.is_32bit = '1' then
if e_in.is_signed = '1' then
x_to_multiply.data1 <= (others => a_in(31));
x_to_multiply.data1(31 downto 0) <= a_in(31 downto 0);
x_to_multiply.data2 <= (others => b_in(31));
x_to_multiply.data2(31 downto 0) <= b_in(31 downto 0);
else
x_to_multiply.data1 <= '0' & x"00000000" & a_in(31 downto 0);
x_to_multiply.data2 <= '0' & x"00000000" & b_in(31 downto 0);
end if;
else
if e_in.is_signed = '1' then
x_to_multiply.data1 <= a_in(63) & a_in;
x_to_multiply.data2 <= b_in(63) & b_in;
else
x_to_multiply.data1 <= '0' & a_in;
x_to_multiply.data2 <= '0' & b_in;
end if;
end if;

-- signals to divide unit
sign1 := '0'; sign1 := '0';
sign2 := '0'; sign2 := '0';
if e_in.is_signed = '1' then if e_in.is_signed = '1' then
@ -395,15 +371,22 @@ begin
abs2 := - signed(b_in); abs2 := - signed(b_in);
end if; end if;


x_to_multiply <= Execute1ToMultiplyInit;
x_to_multiply.is_32bit <= e_in.is_32bit;

x_to_divider <= Execute1ToDividerInit; x_to_divider <= Execute1ToDividerInit;
x_to_divider.is_signed <= e_in.is_signed; x_to_divider.is_signed <= e_in.is_signed;
x_to_divider.is_32bit <= e_in.is_32bit; x_to_divider.is_32bit <= e_in.is_32bit;
if e_in.insn_type = OP_MOD then if e_in.insn_type = OP_MOD then
x_to_divider.is_modulus <= '1'; x_to_divider.is_modulus <= '1';
end if; end if;

x_to_multiply.neg_result <= sign1 xor sign2;
x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus); x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus);
if e_in.is_32bit = '0' then if e_in.is_32bit = '0' then
-- 64-bit forms -- 64-bit forms
x_to_multiply.data1 <= std_ulogic_vector(abs1);
x_to_multiply.data2 <= std_ulogic_vector(abs2);
if e_in.insn_type = OP_DIVE then if e_in.insn_type = OP_DIVE then
x_to_divider.is_extended <= '1'; x_to_divider.is_extended <= '1';
end if; end if;
@ -411,6 +394,8 @@ begin
x_to_divider.divisor <= std_ulogic_vector(abs2); x_to_divider.divisor <= std_ulogic_vector(abs2);
else else
-- 32-bit forms -- 32-bit forms
x_to_multiply.data1 <= x"00000000" & std_ulogic_vector(abs1(31 downto 0));
x_to_multiply.data2 <= x"00000000" & std_ulogic_vector(abs2(31 downto 0));
x_to_divider.is_extended <= '0'; x_to_divider.is_extended <= '0';
if e_in.insn_type = OP_DIVE then -- extended forms if e_in.insn_type = OP_DIVE then -- extended forms
x_to_divider.dividend <= std_ulogic_vector(abs1(31 downto 0)) & x"00000000"; x_to_divider.dividend <= std_ulogic_vector(abs1(31 downto 0)) & x"00000000";
@ -505,6 +490,7 @@ begin


v.e.valid := '1'; v.e.valid := '1';
v.e.write_reg := e_in.write_reg; v.e.write_reg := e_in.write_reg;
v.slow_op_insn := e_in.insn_type;
v.slow_op_dest := gspr_to_gpr(e_in.write_reg); v.slow_op_dest := gspr_to_gpr(e_in.write_reg);
v.slow_op_rc := e_in.rc; v.slow_op_rc := e_in.rc;
v.slow_op_oe := e_in.oe; v.slow_op_oe := e_in.oe;
@ -950,8 +936,18 @@ begin
if (r.mul_in_progress = '1' and multiply_to_x.valid = '1') or if (r.mul_in_progress = '1' and multiply_to_x.valid = '1') or
(r.div_in_progress = '1' and divider_to_x.valid = '1') then (r.div_in_progress = '1' and divider_to_x.valid = '1') then
if r.mul_in_progress = '1' then if r.mul_in_progress = '1' then
result := multiply_to_x.write_reg_data; overflow := '0';
overflow := multiply_to_x.overflow; case r.slow_op_insn is
when OP_MUL_H32 =>
result := multiply_to_x.result(63 downto 32) &
multiply_to_x.result(63 downto 32);
when OP_MUL_H64 =>
result := multiply_to_x.result(127 downto 64);
when others =>
-- i.e. OP_MUL_L64
result := multiply_to_x.result(63 downto 0);
overflow := multiply_to_x.overflow;
end case;
else else
result := divider_to_x.write_reg_data; result := divider_to_x.write_reg_data;
overflow := divider_to_x.overflow; overflow := divider_to_x.overflow;

@ -4,11 +4,10 @@ use ieee.numeric_std.all;


library work; library work;
use work.common.all; use work.common.all;
use work.decode_types.all;


entity multiply is entity multiply is
generic ( generic (
PIPELINE_DEPTH : natural := 16 PIPELINE_DEPTH : natural := 4
); );
port ( port (
clk : in std_logic; clk : in std_logic;
@ -19,17 +18,16 @@ entity multiply is
end entity multiply; end entity multiply;


architecture behaviour of multiply is architecture behaviour of multiply is
signal m: Execute1ToMultiplyType; signal m: Execute1ToMultiplyType := Execute1ToMultiplyInit;


type multiply_pipeline_stage is record type multiply_pipeline_stage is record
valid : std_ulogic; valid : std_ulogic;
insn_type : insn_type_t; data : unsigned(127 downto 0);
data : signed(129 downto 0);
is_32bit : std_ulogic; is_32bit : std_ulogic;
neg_res : std_ulogic;
end record; end record;
constant MultiplyPipelineStageInit : multiply_pipeline_stage := (valid => '0', constant MultiplyPipelineStageInit : multiply_pipeline_stage := (valid => '0',
insn_type => OP_ILLEGAL, is_32bit => '0', neg_res => '0',
is_32bit => '0',
data => (others => '0')); data => (others => '0'));


type multiply_pipeline_type is array(0 to PIPELINE_DEPTH-1) of multiply_pipeline_stage; type multiply_pipeline_type is array(0 to PIPELINE_DEPTH-1) of multiply_pipeline_stage;
@ -51,50 +49,35 @@ begin


multiply_1: process(all) multiply_1: process(all)
variable v : reg_type; variable v : reg_type;
variable d : std_ulogic_vector(129 downto 0); variable d : std_ulogic_vector(127 downto 0);
variable d2 : std_ulogic_vector(63 downto 0); variable d2 : std_ulogic_vector(63 downto 0);
variable ov : std_ulogic; variable ov : std_ulogic;
begin begin
v := r;

m_out <= MultiplyToExecute1Init;

v.multiply_pipeline(0).valid := m.valid; v.multiply_pipeline(0).valid := m.valid;
v.multiply_pipeline(0).insn_type := m.insn_type; v.multiply_pipeline(0).data := unsigned(m.data1) * unsigned(m.data2);
v.multiply_pipeline(0).data := signed(m.data1) * signed(m.data2);
v.multiply_pipeline(0).is_32bit := m.is_32bit; v.multiply_pipeline(0).is_32bit := m.is_32bit;
v.multiply_pipeline(0).neg_res := m.neg_result;


loop_0: for i in 1 to PIPELINE_DEPTH-1 loop loop_0: for i in 1 to PIPELINE_DEPTH-1 loop
v.multiply_pipeline(i) := r.multiply_pipeline(i-1); v.multiply_pipeline(i) := r.multiply_pipeline(i-1);
end loop; end loop;


d := std_ulogic_vector(v.multiply_pipeline(PIPELINE_DEPTH-1).data); if v.multiply_pipeline(PIPELINE_DEPTH-1).neg_res = '0' then
ov := '0'; d := std_ulogic_vector(v.multiply_pipeline(PIPELINE_DEPTH-1).data);
else
d := std_ulogic_vector(- signed(v.multiply_pipeline(PIPELINE_DEPTH-1).data));
end if;


-- TODO: Handle overflows ov := '0';
case_0: case v.multiply_pipeline(PIPELINE_DEPTH-1).insn_type is if v.multiply_pipeline(PIPELINE_DEPTH-1).is_32bit = '1' then
when OP_MUL_L64 => ov := (or d(63 downto 31)) and not (and d(63 downto 31));
d2 := d(63 downto 0); else
if v.multiply_pipeline(PIPELINE_DEPTH-1).is_32bit = '1' then ov := (or d(127 downto 63)) and not (and d(127 downto 63));
ov := (or d(63 downto 31)) and not (and d(63 downto 31)); end if;
else
ov := (or d(127 downto 63)) and not (and d(127 downto 63));
end if;
when OP_MUL_H32 =>
d2 := d(63 downto 32) & d(63 downto 32);
when OP_MUL_H64 =>
d2 := d(127 downto 64);
when others =>
--report "Illegal insn type in multiplier";
d2 := (others => '0');
end case;


m_out.write_reg_data <= d2; m_out.result <= d;
m_out.overflow <= ov; m_out.overflow <= ov;

m_out.valid <= v.multiply_pipeline(PIPELINE_DEPTH-1).valid;
if v.multiply_pipeline(PIPELINE_DEPTH-1).valid = '1' then
m_out.valid <= '1';
end if;


rin <= v; rin <= v;
end process; end process;

@ -17,8 +17,18 @@ architecture behave of multiply_tb is


constant pipeline_depth : integer := 4; constant pipeline_depth : integer := 4;


signal m1 : Execute1ToMultiplyType; signal m1 : Execute1ToMultiplyType := Execute1ToMultiplyInit;
signal m2 : MultiplyToExecute1Type; signal m2 : MultiplyToExecute1Type;

function absval(x: std_ulogic_vector) return std_ulogic_vector is
begin
if x(x'left) = '1' then
return std_ulogic_vector(- signed(x));
else
return x;
end if;
end;

begin begin
multiply_0: entity work.multiply multiply_0: entity work.multiply
generic map (PIPELINE_DEPTH => pipeline_depth) generic map (PIPELINE_DEPTH => pipeline_depth)
@ -39,9 +49,8 @@ begin
wait for clk_period; wait for clk_period;


m1.valid <= '1'; m1.valid <= '1';
m1.insn_type <= OP_MUL_L64; m1.data1 <= x"0000000000001000";
m1.data1 <= '0' & x"0000000000001000"; m1.data2 <= x"0000000000001111";
m1.data2 <= '0' & x"0000000000001111";


wait for clk_period; wait for clk_period;
assert m2.valid = '0'; assert m2.valid = '0';
@ -56,7 +65,7 @@ begin


wait for clk_period; wait for clk_period;
assert m2.valid = '1'; assert m2.valid = '1';
assert m2.write_reg_data = x"0000000001111000"; assert m2.result = x"00000000000000000000000001111000";


wait for clk_period; wait for clk_period;
assert m2.valid = '0'; assert m2.valid = '0';
@ -70,7 +79,7 @@ begin


wait for clk_period * (pipeline_depth-1); wait for clk_period * (pipeline_depth-1);
assert m2.valid = '1'; assert m2.valid = '1';
assert m2.write_reg_data = x"0000000001111000"; assert m2.result = x"00000000000000000000000001111000";


-- test mulld -- test mulld
mulld_loop : for i in 0 to 1000 loop mulld_loop : for i in 0 to 1000 loop
@ -79,10 +88,10 @@ begin


behave_rt := ppc_mulld(ra, rb); behave_rt := ppc_mulld(ra, rb);


m1.data1 <= '0' & ra; m1.data1 <= absval(ra);
m1.data2 <= '0' & rb; m1.data2 <= absval(rb);
m1.neg_result <= ra(63) xor rb(63);
m1.valid <= '1'; m1.valid <= '1';
m1.insn_type <= OP_MUL_L64;


wait for clk_period; wait for clk_period;


@ -92,8 +101,8 @@ begin


assert m2.valid = '1'; assert m2.valid = '1';


assert to_hstring(behave_rt) = to_hstring(m2.write_reg_data) assert to_hstring(behave_rt) = to_hstring(m2.result(63 downto 0))
report "bad mulld expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.write_reg_data); report "bad mulld expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.result(63 downto 0));
end loop; end loop;


-- test mulhdu -- test mulhdu
@ -103,10 +112,10 @@ begin


behave_rt := ppc_mulhdu(ra, rb); behave_rt := ppc_mulhdu(ra, rb);


m1.data1 <= '0' & ra; m1.data1 <= ra;
m1.data2 <= '0' & rb; m1.data2 <= rb;
m1.neg_result <= '0';
m1.valid <= '1'; m1.valid <= '1';
m1.insn_type <= OP_MUL_H64;


wait for clk_period; wait for clk_period;


@ -116,8 +125,8 @@ begin


assert m2.valid = '1'; assert m2.valid = '1';


assert to_hstring(behave_rt) = to_hstring(m2.write_reg_data) assert to_hstring(behave_rt) = to_hstring(m2.result(127 downto 64))
report "bad mulhdu expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.write_reg_data); report "bad mulhdu expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.result(127 downto 64));
end loop; end loop;


-- test mulhd -- test mulhd
@ -127,10 +136,10 @@ begin


behave_rt := ppc_mulhd(ra, rb); behave_rt := ppc_mulhd(ra, rb);


m1.data1 <= ra(63) & ra; m1.data1 <= absval(ra);
m1.data2 <= rb(63) & rb; m1.data2 <= absval(rb);
m1.neg_result <= ra(63) xor rb(63);
m1.valid <= '1'; m1.valid <= '1';
m1.insn_type <= OP_MUL_H64;


wait for clk_period; wait for clk_period;


@ -140,8 +149,8 @@ begin


assert m2.valid = '1'; assert m2.valid = '1';


assert to_hstring(behave_rt) = to_hstring(m2.write_reg_data) assert to_hstring(behave_rt) = to_hstring(m2.result(127 downto 64))
report "bad mulhd expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.write_reg_data); report "bad mulhd expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.result(127 downto 64));
end loop; end loop;


-- test mullw -- test mullw
@ -151,12 +160,12 @@ begin


behave_rt := ppc_mullw(ra, rb); behave_rt := ppc_mullw(ra, rb);


m1.data1 <= (others => ra(31)); m1.data1 <= (others => '0');
m1.data1(31 downto 0) <= ra(31 downto 0); m1.data1(31 downto 0) <= absval(ra(31 downto 0));
m1.data2 <= (others => rb(31)); m1.data2 <= (others => '0');
m1.data2(31 downto 0) <= rb(31 downto 0); m1.data2(31 downto 0) <= absval(rb(31 downto 0));
m1.neg_result <= ra(31) xor rb(31);
m1.valid <= '1'; m1.valid <= '1';
m1.insn_type <= OP_MUL_L64;


wait for clk_period; wait for clk_period;


@ -166,8 +175,8 @@ begin


assert m2.valid = '1'; assert m2.valid = '1';


assert to_hstring(behave_rt) = to_hstring(m2.write_reg_data) assert to_hstring(behave_rt) = to_hstring(m2.result(63 downto 0))
report "bad mullw expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.write_reg_data); report "bad mullw expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.result(63 downto 0));
end loop; end loop;


-- test mulhw -- test mulhw
@ -177,12 +186,12 @@ begin


behave_rt := ppc_mulhw(ra, rb); behave_rt := ppc_mulhw(ra, rb);


m1.data1 <= (others => ra(31)); m1.data1 <= (others => '0');
m1.data1(31 downto 0) <= ra(31 downto 0); m1.data1(31 downto 0) <= absval(ra(31 downto 0));
m1.data2 <= (others => rb(31)); m1.data2 <= (others => '0');
m1.data2(31 downto 0) <= rb(31 downto 0); m1.data2(31 downto 0) <= absval(rb(31 downto 0));
m1.neg_result <= ra(31) xor rb(31);
m1.valid <= '1'; m1.valid <= '1';
m1.insn_type <= OP_MUL_H32;


wait for clk_period; wait for clk_period;


@ -192,8 +201,9 @@ begin


assert m2.valid = '1'; assert m2.valid = '1';


assert to_hstring(behave_rt) = to_hstring(m2.write_reg_data) assert to_hstring(behave_rt) = to_hstring(m2.result(63 downto 32) & m2.result(63 downto 32))
report "bad mulhw expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.write_reg_data); report "bad mulhw expected " & to_hstring(behave_rt) & " got " &
to_hstring(m2.result(63 downto 32) & m2.result(63 downto 32));
end loop; end loop;


-- test mulhwu -- test mulhwu
@ -207,8 +217,8 @@ begin
m1.data1(31 downto 0) <= ra(31 downto 0); m1.data1(31 downto 0) <= ra(31 downto 0);
m1.data2 <= (others => '0'); m1.data2 <= (others => '0');
m1.data2(31 downto 0) <= rb(31 downto 0); m1.data2(31 downto 0) <= rb(31 downto 0);
m1.neg_result <= '0';
m1.valid <= '1'; m1.valid <= '1';
m1.insn_type <= OP_MUL_H32;


wait for clk_period; wait for clk_period;


@ -218,8 +228,9 @@ begin


assert m2.valid = '1'; assert m2.valid = '1';


assert to_hstring(behave_rt) = to_hstring(m2.write_reg_data) assert to_hstring(behave_rt) = to_hstring(m2.result(63 downto 32) & m2.result(63 downto 32))
report "bad mulhwu expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.write_reg_data); report "bad mulhwu expected " & to_hstring(behave_rt) & " got " &
to_hstring(m2.result(63 downto 32) & m2.result(63 downto 32));
end loop; end loop;


-- test mulli -- test mulli
@ -229,11 +240,11 @@ begin


behave_rt := ppc_mulli(ra, si); behave_rt := ppc_mulli(ra, si);


m1.data1 <= ra(63) & ra; m1.data1 <= absval(ra);
m1.data2 <= (others => si(15)); m1.data2 <= (others => '0');
m1.data2(15 downto 0) <= si; m1.data2(15 downto 0) <= absval(si);
m1.neg_result <= ra(63) xor si(15);
m1.valid <= '1'; m1.valid <= '1';
m1.insn_type <= OP_MUL_L64;


wait for clk_period; wait for clk_period;


@ -243,8 +254,8 @@ begin


assert m2.valid = '1'; assert m2.valid = '1';


assert to_hstring(behave_rt) = to_hstring(m2.write_reg_data) assert to_hstring(behave_rt) = to_hstring(m2.result(63 downto 0))
report "bad mulli expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.write_reg_data); report "bad mulli expected " & to_hstring(behave_rt) & " got " & to_hstring(m2.result(63 downto 0));
end loop; end loop;


std.env.finish; std.env.finish;

Loading…
Cancel
Save