execute1: Take an extra cycle for OE=1 multiply instructions

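The multiplier now registers its overflow output, so the overflow signal
arrives one cycle later than the product. For multiply instructions with
OE=1, execute1 therefore stays busy for one extra cycle (tracked by the
new mul_finish flag) and completes the instruction, using the product it
latched the cycle before, once the overflow indication is available.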

This breaks up a long combinatorial path and improves timing.

This also changes some uses of v.<field> (the value being computed in the
current cycle) to r.<field> (the registered value) in the slow op
completion logic, which should help timing as well.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
jtag-port
Paul Mackerras 4 years ago
parent 535341961d
commit f1238299bd

@ -56,6 +56,7 @@ architecture behaviour of execute1 is
lr_update : std_ulogic; lr_update : std_ulogic;
next_lr : std_ulogic_vector(63 downto 0); next_lr : std_ulogic_vector(63 downto 0);
mul_in_progress : std_ulogic; mul_in_progress : std_ulogic;
mul_finish : std_ulogic;
div_in_progress : std_ulogic; div_in_progress : std_ulogic;
cntz_in_progress : std_ulogic; cntz_in_progress : std_ulogic;
slow_op_insn : insn_type_t; slow_op_insn : insn_type_t;
@ -69,7 +70,7 @@ architecture behaviour of execute1 is
constant reg_type_init : reg_type := constant reg_type_init : reg_type :=
(e => Execute1ToWritebackInit, f => Execute1ToFetch1Init, (e => Execute1ToWritebackInit, f => Execute1ToFetch1Init,
busy => '0', lr_update => '0', terminate => '0', busy => '0', lr_update => '0', terminate => '0',
mul_in_progress => '0', div_in_progress => '0', cntz_in_progress => '0', mul_in_progress => '0', mul_finish => '0', div_in_progress => '0', cntz_in_progress => '0',
slow_op_insn => OP_ILLEGAL, slow_op_rc => '0', slow_op_oe => '0', slow_op_xerc => xerc_init, slow_op_insn => OP_ILLEGAL, slow_op_rc => '0', slow_op_oe => '0', slow_op_xerc => xerc_init,
next_lr => (others => '0'), last_nia => (others => '0'), others => (others => '0')); next_lr => (others => '0'), last_nia => (others => '0'), others => (others => '0'));


@ -371,6 +372,7 @@ begin
v.mul_in_progress := '0'; v.mul_in_progress := '0';
v.div_in_progress := '0'; v.div_in_progress := '0';
v.cntz_in_progress := '0'; v.cntz_in_progress := '0';
v.mul_finish := '0';


-- signals to multiply and divide units -- signals to multiply and divide units
sign1 := '0'; sign1 := '0';
@ -965,31 +967,47 @@ begin
when others => when others =>
-- i.e. OP_MUL_L64 -- i.e. OP_MUL_L64
result := multiply_to_x.result(63 downto 0); result := multiply_to_x.result(63 downto 0);
overflow := multiply_to_x.overflow;
end case; end case;
else else
result := divider_to_x.write_reg_data; result := divider_to_x.write_reg_data;
overflow := divider_to_x.overflow; overflow := divider_to_x.overflow;
end if; end if;
result_en := '1'; if r.mul_in_progress = '1' and r.slow_op_oe = '1' then
v.e.write_reg := gpr_to_gspr(v.slow_op_dest); -- have to wait until next cycle for overflow indication
v.e.rc := v.slow_op_rc; v.mul_finish := '1';
v.e.xerc := v.slow_op_xerc; v.busy := '1';
v.e.write_xerc_enable := v.slow_op_oe; else
-- We must test oe because the RC update code in writeback result_en := '1';
-- will use the xerc value to set CR0:SO so we must not clobber v.e.write_reg := gpr_to_gspr(r.slow_op_dest);
-- xerc if OE wasn't set. v.e.rc := r.slow_op_rc;
if v.slow_op_oe = '1' then v.e.xerc := r.slow_op_xerc;
v.e.xerc.ov := overflow; v.e.write_xerc_enable := r.slow_op_oe;
v.e.xerc.ov32 := overflow; -- We must test oe because the RC update code in writeback
v.e.xerc.so := v.slow_op_xerc.so or overflow; -- will use the xerc value to set CR0:SO so we must not clobber
end if; -- xerc if OE wasn't set.
v.e.valid := '1'; if r.slow_op_oe = '1' then
v.e.xerc.ov := overflow;
v.e.xerc.ov32 := overflow;
v.e.xerc.so := r.slow_op_xerc.so or overflow;
end if;
v.e.valid := '1';
end if;
else else
v.busy := '1'; v.busy := '1';
v.mul_in_progress := r.mul_in_progress; v.mul_in_progress := r.mul_in_progress;
v.div_in_progress := r.div_in_progress; v.div_in_progress := r.div_in_progress;
end if; end if;
elsif r.mul_finish = '1' then
result := r.e.write_data;
result_en := '1';
v.e.write_reg := gpr_to_gspr(r.slow_op_dest);
v.e.rc := r.slow_op_rc;
v.e.xerc := r.slow_op_xerc;
v.e.write_xerc_enable := r.slow_op_oe;
v.e.xerc.ov := multiply_to_x.overflow;
v.e.xerc.ov32 := multiply_to_x.overflow;
v.e.xerc.so := r.slow_op_xerc.so or multiply_to_x.overflow;
v.e.valid := '1';
end if; end if;


if illegal = '1' then if illegal = '1' then

@ -38,12 +38,15 @@ architecture behaviour of multiply is
end record; end record;


signal r, rin : reg_type := (multiply_pipeline => MultiplyPipelineInit); signal r, rin : reg_type := (multiply_pipeline => MultiplyPipelineInit);
signal overflow : std_ulogic;
signal ovf_in : std_ulogic;
begin begin
multiply_0: process(clk) multiply_0: process(clk)
begin begin
if rising_edge(clk) then if rising_edge(clk) then
m <= m_in; m <= m_in;
r <= rin; r <= rin;
overflow <= ovf_in;
end if; end if;
end process; end process;


@ -74,9 +77,10 @@ begin
else else
ov := (or d(127 downto 63)) and not (and d(127 downto 63)); ov := (or d(127 downto 63)) and not (and d(127 downto 63));
end if; end if;
ovf_in <= ov;


m_out.result <= d; m_out.result <= d;
m_out.overflow <= ov; m_out.overflow <= overflow;
m_out.valid <= v.multiply_pipeline(PIPELINE_DEPTH-1).valid; m_out.valid <= v.multiply_pipeline(PIPELINE_DEPTH-1).valid;


rin <= v; rin <= v;

@ -35,6 +35,7 @@ architecture behaviour of multiply is
signal req_32bit, r32_1 : std_ulogic; signal req_32bit, r32_1 : std_ulogic;
signal req_not, rnot_1 : std_ulogic; signal req_not, rnot_1 : std_ulogic;
signal valid_1 : std_ulogic; signal valid_1 : std_ulogic;
signal overflow, ovf_in : std_ulogic;


begin begin
addend <= m_in.addend; addend <= m_in.addend;
@ -964,9 +965,10 @@ begin
ov := not ((p1_pat and p0_pat and not product(31)) or ov := not ((p1_pat and p0_pat and not product(31)) or
(p1_patb and p0_patb and product(31))); (p1_patb and p0_patb and product(31)));
end if; end if;
ovf_in <= ov;


m_out.result <= product; m_out.result <= product;
m_out.overflow <= ov; m_out.overflow <= overflow;
end process; end process;


process(clk) process(clk)
@ -979,6 +981,7 @@ begin
r32_1 <= m_in.is_32bit; r32_1 <= m_in.is_32bit;
req_not <= rnot_1; req_not <= rnot_1;
rnot_1 <= m_in.not_result; rnot_1 <= m_in.not_result;
overflow <= ovf_in;
end if; end if;
end process; end process;



Loading…
Cancel
Save