You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
microwatt/multiply.vhdl

103 lines
3.0 KiB
VHDL

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library work;
use work.common.all;
entity multiply is
generic (
PIPELINE_DEPTH : natural := 3
);
port (
clk : in std_logic;
m_in : in MultiplyInputType;
m_out : out MultiplyOutputType
);
end entity multiply;
architecture behaviour of multiply is
signal m: MultiplyInputType := MultiplyInputInit;
type multiply_pipeline_stage is record
valid : std_ulogic;
data : unsigned(127 downto 0);
end record;
Add basic XER support The carry is currently internal to execute1. We don't handle any of the other XER fields. This creates type called "xer_common_t" that contains the commonly used XER bits (CA, CA32, SO, OV, OV32). The value is stored in the CR file (though it could be a separate module). The rest of the bits will be implemented as a separate SPR and the two parts reconciled in mfspr/mtspr in latter commits. We always read XER in decode2 (there is little point not to) and send it down all pipeline branches as it will be needed in writeback for all type of instructions when CR0:SO needs to be updated (such forms exist for all pipeline branches even if we don't yet implement them). To avoid having to track XER hazards, we forward it back in EX1. This assumes that other pipeline branches that can modify it (mult and div) are running single issue for now. One additional hazard to beware of is an XER:SO modifying instruction in EX1 followed immediately by a store conditional. Due to our writeback latency, the store will go down the LSU with the previous XER value, thus the stcx. will set CR0:SO using an obsolete SO value. I doubt there exist any code relying on this behaviour being correct but we should account for it regardless, possibly by ensuring that stcx. remain single issue initially, or later by adding some minimal tracking or moving the LSU into the same pipeline as execute. Missing some obscure XER affecting instructions like addex or mcrxrx. [paulus@ozlabs.org - fix CA32 and OV32 for OP_ADD, fix order of arguments to set_ov] Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
constant MultiplyPipelineStageInit : multiply_pipeline_stage := (valid => '0',
data => (others => '0'));
type multiply_pipeline_type is array(0 to PIPELINE_DEPTH-1) of multiply_pipeline_stage;
constant MultiplyPipelineInit : multiply_pipeline_type := (others => MultiplyPipelineStageInit);
type reg_type is record
multiply_pipeline : multiply_pipeline_type;
end record;
signal r, rin : reg_type := (multiply_pipeline => MultiplyPipelineInit);
signal overflow : std_ulogic;
signal ovf_in : std_ulogic;
begin
multiply_0: process(clk)
begin
if rising_edge(clk) then
m <= m_in;
r <= rin;
overflow <= ovf_in;
end if;
end process;
multiply_1: process(all)
variable v : reg_type;
variable a, b : std_ulogic_vector(64 downto 0);
variable prod : std_ulogic_vector(129 downto 0);
variable d : std_ulogic_vector(127 downto 0);
variable d2 : std_ulogic_vector(63 downto 0);
Add basic XER support The carry is currently internal to execute1. We don't handle any of the other XER fields. This creates type called "xer_common_t" that contains the commonly used XER bits (CA, CA32, SO, OV, OV32). The value is stored in the CR file (though it could be a separate module). The rest of the bits will be implemented as a separate SPR and the two parts reconciled in mfspr/mtspr in latter commits. We always read XER in decode2 (there is little point not to) and send it down all pipeline branches as it will be needed in writeback for all type of instructions when CR0:SO needs to be updated (such forms exist for all pipeline branches even if we don't yet implement them). To avoid having to track XER hazards, we forward it back in EX1. This assumes that other pipeline branches that can modify it (mult and div) are running single issue for now. One additional hazard to beware of is an XER:SO modifying instruction in EX1 followed immediately by a store conditional. Due to our writeback latency, the store will go down the LSU with the previous XER value, thus the stcx. will set CR0:SO using an obsolete SO value. I doubt there exist any code relying on this behaviour being correct but we should account for it regardless, possibly by ensuring that stcx. remain single issue initially, or later by adding some minimal tracking or moving the LSU into the same pipeline as execute. Missing some obscure XER affecting instructions like addex or mcrxrx. [paulus@ozlabs.org - fix CA32 and OV32 for OP_ADD, fix order of arguments to set_ov] Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
variable ov : std_ulogic;
begin
v := r;
a := (m.is_signed and m.data1(63)) & m.data1;
b := (m.is_signed and m.data2(63)) & m.data2;
prod := std_ulogic_vector(signed(a) * signed(b));
v.multiply_pipeline(0).valid := m.valid;
if m.subtract = '1' then
v.multiply_pipeline(0).data := unsigned(m.addend) - unsigned(prod(127 downto 0));
else
v.multiply_pipeline(0).data := unsigned(m.addend) + unsigned(prod(127 downto 0));
end if;
loop_0: for i in 1 to PIPELINE_DEPTH-1 loop
v.multiply_pipeline(i) := r.multiply_pipeline(i-1);
end loop;
d := std_ulogic_vector(v.multiply_pipeline(PIPELINE_DEPTH-1).data);
ov := (or d(127 downto 63)) and not (and d(127 downto 63));
ovf_in <= ov;
m_out.result <= d;
m_out.overflow <= overflow;
m_out.valid <= v.multiply_pipeline(PIPELINE_DEPTH-1).valid;
rin <= v;
end process;
end architecture behaviour;
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
entity short_multiply is
port (
clk : in std_ulogic;
a_in : in std_ulogic_vector(15 downto 0);
b_in : in std_ulogic_vector(15 downto 0);
m_out : out std_ulogic_vector(31 downto 0)
);
end entity short_multiply;
architecture behaviour of short_multiply is
begin
m_out <= std_ulogic_vector(signed(a_in) * signed(b_in));
end architecture behaviour;