|
|
|
-- Floating-point unit for Microwatt
|
|
|
|
|
|
|
|
library ieee;
|
|
|
|
use ieee.std_logic_1164.all;
|
|
|
|
use ieee.numeric_std.all;
|
|
|
|
|
|
|
|
library work;
|
|
|
|
use work.insn_helpers.all;
|
|
|
|
use work.decode_types.all;
|
|
|
|
use work.crhelpers.all;
|
|
|
|
use work.helpers.all;
|
|
|
|
use work.common.all;
|
|
|
|
|
|
|
|
-- Floating-point unit entity declaration.
--
-- Ports:
--   clk, rst : std_ulogic inputs (clock and reset, going by their names)
--   flush_in : std_ulogic input (a flush request, going by its name)
--   e_in     : input record of type Execute1ToFPUType
--   e_out    : output record of type FPUToExecute1Type
--   w_out    : output record of type FPUToWritebackType
--
-- The three record types are not declared in this file; presumably they
-- come from one of the work.* packages named in the context clause.
entity fpu is
    port (
        clk      : in std_ulogic;
        rst      : in std_ulogic;
        flush_in : in std_ulogic;

        e_in     : in  Execute1ToFPUType;
        e_out    : out FPUToExecute1Type;

        w_out    : out FPUToWritebackType
        );
end entity fpu;
|
|
|
|
|
|
|
|
architecture behaviour of fpu is
|
|
|
|
-- Classification of a floating-point value: zero, finite non-zero,
-- infinity, or not-a-number.
type fp_number_class is (ZERO, FINITE, INFINITY, NAN);

-- Width in bits of the signed exponent fields declared with
-- signed(EXP_BITS-1 downto 0).
constant EXP_BITS : natural := 13;

-- Bit positions inside a 64-bit value held in "8.56 format"
-- (the unit bit is bit 56, with 56 fraction bits below it).
constant UNIT_BIT : natural := 56;
constant QNAN_BIT : natural := UNIT_BIT - 1;    -- = 55; presumably the quiet-NaN bit
-- Single precision: bit 33 is 23 bits below the unit bit.
constant SP_LSB   : natural := UNIT_BIT - 23;   -- = 33
constant SP_GBIT  : natural := SP_LSB - 1;      -- = 32; presumably the guard bit
constant SP_RBIT  : natural := SP_LSB - 2;      -- = 31; presumably the round bit
-- Double precision: bit 4 is 52 bits below the unit bit.
constant DP_LSB   : natural := UNIT_BIT - 52;   -- = 4
constant DP_GBIT  : natural := DP_LSB - 1;      -- = 3; presumably the guard bit
constant DP_RBIT  : natural := DP_LSB - 2;      -- = 2; presumably the round bit
|
|
|
|
|
|
|
|
-- Unpacked representation of one floating-point register value.
type fpu_reg_type is record
    class    : fp_number_class;                   -- zero / finite / infinity / NaN
    negative : std_ulogic;                        -- presumably '1' when the value is negative
    denorm   : std_ulogic;                        -- presumably '1' for a denormalized input
    exponent : signed(EXP_BITS-1 downto 0);       -- unbiased
    mantissa : std_ulogic_vector(63 downto 0);    -- 8.56 format
end record;
|
|
|
|
|
|
|
|
-- States of the FPU state machine.
--
-- Many of the DO_* literals are the values stored in the arith_decode,
-- cmp_decode and misc_decode tables declared later in this architecture.
-- The remaining literals are grouped by name prefix (ADD_*, FMADD_*,
-- DIV_*, SQRT_*, INT_*, RENORM_*, IDIV_*, ...); presumably each group is
-- the multi-cycle sequence for the corresponding operation.
--
-- The order of the literals is part of the type and must not be changed.
type state_t is (IDLE, DO_ILLEGAL,
                 DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
                 DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
                 DO_FCFID, DO_FCTI,
                 DO_FRSP, DO_FRI,
                 DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
                 DO_FRE, DO_FRSQRTE,
                 DO_FSEL,
                 DO_IDIVMOD,
                 FRI_1,
                 ADD_1, ADD_SHIFT, ADD_2, ADD_3,
                 CMP_1, CMP_2,
                 MULT_1,
                 FMADD_0, FMADD_1, FMADD_2, FMADD_3,
                 FMADD_4, FMADD_5, FMADD_6,
                 LOOKUP,
                 DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
                 FRE_1,
                 RSQRT_1,
                 FTDIV_1,
                 SQRT_1, SQRT_2, SQRT_3, SQRT_4,
                 SQRT_5, SQRT_6, SQRT_7, SQRT_8,
                 SQRT_9, SQRT_10, SQRT_11, SQRT_12,
                 INT_SHIFT, INT_ROUND, INT_ISHIFT,
                 INT_FINAL, INT_CHECK, INT_OFLOW,
                 FINISH, NORMALIZE,
                 ROUND_UFLOW, ROUND_OFLOW,
                 ROUNDING, ROUNDING_2, ROUNDING_3,
                 DENORM,
                 RENORM_A, RENORM_A2,
                 RENORM_B, RENORM_B2,
                 RENORM_C, RENORM_C2,
                 NAN_RESULT, EXC_RESULT,
                 IDIV_NORMB, IDIV_NORMB2, IDIV_NORMB3,
                 IDIV_CLZA, IDIV_CLZA2, IDIV_CLZA3,
                 IDIV_NR0, IDIV_NR1, IDIV_NR2, IDIV_USE0_5,
                 IDIV_DODIV, IDIV_SH32,
                 IDIV_DIV, IDIV_DIV2, IDIV_DIV3, IDIV_DIV4, IDIV_DIV5,
                 IDIV_DIV6, IDIV_DIV7, IDIV_DIV8, IDIV_DIV9,
                 IDIV_EXT_TBH, IDIV_EXT_TBH2, IDIV_EXT_TBH3,
                 IDIV_EXT_TBH4, IDIV_EXT_TBH5,
                 IDIV_EXTDIV, IDIV_EXTDIV1, IDIV_EXTDIV2, IDIV_EXTDIV3,
                 IDIV_EXTDIV4, IDIV_EXTDIV5, IDIV_EXTDIV6,
                 IDIV_MODADJ, IDIV_MODSUB, IDIV_DIVADJ, IDIV_OVFCHK, IDIV_DONE, IDIV_ZERO);
|
|
|
|
|
|
|
|
-- Decode table types: arrays of state_t values with 32 and 8 entries.
type decode32 is array(0 to 31) of state_t;
type decode8 is array(0 to 7) of state_t;
|
|
|
|
|
|
|
|
-- Register record for the FPU; the signals r and rin declared in this
-- architecture are of this type.
--
-- Field order and types are unchanged from the original declaration.
-- Notes marked "presumably" are inferred from field names only and
-- should be confirmed against the process that drives these fields.
type reg_type is record
    -- State machine and instruction bookkeeping
    state        : state_t;
    busy         : std_ulogic;
    f2stall      : std_ulogic;
    instr_done   : std_ulogic;
    complete     : std_ulogic;
    do_intr      : std_ulogic;
    illegal      : std_ulogic;
    op           : insn_type_t;
    insn         : std_ulogic_vector(31 downto 0);
    instr_tag    : instr_tag_t;
    dest_fpr     : gspr_index_t;
    fe_mode      : std_ulogic;
    rc           : std_ulogic;
    fp_rc        : std_ulogic;
    is_cmp       : std_ulogic;
    single_prec  : std_ulogic;
    sp_result    : std_ulogic;
    fpscr        : std_ulogic_vector(31 downto 0);
    comm_fpscr   : std_ulogic_vector(31 downto 0);  -- committed FPSCR value

    -- Unpacked values a, b, c and the working datapath registers
    a            : fpu_reg_type;
    b            : fpu_reg_type;
    c            : fpu_reg_type;
    r            : std_ulogic_vector(63 downto 0);  -- 8.56 format
    s            : std_ulogic_vector(55 downto 0);  -- extended fraction
    x            : std_ulogic;
    p            : std_ulogic_vector(63 downto 0);  -- 8.56 format
    y            : std_ulogic_vector(63 downto 0);  -- 8.56 format

    -- Result under construction
    result_sign  : std_ulogic;
    result_class : fp_number_class;
    result_exp   : signed(EXP_BITS-1 downto 0);
    shift        : signed(EXP_BITS-1 downto 0);

    -- Write-back / completion control
    writing_fpr  : std_ulogic;
    write_reg    : gspr_index_t;
    complete_tag : instr_tag_t;
    writing_cr   : std_ulogic;
    writing_xer  : std_ulogic;
    int_result   : std_ulogic;
    cr_result    : std_ulogic_vector(3 downto 0);
    cr_mask      : std_ulogic_vector(7 downto 0);

    -- Exception and rounding status
    old_exc      : std_ulogic_vector(4 downto 0);
    update_fprf  : std_ulogic;
    quieten_nan  : std_ulogic;
    nsnan_result : std_ulogic;
    tiny         : std_ulogic;
    denorm       : std_ulogic;
    round_mode   : std_ulogic_vector(2 downto 0);

    -- Per-operation flags
    is_subtract  : std_ulogic;
    exp_cmp      : std_ulogic;
    madd_cmp     : std_ulogic;
    add_bsmall   : std_ulogic;
    is_multiply  : std_ulogic;
    is_sqrt      : std_ulogic;
    first        : std_ulogic;
    count        : unsigned(1 downto 0);
    doing_ftdiv  : std_ulogic_vector(1 downto 0);
    opsel_a      : std_ulogic_vector(1 downto 0);   -- presumably takes the AIN_* values
    use_a        : std_ulogic;
    use_b        : std_ulogic;
    use_c        : std_ulogic;
    invalid      : std_ulogic;
    negate       : std_ulogic;
    longmask     : std_ulogic;

    -- Fields used by the integer divide/modulo path, going by their
    -- names and the IDIV_* states (presumably; confirm against the
    -- state machine)
    integer_op   : std_ulogic;
    divext       : std_ulogic;
    divmod       : std_ulogic;
    is_signed    : std_ulogic;
    int_ovf      : std_ulogic;
    div_close    : std_ulogic;
    inc_quot     : std_ulogic;
    a_hi         : std_ulogic_vector(7 downto 0);
    a_lo         : std_ulogic_vector(55 downto 0);
    m32b         : std_ulogic;
    oe           : std_ulogic;
    xerc         : xer_common_t;
    xerc_result  : xer_common_t;
    res_sign     : std_ulogic;
end record;
|
|
|
|
|
|
|
|
-- Type of a 1024-entry table of 18-bit values (used for inverse_table).
type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);

-- Register record signals; by the usual r/rin naming these are
-- presumably the current and next register values.
signal r, rin : reg_type;

signal fp_result     : std_ulogic_vector(63 downto 0);

-- Operand/result selectors; presumably take the BIN_*, RES_* and S_*
-- constant values respectively (2 bits each, matching those constants).
signal opsel_b       : std_ulogic_vector(1 downto 0);
signal opsel_r       : std_ulogic_vector(1 downto 0);
signal opsel_s       : std_ulogic_vector(1 downto 0);
signal opsel_ainv    : std_ulogic;
signal opsel_mask    : std_ulogic;
signal opsel_binv    : std_ulogic;

-- 64-bit datapath values
signal in_a          : std_ulogic_vector(63 downto 0);
signal in_b          : std_ulogic_vector(63 downto 0);
signal result        : std_ulogic_vector(63 downto 0);
signal carry_in      : std_ulogic;
signal lost_bits     : std_ulogic;

-- Single-bit status flags (by name: non-zero / greater-than-one tests
-- on R and S; confirm against the logic that drives them)
signal r_hi_nz       : std_ulogic;
signal r_lo_nz       : std_ulogic;
signal r_gt_1        : std_ulogic;
signal s_nz          : std_ulogic;

signal misc_sel      : std_ulogic_vector(3 downto 0);

-- Records exchanged with a multiplier (types declared outside this file)
signal f_to_multiply : MultiplyInputType;
signal multiply_to_f : MultiplyOutputType;

-- Multiplier input selectors; presumably take the MUL1_*, MUL2_* and
-- MULADD_* constant values respectively.
signal msel_1        : std_ulogic_vector(1 downto 0);
signal msel_2        : std_ulogic_vector(1 downto 0);
signal msel_add      : std_ulogic_vector(1 downto 0);
signal msel_inv      : std_ulogic;

-- 19 bits wide, i.e. one bit wider than a lookup_table entry.
signal inverse_est   : std_ulogic_vector(18 downto 0);
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
|
|
|
|
-- opsel values

-- AIN_*: 2-bit codes, presumably the values taken by r.opsel_a
constant AIN_R    : std_ulogic_vector(1 downto 0) := "00";
constant AIN_A    : std_ulogic_vector(1 downto 0) := "01";
constant AIN_B    : std_ulogic_vector(1 downto 0) := "10";
constant AIN_C    : std_ulogic_vector(1 downto 0) := "11";

-- BIN_*: 2-bit codes, presumably the values taken by opsel_b
constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";
constant BIN_R    : std_ulogic_vector(1 downto 0) := "01";
constant BIN_RND  : std_ulogic_vector(1 downto 0) := "10";
constant BIN_PS8  : std_ulogic_vector(1 downto 0) := "11";

-- RES_*: 2-bit codes, presumably the values taken by opsel_r
constant RES_SUM   : std_ulogic_vector(1 downto 0) := "00";
constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
constant RES_MULT  : std_ulogic_vector(1 downto 0) := "10";
constant RES_MISC  : std_ulogic_vector(1 downto 0) := "11";

-- S_*: 2-bit codes, presumably the values taken by opsel_s
constant S_ZERO  : std_ulogic_vector(1 downto 0) := "00";
constant S_NEG   : std_ulogic_vector(1 downto 0) := "01";
constant S_SHIFT : std_ulogic_vector(1 downto 0) := "10";
constant S_MULT  : std_ulogic_vector(1 downto 0) := "11";

-- msel values

-- MUL1_*: 2-bit codes, presumably the values taken by msel_1
constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";
constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";

-- MUL2_*: 2-bit codes, presumably the values taken by msel_2
constant MUL2_C   : std_ulogic_vector(1 downto 0) := "00";
constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
constant MUL2_P   : std_ulogic_vector(1 downto 0) := "10";
constant MUL2_R   : std_ulogic_vector(1 downto 0) := "11";

-- MULADD_*: 2-bit codes, presumably the values taken by msel_add
constant MULADD_ZERO  : std_ulogic_vector(1 downto 0) := "00";
constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
constant MULADD_A     : std_ulogic_vector(1 downto 0) := "10";
constant MULADD_RS    : std_ulogic_vector(1 downto 0) := "11";
|
|
|
|
|
-- Note taken from the commit message "FPU: Make an explicit exponent data path":
--
-- With this, the large case statement sets values for a set of control
-- signals, which then control multiplexers and adders that generate
-- values for v.result_exp and v.shift.  The plan is for the case
-- statement to turn into a microcode ROM eventually.
--
-- The value of v.result_exp is the sum of two values, either of which
-- can be negated (but not both).  The first value can be chosen from the
-- result exponent, A exponent, B exponent arithmetically shifted right
-- one bit, or 0.  The second value can be chosen from new_exp (which is
-- r.result_exp - r.shift), B exponent, C exponent or a constant.  The
-- choices for the constant are 0, 56, the maximum exponent (max_exp) or
-- the exponent bias for trap-enabled overflow conditions (bias_exp).
-- These choices are controlled by the signals re_sel1, re_neg1, re_sel2
-- and re_neg2, and the sum is written into v.result_exp if re_set_result
-- is 1.
--
-- For v.shift we also compute the sum of two values, either of which
-- can be negated (but not both).  The first value can be chosen from
-- new_exp, B exponent, r.shift, or 0.  The second value can be chosen
-- from the A exponent or a constant.  The possible constants are 0, 1,
-- 4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp).  These
-- choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
-- rs_neg2.  After the adder there is a multiplexer which selects either
-- the sum or a shift count for normalization (derived from a count
-- leading zeroes operation on R) to be written into v.shift.  The
-- count-leading-zeroes result does not go through the adder for timing
-- reasons.
--
-- In order to simplify the logic and help improve timing, settings of
-- the control signals have been made unconditional in a state in many
-- places, even if those settings are only required when some condition
-- is met.
--
-- Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
|
|
|
-- control signals and values for exponent data path

-- REXP1_*: 2-bit codes, presumably the values taken by re_sel1
-- (by name: zero, result exponent, A exponent, half of B exponent)
constant REXP1_ZERO  : std_ulogic_vector(1 downto 0) := "00";
constant REXP1_R     : std_ulogic_vector(1 downto 0) := "01";
constant REXP1_A     : std_ulogic_vector(1 downto 0) := "10";
constant REXP1_BHALF : std_ulogic_vector(1 downto 0) := "11";

-- REXP2_*: 2-bit codes, presumably the values taken by re_sel2
-- (by name: constant, new_exp, C exponent, B exponent)
constant REXP2_CON : std_ulogic_vector(1 downto 0) := "00";
constant REXP2_NE  : std_ulogic_vector(1 downto 0) := "01";
constant REXP2_C   : std_ulogic_vector(1 downto 0) := "10";
constant REXP2_B   : std_ulogic_vector(1 downto 0) := "11";

-- RECON2_*: 2-bit codes, presumably the values taken by re_con2
-- (by name: zero, UNIT_BIT, bias exponent, maximum exponent)
constant RECON2_ZERO : std_ulogic_vector(1 downto 0) := "00";
constant RECON2_UNIT : std_ulogic_vector(1 downto 0) := "01";
constant RECON2_BIAS : std_ulogic_vector(1 downto 0) := "10";
constant RECON2_MAX  : std_ulogic_vector(1 downto 0) := "11";

-- Control signals for the result-exponent computation
signal re_sel1       : std_ulogic_vector(1 downto 0);
signal re_sel2       : std_ulogic_vector(1 downto 0);
signal re_con2       : std_ulogic_vector(1 downto 0);
signal re_neg1       : std_ulogic;
signal re_neg2       : std_ulogic;
signal re_set_result : std_ulogic;

-- RSH1_*: 2-bit codes, presumably the values taken by rs_sel1
-- (by name: zero, B exponent, new_exp, current shift)
constant RSH1_ZERO : std_ulogic_vector(1 downto 0) := "00";
constant RSH1_B    : std_ulogic_vector(1 downto 0) := "01";
constant RSH1_NE   : std_ulogic_vector(1 downto 0) := "10";
constant RSH1_S    : std_ulogic_vector(1 downto 0) := "11";

-- RSH2_*: 1-bit codes, presumably the values taken by rs_sel2
-- (by name: constant or A exponent)
constant RSH2_CON : std_ulogic := '0';
constant RSH2_A   : std_ulogic := '1';

-- RSCON2_*: 4-bit codes, presumably the values taken by rs_con2.
-- The names suggest the constant each code stands for; UNIT_52 and
-- 64_UNIT presumably mean UNIT_BIT - 52 and 64 - UNIT_BIT.
constant RSCON2_ZERO    : std_ulogic_vector(3 downto 0) := "0000";
constant RSCON2_1       : std_ulogic_vector(3 downto 0) := "0001";
constant RSCON2_UNIT_52 : std_ulogic_vector(3 downto 0) := "0010";
constant RSCON2_64_UNIT : std_ulogic_vector(3 downto 0) := "0011";
constant RSCON2_32      : std_ulogic_vector(3 downto 0) := "0100";
constant RSCON2_52      : std_ulogic_vector(3 downto 0) := "0101";
constant RSCON2_UNIT    : std_ulogic_vector(3 downto 0) := "0110";
constant RSCON2_63      : std_ulogic_vector(3 downto 0) := "0111";
constant RSCON2_64      : std_ulogic_vector(3 downto 0) := "1000";
constant RSCON2_MINEXP  : std_ulogic_vector(3 downto 0) := "1001";

-- Control signals for the shift-count computation
signal rs_sel1 : std_ulogic_vector(1 downto 0);
signal rs_sel2 : std_ulogic;
signal rs_con2 : std_ulogic_vector(3 downto 0);
signal rs_neg1 : std_ulogic;
signal rs_neg2 : std_ulogic;
signal rs_norm : std_ulogic;   -- presumably selects the normalization shift count
|
|
|
|
|
-- Decode table for arithmetic instructions: maps a 5-bit index to the
-- state_t value for that operation.  Any index not listed maps to
-- DO_ILLEGAL.
constant arith_decode : decode32 := (
    -- indexed by bits 5..1 of opcode
    2#01000# => DO_FRI,
    2#01100# => DO_FRSP,
    2#01110# => DO_FCTI,
    2#01111# => DO_FCTI,
    2#10010# => DO_FDIV,
    2#10100# => DO_FADD,
    2#10101# => DO_FADD,
    2#10110# => DO_FSQRT,
    2#11000# => DO_FRE,
    2#11001# => DO_FMUL,
    2#11010# => DO_FRSQRTE,
    2#11100# => DO_FMADD,
    2#11101# => DO_FMADD,
    2#11110# => DO_FMADD,
    2#11111# => DO_FMADD,
    others   => DO_ILLEGAL
    );
|
|
|
|
|
|
|
|
-- Decode table for the compare/test group: maps a 3-bit index to the
-- state_t value for that operation.  Any index not listed maps to
-- DO_ILLEGAL.  Which instruction bits form the index is not stated
-- here; see the code that reads this table.
constant cmp_decode : decode8 := (
    2#000#  => DO_FCMP,
    2#001#  => DO_FCMP,
    2#010#  => DO_MCRFS,
    2#100#  => DO_FTDIV,
    2#101#  => DO_FTSQRT,
    others  => DO_ILLEGAL
    );
|
|
|
|
|
|
|
|
-- Decode table for miscellaneous instructions: maps a 5-bit index to
-- the state_t value for that operation.  Any index not listed maps to
-- DO_ILLEGAL.
constant misc_decode : decode32 := (
    -- indexed by bits 10, 8, 4, 2, 1 of opcode
    2#00010# => DO_MTFSB,
    2#01010# => DO_MTFSFI,
    2#10010# => DO_FMRG,
    2#11010# => DO_FMRG,
    2#10011# => DO_MFFS,
    2#11011# => DO_MTFSF,
    2#10110# => DO_FCFID,
    2#11110# => DO_FCFID,
    others   => DO_ILLEGAL
    );
|
|
|
|
|
|
|
|
-- Inverse lookup table, indexed by the top 8 fraction bits.
-- The first 256 entries are the reciprocal (1/x) lookup table,
-- and the remaining 768 entries are the reciprocal square root table.
-- Output range is [0.5, 1) in 0.19 format, though the top
-- bit isn't stored since it is always 1.
-- Each output value is the inverse of the center of the input
-- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
-- entry 1 is 1 / (1 + 3/512), etc.
|
|
|
|
constant inverse_table : lookup_table := (
|
|
|
|
-- 1/x lookup table
|
|
|
|
-- Unit bit is assumed to be 1, so input range is [1, 2)
|
|
|
|
18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
|
|
|
|
18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
|
|
|
|
18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
|
|
|
|
18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
|
|
|
|
18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
|
|
|
|
18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
|
|
|
|
18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
|
|
|
|
18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
|
|
|
|
18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
|
|
|
|
18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
|
|
|
|
18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
|
|
|
|
18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
|
|
|
|
18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
|
|
|
|
18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
|
|
|
|
18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
|
|
|
|
18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
|
|
|
|
18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
|
|
|
|
18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
|
|
|
|
18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
|
|
|
|
18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
|
|
|
|
18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
|
|
|
|
18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
|
|
|
|
18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
|
|
|
|
18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
|
|
|
|
18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
|
|
|
|
18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
|
|
|
|
18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
|
|
|
|
18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
|
|
|
|
18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
|
|
|
|
18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
|
|
|
|
18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
|
|
|
|
18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
|
|
|
|
-- 1/sqrt(x) lookup table
|
|
|
|
-- Input is in the range [1, 4), i.e. two bits to the left of the
|
|
|
|
-- binary point. Those 2 bits index the following 3 blocks of 256 values.
|
|
|
|
-- 1.0 ... 1.9999
|
|
|
|
18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
|
|
|
|
18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
|
|
|
|
18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
|
|
|
|
18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
|
|
|
|
18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
|
|
|
|
18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
|
|
|
|
18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
|
|
|
|
18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
|
|
|
|
18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
|
|
|
|
18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
|
|
|
|
18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
|
|
|
|
18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
|
|
|
|
18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
|
|
|
|
18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
|
|
|
|
18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
|
|
|
|
18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
|
|
|
|
18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
|
|
|
|
18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
|
|
|
|
18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
|
|
|
|
18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
|
|
|
|
18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
|
|
|
|
18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
|
|
|
|
18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
|
|
|
|
18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
|
|
|
|
18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
|
|
|
|
18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
|
|
|
|
18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
|
|
|
|
18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
|
|
|
|
18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
|
|
|
|
18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
|
|
|
|
18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
|
|
|
|
18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
|
|
|
|
-- 2.0 ... 2.9999
|
|
|
|
18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
|
|
|
|
18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
|
|
|
|
18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
|
|
|
|
18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
|
|
|
|
18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
|
|
|
|
18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
|
|
|
|
18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
|
|
|
|
18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
|
|
|
|
18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
|
|
|
|
18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
|
|
|
|
18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
|
|
|
|
18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
|
|
|
|
18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
|
|
|
|
18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
|
|
|
|
18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
|
|
|
|
18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
|
|
|
|
18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
|
|
|
|
18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
|
|
|
|
18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
|
|
|
|
18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
|
|
|
|
18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
|
|
|
|
18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
|
|
|
|
18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
|
|
|
|
18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
|
|
|
|
18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
|
|
|
|
18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
|
|
|
|
18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
|
|
|
|
18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
|
|
|
|
18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
|
|
|
|
18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
|
|
|
|
18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
|
|
|
|
18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
|
|
|
|
-- 3.0 ... 3.9999
|
|
|
|
18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
|
|
|
|
18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
|
|
|
|
18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
|
|
|
|
18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
|
|
|
|
18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
|
|
|
|
18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
|
|
|
|
18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
|
|
|
|
18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
|
|
|
|
18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
|
|
|
|
18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
|
|
|
|
18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
|
|
|
|
18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
|
|
|
|
18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
|
|
|
|
18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
|
|
|
|
18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
|
|
|
|
18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
|
|
|
|
18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
|
|
|
|
18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
|
|
|
|
18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
|
|
|
|
18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
|
|
|
|
18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
|
|
|
|
18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
|
|
|
|
18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
|
|
|
|
18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
|
|
|
|
18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
|
|
|
|
18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
|
|
|
|
18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
|
|
|
|
18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
|
|
|
|
18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
|
|
|
|
18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
|
|
|
|
18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
|
|
|
|
18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
|
|
|
|
);
|
|
|
|
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
-- 64-bit barrel shifter with a 120-bit input.
-- Returns the top 64 bits of inp shifted left by "shift" places, where
-- shift is a 7-bit two's-complement count (-64..63).  A negative count
-- shifts right; zeroes are shifted in at both ends.
-- The shift is performed by three multiplexer stages selecting on
-- shift(6 downto 5) (steps of 32), shift(4 downto 3) (steps of 8) and
-- shift(2 downto 0) (steps of 1).
function shifter_64(inp: std_ulogic_vector(119 downto 0);
                    shift: std_ulogic_vector(6 downto 0))
    return std_ulogic_vector is
    -- inp with 64 zero bits above it and 7 zero bits below it, so that
    -- every coarse-stage window is a plain slice
    variable padded  : std_ulogic_vector(190 downto 0);
    variable coarse  : std_ulogic_vector(94 downto 0);
    variable middle  : std_ulogic_vector(70 downto 0);
    variable shifted : std_ulogic_vector(63 downto 0);
begin
    padded := x"0000000000000000" & inp & "0000000";

    -- Stage 1: shift by 0, +32, -64 or -32
    case shift(6 downto 5) is
        when "00"   => coarse := padded(126 downto 32);
        when "01"   => coarse := padded(94 downto 0);
        when "10"   => coarse := padded(190 downto 96);
        when others => coarse := padded(158 downto 64);
    end case;

    -- Stage 2: shift left by a further 0, 8, 16 or 24
    case shift(4 downto 3) is
        when "00"   => middle := coarse(94 downto 24);
        when "01"   => middle := coarse(86 downto 16);
        when "10"   => middle := coarse(78 downto 8);
        when others => middle := coarse(70 downto 0);
    end case;

    -- Stage 3: shift left by a further 0..7
    case shift(2 downto 0) is
        when "000"  => shifted := middle(70 downto 7);
        when "001"  => shifted := middle(69 downto 6);
        when "010"  => shifted := middle(68 downto 5);
        when "011"  => shifted := middle(67 downto 4);
        when "100"  => shifted := middle(66 downto 3);
        when "101"  => shifted := middle(65 downto 2);
        when "110"  => shifted := middle(64 downto 1);
        when others => shifted := middle(63 downto 0);
    end case;

    return shifted;
end;
|
|
|
|
|
|
|
|
-- Build a 64-bit mask of the form 0..01..1 that selects the bits that
-- will be lost by a right shift.  The shift argument is the low 6 bits
-- of a negative (i.e. right) shift count, so shift = 63 (count -1)
-- marks only bit 0 and shift = 0 (count -64) marks all 64 bits.
-- If shift contains a metavalue the whole mask is returned as 'X'.
function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
    variable mask : std_ulogic_vector(63 downto 0);
begin
    if is_X(shift) then
        mask := (others => 'X');
        return mask;
    end if;

    mask := (others => '0');
    for pos in mask'range loop
        -- bit "pos" is lost when pos <= 63 - shift
        if shift <= 63 - pos then
            mask(pos) := '1';
        end if;
    end loop;
    return mask;
end;
|
|
|
|
|
|
|
|
-- Unpack a 64-bit register value into sign, exponent, mantissa and class.
--  * is_fp = '1': fpr is decoded as a double-precision floating-point
--    number.  The exponent is unbiased (-1022 when the exponent field is
--    zero) and the fraction is placed just below UNIT_BIT, with the unit
--    bit set when the exponent field is non-zero.
--  * otherwise, is_32bint = '1': the low 32 bits of fpr are taken as an
--    integer; the upper mantissa half is filled with fpr(31) when
--    is_signed = '1' and with zeroes otherwise.
--  * otherwise: fpr is taken as a 64-bit integer and copied unchanged
--    into the mantissa.
-- For both integer forms the exponent is zero and the class is either
-- ZERO or FINITE.
function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_fp: std_ulogic;
                   is_32bint: std_ulogic; is_signed: std_ulogic) return fpu_reg_type is
    variable dec      : fpu_reg_type;
    variable exp_any  : std_ulogic;     -- some exponent bit is 1
    variable exp_all  : std_ulogic;     -- every exponent bit is 1
    variable frac_any : std_ulogic;     -- some fraction bit is 1
    variable lo32_any : std_ulogic;     -- some bit of the low word is 1
    variable ext_bit  : std_ulogic;     -- fill bit for the 32-bit integer case
    variable key      : std_ulogic_vector(2 downto 0);
begin
    exp_any  := or (fpr(62 downto 52));
    exp_all  := and (fpr(62 downto 52));
    frac_any := or (fpr(51 downto 0));
    lo32_any := or (fpr(31 downto 0));

    dec.negative := fpr(63);
    dec.denorm := '0';

    if is_fp = '1' then
        dec.denorm := frac_any and not exp_any;

        if exp_any = '0' then
            -- zero or denormalized: use the minimum exponent
            dec.exponent := to_signed(-1022, EXP_BITS);
        else
            dec.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) -
                            to_signed(1023, EXP_BITS);
        end if;

        dec.mantissa := (others => '0');
        dec.mantissa(UNIT_BIT) := exp_any;
        dec.mantissa(UNIT_BIT - 1 downto DP_LSB) := fpr(51 downto 0);

        key := exp_all & exp_any & frac_any;
        case key is
            when "000" =>
                dec.class := ZERO;
            when "001" | "010" | "011" =>
                -- "001" is a denormalized number
                dec.class := FINITE;
            when "110" =>
                dec.class := INFINITY;
            when others =>
                dec.class := NAN;
        end case;

    elsif is_32bint = '1' then
        ext_bit := is_signed and fpr(31);
        dec.negative := fpr(31);
        dec.mantissa(63 downto 32) := (others => ext_bit);
        dec.mantissa(31 downto 0) := fpr(31 downto 0);
        dec.exponent := (others => '0');
        dec.class := ZERO;
        if lo32_any = '1' then
            dec.class := FINITE;
        end if;

    else
        dec.mantissa := fpr;
        dec.exponent := (others => '0');
        dec.class := ZERO;
        if (fpr(63) or exp_any or frac_any) = '1' then
            dec.class := FINITE;
        end if;
    end if;

    return dec;
end;
|
|
|
|
|
|
|
|
-- Assemble a 64-bit double-precision image from its components.
--  * ZERO:     only the sign bit is set.
--  * FINITE:   the biased exponent is written only when the unit bit of
--              the mantissa is 1 (otherwise the exponent field stays 0).
--  * INFINITY: exponent field all ones, fraction zero.
--  * NAN:      exponent field all ones; the top fraction bit is forced
--              to 1 when quieten_nan = '1'.
-- For FINITE and NAN the low 29 fraction bits are left as zero when
-- single_prec = '1'.
function pack_dp(negative: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
                 mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
    return std_ulogic_vector is
    variable packed : std_ulogic_vector(63 downto 0);
begin
    packed := (others => '0');
    packed(63) := negative;

    -- Fraction field, common to finite numbers and NaNs
    if class = FINITE or class = NAN then
        packed(51 downto 29) := mantissa(UNIT_BIT - 1 downto SP_LSB);
        if single_prec = '0' then
            packed(28 downto 0) := mantissa(SP_LSB - 1 downto DP_LSB);
        end if;
    end if;

    -- Exponent field (and quiet bit for NaNs)
    case class is
        when ZERO =>
            null;
        when FINITE =>
            if mantissa(UNIT_BIT) = '1' then
                -- normalized number
                packed(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
            end if;
        when INFINITY =>
            packed(62 downto 52) := (others => '1');
        when NAN =>
            packed(62 downto 52) := (others => '1');
            packed(51) := quieten_nan or mantissa(QNAN_BIT);
    end case;

    return packed;
end;
|
|
|
|
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
-- Work out the rounding decision for a mantissa.
-- Returns a 2-bit vector: bit 1 = increment the mantissa at the LSB,
-- bit 0 = result is inexact.
-- x is the sticky input.  In double precision the mantissa bits below
-- the round bit are ORed into the sticky bit here; in single precision
-- x must already cover the bottom 31 (== SP_LSB - 2) mantissa bits
-- (usually arranged by setting set_x = 1 earlier).
-- rn(1 downto 0) selects the mode: "00" nearest, "01" towards zero,
-- anything else towards +/- infinity (increment when rn(0) = sign).
-- In nearest mode, rn(2) = '1' disables the ties-to-even rule so that
-- a tie increments.
function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
                     single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
                     sign: std_ulogic)
    return std_ulogic_vector is
    variable sticky  : std_ulogic;
    variable grs     : std_ulogic_vector(2 downto 0);   -- guard, round, sticky
    variable low_bit : std_ulogic;
    variable inexact : std_ulogic;
    variable verdict : std_ulogic_vector(1 downto 0);
begin
    if single_prec = '0' then
        sticky  := x or (or mantissa(DP_RBIT - 1 downto 0));
        grs     := mantissa(DP_GBIT downto DP_RBIT) & sticky;
        low_bit := mantissa(DP_LSB);
    else
        grs     := mantissa(SP_GBIT downto SP_RBIT) & x;
        low_bit := mantissa(SP_LSB);
    end if;

    inexact := or (grs);
    verdict := '0' & inexact;

    case rn(1 downto 0) is
        when "00" =>
            -- round to nearest: follow the guard bit, except that an
            -- exact tie goes to even unless rn(2) is set
            verdict(1) := grs(2);
            if grs = "100" and rn(2) = '0' then
                verdict(1) := low_bit;
            end if;
        when "01" =>
            -- round towards zero: never increment
            null;
        when others =>
            -- round towards +/- infinity: increment any inexact result
            -- when rounding towards greater magnitude
            if rn(0) = sign then
                verdict(1) := inexact;
            end if;
    end case;

    return verdict;
end;
|
|
|
|
|
|
|
|
-- Compute the 5-bit result-flag value to be written into the FPSCR
-- from the sign and class of a result.  unitbit is the unit bit of the
-- mantissa; for a FINITE result the leading flag is set when it is 0.
-- NOTE(review): the bit order is presumably C, FL, FG, FE, FU (the
-- FPSCR FPRF field) -- confirm against the caller.
function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
    return std_ulogic_vector is
    variable flags : std_ulogic_vector(0 to 4);
begin
    case class is
        when ZERO =>
            flags := sign & "0010";
        when FINITE =>
            flags := (not unitbit) & sign & (not sign) & "00";
        when INFINITY =>
            flags := '0' & sign & (not sign) & "01";
        when NAN =>
            flags := "10001";
    end case;
    return flags;
end;
|
|
|
|
|
|
|
|
begin
|
|
|
|
-- Multiplier instance used by the FPU: driven through f_to_multiply,
-- results come back on multiply_to_f.
fpu_multiply_0: entity work.multiply
    port map (clk   => clk,
              m_in  => f_to_multiply,
              m_out => multiply_to_f);
|
|
|
|
|
|
|
|
-- FPU state register.
-- On reset or flush the control state is returned to idle and the
-- writeback-related fields are cleared; otherwise r takes the value
-- computed combinationally in rin.
-- FPSCR handling on reset/flush:
--  * rst = '1':                 fpscr and comm_fpscr are cleared
--  * flush, r.do_intr = '0':    fpscr is rolled back to comm_fpscr
--  * flush, r.do_intr = '1':    fpscr is left unchanged
-- fpscr must not be cleared unconditionally here: doing so would zero
-- it on a flush caused by our own interrupt, which the roll-back logic
-- below deliberately excludes.
fpu_0: process(clk)
begin
    if rising_edge(clk) then
        if rst = '1' or flush_in = '1' then
            r.state <= IDLE;
            r.busy <= '0';
            r.f2stall <= '0';
            r.instr_done <= '0';
            r.complete <= '0';
            r.illegal <= '0';
            r.do_intr <= '0';
            r.writing_fpr <= '0';
            r.writing_cr <= '0';
            r.writing_xer <= '0';
            r.write_reg <= (others =>'0');
            r.complete_tag.valid <= '0';
            r.cr_mask <= (others =>'0');
            r.cr_result <= (others =>'0');
            r.instr_tag.valid <= '0';
            if rst = '1' then
                r.fpscr <= (others => '0');
                r.comm_fpscr <= (others => '0');
            elsif r.do_intr = '0' then
                -- flush_in = 1 and not due to us generating an interrupt,
                -- roll back to committed fpscr
                r.fpscr <= r.comm_fpscr;
            end if;
            -- else: flush caused by our own interrupt; r.fpscr holds
            -- its current value
        else
            -- a new instruction must only arrive while we are idle
            assert not (r.state /= IDLE and e_in.valid = '1') severity failure;
            r <= rin;
        end if;
    end if;
end process;
|
|
|
|
|
|
|
|
-- Registered read of the estimate lookup table.
-- The low 8 address bits are the 8 mantissa bits of operand B just
-- below the unit bit.  The top 2 address bits are the two B mantissa
-- bits at UNIT_BIT + 1 and UNIT_BIT when r.is_sqrt = '1', and "00"
-- otherwise.  The table entry is returned with a '1' prepended.
-- A metavalue anywhere in the address gives an all-'X' result.
lut_access: process(clk)
    variable bank    : std_ulogic_vector(1 downto 0);
    variable lut_idx : std_ulogic_vector(9 downto 0);
begin
    if rising_edge(clk) then
        bank := "00";
        if r.is_sqrt = '1' then
            bank := r.b.mantissa(UNIT_BIT + 1 downto UNIT_BIT);
        end if;
        lut_idx := bank & r.b.mantissa(UNIT_BIT - 1 downto UNIT_BIT - 8);

        if not is_X(lut_idx) then
            inverse_est <= '1' & inverse_table(to_integer(unsigned(lut_idx)));
        else
            inverse_est <= (others => 'X');
        end if;
    end if;
end process;
|
|
|
|
|
|
|
|
-- Status outputs on the e_out port, all taken directly from the
-- registered state
e_out.exception <= r.fpscr(FPSCR_FEX);
e_out.f2stall   <= r.f2stall;
e_out.busy      <= r.busy;
|
|
|
|
|
|
|
|
-- Outputs on the w_out port.
-- The cycle in which r.complete = 1 for an instruction can be as late
-- as the second cycle of the following instruction (that is, the state
-- following IDLE).  None of the fields of r used here may therefore be
-- modified in the IDLE state.

-- completion
w_out.valid     <= r.complete;
w_out.instr_tag <= r.complete_tag;

-- FPR result
w_out.write_enable <= r.complete and r.writing_fpr;
w_out.write_reg    <= r.write_reg;
w_out.write_data   <= fp_result;

-- CR result: the same cr_result value is presented in all eight
-- positions, with cr_mask passed alongside
w_out.write_cr_enable <= r.complete and r.writing_cr;
w_out.write_cr_mask   <= r.cr_mask;
w_out.write_cr_data   <= r.cr_result & r.cr_result &
                         r.cr_result & r.cr_result &
                         r.cr_result & r.cr_result &
                         r.cr_result & r.cr_result;

-- XER result
w_out.write_xerc <= r.complete and r.writing_xer;
w_out.xerc       <= r.xerc_result;

-- interrupt request: vector 0x700, with SRR1 bit 44 set for an illegal
-- instruction and bit 43 set otherwise
w_out.interrupt <= r.do_intr;
w_out.intr_vec  <= 16#700#;
w_out.srr1      <= (47-44 => r.illegal, 47-43 => not r.illegal, others => '0');
|
|
|
|
|
|
|
|
fpu_1: process(all)
|
|
|
|
variable v : reg_type;
|
|
|
|
variable adec : fpu_reg_type;
|
|
|
|
variable bdec : fpu_reg_type;
|
|
|
|
variable cdec : fpu_reg_type;
|
|
|
|
variable fpscr_mask : std_ulogic_vector(31 downto 0);
|
|
|
|
variable j, k : integer;
|
|
|
|
variable flm : std_ulogic_vector(7 downto 0);
|
|
|
|
variable fpin_a : std_ulogic;
|
|
|
|
variable fpin_b : std_ulogic;
|
|
|
|
variable fpin_c : std_ulogic;
|
|
|
|
variable is_32bint : std_ulogic;
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
variable mask : std_ulogic_vector(63 downto 0);
|
|
|
|
variable in_a0 : std_ulogic_vector(63 downto 0);
|
|
|
|
variable in_b0 : std_ulogic_vector(63 downto 0);
|
|
|
|
variable misc : std_ulogic_vector(63 downto 0);
|
|
|
|
variable shift_res : std_ulogic_vector(63 downto 0);
|
|
|
|
variable round : std_ulogic_vector(1 downto 0);
|
|
|
|
variable update_fx : std_ulogic;
|
|
|
|
variable arith_done : std_ulogic;
|
|
|
|
variable invalid : std_ulogic;
|
|
|
|
variable zero_divide : std_ulogic;
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
variable mant_nz : std_ulogic;
|
|
|
|
variable min_exp : signed(EXP_BITS-1 downto 0);
|
|
|
|
variable max_exp : signed(EXP_BITS-1 downto 0);
|
|
|
|
variable bias_exp : signed(EXP_BITS-1 downto 0);
|
|
|
|
variable new_exp : signed(EXP_BITS-1 downto 0);
|
|
|
|
variable exp_tiny : std_ulogic;
|
|
|
|
variable exp_huge : std_ulogic;
|
|
|
|
variable clz : std_ulogic_vector(5 downto 0);
|
|
|
|
variable set_x : std_ulogic;
|
|
|
|
variable mshift : signed(EXP_BITS-1 downto 0);
|
|
|
|
variable need_check : std_ulogic;
|
|
|
|
variable msb : std_ulogic;
|
|
|
|
variable is_add : std_ulogic;
|
|
|
|
variable set_a : std_ulogic;
|
|
|
|
variable set_a_exp : std_ulogic;
|
|
|
|
variable set_a_mant : std_ulogic;
|
|
|
|
variable set_a_hi : std_ulogic;
|
|
|
|
variable set_a_lo : std_ulogic;
|
|
|
|
variable set_b : std_ulogic;
|
|
|
|
variable set_b_mant : std_ulogic;
|
|
|
|
variable set_c : std_ulogic;
|
|
|
|
variable set_y : std_ulogic;
|
|
|
|
variable set_s : std_ulogic;
|
|
|
|
variable qnan_result : std_ulogic;
|
|
|
|
variable px_nz : std_ulogic;
|
|
|
|
variable pcmpb_eq : std_ulogic;
|
|
|
|
variable pcmpb_lt : std_ulogic;
|
|
|
|
variable pcmpc_eq : std_ulogic;
|
|
|
|
variable pcmpc_lt : std_ulogic;
|
|
|
|
variable pshift : std_ulogic;
|
|
|
|
variable renorm_sqrt : std_ulogic;
|
|
|
|
variable sqrt_exp : signed(EXP_BITS-1 downto 0);
|
|
|
|
variable shiftin : std_ulogic;
|
|
|
|
variable shiftin0 : std_ulogic;
|
|
|
|
variable mulexp : signed(EXP_BITS-1 downto 0);
|
|
|
|
variable maddend : std_ulogic_vector(127 downto 0);
|
|
|
|
variable sum : std_ulogic_vector(63 downto 0);
|
|
|
|
variable round_inc : std_ulogic_vector(63 downto 0);
|
|
|
|
variable rbit_inc : std_ulogic;
|
|
|
|
variable mult_mask : std_ulogic;
|
|
|
|
variable sign_bit : std_ulogic;
|
|
|
|
variable rnd_b32 : std_ulogic;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
variable rexp_in1 : signed(EXP_BITS-1 downto 0);
|
|
|
|
variable rexp_in2 : signed(EXP_BITS-1 downto 0);
|
|
|
|
variable rexp_cin : std_ulogic;
|
|
|
|
variable rexp_sum : signed(EXP_BITS-1 downto 0);
|
|
|
|
variable rsh_in1 : signed(EXP_BITS-1 downto 0);
|
|
|
|
variable rsh_in2 : signed(EXP_BITS-1 downto 0);
|
|
|
|
variable exec_state : state_t;
|
|
|
|
variable opcbits : std_ulogic_vector(4 downto 0);
|
|
|
|
variable int_result : std_ulogic;
|
|
|
|
variable illegal : std_ulogic;
|
|
|
|
variable rsign : std_ulogic;
|
|
|
|
begin
|
|
|
|
v := r;
|
|
|
|
v.complete := '0';
|
|
|
|
v.do_intr := '0';
|
|
|
|
is_32bint := '0';
|
|
|
|
exec_state := IDLE;
|
|
|
|
|
|
|
|
if r.complete = '1' or r.do_intr = '1' then
|
|
|
|
v.instr_done := '0';
|
|
|
|
v.writing_fpr := '0';
|
|
|
|
v.writing_cr := '0';
|
|
|
|
v.writing_xer := '0';
|
|
|
|
v.comm_fpscr := r.fpscr;
|
|
|
|
v.illegal := '0';
|
|
|
|
end if;
|
|
|
|
|
|
|
|
-- capture incoming instruction
|
|
|
|
if e_in.valid = '1' then
|
|
|
|
v.insn := e_in.insn;
|
|
|
|
v.op := e_in.op;
|
|
|
|
v.instr_tag := e_in.itag;
|
|
|
|
v.fe_mode := or (e_in.fe_mode);
|
|
|
|
v.dest_fpr := e_in.frt;
|
|
|
|
v.single_prec := e_in.single;
|
|
|
|
v.is_signed := e_in.is_signed;
|
|
|
|
v.rc := e_in.rc;
|
|
|
|
v.fp_rc := '0';
|
|
|
|
v.is_cmp := e_in.out_cr;
|
|
|
|
v.oe := e_in.oe;
|
|
|
|
v.m32b := e_in.m32b;
|
|
|
|
v.xerc := e_in.xerc;
|
|
|
|
v.longmask := '0';
|
|
|
|
v.integer_op := '0';
|
|
|
|
v.divext := '0';
|
|
|
|
v.divmod := '0';
|
|
|
|
v.is_sqrt := '0';
|
|
|
|
v.is_multiply := '0';
|
|
|
|
fpin_a := '0';
|
|
|
|
fpin_b := '0';
|
|
|
|
fpin_c := '0';
|
|
|
|
v.use_a := e_in.valid_a;
|
|
|
|
v.use_b := e_in.valid_b;
|
|
|
|
v.use_c := e_in.valid_c;
|
|
|
|
v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
|
|
|
|
case e_in.op is
|
|
|
|
when OP_FP_ARITH =>
|
|
|
|
fpin_a := e_in.valid_a;
|
|
|
|
fpin_b := e_in.valid_b;
|
|
|
|
fpin_c := e_in.valid_c;
|
|
|
|
v.longmask := e_in.single;
|
|
|
|
v.fp_rc := e_in.rc;
|
|
|
|
exec_state := arith_decode(to_integer(unsigned(e_in.insn(5 downto 1))));
|
|
|
|
if e_in.insn(5 downto 1) = "11001" or e_in.insn(5 downto 3) = "111" then
|
|
|
|
v.is_multiply := '1';
|
|
|
|
end if;
|
|
|
|
if e_in.insn(5 downto 1) = "10110" or e_in.insn(5 downto 1) = "11010" then
|
|
|
|
v.is_sqrt := '1';
|
|
|
|
end if;
|
|
|
|
if e_in.insn(5 downto 1) = "01111" then
|
|
|
|
v.round_mode := "001";
|
|
|
|
end if;
|
|
|
|
when OP_FP_CMP =>
|
|
|
|
fpin_a := e_in.valid_a;
|
|
|
|
fpin_b := e_in.valid_b;
|
|
|
|
exec_state := cmp_decode(to_integer(unsigned(e_in.insn(8 downto 6))));
|
|
|
|
when OP_FP_MISC =>
|
|
|
|
v.fp_rc := e_in.rc;
|
|
|
|
opcbits := e_in.insn(10) & e_in.insn(8) & e_in.insn(4) & e_in.insn(2) & e_in.insn(1);
|
|
|
|
exec_state := misc_decode(to_integer(unsigned(opcbits)));
|
|
|
|
when OP_FP_MOVE =>
|
|
|
|
v.fp_rc := e_in.rc;
|
|
|
|
fpin_a := e_in.valid_a;
|
|
|
|
fpin_b := e_in.valid_b;
|
|
|
|
fpin_c := e_in.valid_c;
|
|
|
|
if e_in.insn(5) = '0' then
|
|
|
|
exec_state := DO_FMR;
|
|
|
|
else
|
|
|
|
exec_state := DO_FSEL;
|
|
|
|
end if;
|
|
|
|
when OP_DIV =>
|
|
|
|
v.integer_op := '1';
|
|
|
|
is_32bint := e_in.single;
|
|
|
|
exec_state := DO_IDIVMOD;
|
|
|
|
when OP_DIVE =>
|
|
|
|
v.integer_op := '1';
|
|
|
|
v.divext := '1';
|
|
|
|
is_32bint := e_in.single;
|
|
|
|
exec_state := DO_IDIVMOD;
|
|
|
|
when OP_MOD =>
|
|
|
|
v.integer_op := '1';
|
|
|
|
v.divmod := '1';
|
|
|
|
is_32bint := e_in.single;
|
|
|
|
exec_state := DO_IDIVMOD;
|
|
|
|
when others =>
|
|
|
|
exec_state := DO_ILLEGAL;
|
|
|
|
end case;
|
|
|
|
v.quieten_nan := '1';
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
v.tiny := '0';
|
|
|
|
v.denorm := '0';
|
|
|
|
v.is_subtract := '0';
|
|
|
|
v.add_bsmall := '0';
|
|
|
|
v.int_ovf := '0';
|
|
|
|
v.div_close := '0';
|
|
|
|
|
|
|
|
adec := decode_dp(e_in.fra, fpin_a, is_32bint, e_in.is_signed);
|
|
|
|
bdec := decode_dp(e_in.frb, fpin_b, is_32bint, e_in.is_signed);
|
|
|
|
cdec := decode_dp(e_in.frc, fpin_c, '0', '0');
|
|
|
|
v.a := adec;
|
|
|
|
v.b := bdec;
|
|
|
|
v.c := cdec;
|
|
|
|
|
|
|
|
v.exp_cmp := '0';
|
|
|
|
if adec.exponent > bdec.exponent then
|
|
|
|
v.exp_cmp := '1';
|
|
|
|
end if;
|
|
|
|
v.madd_cmp := '0';
|
|
|
|
if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then
|
|
|
|
v.madd_cmp := '1';
|
|
|
|
end if;
|
|
|
|
|
|
|
|
v.a_hi := 8x"0";
|
|
|
|
v.a_lo := 56x"0";
|
|
|
|
end if;
|
|
|
|
|
|
|
|
r_hi_nz <= or (r.r(UNIT_BIT + 1 downto SP_LSB));
|
|
|
|
r_lo_nz <= or (r.r(SP_LSB - 1 downto DP_LSB));
|
|
|
|
r_gt_1 <= or (r.r(63 downto 1));
|
|
|
|
s_nz <= or (r.s);
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
|
|
|
|
if r.single_prec = '0' then
|
|
|
|
if r.doing_ftdiv(1) = '0' then
|
|
|
|
max_exp := to_signed(1023, EXP_BITS);
|
|
|
|
else
|
|
|
|
max_exp := to_signed(1020, EXP_BITS);
|
|
|
|
end if;
|
|
|
|
if r.doing_ftdiv(0) = '0' then
|
|
|
|
min_exp := to_signed(-1022, EXP_BITS);
|
|
|
|
else
|
|
|
|
min_exp := to_signed(-1021, EXP_BITS);
|
|
|
|
end if;
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
bias_exp := to_signed(1536, EXP_BITS);
|
|
|
|
else
|
|
|
|
max_exp := to_signed(127, EXP_BITS);
|
|
|
|
min_exp := to_signed(-126, EXP_BITS);
|
|
|
|
bias_exp := to_signed(192, EXP_BITS);
|
|
|
|
end if;
|
|
|
|
new_exp := r.result_exp - r.shift;
|
|
|
|
exp_tiny := '0';
|
|
|
|
exp_huge := '0';
|
|
|
|
if is_X(new_exp) or is_X(min_exp) then
|
|
|
|
exp_tiny := 'X';
|
|
|
|
elsif new_exp < min_exp then
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
exp_tiny := '1';
|
|
|
|
end if;
|
|
|
|
if is_X(new_exp) or is_X(max_exp) then
|
|
|
|
exp_huge := 'X';
|
|
|
|
elsif new_exp > max_exp then
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
exp_huge := '1';
|
|
|
|
end if;
|
|
|
|
|
|
|
|
-- Compare P with zero, with B and with C
|
|
|
|
px_nz := or (r.p(UNIT_BIT + 1 downto 4));
|
|
|
|
pcmpb_eq := '0';
|
|
|
|
if r.p(59 downto 4) = r.b.mantissa(UNIT_BIT + 1 downto DP_RBIT) then
|
|
|
|
pcmpb_eq := '1';
|
|
|
|
end if;
|
|
|
|
pcmpb_lt := '0';
|
|
|
|
if is_X(r.p(59 downto 4)) or is_X(r.b.mantissa(55 downto 0)) then
|
|
|
|
pcmpb_lt := 'X';
|
|
|
|
elsif unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(UNIT_BIT + 1 downto DP_RBIT)) then
|
|
|
|
pcmpb_lt := '1';
|
|
|
|
end if;
|
|
|
|
pcmpc_eq := '0';
|
|
|
|
if r.p = r.c.mantissa then
|
|
|
|
pcmpc_eq := '1';
|
|
|
|
end if;
|
|
|
|
pcmpc_lt := '0';
|
|
|
|
if is_X(r.p) or is_X(r.c.mantissa) then
|
|
|
|
pcmpc_lt := 'X';
|
|
|
|
elsif unsigned(r.p) < unsigned(r.c.mantissa) then
|
|
|
|
pcmpc_lt := '1';
|
|
|
|
end if;
|
|
|
|
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
v.update_fprf := '0';
|
|
|
|
v.first := '0';
|
|
|
|
v.doing_ftdiv := "00";
|
|
|
|
v.opsel_a := AIN_R;
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
opsel_ainv <= '0';
|
|
|
|
opsel_mask <= '0';
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
opsel_b <= BIN_ZERO;
|
|
|
|
opsel_binv <= '0';
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
opsel_r <= RES_SUM;
|
|
|
|
opsel_s <= S_ZERO;
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
carry_in <= '0';
|
|
|
|
misc_sel <= "0000";
|
|
|
|
fpscr_mask := (others => '1');
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
update_fx := '0';
|
|
|
|
arith_done := '0';
|
|
|
|
invalid := '0';
|
|
|
|
zero_divide := '0';
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
set_x := '0';
|
|
|
|
qnan_result := '0';
|
|
|
|
set_a := '0';
|
|
|
|
set_a_exp := '0';
|
|
|
|
set_a_mant := '0';
|
|
|
|
set_a_hi := '0';
|
|
|
|
set_a_lo := '0';
|
|
|
|
set_b := '0';
|
|
|
|
set_b_mant := '0';
|
|
|
|
set_c := '0';
|
|
|
|
set_s := '0';
|
|
|
|
f_to_multiply.is_signed <= '0';
|
|
|
|
f_to_multiply.valid <= '0';
|
|
|
|
msel_1 <= MUL1_A;
|
|
|
|
msel_2 <= MUL2_C;
|
|
|
|
msel_add <= MULADD_ZERO;
|
|
|
|
msel_inv <= '0';
|
|
|
|
set_y := '0';
|
|
|
|
pshift := '0';
|
|
|
|
renorm_sqrt := '0';
|
|
|
|
shiftin := '0';
|
|
|
|
shiftin0 := '0';
|
|
|
|
rbit_inc := '0';
|
|
|
|
mult_mask := '0';
|
|
|
|
rnd_b32 := '0';
|
|
|
|
int_result := '0';
|
|
|
|
illegal := '0';
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
|
|
|
|
re_sel1 <= REXP1_ZERO;
|
|
|
|
re_sel2 <= REXP2_CON;
|
|
|
|
re_con2 <= RECON2_ZERO;
|
|
|
|
re_neg1 <= '0';
|
|
|
|
re_neg2 <= '0';
|
|
|
|
re_set_result <= '0';
|
|
|
|
rs_sel1 <= RSH1_ZERO;
|
|
|
|
rs_sel2 <= RSH2_CON;
|
|
|
|
rs_con2 <= RSCON2_ZERO;
|
|
|
|
rs_neg1 <= '0';
|
|
|
|
rs_neg2 <= '0';
|
|
|
|
rs_norm <= '0';
|
|
|
|
|
|
|
|
case r.state is
|
|
|
|
when IDLE =>
|
|
|
|
v.invalid := '0';
|
|
|
|
v.negate := '0';
|
|
|
|
if e_in.valid = '1' then
|
|
|
|
v.opsel_a := AIN_B;
|
|
|
|
v.busy := '1';
|
|
|
|
if e_in.op = OP_FP_ARITH and e_in.valid_a = '1' and
|
|
|
|
(e_in.valid_b = '0' or e_in.valid_c = '0') then
|
|
|
|
v.opsel_a := AIN_A;
|
|
|
|
end if;
|
|
|
|
if e_in.op = OP_FP_ARITH then
|
|
|
|
-- input selection for denorm cases
|
|
|
|
case e_in.insn(5 downto 1) is
|
|
|
|
when "10010" => -- fdiv
|
|
|
|
if v.b.mantissa(UNIT_BIT) = '0' and v.a.mantissa(UNIT_BIT) = '1' then
|
|
|
|
v.opsel_a := AIN_B;
|
|
|
|
end if;
|
|
|
|
when "11001" => -- fmul
|
|
|
|
if v.c.mantissa(UNIT_BIT) = '0' and v.a.mantissa(UNIT_BIT) = '1' then
|
|
|
|
v.opsel_a := AIN_C;
|
|
|
|
end if;
|
|
|
|
when "11100" | "11101" | "11110" | "11111" => -- fmadd etc.
|
|
|
|
if v.a.mantissa(UNIT_BIT) = '0' then
|
|
|
|
v.opsel_a := AIN_A;
|
|
|
|
elsif v.c.mantissa(UNIT_BIT) = '0' then
|
|
|
|
v.opsel_a := AIN_C;
|
|
|
|
end if;
|
|
|
|
when others =>
|
|
|
|
end case;
|
|
|
|
end if;
|
|
|
|
v.state := exec_state;
|
|
|
|
end if;
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
v.x := '0';
|
|
|
|
v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX);
|
|
|
|
set_s := '1';
|
|
|
|
|
|
|
|
when DO_ILLEGAL =>
|
|
|
|
illegal := '1';
|
|
|
|
v.instr_done := '1';
|
|
|
|
|
|
|
|
when DO_MCRFS =>
|
|
|
|
j := to_integer(unsigned(insn_bfa(r.insn)));
|
|
|
|
for i in 0 to 7 loop
|
|
|
|
if i = j then
|
|
|
|
k := (7 - i) * 4;
|
|
|
|
v.cr_result := r.fpscr(k + 3 downto k);
|
|
|
|
fpscr_mask(k + 3 downto k) := "0000";
|
|
|
|
end if;
|
|
|
|
end loop;
|
|
|
|
v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF");
|
|
|
|
v.instr_done := '1';
|
|
|
|
|
|
|
|
when DO_FTDIV =>
|
|
|
|
v.cr_result := "0000";
|
|
|
|
-- set result_exp to the exponent of B
|
|
|
|
re_sel2 <= REXP2_B;
|
|
|
|
re_set_result <= '1';
|
|
|
|
if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
|
|
|
|
(r.b.class = FINITE and r.b.mantissa(UNIT_BIT) = '0') then
|
|
|
|
v.cr_result(2) := '1';
|
|
|
|
end if;
|
|
|
|
if r.a.class = NAN or r.a.class = INFINITY or
|
|
|
|
r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
|
|
|
|
(r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
|
|
|
|
v.cr_result(1) := '1';
|
|
|
|
v.instr_done := '1';
|
|
|
|
else
|
|
|
|
v.doing_ftdiv := "11";
|
|
|
|
v.first := '1';
|
|
|
|
v.state := FTDIV_1;
|
|
|
|
v.instr_done := '0';
|
|
|
|
end if;
|
|
|
|
|
|
|
|
when DO_FTSQRT =>
|
|
|
|
v.instr_done := '1';
|
|
|
|
v.cr_result := "0000";
|
|
|
|
if r.b.class = ZERO or r.b.class = INFINITY or
|
|
|
|
(r.b.class = FINITE and r.b.mantissa(UNIT_BIT) = '0') then
|
|
|
|
v.cr_result(2) := '1';
|
|
|
|
end if;
|
|
|
|
if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
|
|
|
|
or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
|
|
|
|
v.cr_result(1) := '1';
|
|
|
|
end if;
|
|
|
|
|
|
|
|
when DO_FCMP =>
|
|
|
|
-- fcmp[uo]
|
|
|
|
-- r.opsel_a = AIN_B
|
|
|
|
v.instr_done := '1';
|
|
|
|
update_fx := '1';
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_sel2 <= REXP2_B;
|
|
|
|
re_set_result <= '1';
|
|
|
|
if (r.a.class = NAN and r.a.mantissa(QNAN_BIT) = '0') or
|
|
|
|
(r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0') then
|
|
|
|
-- Signalling NAN
|
|
|
|
v.fpscr(FPSCR_VXSNAN) := '1';
|
|
|
|
if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then
|
|
|
|
v.fpscr(FPSCR_VXVC) := '1';
|
|
|
|
end if;
|
|
|
|
invalid := '1';
|
|
|
|
v.cr_result := "0001"; -- unordered
|
|
|
|
elsif r.a.class = NAN or r.b.class = NAN then
|
|
|
|
if r.insn(6) = '1' then
|
|
|
|
-- fcmpo
|
|
|
|
v.fpscr(FPSCR_VXVC) := '1';
|
|
|
|
invalid := '1';
|
|
|
|
end if;
|
|
|
|
v.cr_result := "0001"; -- unordered
|
|
|
|
elsif r.a.class = ZERO and r.b.class = ZERO then
|
|
|
|
v.cr_result := "0010"; -- equal
|
|
|
|
elsif r.a.negative /= r.b.negative then
|
|
|
|
v.cr_result := r.a.negative & r.b.negative & "00";
|
|
|
|
elsif r.a.class = ZERO then
|
|
|
|
-- A and B are the same sign from here down
|
|
|
|
v.cr_result := not r.b.negative & r.b.negative & "00";
|
|
|
|
elsif r.a.class = INFINITY then
|
|
|
|
if r.b.class = INFINITY then
|
|
|
|
v.cr_result := "0010";
|
|
|
|
else
|
|
|
|
v.cr_result := r.a.negative & not r.a.negative & "00";
|
|
|
|
end if;
|
|
|
|
elsif r.b.class = ZERO then
|
|
|
|
-- A is finite from here down
|
|
|
|
v.cr_result := r.a.negative & not r.a.negative & "00";
|
|
|
|
elsif r.b.class = INFINITY then
|
|
|
|
v.cr_result := not r.b.negative & r.b.negative & "00";
|
|
|
|
elsif r.exp_cmp = '1' then
|
|
|
|
-- A and B are both finite from here down
|
|
|
|
v.cr_result := r.a.negative & not r.a.negative & "00";
|
|
|
|
elsif r.a.exponent /= r.b.exponent then
|
|
|
|
-- A exponent is smaller than B
|
|
|
|
v.cr_result := not r.a.negative & r.a.negative & "00";
|
|
|
|
else
|
|
|
|
-- Prepare to subtract mantissas, put B in R
|
|
|
|
v.cr_result := "0000";
|
|
|
|
v.instr_done := '0';
|
|
|
|
v.opsel_a := AIN_A;
|
|
|
|
v.state := CMP_1;
|
|
|
|
end if;
|
|
|
|
v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
|
|
|
|
|
|
|
|
when DO_MTFSB =>
|
|
|
|
-- mtfsb{0,1}
|
|
|
|
j := to_integer(unsigned(insn_bt(r.insn)));
|
|
|
|
for i in 0 to 31 loop
|
|
|
|
if i = j then
|
|
|
|
v.fpscr(31 - i) := r.insn(6);
|
|
|
|
end if;
|
|
|
|
end loop;
|
|
|
|
v.instr_done := '1';
|
|
|
|
|
|
|
|
when DO_MTFSFI =>
|
|
|
|
-- mtfsfi
|
|
|
|
j := to_integer(unsigned(insn_bf(r.insn)));
|
|
|
|
if r.insn(16) = '0' then
|
|
|
|
for i in 0 to 7 loop
|
|
|
|
if i = j then
|
|
|
|
k := (7 - i) * 4;
|
|
|
|
v.fpscr(k + 3 downto k) := insn_u(r.insn);
|
|
|
|
end if;
|
|
|
|
end loop;
|
|
|
|
end if;
|
|
|
|
v.instr_done := '1';
|
|
|
|
|
|
|
|
when DO_FMRG =>
|
|
|
|
-- fmrgew, fmrgow
|
|
|
|
opsel_r <= RES_MISC;
|
|
|
|
misc_sel <= "01" & r.insn(8) & '0';
|
|
|
|
int_result := '1';
|
|
|
|
v.writing_fpr := '1';
|
|
|
|
v.instr_done := '1';
|
|
|
|
|
|
|
|
when DO_MFFS =>
|
|
|
|
v.writing_fpr := '1';
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
opsel_r <= RES_MISC;
|
|
|
|
case r.insn(20 downto 16) is
|
|
|
|
when "00000" =>
|
|
|
|
-- mffs
|
|
|
|
when "00001" =>
|
|
|
|
-- mffsce
|
|
|
|
v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000";
|
|
|
|
when "10100" | "10101" =>
|
|
|
|
-- mffscdrn[i] (but we don't implement DRN)
|
|
|
|
fpscr_mask := x"000000FF";
|
|
|
|
when "10110" =>
|
|
|
|
-- mffscrn
|
|
|
|
fpscr_mask := x"000000FF";
|
|
|
|
v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
|
|
|
|
r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN);
|
|
|
|
when "10111" =>
|
|
|
|
-- mffscrni
|
|
|
|
fpscr_mask := x"000000FF";
|
|
|
|
v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11);
|
|
|
|
when "11000" =>
|
|
|
|
-- mffsl
|
|
|
|
fpscr_mask := x"0007F0FF";
|
|
|
|
when others =>
|
|
|
|
v.illegal := '1';
|
|
|
|
v.writing_fpr := '0';
|
|
|
|
end case;
|
|
|
|
int_result := '1';
|
|
|
|
v.instr_done := '1';
|
|
|
|
|
|
|
|
when DO_MTFSF =>
|
|
|
|
if r.insn(25) = '1' then
|
|
|
|
flm := x"FF";
|
|
|
|
elsif r.insn(16) = '1' then
|
|
|
|
flm := x"00";
|
|
|
|
else
|
|
|
|
flm := r.insn(24 downto 17);
|
|
|
|
end if;
|
|
|
|
for i in 0 to 7 loop
|
|
|
|
k := i * 4;
|
|
|
|
if flm(i) = '1' then
|
|
|
|
v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
|
|
|
|
end if;
|
|
|
|
end loop;
|
|
|
|
v.instr_done := '1';
|
|
|
|
|
|
|
|
when DO_FMR =>
|
|
|
|
-- r.opsel_a = AIN_B
|
|
|
|
v.result_class := r.b.class;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_sel2 <= REXP2_B;
|
|
|
|
re_set_result <= '1';
|
|
|
|
v.quieten_nan := '0';
|
|
|
|
if r.insn(9) = '1' then
|
|
|
|
v.result_sign := '0'; -- fabs
|
|
|
|
elsif r.insn(8) = '1' then
|
|
|
|
v.result_sign := '1'; -- fnabs
|
|
|
|
elsif r.insn(7) = '1' then
|
|
|
|
v.result_sign := r.b.negative; -- fmr
|
|
|
|
elsif r.insn(6) = '1' then
|
|
|
|
v.result_sign := not r.b.negative; -- fneg
|
|
|
|
else
|
|
|
|
v.result_sign := r.a.negative; -- fcpsgn
|
|
|
|
end if;
|
|
|
|
v.writing_fpr := '1';
|
|
|
|
v.instr_done := '1';
|
|
|
|
|
|
|
|
when DO_FRI => -- fri[nzpm]
|
|
|
|
-- r.opsel_a = AIN_B
|
|
|
|
v.result_class := r.b.class;
|
|
|
|
v.result_sign := r.b.negative;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_sel2 <= REXP2_B;
|
|
|
|
re_set_result <= '1';
|
|
|
|
-- set shift to exponent - 52
|
|
|
|
rs_sel1 <= RSH1_B;
|
|
|
|
rs_con2 <= RSCON2_52;
|
|
|
|
rs_neg2 <= '1';
|
|
|
|
v.fpscr(FPSCR_FR) := '0';
|
|
|
|
v.fpscr(FPSCR_FI) := '0';
|
|
|
|
if r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0' then
|
|
|
|
-- Signalling NAN
|
|
|
|
v.fpscr(FPSCR_VXSNAN) := '1';
|
|
|
|
invalid := '1';
|
|
|
|
end if;
|
|
|
|
if r.b.class = FINITE then
|
|
|
|
if r.b.exponent >= to_signed(52, EXP_BITS) then
|
|
|
|
-- integer already, no rounding required
|
|
|
|
arith_done := '1';
|
|
|
|
else
|
|
|
|
v.state := FRI_1;
|
|
|
|
v.round_mode := '1' & r.insn(7 downto 6);
|
|
|
|
end if;
|
|
|
|
else
|
|
|
|
arith_done := '1';
|
|
|
|
end if;
|
|
|
|
|
|
|
|
when DO_FRSP =>
|
|
|
|
-- r.opsel_a = AIN_B, r.shift = 0
|
|
|
|
v.result_class := r.b.class;
|
|
|
|
v.result_sign := r.b.negative;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_sel2 <= REXP2_B;
|
|
|
|
re_set_result <= '1';
|
|
|
|
-- set shift to exponent - (-126), i.e. exponent + 126
|
|
|
|
rs_sel1 <= RSH1_B;
|
|
|
|
rs_con2 <= RSCON2_MINEXP;
|
|
|
|
rs_neg2 <= '1';
|
|
|
|
v.fpscr(FPSCR_FR) := '0';
|
|
|
|
v.fpscr(FPSCR_FI) := '0';
|
|
|
|
-- frsp: flag a signalling NaN operand.  A NaN is signalling when its
-- quiet bit, the fraction bit just below the unit bit, is clear.
-- Use QNAN_BIT (= UNIT_BIT - 1) rather than a literal index so the
-- test follows the mantissa layout, matching the test in DO_FRI.
if r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0' then
    -- Signalling NAN
    v.fpscr(FPSCR_VXSNAN) := '1';
    invalid := '1';
end if;
|
|
|
|
set_x := '1';
|
|
|
|
if r.b.class = FINITE then
|
|
|
|
if r.b.exponent < to_signed(-126, EXP_BITS) then
|
|
|
|
v.state := ROUND_UFLOW;
|
|
|
|
elsif r.b.exponent > to_signed(127, EXP_BITS) then
|
|
|
|
v.state := ROUND_OFLOW;
|
|
|
|
else
|
|
|
|
v.state := ROUNDING;
|
|
|
|
end if;
|
|
|
|
else
|
|
|
|
arith_done := '1';
|
|
|
|
end if;
|
|
|
|
|
|
|
|
when DO_FCTI =>
|
|
|
|
-- instr bit 9: 1=dword 0=word
|
|
|
|
-- instr bit 8: 1=unsigned 0=signed
|
|
|
|
-- instr bit 1: 1=round to zero 0=use fpscr[RN]
|
|
|
|
-- r.opsel_a = AIN_B
|
|
|
|
v.result_class := r.b.class;
|
|
|
|
v.result_sign := r.b.negative;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_sel2 <= REXP2_B;
|
|
|
|
re_set_result <= '1';
|
|
|
|
rs_sel1 <= RSH1_B;
|
|
|
|
rs_neg2 <= '1';
|
|
|
|
v.fpscr(FPSCR_FR) := '0';
|
|
|
|
v.fpscr(FPSCR_FI) := '0';
|
|
|
|
-- fcti*: flag a signalling NaN operand.  A NaN is signalling when its
-- quiet bit, the fraction bit just below the unit bit, is clear.
-- Use QNAN_BIT (= UNIT_BIT - 1) rather than a literal index so the
-- test follows the mantissa layout, matching the test in DO_FRI.
if r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0' then
    -- Signalling NAN
    v.fpscr(FPSCR_VXSNAN) := '1';
    invalid := '1';
end if;
|
|
|
|
|
|
|
|
int_result := '1';
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
|
|
|
|
case r.b.class is
|
|
|
|
when ZERO =>
|
|
|
|
arith_done := '1';
|
|
|
|
when FINITE =>
|
|
|
|
if r.b.exponent >= to_signed(64, EXP_BITS) or
|
|
|
|
(r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then
|
|
|
|
v.state := INT_OFLOW;
|
|
|
|
elsif r.b.exponent >= to_signed(52, EXP_BITS) then
|
|
|
|
-- integer already, no rounding required,
|
|
|
|
-- shift into final position
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- set shift to exponent - 56
|
|
|
|
rs_con2 <= RSCON2_UNIT;
|
|
|
|
if r.insn(8) = '1' and r.b.negative = '1' then
|
|
|
|
v.state := INT_OFLOW;
|
|
|
|
else
|
|
|
|
v.state := INT_ISHIFT;
|
|
|
|
end if;
|
|
|
|
else
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- set shift to exponent - 52
|
|
|
|
rs_con2 <= RSCON2_52;
|
|
|
|
v.state := INT_SHIFT;
|
|
|
|
end if;
|
|
|
|
when INFINITY | NAN =>
|
|
|
|
v.state := INT_OFLOW;
|
|
|
|
end case;
|
|
|
|
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
when DO_FCFID =>
|
|
|
|
-- r.opsel_a = AIN_B
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
v.result_sign := '0';
|
|
|
|
if r.insn(8) = '0' and r.b.negative = '1' then
|
|
|
|
-- fcfid[s] with negative operand, set R = -B
|
|
|
|
opsel_ainv <= '1';
|
|
|
|
carry_in <= '1';
|
|
|
|
v.result_sign := '1';
|
|
|
|
end if;
|
|
|
|
v.result_class := r.b.class;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_con2 <= RECON2_UNIT;
|
|
|
|
re_set_result <= '1';
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
v.fpscr(FPSCR_FR) := '0';
|
|
|
|
v.fpscr(FPSCR_FI) := '0';
|
|
|
|
if r.b.class = ZERO then
|
|
|
|
arith_done := '1';
|
|
|
|
else
|
|
|
|
v.state := FINISH;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
when DO_FADD =>
|
|
|
|
-- fadd[s] and fsub[s]
|
|
|
|
-- r.opsel_a = AIN_A
|
|
|
|
v.result_sign := r.a.negative;
|
|
|
|
v.result_class := r.a.class;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_sel1 <= REXP1_A;
|
|
|
|
re_set_result <= '1';
|
|
|
|
-- set shift to a.exp - b.exp
|
|
|
|
rs_sel1 <= RSH1_B;
|
|
|
|
rs_neg1 <= '1';
|
|
|
|
rs_sel2 <= RSH2_A;
|
|
|
|
v.fpscr(FPSCR_FR) := '0';
|
|
|
|
v.fpscr(FPSCR_FI) := '0';
|
|
|
|
is_add := r.a.negative xor r.b.negative xor r.insn(1);
|
|
|
|
v.is_subtract := not is_add;
|
|
|
|
if r.a.class = FINITE and r.b.class = FINITE then
|
|
|
|
v.add_bsmall := r.exp_cmp;
|
|
|
|
v.opsel_a := AIN_B;
|
|
|
|
if r.exp_cmp = '0' then
|
|
|
|
v.result_sign := r.b.negative xnor r.insn(1);
|
|
|
|
if r.a.exponent = r.b.exponent then
|
|
|
|
v.state := ADD_2;
|
|
|
|
else
|
|
|
|
v.longmask := '0';
|
|
|
|
v.state := ADD_SHIFT;
|
|
|
|
end if;
|
|
|
|
else
|
|
|
|
v.state := ADD_1;
|
|
|
|
end if;
|
|
|
|
else
|
|
|
|
if r.a.class = NAN or r.b.class = NAN then
|
|
|
|
v.state := NAN_RESULT;
|
|
|
|
elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
|
|
|
|
-- invalid operation, construct QNaN
|
|
|
|
v.fpscr(FPSCR_VXISI) := '1';
|
|
|
|
qnan_result := '1';
|
|
|
|
arith_done := '1';
|
|
|
|
elsif r.a.class = INFINITY or r.b.class = ZERO then
|
|
|
|
-- result is A; we're already set up to put A into R
|
|
|
|
arith_done := '1';
|
|
|
|
else
|
|
|
|
-- result is +/- B
|
|
|
|
v.opsel_a := AIN_B;
|
|
|
|
v.result_sign := r.b.negative xnor r.insn(1);
|
|
|
|
v.state := EXC_RESULT;
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
when DO_FMUL =>
|
|
|
|
-- fmul[s]
|
|
|
|
-- r.opsel_a = AIN_A unless C is denorm and A isn't
|
|
|
|
v.result_sign := r.a.negative xor r.c.negative;
|
|
|
|
v.result_class := r.a.class;
|
|
|
|
v.fpscr(FPSCR_FR) := '0';
|
|
|
|
v.fpscr(FPSCR_FI) := '0';
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_sel1 <= REXP1_A;
|
|
|
|
re_sel2 <= REXP2_C;
|
|
|
|
re_set_result <= '1';
|
|
|
|
if r.a.class = FINITE and r.c.class = FINITE then
|
|
|
|
-- Renormalize denorm operands
|
|
|
|
if r.a.mantissa(UNIT_BIT) = '0' then
|
|
|
|
v.state := RENORM_A;
|
|
|
|
elsif r.c.mantissa(UNIT_BIT) = '0' then
|
|
|
|
v.state := RENORM_C;
|
|
|
|
else
|
|
|
|
f_to_multiply.valid <= '1';
|
|
|
|
v.state := MULT_1;
|
|
|
|
end if;
|
|
|
|
else
|
|
|
|
if r.a.class = NAN or r.c.class = NAN then
|
|
|
|
v.state := NAN_RESULT;
|
|
|
|
elsif (r.a.class = INFINITY and r.c.class = ZERO) or
|
|
|
|
(r.a.class = ZERO and r.c.class = INFINITY) then
|
|
|
|
-- invalid operation, construct QNaN
|
|
|
|
v.fpscr(FPSCR_VXIMZ) := '1';
|
|
|
|
qnan_result := '1';
|
|
|
|
elsif r.a.class = ZERO or r.a.class = INFINITY then
|
|
|
|
-- result is +/- A
|
|
|
|
arith_done := '1';
|
|
|
|
else
|
|
|
|
-- r.c.class is ZERO or INFINITY
|
|
|
|
v.opsel_a := AIN_C;
|
|
|
|
v.state := EXC_RESULT;
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
when DO_FDIV =>
|
|
|
|
-- r.opsel_a = AIN_A unless B is denorm and A isn't
|
|
|
|
v.result_class := r.a.class;
|
|
|
|
v.fpscr(FPSCR_FR) := '0';
|
|
|
|
v.fpscr(FPSCR_FI) := '0';
|
|
|
|
v.result_sign := r.a.negative xor r.b.negative;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_sel1 <= REXP1_A;
|
|
|
|
re_sel2 <= REXP2_B;
|
|
|
|
re_neg2 <= '1';
|
|
|
|
re_set_result <= '1';
|
|
|
|
v.count := "00";
|
|
|
|
if r.a.class = FINITE and r.b.class = FINITE then
|
|
|
|
-- Renormalize denorm operands
|
|
|
|
if r.a.mantissa(UNIT_BIT) = '0' then
|
|
|
|
v.state := RENORM_A;
|
|
|
|
elsif r.b.mantissa(UNIT_BIT) = '0' then
|
|
|
|
v.state := RENORM_B;
|
|
|
|
else
|
|
|
|
v.first := '1';
|
|
|
|
v.state := DIV_2;
|
|
|
|
end if;
|
|
|
|
else
|
|
|
|
if r.a.class = NAN or r.b.class = NAN then
|
|
|
|
v.state := NAN_RESULT;
|
|
|
|
elsif r.b.class = INFINITY then
|
|
|
|
if r.a.class = INFINITY then
|
|
|
|
v.fpscr(FPSCR_VXIDI) := '1';
|
|
|
|
qnan_result := '1';
|
|
|
|
else
|
|
|
|
v.result_class := ZERO;
|
|
|
|
end if;
|
|
|
|
arith_done := '1';
|
|
|
|
elsif r.b.class = ZERO then
|
|
|
|
if r.a.class = ZERO then
|
|
|
|
v.fpscr(FPSCR_VXZDZ) := '1';
|
|
|
|
qnan_result := '1';
|
|
|
|
else
|
|
|
|
if r.a.class = FINITE then
|
|
|
|
zero_divide := '1';
|
|
|
|
end if;
|
|
|
|
v.result_class := INFINITY;
|
|
|
|
end if;
|
|
|
|
arith_done := '1';
|
|
|
|
else -- r.b.class = FINITE, result_class = r.a.class
|
|
|
|
arith_done := '1';
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
when DO_FSEL =>
-- Floating select: pick operand C or operand B depending on operand A,
-- then hand off to the EXC_RESULT state.
|
|
|
|
-- Choose C when A is a zero (of either sign), or when A is
-- non-negative and not a NaN; otherwise (A negative non-zero, or
-- A a NaN) choose B.
if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then
|
|
|
|
v.opsel_a := AIN_C;
|
|
|
|
v.result_sign := r.c.negative;
|
|
|
|
else
|
|
|
|
v.opsel_a := AIN_B;
|
|
|
|
v.result_sign := r.b.negative;
|
|
|
|
end if;
|
|
|
|
-- NOTE(review): quieten_nan is cleared here, presumably so that a NaN
-- operand is passed through unmodified -- confirm against the code
-- that consumes r.quieten_nan.
v.quieten_nan := '0';
|
|
|
|
v.state := EXC_RESULT;
|
|
|
|
|
|
|
|
when DO_FSQRT =>
|
|
|
|
-- r.opsel_a = AIN_B
|
|
|
|
v.result_class := r.b.class;
|
|
|
|
v.result_sign := r.b.negative;
|
|
|
|
v.fpscr(FPSCR_FR) := '0';
|
|
|
|
v.fpscr(FPSCR_FI) := '0';
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_sel2 <= REXP2_B;
|
|
|
|
re_set_result <= '1';
|
|
|
|
case r.b.class is
|
|
|
|
when FINITE =>
|
|
|
|
if r.b.negative = '1' then
|
|
|
|
v.fpscr(FPSCR_VXSQRT) := '1';
|
|
|
|
qnan_result := '1';
|
|
|
|
elsif r.b.mantissa(UNIT_BIT) = '0' then
|
|
|
|
v.state := RENORM_B;
|
|
|
|
elsif r.b.exponent(0) = '0' then
|
|
|
|
v.state := SQRT_1;
|
|
|
|
else
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- set shift to 1
|
|
|
|
rs_con2 <= RSCON2_1;
|
|
|
|
v.state := RENORM_B2;
|
|
|
|
end if;
|
|
|
|
when NAN =>
|
|
|
|
v.state := NAN_RESULT;
|
|
|
|
when ZERO =>
|
|
|
|
-- result is B
|
|
|
|
arith_done := '1';
|
|
|
|
when INFINITY =>
|
|
|
|
if r.b.negative = '1' then
|
|
|
|
v.fpscr(FPSCR_VXSQRT) := '1';
|
|
|
|
qnan_result := '1';
|
|
|
|
-- else result is B
|
|
|
|
end if;
|
|
|
|
arith_done := '1';
|
|
|
|
end case;
|
|
|
|
|
|
|
|
when DO_FRE =>
|
|
|
|
-- r.opsel_a = AIN_B
|
|
|
|
v.result_class := r.b.class;
|
|
|
|
v.result_sign := r.b.negative;
|
|
|
|
v.fpscr(FPSCR_FR) := '0';
|
|
|
|
v.fpscr(FPSCR_FI) := '0';
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_sel2 <= REXP2_B;
|
|
|
|
re_set_result <= '1';
|
|
|
|
case r.b.class is
|
|
|
|
when FINITE =>
|
|
|
|
if r.b.mantissa(UNIT_BIT) = '0' then
|
|
|
|
v.state := RENORM_B;
|
|
|
|
else
|
|
|
|
v.state := FRE_1;
|
|
|
|
end if;
|
|
|
|
when NAN =>
|
|
|
|
v.state := NAN_RESULT;
|
|
|
|
when INFINITY =>
|
|
|
|
v.result_class := ZERO;
|
|
|
|
arith_done := '1';
|
|
|
|
when ZERO =>
|
|
|
|
v.result_class := INFINITY;
|
|
|
|
zero_divide := '1';
|
|
|
|
arith_done := '1';
|
|
|
|
end case;
|
|
|
|
|
|
|
|
when DO_FRSQRTE =>
|
|
|
|
-- r.opsel_a = AIN_B
|
|
|
|
v.result_class := r.b.class;
|
|
|
|
v.result_sign := r.b.negative;
|
|
|
|
v.fpscr(FPSCR_FR) := '0';
|
|
|
|
v.fpscr(FPSCR_FI) := '0';
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_sel2 <= REXP2_B;
|
|
|
|
re_set_result <= '1';
|
|
|
|
-- set shift to 1
|
|
|
|
rs_con2 <= RSCON2_1;
|
|
|
|
case r.b.class is
|
|
|
|
when FINITE =>
|
|
|
|
if r.b.negative = '1' then
|
|
|
|
v.fpscr(FPSCR_VXSQRT) := '1';
|
|
|
|
qnan_result := '1';
|
|
|
|
elsif r.b.mantissa(UNIT_BIT) = '0' then
|
|
|
|
v.state := RENORM_B;
|
|
|
|
elsif r.b.exponent(0) = '0' then
|
|
|
|
v.state := RSQRT_1;
|
|
|
|
else
|
|
|
|
v.state := RENORM_B2;
|
|
|
|
end if;
|
|
|
|
when NAN =>
|
|
|
|
v.state := NAN_RESULT;
|
|
|
|
when INFINITY =>
|
|
|
|
if r.b.negative = '1' then
|
|
|
|
v.fpscr(FPSCR_VXSQRT) := '1';
|
|
|
|
qnan_result := '1';
|
|
|
|
else
|
|
|
|
v.result_class := ZERO;
|
|
|
|
end if;
|
|
|
|
arith_done := '1';
|
|
|
|
when ZERO =>
|
|
|
|
v.result_class := INFINITY;
|
|
|
|
zero_divide := '1';
|
|
|
|
arith_done := '1';
|
|
|
|
end case;
|
|
|
|
|
|
|
|
when DO_FMADD =>
|
|
|
|
-- fmadd, fmsub, fnmadd, fnmsub
|
|
|
|
-- r.opsel_a = AIN_A if A is denorm, else AIN_C if C is denorm,
|
|
|
|
-- else AIN_B
|
|
|
|
v.result_sign := r.a.negative;
|
|
|
|
v.result_class := r.a.class;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- put a.exp + c.exp into result_exp
|
|
|
|
re_sel1 <= REXP1_A;
|
|
|
|
re_sel2 <= REXP2_C;
|
|
|
|
re_set_result <= '1';
|
|
|
|
-- put b.exp into shift
|
|
|
|
rs_sel1 <= RSH1_B;
|
|
|
|
v.fpscr(FPSCR_FR) := '0';
|
|
|
|
v.fpscr(FPSCR_FI) := '0';
|
|
|
|
is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1);
|
|
|
|
v.negate := r.insn(2);
|
|
|
|
v.is_subtract := not is_add;
|
|
|
|
if r.a.class = FINITE and r.c.class = FINITE and
|
|
|
|
(r.b.class = FINITE or r.b.class = ZERO) then
|
|
|
|
-- Make sure A and C are normalized
|
|
|
|
if r.a.mantissa(UNIT_BIT) = '0' then
|
|
|
|
v.state := RENORM_A;
|
|
|
|
elsif r.c.mantissa(UNIT_BIT) = '0' then
|
|
|
|
v.state := RENORM_C;
|
|
|
|
elsif r.b.class = ZERO then
|
|
|
|
-- no addend, degenerates to multiply
|
|
|
|
v.result_sign := r.a.negative xor r.c.negative;
|
|
|
|
f_to_multiply.valid <= '1';
|
|
|
|
v.is_multiply := '1';
|
|
|
|
v.state := MULT_1;
|
|
|
|
elsif r.madd_cmp = '0' then
|
|
|
|
-- addend is bigger, do multiply first
|
|
|
|
v.result_sign := r.b.negative xnor r.insn(1);
|
|
|
|
f_to_multiply.valid <= '1';
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
v.first := '1';
|
|
|
|
v.state := FMADD_0;
|
|
|
|
else
|
|
|
|
-- product is bigger, shift B first
|
|
|
|
v.state := FMADD_1;
|
|
|
|
end if;
|
|
|
|
else
|
|
|
|
if r.a.class = NAN or r.b.class = NAN or r.c.class = NAN then
|
|
|
|
v.state := NAN_RESULT;
|
|
|
|
elsif (r.a.class = ZERO and r.c.class = INFINITY) or
|
|
|
|
(r.a.class = INFINITY and r.c.class = ZERO) then
|
|
|
|
-- invalid operation, construct QNaN
|
|
|
|
v.fpscr(FPSCR_VXIMZ) := '1';
|
|
|
|
qnan_result := '1';
|
|
|
|
elsif r.a.class = INFINITY or r.c.class = INFINITY then
|
|
|
|
if r.b.class = INFINITY and is_add = '0' then
|
|
|
|
-- invalid operation, construct QNaN
|
|
|
|
v.fpscr(FPSCR_VXISI) := '1';
|
|
|
|
qnan_result := '1';
|
|
|
|
else
|
|
|
|
-- result is infinity
|
|
|
|
v.result_class := INFINITY;
|
|
|
|
v.result_sign := r.a.negative xor r.c.negative;
|
|
|
|
arith_done := '1';
|
|
|
|
end if;
|
|
|
|
else
|
|
|
|
-- Here A is zero, C is zero, or B is infinity
|
|
|
|
-- Result is +/-B in all of those cases
|
|
|
|
v.opsel_a := AIN_B;
|
|
|
|
v.result_sign := r.b.negative xnor r.insn(1);
|
|
|
|
v.state := EXC_RESULT;
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
when RENORM_A =>
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
rs_norm <= '1';
|
|
|
|
v.state := RENORM_A2;
|
|
|
|
if r.use_c = '1' and r.c.denorm = '1' then
|
|
|
|
v.opsel_a := AIN_C;
|
|
|
|
else
|
|
|
|
v.opsel_a := AIN_B;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
when RENORM_A2 =>
|
|
|
|
-- r.opsel_a was set in RENORM_A: AIN_C if r.use_c = 1 and C is denormalized, else AIN_B
|
|
|
|
set_a := '1';
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_sel2 <= REXP2_NE;
|
|
|
|
re_set_result <= '1';
|
|
|
|
if r.insn(4) = '1' then
|
|
|
|
if r.c.mantissa(UNIT_BIT) = '1' then
|
|
|
|
if r.insn(3) = '0' or r.b.class = ZERO then
|
|
|
|
v.first := '1';
|
|
|
|
v.state := MULT_1;
|
|
|
|
else
|
|
|
|
v.madd_cmp := '0';
|
|
|
|
if new_exp + 1 >= r.b.exponent then
|
|
|
|
v.madd_cmp := '1';
|
|
|
|
end if;
|
|
|
|
v.opsel_a := AIN_B;
|
|
|
|
v.state := DO_FMADD;
|
|
|
|
end if;
|
|
|
|
else
|
|
|
|
v.state := RENORM_C;
|
|
|
|
end if;
|
|
|
|
else
|
|
|
|
if r.b.mantissa(UNIT_BIT) = '1' then
|
|
|
|
v.first := '1';
|
|
|
|
v.state := DIV_2;
|
|
|
|
else
|
|
|
|
v.state := RENORM_B;
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
when RENORM_B =>
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
rs_norm <= '1';
|
|
|
|
renorm_sqrt := r.is_sqrt;
|
|
|
|
v.state := RENORM_B2;
|
|
|
|
|
|
|
|
when RENORM_B2 =>
|
|
|
|
set_b := '1';
|
|
|
|
-- For fdiv, we need to increase result_exp by shift rather
|
|
|
|
-- than decreasing it as for fre/frsqrte and fsqrt.
|
|
|
|
-- We do that by negating r.shift in this cycle and then
|
|
|
|
-- setting result_exp to new_exp in the next cycle
|
|
|
|
if r.use_a = '1' then
|
|
|
|
rs_sel1 <= RSH1_S;
|
|
|
|
rs_neg1 <= '1';
|
|
|
|
else
|
|
|
|
re_sel2 <= REXP2_NE;
|
|
|
|
re_set_result <= '1';
|
|
|
|
end if;
|
|
|
|
v.opsel_a := AIN_B;
|
|
|
|
v.state := LOOKUP;
|
|
|
|
|
|
|
|
when RENORM_C =>
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
rs_norm <= '1';
|
|
|
|
v.state := RENORM_C2;
|
|
|
|
|
|
|
|
when RENORM_C2 =>
|
|
|
|
set_c := '1';
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_sel2 <= REXP2_NE;
|
|
|
|
re_set_result <= '1';
|
|
|
|
if r.insn(3) = '0' or r.b.class = ZERO then
|
|
|
|
v.first := '1';
|
|
|
|
v.state := MULT_1;
|
|
|
|
else
|
|
|
|
v.madd_cmp := '0';
|
|
|
|
if new_exp + 1 >= r.b.exponent then
|
|
|
|
v.madd_cmp := '1';
|
|
|
|
end if;
|
|
|
|
v.opsel_a := AIN_B;
|
|
|
|
v.state := DO_FMADD;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
when ADD_1 =>
|
|
|
|
-- transferring B to R
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_sel2 <= REXP2_B;
|
|
|
|
re_set_result <= '1';
|
|
|
|
-- set shift to b.exp - a.exp
|
|
|
|
rs_sel1 <= RSH1_B;
|
|
|
|
rs_sel2 <= RSH2_A;
|
|
|
|
rs_neg2 <= '1';
|
|
|
|
v.longmask := '0';
|
|
|
|
v.state := ADD_SHIFT;
|
|
|
|
|
|
|
|
when ADD_SHIFT =>
|
|
|
|
-- r.shift = b.exp - a.exp (set in ADD_1), r.longmask = 0
|
|
|
|
opsel_r <= RES_SHIFT;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_sel2 <= REXP2_NE;
|
|
|
|
re_set_result <= '1';
|
|
|
|
v.x := s_nz;
|
|
|
|
set_x := '1';
|
|
|
|
v.longmask := r.single_prec;
|
|
|
|
if r.add_bsmall = '1' then
|
|
|
|
v.opsel_a := AIN_A;
|
|
|
|
else
|
|
|
|
v.opsel_a := AIN_B;
|
|
|
|
end if;
|
|
|
|
v.state := ADD_2;
|
|
|
|
|
|
|
|
when ADD_2 =>
|
|
|
|
-- r.opsel_a = AIN_A if r.add_bsmall = 1 else AIN_B
|
|
|
|
opsel_b <= BIN_R;
|
|
|
|
opsel_binv <= r.is_subtract;
|
|
|
|
carry_in <= r.is_subtract and not r.x;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- set shift to -1
|
|
|
|
rs_con2 <= RSCON2_1;
|
|
|
|
rs_neg2 <= '1';
|
|
|
|
v.state := ADD_3;
|
|
|
|
|
|
|
|
-- ADD_3: inspect the sum held in R and choose the follow-on state:
-- FINISH (after flipping the sign), ROUNDING / ROUND_OFLOW, a ZERO result,
-- or NORMALIZE.
when ADD_3 =>
|
|
|
|
-- check for overflow or negative result (can't get both)
|
|
|
|
-- r.shift = -1
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- Driven unconditionally; only the overflow branch below raises
-- re_set_result, so that is the only branch that updates result_exp.
re_sel2 <= REXP2_NE;
|
|
|
|
if r.r(63) = '1' then
|
|
|
|
-- result is opposite sign to expected
|
|
|
|
v.result_sign := not r.result_sign;
|
|
|
|
-- invert the A-side operand and add a carry-in of 1; presumably a
-- two's complement negation of R (TODO confirm operand selects)
opsel_ainv <= '1';
|
|
|
|
carry_in <= '1';
|
|
|
|
v.state := FINISH;
|
|
|
|
elsif r.r(UNIT_BIT + 1) = '1' then
|
|
|
|
-- sum overflowed, shift right
|
|
|
|
opsel_r <= RES_SHIFT;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_set_result <= '1';
|
|
|
|
set_x := '1';
|
|
|
|
-- exp_huge is defined outside this excerpt; when it is set the
-- overflow rounding state is used instead of plain rounding
if exp_huge = '1' then
|
|
|
|
v.state := ROUND_OFLOW;
|
|
|
|
else
|
|
|
|
v.state := ROUNDING;
|
|
|
|
end if;
|
|
|
|
-- unit bit set and no overflow: no shift is requested in this branch
elsif r.r(UNIT_BIT) = '1' then
|
|
|
|
set_x := '1';
|
|
|
|
v.state := ROUNDING;
|
|
|
|
-- taken when r_hi_nz, r_lo_nz and every R bit below DP_LSB are all 0
elsif (r_hi_nz or r_lo_nz or (or (r.r(DP_LSB - 1 downto 0)))) = '0' then
|
|
|
|
-- r.x must be zero at this point
|
|
|
|
v.result_class := ZERO;
|
|
|
|
arith_done := '1';
|
|
|
|
else
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- some lower bit of R is set: load the normalization shift count
-- into v.shift (rs_norm) and go normalize
rs_norm <= '1';
|
|
|
|
v.state := NORMALIZE;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
-- CMP_1: set up the adder for the comparison that CMP_2 evaluates.
when CMP_1 =>
|
|
|
|
-- r.opsel_a = AIN_A
|
|
|
|
-- B operand is R, inverted, with a carry-in of 1; together with the A
-- operand noted above this presumably computes A - R (TODO confirm)
opsel_b <= BIN_R;
|
|
|
|
opsel_binv <= '1';
|
|
|
|
carry_in <= '1';
|
|
|
|
v.state := CMP_2;
|
|
|
|
|
|
|
|
-- CMP_2: turn the sign / zero status of R into the 4-bit compare result,
-- copy it into the FPSCR FL..FU field and complete the instruction.
when CMP_2 =>
|
|
|
|
-- top bit of R set
if r.r(63) = '1' then
|
|
|
|
-- A is smaller in magnitude
|
|
|
|
-- two leading result bits are (not sign of A, sign of A)
v.cr_result := not r.a.negative & r.a.negative & "00";
|
|
|
|
-- neither r_hi_nz nor r_lo_nz is set
elsif (r_hi_nz or r_lo_nz) = '0' then
|
|
|
|
-- only the third bit is set (presumably the "equal" bit -- confirm
-- against the FL/FG/FE/FU bit order)
v.cr_result := "0010";
|
|
|
|
else
|
|
|
|
-- mirror image of the first branch: (sign of A, not sign of A)
v.cr_result := r.a.negative & not r.a.negative & "00";
|
|
|
|
end if;
|
|
|
|
-- the same four bits are also stored in the FPSCR field FL..FU
v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result;
|
|
|
|
v.instr_done := '1';
|
|
|
|
|
|
|
|
-- MULT_1: present the multiply request (valid follows r.first) and move to
-- FINISH once the multiplier reports a valid result; v.state is not
-- assigned here until then.
when MULT_1 =>
|
|
|
|
f_to_multiply.valid <= r.first;
|
|
|
|
-- result select for R: RES_MULT (multiplier output, going by the name)
opsel_r <= RES_MULT;
|
|
|
|
if multiply_to_f.valid = '1' then
|
|
|
|
v.state := FINISH;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
-- FMADD_0: multiply-add path for the case noted below as "Addend is bigger";
-- sets up the shift count, waits for the multiplier, then goes to ADD_SHIFT.
when FMADD_0 =>
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- r.shift is b.exp, so new_exp is a.exp + c.exp - b.exp
|
|
|
|
-- (first time through; subsequent times we preserve v.shift)
|
|
|
|
-- Addend is bigger here
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- set shift to a.exp + c.exp - b.exp
|
|
|
|
-- note v.shift is at most -2 here
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- first pass: take the first shift operand from RSH1_NE (presumably
-- new_exp); later passes: RSH1_S (presumably r.shift, so the value
-- computed on the first pass is carried while waiting)
if r.first = '1' then
|
|
|
|
rs_sel1 <= RSH1_NE;
|
|
|
|
else
|
|
|
|
rs_sel1 <= RSH1_S;
|
|
|
|
end if;
|
|
|
|
-- multiplier selections for both R and S; set_s requests the S update
opsel_r <= RES_MULT;
|
|
|
|
opsel_s <= S_MULT;
|
|
|
|
set_s := '1';
|
|
|
|
-- once the multiplier reports valid: clear longmask, go to ADD_SHIFT
if multiply_to_f.valid = '1' then
|
|
|
|
v.longmask := '0';
|
|
|
|
v.state := ADD_SHIFT;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
-- FMADD_1: multiply-add path for the case noted below as "product is bigger";
-- sets the result sign, writes result_exp and prepares the shift count used
-- by FMADD_2.
when FMADD_1 =>
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- shift is b.exp, so new_exp is a.exp + c.exp - b.exp
|
|
|
|
-- product is bigger here
|
|
|
|
-- shift B right and use it as the addend to the multiplier
|
|
|
|
-- for subtract, multiplier does B - A * C
|
|
|
|
-- sign is the XOR of the A and C signs and the subtract flag
v.result_sign := r.a.negative xor r.c.negative xor r.is_subtract;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- write result_exp with REXP2_B (presumably the B exponent) selected
-- as the second exponent operand; the first-operand select is not set
-- in this state
re_sel2 <= REXP2_B;
|
|
|
|
re_set_result <= '1';
|
|
|
|
-- set shift to b.exp - result_exp + 64
|
|
|
|
-- first shift operand RSH1_NE, negated via rs_neg1, plus constant 64
rs_sel1 <= RSH1_NE;
|
|
|
|
rs_neg1 <= '1';
|
|
|
|
rs_con2 <= RSCON2_64;
|
|
|
|
v.state := FMADD_2;
|
|
|
|
|
|
|
|
-- FMADD_2: update S from the S_SHIFT selection and take the +64 offset back
-- out of r.shift before FMADD_3.
when FMADD_2 =>
|
|
|
|
-- Product is potentially bigger here
|
|
|
|
-- r.shift = addend exp - product exp + 64, r.r = r.b.mantissa
|
|
|
|
-- request an S update, sourced from the S_SHIFT selection
set_s := '1';
|
|
|
|
opsel_s <= S_SHIFT;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- set shift to r.shift - 64
|
|
|
|
-- RSH1_S as first operand, constant 64 negated via rs_neg2 as second
rs_sel1 <= RSH1_S;
|
|
|
|
rs_con2 <= RSCON2_64;
|
|
|
|
rs_neg2 <= '1';
|
|
|
|
v.state := FMADD_3;
|
|
|
|
|
|
|
|
-- FMADD_3: select RES_SHIFT for R, write result_exp using the REXP2_NE
-- selection, and set v.first so that FMADD_4 sees r.first = '1'.
when FMADD_3 =>
|
|
|
|
-- r.shift = addend exp - product exp
|
|
|
|
opsel_r <= RES_SHIFT;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- result_exp is written from the exponent adder (REXP2_NE selected)
re_sel2 <= REXP2_NE;
|
|
|
|
re_set_result <= '1';
|
|
|
|
-- FMADD_4 drives f_to_multiply.valid from r.first
v.first := '1';
|
|
|
|
v.state := FMADD_4;
|
|
|
|
|
|
|
|
-- FMADD_4: issue the multiply-add (valid follows r.first) and wait here
-- until the multiplier reports a valid result, then go to FMADD_5.
when FMADD_4 =>
|
|
|
|
-- addend select MULADD_RS (presumably the R:S pair -- TODO confirm)
msel_add <= MULADD_RS;
|
|
|
|
f_to_multiply.valid <= r.first;
|
|
|
|
-- the multiplier inversion control follows the subtract flag
msel_inv <= r.is_subtract;
|
|
|
|
-- multiplier selections for both R and S; set_s requests the S update
opsel_r <= RES_MULT;
|
|
|
|
opsel_s <= S_MULT;
|
|
|
|
set_s := '1';
|
|
|
|
if multiply_to_f.valid = '1' then
|
|
|
|
v.state := FMADD_5;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
-- FMADD_5: when the top bit of R is set, flip the result sign and negate
-- the value; in all cases load the RSCON2_UNIT shift constant and continue
-- to FMADD_6.
when FMADD_5 =>
|
|
|
|
-- negate R:S:X if negative
|
|
|
|
if r.r(63) = '1' then
|
|
|
|
v.result_sign := not r.result_sign;
|
|
|
|
opsel_ainv <= '1';
|
|
|
|
-- the +1 is carried into R only when s_nz and r.x are both 0
carry_in <= not (s_nz or r.x);
|
|
|
|
-- S is replaced using the S_NEG selection
opsel_s <= S_NEG;
|
|
|
|
set_s := '1';
|
|
|
|
end if;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- set shift to UNIT_BIT
|
|
|
|
rs_con2 <= RSCON2_UNIT;
|
|
|
|
v.state := FMADD_6;
|
|
|
|
|
|
|
|
-- FMADD_6: classify the multiply-add result held in R and S: exact zero,
-- non-zero bits only in S, already in final position, or needing
-- normalization.
when FMADD_6 =>
|
|
|
|
-- r.shift = UNIT_BIT (or 0, but only if r is now nonzero)
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- both selects are driven unconditionally; result_exp is only written
-- in the branch below that raises re_set_result
re_sel2 <= REXP2_NE;
|
|
|
|
rs_norm <= '1';
|
|
|
|
-- true when bit UNIT_BIT+2, r_hi_nz, r_lo_nz and all R bits below
-- DP_LSB are 0
if (r.r(UNIT_BIT + 2) or r_hi_nz or r_lo_nz or (or (r.r(DP_LSB - 1 downto 0)))) = '0' then
|
|
|
|
if s_nz = '0' then
|
|
|
|
-- must be a subtraction, and r.x must be zero
|
|
|
|
v.result_class := ZERO;
|
|
|
|
arith_done := '1';
|
|
|
|
else
|
|
|
|
-- R is all zeroes but there are non-zero bits in S
|
|
|
|
-- so shift them into R and set S to 0
|
|
|
|
opsel_r <= RES_SHIFT;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- commit the exponent adder output to result_exp for this shift
re_set_result <= '1';
|
|
|
|
set_s := '1';
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
v.state := FINISH;
|
|
|
|
end if;
|
|
|
|
-- bits UNIT_BIT+2 and UNIT_BIT+1 clear, UNIT_BIT set: no normalize step
elsif r.r(UNIT_BIT + 2 downto UNIT_BIT) = "001" then
|
|
|
|
v.state := FINISH;
|
|
|
|
else
|
|
|
|
v.state := NORMALIZE;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
-- LOOKUP: one wait cycle, then dispatch on instruction bits to DIV_2,
-- SQRT_1, FRE_1 or RSQRT_1.
when LOOKUP =>
|
|
|
|
-- r.opsel_a = AIN_B
|
|
|
|
-- wait one cycle for inverse_table[B] lookup
|
|
|
|
-- if this is a division, compute exponent
|
|
|
|
-- (see comment on RENORM_B2 above)
|
|
|
|
-- result_exp is only written when r.use_a is set
if r.use_a = '1' then
|
|
|
|
re_sel2 <= REXP2_NE;
|
|
|
|
re_set_result <= '1';
|
|
|
|
end if;
|
|
|
|
-- make r.first = '1' on entry to the next state
v.first := '1';
|
|
|
|
-- insn(4) = 0: insn(3) picks DIV_2 (0) or SQRT_1 (1)
-- insn(4) = 1: insn(2) picks FRE_1 (0) or RSQRT_1 (1)
if r.insn(4) = '0' then
|
|
|
|
if r.insn(3) = '0' then
|
|
|
|
v.state := DIV_2;
|
|
|
|
else
|
|
|
|
v.state := SQRT_1;
|
|
|
|
end if;
|
|
|
|
elsif r.insn(2) = '0' then
|
|
|
|
v.state := FRE_1;
|
|
|
|
else
|
|
|
|
v.state := RSQRT_1;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
-- DIV_2: first multiply of each division iteration (see the formula below);
-- moves to DIV_3 once the multiplier reports a valid result.
when DIV_2 =>
|
|
|
|
-- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
|
|
|
|
msel_1 <= MUL1_B;
|
|
|
|
msel_add <= MULADD_CONST;
|
|
|
|
msel_inv <= '1';
|
|
|
|
-- the first iteration uses the MUL2_LUT selection as second
-- multiplicand, later iterations use MUL2_P
if r.count = 0 then
|
|
|
|
msel_2 <= MUL2_LUT;
|
|
|
|
else
|
|
|
|
msel_2 <= MUL2_P;
|
|
|
|
end if;
|
|
|
|
-- set_y and the multiplier valid both follow r.first
set_y := r.first;
|
|
|
|
pshift := '1';
|
|
|
|
f_to_multiply.valid <= r.first;
|
|
|
|
if multiply_to_f.valid = '1' then
|
|
|
|
v.first := '1';
|
|
|
|
-- count the iteration; DIV_3 tests r.count = 3 to leave the loop
v.count := r.count + 1;
|
|
|
|
v.state := DIV_3;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
-- DIV_3: second multiply of each division iteration; when the multiplier is
-- done, either loop back to DIV_2 or continue to DIV_4.
when DIV_3 =>
|
|
|
|
-- compute Y = P = P * Y
|
|
|
|
msel_1 <= MUL1_Y;
|
|
|
|
msel_2 <= MUL2_P;
|
|
|
|
f_to_multiply.valid <= r.first;
|
|
|
|
pshift := '1';
|
|
|
|
if multiply_to_f.valid = '1' then
|
|
|
|
v.first := '1';
|
|
|
|
-- r.count is incremented in DIV_2; leave the loop once it reads 3
if r.count = 3 then
|
|
|
|
v.state := DIV_4;
|
|
|
|
else
|
|
|
|
v.state := DIV_2;
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
-- DIV_4: multiply that produces the quotient (see the formula below); moves
-- to DIV_5 once the multiplier reports a valid result.
when DIV_4 =>
|
|
|
|
-- compute R = P = A * Y (quotient)
|
|
|
|
msel_1 <= MUL1_A;
|
|
|
|
msel_2 <= MUL2_P;
|
|
|
|
set_y := r.first;
|
|
|
|
f_to_multiply.valid <= r.first;
|
|
|
|
pshift := '1';
|
|
|
|
mult_mask := '1';
|
|
|
|
if multiply_to_f.valid = '1' then
|
|
|
|
-- R is selected from the multiplier only once its result is valid
opsel_r <= RES_MULT;
|
|
|
|
-- make r.first = '1' for the multiply issued in DIV_5
v.first := '1';
|
|
|
|
v.state := DIV_5;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
-- DIV_5: multiply-subtract that produces the remainder (see the formula
-- below); moves to DIV_6 once the multiplier reports a valid result.
when DIV_5 =>
|
|
|
|
-- compute P = A - B * R (remainder)
|
|
|
|
msel_1 <= MUL1_B;
|
|
|
|
msel_2 <= MUL2_R;
|
|
|
|
-- addend select MULADD_A together with the inversion control
msel_add <= MULADD_A;
|
|
|
|
msel_inv <= '1';
|
|
|
|
f_to_multiply.valid <= r.first;
|
|
|
|
if multiply_to_f.valid = '1' then
|
|
|
|
v.state := DIV_6;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
-- DIV_6: final quotient fix-up driven by the comparison flags pcmpb_lt and
-- pcmpb_eq (computed outside this excerpt), then FINISH.
when DIV_6 =>
|
|
|
|
-- r.opsel_a = AIN_R
|
|
|
|
-- test if remainder is 0 or >= B
|
|
|
|
if pcmpb_lt = '1' then
|
|
|
|
-- quotient is correct, set X if remainder non-zero
|
|
|
|
v.x := r.p(UNIT_BIT + 2) or px_nz;
|
|
|
|
else
|
|
|
|
-- quotient needs to be incremented by 1 in R-bit position
|
|
|
|
-- the increment is supplied through the B operand (BIN_RND)
rbit_inc := '1';
|
|
|
|
opsel_b <= BIN_RND;
|
|
|
|
-- X is the inverse of pcmpb_eq
v.x := not pcmpb_eq;
|
|
|
|
end if;
|
|
|
|
v.state := FINISH;
|
|
|
|
|
|
|
|
when FRE_1 =>
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_sel1 <= REXP1_R;
|
|
|
|
re_neg1 <= '1';
|
|
|
|
re_set_result <= '1';
|
|
|
|
opsel_r <= RES_MISC;
|
|
|
|
misc_sel <= "0111";
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- set shift to 1
|
|
|
|
rs_con2 <= RSCON2_1;
|
|
|
|
v.state := NORMALIZE;
|
|
|
|
|
|
|
|
when FTDIV_1 =>
|
|
|
|
-- We go through this state up to two times; the first sees if
|
|
|
|
-- B.exponent is in the range [-1021,1020], and the second tests
|
|
|
|
-- whether B.exp - A.exp is in the range [-1022,1020].
|
|
|
|
v.cr_result(1) := exp_tiny or exp_huge;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- set shift to a.exp
|
|
|
|
rs_sel2 <= RSH2_A;
|
|
|
|
if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then
|
|
|
|
v.instr_done := '1';
|
|
|
|
else
|
|
|
|
v.doing_ftdiv := "10";
|
|
|
|
end if;
|
|
|
|
|
|
|
|
when RSQRT_1 =>
|
|
|
|
opsel_r <= RES_MISC;
|
|
|
|
misc_sel <= "0111";
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_sel1 <= REXP1_BHALF;
|
|
|
|
re_neg1 <= '1';
|
|
|
|
re_set_result <= '1';
|
|
|
|
-- set shift to 1
|
|
|
|
rs_con2 <= RSCON2_1;
|
|
|
|
v.state := NORMALIZE;
|
|
|
|
|
|
|
|
when SQRT_1 =>
|
|
|
|
-- put invsqr[B] in R and compute P = invsqr[B] * B
|
|
|
|
-- also transfer B (in R) to A
|
|
|
|
set_a := '1';
|
|
|
|
opsel_r <= RES_MISC;
|
|
|
|
misc_sel <= "0111";
|
|
|
|
msel_1 <= MUL1_B;
|
|
|
|
msel_2 <= MUL2_LUT;
|
|
|
|
f_to_multiply.valid <= '1';
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- set shift to -1
|
|
|
|
rs_con2 <= RSCON2_1;
|
|
|
|
rs_neg2 <= '1';
|
|
|
|
v.count := "00";
|
|
|
|
v.state := SQRT_2;
|
|
|
|
|
|
|
|
when SQRT_2 =>
|
|
|
|
-- shift R right one place
|
|
|
|
-- not expecting multiplier result yet
|
|
|
|
-- r.shift = -1
|
|
|
|
opsel_r <= RES_SHIFT;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_sel2 <= REXP2_NE;
|
|
|
|
re_set_result <= '1';
|
|
|
|
v.first := '1';
|
|
|
|
v.state := SQRT_3;
|
|
|
|
|
|
|
|
when SQRT_3 =>
|
|
|
|
-- put R into Y, wait for product from multiplier
|
|
|
|
msel_2 <= MUL2_R;
|
|
|
|
set_y := r.first;
|
|
|
|
pshift := '1';
|
|
|
|
mult_mask := '1';
|
|
|
|
if multiply_to_f.valid = '1' then
|
|
|
|
-- put result into R
|
|
|
|
opsel_r <= RES_MULT;
|
|
|
|
v.first := '1';
|
|
|
|
v.state := SQRT_4;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
when SQRT_4 =>
|
|
|
|
-- compute 1.5 - Y * P
|
|
|
|
msel_1 <= MUL1_Y;
|
|
|
|
msel_2 <= MUL2_P;
|
|
|
|
msel_add <= MULADD_CONST;
|
|
|
|
msel_inv <= '1';
|
|
|
|
f_to_multiply.valid <= r.first;
|
|
|
|
pshift := '1';
|
|
|
|
if multiply_to_f.valid = '1' then
|
|
|
|
v.state := SQRT_5;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
when SQRT_5 =>
|
|
|
|
-- compute Y = Y * P
|
|
|
|
msel_1 <= MUL1_Y;
|
|
|
|
msel_2 <= MUL2_P;
|
|
|
|
f_to_multiply.valid <= '1';
|
|
|
|
v.first := '1';
|
|
|
|
v.state := SQRT_6;
|
|
|
|
|
|
|
|
when SQRT_6 =>
|
|
|
|
-- pipeline in R = R * P
|
|
|
|
msel_1 <= MUL1_R;
|
|
|
|
msel_2 <= MUL2_P;
|
|
|
|
f_to_multiply.valid <= r.first;
|
|
|
|
pshift := '1';
|
|
|
|
if multiply_to_f.valid = '1' then
|
|
|
|
v.first := '1';
|
|
|
|
v.state := SQRT_7;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
when SQRT_7 =>
|
|
|
|
-- first multiply is done, put result in Y
|
|
|
|
msel_2 <= MUL2_P;
|
|
|
|
set_y := r.first;
|
|
|
|
-- wait for second multiply (should be here already)
|
|
|
|
pshift := '1';
|
|
|
|
mult_mask := '1';
|
|
|
|
if multiply_to_f.valid = '1' then
|
|
|
|
-- put result into R
|
|
|
|
opsel_r <= RES_MULT;
|
|
|
|
v.first := '1';
|
|
|
|
v.count := r.count + 1;
|
|
|
|
if r.count < 2 then
|
|
|
|
v.state := SQRT_4;
|
|
|
|
else
|
|
|
|
v.first := '1';
|
|
|
|
v.state := SQRT_8;
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
when SQRT_8 =>
|
|
|
|
-- compute P = A - R * R, which can be +ve or -ve
|
|
|
|
-- we arranged for B to be put into A earlier
|
|
|
|
msel_1 <= MUL1_R;
|
|
|
|
msel_2 <= MUL2_R;
|
|
|
|
msel_add <= MULADD_A;
|
|
|
|
msel_inv <= '1';
|
|
|
|
pshift := '1';
|
|
|
|
f_to_multiply.valid <= r.first;
|
|
|
|
if multiply_to_f.valid = '1' then
|
|
|
|
v.first := '1';
|
|
|
|
v.state := SQRT_9;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
when SQRT_9 =>
|
|
|
|
-- compute P = P * Y
|
|
|
|
-- since Y is an estimate of 1/sqrt(B), this makes P an
|
|
|
|
-- estimate of the adjustment needed to R. Since the error
|
|
|
|
-- could be negative and we have an unsigned multiplier, the
|
|
|
|
-- upper bits can be wrong, but it turns out the lowest 8 bits
|
|
|
|
-- are correct and are all we need (given 3 iterations through
|
|
|
|
-- SQRT_4 to SQRT_7).
|
|
|
|
msel_1 <= MUL1_Y;
|
|
|
|
msel_2 <= MUL2_P;
|
|
|
|
pshift := '1';
|
|
|
|
f_to_multiply.valid <= r.first;
|
|
|
|
if multiply_to_f.valid = '1' then
|
|
|
|
v.state := SQRT_10;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
when SQRT_10 =>
|
|
|
|
-- Add the bottom 8 bits of P, sign-extended, onto R.
|
|
|
|
opsel_b <= BIN_PS8;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_sel1 <= REXP1_BHALF;
|
|
|
|
re_set_result <= '1';
|
|
|
|
-- set shift to 1
|
|
|
|
rs_con2 <= RSCON2_1;
|
|
|
|
v.first := '1';
|
|
|
|
v.state := SQRT_11;
|
|
|
|
|
|
|
|
when SQRT_11 =>
|
|
|
|
-- compute P = A - R * R (remainder)
|
|
|
|
-- also put 2 * R + 1 into B for comparison with P
|
|
|
|
msel_1 <= MUL1_R;
|
|
|
|
msel_2 <= MUL2_R;
|
|
|
|
msel_add <= MULADD_A;
|
|
|
|
msel_inv <= '1';
|
|
|
|
f_to_multiply.valid <= r.first;
|
|
|
|
shiftin := '1';
|
|
|
|
set_b := r.first;
|
|
|
|
if multiply_to_f.valid = '1' then
|
|
|
|
v.state := SQRT_12;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
when SQRT_12 =>
|
|
|
|
-- test if remainder is 0 or >= B = 2*R + 1
|
|
|
|
if pcmpb_lt = '1' then
|
|
|
|
-- square root is correct, set X if remainder non-zero
|
|
|
|
v.x := r.p(UNIT_BIT + 2) or px_nz;
|
|
|
|
else
|
|
|
|
-- square root needs to be incremented by 1
|
|
|
|
carry_in <= '1';
|
|
|
|
v.x := not pcmpb_eq;
|
|
|
|
end if;
|
|
|
|
v.state := FINISH;
|
|
|
|
|
|
|
|
when INT_SHIFT =>
|
|
|
|
-- r.shift = b.exponent - 52
|
|
|
|
opsel_r <= RES_SHIFT;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_sel2 <= REXP2_NE;
|
|
|
|
re_set_result <= '1';
|
|
|
|
set_x := '1';
|
|
|
|
v.state := INT_ROUND;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- set shift to -4 (== 52 - UNIT_BIT)
|
|
|
|
rs_con2 <= RSCON2_UNIT_52;
|
|
|
|
rs_neg2 <= '1';
|
|
|
|
|
|
|
|
when INT_ROUND =>
|
|
|
|
-- r.shift = -4 (== 52 - UNIT_BIT)
|
|
|
|
opsel_r <= RES_SHIFT;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_sel2 <= REXP2_NE;
|
|
|
|
re_set_result <= '1';
|
|
|
|
round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
|
|
|
|
v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
|
|
|
|
-- Check for negative values that don't round to 0 for fcti*u*
|
|
|
|
if r.insn(8) = '1' and r.result_sign = '1' and
|
|
|
|
(r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
|
|
|
|
v.state := INT_OFLOW;
|
|
|
|
else
|
|
|
|
v.state := INT_FINAL;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
when INT_ISHIFT =>
|
|
|
|
-- r.shift = b.exponent - UNIT_BIT;
|
|
|
|
opsel_r <= RES_SHIFT;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_sel2 <= REXP2_NE;
|
|
|
|
re_set_result <= '1';
|
|
|
|
v.state := INT_FINAL;
|
|
|
|
|
|
|
|
when INT_FINAL =>
|
|
|
|
-- Negate if necessary, and increment for rounding if needed
|
|
|
|
opsel_ainv <= r.result_sign;
|
|
|
|
carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
|
|
|
|
-- Check for possible overflows
|
|
|
|
case r.insn(9 downto 8) is
|
|
|
|
when "00" => -- fctiw[z]
|
|
|
|
need_check := r.r(31) or (r.r(30) and not r.result_sign);
|
|
|
|
when "01" => -- fctiwu[z]
|
|
|
|
need_check := r.r(31);
|
|
|
|
when "10" => -- fctid[z]
|
|
|
|
need_check := r.r(63) or (r.r(62) and not r.result_sign);
|
|
|
|
when others => -- fctidu[z]
|
|
|
|
need_check := r.r(63);
|
|
|
|
end case;
|
|
|
|
int_result := '1';
|
|
|
|
if need_check = '1' then
|
|
|
|
v.state := INT_CHECK;
|
|
|
|
else
|
|
|
|
if r.fpscr(FPSCR_FI) = '1' then
|
|
|
|
v.fpscr(FPSCR_XX) := '1';
|
|
|
|
end if;
|
|
|
|
arith_done := '1';
|
|
|
|
end if;
|
|
|
|
|
|
|
|
when INT_CHECK =>
|
|
|
|
if r.insn(9) = '0' then
|
|
|
|
msb := r.r(31);
|
|
|
|
else
|
|
|
|
msb := r.r(63);
|
|
|
|
end if;
|
|
|
|
misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
|
|
|
|
if (r.insn(8) = '0' and msb /= r.result_sign) or
|
|
|
|
(r.insn(8) = '1' and msb /= '1') then
|
|
|
|
opsel_r <= RES_MISC;
|
|
|
|
v.fpscr(FPSCR_VXCVI) := '1';
|
|
|
|
invalid := '1';
|
|
|
|
else
|
|
|
|
if r.fpscr(FPSCR_FI) = '1' then
|
|
|
|
v.fpscr(FPSCR_XX) := '1';
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
int_result := '1';
|
|
|
|
arith_done := '1';
|
|
|
|
|
|
|
|
when INT_OFLOW =>
|
|
|
|
opsel_r <= RES_MISC;
|
|
|
|
misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
|
|
|
|
if r.b.class = NAN then
|
|
|
|
misc_sel(0) <= '1';
|
|
|
|
end if;
|
|
|
|
v.fpscr(FPSCR_VXCVI) := '1';
|
|
|
|
invalid := '1';
|
|
|
|
int_result := '1';
|
|
|
|
arith_done := '1';
|
|
|
|
|
|
|
|
when FRI_1 =>
|
|
|
|
-- r.shift = b.exponent - 52
|
|
|
|
opsel_r <= RES_SHIFT;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_sel2 <= REXP2_NE;
|
|
|
|
re_set_result <= '1';
|
|
|
|
set_x := '1';
|
|
|
|
v.state := ROUNDING;
|
|
|
|
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
when FINISH =>
|
|
|
|
if r.is_multiply = '1' and px_nz = '1' then
|
|
|
|
v.x := '1';
|
|
|
|
end if;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- set shift to new_exp - min_exp (N.B. rs_norm overrides this)
|
|
|
|
rs_sel1 <= RSH1_NE;
|
|
|
|
rs_con2 <= RSCON2_MINEXP;
|
|
|
|
rs_neg2 <= '1';
|
|
|
|
if r.r(63 downto UNIT_BIT) /= std_ulogic_vector(to_unsigned(1, 64 - UNIT_BIT)) then
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
rs_norm <= '1';
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
v.state := NORMALIZE;
|
|
|
|
else
|
|
|
|
set_x := '1';
|
|
|
|
if exp_tiny = '1' then
|
|
|
|
v.state := ROUND_UFLOW;
|
|
|
|
elsif exp_huge = '1' then
|
|
|
|
v.state := ROUND_OFLOW;
|
|
|
|
else
|
|
|
|
v.state := ROUNDING;
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
when NORMALIZE =>
|
|
|
|
-- Shift so we have 7 leading zeroes, i.e. the leading 1 ends up at UNIT_BIT (we know R is non-zero)
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- r.shift = clz(r.r) - 7
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
opsel_r <= RES_SHIFT;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_sel2 <= REXP2_NE;
|
|
|
|
re_set_result <= '1';
|
|
|
|
-- set shift to new_exp - min_exp
|
|
|
|
rs_sel1 <= RSH1_NE;
|
|
|
|
rs_con2 <= RSCON2_MINEXP;
|
|
|
|
rs_neg2 <= '1';
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
set_x := '1';
|
|
|
|
if exp_tiny = '1' then
|
|
|
|
v.state := ROUND_UFLOW;
|
|
|
|
elsif exp_huge = '1' then
|
|
|
|
v.state := ROUND_OFLOW;
|
|
|
|
else
|
|
|
|
v.state := ROUNDING;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
when ROUND_UFLOW =>
|
|
|
|
-- r.shift = - amount by which exponent underflows
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
v.tiny := '1';
|
|
|
|
if r.fpscr(FPSCR_UE) = '0' then
|
|
|
|
-- disabled underflow exception case
|
|
|
|
-- have to denormalize before rounding
|
|
|
|
opsel_r <= RES_SHIFT;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_sel2 <= REXP2_NE;
|
|
|
|
re_set_result <= '1';
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
set_x := '1';
|
|
|
|
v.state := ROUNDING;
|
|
|
|
else
|
|
|
|
-- enabled underflow exception case
|
|
|
|
-- if denormalized, have to normalize before rounding
|
|
|
|
v.fpscr(FPSCR_UX) := '1';
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_sel1 <= REXP1_R;
|
|
|
|
re_con2 <= RECON2_BIAS;
|
|
|
|
re_set_result <= '1';
|
|
|
|
if r.r(UNIT_BIT) = '0' then
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
rs_norm <= '1';
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
v.state := NORMALIZE;
|
|
|
|
else
|
|
|
|
v.state := ROUNDING;
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
when ROUND_OFLOW =>
|
|
|
|
v.fpscr(FPSCR_OX) := '1';
|
|
|
|
if r.fpscr(FPSCR_OE) = '0' then
|
|
|
|
-- disabled overflow exception
|
|
|
|
-- result depends on rounding mode
|
|
|
|
v.fpscr(FPSCR_XX) := '1';
|
|
|
|
v.fpscr(FPSCR_FI) := '1';
|
|
|
|
if r.round_mode(1 downto 0) = "00" or
|
|
|
|
(r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
|
|
|
|
v.result_class := INFINITY;
|
|
|
|
v.fpscr(FPSCR_FR) := '1';
|
|
|
|
else
|
|
|
|
v.fpscr(FPSCR_FR) := '0';
|
|
|
|
end if;
|
|
|
|
-- construct largest representable number
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_con2 <= RECON2_MAX;
|
|
|
|
re_set_result <= '1';
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
opsel_r <= RES_MISC;
|
|
|
|
misc_sel <= "001" & r.single_prec;
|
|
|
|
arith_done := '1';
|
|
|
|
else
|
|
|
|
-- enabled overflow exception
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_sel1 <= REXP1_R;
|
|
|
|
re_con2 <= RECON2_BIAS;
|
|
|
|
re_neg2 <= '1';
|
|
|
|
re_set_result <= '1';
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
v.state := ROUNDING;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
when ROUNDING =>
|
|
|
|
opsel_mask <= '1';
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
|
|
|
|
v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
|
|
|
|
if round(1) = '1' then
|
|
|
|
-- increment the LSB for the precision
|
|
|
|
opsel_b <= BIN_RND;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- set shift to -1
|
|
|
|
rs_con2 <= RSCON2_1;
|
|
|
|
rs_neg2 <= '1';
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
v.state := ROUNDING_2;
|
|
|
|
else
|
|
|
|
if r.r(UNIT_BIT) = '0' then
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
-- result after masking could be zero, or could be a
|
|
|
|
-- denormalized result that needs to be renormalized
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
rs_norm <= '1';
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
v.state := ROUNDING_3;
|
|
|
|
else
|
|
|
|
arith_done := '1';
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
if round(0) = '1' then
|
|
|
|
v.fpscr(FPSCR_XX) := '1';
|
|
|
|
if r.tiny = '1' then
|
|
|
|
v.fpscr(FPSCR_UX) := '1';
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
-- ROUNDING_2: post-rounding check of the two bits around the unit position
-- of R.  Three outcomes: a 1 in bit UNIT_BIT+1 (shift and possibly overflow),
-- a 0 in bit UNIT_BIT (renormalize via ROUNDING_3), or already normalized
-- (finish immediately).
when ROUNDING_2 =>
|
|
|
|
-- Check for overflow during rounding
|
|
|
|
-- r.shift = -1
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
-- clear the X flag in the next-state record
v.x := '0';
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- second exponent-adder input: presumably new_exp (r.result_exp - r.shift);
-- set unconditionally, only takes effect where re_set_result is raised
re_sel2 <= REXP2_NE;
|
|
|
|
-- R has a 1 in the bit above the unit bit
if r.r(UNIT_BIT + 1) = '1' then
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
-- NOTE(review): RES_SHIFT presumably routes the shifter output into R, which
-- with r.shift = -1 would be a one-bit right shift -- confirm
opsel_r <= RES_SHIFT;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- commit the exponent-adder sum into v.result_exp
re_set_result <= '1';
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
-- exp_huge set: hand over to the overflow state, otherwise the result is final
if exp_huge = '1' then
|
|
|
|
v.state := ROUND_OFLOW;
|
|
|
|
else
|
|
|
|
arith_done := '1';
|
|
|
|
end if;
|
|
|
|
-- unit bit is clear: R is not normalized
elsif r.r(UNIT_BIT) = '0' then
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
-- Do CLZ so we can renormalize the result
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- load v.shift with the normalization count derived from CLZ on R
rs_norm <= '1';
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
v.state := ROUNDING_3;
|
|
|
|
-- unit bit set and no bit above it: nothing more to do
else
|
|
|
|
arith_done := '1';
|
|
|
|
end if;
|
|
|
|
|
|
|
|
-- ROUNDING_3: renormalize R after rounding using the CLZ-derived shift
-- count loaded by the previous state.  A zero mantissa yields a ZERO result;
-- otherwise R is shifted and, if the new exponent is below -1022, the DENORM
-- state is entered.
when ROUNDING_3 =>
|
|
|
|
-- r.shift = clz(r.r) - 9
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
-- mantissa-nonzero test; r_lo_nz is ignored when r.single_prec = '1'
mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- second exponent-adder input: presumably new_exp (r.result_exp - r.shift)
re_sel2 <= REXP2_NE;
|
|
|
|
-- set shift to new_exp - min_exp (== -1022)
|
|
|
|
-- the next three assignments program the shift adder; set unconditionally,
-- although the value only matters if the DENORM state is entered
rs_sel1 <= RSH1_NE;
|
|
|
|
rs_con2 <= RSCON2_MINEXP;
|
|
|
|
-- negate the second operand so the adder subtracts min_exp
rs_neg2 <= '1';
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
-- nothing left in the mantissa: the result is zero
if mant_nz = '0' then
|
|
|
|
v.result_class := ZERO;
|
|
|
|
arith_done := '1';
|
|
|
|
else
|
|
|
|
-- Renormalize result after rounding
|
|
|
|
opsel_r <= RES_SHIFT;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- commit the exponent-adder sum into v.result_exp
re_set_result <= '1';
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
-- record the exp_tiny indication as the denorm flag of the result
v.denorm := exp_tiny;
|
|
|
|
-- exponent below -1022: one more shift is needed, done in DENORM
if new_exp < to_signed(-1022, EXP_BITS) then
|
|
|
|
v.state := DENORM;
|
|
|
|
else
|
|
|
|
arith_done := '1';
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
-- DENORM: final step for results whose exponent is below -1022.  Applies
-- the shift count prepared by the previous state to R, updates the result
-- exponent, and completes the operation.
when DENORM =>
|
|
|
|
-- r.shift = result_exp - -1022
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
-- NOTE(review): RES_SHIFT presumably routes the shifter output into R -- confirm
opsel_r <= RES_SHIFT;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- second exponent-adder input: presumably new_exp (r.result_exp - r.shift)
re_sel2 <= REXP2_NE;
|
|
|
|
-- commit the exponent-adder sum into v.result_exp
re_set_result <= '1';
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
arith_done := '1';
|
|
|
|
|
|
|
|
-- NAN_RESULT: at least one operand in use is expected to be a NaN.
-- Step 1 flags an invalid-operation exception if any used operand is a
-- signalling NaN (NaN class with the QNAN_BIT of its mantissa clear).
-- Step 2 picks the operand that supplies the result, with priority A, then
-- B, then C, and records its sign.  The result itself is produced by
-- EXC_RESULT in the next cycle.
when NAN_RESULT =>
|
|
|
|
-- only operands marked as used (r.use_a / r.use_b / r.use_c) are examined
if (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(QNAN_BIT) = '0') or
|
|
|
|
(r.use_b = '1' and r.b.class = NAN and r.b.mantissa(QNAN_BIT) = '0') or
|
|
|
|
(r.use_c = '1' and r.c.class = NAN and r.c.mantissa(QNAN_BIT) = '0') then
|
|
|
|
-- Signalling NAN
|
|
|
|
v.fpscr(FPSCR_VXSNAN) := '1';
|
|
|
|
invalid := '1';
|
|
|
|
end if;
|
|
|
|
-- choose the first NaN operand in A, B, C order as the source of the result;
-- if none matches, v.opsel_a and v.result_sign are left unchanged here
if r.use_a = '1' and r.a.class = NAN then
|
|
|
|
v.opsel_a := AIN_A;
|
|
|
|
v.result_sign := r.a.negative;
|
|
|
|
elsif r.use_b = '1' and r.b.class = NAN then
|
|
|
|
v.opsel_a := AIN_B;
|
|
|
|
v.result_sign := r.b.negative;
|
|
|
|
elsif r.use_c = '1' and r.c.class = NAN then
|
|
|
|
v.opsel_a := AIN_C;
|
|
|
|
v.result_sign := r.c.negative;
|
|
|
|
end if;
|
|
|
|
v.state := EXC_RESULT;
|
|
|
|
|
|
|
|
-- EXC_RESULT: deliver one of the input operands as the result.  The
-- operand was chosen by the previous state through r.opsel_a; this state
-- copies that operand's class and exponent into the result and finishes.
when EXC_RESULT =>
|
|
|
|
-- r.opsel_a = AIN_A, AIN_B or AIN_C according to which input is the result
|
|
|
|
case r.opsel_a is
|
|
|
|
when AIN_B =>
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- B exponent goes through the second exponent-adder input
re_sel2 <= REXP2_B;
|
|
|
|
v.result_class := r.b.class;
|
|
|
|
when AIN_C =>
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- C exponent goes through the second exponent-adder input
re_sel2 <= REXP2_C;
|
|
|
|
v.result_class := r.c.class;
|
|
|
|
-- any other selector value is treated as operand A
when others =>
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- A exponent is only available on the first exponent-adder input
re_sel1 <= REXP1_A;
|
|
|
|
v.result_class := r.a.class;
|
|
|
|
end case;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- commit the selected exponent into v.result_exp
-- NOTE(review): relies on the unselected adder input defaulting to 0 -- confirm
re_set_result <= '1';
|
|
|
|
arith_done := '1';
|
|
|
|
|
|
|
|
-- DO_IDIVMOD: entry state for the integer divide / modulo operations.
-- Computes the result sign, dispatches the zero-operand special cases to
-- IDIV_ZERO, and otherwise starts normalizing B (as |B| for signed
-- operations) in IDIV_NORMB.
when DO_IDIVMOD =>
|
|
|
|
-- r.opsel_a = AIN_B
|
|
|
|
-- unsigned operations always give a non-negative result; for signed ones the
-- sign is A xor B, except that B's sign is ignored when r.divmod = '1'
-- (presumably the modulo form, whose result follows the sign of A -- confirm)
v.result_sign := r.is_signed and (r.a.negative xor (r.b.negative and not r.divmod));
|
|
|
|
if r.b.class = ZERO then
|
|
|
|
-- B is zero, signal overflow
|
|
|
|
v.int_ovf := '1';
|
|
|
|
v.state := IDIV_ZERO;
|
|
|
|
elsif r.a.class = ZERO then
|
|
|
|
-- A is zero, result is zero (both for div and for mod)
|
|
|
|
v.state := IDIV_ZERO;
|
|
|
|
else
|
|
|
|
-- take absolute value for signed division, and
|
|
|
|
-- normalize and round up B to 8.56 format, like fcfid[u]
|
|
|
|
if r.is_signed = '1' and r.b.negative = '1' then
|
|
|
|
-- invert the A-side adder input and add 1: presumably a two's-complement
-- negation of B, given r.opsel_a = AIN_B -- confirm against the adder datapath
opsel_ainv <= '1';
|
|
|
|
carry_in <= '1';
|
|
|
|
end if;
|
|
|
|
v.result_class := FINITE;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- seed the result exponent with the constant selected by RECON2_UNIT
-- (presumably 56, the unit-bit position -- confirm)
re_con2 <= RECON2_UNIT;
|
|
|
|
re_set_result <= '1';
|
|
|
|
v.state := IDIV_NORMB;
|
|
|
|
end if;
|
|
|
|
-- IDIV_NORMB: first normalization step for the divisor.  Requests a
-- count-leading-zeroes on R (which now holds B or |B|) and keeps a copy
-- of that value in C before it is shifted.
when IDIV_NORMB =>
|
|
|
|
-- do count-leading-zeroes on B (now in R)
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- load v.shift with the normalization count derived from CLZ on R
rs_norm <= '1';
|
|
|
|
-- save the original value of B or |B| in C
|
|
|
|
set_c := '1';
|
|
|
|
v.state := IDIV_NORMB2;
|
|
|
|
-- IDIV_NORMB2: second normalization step for the divisor.  Shifts R by
-- the count computed in the previous state, tracks whether any 1 bits
-- were lost, and adjusts the result exponent to match.
when IDIV_NORMB2 =>
|
|
|
|
-- get B into the range [1, 2) in 8.56 format
|
|
|
|
set_x := '1'; -- record if any 1 bits shifted out
|
|
|
|
-- NOTE(review): RES_SHIFT presumably routes the shifter output into R -- confirm
opsel_r <= RES_SHIFT;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- result_exp := new_exp (r.result_exp - r.shift), presumably -- confirm
-- that REXP2_NE selects new_exp
re_sel2 <= REXP2_NE;
|
|
|
|
re_set_result <= '1';
|
|
|
|
v.state := IDIV_NORMB3;
|
|
|
|
when IDIV_NORMB3 =>
|
|
|
|
-- add the X bit onto R to round up B
|
|
|
|
carry_in <= r.x;
|
|
|
|
-- prepare to do count-leading-zeroes on A
|
|
|
|
v.opsel_a := AIN_A;
|
|
|
|
v.state := IDIV_CLZA;
|
|
|
|
when IDIV_CLZA =>
|
|
|
|
set_b := '1'; -- put R back into B
|
|
|
|
-- r.opsel_a = AIN_A
|
|
|
|
if r.is_signed = '1' and r.a.negative = '1' then
|
|
|
|
opsel_ainv <= '1';
|
|
|
|
carry_in <= '1';
|
|
|
|
end if;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
re_con2 <= RECON2_UNIT;
|
|
|
|
re_set_result <= '1';
|
|
|
|
v.opsel_a := AIN_C;
|
|
|
|
v.state := IDIV_CLZA2;
|
|
|
|
when IDIV_CLZA2 =>
|
|
|
|
-- r.opsel_a = AIN_C
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
rs_norm <= '1';
|
|
|
|
-- write the dividend back into A in case we negated it
|
|
|
|
set_a_mant := '1';
|
|
|
|
-- while doing the count-leading-zeroes on A,
|
|
|
|
-- also compute A - B to tell us whether A >= B
|
|
|
|
-- (using the original value of B, which is now in C)
|
|
|
|
opsel_b <= BIN_R;
|
|
|
|
opsel_ainv <= '1';
|
|
|
|
carry_in <= '1';
|
|
|
|
v.state := IDIV_CLZA3;
|
|
|
|
when IDIV_CLZA3 =>
|
|
|
|
-- save the exponent of A (but don't overwrite the mantissa)
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
set_a_exp := '1';
|
|
|
|
re_sel2 <= REXP2_NE;
|
|
|
|
re_set_result <= '1';
|
|
|
|
v.div_close := '0';
|
|
|
|
if new_exp = r.b.exponent then
|
|
|
|
v.div_close := '1';
|
|
|
|
end if;
|
|
|
|
v.state := IDIV_NR0;
|
|
|
|
if new_exp > r.b.exponent or (v.div_close = '1' and r.r(63) = '0') then
|
|
|
|
-- A >= B, overflow if extended division
|
|
|
|
if r.divext = '1' then
|
|
|
|
v.int_ovf := '1';
|
|
|
|
-- return 0 in overflow cases
|
|
|
|
v.state := IDIV_ZERO;
|
|
|
|
end if;
|
|
|
|
else
|
|
|
|
-- A < B, result is zero for normal division
|
|
|
|
if r.divmod = '0' and r.divext = '0' then
|
|
|
|
v.state := IDIV_ZERO;
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
when IDIV_NR0 =>
|
|
|
|
-- reduce number of Newton-Raphson iterations for small A
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
if r.divext = '1' or r.result_exp >= to_signed(32, EXP_BITS) then
|
|
|
|
v.count := "00";
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
elsif r.result_exp >= to_signed(16, EXP_BITS) then
|
|
|
|
v.count := "01";
|
|
|
|
else
|
|
|
|
v.count := "10";
|
|
|
|
end if;
|
|
|
|
-- first NR iteration does Y = LUT; P = 2 - B * LUT
|
|
|
|
msel_1 <= MUL1_B;
|
|
|
|
msel_add <= MULADD_CONST;
|
|
|
|
msel_inv <= '1';
|
|
|
|
msel_2 <= MUL2_LUT;
|
|
|
|
set_y := '1';
|
|
|
|
if r.b.mantissa(UNIT_BIT + 1) = '1' then
|
|
|
|
-- rounding up of the mantissa caused overflow, meaning the
|
|
|
|
-- normalized B is 2.0. Since this is outside the range
|
|
|
|
-- of the LUT, just use 0.5 as the estimated inverse.
|
|
|
|
v.state := IDIV_USE0_5;
|
|
|
|
else
|
|
|
|
-- start the first multiply now
|
|
|
|
f_to_multiply.valid <= '1';
|
|
|
|
-- note we don't set v.first, thus the following IDIV_NR1
|
|
|
|
-- state doesn't start a multiply (we already did that)
|
|
|
|
v.state := IDIV_NR1;
|
|
|
|
end if;
|
|
|
|
when IDIV_NR1 =>
|
|
|
|
-- subsequent NR iterations do Y = P; P = 2 - B * P
|
|
|
|
msel_1 <= MUL1_B;
|
|
|
|
msel_add <= MULADD_CONST;
|
|
|
|
msel_inv <= '1';
|
|
|
|
msel_2 <= MUL2_P;
|
|
|
|
set_y := r.first;
|
|
|
|
pshift := '1';
|
|
|
|
f_to_multiply.valid <= r.first;
|
|
|
|
if multiply_to_f.valid = '1' then
|
|
|
|
v.first := '1';
|
|
|
|
v.count := r.count + 1;
|
|
|
|
v.state := IDIV_NR2;
|
|
|
|
end if;
|
|
|
|
when IDIV_NR2 =>
|
|
|
|
-- compute P = Y * P
|
|
|
|
msel_1 <= MUL1_Y;
|
|
|
|
msel_2 <= MUL2_P;
|
|
|
|
f_to_multiply.valid <= r.first;
|
|
|
|
pshift := '1';
|
|
|
|
v.opsel_a := AIN_A;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- set shift to 64
|
|
|
|
rs_con2 <= RSCON2_64;
|
|
|
|
-- Get 0.5 into R in case the inverse estimate turns out to be
|
|
|
|
-- less than 0.5, in which case we want to use 0.5, to avoid
|
|
|
|
-- infinite loops in some cases.
|
|
|
|
opsel_r <= RES_MISC;
|
|
|
|
misc_sel <= "0001";
|
|
|
|
if multiply_to_f.valid = '1' then
|
|
|
|
v.first := '1';
|
|
|
|
if r.count = "11" then
|
|
|
|
v.state := IDIV_DODIV;
|
|
|
|
else
|
|
|
|
v.state := IDIV_NR1;
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
when IDIV_USE0_5 =>
|
|
|
|
-- Get 0.5 into R; it turns out the generated
|
|
|
|
-- QNaN mantissa is actually what we want
|
|
|
|
opsel_r <= RES_MISC;
|
|
|
|
misc_sel <= "0001";
|
|
|
|
v.opsel_a := AIN_A;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- set shift to 64
|
|
|
|
rs_con2 <= RSCON2_64;
|
|
|
|
v.state := IDIV_DODIV;
|
|
|
|
when IDIV_DODIV =>
|
|
|
|
-- r.opsel_a = AIN_A
|
|
|
|
-- r.shift = 64
|
|
|
|
-- inverse estimate is in P or in R; copy it to Y
|
|
|
|
if r.b.mantissa(UNIT_BIT + 1) = '1' or
|
|
|
|
(r.p(UNIT_BIT) = '0' and r.p(UNIT_BIT - 1) = '0') then
|
|
|
|
msel_2 <= MUL2_R;
|
|
|
|
else
|
|
|
|
msel_2 <= MUL2_P;
|
|
|
|
end if;
|
|
|
|
set_y := '1';
|
|
|
|
-- shift_res is 0 because r.shift = 64;
|
|
|
|
-- put that into B, which now holds the quotient
|
|
|
|
set_b_mant := '1';
|
|
|
|
if r.divext = '0' then
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- set shift to -UNIT_BIT (== -56)
|
|
|
|
rs_con2 <= RSCON2_UNIT;
|
|
|
|
rs_neg2 <= '1';
|
|
|
|
v.first := '1';
|
|
|
|
v.state := IDIV_DIV;
|
|
|
|
elsif r.single_prec = '1' then
|
|
|
|
-- divwe[u][o], shift A left 32 bits
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- set shift to 32
|
|
|
|
rs_con2 <= RSCON2_32;
|
|
|
|
v.state := IDIV_SH32;
|
|
|
|
elsif r.div_close = '0' then
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- set shift to 64 - UNIT_BIT (== 8)
|
|
|
|
rs_con2 <= RSCON2_64_UNIT;
|
|
|
|
v.state := IDIV_EXTDIV;
|
|
|
|
else
|
|
|
|
-- handle top bit of quotient specially
|
|
|
|
-- for this we need the divisor left-justified in B
|
|
|
|
v.opsel_a := AIN_C;
|
|
|
|
v.state := IDIV_EXT_TBH;
|
|
|
|
end if;
|
|
|
|
when IDIV_SH32 =>
|
|
|
|
-- r.shift = 32, R contains the dividend
|
|
|
|
opsel_r <= RES_SHIFT;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- set shift to -UNIT_BIT (== -56)
|
|
|
|
rs_con2 <= RSCON2_UNIT;
|
|
|
|
rs_neg2 <= '1';
|
|
|
|
v.first := '1';
|
|
|
|
v.state := IDIV_DIV;
|
|
|
|
when IDIV_DIV =>
|
|
|
|
-- Dividing A by C, r.shift = -56; A is in R
|
|
|
|
-- Put A into the bottom 64 bits of Ahi/A/Alo
|
|
|
|
set_a_mant := r.first;
|
|
|
|
set_a_lo := r.first;
|
|
|
|
-- compute R = R * Y (quotient estimate)
|
|
|
|
msel_1 <= MUL1_Y;
|
|
|
|
msel_2 <= MUL2_R;
|
|
|
|
f_to_multiply.valid <= r.first;
|
|
|
|
pshift := '1';
|
|
|
|
opsel_r <= RES_MULT;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- set shift to -b.exponent
|
|
|
|
rs_sel1 <= RSH1_B;
|
|
|
|
rs_neg1 <= '1';
|
|
|
|
if multiply_to_f.valid = '1' then
|
|
|
|
v.state := IDIV_DIV2;
|
|
|
|
end if;
|
|
|
|
when IDIV_DIV2 =>
|
|
|
|
-- r.shift = - b.exponent
|
|
|
|
-- shift the quotient estimate right by b.exponent bits
|
|
|
|
opsel_r <= RES_SHIFT;
|
|
|
|
v.first := '1';
|
|
|
|
v.state := IDIV_DIV3;
|
|
|
|
when IDIV_DIV3 =>
|
|
|
|
-- quotient (so far) is in R; multiply by C and subtract from A
|
|
|
|
msel_1 <= MUL1_R;
|
|
|
|
msel_2 <= MUL2_C;
|
|
|
|
msel_add <= MULADD_A;
|
|
|
|
msel_inv <= '1';
|
|
|
|
f_to_multiply.valid <= r.first;
|
|
|
|
-- store the current quotient estimate in B
|
|
|
|
set_b_mant := r.first;
|
|
|
|
opsel_r <= RES_MULT;
|
|
|
|
opsel_s <= S_MULT;
|
|
|
|
set_s := '1';
|
|
|
|
if multiply_to_f.valid = '1' then
|
|
|
|
v.state := IDIV_DIV4;
|
|
|
|
end if;
|
|
|
|
when IDIV_DIV4 =>
|
|
|
|
-- remainder is in R/S and P
|
|
|
|
msel_1 <= MUL1_Y;
|
|
|
|
msel_2 <= MUL2_P;
|
|
|
|
v.inc_quot := not pcmpc_lt and not r.divmod;
|
|
|
|
if r.divmod = '0' then
|
|
|
|
v.opsel_a := AIN_B;
|
|
|
|
end if;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- set shift to UNIT_BIT (== 56)
|
|
|
|
rs_con2 <= RSCON2_UNIT;
|
|
|
|
if pcmpc_lt = '1' or pcmpc_eq = '1' then
|
|
|
|
if r.divmod = '0' then
|
|
|
|
v.state := IDIV_DIVADJ;
|
|
|
|
elsif pcmpc_eq = '1' then
|
|
|
|
v.state := IDIV_ZERO;
|
|
|
|
else
|
|
|
|
v.state := IDIV_MODADJ;
|
|
|
|
end if;
|
|
|
|
else
|
|
|
|
-- need to do another iteration, compute P * Y
|
|
|
|
f_to_multiply.valid <= '1';
|
|
|
|
v.state := IDIV_DIV5;
|
|
|
|
end if;
|
|
|
|
when IDIV_DIV5 =>
|
|
|
|
pshift := '1';
|
|
|
|
opsel_r <= RES_MULT;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- set shift to - b.exp
|
|
|
|
rs_sel1 <= RSH1_B;
|
|
|
|
rs_neg1 <= '1';
|
|
|
|
if multiply_to_f.valid = '1' then
|
|
|
|
v.state := IDIV_DIV6;
|
|
|
|
end if;
|
|
|
|
when IDIV_DIV6 =>
|
|
|
|
-- r.shift = - b.exponent
|
|
|
|
-- shift the quotient estimate right by b.exponent bits
|
|
|
|
opsel_r <= RES_SHIFT;
|
|
|
|
v.opsel_a := AIN_B;
|
|
|
|
v.first := '1';
|
|
|
|
v.state := IDIV_DIV7;
|
|
|
|
when IDIV_DIV7 =>
|
|
|
|
-- r.opsel_a = AIN_B
|
|
|
|
-- add shifted quotient delta onto the total quotient
|
|
|
|
opsel_b <= BIN_R;
|
|
|
|
v.first := '1';
|
|
|
|
v.state := IDIV_DIV8;
|
|
|
|
when IDIV_DIV8 =>
|
|
|
|
-- quotient (so far) is in R; multiply by C and subtract from A
|
|
|
|
msel_1 <= MUL1_R;
|
|
|
|
msel_2 <= MUL2_C;
|
|
|
|
msel_add <= MULADD_A;
|
|
|
|
msel_inv <= '1';
|
|
|
|
f_to_multiply.valid <= r.first;
|
|
|
|
-- store the current quotient estimate in B
|
|
|
|
set_b_mant := r.first;
|
|
|
|
opsel_r <= RES_MULT;
|
|
|
|
opsel_s <= S_MULT;
|
|
|
|
set_s := '1';
|
|
|
|
if multiply_to_f.valid = '1' then
|
|
|
|
v.state := IDIV_DIV9;
|
|
|
|
end if;
|
|
|
|
when IDIV_DIV9 =>
|
|
|
|
-- remainder is in R/S and P
|
|
|
|
msel_1 <= MUL1_Y;
|
|
|
|
msel_2 <= MUL2_P;
|
|
|
|
v.inc_quot := not pcmpc_lt and not r.divmod;
|
|
|
|
if r.divmod = '0' then
|
|
|
|
v.opsel_a := AIN_B;
|
|
|
|
end if;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- set shift to UNIT_BIT (== 56)
|
|
|
|
rs_con2 <= RSCON2_UNIT;
|
|
|
|
if r.divmod = '0' then
|
|
|
|
v.state := IDIV_DIVADJ;
|
|
|
|
elsif pcmpc_eq = '1' then
|
|
|
|
v.state := IDIV_ZERO;
|
|
|
|
else
|
|
|
|
v.state := IDIV_MODADJ;
|
|
|
|
end if;
|
|
|
|
when IDIV_EXT_TBH =>
|
|
|
|
-- r.opsel_a = AIN_C; get divisor into R and prepare to shift left
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- set shift to 63 - b.exp
|
|
|
|
rs_sel1 <= RSH1_B;
|
|
|
|
rs_neg1 <= '1';
|
|
|
|
rs_con2 <= RSCON2_63;
|
|
|
|
v.opsel_a := AIN_A;
|
|
|
|
v.state := IDIV_EXT_TBH2;
|
|
|
|
when IDIV_EXT_TBH2 =>
|
|
|
|
-- r.opsel_a = AIN_A; divisor is in R
|
|
|
|
-- r.shift = 63 - b.exponent; shift and put into B
|
|
|
|
set_b_mant := '1';
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- set shift to 64 - UNIT_BIT (== 8)
|
|
|
|
rs_con2 <= RSCON2_64_UNIT;
|
|
|
|
v.state := IDIV_EXT_TBH3;
|
|
|
|
when IDIV_EXT_TBH3 =>
|
|
|
|
-- Dividing (A << 64) by C
|
|
|
|
-- r.shift = 8
|
|
|
|
-- Put A in the top 64 bits of Ahi/A/Alo
|
|
|
|
set_a_hi := '1';
|
|
|
|
set_a_mant := '1';
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- set shift to 64 - b.exp
|
|
|
|
rs_sel1 <= RSH1_B;
|
|
|
|
rs_neg1 <= '1';
|
|
|
|
rs_con2 <= RSCON2_64;
|
|
|
|
v.state := IDIV_EXT_TBH4;
|
|
|
|
when IDIV_EXT_TBH4 =>
|
|
|
|
-- dividend (A) is in R
|
|
|
|
-- r.shift = 64 - B.exponent, so is at least 1
|
|
|
|
opsel_r <= RES_SHIFT;
|
|
|
|
-- top bit of A gets lost in the shift, so handle it specially
|
|
|
|
v.opsel_a := AIN_B;
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- set shift to 63
|
|
|
|
rs_con2 <= RSCON2_63;
|
|
|
|
v.state := IDIV_EXT_TBH5;
|
|
|
|
when IDIV_EXT_TBH5 =>
|
|
|
|
-- r.opsel_a = AIN_B, r.shift = 63
|
|
|
|
-- shifted dividend is in R, subtract left-justified divisor
|
|
|
|
opsel_b <= BIN_R;
|
|
|
|
opsel_ainv <= '1';
|
|
|
|
carry_in <= '1';
|
|
|
|
-- and put 1<<63 into B as the initial quotient (S is still 0)
|
|
|
|
shiftin0 := '1';
|
|
|
|
set_b_mant := '1';
|
|
|
|
v.first := '1';
|
|
|
|
v.state := IDIV_EXTDIV2;
|
|
|
|
when IDIV_EXTDIV =>
|
|
|
|
-- Dividing (A << 64) by C
|
|
|
|
-- r.shift = 8
|
|
|
|
-- Put A in the top 64 bits of Ahi/A/Alo
|
|
|
|
set_a_hi := '1';
|
|
|
|
set_a_mant := '1';
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- set shift to 64 - b.exp
|
|
|
|
rs_sel1 <= RSH1_B;
|
|
|
|
rs_neg1 <= '1';
|
|
|
|
rs_con2 <= RSCON2_64;
|
|
|
|
v.state := IDIV_EXTDIV1;
|
|
|
|
when IDIV_EXTDIV1 =>
|
|
|
|
-- dividend is in R
|
|
|
|
-- r.shift = 64 - B.exponent
|
|
|
|
opsel_r <= RES_SHIFT;
|
|
|
|
v.first := '1';
|
|
|
|
v.state := IDIV_EXTDIV2;
|
|
|
|
when IDIV_EXTDIV2 =>
|
|
|
|
-- shifted remainder is in R; compute R = R * Y (quotient estimate)
|
|
|
|
msel_1 <= MUL1_Y;
|
|
|
|
msel_2 <= MUL2_R;
|
|
|
|
f_to_multiply.valid <= r.first;
|
|
|
|
pshift := '1';
|
|
|
|
v.opsel_a := AIN_B;
|
|
|
|
opsel_r <= RES_MULT;
|
|
|
|
if multiply_to_f.valid = '1' then
|
|
|
|
v.first := '1';
|
|
|
|
v.state := IDIV_EXTDIV3;
|
|
|
|
end if;
|
|
|
|
when IDIV_EXTDIV3 =>
|
|
|
|
-- r.opsel_a = AIN_B
|
|
|
|
-- delta quotient is in R; add it to B
|
|
|
|
opsel_b <= BIN_R;
|
|
|
|
v.first := '1';
|
|
|
|
v.state := IDIV_EXTDIV4;
|
|
|
|
when IDIV_EXTDIV4 =>
|
|
|
|
-- quotient is in R; put it in B and compute remainder
|
|
|
|
set_b_mant := r.first;
|
|
|
|
msel_1 <= MUL1_R;
|
|
|
|
msel_2 <= MUL2_C;
|
|
|
|
msel_add <= MULADD_A;
|
|
|
|
msel_inv <= '1';
|
|
|
|
f_to_multiply.valid <= r.first;
|
|
|
|
opsel_r <= RES_MULT;
|
|
|
|
opsel_s <= S_MULT;
|
|
|
|
set_s := '1';
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- set shift to UNIT_BIT - b.exp
|
|
|
|
rs_sel1 <= RSH1_B;
|
|
|
|
rs_neg1 <= '1';
|
|
|
|
rs_con2 <= RSCON2_UNIT;
|
|
|
|
if multiply_to_f.valid = '1' then
|
|
|
|
v.state := IDIV_EXTDIV5;
|
|
|
|
end if;
|
|
|
|
when IDIV_EXTDIV5 =>
|
|
|
|
-- r.shift = 56 - r.b.exponent (UNIT_BIT - b.exp, set in IDIV_EXTDIV4)
|
|
|
|
-- remainder is in R/S; shift it right r.b.exponent bits
|
|
|
|
opsel_r <= RES_SHIFT;
|
|
|
|
-- test LS 64b of remainder in P against divisor in C
|
|
|
|
v.inc_quot := not pcmpc_lt;
|
|
|
|
v.opsel_a := AIN_B;
|
|
|
|
v.state := IDIV_EXTDIV6;
|
|
|
|
when IDIV_EXTDIV6 =>
|
|
|
|
-- r.opsel_a = AIN_B
|
|
|
|
-- shifted remainder is in R, see if it is > 1
|
|
|
|
-- and compute R = R * Y if so
|
|
|
|
msel_1 <= MUL1_Y;
|
|
|
|
msel_2 <= MUL2_R;
|
|
|
|
pshift := '1';
|
|
|
|
if r_gt_1 = '1' then
|
|
|
|
f_to_multiply.valid <= '1';
|
|
|
|
v.state := IDIV_EXTDIV2;
|
|
|
|
else
|
|
|
|
v.state := IDIV_DIVADJ;
|
|
|
|
end if;
|
|
|
|
when IDIV_MODADJ =>
|
|
|
|
-- r.shift = 56
|
|
|
|
-- result is in R/S
|
|
|
|
opsel_r <= RES_SHIFT;
|
|
|
|
if pcmpc_lt = '0' then
|
|
|
|
v.opsel_a := AIN_C;
|
|
|
|
v.state := IDIV_MODSUB;
|
|
|
|
elsif r.result_sign = '0' then
|
|
|
|
v.state := IDIV_DONE;
|
|
|
|
else
|
|
|
|
v.state := IDIV_DIVADJ;
|
|
|
|
end if;
|
|
|
|
when IDIV_MODSUB =>
|
|
|
|
-- r.opsel_a = AIN_C
|
|
|
|
-- Subtract divisor from remainder
|
|
|
|
opsel_ainv <= '1';
|
|
|
|
carry_in <= '1';
|
|
|
|
opsel_b <= BIN_R;
|
|
|
|
if r.result_sign = '0' then
|
|
|
|
v.state := IDIV_DONE;
|
|
|
|
else
|
|
|
|
v.state := IDIV_DIVADJ;
|
|
|
|
end if;
|
|
|
|
when IDIV_DIVADJ =>
|
|
|
|
-- result (so far) is on the A input of the adder
|
|
|
|
-- set carry to increment quotient if needed
|
|
|
|
-- and also negate R if the answer is negative
|
|
|
|
opsel_ainv <= r.result_sign;
|
|
|
|
carry_in <= r.inc_quot xor r.result_sign;
|
|
|
|
rnd_b32 := '1';
|
|
|
|
if r.divmod = '0' then
|
|
|
|
opsel_b <= BIN_RND;
|
|
|
|
end if;
|
|
|
|
if r.is_signed = '0' then
|
|
|
|
v.state := IDIV_DONE;
|
|
|
|
else
|
|
|
|
v.state := IDIV_OVFCHK;
|
|
|
|
end if;
|
|
|
|
when IDIV_OVFCHK =>
|
|
|
|
if r.single_prec = '0' then
|
|
|
|
sign_bit := r.r(63);
|
|
|
|
else
|
|
|
|
sign_bit := r.r(31);
|
|
|
|
end if;
|
|
|
|
v.int_ovf := sign_bit xor r.result_sign;
|
|
|
|
if v.int_ovf = '1' then
|
|
|
|
v.state := IDIV_ZERO;
|
|
|
|
else
|
|
|
|
v.state := IDIV_DONE;
|
|
|
|
end if;
|
|
|
|
when IDIV_DONE =>
|
|
|
|
v.xerc_result := v.xerc;
|
|
|
|
if r.oe = '1' then
|
|
|
|
v.xerc_result.ov := '0';
|
|
|
|
v.xerc_result.ov32 := '0';
|
|
|
|
v.writing_xer := '1';
|
|
|
|
end if;
|
|
|
|
if r.m32b = '0' then
|
|
|
|
v.cr_result(3) := r.r(63);
|
|
|
|
v.cr_result(2 downto 1) := "00";
|
|
|
|
if r.r = 64x"0" then
|
|
|
|
v.cr_result(1) := '1';
|
|
|
|
else
|
|
|
|
v.cr_result(2) := not r.r(63);
|
|
|
|
end if;
|
|
|
|
else
|
|
|
|
v.cr_result(3) := r.r(31);
|
|
|
|
v.cr_result(2 downto 1) := "00";
|
|
|
|
if r.r(31 downto 0) = 32x"0" then
|
|
|
|
v.cr_result(1) := '1';
|
|
|
|
else
|
|
|
|
v.cr_result(2) := not r.r(31);
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
v.cr_result(0) := v.xerc.so;
|
|
|
|
int_result := '1';
|
|
|
|
v.writing_fpr := '1';
|
|
|
|
v.instr_done := '1';
|
|
|
|
when IDIV_ZERO =>
|
|
|
|
opsel_r <= RES_MISC;
|
|
|
|
misc_sel <= "0101";
|
|
|
|
v.xerc_result := v.xerc;
|
|
|
|
if r.oe = '1' then
|
|
|
|
v.xerc_result.ov := r.int_ovf;
|
|
|
|
v.xerc_result.ov32 := r.int_ovf;
|
|
|
|
v.xerc_result.so := r.xerc.so or r.int_ovf;
|
|
|
|
v.writing_xer := '1';
|
|
|
|
end if;
|
|
|
|
v.cr_result := "001" & v.xerc_result.so;
|
|
|
|
int_result := '1';
|
|
|
|
v.writing_fpr := '1';
|
|
|
|
v.instr_done := '1';
|
|
|
|
|
|
|
|
end case;
|
|
|
|
|
|
|
|
rsign := v.result_sign;
|
|
|
|
if zero_divide = '1' then
|
|
|
|
v.fpscr(FPSCR_ZX) := '1';
|
|
|
|
end if;
|
|
|
|
if qnan_result = '1' then
|
|
|
|
invalid := '1';
|
|
|
|
v.result_class := NAN;
|
|
|
|
rsign := '0';
|
|
|
|
misc_sel <= "0001";
|
|
|
|
opsel_r <= RES_MISC;
|
|
|
|
arith_done := '1';
|
|
|
|
end if;
|
|
|
|
if invalid = '1' then
|
|
|
|
v.invalid := '1';
|
|
|
|
end if;
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
if arith_done = '1' then
|
|
|
|
-- Enabled invalid exception doesn't write result or FPRF
|
|
|
|
-- Neither does enabled zero-divide exception
|
|
|
|
if (v.invalid and r.fpscr(FPSCR_VE)) = '0' and
|
|
|
|
(zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
|
|
|
|
v.writing_fpr := '1';
|
|
|
|
v.update_fprf := '1';
|
|
|
|
end if;
|
|
|
|
if v.is_subtract = '1' and v.result_class = ZERO then
|
|
|
|
rsign := r.round_mode(0) and r.round_mode(1);
|
|
|
|
end if;
|
|
|
|
if v.negate = '1' and v.result_class /= NAN then
|
|
|
|
rsign := not rsign;
|
|
|
|
end if;
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
v.instr_done := '1';
|
|
|
|
update_fx := '1';
|
|
|
|
end if;
|
|
|
|
|
|
|
|
-- Multiplier and divide/square root data path
|
|
|
|
case msel_1 is
|
|
|
|
when MUL1_A =>
|
|
|
|
f_to_multiply.data1 <= r.a.mantissa;
|
|
|
|
when MUL1_B =>
|
|
|
|
f_to_multiply.data1 <= r.b.mantissa;
|
|
|
|
when MUL1_Y =>
|
|
|
|
f_to_multiply.data1 <= r.y;
|
|
|
|
when others =>
|
|
|
|
f_to_multiply.data1 <= r.r;
|
|
|
|
end case;
|
|
|
|
case msel_2 is
|
|
|
|
when MUL2_C =>
|
|
|
|
f_to_multiply.data2 <= r.c.mantissa;
|
|
|
|
when MUL2_LUT =>
|
|
|
|
f_to_multiply.data2 <= std_ulogic_vector(shift_left(resize(unsigned(inverse_est), 64),
|
|
|
|
UNIT_BIT - 19));
|
|
|
|
when MUL2_P =>
|
|
|
|
f_to_multiply.data2 <= r.p;
|
|
|
|
when others =>
|
|
|
|
f_to_multiply.data2 <= r.r;
|
|
|
|
end case;
|
|
|
|
maddend := (others => '0');
|
|
|
|
case msel_add is
|
|
|
|
when MULADD_CONST =>
|
|
|
|
-- addend is 2.0 or 1.5 in 16.112 format
|
|
|
|
if r.is_sqrt = '0' then
|
|
|
|
maddend(2*UNIT_BIT + 1) := '1'; -- 2.0
|
|
|
|
else
|
|
|
|
maddend(2*UNIT_BIT downto 2*UNIT_BIT - 1) := "11"; -- 1.5
|
|
|
|
end if;
|
|
|
|
when MULADD_A =>
|
|
|
|
-- addend is A in 16.112 format
|
|
|
|
maddend(127 downto UNIT_BIT + 64) := r.a_hi;
|
|
|
|
maddend(UNIT_BIT + 63 downto UNIT_BIT) := r.a.mantissa;
|
|
|
|
maddend(UNIT_BIT - 1 downto 0) := r.a_lo;
|
|
|
|
when MULADD_RS =>
|
|
|
|
-- addend is concatenation of R and S in 16.112 format
|
|
|
|
maddend(UNIT_BIT + 63 downto UNIT_BIT) := r.r;
|
|
|
|
maddend(UNIT_BIT - 1 downto 0) := r.s;
|
|
|
|
when others =>
|
|
|
|
end case;
|
|
|
|
f_to_multiply.addend <= maddend;
|
|
|
|
f_to_multiply.subtract <= msel_inv;
|
|
|
|
if set_y = '1' then
|
|
|
|
v.y := f_to_multiply.data2;
|
|
|
|
end if;
|
|
|
|
if multiply_to_f.valid = '1' then
|
|
|
|
if pshift = '0' then
|
|
|
|
v.p := multiply_to_f.result(63 downto 0);
|
|
|
|
else
|
|
|
|
v.p := multiply_to_f.result(UNIT_BIT + 63 downto UNIT_BIT);
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
-- Data path.
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
-- This has A and B input multiplexers, an adder, a shifter,
|
|
|
|
-- count-leading-zeroes logic, and a result mux.
|
|
|
|
if r.longmask = '1' then
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
mshift := r.shift + to_signed(-29, EXP_BITS);
|
|
|
|
else
|
|
|
|
mshift := r.shift;
|
|
|
|
end if;
|
|
|
|
if is_X(mshift) then
|
|
|
|
mask := (others => 'X');
|
|
|
|
elsif mshift < to_signed(-64, EXP_BITS) then
|
FPU: Implement floating convert from integer instructions
This implements fcfid, fcfidu, fcfids and fcfidus, which convert
64-bit integer values in an FPR into a floating-point value.
This brings in a lot of the datapath that will be needed in
future, including the shifter, adder, mask generator and
count-leading-zeroes logic, along with the machinery for rounding
to single-precision or double-precision, detecting inexact results,
signalling inexact-result exceptions, and updating result flags
in the FPSCR.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
5 years ago
|
|
|
mask := (others => '1');
|
|
|
|
elsif mshift >= to_signed(0, EXP_BITS) then
|
|
|
|
mask := (others => '0');
|
|
|
|
else
|
|
|
|
mask := right_mask(unsigned(mshift(5 downto 0)));
|
|
|
|
end if;
|
|
|
|
-- Adder input A.  r.opsel_a picks the source: R, or the mantissa of
-- operand A or B; any other selector value picks the mantissa of C.
if r.opsel_a = AIN_R then
    in_a0 := r.r;
elsif r.opsel_a = AIN_A then
    in_a0 := r.a.mantissa;
elsif r.opsel_a = AIN_B then
    in_a0 := r.b.mantissa;
else
    in_a0 := r.c.mantissa;
end if;

-- When set_x is asserted, set v.x if any bit of the selected operand
-- that is covered by mask is 1.  This test uses the operand before the
-- optional inversion below.
if set_x = '1' and (or (in_a0 and mask)) = '1' then
    v.x := '1';
end if;

-- Optional bitwise inversion of the A input before it is driven
-- onto in_a.
if opsel_ainv = '1' then
    in_a0 := not in_a0;
end if;
in_a <= in_a0;
|
|
|
|
-- Adder input B, selected by opsel_b.
case opsel_b is
    when BIN_ZERO =>
        in_b0 := (others => '0');
    when BIN_R =>
        in_b0 := r.r;
    when BIN_RND =>
        -- Rounding increment: start from zero and set only the bit(s)
        -- that apply.
        round_inc := (others => '0');
        if rnd_b32 = '1' then
            -- bit 32, only for a single-precision result with
            -- r.result_sign set
            round_inc(32) := r.result_sign and r.single_prec;
        elsif rbit_inc = '0' then
            -- one unit at the LSB of the selected precision
            round_inc(SP_LSB) := r.single_prec;
            round_inc(DP_LSB) := not r.single_prec;
        else
            -- increment at the double-precision R bit position
            round_inc(DP_RBIT) := '1';
        end if;
        in_b0 := round_inc;
    when others =>
        -- BIN_PS8: the 8 LSBs of P, sign-extended to 64 bits
        in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 0)), 64));
end case;

-- Optional bitwise inversion of the B input before it is driven
-- onto in_b.
if opsel_binv = '1' then
    in_b0 := not in_b0;
end if;
in_b <= in_b0;
|
|
|
|
-- Shifter.  The input is R followed by S (120 bits in all), with
-- shiftin0 ORed into the LSB of R and shiftin ORed into the MSB of S.
-- The shift count is the low 7 bits of r.shift.
--   r.shift unknown (X)      -> result is all X
--   r.shift outside -64..63  -> result is all zeroes
--   otherwise                -> result comes from shifter_64()
if is_X(r.shift) then
    shift_res := (others => 'X');
elsif r.shift < -64 or r.shift > 63 then
    shift_res := (others => '0');
else
    shift_res := shifter_64(r.r(63 downto 1) & (shiftin0 or r.r(0)) &
                            (shiftin or r.s(55)) & r.s(54 downto 0),
                            std_ulogic_vector(r.shift(6 downto 0)));
end if;
|
|
|
|
-- Main 64-bit adder: in_a + in_b + carry_in.
sum := std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);

-- With opsel_mask set, clear the sum bits below the LSB of the result
-- format: always the bits below DP_LSB, and for single precision also
-- the bits from DP_LSB up to just below SP_LSB.
-- Both slices are cleared with an "others" aggregate so that their
-- widths follow DP_LSB/SP_LSB (and hence UNIT_BIT) instead of being
-- tied to a fixed-length literal.
if opsel_mask = '1' then
    sum(DP_LSB - 1 downto 0) := (others => '0');
    if r.single_prec = '1' then
        sum(SP_LSB - 1 downto DP_LSB) := (others => '0');
    end if;
end if;
|
|
|
|
-- Result multiplexer, controlled by opsel_r: adder sum, shifter output,
-- the 64 bits of the multiplier result starting at UNIT_BIT, or (for
-- any other selector value) a miscellaneous value chosen by misc_sel.
if opsel_r = RES_SUM then
    result <= sum;
elsif opsel_r = RES_SHIFT then
    result <= shift_res;
elsif opsel_r = RES_MULT then
    result <= multiply_to_f.result(UNIT_BIT + 63 downto UNIT_BIT);
    if mult_mask = '1' then
        -- trim to 54 fraction bits if mult_mask = 1, for quotient when dividing
        result(UNIT_BIT - 55 downto 0) <= (others => '0');
    end if;
else
    -- misc defaults to zero; each selection below either overwrites it
    -- completely or sets the relevant bits on top of that default.
    misc := (others => '0');
    case misc_sel is
        when "0000" =>
            -- FPSCR, masked by fpscr_mask, in the low 32 bits
            misc := x"00000000" & (r.fpscr and fpscr_mask);
        when "0001" =>
            -- generated QNaN mantissa
            misc(QNAN_BIT) := '1';
        when "0010" =>
            -- mantissa of max representable DP number
            misc(UNIT_BIT downto DP_LSB) := (others => '1');
        when "0011" =>
            -- mantissa of max representable SP number
            misc(UNIT_BIT downto SP_LSB) := (others => '1');
        when "0100" =>
            -- fmrgow result: low words of A and B
            misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
        when "0110" =>
            -- fmrgew result: high words of A and B
            misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
        when "0111" =>
            -- inverse_est zero-extended to 64 bits and shifted left by
            -- UNIT_BIT - 19 places
            misc := std_ulogic_vector(shift_left(resize(unsigned(inverse_est), 64),
                                                 UNIT_BIT - 19));
        when "1000" =>
            -- max positive result for fctiw[z]
            misc := x"000000007fffffff";
        when "1001" =>
            -- max negative result for fctiw[z]
            misc := x"ffffffff80000000";
        when "1010" =>
            -- max positive result for fctiwu[z]
            misc := x"00000000ffffffff";
        when "1100" =>
            -- max positive result for fctid[z]
            misc := x"7fffffffffffffff";
        when "1101" =>
            -- max negative result for fctid[z]
            misc := x"8000000000000000";
        when "1110" =>
            -- max positive result for fctidu[z]
            misc := x"ffffffffffffffff";
        when "1011" | "1111" =>
            -- max negative result for fctiwu[z] ("1011") and for
            -- fctidu[z] ("1111"): zero, which misc already holds
            null;
        when others =>
            -- remaining selector values leave misc at zero
            null;
    end case;
    result <= misc;
end if;

-- result is driven with signal assignments above, so the value read
-- here is the one result held on entry to this evaluation of the
-- process.  NOTE(review): presumably the process is re-evaluated when
-- result changes; confirm against the sensitivity list.
v.r := result;
|
|
|
|
-- Update of the S register; only done when set_s is asserted.  The new
-- value is chosen by opsel_s.
if set_s = '1' then
    if opsel_s = S_NEG then
        -- (not S) + (not X): two's complement of S when X = 0,
        -- one's complement when X = 1
        v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x));
    elsif opsel_s = S_MULT then
        -- low 56 bits of the multiplier result
        v.s := multiply_to_f.result(55 downto 0);
    elsif opsel_s = S_SHIFT then
        -- upper 56 bits of the shifter output; any non-zero bit in the
        -- 8 bits that do not fit in S is recorded in X
        v.s := shift_res(63 downto 8);
        if shift_res(7 downto 0) /= x"00" then
            v.x := '1';
        end if;
    else
        v.s := (others => '0');
    end if;
end if;
|
|
|
|
|
|
|
|
-- Operand register A: set_a loads both the exponent (from new_exp) and
-- the mantissa (from the shifter output); set_a_exp and set_a_mant load
-- just one of the two.
if set_a = '1' then
    v.a.exponent := new_exp;
    v.a.mantissa := shift_res;
else
    if set_a_exp = '1' then
        v.a.exponent := new_exp;
    end if;
    if set_a_mant = '1' then
        v.a.mantissa := shift_res;
    end if;
end if;

-- a_hi / a_lo are cleared whenever e_in.valid is asserted; otherwise
-- they capture the top 8 and the low 56 bits of R on request.
if e_in.valid = '1' then
    v.a_hi := (others => '0');
    v.a_lo := (others => '0');
else
    if set_a_hi = '1' then
        v.a_hi := r.r(63 downto 56);
    end if;
    if set_a_lo = '1' then
        v.a_lo := r.r(55 downto 0);
    end if;
end if;

-- Operand register B: set_b loads exponent and mantissa, set_b_mant
-- loads the mantissa only.
if set_b = '1' then
    v.b.exponent := new_exp;
    v.b.mantissa := shift_res;
elsif set_b_mant = '1' then
    v.b.mantissa := shift_res;
end if;

-- Operand register C: exponent and mantissa are always loaded together.
if set_c = '1' then
    v.c.exponent := new_exp;
    v.c.mantissa := shift_res;
end if;
|
|
|
|
|
FPU: Make an explicit exponent data path
With this, the large case statement sets values for a set of control
signals, which then control multiplexers and adders that generate
values for v.result_exp and v.shift. The plan is for the case
statement to turn into a microcode ROM eventually.
The value of v.result_exp is the sum of two values, either of which
can be negated (but not both). The first value can be chosen from the
result exponent, A exponent, B exponent arithmetically shifted right
one bit, or 0. The second value can be chosen from new_exp (which is
r.result_exp - r.shift), B exponent, C exponent or a constant. The
choices for the constant are 0, 56, the maximum exponent (max_exp) or
the exponent bias for trap-enabled overflow conditions (bias_exp).
These choices are controlled by the signals re_sel1, re_neg1, re_sel2
and re_neg2, and the sum is written into v.result_exp if re_set_result
is 1.
For v.shift we also compute the sum of two values, either of which
can be negated (but not both). The first value can be chosen from
new_exp, B exponent, r.shift, or 0. The second value can be chosen
from the A exponent or a constant. The possible constants are 0, 1,
4, 8, 32, 52, 56, 63, 64, or the minimum exponent (min_exp). These
choices are controlled by the signals rs_sel1, rs_neg1, rs_sel2 and
rs_neg2. After the adder there is a multiplexer which selects either
the sum or a shift count for normalization (derived from a count
leading zeroes operation on R) to be written into v.shift. The
count-leading-zeroes result does not go through the adder for timing
reasons.
In order to simplify the logic and help improve timing, settings of
the control signals have been made unconditional in a state in many
places, even if those settings are only required when some condition
is met.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
3 years ago
|
|
|
-- exponent data path
-- The result exponent is the sum of two selected values, each of which
-- can be negated.

-- First operand: result exponent, A exponent, B exponent arithmetically
-- shifted right by one bit, or zero.
if re_sel1 = REXP1_R then
    rexp_in1 := r.result_exp;
elsif re_sel1 = REXP1_A then
    rexp_in1 := r.a.exponent;
elsif re_sel1 = REXP1_BHALF then
    rexp_in1 := shift_right(r.b.exponent, 1);
else
    rexp_in1 := (others => '0');
end if;
if re_neg1 = '1' then
    rexp_in1 := not rexp_in1;
end if;

-- Second operand: new_exp, C exponent, B exponent, or a constant
-- chosen by re_con2 (UNIT_BIT, max_exp, bias_exp or zero).
case re_sel2 is
    when REXP2_NE =>
        rexp_in2 := new_exp;
    when REXP2_C =>
        rexp_in2 := r.c.exponent;
    when REXP2_B =>
        rexp_in2 := r.b.exponent;
    when others =>
        if re_con2 = RECON2_UNIT then
            rexp_in2 := to_signed(UNIT_BIT, EXP_BITS);
        elsif re_con2 = RECON2_MAX then
            rexp_in2 := max_exp;
        elsif re_con2 = RECON2_BIAS then
            rexp_in2 := bias_exp;
        else
            rexp_in2 := (others => '0');
        end if;
end case;
if re_neg2 = '1' then
    rexp_in2 := not rexp_in2;
end if;

-- Negation is a bitwise inversion above plus a carry-in of 1 here.
rexp_cin := re_neg1 or re_neg2;
rexp_sum := rexp_in1 + rexp_in2 + rexp_cin;

-- The sum is only written to the result exponent on request.
if re_set_result = '1' then
    v.result_exp := rexp_sum;
end if;
|
|
|
|
-- Shift count data path.  v.shift is either the sum of two selected
-- values (each of which can be negated) or a normalization count
-- derived from the number of leading zeroes in R.

-- First operand: B exponent, new_exp, the current shift count, or zero.
if rs_sel1 = RSH1_B then
    rsh_in1 := r.b.exponent;
elsif rs_sel1 = RSH1_NE then
    rsh_in1 := new_exp;
elsif rs_sel1 = RSH1_S then
    rsh_in1 := r.shift;
else
    rsh_in1 := (others => '0');
end if;
if rs_neg1 = '1' then
    rsh_in1 := not rsh_in1;
end if;

-- Second operand: A exponent, or a constant chosen by rs_con2.
if rs_sel2 = RSH2_A then
    rsh_in2 := r.a.exponent;
else
    case rs_con2 is
        when RSCON2_1 =>
            rsh_in2 := to_signed(1, EXP_BITS);
        when RSCON2_UNIT_52 =>
            rsh_in2 := to_signed(UNIT_BIT - 52, EXP_BITS);
        when RSCON2_64_UNIT =>
            rsh_in2 := to_signed(64 - UNIT_BIT, EXP_BITS);
        when RSCON2_32 =>
            rsh_in2 := to_signed(32, EXP_BITS);
        when RSCON2_52 =>
            rsh_in2 := to_signed(52, EXP_BITS);
        when RSCON2_UNIT =>
            rsh_in2 := to_signed(UNIT_BIT, EXP_BITS);
        when RSCON2_63 =>
            rsh_in2 := to_signed(63, EXP_BITS);
        when RSCON2_64 =>
            rsh_in2 := to_signed(64, EXP_BITS);
        when RSCON2_MINEXP =>
            rsh_in2 := min_exp;
        when others =>
            rsh_in2 := (others => '0');
    end case;
end if;
if rs_neg2 = '1' then
    rsh_in2 := not rsh_in2;
end if;

if rs_norm = '1' then
    -- Normalization: shift count from the leading-zero count of R
    clz := count_left_zeroes(r.r);
    if renorm_sqrt = '1' then
        -- make denormalized value end up with even exponent
        clz(0) := '1';
    end if;
    -- do this as a separate dedicated 7-bit adder for timing reasons
    v.shift := resize(signed('0' & clz) - (63 - UNIT_BIT), EXP_BITS);
else
    -- Sum of the two operands; the carry-in of 1 completes the
    -- negation of whichever operand was inverted above.
    v.shift := rsh_in1 + rsh_in2 + (rs_neg1 or rs_neg2);
end if;
|
|
|
|
|
|
|
|
-- FPSCR result flags (bits FPSCR_C down to FPSCR_FU): recomputed from
-- the result sign, the result class and whether R has its unit bit set
-- without the denorm flag, when r.update_fprf is set.
if r.update_fprf = '1' then
    v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.res_sign, r.result_class,
                                                     r.r(UNIT_BIT) and not r.denorm);
end if;

-- VX is the OR of all the individual invalid-operation bits, which sit
-- in two separate ranges of the FPSCR.
v.fpscr(FPSCR_VX) := or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC) &
                         v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI));

-- FEX is set when any exception bit (VX down to XX) is set together
-- with its corresponding enable bit (VE down to XE).  This must follow
-- the VX update above because it reads the new VX value.
v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
                          v.fpscr(FPSCR_VE downto FPSCR_XE));

-- FX is set, when update_fx allows it, if any exception bit is now set
-- that was not set in r.old_exc.
if update_fx = '1' then
    if (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
        v.fpscr(FPSCR_FX) := '1';
    end if;
end if;
|
|
|
|
|
|
|
|
-- Instruction completion handling.
if v.instr_done = '1' then
    if r.state /= IDLE then
        -- Go back to IDLE and drop busy and f2stall
        v.state := IDLE;
        v.busy := '0';
        v.f2stall := '0';

        -- Capture what describes the result being delivered
        v.write_reg := r.dest_fpr;
        v.complete_tag := r.instr_tag;
        v.sp_result := r.single_prec;
        v.int_result := int_result;
        v.nsnan_result := v.quieten_nan;
        v.res_sign := rsign;
        v.illegal := illegal;

        -- CR update: the CR result is FPSCR bits FX down to OX when
        -- r.fp_rc is set
        v.writing_cr := r.is_cmp or r.rc;
        if r.fp_rc = '1' then
            v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
        end if;

        -- CR field mask: index 0 for integer operations, index 1 for
        -- operations that are not compares, otherwise the index from
        -- insn_bf() of the instruction (all X if that field is unknown)
        if r.integer_op = '1' then
            v.cr_mask := num_to_fxm(0);
        elsif r.is_cmp = '0' then
            v.cr_mask := num_to_fxm(1);
        elsif is_X(insn_bf(r.insn)) then
            v.cr_mask := (others => 'X');
        else
            v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(r.insn))));
        end if;
    end if;

    -- Completion and interrupt requests are only updated while
    -- e_in.stall is clear.  These read v.illegal, so they must come
    -- after the assignment to v.illegal above.
    if e_in.stall = '0' then
        v.complete := not v.illegal;
        v.do_intr := (v.fpscr(FPSCR_FEX) and r.fe_mode) or v.illegal;
    end if;
    -- N.B. We rely on execute1 to prevent any new instruction
    -- coming in while e_in.stall = 1, without us needing to
    -- have busy asserted.
elsif r.state /= IDLE and e_in.stall = '0' then
    -- Operation still in progress and no stall: assert f2stall
    v.f2stall := '1';
end if;
|
|
|
|
|
|
|
|
-- Result value output: R as-is when r.int_result is set, otherwise the
-- value that pack_dp() builds from the result sign, class, exponent
-- and R.
-- This mustn't depend on any fields of r that are modified in IDLE state.
if r.int_result /= '1' then
    fp_result <= pack_dp(r.res_sign, r.result_class, r.result_exp, r.r,
                         r.sp_result, r.nsnan_result);
else
    fp_result <= r.r;
end if;
|
|
|
|
|
|
|
|
rin <= v;
|
|
|
|
end process;
|
|
|
|
|
|
|
|
end architecture behaviour;
|