// (Repository web-viewer residue, kept as comments so the file parses as Verilog.)
// You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
//
// 3255 lines
// 140 KiB
// Verilog

// © IBM Corp. 2020
// Licensed under the Apache License, Version 2.0 (the "License"), as modified by
// the terms below; you may not use the files in this repository except in
// compliance with the License as modified.
// You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
//
// Modified Terms:
//
// 1) For the purpose of the patent license granted to you in Section 3 of the
// License, the "Work" hereby includes implementations of the work of authorship
// in physical form.
//
// 2) Notwithstanding any terms to the contrary in the License, any licenses
// necessary for implementation of the Work that are available from OpenPOWER
// via the Power ISA End User License Agreement (EULA) are explicitly excluded
// hereunder, and may be obtained from OpenPOWER under the terms and conditions
// of the EULA.
//
// Unless required by applicable law or agreed to in writing, the reference design
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License
// for the specific language governing permissions and limitations under the License.
//
// Additional rights, including the ability to physically implement a softcore that
// is compliant with the required sections of the Power ISA Specification, are
// available at no cost under the terms of the OpenPOWER Power ISA EULA, which can be
// obtained (along with the Power ISA) here: https://openpowerfoundation.org.
`timescale 1 ns / 1 ns
//==##########################################################################
//==### FU_DIVSQRT.VHDL #########
//==### #########
//==##########################################################################
`include "tri_a2o.vh"
module fu_divsqrt(
vdd,
gnd,
clkoff_b,
act_dis,
flush,
delay_lclkr,
mpw1_b,
mpw2_b,
sg_1,
thold_1,
fpu_enable,
nclk,
f_dsq_si,
f_dsq_so,
ex0_act_b,
f_dcd_ex0_div,
f_dcd_ex0_divs,
f_dcd_ex0_sqrt,
f_dcd_ex0_sqrts,
f_dcd_ex0_record_v,
f_dcd_ex2_divsqrt_hole_v,
f_dcd_flush,
f_dcd_ex1_itag,
f_dcd_ex1_fpscr_addr,
f_dcd_ex1_instr_frt,
f_dcd_ex1_instr_tid,
f_dcd_ex1_divsqrt_cr_bf,
f_dcd_axucr0_deno,
f_scr_ex6_fpscr_rm_thr0,
f_scr_ex6_fpscr_ee_thr0,
f_scr_ex6_fpscr_rm_thr1,
f_scr_ex6_fpscr_ee_thr1,
f_fmt_ex2_a_sign_div,
f_fmt_ex2_a_expo_div_b,
f_fmt_ex2_a_frac_div,
f_fmt_ex2_b_sign_div,
f_fmt_ex2_b_expo_div_b,
f_fmt_ex2_b_frac_div,
f_fmt_ex2_a_zero,
f_fmt_ex2_a_zero_dsq,
f_fmt_ex2_a_expo_max,
f_fmt_ex2_a_expo_max_dsq,
f_fmt_ex2_a_frac_zero,
f_fmt_ex2_b_zero,
f_fmt_ex2_b_zero_dsq,
f_fmt_ex2_b_expo_max,
f_fmt_ex2_b_expo_max_dsq,
f_fmt_ex2_b_frac_zero,
f_dsq_ex3_hangcounter_trigger,
f_dsq_ex5_divsqrt_v,
f_dsq_ex6_divsqrt_v,
f_dsq_ex6_divsqrt_record_v,
f_dsq_ex6_divsqrt_v_suppress,
f_dsq_ex5_divsqrt_itag,
f_dsq_ex6_divsqrt_fpscr_addr,
f_dsq_ex6_divsqrt_instr_frt,
f_dsq_ex6_divsqrt_instr_tid,
f_dsq_ex6_divsqrt_cr_bf,
f_dsq_ex6_divsqrt_sign,
f_dsq_ex6_divsqrt_exp,
f_dsq_ex6_divsqrt_fract,
f_dsq_ex6_divsqrt_flag_fpscr,
f_dsq_debug
);
// ---- Power and pervasive (clock control / scan) pins ----------------------
inout vdd; // wired to the .vd pin of the primitives instantiated in this module
inout gnd; // wired to the .gd pin of the primitives instantiated in this module
input clkoff_b; // tiup
input act_dis; // ??tidn??
input flush; // ??tidn??
input delay_lclkr; // tidn,
input mpw1_b; // tidn,
input mpw2_b; // tidn,
input sg_1; // staged through tri_plat sg_reg_0 to form sg_0
input thold_1; // staged through tri_plat thold_reg_0 to form thold_0
input fpu_enable; //dc_act
input [0:`NCLK_WIDTH-1] nclk; // clock bundle handed to every latch/primitive
//--------------------------------------------------------------------------
input f_dsq_si; //perv scan
output f_dsq_so; //perv scan
input ex0_act_b; // active-low; inverted internally to form ex0_act
//--------------------------------------------------------------------------
// ex0 operation decodes: OR-ed together into ex0_anydivsqrt, and each one
// selects its own constant for ex0_op_cyc_count_din.
input f_dcd_ex0_div;
input f_dcd_ex0_divs;
input f_dcd_ex0_sqrt;
input f_dcd_ex0_sqrts;
input f_dcd_ex0_record_v; // latched alongside the op decodes (ex0 -> ex1 -> ex2)
input f_dcd_ex2_divsqrt_hole_v;
//--------------------------------------------------------------------------
input [0:1] f_dcd_flush; // bit i is qualified with exx_instr_tid_q[i] to form n_flush_d; also clears the held tid
// The ex1 fields below are captured when ex1_anydivsqrt is set and otherwise
// recirculated from their latched copies.
input [0:6] f_dcd_ex1_itag;
input [0:5] f_dcd_ex1_fpscr_addr;
input [0:5] f_dcd_ex1_instr_frt;
input [0:3] f_dcd_ex1_instr_tid; // any bit set validates the ex1 op (ex1_instr_v); only bits [0:1] are captured
input [0:4] f_dcd_ex1_divsqrt_cr_bf;
input f_dcd_axucr0_deno;
// Per-thread FPSCR fields; {ee, rm} of the thread selected by
// exx_instr_tid_q[0]/[1] is captured into exx_fpscr_q when ex6_anydivsqrt is set.
input [0:1] f_scr_ex6_fpscr_rm_thr0;
input [0:4] f_scr_ex6_fpscr_ee_thr0; // FPSCR VE,OE,UE,ZE,XE
input [0:1] f_scr_ex6_fpscr_rm_thr1;
input [0:4] f_scr_ex6_fpscr_ee_thr1; // FPSCR VE,OE,UE,ZE,XE
//--------------------------------------------------------------------------
// Operand inputs: sampled when ex2_anydivsqrt is set, otherwise the latched
// copy is held.
input f_fmt_ex2_a_sign_div; // these operands are actually ex2
input [01:13] f_fmt_ex2_a_expo_div_b; // inverted internally to form f_fmt_ex2_a_expo_div
input [01:52] f_fmt_ex2_a_frac_div;
input f_fmt_ex2_b_sign_div;
input [01:13] f_fmt_ex2_b_expo_div_b; // inverted internally to form f_fmt_ex2_b_expo_div
input [01:52] f_fmt_ex2_b_frac_div;
input f_fmt_ex2_a_zero;
input f_fmt_ex2_a_zero_dsq; // with ex2_sp and ~f_fmt_ex2_a_zero forms ex2_a_SPunderflow_zero
input f_fmt_ex2_a_expo_max;
input f_fmt_ex2_a_expo_max_dsq; // with ex2_sp and ~f_fmt_ex2_a_expo_max forms ex2_a_SPoverflow_expo_max
input f_fmt_ex2_a_frac_zero;
input f_fmt_ex2_b_zero;
input f_fmt_ex2_b_zero_dsq; // with ex2_sp and ~f_fmt_ex2_b_zero forms ex2_b_SPunderflow_zero
input f_fmt_ex2_b_expo_max;
input f_fmt_ex2_b_expo_max_dsq; // with ex2_sp and ~f_fmt_ex2_b_expo_max forms ex2_b_SPoverflow_expo_max
input f_fmt_ex2_b_frac_zero;
output f_dsq_ex3_hangcounter_trigger;
//--------------------------------------------------------------------------
// Result / completion outputs (their drivers are outside the part of the
// file covered by these notes).
output [0:1] f_dsq_ex5_divsqrt_v;
output [0:1] f_dsq_ex6_divsqrt_v;
output f_dsq_ex6_divsqrt_record_v;
output f_dsq_ex6_divsqrt_v_suppress;
output [0:6] f_dsq_ex5_divsqrt_itag;
output [0:5] f_dsq_ex6_divsqrt_fpscr_addr;
output [0:5] f_dsq_ex6_divsqrt_instr_frt;
output [0:3] f_dsq_ex6_divsqrt_instr_tid;
output [0:4] f_dsq_ex6_divsqrt_cr_bf;
output f_dsq_ex6_divsqrt_sign; // needs to be right off of a latch
output [01:13] f_dsq_ex6_divsqrt_exp; // needs to be right off of a latch
output [00:52] f_dsq_ex6_divsqrt_fract; // needs to be right off of a latch
output [00:15] f_dsq_ex6_divsqrt_flag_fpscr;
output [00:63] f_dsq_debug;
//--------------------------------------------------------------------------
//==################################################
parameter tiup = 1'b1;
parameter tidn = 1'b0;
wire sg_0;
wire thold_0_b;
wire thold_0;
wire force_t;
//----------------------------------------------------------------------
// todo items:
//----------------------------------------------------------------------
wire [00:56] zeros;
wire [00:27] ones;
wire ex0_act;
wire ex1_act;
wire ex2_act;
wire ex3_act;
wire ex4_act;
wire [0:7] act_so;
wire [0:7] act_si;
wire [0:14] ex1_div_instr_lat_scin;
wire [0:14] ex1_div_instr_lat_scout;
wire [0:5] ex2_div_instr_lat_scin;
wire [0:5] ex2_div_instr_lat_scout;
wire [0:8] ex2_itag_lat_scin;
wire [0:8] ex2_itag_lat_scout;
wire [0:27] ex2_fpscr_addr_lat_scin;
wire [0:27] ex2_fpscr_addr_lat_scout;
wire [0:18] ex1_div_ctr_lat_scin;
wire [0:18] ex1_div_ctr_lat_scout;
wire [0:95] ex5_div_result_lat_scin;
wire [0:95] ex5_div_result_lat_scout;
wire [0:65] ex6_div_result_lat_scin;
wire [0:65] ex6_div_result_lat_scout;
wire [0:162] ex5_special_case_lat_scin;
wire [0:162] ex5_special_case_lat_scout;
wire [0:3] ex5_div_done_lat_scout;
wire [0:3] ex5_div_done_lat_scin;
wire HW165073_bits;
wire HW165073_hit;
wire ex1_divsqrt_running_d;
wire exx_divsqrt_running_q;
wire ex1_divsqrt_done;
wire ex2_divsqrt_done;
wire ex2_divsqrt_done_din;
wire ex2_waiting_for_hole;
wire ex2_divsqrt_zero;
wire ex3_divsqrt_done_din;
wire ex4_divsqrt_done_din;
wire ex5_divsqrt_done_din;
wire ex3_divsqrt_done;
wire ex4_divsqrt_done;
wire ex4_divsqrt_done_q;
wire ex5_divsqrt_done;
wire ex6_divsqrt_done;
wire ex4_start_a_denorm_result;
wire ex4_start_denorm_result;
wire ex4_denormalizing_result;
wire ex4_denormalizing_result_done;
wire ex4_denormalizing_result_done_din;
wire ex5_denormalizing_result_done;
wire ex4_denormalizing_result_shifting;
wire ex4_divsqrt_denorm_hold;
wire ex4_denormalizing_result_rounding;
wire ex4_start_sp_denorm_result;
wire exp_eq_369;
wire exp_eq_380;
wire exp_eq_368;
wire exp_eq_367;
wire exp_eq_367to9;
wire ex4_force_36A;
wire ex4_force;
wire ex4_dnr_roundup_incexp;
wire ex4_roundup_incexp;
wire ex4_x_roundup_incexp;
wire ex5_x_roundup_incexp;
wire [0:70] ex2_div_a_stage_lat_scout;
wire [0:70] ex2_div_a_stage_lat_scin;
wire [0:70] ex2_div_b_stage_lat_scout;
wire [0:70] ex2_div_b_stage_lat_scin;
wire [0:113] ex3_div_PR_sumcarry_lat_scout;
wire [0:113] ex3_div_PR_sumcarry_lat_scin;
wire [0:7] ex3_div_PR_sum4carry4_lat_scout;
wire [0:7] ex3_div_PR_sum4carry4_lat_scin;
wire [0:113] ex3_div_Q_QM_lat_scin;
wire [0:113] ex3_div_Q_QM_lat_scout;
wire [0:113] ex3_div_bQ_QM_lat_scin;
wire [0:113] ex3_div_bQ_QM_lat_scout;
wire [0:167] ex3_sqrt_bitmask_lat_scin;
wire [0:167] ex3_sqrt_bitmask_lat_scout;
wire [0:51] ex2_div_exp_lat_scout;
wire [0:51] ex2_div_exp_lat_scin;
wire [0:55] ex3_denom_lat_scout;
wire [0:55] ex3_denom_lat_scin;
wire [0:26] exx_div_denorm_lat_scout;
wire [0:26] exx_div_denorm_lat_scin;
wire ex4_deno_force_zero;
wire exx_running_act_d;
wire exx_running_act_q;
(* analysis_not_referenced="TRUE" *)
wire [0:3] act_spare_unused;
(* analysis_not_referenced="TRUE" *)
wire [0:880] spare_unused;
wire ex0_record_v;
wire ex1_record_v;
wire ex2_record_v;
wire exx_record_v_din;
wire exx_record_v_q;
wire ex0_div;
wire ex0_divs;
wire ex0_sqrt;
wire ex0_sqrts;
wire ex1_div;
wire ex1_divs;
wire ex1_sqrt;
wire ex1_sqrts;
wire ex1_div_dout;
wire ex1_divs_dout;
wire ex1_sqrt_dout;
wire ex1_sqrts_dout;
wire ex2_div;
wire ex2_divs;
wire ex2_sqrt;
wire ex2_sqrts;
wire ex2_sp;
wire ex1_instr_v;
wire ex2_div_or_divs;
wire ex2_sqrt_or_sqrts;
wire ex0_anydivsqrt;
wire ex1_anydivsqrt;
wire ex2_anydivsqrt;
wire ex3_anydivsqrt;
wire ex4_anydivsqrt;
wire ex5_anydivsqrt;
wire ex6_anydivsqrt;
wire [0:6] ex1_itag_din;
wire [0:6] exx_itag_q;
wire [0:5] ex1_fpscr_addr_din;
wire [0:5] exx_fpscr_addr_q;
wire [0:5] ex1_instr_frt_din;
wire [0:5] exx_instr_frt_q;
wire [0:3] ex1_instr_tid_din;
wire [0:3] exx_instr_tid_q;
wire [0:1] tid_init;
wire [0:1] tid_hold;
wire [0:1] tid_clear;
wire [0:4] ex1_cr_bf_din;
wire [0:4] exx_cr_bf_q;
wire [0:7] ex0_op_cyc_count_din;
wire [0:7] ex1_op_cyc_count;
wire [0:7] ex2_hangcounter_din;
wire [0:7] ex3_hangcounter_q;
wire [0:7] ex3_div_hangcounter_lat_scout;
wire [0:7] ex3_div_hangcounter_lat_scin;
wire [0:63] f_dsq_debug_din;
wire [0:63] f_dsq_debug_q;
wire [0:63] f_dsq_debug_lat_scin;
wire [0:63] f_dsq_debug_lat_scout;
wire ex2_hangcounter_clear;
wire ex2_hangcounter_incr;
wire ex3_hangcounter_trigger;
wire ex4_sp;
wire ex4_dp;
wire exx_sp;
wire exx_dp;
wire ex1_cycles_init;
wire ex1_cycles_decr;
wire ex1_cycles_hold;
wire ex1_cycles_clear;
wire exx_single_precision_d;
wire exx_single_precision_q;
wire ex2_a_zero;
wire ex2_a_SPunderflow_zero;
wire ex2_a_expo_max;
wire ex2_a_SPoverflow_expo_max;
wire ex2_b_SPoverflow_expo_max;
wire ex2_a_frac_zero;
wire ex2_b_zero;
wire ex2_b_SPunderflow_zero;
wire ex2_b_expo_max;
wire ex2_b_frac_zero;
wire exx_a_zero_d;
wire exx_a_expo_max_d;
wire exx_a_frac_zero_d;
wire exx_b_zero_d;
wire exx_a_SPunderflow_zero_d;
wire exx_b_SPunderflow_zero_d;
wire exx_a_SPoverflow_expo_max_d;
wire exx_b_SPoverflow_expo_max_d;
wire exx_a_SPoverflow_expo_max_q;
wire exx_b_SPoverflow_expo_max_q;
wire exx_b_expo_max_d;
wire exx_b_frac_zero_d;
wire exx_a_zero_q;
wire exx_b_SPunderflow_zero_q;
wire exx_a_SPunderflow_zero_q;
wire exx_a_expo_max_q;
wire exx_a_frac_zero_q;
wire exx_b_zero_q;
wire exx_b_expo_max_q;
wire exx_b_frac_zero_q;
wire exx_a_NAN;
wire exx_b_NAN;
wire exx_a_INF;
wire exx_b_INF;
wire exx_a_SPoverflowINF;
wire exx_b_SPoverflowINF;
wire exx_b_ZER;
wire exx_a_ZER;
wire exx_b_SPunderflowZER;
wire exx_a_SPunderflowZER;
wire ex4_a_snan;
wire ex4_b_snan;
wire ex4_snan;
wire exx_hard_spec_case;
wire ex4_div_by_zero_zx;
wire ex4_zero_div_zero;
wire ex4_inf_div_inf;
wire ex4_sqrt_neg;
wire ex4_pass_a_nan;
wire ex4_pass_b_nan;
wire ex4_pass_nan;
wire ex4_pass_a_nan_sp;
wire ex4_pass_b_nan_sp;
wire ex4_pass_a_nan_dp;
wire ex4_pass_b_nan_dp;
wire exx_divsqrt_v_suppress_d;
wire exx_divsqrt_v_suppress_q;
wire ex4_force_zero;
wire ex4_force_zeroone;
wire ex4_force_inf;
wire ex5_force_inf;
wire ex4_force_maxnorm;
wire ex4_force_maxnorm_sp;
wire ex4_force_maxnorm_dp;
wire ex4_force_qnan;
wire ex4_div_special_case;
wire ex5_div_special_case;
wire exx_sqrt_d;
wire exx_div_d;
wire [00:03] exx_div_q;
wire [00:03] exx_sqrt_q;
wire [00:06] exx_fpscr_din;
wire [00:06] exx_fpscr_q;
wire [00:52] ex4_divsqrt_fract;
wire [00:56] ex4_divsqrt_fract_cur;
wire [00:56] ex4_divsqrt_fract_shifted;
wire [00:56] ex4_divsqrt_fract_shifted_dp;
wire [00:56] ex4_divsqrt_fract_shifted_spmasked;
wire [00:56] ex4_divsqrt_fract_stickymask;
wire [00:53] ex4_divsqrt_fract_dnr;
wire dn_lv1sh00;
wire dn_lv1sh01;
wire dn_lv1sh10;
wire dn_lv1sh11;
wire dn_lv2sh00;
wire dn_lv2sh01;
wire dn_lv2sh10;
wire dn_lv2sh11;
wire dn_lv3sh00;
wire dn_lv3sh01;
wire dn_lv3sh10;
wire dn_lv3sh11;
wire dnsp_lv1sh00;
wire dnsp_lv1sh01;
wire dnsp_lv1sh10;
wire dnsp_lv1sh11;
wire dnsp_lv2sh00;
wire dnsp_lv2sh01;
wire dnsp_lv2sh10;
wire dnsp_lv2sh11;
wire dnsp_lv3sh00;
wire dnsp_lv3sh01;
wire dnsp_lv3sh10;
wire dnsp_lv3sh11;
wire [00:59] ex4_divsqrt_fract_shifted_00to03;
wire [00:71] ex4_divsqrt_fract_shifted_00to12;
wire [00:119] ex4_divsqrt_fract_shifted_00to48;
wire [00:56] ex4_spdenorm_mask;
wire [00:59] ex4_spdenorm_mask_shifted_00to03;
wire [00:71] ex4_spdenorm_mask_shifted_00to12;
wire [00:119] ex4_spdenorm_mask_shifted_00to48;
wire [00:56] ex4_spdenorm_mask_lsb;
wire [00:59] ex4_spdenorm_mask_lsb_shifted_00to03;
wire [00:71] ex4_spdenorm_mask_lsb_shifted_00to12;
wire [00:119] ex4_spdenorm_mask_lsb_shifted_00to48;
wire [00:56] ex4_spdenorm_mask_guard;
wire [00:59] ex4_spdenorm_mask_guard_shifted_00to03;
wire [00:71] ex4_spdenorm_mask_guard_shifted_00to12;
wire [00:119] ex4_spdenorm_mask_guard_shifted_00to48;
wire [00:56] ex4_spdenorm_mask_round;
wire [00:59] ex4_spdenorm_mask_round_shifted_00to03;
wire [00:71] ex4_spdenorm_mask_round_shifted_00to12;
wire [00:119] ex4_spdenorm_mask_round_shifted_00to48;
wire [00:52] ex4_divsqrt_fract_special;
wire [00:52] ex5_divsqrt_fract_special;
wire [00:52] ex5_divsqrt_fract_d;
wire [00:52] ex6_divsqrt_fract_q;
wire [01:13] ex4_divsqrt_exp;
wire [01:13] ex4_divsqrt_exp_special;
wire [01:13] ex5_divsqrt_exp_special;
wire [01:13] ex5_divsqrt_exp_d;
wire [01:13] ex6_divsqrt_exp_q;
wire ex4_maxnorm_sign;
wire ex4_divsqrt_sign;
wire ex4_divsqrt_sign_special;
wire [1:52] ex2_b_fract;
wire [1:52] ex2_a_fract;
wire exx_a_sign_d;
wire [1:13] exx_a_biased_13exp_d;
wire [1:52] exx_a_fract_d;
wire exx_b_sign_d;
wire [1:13] exx_b_biased_13exp_d;
wire [1:52] exx_b_fract_d;
wire exx_a_sign_q;
wire [1:13] exx_a_biased_13exp_q;
wire [1:52] exx_a_fract_q;
wire exx_b_sign_q;
wire [1:13] exx_b_biased_13exp_q;
wire [1:52] exx_b_fract_q;
wire [1:13] exx_exp_ux_adj;
wire [1:13] exx_exp_ux_adj_dp;
wire [1:13] exx_exp_ux_adj_sp;
wire [1:13] exx_exp_ox_adj;
wire [1:13] exx_exp_ox_adj_dp;
wire [1:13] exx_exp_ox_adj_sp;
wire exx_invalid_mixed_precision;
wire [1:13] exx_b_ubexp;
wire [1:13] exy_b_ubexp;
wire [1:13] exx_exp_adj;
wire [1:13] exx_exp_adj_p1;
wire [0:12] exz_exp_addres_x0;
wire [0:12] exx_exp_addres_ux;
wire [0:12] exx_exp_addres_ox;
wire [0:12] exx_exp_addres;
wire [0:12] exx_exp_addres_div_x0;
wire [0:12] exx_exp_addres_sqrt_x0;
wire [0:12] exy_exp_addres_div_x0;
wire [0:12] exy_exp_addres_div_x0_m1;
wire [0:12] exz_exp_addres_div_x0_m1;
wire [0:12] exz_exp_addres_div_x0_adj;
wire [0:12] exy_exp_addres_sqrt_x0;
wire [0:12] exx_exp_addres_x0_p1;
wire [0:12] exx_exp_addres_ux_p1;
wire [0:12] exx_exp_addres_ox_p1;
wire [0:12] exy_exp_addres_x0_p1;
wire [0:12] exy_exp_addres_ux_p1;
wire [0:12] exy_exp_addres_ox_p1;
wire [0:12] exy_exp_addres_p1;
wire [0:12] exx_exp_addres_div_x0_p1;
wire [0:12] exx_exp_addres_sqrt_x0_p1;
wire ex4_expresult_zero;
wire [7:12] denorm_count_start;
wire [0:5] denorm_shift_amt;
wire [0:5] denorm_shift_amt_din;
wire [0:5] denorm_shift_amt_q;
wire [0:5] sp_denorm_shift_amt;
wire [0:5] sp_denorm_shift_amt_din;
wire [0:5] sp_denorm_shift_amt_q;
wire ex2_divsqrt_hole_v_b;
wire overflow;
wire underflow;
wire ueux;
wire oeox;
wire zezx;
wire vevx;
wire not_ueux_or_oeox;
wire exy_not_ueux_or_oeox;
wire exy_oeox;
wire exy_ueux;
wire overflow_sp;
wire sp_overflow_brink_x47E;
wire ex4_incexp_to_sp_overflow;
wire dp_overflow_brink_x7FE;
wire ex4_incexp_to_dp_overflow;
wire ex4_incexp_to_overflow;
wire underflow_sp;
wire overflow_dp;
wire underflow_dp;
wire underflow_denorm;
wire underflow_denorm_dp;
wire underflow_denorm_sp;
wire underflow_force_zero;
wire underflow_force_36A;
wire underflow_force_zeroone;
wire overflow_force_inf;
wire special_force_zero;
wire special_force_inf;
wire overflow_force_maxnorm;
wire underflow_sp_denorm;
wire sp_denorm_0x369roundup;
wire sp_denorm_underflow_zero;
wire sp_denorm_0x380roundup;
wire exx_q_bit0;
wire exx_q_bit0_cin;
wire exx_q_bit1;
wire exx_q_bit1_div;
wire exx_q_bit1_sqrt;
wire exx_q_bit1_cin_div;
wire exx_q_bit1_cin_sqrt;
wire exx_q_bit2;
wire exx_q_bit2_cin;
wire exx_q_bit3_div;
wire exx_q_bit3_cin_div;
wire exx_q_bit3_sqrt;
wire exx_q_bit3_cin_sqrt;
wire exx_q_bit3;
wire exx_nq_bit3;
wire [0:1] exx_q_bit22_sel;
wire exx_nq_bit0;
wire exx_nq_bit1;
wire exx_nq_bit1_div;
wire exx_nq_bit1_sqrt;
wire exx_nq_bit2;
wire exx_nq_bit3_div;
wire exx_nq_bit3_sqrt;
wire exx_notqornq_bit1_sqrt;
wire exx_notqornq_bit2;
wire exx_notqornq_bit3_sqrt;
wire exx_notqornq_bit1_div;
wire exx_notqornq_bit3_div;
wire exx_q_bit22;
wire exx_nq_bit22;
wire exx_q_bit22_div;
wire exx_nq_bit22_div;
wire exx_q_bit22_sqrt;
wire exx_nq_bit22_sqrt;
wire exx_notqornq_bit22_sqrt;
wire exx_notqornq_bit22_div;
wire exx_q_bit0_b;
wire exx_nq_bit0_b;
wire exx_q_bit0_prebuf;
wire exx_nq_bit0_prebuf;
wire [0:56] exx_Q_q;
wire [0:56] exx_Q_d;
wire [0:56] exx_QM_q;
wire [0:56] exx_QM_d;
wire [0:56] exx_bQ_q;
wire [0:56] exx_bQ_d;
wire [0:56] exx_bQM_q;
wire [0:56] exx_bQM_d;
wire [0:56] exx_lev0_csaout_sum;
wire [0:56] exx_lev0_csaout_carry;
wire [0:56] exx_lev0_csaoutsh_sum;
wire [0:56] exx_lev0_csaoutsh_carry;
wire exx_lev0_selD;
wire exx_lev0_selnD;
wire exx_lev0_selneg;
wire exx_lev0_selD_b;
wire exx_lev0_selnD_b;
wire exx_lev0_selQ;
wire exx_lev0_selMQ;
wire exx_lev0_selQ_b;
wire exx_lev0_selMQ_b;
wire exx_lev22_selD;
wire exx_lev22_selnD;
wire exx_lev22_selneg;
wire exx_lev22_selQ;
wire exx_lev22_selMQ;
wire [0:56] exx_lev0_csaout_carryout;
wire [0:56] exx_lev0_divsqrt_csaout_xor;
wire [0:56] exx_lev1_divsqrt_csaout_xor;
wire [0:56] exx_lev3_divsqrt_csaout_xor;
wire [0:56] exx_lev1_div_oper;
wire [0:56] exx_lev1_sqrt_oper;
wire [0:56] exx_lev3_div_oper;
wire [0:56] exx_lev3_sqrt_oper;
wire [0:56] exx_lev1_div_csaout_sum;
wire [0:56] exx_lev1_div_csaout_carry;
wire [0:56] exx_lev1_sqrt_csaout_sum;
wire [0:56] exx_lev1_sqrt_csaout_carry;
wire [0:56] exx_lev2_csaout_sum;
wire [0:56] exx_lev2_csaout_carry;
wire [0:56] exx_lev3_div_csaout_sum;
wire [0:56] exx_lev3_div_csaout_carry;
wire [0:56] exx_lev3_sqrt_csaout_sum;
wire [0:56] exx_lev3_sqrt_csaout_carry;
wire [0:56] exx_lev1_div_csaout_carryout;
wire [0:56] exx_lev1_sqrt_csaout_carryout;
wire [0:56] exx_lev2_csaout_carryout;
wire [0:56] exx_lev3_div_csaout_carryout;
wire [0:56] exx_lev3_sqrt_csaout_carryout;
wire [0:56] exx_lev22_csaout_carryout_div;
wire [0:56] exx_lev22_csaout_carryout_sqrt;
wire [0:56] exx_lev22_csaout_sum_sqrt;
wire [0:56] exx_lev22_csaout_carry_sqrt;
wire [0:56] exx_lev22_csaout_sum_div;
wire [0:56] exx_lev22_csaout_carry_div;
wire [0:56] exx_lev22_csaout_sum_xor;
wire [0:56] exx_PR_sum_d;
wire [0:56] exx_PR_sum_q;
wire [0:56] exx_PR_sum_d_late;
wire [0:56] exx_PR_sum_d_early;
wire [0:3] exx_PR_sum4_q;
wire [0:56] ex3_divsqrt_remainder;
wire [0:56] ex4_divsqrt_remainder;
wire ex3_rem_neg;
wire ex3_rem_neg_b;
wire [0:3] ex4_rem_neg;
wire [0:3] ex4_rem_neg_b;
wire [0:56] ex4_rem_neg_buf;
wire [0:56] ex4_rem_neg_buf_b;
wire ex4_rem_nonzero;
wire ex4_rem_nonzero_fi;
wire underflow_fi;
wire ex4_round_up;
wire ex4_round_up_underflow;
wire ex4_round_up_dnr;
wire ex3_norm_shl1;
wire ex3_norm_shl1_dp;
wire ex3_norm_shl1_sp;
wire ex4_norm_shl1;
wire ex4_norm_shl1_q;
wire ex4_norm_shl1_d;
wire ex4_norm_shl1_test;
wire [0:56] exx_PR_carry_d;
wire [0:56] exx_PR_carry_q;
wire [0:3] exx_PR_carry4_q;
wire [0:56] exx_PR_sum_shift;
wire [0:56] exx_PR_sum_final;
wire [0:56] exx_PR_carry_shift;
wire [0:56] exx_PR_carry_final;
wire [0:56] exx_PR_sum_q_shifted;
wire [0:56] exx_PR_carry_q_shifted;
wire [0:56] exx_Qin_lev0;
wire [0:56] exx_QMin_lev0;
wire [0:56] exx_Qin_lev1_sqrt;
wire [0:56] exx_QMin_lev1_sqrt;
wire [0:56] exx_Qin_lev1_div;
wire [0:56] exx_QMin_lev1_div;
wire [0:56] exx_bQin_lev1_sqrt;
wire [0:56] exx_bQMin_lev1_sqrt;
wire [0:56] exx_bQ_q_t;
wire [0:56] exx_bQM_q_t;
wire exx_Qin_lev0_sel0;
wire exx_Qin_lev0_sel1;
wire exx_QMin_lev0_sel0;
wire exx_QMin_lev0_sel1;
wire exx_QMin_lev0_sel2;
wire exx_Qin_lev1_sel0_sqrt;
wire exx_Qin_lev1_sel1_sqrt;
wire exx_Qin_lev1_sel0_div;
wire exx_Qin_lev1_sel1_div;
wire exx_QMin_lev1_sel0_div;
wire exx_QMin_lev1_sel1_div;
wire exx_QMin_lev1_sel2_div;
wire exx_QMin_lev1_sel0_sqrt;
wire exx_QMin_lev1_sel1_sqrt;
wire exx_QMin_lev1_sel2_sqrt;
wire [0:3] exx_sum4;
wire [0:3] exx_sum4_lev1_div;
wire [0:3] exx_sum4_lev1_sqrt;
wire [0:3] exx_sum4_lev2;
wire [0:3] exx_sum4_lev3_div;
wire [0:3] exx_sum4_lev3_sqrt;
wire [0:55] exx_denom_d;
wire [0:55] exx_denom_q;
wire [0:55] exx_denomQ_lev0;
wire [0:55] exx_denomQ_lev22_div;
wire [0:55] exx_denomQ_lev22_sqrt;
wire [0:55] exx_denomQ_lev0_nD_b;
wire [0:55] exx_denomQ_lev0_D_b;
wire [0:55] exx_denomQ_lev0_Q_b;
wire [0:55] exx_denomQ_lev0_MQ_b;
wire [0:55] exx_sqrtlev0_Q;
wire [0:55] exx_sqrtlev0_MQ;
wire [0:55] exx_sqrt_newbitmask_din;
wire [0:55] exx_sqrt_newbitmask_q;
wire [0:55] exx_sqrt_Qbitmask_din;
wire [0:55] exx_sqrt_Qbitmask_q;
wire [0:55] exx_sqrt_QMbitmask_din;
wire [0:55] exx_sqrt_QMbitmask_q;
wire [0:55] exx_sqrt_Qmaskvec;
wire [0:55] exx_sqrt_QMmaskvec;
wire wQ;
wire wMQ;
wire [0:55] exx_sqrtlev22_Q;
wire [0:55] exx_sqrtlev22_MQ;
wire [0:55] exx_bQin_lev0;
wire [0:55] exx_bQMin_lev0;
wire [0:55] exx_bQin_lev0_t;
wire [0:55] exx_bQMin_lev0_t;
wire ex2_PR_sum_sel0;
wire ex2_PR_sum_sel1;
wire ex2_PR_sum_sel2;
wire ex2_PR_sum_sel3;
wire ex2_PR_sum_sel4;
wire ex2_PR_sum_sel_late;
wire ex2_PR_sum_sel_early;
wire ex2_PR_carry_sel0;
wire ex2_PR_carry_sel1;
wire ex2_PR_carry_sel2;
wire [00:56] ex4_divsqrt_fract_preround;
wire [00:56] ex4_divsqrt_fract_preround_prenorm;
wire [00:53] ex4_divsqrt_fract_p0;
wire [00:53] ex4_divsqrt_fract_p1;
wire [00:53] ex5_divsqrt_fract_p1;
wire ex5_round_up;
wire [00:53] ex4_divsqrt_fract_dnr_p0;
wire [00:53] ex4_divsqrt_fract_dnr_p1;
wire [00:53] ex4_divsqrt_fract_dnr_sp_p0;
wire [00:53] ex4_divsqrt_fract_dnr_sp_p1;
wire [00:53] ex4_divsqrt_fract_dnr_dp;
wire [00:53] ex4_divsqrt_fract_dnr_sp;
wire [00:53] ex4_divsqrt_fract_dnr_sp_prem;
wire denorm_res_shiftoff_exp;
wire denorm_res_shiftoff_din;
wire denorm_res_shiftoff_q;
wire ex4_denorm_res_shiftoff_zero;
wire [00:53] ex4_divsqrt_fract_p0_sp;
wire [00:53] ex4_divsqrt_fract_p1_sp;
wire [00:53] ex4_divsqrt_fract_p0_dp;
wire [00:53] ex4_divsqrt_fract_p1_dp;
wire [00:53] ex4_divsqrt_fract_rounded;
wire exx_divsqrt_sign_d;
wire [01:13] exx_divsqrt_exp_d;
wire [00:56] exx_divsqrt_fract_d;
wire [00:15] exx_divsqrt_flag_fpscr_d;
wire exx_divsqrt_sign_q;
wire [01:13] exx_divsqrt_exp_q;
wire [00:56] exx_divsqrt_fract_q;
wire [00:15] exx_divsqrt_flag_fpscr_q;
wire n_flush_d;
wire n_flush;
wire [01:13] f_fmt_ex2_b_expo_div;
wire [01:13] f_fmt_ex2_a_expo_div;
wire [0:7] ex1_cycles_d;
wire [0:7] ex2_cycles_q;
wire lsb;
wire guard;
wire round;
wire sticky;
wire sticky_w_underflow;
wire denorm_sticky;
wire denorm_sticky_q;
wire denorm_sticky_din;
wire sign;
wire denorm_sticky_sp;
wire denorm_sticky_sp_q;
wire denorm_sticky_sp_din;
wire lsb_dnr;
wire guard_dnr;
wire round_dnr;
wire sticky_dnr;
wire lsb_dnr_sp;
wire guard_dnr_sp;
wire round_dnr_sp;
wire sticky_dnr_sp;
wire ex4_round_up_dnr_sp;
wire RNEmode;
wire RTZmode;
wire RPImode;
wire RNImode;
wire ex4_sp_inexact_roundbits;
wire ex4_denorm_result_det;
wire exp_gt_cap;
wire ex4_sp_denorm_result_det;
wire ex4_exp_le_896;
wire [00:13] denorm_exp_addres;
wire [00:13] denorm_exp_addres_sp;
wire [00:05] denorm_count_din;
wire [00:05] denorm_count_q;
wire VE; // FPSCR VE,OE,UE,ZE,XE
wire OE;
wire UE;
wire ZE;
wire XE;
//==##########################################
//# pervasive
//==##########################################
// Stage thold_1 through a 1-bit tri_plat cell to obtain thold_0.
// (tri_plat is a library primitive defined outside this file.)
tri_plat #(.WIDTH(1)) thold_reg_0(
   .nclk (nclk),
   .flush(flush),
   .vd   (vdd),
   .gd   (gnd),
   .din  (thold_1),
   .q    (thold_0)
);
// Stage sg_1 through a 1-bit tri_plat cell to obtain sg_0.
// (tri_plat is a library primitive defined outside this file.)
tri_plat #(.WIDTH(1)) sg_reg_0(
   .nclk (nclk),
   .flush(flush),
   .vd   (vdd),
   .gd   (gnd),
   .din  (sg_1),
   .q    (sg_0)
);
// Clock-control block: takes the staged thold_0 / sg_0 together with clkoff_b
// and act_dis, and connects force_t and thold_0_b, which are the nets handed
// to the .force_t / .thold_b pins of the tri_rlmreg_p latches in this module.
// (tri_lcbor is a library primitive defined outside this file.)
tri_lcbor lcbor_0(
   .thold   (thold_0),
   .sg      (sg_0),
   .clkoff_b(clkoff_b),
   .act_dis (act_dis),
   .thold_b (thold_0_b),
   .force_t (force_t)
);
//==##########################################
// Active-high view of the ex0 activity input.
assign ex0_act = ~ex0_act_b;

// A flush is relevant when the flushed thread is the one recorded in
// exx_instr_tid_q; the result is latched into n_flush by act_lat.
assign n_flush_d = (exx_instr_tid_q[0] & f_dcd_flush[0]) |
                   (exx_instr_tid_q[1] & f_dcd_flush[1]);

// "Running" indicator: set by any new div/sqrt in ex0, held while already
// set, and cleared by ex4_divsqrt_done or by a (latched) flush.
assign exx_running_act_d = (exx_running_act_q | ex0_anydivsqrt) &
                           ~ex4_divsqrt_done &
                           ~n_flush;
// 8-bit activity latch, clocked every cycle (act tied to tiup).
// Concatenation positions, first to last:
//   exx_running_act_d -> exx_running_act_q
//   tidn              -> act_spare_unused[1]   (spare)
//   ex0_act           -> ex1_act
//   ex1_act           -> ex2_act
//   ex2_act           -> ex3_act
//   ex3_act           -> ex4_act
//   n_flush_d         -> n_flush
//   tidn              -> act_spare_unused[3]   (spare)
tri_rlmreg_p #(.INIT(0), .WIDTH(8), .NEEDS_SRESET(0)) act_lat(
   .vd         (vdd),
   .gd         (gnd),
   .nclk       (nclk),
   .act        (tiup),
   .force_t    (force_t),
   .d_mode     (tiup),
   .delay_lclkr(delay_lclkr),
   .mpw1_b     (mpw1_b),
   .mpw2_b     (mpw2_b),
   .thold_b    (thold_0_b),
   .sg         (sg_0),
   .scin       (act_si),
   .scout      (act_so),
   .din        ({exx_running_act_d,
                 tidn,
                 ex0_act,
                 ex1_act,
                 ex2_act,
                 ex3_act,
                 n_flush_d,
                 tidn}),
   .dout       ({exx_running_act_q,
                 act_spare_unused[1],
                 ex1_act,
                 ex2_act,
                 ex3_act,
                 ex4_act,
                 n_flush,
                 act_spare_unused[3]})
);
//==##########################################
// Tie off the two spare bits that act_lat does not drive
// (bits [1] and [3] come from the act_lat outputs).
assign act_spare_unused[0] = tidn;
assign act_spare_unused[2] = tidn;
// Constant helper vectors: 28 bits of ones, 57 bits of zeros.
assign ones  = 28'hFFFFFFF;
assign zeros = 57'b0;
//----------------------------------------------------------------------
//----------------------------------------------------------------------
// Algorithm
//
//
// cyc xx ex1_divsqrt_done=1, final cycle that the fract path is functioning for the main fract bits
// cyc xx ex2_divsqrt_done=1, extra 2 rounding bits generated, initial normalize (possible SHL by 1)
// cyc xx ex3_divsqrt_done=1, round
// cyc xx ex4_divsqrt_done=1, renormalize after rounding, compute the final exponent (+expadj)
// cyc xx ex5_divsqrt_done=1, final result is on the bus, directly off of the latch
// cyc xx
//----------------------------------------------------------------------
//----------------------------------------------------------------------
//----------------------------------------------------------------------
// ex0 stage: local copies of the decode inputs.
assign ex0_record_v = f_dcd_ex0_record_v;
assign ex0_sqrts    = f_dcd_ex0_sqrts;
assign ex0_sqrt     = f_dcd_ex0_sqrt;
assign ex0_divs     = f_dcd_ex0_divs;
assign ex0_div      = f_dcd_ex0_div;

// Any of the four div/sqrt flavours present in ex0.
assign ex0_anydivsqrt = ex0_div | ex0_divs | ex0_sqrt | ex0_sqrts;

// AND-OR select of a per-operation constant; the value is latched into
// ex1_op_cyc_count by ex1_div_instr_lat.
//   div = 30, divs = 16, sqrt = 29, sqrts = 15
assign ex0_op_cyc_count_din[0:7] = ({8{ex0_div}}   & 8'd30) |
                                   ({8{ex0_divs}}  & 8'd16) |
                                   ({8{ex0_sqrt}}  & 8'd29) |
                                   ({8{ex0_sqrts}} & 8'd15);
// 15-bit latch, clocked every cycle (act tied to tiup). It carries:
//   - the four ex0 op decodes and record_v into ex1 (raw *_dout copies, which
//     are qualified with ex1_instr_v afterwards),
//   - the 8-bit ex0_op_cyc_count_din into ex1_op_cyc_count,
//   - two stages of the "any div/sqrt" chain: ex4 -> ex5 and ex5 -> ex6.
tri_rlmreg_p #(.INIT(0), .WIDTH(15), .NEEDS_SRESET(0)) ex1_div_instr_lat(
   .vd         (vdd),
   .gd         (gnd),
   .nclk       (nclk),
   .act        (tiup),
   .force_t    (force_t),
   .d_mode     (tiup),
   .delay_lclkr(delay_lclkr),
   .mpw1_b     (mpw1_b),
   .mpw2_b     (mpw2_b),
   .thold_b    (thold_0_b),
   .sg         (sg_0),
   .scin       (ex1_div_instr_lat_scin),
   .scout      (ex1_div_instr_lat_scout),
   .din        ({ex0_div,
                 ex0_divs,
                 ex0_sqrt,
                 ex0_sqrts,
                 ex0_record_v,
                 ex0_op_cyc_count_din,
                 ex4_anydivsqrt,
                 ex5_anydivsqrt}),
   .dout       ({ex1_div_dout,
                 ex1_divs_dout,
                 ex1_sqrt_dout,
                 ex1_sqrts_dout,
                 ex1_record_v,
                 ex1_op_cyc_count,
                 ex5_anydivsqrt,
                 ex6_anydivsqrt})
);
// The ex1 instruction counts as valid when any thread-id bit is set.
assign ex1_instr_v = f_dcd_ex1_instr_tid[0] | f_dcd_ex1_instr_tid[1] |
                     f_dcd_ex1_instr_tid[2] | f_dcd_ex1_instr_tid[3];

// Qualify the latched ex1 op decodes with the valid indication.
assign ex1_div   = ex1_instr_v & ex1_div_dout;
assign ex1_divs  = ex1_instr_v & ex1_divs_dout;
assign ex1_sqrt  = ex1_instr_v & ex1_sqrt_dout;
assign ex1_sqrts = ex1_instr_v & ex1_sqrts_dout;

// Any qualified div/sqrt flavour in ex1.
assign ex1_anydivsqrt = ex1_div | ex1_divs | ex1_sqrt | ex1_sqrts;
// 6-bit latch, clocked every cycle (act tied to tiup): moves the qualified
// ex1 op decodes, record_v and the "any div/sqrt" flag into ex2.
tri_rlmreg_p #(.INIT(0), .WIDTH(6), .NEEDS_SRESET(0)) ex2_div_instr_lat(
   .vd         (vdd),
   .gd         (gnd),
   .nclk       (nclk),
   .act        (tiup),
   .force_t    (force_t),
   .d_mode     (tiup),
   .delay_lclkr(delay_lclkr),
   .mpw1_b     (mpw1_b),
   .mpw2_b     (mpw2_b),
   .thold_b    (thold_0_b),
   .sg         (sg_0),
   .scin       (ex2_div_instr_lat_scin),
   .scout      (ex2_div_instr_lat_scout),
   .din        ({ex1_div,
                 ex1_divs,
                 ex1_sqrt,
                 ex1_sqrts,
                 ex1_record_v,
                 ex1_anydivsqrt}),
   .dout       ({ex2_div,
                 ex2_divs,
                 ex2_sqrt,
                 ex2_sqrts,
                 ex2_record_v,
                 ex2_anydivsqrt})
);
// ex2 op groupings: the "s" flavours (divs / sqrts) together form ex2_sp;
// the other two signals group by operation regardless of flavour.
assign ex2_sp            = ex2_divs | ex2_sqrts;
assign ex2_sqrt_or_sqrts = ex2_sqrt | ex2_sqrts;
assign ex2_div_or_divs   = ex2_div  | ex2_divs;
//----------------------------------------------------------------------
// itag capture/hold: take the incoming itag when a div/sqrt is in ex1,
// otherwise recirculate the latched value.
assign ex1_itag_din = ({7{ ex1_anydivsqrt}} & f_dcd_ex1_itag) |
                      ({7{~ex1_anydivsqrt}} & exx_itag_q);

// 9-bit latch, clocked every cycle (act tied to tiup): holds the 7-bit itag
// plus two stages of the "any div/sqrt" chain (ex2 -> ex3, ex3 -> ex4).
tri_rlmreg_p #(.INIT(0), .WIDTH(9), .NEEDS_SRESET(0)) ex2_div_itag_lat(
   .vd         (vdd),
   .gd         (gnd),
   .nclk       (nclk),
   .act        (tiup),
   .force_t    (force_t),
   .d_mode     (tiup),
   .delay_lclkr(delay_lclkr),
   .mpw1_b     (mpw1_b),
   .mpw2_b     (mpw2_b),
   .thold_b    (thold_0_b),
   .sg         (sg_0),
   .scin       (ex2_itag_lat_scin),
   .scout      (ex2_itag_lat_scout),
   .din        ({ex1_itag_din,
                 ex2_anydivsqrt,
                 ex3_anydivsqrt}),
   .dout       ({exx_itag_q,
                 ex3_anydivsqrt,
                 ex4_anydivsqrt})
);
// Capture/hold muxes for per-instruction fields: each takes the decode value
// when a div/sqrt is in ex1 and otherwise recirculates its latched copy.
assign ex1_fpscr_addr_din = ({6{ ex1_anydivsqrt}} & f_dcd_ex1_fpscr_addr) |
                            ({6{~ex1_anydivsqrt}} & exx_fpscr_addr_q);

assign ex1_instr_frt_din  = ({6{ ex1_anydivsqrt}} & f_dcd_ex1_instr_frt) |
                            ({6{~ex1_anydivsqrt}} & exx_instr_frt_q);

assign ex1_cr_bf_din      = ({5{ ex1_anydivsqrt}} & f_dcd_ex1_divsqrt_cr_bf) |
                            ({5{~ex1_anydivsqrt}} & exx_cr_bf_q);

// FPSCR snapshot {ee[0:4], rm[0:1]}: when ex6_anydivsqrt is set, take the
// fields of the thread selected by exx_instr_tid_q[0] / [1]; otherwise hold.
assign exx_fpscr_din = ({7{ex6_anydivsqrt & exx_instr_tid_q[0]}} & {f_scr_ex6_fpscr_ee_thr0, f_scr_ex6_fpscr_rm_thr0}) |
                       ({7{ex6_anydivsqrt & exx_instr_tid_q[1]}} & {f_scr_ex6_fpscr_ee_thr1, f_scr_ex6_fpscr_rm_thr1}) |
                       ({7{~ex6_anydivsqrt}}                     & exx_fpscr_q);

// Thread-id bookkeeping (bits [0:1] only; bits [2:3] are forced to zero).
//   init  : a div/sqrt is in ex1 and that thread is not being flushed
//   hold  : nothing new in ex1, ex6 not done, thread not being flushed
//   clear : ex6 done with nothing new in ex1, or the thread is flushed
assign tid_init  = {2{ex1_anydivsqrt}} & ~f_dcd_flush[0:1]; // new one can be starting in ex1 while ex6 finishing
assign tid_hold  = {2{~ex1_anydivsqrt & ~ex6_divsqrt_done}} & ~f_dcd_flush[0:1];
assign tid_clear = {2{~ex1_anydivsqrt & ex6_divsqrt_done}} | f_dcd_flush[0:1];

assign ex1_instr_tid_din[0:1] = (tid_init  & f_dcd_ex1_instr_tid[0:1]) |
                                (tid_hold  & exx_instr_tid_q[0:1]) |
                                (tid_clear & 2'b00);
assign ex1_instr_tid_din[2:3] = 2'b00;
// 28-bit latch, clocked every cycle (act tied to tiup). Unlike the other
// latches visible here it is instantiated with NEEDS_SRESET(1).
// Field layout, first to last:
//   fpscr_addr (6) | cr_bf (5) | instr_frt (6) | instr_tid (4) | fpscr {ee,rm} (7)
tri_rlmreg_p #(.INIT(0), .WIDTH(28), .NEEDS_SRESET(1)) ex2_div_fpscr_addr_cr_bf_lat(
   .vd         (vdd),
   .gd         (gnd),
   .nclk       (nclk),
   .act        (tiup),
   .force_t    (force_t),
   .d_mode     (tiup),
   .delay_lclkr(delay_lclkr),
   .mpw1_b     (mpw1_b),
   .mpw2_b     (mpw2_b),
   .thold_b    (thold_0_b),
   .sg         (sg_0),
   .scin       (ex2_fpscr_addr_lat_scin),
   .scout      (ex2_fpscr_addr_lat_scout),
   .din        ({ex1_fpscr_addr_din,
                 ex1_cr_bf_din,
                 ex1_instr_frt_din,
                 ex1_instr_tid_din,
                 exx_fpscr_din}),
   .dout       ({exx_fpscr_addr_q,
                 exx_cr_bf_q,
                 exx_instr_frt_q,
                 exx_instr_tid_q,
                 exx_fpscr_q})
);
//----------------------------------------------------------------------
// The exponents arrive inverted (_b); recover the true polarity.
assign f_fmt_ex2_a_expo_div = ~f_fmt_ex2_a_expo_div_b;
assign f_fmt_ex2_b_expo_div = ~f_fmt_ex2_b_expo_div_b;

// A-operand classification flags in ex2.
assign ex2_a_zero      = f_fmt_ex2_a_zero;
assign ex2_a_expo_max  = f_fmt_ex2_a_expo_max;
assign ex2_a_frac_zero = f_fmt_ex2_a_frac_zero;
// The *_dsq variants only count for the ex2_sp ops, and only when the plain
// flag is not already set.
assign ex2_a_SPunderflow_zero    = (ex2_sp & f_fmt_ex2_a_zero_dsq)     & ~f_fmt_ex2_a_zero;
assign ex2_a_SPoverflow_expo_max = (ex2_sp & f_fmt_ex2_a_expo_max_dsq) & ~ex2_a_expo_max;

// Capture/hold muxes feeding ex2_div_a_stage_lat: sample the A operand and
// its flags while a div/sqrt is in ex2, otherwise recirculate the latched copy.
assign exx_a_sign_d         = (ex2_anydivsqrt & f_fmt_ex2_a_sign_div) |
                              (~ex2_anydivsqrt & exx_a_sign_q);
assign exx_a_biased_13exp_d = ({13{ ex2_anydivsqrt}} & f_fmt_ex2_a_expo_div) |
                              ({13{~ex2_anydivsqrt}} & exx_a_biased_13exp_q);
assign exx_a_fract_d        = ({52{ ex2_anydivsqrt}} & f_fmt_ex2_a_frac_div) |
                              ({52{~ex2_anydivsqrt}} & exx_a_fract_q);

assign exx_a_zero_d                = (ex2_anydivsqrt & ex2_a_zero) |
                                     (~ex2_anydivsqrt & exx_a_zero_q);
assign exx_a_expo_max_d            = (ex2_anydivsqrt & ex2_a_expo_max) |
                                     (~ex2_anydivsqrt & exx_a_expo_max_q);
assign exx_a_frac_zero_d           = (ex2_anydivsqrt & ex2_a_frac_zero) |
                                     (~ex2_anydivsqrt & exx_a_frac_zero_q);
assign exx_a_SPunderflow_zero_d    = (ex2_anydivsqrt & ex2_a_SPunderflow_zero) |
                                     (~ex2_anydivsqrt & exx_a_SPunderflow_zero_q);
assign exx_a_SPoverflow_expo_max_d = (ex2_anydivsqrt & ex2_a_SPoverflow_expo_max) |
                                     (~ex2_anydivsqrt & exx_a_SPoverflow_expo_max_q);
// Unpack the five enable bits from the latched FPSCR snapshot. Bits [0:4] of
// exx_fpscr_q are the "ee" field (VE, OE, UE, ZE, XE, in that order).
assign {VE, OE, UE, ZE, XE} = exx_fpscr_q[0:4];
// XE is also routed to a spare bit.
assign spare_unused[0] = XE;
//---------------------------------------------------------------------
// Operand A staging register (WIDTH = 71).
// din/dout order: sign, 13-bit exponent, fraction, then the five flags
// zero, expo_max, frac_zero, SPunderflow_zero, SPoverflow_expo_max.
// The act pin is driven by exx_running_act_q.
//---------------------------------------------------------------------
tri_rlmreg_p #(
   .WIDTH(71),
   .INIT(0),
   .NEEDS_SRESET(0)
) ex2_div_a_stage_lat (
   // supplies
   .vd(vdd),
   .gd(gnd),
   // clock and clock/test controls
   .nclk(nclk),
   .act(exx_running_act_q),
   .force_t(force_t),
   .d_mode(tiup),
   .delay_lclkr(delay_lclkr),
   .mpw1_b(mpw1_b),
   .mpw2_b(mpw2_b),
   .thold_b(thold_0_b),
   .sg(sg_0),
   // scan in / scan out
   .scin(ex2_div_a_stage_lat_scin),
   .scout(ex2_div_a_stage_lat_scout),
   // data in
   .din({exx_a_sign_d,
         exx_a_biased_13exp_d,
         exx_a_fract_d,
         exx_a_zero_d,
         exx_a_expo_max_d,
         exx_a_frac_zero_d,
         exx_a_SPunderflow_zero_d,
         exx_a_SPoverflow_expo_max_d}),
   // data out (same field order as din)
   .dout({exx_a_sign_q,
          exx_a_biased_13exp_q,
          exx_a_fract_q,
          exx_a_zero_q,
          exx_a_expo_max_q,
          exx_a_frac_zero_q,
          exx_a_SPunderflow_zero_q,
          exx_a_SPoverflow_expo_max_q})
);
// A-operand fraction bits 1..52 as presented in ex2
assign ex2_a_fract = f_fmt_ex2_a_frac_div[1:52];

//----------------------------------------------------------------------
// Operand B capture: load from the f_fmt_ex2_* inputs while ex2_anydivsqrt
// is high, otherwise recirculate the matching *_q value.
//----------------------------------------------------------------------
assign exx_b_sign_d         = (exx_b_sign_q & ~ex2_anydivsqrt) |
                              (f_fmt_ex2_b_sign_div & ex2_anydivsqrt);
assign exx_b_biased_13exp_d = ({13{~ex2_anydivsqrt}} & exx_b_biased_13exp_q) |
                              ({13{ex2_anydivsqrt}} & f_fmt_ex2_b_expo_div);
assign exx_b_fract_d        = ({52{~ex2_anydivsqrt}} & exx_b_fract_q) |
                              ({52{ex2_anydivsqrt}} & f_fmt_ex2_b_frac_div);

// ex2 special-value flags for operand B
assign ex2_b_zero      = f_fmt_ex2_b_zero;
assign ex2_b_expo_max  = f_fmt_ex2_b_expo_max;
assign ex2_b_frac_zero = f_fmt_ex2_b_frac_zero;
// the *_dsq flag is set but the plain flag is not, and the op is single precision
assign ex2_b_SPunderflow_zero    = f_fmt_ex2_b_zero_dsq & ex2_sp & ~f_fmt_ex2_b_zero;
assign ex2_b_SPoverflow_expo_max = f_fmt_ex2_b_expo_max_dsq & ex2_sp & ~ex2_b_expo_max;

// load-or-hold for each flag
assign exx_b_zero_d                = (exx_b_zero_q & ~ex2_anydivsqrt) |
                                     (ex2_b_zero & ex2_anydivsqrt);
assign exx_b_SPunderflow_zero_d    = (exx_b_SPunderflow_zero_q & ~ex2_anydivsqrt) |
                                     (ex2_b_SPunderflow_zero & ex2_anydivsqrt);
assign exx_b_expo_max_d            = (exx_b_expo_max_q & ~ex2_anydivsqrt) |
                                     (ex2_b_expo_max & ex2_anydivsqrt);
assign exx_b_SPoverflow_expo_max_d = (exx_b_SPoverflow_expo_max_q & ~ex2_anydivsqrt) |
                                     (ex2_b_SPoverflow_expo_max & ex2_anydivsqrt);
assign exx_b_frac_zero_d           = (exx_b_frac_zero_q & ~ex2_anydivsqrt) |
                                     (ex2_b_frac_zero & ex2_anydivsqrt);
//---------------------------------------------------------------------
// Operand B staging register (WIDTH = 71).
// din/dout order: sign, 13-bit exponent, fraction, then the five flags
// zero, expo_max, frac_zero, SPunderflow_zero, SPoverflow_expo_max.
// The act pin is driven by exx_running_act_q.
//---------------------------------------------------------------------
tri_rlmreg_p #(
   .WIDTH(71),
   .INIT(0),
   .NEEDS_SRESET(0)
) ex2_div_b_stage_lat (
   // supplies
   .vd(vdd),
   .gd(gnd),
   // clock and clock/test controls
   .nclk(nclk),
   .act(exx_running_act_q),
   .force_t(force_t),
   .d_mode(tiup),
   .delay_lclkr(delay_lclkr),
   .mpw1_b(mpw1_b),
   .mpw2_b(mpw2_b),
   .thold_b(thold_0_b),
   .sg(sg_0),
   // scan in / scan out
   .scin(ex2_div_b_stage_lat_scin),
   .scout(ex2_div_b_stage_lat_scout),
   // data in
   .din({exx_b_sign_d,
         exx_b_biased_13exp_d,
         exx_b_fract_d,
         exx_b_zero_d,
         exx_b_expo_max_d,
         exx_b_frac_zero_d,
         exx_b_SPunderflow_zero_d,
         exx_b_SPoverflow_expo_max_d}),
   // data out (same field order as din)
   .dout({exx_b_sign_q,
          exx_b_biased_13exp_q,
          exx_b_fract_q,
          exx_b_zero_q,
          exx_b_expo_max_q,
          exx_b_frac_zero_q,
          exx_b_SPunderflow_zero_q,
          exx_b_SPoverflow_expo_max_q})
);
// B-operand fraction bits 1..52 as presented in ex2
assign ex2_b_fract = f_fmt_ex2_b_frac_div[1:52];
//------------------------------------------------------------------------------
// unbias the exponents
//------------------------------------------------------------------------------
// bias is DP, so subtract 1023:
// 13'h1C01 (= 13'b1_1100_0000_0001) is -1023 in 13-bit two's complement
assign exx_b_ubexp = 13'h1C01 + exx_b_biased_13exp_q[1:13];
// Exponent pipeline register (WIDTH = 52, four fields).
// Each din field reappears on dout under the next stage prefix:
//   exx_b_ubexp              -> exy_b_ubexp
//   exx_exp_addres_div_x0    -> exy_exp_addres_div_x0
//   exx_exp_addres_sqrt_x0   -> exy_exp_addres_sqrt_x0
//   exy_exp_addres_div_x0_m1 -> exz_exp_addres_div_x0_m1
tri_rlmreg_p #(
   .WIDTH(52),
   .INIT(0),
   .NEEDS_SRESET(0)
) ex2_div_exp_lat (
   // supplies
   .vd(vdd),
   .gd(gnd),
   // clock and clock/test controls
   .nclk(nclk),
   .act(exx_running_act_q),
   .force_t(force_t),
   .d_mode(tiup),
   .delay_lclkr(delay_lclkr),
   .mpw1_b(mpw1_b),
   .mpw2_b(mpw2_b),
   .thold_b(thold_0_b),
   .sg(sg_0),
   // scan in / scan out
   .scin(ex2_div_exp_lat_scin),
   .scout(ex2_div_exp_lat_scout),
   // data in
   .din({exx_b_ubexp,
         exx_exp_addres_div_x0,
         exx_exp_addres_sqrt_x0,
         exy_exp_addres_div_x0_m1}),
   // data out (same field order as din)
   .dout({exy_b_ubexp,
          exy_exp_addres_div_x0,
          exy_exp_addres_sqrt_x0,
          exz_exp_addres_div_x0_m1})
);
//------------------------------------------------------------------------------
// counter/state machine
//------------------------------------------------------------------------------
assign ex2_divsqrt_hole_v_b = ~f_dcd_ex2_divsqrt_hole_v;

// Cycle-counter controls. By construction at most one of init/hold/decr/clear
// is active at a time; n_flush overrides everything.
assign ex1_cycles_init  = ~n_flush & (ex1_div | ex1_divs | ex1_sqrt | ex1_sqrts);
assign ex1_cycles_hold  = ~n_flush & ~ex1_cycles_init &
                          (ex2_divsqrt_zero | (ex2_divsqrt_done & ex2_divsqrt_hole_v_b));
assign ex1_cycles_decr  = ~n_flush & ~ex1_cycles_init & ~ex1_cycles_hold & exx_divsqrt_running_q;
assign ex1_cycles_clear = n_flush;

// next count: load the op cycle count, hold, clear to zero, or count down by one
assign ex1_cycles_d = ({8{ex1_cycles_init}}  & ex1_op_cyc_count) |
                      ({8{ex1_cycles_hold}}  & ex2_cycles_q) |
                      ({8{ex1_cycles_clear}} & 8'h00) |
                      ({8{ex1_cycles_decr}}  & (ex2_cycles_q - 8'd1));

// count decodes
assign ex2_divsqrt_zero = (ex2_cycles_q == 8'd0);
assign ex1_divsqrt_done = (ex2_cycles_q == 8'd2);
assign ex2_divsqrt_done = (ex2_cycles_q == 8'd1);

// done is only taken when the hole is valid; otherwise the unit waits for one
assign ex2_divsqrt_done_din = ex2_divsqrt_done & ~ex2_divsqrt_hole_v_b & ~n_flush;
assign ex2_waiting_for_hole = ex2_divsqrt_done & ex2_divsqrt_hole_v_b & ~ex1_cycles_init & ~n_flush;

// Hang counter: counts cycles spent waiting for a hole; trigger fires at a count of 32.
assign ex2_hangcounter_incr  = ex2_waiting_for_hole & ~ex3_hangcounter_trigger;
assign ex2_hangcounter_clear = n_flush | ex1_cycles_init | ex3_hangcounter_trigger |
                               (ex2_divsqrt_done & ~ex2_divsqrt_hole_v_b);
assign ex3_hangcounter_trigger = (ex3_hangcounter_q == 8'd32);
assign f_dsq_ex3_hangcounter_trigger = ex3_hangcounter_trigger;
assign ex2_hangcounter_din = ({8{ex2_hangcounter_clear}} & 8'h00) |
                             ({8{ex2_hangcounter_incr}}  & (ex3_hangcounter_q + 8'd1));

// running flag: set by any new div/sqrt, held while running, dropped on done or flush
assign ex1_divsqrt_running_d = (ex1_div | ex1_divs | ex1_sqrt | ex1_sqrts | exx_divsqrt_running_q) &
                               ~(ex2_divsqrt_done_din | n_flush);

// Per-operation attributes: captured when a new op arrives, held until the
// next ex1_anydivsqrt, forced to zero by n_flush.
assign exx_single_precision_d = (ex1_divs | ex1_sqrts | (exx_single_precision_q & ~ex1_anydivsqrt)) & ~n_flush;
assign exx_record_v_din = ((ex1_record_v & ex1_anydivsqrt) | (exx_record_v_q & ~ex1_anydivsqrt)) & ~n_flush;
assign exx_sqrt_d = (ex1_sqrt | ex1_sqrts | (exx_sqrt_q[0] & ~ex1_anydivsqrt)) & ~n_flush;
assign exx_div_d  = (ex1_div  | ex1_divs  | (exx_div_q[0]  & ~ex1_anydivsqrt)) & ~n_flush;

// precision aliases
assign ex4_sp = exx_single_precision_q;
assign ex4_dp = ~exx_single_precision_q;
assign exx_sp = exx_single_precision_q;
assign exx_dp = ~exx_single_precision_q;
// 8-bit hang-counter register; act is tied to tiup and NEEDS_SRESET is 1.
tri_rlmreg_p #(
   .WIDTH(8),
   .INIT(0),
   .NEEDS_SRESET(1)
) ex3_div_hangcounter_lat (
   // supplies
   .vd(vdd),
   .gd(gnd),
   // clock and clock/test controls
   .nclk(nclk),
   .act(tiup),
   .force_t(force_t),
   .d_mode(tiup),
   .delay_lclkr(delay_lclkr),
   .mpw1_b(mpw1_b),
   .mpw2_b(mpw2_b),
   .thold_b(thold_0_b),
   .sg(sg_0),
   // scan in / scan out
   .scin(ex3_div_hangcounter_lat_scin),
   .scout(ex3_div_hangcounter_lat_scout),
   // data
   .din({ex2_hangcounter_din}),
   .dout({ex3_hangcounter_q})
);
// Control-state register (WIDTH = 19): 8-bit cycle count, running flag,
// single-precision flag, four copies each of the sqrt and div op flags
// (read back as exx_sqrt_q[0:3] / exx_div_q[0:3]), and the record-valid flag.
// act is tied to tiup and NEEDS_SRESET is 1.
tri_rlmreg_p #(
   .WIDTH(19),
   .INIT(0),
   .NEEDS_SRESET(1)
) ex1_div_ctr_lat (
   // supplies
   .vd(vdd),
   .gd(gnd),
   // clock and clock/test controls
   .nclk(nclk),
   .act(tiup),
   .force_t(force_t),
   .d_mode(tiup),
   .delay_lclkr(delay_lclkr),
   .mpw1_b(mpw1_b),
   .mpw2_b(mpw2_b),
   .thold_b(thold_0_b),
   .sg(sg_0),
   // scan in / scan out
   .scin(ex1_div_ctr_lat_scin),
   .scout(ex1_div_ctr_lat_scout),
   // data in
   .din({ex1_cycles_d,
         ex1_divsqrt_running_d,
         exx_single_precision_d,
         exx_sqrt_d, exx_sqrt_d, exx_sqrt_d, exx_sqrt_d,
         exx_div_d, exx_div_d, exx_div_d, exx_div_d,
         exx_record_v_din}),
   // data out (same field order as din)
   .dout({ex2_cycles_q,
          exx_divsqrt_running_q,
          exx_single_precision_q,
          exx_sqrt_q[0:3],
          exx_div_q[0:3],
          exx_record_v_q})
);
//------------------------------------------------------------------------------
// fraction path
//------------------------------------------------------------------------------
//-------------------------------------------------------------------
// Initial 4-bit add and quotient select
//-------------------------------------------------------------------
// Denominator register input: on ex2_anydivsqrt load {1, B fraction, 000};
// otherwise recirculate exx_denom_q while exx_divsqrt_running_q is set.
// Neither term is selected when idle, so the input is then all zeros.
assign exx_denom_d = (exx_denom_q & {56{(exx_divsqrt_running_q & (~ex2_anydivsqrt))}}) |
({{({1'b1, ex2_b_fract, 3'b000})}} & {56{(ex2_anydivsqrt)}});
//------------------------------------------------------------------------------------------------------------------------------------------------
// Partial-remainder SUM register input.
//   sel0/sel1/sel2 : initial value for a new div / sqrt (even exp) / sqrt (odd exp)
//   sel3           : normal iteration, take the value from the final CSA
//   sel4           : hold while done but no hole is available
assign exx_PR_sum_shift = exx_PR_sum_final;
assign ex2_PR_sum_sel0 = ex2_div_or_divs; // initialize div
assign ex2_PR_sum_sel1 = ex2_sqrt_or_sqrts & (~f_fmt_ex2_b_expo_div_b[13]); // initialize sqrt, even exponent
assign ex2_PR_sum_sel2 = ex2_sqrt_or_sqrts & f_fmt_ex2_b_expo_div_b[13]; // initialize sqrt, odd exponent
assign ex2_PR_sum_sel3 = (~ex2_anydivsqrt) & (~(ex2_divsqrt_done & ex2_divsqrt_hole_v_b));
assign ex2_PR_sum_sel4 = ex2_divsqrt_done & ex2_divsqrt_hole_v_b;
assign ex2_PR_sum_sel_late = ex2_PR_sum_sel3;
assign ex2_PR_sum_sel_early = ex2_PR_sum_sel0 | ex2_PR_sum_sel1 | ex2_PR_sum_sel2 | ex2_PR_sum_sel4;
// "early" candidates, one term per select, in this order:
//   sel0 div                : {0001, A fraction, 0}
//   sel1 sqrt even exponent : {0001, B fraction, 0}
//   sel2 sqrt odd exponent  : {001,  B fraction, 00}
//   sel4 hold               : current exx_PR_sum_q
assign exx_PR_sum_d_early = (({4'b0001, ex2_a_fract[1:52], 1'b0}) & {57{ex2_PR_sum_sel0}}) |
(({4'b0001, ex2_b_fract[1:52], 1'b0}) & {57{ex2_PR_sum_sel1}}) |
(({3'b001, ex2_b_fract[1:52], 2'b00}) & {57{ex2_PR_sum_sel2}}) |
(exx_PR_sum_q & {57{ex2_PR_sum_sel4}}); // hold
// "late" candidate is the iteration result; the final mux picks late or early
assign exx_PR_sum_d_late = exx_PR_sum_shift;
assign exx_PR_sum_d = (exx_PR_sum_d_late & {57{ex2_PR_sum_sel_late}}) |
(exx_PR_sum_d_early & {57{ex2_PR_sum_sel_early}});
// Partial-remainder CARRY register input:
//   sel0 : new operation, carry starts at zero
//   sel1 : normal iteration, take the value from the final CSA
//   sel2 : hold while done but no hole is available
assign exx_PR_carry_shift = exx_PR_carry_final;
assign ex2_PR_carry_sel0 = ex2_anydivsqrt;
assign ex2_PR_carry_sel1 = (~ex2_anydivsqrt) & (~(ex2_divsqrt_done & ex2_divsqrt_hole_v_b));
assign ex2_PR_carry_sel2 = ex2_divsqrt_done & ex2_divsqrt_hole_v_b; // hold
assign exx_PR_carry_d = ({57{1'b0}} & {57{ex2_PR_carry_sel0}}) |
(exx_PR_carry_shift & {57{ex2_PR_carry_sel1}}) |
(exx_PR_carry_q & {57{ex2_PR_carry_sel2}});
// Partial-remainder register (WIDTH = 114): 57-bit sum followed by 57-bit carry.
// The act pin is driven by exx_running_act_q.
tri_rlmreg_p #(
   .WIDTH(114),
   .INIT(0),
   .NEEDS_SRESET(0)
) ex3_div_PR_sumcarry_lat (
   // supplies
   .vd(vdd),
   .gd(gnd),
   // clock and clock/test controls
   .nclk(nclk),
   .act(exx_running_act_q),
   .force_t(force_t),
   .d_mode(tiup),
   .delay_lclkr(delay_lclkr),
   .mpw1_b(mpw1_b),
   .mpw2_b(mpw2_b),
   .thold_b(thold_0_b),
   .sg(sg_0),
   // scan in / scan out
   .scin(ex3_div_PR_sumcarry_lat_scin),
   .scout(ex3_div_PR_sumcarry_lat_scout),
   // data
   .din({exx_PR_sum_d, exx_PR_carry_d}),
   .dout({exx_PR_sum_q, exx_PR_carry_q})
);
// Separate 8-bit register holding bits 0..3 of the partial-remainder sum and
// carry inputs; its outputs feed the level-0 4-bit adder. act is tied to tiup.
tri_rlmreg_p #(
   .WIDTH(8),
   .INIT(0),
   .NEEDS_SRESET(0)
) ex3_div_PR_sum4carry4_lat (
   // supplies
   .vd(vdd),
   .gd(gnd),
   // clock and clock/test controls
   .nclk(nclk),
   .act(tiup),
   .force_t(force_t),
   .d_mode(tiup),
   .delay_lclkr(delay_lclkr),
   .mpw1_b(mpw1_b),
   .mpw2_b(mpw2_b),
   .thold_b(thold_0_b),
   .sg(sg_0),
   // scan in / scan out
   .scin(ex3_div_PR_sum4carry4_lat_scin),
   .scout(ex3_div_PR_sum4carry4_lat_scout),
   // data
   .din({exx_PR_sum_d[0:3], exx_PR_carry_d[0:3]}),
   .dout({exx_PR_sum4_q, exx_PR_carry4_q})
);
// Q / QM register pair (WIDTH = 114): exx_Q_d followed by exx_QM_d.
// The act pin is driven by exx_running_act_q.
tri_rlmreg_p #(
   .WIDTH(114),
   .INIT(0),
   .NEEDS_SRESET(0)
) ex3_div_Q_QM_lat (
   // supplies
   .vd(vdd),
   .gd(gnd),
   // clock and clock/test controls
   .nclk(nclk),
   .act(exx_running_act_q),
   .force_t(force_t),
   .d_mode(tiup),
   .delay_lclkr(delay_lclkr),
   .mpw1_b(mpw1_b),
   .mpw2_b(mpw2_b),
   .thold_b(thold_0_b),
   .sg(sg_0),
   // scan in / scan out
   .scin(ex3_div_Q_QM_lat_scin),
   .scout(ex3_div_Q_QM_lat_scout),
   // data
   .din({exx_Q_d, exx_QM_d}),
   .dout({exx_Q_q, exx_QM_q})
);
// bQ / bQM register pair used by the square-root path (WIDTH = 114):
// exx_bQ_d followed by exx_bQM_d. The act pin is driven by exx_running_act_q.
tri_rlmreg_p #(
   .WIDTH(114),
   .INIT(0),
   .NEEDS_SRESET(0)
) ex3_div_bQ_QM_lat (
   // supplies
   .vd(vdd),
   .gd(gnd),
   // clock and clock/test controls
   .nclk(nclk),
   .act(exx_running_act_q),
   .force_t(force_t),
   .d_mode(tiup),
   .delay_lclkr(delay_lclkr),
   .mpw1_b(mpw1_b),
   .mpw2_b(mpw2_b),
   .thold_b(thold_0_b),
   .sg(sg_0),
   // scan in / scan out
   .scin(ex3_div_bQ_QM_lat_scin),
   .scout(ex3_div_bQ_QM_lat_scout),
   // data
   .din({exx_bQ_d, exx_bQM_d}),
   .dout({exx_bQ_q, exx_bQM_q})
);
// Square-root bit-mask registers (WIDTH = 168): newbitmask, Qbitmask and
// QMbitmask, in that order. The act pin is driven by exx_running_act_q.
tri_rlmreg_p #(
   .WIDTH(168),
   .INIT(0),
   .NEEDS_SRESET(0)
) ex3_sqrt_bitmask_lat (
   // supplies
   .vd(vdd),
   .gd(gnd),
   // clock and clock/test controls
   .nclk(nclk),
   .act(exx_running_act_q),
   .force_t(force_t),
   .d_mode(tiup),
   .delay_lclkr(delay_lclkr),
   .mpw1_b(mpw1_b),
   .mpw2_b(mpw2_b),
   .thold_b(thold_0_b),
   .sg(sg_0),
   // scan in / scan out
   .scin(ex3_sqrt_bitmask_lat_scin),
   .scout(ex3_sqrt_bitmask_lat_scout),
   // data in
   .din({exx_sqrt_newbitmask_din,
         exx_sqrt_Qbitmask_din,
         exx_sqrt_QMbitmask_din}),
   // data out (same field order as din)
   .dout({exx_sqrt_newbitmask_q,
          exx_sqrt_Qbitmask_q,
          exx_sqrt_QMbitmask_q})
);
// 56-bit denominator register. The act pin is driven by exx_running_act_q.
// Note: the scan nets are named ex3_denom_lat_* (without "div").
tri_rlmreg_p #(
   .WIDTH(56),
   .INIT(0),
   .NEEDS_SRESET(0)
) ex3_div_denom_lat (
   // supplies
   .vd(vdd),
   .gd(gnd),
   // clock and clock/test controls
   .nclk(nclk),
   .act(exx_running_act_q),
   .force_t(force_t),
   .d_mode(tiup),
   .delay_lclkr(delay_lclkr),
   .mpw1_b(mpw1_b),
   .mpw2_b(mpw2_b),
   .thold_b(thold_0_b),
   .sg(sg_0),
   // scan in / scan out
   .scin(ex3_denom_lat_scin),
   .scout(ex3_denom_lat_scout),
   // data
   .din(exx_denom_d),
   .dout(exx_denom_q)
);
//----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
// Level-0 quotient-digit selection.
// The 4-bit sum/carry slices held in the sum4/carry4 register are added, and
// the 4-bit result indexes the q and nq tables. Per the on-the-fly conversion
// comments in this file, q_bit0 marks a positive digit and nq_bit0 a negative one.
fu_divsqrt_add4 lev0_add4(
.x(exx_PR_sum4_q[0:3]),
.y(exx_PR_carry4_q[0:3]),
//------------------------------------------------------
.s(exx_sum4)
);
// cin for the q table: OR of bit 5 of the full-width sum and carry registers
assign exx_q_bit0_cin = exx_PR_sum_q[5] | exx_PR_carry_q[5];
fu_divsqrt_q_table lev0_div_q_table(
.x(exx_sum4[0:3]),
.cin(exx_q_bit0_cin),
//------------------------------------------------------
.q(exx_q_bit0_prebuf)
);
fu_divsqrt_nq_table lev0_div_nq_table(
.x(exx_sum4[0:3]),
//------------------------------------------------------
.nq(exx_nq_bit0_prebuf)
);
// Double inversion of the table outputs: logically a buffer
// (presumably kept as an explicit inverter pair for drive/fan-out -- confirm).
assign exx_q_bit0_b = (~exx_q_bit0_prebuf);
assign exx_nq_bit0_b = (~exx_nq_bit0_prebuf);
assign exx_q_bit0 = (~exx_q_bit0_b);
assign exx_nq_bit0 = (~exx_nq_bit0_b);
//----------------------------------------------------------------------------------------------------------------------------------------------------
//-------------------------------------------------------------------
// on-the-fly quotient digit conversion logic for level 0
//-------------------------------------------------------------------
// Both results are the previous Q or QM shifted left by one position
// (bits 1..56 kept) with a new bit appended on the right.
// Qin=(Q & q) if q >= 0. Qin=(QM & 1) if q < 0
assign exx_Qin_lev0_sel0 = exx_q_bit0 | ((~exx_nq_bit0));
assign exx_Qin_lev0_sel1 = exx_nq_bit0;
assign exx_Qin_lev0[0:56] = (({exx_Q_q[1:56], exx_q_bit0}) & {57{exx_Qin_lev0_sel0}}) |
(({exx_QM_q[1:56], 1'b1}) & {57{exx_Qin_lev0_sel1}});
// QMin=(Q & 0) if q > 0. QMin=(QM & 0) if q < 0. QMin=(QM & 1) if q = 0
assign exx_QMin_lev0_sel0 = exx_q_bit0;
assign exx_QMin_lev0_sel1 = exx_nq_bit0;
assign exx_QMin_lev0_sel2 = (~(exx_nq_bit0 | exx_q_bit0));
assign exx_QMin_lev0[0:56] = (({exx_Q_q[1:56], 1'b0}) & {57{exx_QMin_lev0_sel0}}) |
(({exx_QM_q[1:56], 1'b0}) & {57{exx_QMin_lev0_sel1}}) |
(({exx_QM_q[1:56], 1'b1}) & {57{exx_QMin_lev0_sel2}});
// massage Q and QM for use with square root
// sel_denom_pre1 = ~(((Q << 2) | 1) << 29-i);
// sel_denom_pre3 = (((QM << 2) | 3) << 29-i);
// the level-0 sqrt operands are the upper 56 bits of the masked bQ / bQM values
assign exx_sqrtlev0_Q[0:55] = exx_bQ_q_t[0:55];
assign exx_sqrtlev0_MQ[0:55] = exx_bQM_q_t[0:55];
//-------------------------------------------------------------------
// Initial Denominator mux and 3:2 CSA
//-------------------------------------------------------------------
// partial remainder (sum and carry) shifted left by one bit position
assign exx_PR_sum_q_shifted = {exx_PR_sum_q[1:56], 1'b0};
assign exx_PR_carry_q_shifted = {exx_PR_carry_q[1:56], 1'b0};
// selneg is set for a positive level-0 digit (q set, nq clear); it is used
// below as the top bit of the addend and as the bit shifted into the carry
// vector (presumably the +1 that completes the two's-complement negation -- confirm).
assign exx_lev0_selneg = exx_q_bit0 & (~exx_nq_bit0);
// addend selects: div uses +/- denominator, sqrt uses the massaged Q / MQ
//   selD  : negative digit & div  -> add  denom
//   selnD : positive digit & div  -> add ~denom
//   selQ  : positive digit & sqrt -> add ~sqrtlev0_Q
//   selMQ : negative digit & sqrt -> add  sqrtlev0_MQ
assign exx_lev0_selD_b = (~(exx_nq_bit0 & exx_div_q[0]));
assign exx_lev0_selnD_b = (~(exx_q_bit0 & exx_div_q[0]));
assign exx_lev0_selD = (~exx_lev0_selD_b);
assign exx_lev0_selnD = (~exx_lev0_selnD_b);
assign exx_lev0_selQ_b = (~(exx_q_bit0 & exx_sqrt_q[0]));
assign exx_lev0_selMQ_b = (~(exx_nq_bit0 & exx_sqrt_q[0]));
assign exx_lev0_selQ = (~exx_lev0_selQ_b);
assign exx_lev0_selMQ = (~exx_lev0_selMQ_b);
// AND-OR mux written in NAND-NAND form: each *_b term is the inverted product,
// and the final NAND of the four terms yields the selected addend
// (all zeros when the digit is zero and no select is active).
assign exx_denomQ_lev0_nD_b = (~((~exx_denom_q) & {56{exx_lev0_selnD}}));
assign exx_denomQ_lev0_D_b = (~(exx_denom_q & {56{exx_lev0_selD}}));
assign exx_denomQ_lev0_Q_b = (~((~exx_sqrtlev0_Q) & {56{exx_lev0_selQ}}));
assign exx_denomQ_lev0_MQ_b = (~(exx_sqrtlev0_MQ & {56{exx_lev0_selMQ}}));
assign exx_denomQ_lev0 = (~(exx_denomQ_lev0_nD_b & exx_denomQ_lev0_D_b & exx_denomQ_lev0_Q_b & exx_denomQ_lev0_MQ_b));
// CSA sum = addend ^ (sum ^ carry); tri_xor2 ports are positional
// (judging by usage here the first port is the output -- confirm against tri_xor2).
tri_xor2 #(.WIDTH(57)) DIVSQRT_XOR2_exx_lev0_div_csaout_sum(exx_lev0_csaoutsh_sum,
{exx_lev0_selneg, exx_denomQ_lev0},
exx_lev0_divsqrt_csaout_xor);
// CSA carry = bitwise majority of the three inputs ...
assign exx_lev0_csaout_carryout = (({exx_lev0_selneg, exx_denomQ_lev0}) & exx_PR_sum_q_shifted) |
(({exx_lev0_selneg, exx_denomQ_lev0}) & exx_PR_carry_q_shifted) |
(exx_PR_sum_q_shifted & exx_PR_carry_q_shifted);
// ... moved up one position, with selneg entering at the least-significant bit
assign exx_lev0_csaoutsh_carry[0:56] = {exx_lev0_csaout_carryout[1:56], exx_lev0_selneg};
// Three identical copies of (shifted sum ^ shifted carry), one each for the
// lev0, lev1 and lev3 paths (presumably replicated for fan-out/timing -- confirm).
tri_xor2 #(.WIDTH(57)) DIVSQRT_XOR2_exx_lev0_div_csaout_xor(exx_lev0_divsqrt_csaout_xor,
exx_PR_sum_q_shifted,
exx_PR_carry_q_shifted);
tri_xor2 #(.WIDTH(57)) DIVSQRT_XOR2_exx_lev1_div_csaout_xor(exx_lev1_divsqrt_csaout_xor,
exx_PR_sum_q_shifted,
exx_PR_carry_q_shifted);
tri_xor2 #(.WIDTH(57)) DIVSQRT_XOR2_exx_lev3_div_csaout_xor(exx_lev3_divsqrt_csaout_xor,
exx_PR_sum_q_shifted,
exx_PR_carry_q_shifted);
//-------------------------------------------------------------------
// Pick -d, 0, +d
//-------------------------------------------------------------------
// The three cases (lev1: -d, lev2: 0, lev3: +d) are each evaluated from the
// same shifted partial remainder; the mux further down picks one of them
// using the actual level-0 digit.
// lev1: neg d, +q ========================================================
// operand = {1, ~denom} for div, {1, ~sqrtlev0_Q} for sqrt
assign exx_lev1_div_oper = ({1'b1, (~exx_denom_q)});
assign exx_lev1_sqrt_oper = ({1'b1, (~exx_sqrtlev0_Q)});
// CSA sum = operand ^ (sum ^ carry)
tri_xor2 #(.WIDTH(57)) DIVSQRT_XOR2_exx_lev1_div_csaout_sum(exx_lev1_div_csaout_sum,
exx_lev1_div_oper,
exx_lev1_divsqrt_csaout_xor);
tri_xor2 #(.WIDTH(57)) DIVSQRT_XOR2_exx_lev1_sqrt_csaout_sum(exx_lev1_sqrt_csaout_sum,
exx_lev1_sqrt_oper,
exx_lev1_divsqrt_csaout_xor);
// CSA carry = bitwise majority of operand, shifted sum and shifted carry
assign exx_lev1_div_csaout_carryout = (exx_lev1_div_oper & exx_PR_sum_q_shifted) | (exx_lev1_div_oper & exx_PR_carry_q_shifted) | (exx_PR_sum_q_shifted & exx_PR_carry_q_shifted);
assign exx_lev1_sqrt_csaout_carryout = (exx_lev1_sqrt_oper & exx_PR_sum_q_shifted) | (exx_lev1_sqrt_oper & exx_PR_carry_q_shifted) | (exx_PR_sum_q_shifted & exx_PR_carry_q_shifted);
// carry moved up one position with a constant 1 entering at the LSB
// (the operand above is inverted; presumably this 1 completes the negation -- confirm)
assign exx_lev1_div_csaout_carry[0:56] = {exx_lev1_div_csaout_carryout[1:56], 1'b1};
assign exx_lev1_sqrt_csaout_carry[0:56] = {exx_lev1_sqrt_csaout_carryout[1:56], 1'b1};
// 4-bit add of the top sum/carry bits, separately for div and sqrt
fu_divsqrt_add4 lev1_div_add4(
.x(exx_lev1_div_csaout_sum[0:3]),
.y(exx_lev1_div_csaout_carry[0:3]),
//------------------------------------------------------
.s(exx_sum4_lev1_div)
);
fu_divsqrt_add4 lev1_sqrt_add4(
.x(exx_lev1_sqrt_csaout_sum[0:3]),
.y(exx_lev1_sqrt_csaout_carry[0:3]),
//------------------------------------------------------
.s(exx_sum4_lev1_sqrt)
);
// q-table cin: OR of bit 5 of the CSA sum and carry
assign exx_q_bit1_cin_div = exx_lev1_div_csaout_sum[5] | exx_lev1_div_csaout_carry[5];
assign exx_q_bit1_cin_sqrt = exx_lev1_sqrt_csaout_sum[5] | exx_lev1_sqrt_csaout_carry[5];
fu_divsqrt_q_table lev1_div_q_table(
.x(exx_sum4_lev1_div[0:3]),
.cin(exx_q_bit1_cin_div),
//------------------------------------------------------
.q(exx_q_bit1_div)
);
fu_divsqrt_q_table lev1_sqrt_q_table(
.x(exx_sum4_lev1_sqrt[0:3]),
.cin(exx_q_bit1_cin_sqrt ),
//------------------------------------------------------
.q(exx_q_bit1_sqrt)
);
fu_divsqrt_nq_table lev1_div_nq_table(
.x(exx_sum4_lev1_div[0:3]),
//------------------------------------------------------
.nq(exx_nq_bit1_div )
);
fu_divsqrt_nq_table lev1_sqrt_nq_table(
.x(exx_sum4_lev1_sqrt[0:3]),
//------------------------------------------------------
.nq(exx_nq_bit1_sqrt )
);
// notqornq: 4-bit sum is 0000 with cin clear, or 1111. It is used later as the
// "digit is zero" select in place of ~(q | nq).
assign exx_notqornq_bit1_sqrt = ((exx_sum4_lev1_sqrt == 4'b0000) & (~exx_q_bit1_cin_sqrt)) |
(exx_sum4_lev1_sqrt == 4'b1111) ;
assign exx_notqornq_bit1_div = ((exx_sum4_lev1_div == 4'b0000) & (~exx_q_bit1_cin_div)) |
(exx_sum4_lev1_div == 4'b1111) ;
// combine div and sqrt results using copy [1] of the op-type flags
assign exx_q_bit1 = (exx_q_bit1_div & exx_div_q[1]) | (exx_q_bit1_sqrt & exx_sqrt_q[1]);
assign exx_nq_bit1 = (exx_nq_bit1_div & exx_div_q[1]) | (exx_nq_bit1_sqrt & exx_sqrt_q[1]);
// zero: lev2 ===========================================================
// Nothing is added in this case, so the CSA reduces to a half adder:
// sum = s ^ c, carry = s & c moved up one position with 0 at the LSB.
// The result is shared by div and sqrt.
assign exx_lev2_csaout_sum = exx_PR_sum_q_shifted ^ exx_PR_carry_q_shifted;
assign exx_lev2_csaout_carryout = (exx_PR_sum_q_shifted & exx_PR_carry_q_shifted);
assign exx_lev2_csaout_carry[0:56] = {exx_lev2_csaout_carryout[1:56], 1'b0};
fu_divsqrt_add4 lev2_add4(
.x(exx_lev2_csaout_sum[0:3]),
.y(exx_lev2_csaout_carry[0:3]),
//------------------------------------------------------
.s(exx_sum4_lev2)
);
// q-table cin: OR of bit 5 of the CSA sum and carry
assign exx_q_bit2_cin = exx_lev2_csaout_sum[5] | exx_lev2_csaout_carry[5];
fu_divsqrt_q_table lev2_div_q_table(
.x(exx_sum4_lev2[0:3]),
.cin(exx_q_bit2_cin),
//------------------------------------------------------
.q(exx_q_bit2)
);
fu_divsqrt_nq_table lev2_nq_table(
.x(exx_sum4_lev2[0:3]),
//------------------------------------------------------
.nq(exx_nq_bit2 )
);
// notqornq: 4-bit sum is 0000 with cin clear, or 1111
assign exx_notqornq_bit2 = ((exx_sum4_lev2 == 4'b0000) & (~exx_q_bit2_cin)) |
(exx_sum4_lev2 == 4'b1111) ;
// pos d, -q: lev3 =======================================================
// operand = {0, denom} for div, {0, sqrtlev0_MQ} for sqrt (added uninverted)
assign exx_lev3_div_oper = ({1'b0, exx_denom_q});
assign exx_lev3_sqrt_oper = ({1'b0, exx_sqrtlev0_MQ});
// CSA sum = operand ^ (sum ^ carry)
tri_xor2 #(.WIDTH(57)) DIVSQRT_XOR2_exx_lev3_div_csaout_sum(exx_lev3_div_csaout_sum,
exx_lev3_div_oper,
exx_lev3_divsqrt_csaout_xor);
tri_xor2 #(.WIDTH(57)) DIVSQRT_XOR2_exx_lev3_sqrt_csaout_sum(exx_lev3_sqrt_csaout_sum,
exx_lev3_sqrt_oper,
exx_lev3_divsqrt_csaout_xor);
// CSA carry = bitwise majority of operand, shifted sum and shifted carry
assign exx_lev3_div_csaout_carryout = (exx_lev3_div_oper & exx_PR_sum_q_shifted) | (exx_lev3_div_oper & exx_PR_carry_q_shifted) | (exx_PR_sum_q_shifted & exx_PR_carry_q_shifted);
assign exx_lev3_sqrt_csaout_carryout = (exx_lev3_sqrt_oper & exx_PR_sum_q_shifted) | (exx_lev3_sqrt_oper & exx_PR_carry_q_shifted) | (exx_PR_sum_q_shifted & exx_PR_carry_q_shifted);
// carry moved up one position with a constant 0 entering at the LSB
assign exx_lev3_div_csaout_carry[0:56] = {exx_lev3_div_csaout_carryout[1:56], 1'b0};
assign exx_lev3_sqrt_csaout_carry[0:56] = {exx_lev3_sqrt_csaout_carryout[1:56], 1'b0};
// 4-bit add of the top sum/carry bits, separately for div and sqrt
fu_divsqrt_add4 lev3_div_add4(
.x(exx_lev3_div_csaout_sum[0:3]),
.y(exx_lev3_div_csaout_carry[0:3]),
//------------------------------------------------------
.s(exx_sum4_lev3_div)
);
fu_divsqrt_add4 lev3_sqrt_add4(
.x(exx_lev3_sqrt_csaout_sum[0:3]),
.y(exx_lev3_sqrt_csaout_carry[0:3]),
//------------------------------------------------------
.s(exx_sum4_lev3_sqrt)
);
// q-table cin: OR of bit 5 of the CSA sum and carry
assign exx_q_bit3_cin_div = exx_lev3_div_csaout_sum[5] | exx_lev3_div_csaout_carry[5];
assign exx_q_bit3_cin_sqrt = exx_lev3_sqrt_csaout_sum[5] | exx_lev3_sqrt_csaout_carry[5];
fu_divsqrt_q_table lev3_div_q_table(
.x(exx_sum4_lev3_div[0:3]),
.cin(exx_q_bit3_cin_div),
//------------------------------------------------------
.q(exx_q_bit3_div)
);
fu_divsqrt_q_table lev3_sqrt_q_table(
.x(exx_sum4_lev3_sqrt[0:3]),
.cin(exx_q_bit3_cin_sqrt),
//------------------------------------------------------
.q(exx_q_bit3_sqrt)
);
fu_divsqrt_nq_table lev3_div_nq_table(
.x(exx_sum4_lev3_div[0:3]),
//------------------------------------------------------
.nq(exx_nq_bit3_div )
);
fu_divsqrt_nq_table lev3_sqrt_nq_table(
.x(exx_sum4_lev3_sqrt[0:3]),
//------------------------------------------------------
.nq(exx_nq_bit3_sqrt )
);
// notqornq: 4-bit sum is 0000 with cin clear, or 1111
assign exx_notqornq_bit3_sqrt = ((exx_sum4_lev3_sqrt == 4'b0000) & (~exx_q_bit3_cin_sqrt)) |
(exx_sum4_lev3_sqrt == 4'b1111) ;
assign exx_notqornq_bit3_div = ((exx_sum4_lev3_div == 4'b0000) & (~exx_q_bit3_cin_div)) |
(exx_sum4_lev3_div == 4'b1111) ;
// combine div and sqrt results using copy [2] of the op-type flags
assign exx_q_bit3 = (exx_q_bit3_div & exx_div_q[2]) | (exx_q_bit3_sqrt & exx_sqrt_q[2]);
assign exx_nq_bit3 = (exx_nq_bit3_div & exx_div_q[2]) | (exx_nq_bit3_sqrt & exx_sqrt_q[2]);
//-------------------------------------------------------------------
// Mux between these three to get the next quotient bit
//-------------------------------------------------------------------
// Select = {q_bit0, nq_bit0}, the level-0 digit:
//   10 (positive) -> lev1 result (-d was applied)
//   00 (zero)     -> lev2 result (nothing applied)
//   01 (negative) -> lev3 result (+d was applied)
//   11            -> outputs forced to 0
assign exx_q_bit22_sel = {exx_q_bit0, exx_nq_bit0};
// sqrt flavour
assign exx_q_bit22_sqrt = (exx_q_bit22_sel == 2'b10) ? exx_q_bit1_sqrt :
(exx_q_bit22_sel == 2'b00) ? exx_q_bit2 :
(exx_q_bit22_sel == 2'b01) ? exx_q_bit3_sqrt :
1'b0;
assign exx_nq_bit22_sqrt = (exx_q_bit22_sel == 2'b10) ? exx_nq_bit1_sqrt :
(exx_q_bit22_sel == 2'b00) ? exx_nq_bit2 :
(exx_q_bit22_sel == 2'b01) ? exx_nq_bit3_sqrt :
1'b0;
assign exx_notqornq_bit22_sqrt = (exx_q_bit22_sel == 2'b10) ? exx_notqornq_bit1_sqrt :
(exx_q_bit22_sel == 2'b00) ? exx_notqornq_bit2 :
(exx_q_bit22_sel == 2'b01) ? exx_notqornq_bit3_sqrt :
1'b0;
// div flavour
assign exx_q_bit22_div = (exx_q_bit22_sel == 2'b10) ? exx_q_bit1_div :
(exx_q_bit22_sel == 2'b00) ? exx_q_bit2 :
(exx_q_bit22_sel == 2'b01) ? exx_q_bit3_div :
1'b0;
assign exx_nq_bit22_div = (exx_q_bit22_sel == 2'b10) ? exx_nq_bit1_div :
(exx_q_bit22_sel == 2'b00) ? exx_nq_bit2 :
(exx_q_bit22_sel == 2'b01) ? exx_nq_bit3_div :
1'b0;
assign exx_notqornq_bit22_div = (exx_q_bit22_sel == 2'b10) ? exx_notqornq_bit1_div :
(exx_q_bit22_sel == 2'b00) ? exx_notqornq_bit2 :
(exx_q_bit22_sel == 2'b01) ? exx_notqornq_bit3_div :
1'b0;
// merge div and sqrt using copy [2] of the op-type flags
assign exx_q_bit22 = (exx_q_bit22_div & exx_div_q[2]) | (exx_q_bit22_sqrt & exx_sqrt_q[2]);
assign exx_nq_bit22 = (exx_nq_bit22_div & exx_div_q[2]) | (exx_nq_bit22_sqrt & exx_sqrt_q[2]);
// massage Q and QM for use with square root
// sel_denom_pre1 = ~(((Q << 2) | 1) << 29-i);
// sel_denom_pre3 = (((QM << 2) | 3) << 29-i);
// sel_denom_1 = ~(((Q << 2) | 1) << 28-i);
// sel_denom_3 = (((QM << 2) | 3) << 28-i);
// bQ / bQM after the level-0 digit, chosen with the same selects as Qin / QMin
// at level 0 (bQMin takes bQ when the digit is positive, bQM otherwise)
assign exx_bQin_lev0[0:55] = ((exx_bQ_q[0:55]) & {56{exx_Qin_lev0_sel0}}) |
((exx_bQM_q[0:55]) & {56{exx_Qin_lev0_sel1}});
assign exx_bQMin_lev0[0:55] = ((exx_bQ_q[0:55]) & {56{exx_QMin_lev0_sel0}}) |
((exx_bQM_q[0:55]) & {56{(~exx_QMin_lev0_sel0)}});
// OR in the Q / QM bit masks moved up by one position
assign exx_bQin_lev0_t[0:55] = exx_bQin_lev0 | ({exx_sqrt_Qbitmask_q[1:55], 1'b0});
assign exx_bQMin_lev0_t[0:55] = exx_bQMin_lev0 | ({exx_sqrt_QMbitmask_q[1:55], 1'b0});
// Second-step sqrt operands, built per bit position from three sources:
//   newbitmask position(s) : the level-0 result bit (wQ / wMQ)
//   Q/QM bitmask positions : forced to 1
//   everywhere else        : the masked bQin / bQMin value
assign exx_sqrtlev22_Q[0:55] = (exx_sqrt_Qmaskvec[0:55] & exx_sqrt_newbitmask_q[0:55]) |
({56{1'b1}} & exx_sqrt_Qbitmask_q[0:55]) |
(exx_bQin_lev0_t[0:55] & (~(exx_sqrt_newbitmask_q[0:55] | exx_sqrt_QMbitmask_q[0:55]))); // need QM for 3 bit mask
assign exx_sqrtlev22_MQ[0:55] = (exx_sqrt_QMmaskvec[0:55] & exx_sqrt_newbitmask_q[0:55]) |
({56{1'b1}} & exx_sqrt_QMbitmask_q[0:55]) |
(exx_bQMin_lev0_t[0:55] & (~(exx_sqrt_newbitmask_q[0:55] | exx_sqrt_QMbitmask_q[0:55])));
// wQ / wMQ are the newest (rightmost) bits of Qin_lev0 / QMin_lev0, replicated to 56 bits
assign exx_sqrt_Qmaskvec[0:55] = {56{wQ}};
assign exx_sqrt_QMmaskvec[0:55] = {56{wMQ}};
assign wQ = exx_Qin_lev0[56];
assign wMQ = exx_QMin_lev0[56];
//-------------------------------------------------------------------
// Final Denominator mux and 3:2 CSA
//-------------------------------------------------------------------
// shift left by 1 again
assign exx_lev0_csaout_sum[0:56] = {exx_lev0_csaoutsh_sum[1:56], 1'b0};
assign exx_lev0_csaout_carry[0:56] = {exx_lev0_csaoutsh_carry[1:56], 1'b0};
// Second-digit selects. The trailing comments show fuller qualifications that
// are commented out; as written, each select is the bare digit bit.
assign exx_lev22_selneg = exx_q_bit22; //exx_q_bit22 & (~exx_nq_bit22);
assign exx_lev22_selD = exx_nq_bit22_div; // and not exx_q_bit22_div and exx_div_q(0);
assign exx_lev22_selnD = exx_q_bit22_div; // and not exx_nq_bit22_div and exx_div_q(0);
assign exx_lev22_selQ = exx_q_bit22_sqrt; // and not exx_nq_bit22_sqrt and exx_sqrt_q(0);
assign exx_lev22_selMQ = exx_nq_bit22_sqrt; // and not exx_q_bit22_sqrt and exx_sqrt_q(0);
// addend for div: ~denom (positive digit) or denom (negative digit)
assign exx_denomQ_lev22_div = ((~exx_denom_q) & {56{exx_lev22_selnD}}) |
(exx_denom_q & {56{exx_lev22_selD}});
// addend for sqrt: ~sqrtlev22_Q (positive digit) or sqrtlev22_MQ (negative digit)
assign exx_denomQ_lev22_sqrt = ((~exx_sqrtlev22_Q) & {56{exx_lev22_selQ}}) |
(exx_sqrtlev22_MQ & {56{exx_lev22_selMQ}});
// shared sum ^ carry of the level-0 CSA result
tri_xor2 #(.WIDTH(57)) DIVSQRT_XOR2_exx_lev22_csaout_sum_xor(exx_lev22_csaout_sum_xor,
exx_lev0_csaout_sum,
exx_lev0_csaout_carry );
// CSA sum = {selneg, addend} ^ (sum ^ carry), separately for div and sqrt
tri_xor2 #(.WIDTH(57)) DIVSQRT_XOR2_exx_lev22_csaout_sum_div(exx_lev22_csaout_sum_div,
{exx_lev22_selneg, exx_denomQ_lev22_div},
exx_lev22_csaout_sum_xor );
tri_xor2 #(.WIDTH(57)) DIVSQRT_XOR2_exx_lev22_csaout_sum_sqrt(exx_lev22_csaout_sum_sqrt,
{exx_lev22_selneg, exx_denomQ_lev22_sqrt},
exx_lev22_csaout_sum_xor );
// CSA carry = bitwise majority of {selneg, addend}, sum and carry
assign exx_lev22_csaout_carryout_div = (({exx_lev22_selneg, exx_denomQ_lev22_div}) & exx_lev0_csaout_sum) |
(({exx_lev22_selneg, exx_denomQ_lev22_div}) & exx_lev0_csaout_carry) |
(exx_lev0_csaout_sum & exx_lev0_csaout_carry);
assign exx_lev22_csaout_carryout_sqrt = (({exx_lev22_selneg, exx_denomQ_lev22_sqrt}) & exx_lev0_csaout_sum) |
(({exx_lev22_selneg, exx_denomQ_lev22_sqrt}) & exx_lev0_csaout_carry) |
(exx_lev0_csaout_sum & exx_lev0_csaout_carry);
// carry moved up one position, selneg entering at the least-significant bit
assign exx_lev22_csaout_carry_div[0:56] = {exx_lev22_csaout_carryout_div[1:56], exx_lev22_selneg};
assign exx_lev22_csaout_carry_sqrt[0:56] = {exx_lev22_csaout_carryout_sqrt[1:56], exx_lev22_selneg};
// pick the div or sqrt result; these feed back into the partial-remainder register
assign exx_PR_sum_final = (exx_lev22_csaout_sum_div & {57{exx_div_q[0]}}) |
(exx_lev22_csaout_sum_sqrt & {57{exx_sqrt_q[0]}});
assign exx_PR_carry_final = (exx_lev22_csaout_carry_div & {57{exx_div_q[0]}}) |
(exx_lev22_csaout_carry_sqrt & {57{exx_sqrt_q[0]}});
//-------------------------------------------------------------------
// on-the-fly quotient digit conversion logic
//-------------------------------------------------------------------
// Second conversion step: consumes Qin_lev0 / QMin_lev0 and the second digit
// (q_bit22 / nq_bit22 / notqornq_bit22), separately for div and sqrt.
// Qin=(Q & q) if q >= 0. Qin=(QM & 1) if q < 0
//timing: split out seperate sqrt Q latch?
assign exx_Qin_lev1_sel0_div = (~exx_nq_bit22_div); // (exx_q_bit22_div | ((~exx_nq_bit22_div)));this combination will never be 11
assign exx_Qin_lev1_sel1_div = exx_nq_bit22_div;
assign exx_Qin_lev1_sel0_sqrt = (~exx_nq_bit22_sqrt); // (exx_q_bit22_sqrt | ((~exx_nq_bit22_sqrt))); this combination will never be 11
assign exx_Qin_lev1_sel1_sqrt = exx_nq_bit22_sqrt;
assign exx_Qin_lev1_div[0:56] = (({exx_Qin_lev0[1:56], exx_q_bit22_div}) & {57{exx_Qin_lev1_sel0_div}}) |
(({exx_QMin_lev0[1:56], 1'b1}) & {57{exx_Qin_lev1_sel1_div}});
assign exx_Qin_lev1_sqrt[0:56] = (({exx_Qin_lev0[1:56], exx_q_bit22_sqrt}) & {57{exx_Qin_lev1_sel0_sqrt}}) |
(({exx_QMin_lev0[1:56], 1'b1}) & {57{exx_Qin_lev1_sel1_sqrt}});
// QMin=(Q & 0) if q > 0. QMin=(QM & 0) if q < 0. QMin=(QM & 1) if q = 0
assign exx_QMin_lev1_sel0_div = exx_q_bit22_div;
assign exx_QMin_lev1_sel1_div = exx_nq_bit22_div;
assign exx_QMin_lev1_sel2_div = exx_notqornq_bit22_div; //((~(exx_nq_bit22_div | exx_q_bit22_div)));
assign exx_QMin_lev1_sel0_sqrt = exx_q_bit22_sqrt;
assign exx_QMin_lev1_sel1_sqrt = exx_nq_bit22_sqrt;
assign exx_QMin_lev1_sel2_sqrt = exx_notqornq_bit22_sqrt;
assign exx_QMin_lev1_div[0:56] = (({exx_Qin_lev0[1:56], 1'b0}) & {57{exx_QMin_lev1_sel0_div}}) |
(({exx_QMin_lev0[1:56], 1'b0}) & {57{exx_QMin_lev1_sel1_div}}) |
(({exx_QMin_lev0[1:56], 1'b1}) & {57{exx_QMin_lev1_sel2_div}});
assign exx_QMin_lev1_sqrt[0:56] = (({exx_Qin_lev0[1:56], 1'b0}) & {57{exx_QMin_lev1_sel0_sqrt}}) |
(({exx_QMin_lev0[1:56], 1'b0}) & {57{exx_QMin_lev1_sel1_sqrt}}) |
(({exx_QMin_lev0[1:56], 1'b1}) & {57{exx_QMin_lev1_sel2_sqrt}});
// Q register input: iteration result (div or sqrt), hold, or init to all zeros
assign exx_Q_d = (exx_Qin_lev1_div & {57{(exx_div_q[0] & exx_divsqrt_running_q & (~ex3_divsqrt_done) & (~ex2_anydivsqrt) & (~(ex2_divsqrt_done & ex2_divsqrt_hole_v_b)))}}) | // normal running mode
(exx_Qin_lev1_sqrt & {57{(exx_sqrt_q[0] & exx_divsqrt_running_q & (~ex3_divsqrt_done) & (~ex2_anydivsqrt) & (~(ex2_divsqrt_done & ex2_divsqrt_hole_v_b)))}}) | // normal running mode
(exx_Q_q & {57{(ex2_divsqrt_done & ex2_divsqrt_hole_v_b)}}) | // hold
(exx_Q_q & {57{(ex3_divsqrt_done & (~ex2_anydivsqrt))}}) | // hold for rounding
({57{1'b0}} & {57{ex2_anydivsqrt}}); // init
// QM register input: same five terms in the same order (running div, running
// sqrt, hold, hold for rounding, init), except that init loads all ones
assign exx_QM_d = (exx_QMin_lev1_div & {57{(exx_div_q[0] & exx_divsqrt_running_q & (~ex3_divsqrt_done) & (~ex2_anydivsqrt) & (~(ex2_divsqrt_done & ex2_divsqrt_hole_v_b)))}}) |
(exx_QMin_lev1_sqrt & {57{(exx_sqrt_q[0] & exx_divsqrt_running_q & (~ex3_divsqrt_done) & (~ex2_anydivsqrt) & (~(ex2_divsqrt_done & ex2_divsqrt_hole_v_b)))}}) |
(exx_QM_q & {57{(ex2_divsqrt_done & ex2_divsqrt_hole_v_b)}}) |
(exx_QM_q & {57{(ex3_divsqrt_done & (~ex2_anydivsqrt))}}) |
({57{1'b1}} & {57{ex2_anydivsqrt}}); // init (all ones)
//-------------------------------------------------------------------------------------------------------------
// massage Q and QM for use with square root
// sel_denom_pre1 = ~(((Q << 2) | 1) << 29-i);
// sel_denom_pre3 = (((QM << 2) | 3) << 29-i);
// sel_denom_1 = ~(((Q << 2) | 1) << 28-i);
// sel_denom_3 = ((QM << 2) | 3) << 28-i;
// left justify Q, QM and append 01, 11 for use in square root
//---------------------------------------------------------------------------------------------------------------------------------------------------------------------
// bQ / bQM with the Q / QM bit masks (moved up one position, two zero bits appended) ORed in
assign exx_bQ_q_t = exx_bQ_q[00:56] | ({exx_sqrt_Qbitmask_q[1:55], 2'b00});
assign exx_bQM_q_t = exx_bQM_q[00:56] | ({exx_sqrt_QMbitmask_q[1:55], 2'b00});
// carry bQin / bQMin through the second digit with the same selects as the
// Q / QM conversion; every candidate gets a 0 appended on the right
assign exx_bQin_lev1_sqrt[0:56] = (({exx_bQin_lev0[00:55], 1'b0}) & {57{exx_Qin_lev1_sel0_sqrt}}) |
(({exx_bQMin_lev0[00:55], 1'b0}) & {57{exx_Qin_lev1_sel1_sqrt}});
assign exx_bQMin_lev1_sqrt[0:56] = (({exx_bQin_lev0[00:55], 1'b0}) & {57{exx_QMin_lev1_sel0_sqrt}}) |
(({exx_bQMin_lev0[00:55], 1'b0}) & {57{exx_QMin_lev1_sel1_sqrt}}) |
(({exx_bQMin_lev0[00:55], 1'b0}) & {57{exx_QMin_lev1_sel2_sqrt}});
// bQ register input, three ORed terms:
//   1) lev0 result bit (Qin_lev1_sqrt[55]) placed at the newbitmask position
//   2) lev1 result bit (Qin_lev1_sqrt[56]) placed one position to the right of it
//   3) the previously accumulated value while running and no new op starts
assign exx_bQ_d[00:56] = (({exx_sqrt_newbitmask_q[0:55], 1'b0}) & {57{exx_Qin_lev1_sqrt[55]}}) |
(({1'b0, exx_sqrt_newbitmask_q[0:55]}) & {57{exx_Qin_lev1_sqrt[56]}}) |
((exx_bQin_lev1_sqrt) & {57{(exx_divsqrt_running_q & (~ex2_anydivsqrt))}}); // accumulated bits
// bQM register input: same three terms, taken from the QMin results
assign exx_bQM_d[00:56] = (({exx_sqrt_newbitmask_q[0:55], 1'b0}) & {57{exx_QMin_lev1_sqrt[55]}}) |
({{({1'b0, exx_sqrt_newbitmask_q[0:55]})}} & {57{exx_QMin_lev1_sqrt[56]}}) |
((exx_bQMin_lev1_sqrt) & {57{(exx_divsqrt_running_q & (~ex2_anydivsqrt))}}); // accumulated bits
// Bit masks: on a new op they load a fixed start pattern (1 / 001 / 011 in the
// leftmost bits, zeros elsewhere); while running they move right by two
// positions each time the register is clocked.
assign exx_sqrt_newbitmask_din[0:55] = (({2'b00, exx_sqrt_newbitmask_q[0:53]}) & {56{(exx_divsqrt_running_q & (~ex2_anydivsqrt))}}) |
({{({1'b1, zeros[1:55]})}} & {56{ex2_anydivsqrt}});
assign exx_sqrt_Qbitmask_din[0:55] = (({2'b00, exx_sqrt_Qbitmask_q[0:53]}) & {56{(exx_divsqrt_running_q & (~ex2_anydivsqrt))}}) |
(({3'b001, zeros[3:55]}) & {56{ex2_anydivsqrt}});
assign exx_sqrt_QMbitmask_din[0:55] = (({2'b00, exx_sqrt_QMbitmask_q[0:53]}) & {56{(exx_divsqrt_running_q & (~ex2_anydivsqrt))}}) |
(({3'b011, zeros[3:55]}) & {56{ex2_anydivsqrt}});
// todo: probably don't need both newbitmask and Qbitmask
//-------------------------------------------------------------------
//
//-------------------------------------------------------------------
//-------------------------------------------------------------------
// exponent logic
//-------------------------------------------------------------------
// Exponent adjust term: all ones (i.e. -1) when ex4_norm_shl1_d is set, zero otherwise.
assign exx_exp_adj[1:13] = {13{ex4_norm_shl1_d}};

// Divide: biased A exponent minus unbiased B exponent, plus the "minus one" variant;
// ex4_norm_shl1_d picks between the two.
assign exx_exp_addres_div_x0    = exx_a_biased_13exp_q - exy_b_ubexp[1:13];
assign exy_exp_addres_div_x0_m1 = exy_exp_addres_div_x0 - 13'd1;
assign exz_exp_addres_div_x0_adj =
      ({13{ex4_norm_shl1_d}}    & exz_exp_addres_div_x0_m1) |
      ({13{(~ex4_norm_shl1_d)}} & exy_exp_addres_div_x0);

// Square root: bits 1:12 of the B exponent with bit 1 replicated on top, plus 1023.
assign exx_exp_addres_sqrt_x0 = {exy_b_ubexp[1], exy_b_ubexp[1:12]} + 13'd1023;

// Result exponent without any exception adjustment (divide or sqrt).
assign exz_exp_addres_x0 =
      ({13{exx_div_q[0]}}  & exz_exp_addres_div_x0_adj) |
      ({13{exx_sqrt_q[0]}} & exy_exp_addres_sqrt_x0);

// Same exponent with the enabled-underflow / enabled-overflow adjustment added in.
assign exx_exp_addres_ux = exx_a_biased_13exp_q - exy_b_ubexp[1:13] + exx_exp_adj[1:13] + exx_exp_ux_adj;
assign exx_exp_addres_ox = exx_a_biased_13exp_q - exy_b_ubexp[1:13] + exx_exp_adj[1:13] + exx_exp_ox_adj;

// "Plus one" family: the adjust term is 0 when ex4_norm_shl1_d is set and +1 otherwise.
assign exx_exp_adj_p1[1:13] = {12'b000000000000, (~ex4_norm_shl1_d)};
assign exx_exp_addres_div_x0_p1  = exx_a_biased_13exp_q - exy_b_ubexp[1:13] + exx_exp_adj_p1[1:13];
assign exx_exp_addres_sqrt_x0_p1 = {exy_b_ubexp[1], exy_b_ubexp[1:12]} + 13'd1024;
assign exx_exp_addres_x0_p1 =
      ({13{exx_div_q[0]}}  & exx_exp_addres_div_x0_p1) |
      ({13{exx_sqrt_q[0]}} & exx_exp_addres_sqrt_x0_p1);
assign exx_exp_addres_ux_p1 = exx_a_biased_13exp_q - exy_b_ubexp[1:13] + exx_exp_adj_p1[1:13] + exx_exp_ux_adj;
assign exx_exp_addres_ox_p1 = exx_a_biased_13exp_q - exy_b_ubexp[1:13] + exx_exp_adj_p1[1:13] + exx_exp_ox_adj;

// Exception conditions qualified by their enable bits (UE / OE / ZE / VE).
assign ueux = UE & (underflow & (~special_force_zero));
assign oeox = OE & (overflow & (~exx_hard_spec_case));
assign zezx = ZE & ex4_div_by_zero_zx;
assign vevx = VE & (ex4_zero_div_zero | ex4_inf_div_inf | ex4_sqrt_neg | ex4_snan);
assign not_ueux_or_oeox = ~(ueux | oeox);

// Final exponent: unadjusted unless an enabled underflow / overflow selects its adjusted form.
assign exx_exp_addres =
      ({13{not_ueux_or_oeox}} & exz_exp_addres_x0) |
      ({13{ueux}}             & exx_exp_addres_ux) |
      ({13{oeox}}             & exx_exp_addres_ox);

// Set when every bit of the unadjusted result exponent is zero.
assign ex4_expresult_zero = ~(|exz_exp_addres_x0);
//
// 13-bit exponent adjust constants applied on enabled underflow (ux) / overflow (ox).
assign exx_exp_ux_adj_dp = 13'h0600;   // +1536  (0b0011000000000)
assign exx_exp_ux_adj_sp = 13'h00C0;   // +192   (0b0000011000000)
assign exx_exp_ox_adj_dp = 13'h1A00;   // -1536  (0b1101000000000, two's complement)
assign exx_exp_ox_adj_sp = 13'h1F40;   // -192   (0b1111101000000, two's complement)

// Pick the double- or single-precision constant.
assign exx_exp_ux_adj = ({13{exx_dp}} & exx_exp_ux_adj_dp) |
                        ({13{exx_sp}} & exx_exp_ux_adj_sp);
assign exx_exp_ox_adj = ({13{exx_dp}} & exx_exp_ox_adj_dp) |
                        ({13{exx_sp}} & exx_exp_ox_adj_sp);
// underflow
// DP underflow: unadjusted result exponent is negative (bit 0 set) or exactly zero.
assign underflow_dp = exz_exp_addres_x0[0] | ex4_expresult_zero;

// SP underflow: unadjusted result exponent is negative, or lies in 1..896 (896 = -127+1023).
assign underflow_sp =
      exz_exp_addres_x0[0] |                                                         // negative
      ((~|exz_exp_addres_x0[0:5]) & (|exz_exp_addres_x0[6:12])) |                    // 0b000000xxxxxxx, nonzero
      ((~|exz_exp_addres_x0[0:2]) &                                                  // 0b000xxxXXXXXXX, bits 3:5 neither 000 nor 111
         (((exz_exp_addres_x0[3] | exz_exp_addres_x0[4]) & (~exz_exp_addres_x0[5])) |
          ((exz_exp_addres_x0[5] | exz_exp_addres_x0[3]) & (~exz_exp_addres_x0[4])) |
          ((exz_exp_addres_x0[4] | exz_exp_addres_x0[5]) & (~exz_exp_addres_x0[3])))) |
      ((~|exz_exp_addres_x0[0:2]) & (&exz_exp_addres_x0[3:5]) & (~|exz_exp_addres_x0[6:12])); // exactly 0b0001110000000

// Inexact indication for a denormalized result: sticky plus the guard (and, for SP, round) position.
assign underflow_denorm_dp = denorm_sticky | exx_divsqrt_fract_q[53];                // guard bit also
assign underflow_denorm_sp = denorm_sticky_sp | guard_dnr_sp | round_dnr_sp;
assign underflow_denorm    = (exx_dp & underflow_denorm_dp) | (exx_sp & underflow_denorm_sp);

// Before denormalization finishes report the raw underflow; once it is done report the denorm inexact bits.
assign underflow_fi = ((~ex4_denormalizing_result_done) & underflow) |
                      (ex4_denormalizing_result_done & underflow_denorm);
// overflow
// SP "brink": exponent == 0b0010001111110 (128+1023-1); one more increment overflows SP.
assign sp_overflow_brink_x47E =
      (~|exz_exp_addres_x0[0:1]) & exz_exp_addres_x0[2] &
      (~|exz_exp_addres_x0[3:5]) &
      (&exz_exp_addres_x0[6:11]) & (~exz_exp_addres_x0[12]);
// Rounding carried out past the implicit bit (which is bit 1 here) while on the SP brink.
assign ex4_incexp_to_sp_overflow = ex4_divsqrt_fract_rounded[0] & sp_overflow_brink_x47E & exx_sp;

// DP "brink": exponent == 0b0011111111110 (1024+1023-1).
assign dp_overflow_brink_x7FE =
      (~|exz_exp_addres_x0[0:1]) &
      (&exz_exp_addres_x0[2:11]) & (~exz_exp_addres_x0[12]);
assign ex4_incexp_to_dp_overflow = ex4_divsqrt_fract_rounded[0] & dp_overflow_brink_x7FE & exx_dp;

assign ex4_incexp_to_overflow = ex4_incexp_to_sp_overflow | ex4_incexp_to_dp_overflow;

// DP overflow: rounding pushed it over, or the exponent is already >= 1024+1023.
assign overflow_dp =
      ex4_incexp_to_dp_overflow |
      ((~exz_exp_addres_x0[0]) & exz_exp_addres_x0[1]) |                             // 0b01XXXXXXXXXXX  > 1024+1023
      ((~|exz_exp_addres_x0[0:1]) & (&exz_exp_addres_x0[2:12]));                     // 0b0011111111111  = 1024+1023

// SP overflow: rounding pushed it over, or the exponent is already >= 128+1023.
assign overflow_sp =
      ex4_incexp_to_sp_overflow |
      ((~|exz_exp_addres_x0[0:1]) & exz_exp_addres_x0[2] & (|exz_exp_addres_x0[3:5])) |  // 0b001xxxXXXXXXX  > 128+1023
      ((~exz_exp_addres_x0[0]) & exz_exp_addres_x0[1]) |                                 // 0b01xxxxXXXXXXX  > 128+1023
      ((~|exz_exp_addres_x0[0:1]) & exz_exp_addres_x0[2] &
         (~|exz_exp_addres_x0[3:5]) & (&exz_exp_addres_x0[6:12]));                       // 0b0010001111111  = 128+1023

// Precision-qualified summary flags.
assign overflow  = (exx_sp & overflow_sp)  | (exx_dp & overflow_dp);
assign underflow = (exx_sp & underflow_sp) | (exx_dp & underflow_dp);
//-------------------------------------------------------------------
// result staging latch
//-------------------------------------------------------------------
// A flush (n_flush) clears the "done" indication before it moves on to the next stage.
assign ex3_divsqrt_done_din = (~n_flush) & ex3_divsqrt_done;
assign ex4_divsqrt_done_din = (~n_flush) & ex4_divsqrt_done;
assign ex5_divsqrt_done_din = (~n_flush) & ex5_divsqrt_done;

// 4-bit staging latch for the "done" indication:
//   ex2_divsqrt_done_din -> ex3_divsqrt_done
//   ex3_divsqrt_done_din -> ex4_divsqrt_done_q
//   ex4_divsqrt_done_din -> ex5_divsqrt_done
//   ex5_divsqrt_done_din -> ex6_divsqrt_done
// Always clocked (act tied to tiup).
tri_rlmreg_p #(.WIDTH(4), .INIT(0), .NEEDS_SRESET(0)) ex4_div_done_lat(
   // power
   .vd(vdd),
   .gd(gnd),
   // clocking / clock control
   .nclk(nclk),
   .act(tiup),
   .force_t(force_t),
   .d_mode(tiup),
   .delay_lclkr(delay_lclkr),
   .mpw1_b(mpw1_b),
   .mpw2_b(mpw2_b),
   .thold_b(thold_0_b),
   .sg(sg_0),
   // scan chain
   .scin(ex5_div_done_lat_scin),
   .scout(ex5_div_done_lat_scout),
   // data
   .din({ex2_divsqrt_done_din,
         ex3_divsqrt_done_din,
         ex4_divsqrt_done_din,
         ex5_divsqrt_done_din}),
   .dout({ex3_divsqrt_done,
          ex4_divsqrt_done_q,
          ex5_divsqrt_done,
          ex6_divsqrt_done})
);
//------------------------------------------------------------------------------------------------------------------------------------
// final fixup stages: normalize, round, final staging
// generate the remainder
// Resolve the carry-save partial remainder into a single value.
assign ex3_divsqrt_remainder[00:56] = exx_PR_sum_q[0:56] + exx_PR_carry_q[0:56];
//-----------------------------------------------------------------------
// In ex4 the remainder is read back from the fraction latch.
assign ex4_divsqrt_remainder[00:56] = exx_divsqrt_fract_q[00:56];

// Fan the four copies of the remainder-sign (and its complement) out across the 57-bit datapath:
// copy 0 drives bits 00:14, copies 1..3 drive 14 bits each.
assign ex4_rem_neg_buf[00:56] = {{15{ex4_rem_neg[0]}},
                                 {14{ex4_rem_neg[1]}},
                                 {14{ex4_rem_neg[2]}},
                                 {14{ex4_rem_neg[3]}}};
assign ex4_rem_neg_buf_b[00:56] = {{15{ex4_rem_neg_b[0]}},
                                   {14{ex4_rem_neg_b[1]}},
                                   {14{ex4_rem_neg_b[2]}},
                                   {14{ex4_rem_neg_b[3]}}};
//assign ex4_rem_neg = ex4_divsqrt_remainder[00];

// Any remainder bit set -> result is inexact (sticky).
assign ex4_rem_nonzero = |ex4_divsqrt_remainder[00:56];
assign ex4_rem_nonzero_fi = (~ex4_denormalizing_result_done) &
                            (ex4_rem_nonzero | ex4_sp_inexact_roundbits);

// Quotient select: Q when the remainder-negative buffers are clear, QM when they are set.
assign ex4_divsqrt_fract_preround_prenorm[00:56] =
      (ex4_rem_neg_buf_b & exx_Q_q[00:56]) |
      (ex4_rem_neg_buf   & exx_QM_q[00:56]);

// Normalize test: leading bit (bit 00 for DP, bit 28 for SP) of the selected quotient is zero.
assign ex4_norm_shl1_test = (exx_dp & (~ex4_divsqrt_fract_preround_prenorm[00])) |
                            (exx_sp & (~ex4_divsqrt_fract_preround_prenorm[28]));

// Same decision made one cycle earlier from the next-state Q / QM and the ex3 remainder sign.
assign ex3_norm_shl1_dp = ((~ex3_divsqrt_remainder[0]) & exx_Q_d[0]) |
                          (ex3_divsqrt_remainder[0]    & exx_QM_d[0]);
assign ex3_norm_shl1_sp = ((~ex3_divsqrt_remainder[0]) & exx_Q_d[28]) |
                          (ex3_divsqrt_remainder[0]    & exx_QM_d[28]);
assign ex3_norm_shl1 = ~((exx_dp & ex3_norm_shl1_dp) | (exx_sp & ex3_norm_shl1_sp));

// Sticky copy of the shift decision: captured at ex4 done, held until a flush, a new op, or ex6 done.
assign ex4_norm_shl1_d = (~(n_flush | ex2_anydivsqrt | ex6_divsqrt_done)) &
                         ((ex4_norm_shl1 & ex4_divsqrt_done_q) | ex4_norm_shl1_q);

// Apply the optional one-bit normalize shift toward bit 00 (zero fill at bit 56).
assign ex4_divsqrt_fract_preround[00:56] =
      ({57{(~ex4_norm_shl1)}} & ex4_divsqrt_fract_preround_prenorm[00:56]) |
      ({57{ex4_norm_shl1}}    & {ex4_divsqrt_fract_preround_prenorm[01:56], 1'b0});
// Candidate results: p0 = truncated fraction, p1 = truncated fraction plus one ulp.
// A leading zero is prepended so a carry out of the add lands in bit 0.
assign ex4_divsqrt_fract_p0_dp = {1'b0, ex4_divsqrt_fract_preround[00:52]};
assign ex4_divsqrt_fract_p1_dp = {1'b0, ex4_divsqrt_fract_preround[00:52]} + 54'd1;
// SP uses preround bits 28:51, padded with 29 zeros; its ulp sits just above that padding.
assign ex4_divsqrt_fract_p0_sp = {1'b0, ex4_divsqrt_fract_preround[28:51], 29'd0};
assign ex4_divsqrt_fract_p1_sp = {1'b0, ex4_divsqrt_fract_preround[28:51], 29'd0} +
                                 {24'd0, 1'b1, 29'd0};

// HW165073 detector: only drives a spare output in this view.
assign HW165073_bits = (ex4_divsqrt_fract_preround_prenorm[52:56] == 5'b10000);
assign HW165073_hit  = HW165073_bits & exx_sp & ex4_divsqrt_done & ex4_norm_shl1;
assign spare_unused[1] = HW165073_hit;

// SP only: any of preround bits 52:56 set.
assign ex4_sp_inexact_roundbits = (|ex4_divsqrt_fract_preround[52:56]) & ex4_sp;

// Precision select for the two candidates.
assign ex4_divsqrt_fract_p0 = ({54{exx_sp}} & ex4_divsqrt_fract_p0_sp) |
                              ({54{exx_dp}} & ex4_divsqrt_fract_p0_dp);
assign ex4_divsqrt_fract_p1 = ({54{exx_sp}} & ex4_divsqrt_fract_p1_sp) |
                              ({54{exx_dp}} & ex4_divsqrt_fract_p1_dp);

// Rounding inputs. The SP positions sit one bit to the left of the DP ones.
assign sign   = ex4_divsqrt_sign; //exx_divsqrt_sign_d;
assign lsb    = (ex4_dp & ex4_divsqrt_fract_preround[52]) | (ex4_sp & ex4_divsqrt_fract_preround[51]);
assign guard  = (ex4_dp & ex4_divsqrt_fract_preround[53]) | (ex4_sp & ex4_divsqrt_fract_preround[52]);
assign round  = sticky | (ex4_dp & ex4_divsqrt_fract_preround[54]) | (ex4_sp & ex4_divsqrt_fract_preround[53]);
assign sticky = ex4_rem_nonzero;
assign sticky_w_underflow = ex4_rem_nonzero | (underflow & (~exx_hard_spec_case) & (~UE));

// Rounding mode decode from exx_fpscr_q[5:6].
assign RNEmode = ~(exx_fpscr_q[5] | exx_fpscr_q[6]);       // 00
assign RTZmode = (~exx_fpscr_q[5]) & exx_fpscr_q[6];       // 01
assign RPImode = exx_fpscr_q[5] & (~exx_fpscr_q[6]);       // 10
assign RNImode = exx_fpscr_q[5] & exx_fpscr_q[6];          // 11

// Round-up decision per mode; the RTZmode (01) case contributes no term, so it never rounds up.
assign ex4_round_up =
      (RNEmode & guard & (lsb | round)) |                  // mode 00
      (RPImode & (guard | round) & (~sign)) |              // mode 10
      (RNImode & (guard | round) & sign);                  // mode 11
assign ex4_round_up_underflow =
      (RPImode & sticky_w_underflow & (~sign)) |
      (RNImode & sticky_w_underflow & sign);

//timing todo: don't need this whole vector
assign ex4_divsqrt_fract_rounded = ({54{(~ex4_round_up)}} & ex4_divsqrt_fract_p0) |
                                   ({54{ex4_round_up}}    & ex4_divsqrt_fract_p1);

// Rounded up past the implicit bit (which is bit 1 here): the exponent must be incremented.
assign ex4_roundup_incexp = ex4_divsqrt_fract_rounded[0] &
                            (~ex4_start_a_denorm_result) &
                            (~exx_hard_spec_case) &
                            (~ex4_force);
assign ex4_x_roundup_incexp = ex4_roundup_incexp | ex4_dnr_roundup_incexp;
//-----------------------------------------------------------------------
// Denormal result handling
// exx_exp_addres <= std_ulogic_vector(unsigned((exx_a_biased_13exp_q) -
// (exx_b_ubexp(1) & exx_b_ubexp(1) & exx_b_ubexp(1 to 11)) +
// (exx_exp_adj(1) & exx_exp_adj(1) & exx_exp_adj(1 to 11))));
// underflow
// underflow <= exx_exp_addres(0);
// ex4_divsqrt_denorm_hold
// exp_gt_cap <= (exx_exp_addres(0 to 12) < "1111111001011"); -- < -53
// result is too small to denormalize = exp_gt_cap
// Sign-extend the unadjusted exponent to 14 bits and offset it; a negative result
// (bit 0 set) means the value is too small to denormalize.
assign denorm_exp_addres    = {exz_exp_addres_x0[0], exz_exp_addres_x0[0:12]} + 14'd53;
assign denorm_exp_addres_sp = {exz_exp_addres_x0[0], exz_exp_addres_x0[0:12]} + 14'b11110010011001; // -(896-25)=-871
//denorm_exp_addres_sp_lsb <= std_ulogic_vector(((exx_exp_addres_x0(0) & exx_exp_addres_x0(0 to 12))) + ("11110010010111")); -- -(896-23)=-873

// Denormal result shift-off-to-zero case: offset exponent is 0 or 1
// (implicit bit shifted to Guard or Round position).
assign denorm_res_shiftoff_exp = (denorm_exp_addres[0:12] == 13'd0);
assign denorm_res_shiftoff_din = (~ex2_anydivsqrt) &
                                 ((denorm_res_shiftoff_exp & ex4_start_denorm_result) | denorm_res_shiftoff_q);

assign exp_gt_cap = (ex4_dp & denorm_exp_addres[0]) | (ex4_sp & denorm_exp_addres_sp[0]);

// DP denormal result: exponent negative or zero, but still within denormalizing range.
assign ex4_denorm_result_det = exx_dp & (exz_exp_addres_x0[0] | ex4_expresult_zero) & (~exp_gt_cap);
// SP denormal result: if the exponent is in the range [871 to 896] [0x367 to 0x380];
// 0x369 puts the lsb one to the left of the implicit bit.
assign ex4_sp_denorm_result_det = exx_sp & ex4_exp_le_896 & (~exp_gt_cap);

// Exponent equals 0b0001110000000, or bits 0:2 are zero with bits 3:5 neither 000 nor 111.
assign ex4_exp_le_896 =
      ((~|exz_exp_addres_x0[0:2]) & (&exz_exp_addres_x0[3:5]) & (~|exz_exp_addres_x0[6:12])) |
      ((~|exz_exp_addres_x0[0:2]) & (~(&exz_exp_addres_x0[3:5])) & (|exz_exp_addres_x0[3:5]));

// Exact-match decodes of individual exponent values.
assign exp_eq_369 = (exz_exp_addres_x0[0:12] == 13'h0369);
assign exp_eq_368 = (exz_exp_addres_x0[0:12] == 13'h0368);
assign exp_eq_367 = (exz_exp_addres_x0[0:12] == 13'h0367);
assign exp_eq_367to9 = exp_eq_367 | exp_eq_368 | exp_eq_369;
assign exp_eq_380 = (exz_exp_addres_x0[0:12] == 13'h0380);

// Kick off denormalization when the op is done, underflow is not enabled (UE clear),
// the operands are not a hard special case, and there is no flush.
assign ex4_start_denorm_result =
      ex4_denorm_result_det & (~UE) & ex4_divsqrt_done_q & (~exx_hard_spec_case) & (~n_flush);
assign ex4_start_sp_denorm_result =
      ex4_sp_denorm_result_det & (~UE) & ex4_divsqrt_done_q & (~exx_hard_spec_case) & (~n_flush);
assign ex4_start_a_denorm_result =
      (ex4_sp_denorm_result_det | ex4_denorm_result_det) & (~UE) & ex4_divsqrt_done_q & (~exx_hard_spec_case) & (~n_flush);

// Denormalization sequencing, decoded from the down-counter.
assign ex4_denormalizing_result          = |denorm_count_q;
assign ex4_denormalizing_result_shifting = (denorm_count_q == 6'd2);
assign ex4_denormalizing_result_rounding = (denorm_count_q == 6'd1);
assign ex4_denormalizing_result_done     = (denorm_count_q == 6'd1);
assign ex4_divsqrt_denorm_hold = ex4_denormalizing_result;
assign ex4_denormalizing_result_done_din = ex4_denormalizing_result_done & (~f_dcd_axucr0_deno);

// Sticky accumulation: bits shifted beyond position 53 (DP) or masked off (SP), plus the
// remainder sticky at start; cleared when a new operation begins.
assign denorm_sticky_din = (~ex2_anydivsqrt) &
      ((ex4_denormalizing_result & (|ex4_divsqrt_fract_shifted_00to48[54:119])) |
       denorm_sticky_q |
       (ex4_rem_nonzero & ex4_start_denorm_result));
assign denorm_sticky_sp_din = (~ex2_anydivsqrt) &
      ((ex4_denormalizing_result & (|ex4_divsqrt_fract_stickymask[0:56])) |
       denorm_sticky_sp_q |
       (ex4_rem_nonzero & ex4_start_sp_denorm_result));
assign denorm_sticky    = denorm_sticky_q;
assign denorm_sticky_sp = denorm_sticky_sp_q;

// Shift amounts: inverted low six exponent bits plus a constant.
assign denorm_count_start      = 6'd2;
assign denorm_shift_amt_din    = (~exz_exp_addres_x0[7:12]) + 6'd2;
assign sp_denorm_shift_amt_din = (~exz_exp_addres_x0[7:12]) + 6'd4;    // exp is in the range [871 to 896]
assign denorm_shift_amt    = denorm_shift_amt_q;
assign sp_denorm_shift_amt = sp_denorm_shift_amt_q;

// Counter: load the start value when a denormalization starts, count down while non-zero.
assign denorm_count_din =
      ({6{ex4_start_a_denorm_result}} & denorm_count_start) |
      ({6{ex4_denormalizing_result}}  & (denorm_count_q - 6'd1));
//--------------------------------------------------------------------------------------------------------------------------------
// shift the fraction
// DP denormalizing shifter: moves the fraction toward higher bit indices (zero fill)
// by denorm_shift_amt, in three stages of 0..3, 0/4/8/12 and 0/16/32/48 positions.
assign ex4_divsqrt_fract_cur[00:56] = exx_divsqrt_fract_q[00:56];

// lev1: shift amount bits 4:5 -> 0..3
assign dn_lv1sh00 = ~(denorm_shift_amt[4] | denorm_shift_amt[5]);      //00
assign dn_lv1sh01 = (~denorm_shift_amt[4]) & denorm_shift_amt[5];      //01
assign dn_lv1sh10 = denorm_shift_amt[4] & (~denorm_shift_amt[5]);      //10
assign dn_lv1sh11 = denorm_shift_amt[4] & denorm_shift_amt[5];         //11
assign ex4_divsqrt_fract_shifted_00to03[00:59] =
      ({60{dn_lv1sh00}} & {       ex4_divsqrt_fract_cur[00:56], 3'd0}) |
      ({60{dn_lv1sh01}} & {1'd0,  ex4_divsqrt_fract_cur[00:56], 2'd0}) |
      ({60{dn_lv1sh10}} & {2'd0,  ex4_divsqrt_fract_cur[00:56], 1'd0}) |
      ({60{dn_lv1sh11}} & {3'd0,  ex4_divsqrt_fract_cur[00:56]});

// lev2: shift amount bits 2:3 -> 0/4/8/12
assign dn_lv2sh00 = ~(denorm_shift_amt[2] | denorm_shift_amt[3]);      //00
assign dn_lv2sh01 = (~denorm_shift_amt[2]) & denorm_shift_amt[3];      //01
assign dn_lv2sh10 = denorm_shift_amt[2] & (~denorm_shift_amt[3]);      //10
assign dn_lv2sh11 = denorm_shift_amt[2] & denorm_shift_amt[3];         //11
assign ex4_divsqrt_fract_shifted_00to12[00:71] =
      ({72{dn_lv2sh00}} & {        ex4_divsqrt_fract_shifted_00to03[00:59], 12'd0}) |
      ({72{dn_lv2sh01}} & {4'd0,   ex4_divsqrt_fract_shifted_00to03[00:59], 8'd0}) |
      ({72{dn_lv2sh10}} & {8'd0,   ex4_divsqrt_fract_shifted_00to03[00:59], 4'd0}) |
      ({72{dn_lv2sh11}} & {12'd0,  ex4_divsqrt_fract_shifted_00to03[00:59]});

// lev3: shift amount bits 0:1 -> 0/16/32/48
assign dn_lv3sh00 = ~(denorm_shift_amt[0] | denorm_shift_amt[1]);      //00
assign dn_lv3sh01 = (~denorm_shift_amt[0]) & denorm_shift_amt[1];      //01
assign dn_lv3sh10 = denorm_shift_amt[0] & (~denorm_shift_amt[1]);      //10
assign dn_lv3sh11 = denorm_shift_amt[0] & denorm_shift_amt[1];         //11
assign ex4_divsqrt_fract_shifted_00to48[00:119] =
      ({120{dn_lv3sh00}} & {               ex4_divsqrt_fract_shifted_00to12[00:71], {48{1'b0}}}) |
      ({120{dn_lv3sh01}} & {{16{1'b0}},    ex4_divsqrt_fract_shifted_00to12[00:71], {32{1'b0}}}) |
      ({120{dn_lv3sh10}} & {{32{1'b0}},    ex4_divsqrt_fract_shifted_00to12[00:71], {16{1'b0}}}) |
      ({120{dn_lv3sh11}} & {{48{1'b0}},    ex4_divsqrt_fract_shifted_00to12[00:71]});

// Only the leading 57 bits form the DP result; the tail feeds the sticky logic.
assign ex4_divsqrt_fract_shifted_dp[00:56] = ex4_divsqrt_fract_shifted_00to48[00:56];
//--------------------------------------------------------------------------------------------------------------------------------
//--------------------------------------------------------------------------------------------------------------------------------
// shift the sp denorm mask
// SP denormal handling keeps the fraction in place and instead slides four masks toward
// lower bit indices by sp_denorm_shift_amt: a keep-mask (ones in 0:27) and one-hot markers
// at positions 25, 26 and 27 for the lsb, guard and round bits.
assign ex4_spdenorm_mask[00:56]       = {ones[0:27], zeros[28:56]};
assign ex4_spdenorm_mask_lsb[00:56]   = {zeros[0:24], 1'b1, zeros[26:56]};
assign ex4_spdenorm_mask_guard[00:56] = {zeros[0:25], 1'b1, zeros[27:56]};
assign ex4_spdenorm_mask_round[00:56] = {zeros[0:26], 1'b1, zeros[28:56]};
// todo: get rid of the cruft below

// lev1: shift amount bits 4:5 -> 0..3 positions
assign dnsp_lv1sh00 = ~(sp_denorm_shift_amt[4] | sp_denorm_shift_amt[5]);    //00
assign dnsp_lv1sh01 = (~sp_denorm_shift_amt[4]) & sp_denorm_shift_amt[5];    //01
assign dnsp_lv1sh10 = sp_denorm_shift_amt[4] & (~sp_denorm_shift_amt[5]);    //10
assign dnsp_lv1sh11 = sp_denorm_shift_amt[4] & sp_denorm_shift_amt[5];       //11
assign ex4_spdenorm_mask_shifted_00to03[00:59] =
      ({60{dnsp_lv1sh00}} & {ex4_spdenorm_mask[00:56], 3'd0}) |
      ({60{dnsp_lv1sh01}} & {ex4_spdenorm_mask[01:56], 4'd0}) |
      ({60{dnsp_lv1sh10}} & {ex4_spdenorm_mask[02:56], 5'd0}) |
      ({60{dnsp_lv1sh11}} & {ex4_spdenorm_mask[03:56], 6'd0});
assign ex4_spdenorm_mask_lsb_shifted_00to03[00:59] =
      ({60{dnsp_lv1sh00}} & {ex4_spdenorm_mask_lsb[00:56], 3'd0}) |
      ({60{dnsp_lv1sh01}} & {ex4_spdenorm_mask_lsb[01:56], 4'd0}) |
      ({60{dnsp_lv1sh10}} & {ex4_spdenorm_mask_lsb[02:56], 5'd0}) |
      ({60{dnsp_lv1sh11}} & {ex4_spdenorm_mask_lsb[03:56], 6'd0});
assign ex4_spdenorm_mask_guard_shifted_00to03[00:59] =
      ({60{dnsp_lv1sh00}} & {ex4_spdenorm_mask_guard[00:56], 3'd0}) |
      ({60{dnsp_lv1sh01}} & {ex4_spdenorm_mask_guard[01:56], 4'd0}) |
      ({60{dnsp_lv1sh10}} & {ex4_spdenorm_mask_guard[02:56], 5'd0}) |
      ({60{dnsp_lv1sh11}} & {ex4_spdenorm_mask_guard[03:56], 6'd0});
assign ex4_spdenorm_mask_round_shifted_00to03[00:59] =
      ({60{dnsp_lv1sh00}} & {ex4_spdenorm_mask_round[00:56], 3'd0}) |
      ({60{dnsp_lv1sh01}} & {ex4_spdenorm_mask_round[01:56], 4'd0}) |
      ({60{dnsp_lv1sh10}} & {ex4_spdenorm_mask_round[02:56], 5'd0}) |
      ({60{dnsp_lv1sh11}} & {ex4_spdenorm_mask_round[03:56], 6'd0});

// lev2: shift amount bits 2:3 -> 0/4/8/12 positions
assign dnsp_lv2sh00 = ~(sp_denorm_shift_amt[2] | sp_denorm_shift_amt[3]);    //00
assign dnsp_lv2sh01 = (~sp_denorm_shift_amt[2]) & sp_denorm_shift_amt[3];    //01
assign dnsp_lv2sh10 = sp_denorm_shift_amt[2] & (~sp_denorm_shift_amt[3]);    //10
assign dnsp_lv2sh11 = sp_denorm_shift_amt[2] & sp_denorm_shift_amt[3];       //11
assign ex4_spdenorm_mask_shifted_00to12[00:71] =
      ({72{dnsp_lv2sh00}} & {ex4_spdenorm_mask_shifted_00to03[00:59], 12'd0}) |
      ({72{dnsp_lv2sh01}} & {ex4_spdenorm_mask_shifted_00to03[04:59], 16'd0}) |
      ({72{dnsp_lv2sh10}} & {ex4_spdenorm_mask_shifted_00to03[08:59], 20'd0}) |
      ({72{dnsp_lv2sh11}} & {ex4_spdenorm_mask_shifted_00to03[12:59], 24'd0});
assign ex4_spdenorm_mask_lsb_shifted_00to12[00:71] =
      ({72{dnsp_lv2sh00}} & {ex4_spdenorm_mask_lsb_shifted_00to03[00:59], 12'd0}) |
      ({72{dnsp_lv2sh01}} & {ex4_spdenorm_mask_lsb_shifted_00to03[04:59], 16'd0}) |
      ({72{dnsp_lv2sh10}} & {ex4_spdenorm_mask_lsb_shifted_00to03[08:59], 20'd0}) |
      ({72{dnsp_lv2sh11}} & {ex4_spdenorm_mask_lsb_shifted_00to03[12:59], 24'd0});
assign ex4_spdenorm_mask_guard_shifted_00to12[00:71] =
      ({72{dnsp_lv2sh00}} & {ex4_spdenorm_mask_guard_shifted_00to03[00:59], 12'd0}) |
      ({72{dnsp_lv2sh01}} & {ex4_spdenorm_mask_guard_shifted_00to03[04:59], 16'd0}) |
      ({72{dnsp_lv2sh10}} & {ex4_spdenorm_mask_guard_shifted_00to03[08:59], 20'd0}) |
      ({72{dnsp_lv2sh11}} & {ex4_spdenorm_mask_guard_shifted_00to03[12:59], 24'd0});
assign ex4_spdenorm_mask_round_shifted_00to12[00:71] =
      ({72{dnsp_lv2sh00}} & {ex4_spdenorm_mask_round_shifted_00to03[00:59], 12'd0}) |
      ({72{dnsp_lv2sh01}} & {ex4_spdenorm_mask_round_shifted_00to03[04:59], 16'd0}) |
      ({72{dnsp_lv2sh10}} & {ex4_spdenorm_mask_round_shifted_00to03[08:59], 20'd0}) |
      ({72{dnsp_lv2sh11}} & {ex4_spdenorm_mask_round_shifted_00to03[12:59], 24'd0});

// lev3: shift amount bits 0:1. Only the 0 and 16 position cases are muxed in; the
// sh10 / sh11 selects are decoded but contribute no term (output is zero for them).
assign dnsp_lv3sh00 = ~(sp_denorm_shift_amt[0] | sp_denorm_shift_amt[1]);    //00
assign dnsp_lv3sh01 = (~sp_denorm_shift_amt[0]) & sp_denorm_shift_amt[1];    //01
assign dnsp_lv3sh10 = sp_denorm_shift_amt[0] & (~sp_denorm_shift_amt[1]);    //10
assign dnsp_lv3sh11 = sp_denorm_shift_amt[0] & sp_denorm_shift_amt[1];       //11
assign ex4_spdenorm_mask_shifted_00to48[00:119] =
      ({120{dnsp_lv3sh00}} & {ex4_spdenorm_mask_shifted_00to12[00:71], {48{1'b0}}}) |
      ({120{dnsp_lv3sh01}} & {ex4_spdenorm_mask_shifted_00to12[16:71], {64{1'b0}}});
assign ex4_spdenorm_mask_lsb_shifted_00to48[00:119] =
      ({120{dnsp_lv3sh00}} & {ex4_spdenorm_mask_lsb_shifted_00to12[00:71], {48{1'b0}}}) |
      ({120{dnsp_lv3sh01}} & {ex4_spdenorm_mask_lsb_shifted_00to12[16:71], {64{1'b0}}});
assign ex4_spdenorm_mask_guard_shifted_00to48[00:119] =
      ({120{dnsp_lv3sh00}} & {ex4_spdenorm_mask_guard_shifted_00to12[00:71], {48{1'b0}}}) |
      ({120{dnsp_lv3sh01}} & {ex4_spdenorm_mask_guard_shifted_00to12[16:71], {64{1'b0}}});
assign ex4_spdenorm_mask_round_shifted_00to48[00:119] =
      ({120{dnsp_lv3sh00}} & {ex4_spdenorm_mask_round_shifted_00to12[00:71], {48{1'b0}}}) |
      ({120{dnsp_lv3sh01}} & {ex4_spdenorm_mask_round_shifted_00to12[16:71], {64{1'b0}}});

// Apply the shifted keep-mask: kept bits form the SP result, masked-off bits feed the SP sticky.
assign ex4_divsqrt_fract_shifted_spmasked[00:56] = ex4_divsqrt_fract_cur[00:56] & ex4_spdenorm_mask_shifted_00to48[00:56];
assign ex4_divsqrt_fract_stickymask[00:56]       = ex4_divsqrt_fract_cur[00:56] & (~ex4_spdenorm_mask_shifted_00to48[00:56]);
//--------------------------------------------------------------------------------------------------------------------------------
// Denormalized fraction: masked-in-place value for SP, right-shifted value for DP.
assign ex4_divsqrt_fract_shifted[00:56] =
      ({57{ex4_sp}} & ex4_divsqrt_fract_shifted_spmasked[00:56]) |
      ({57{ex4_dp}} & ex4_divsqrt_fract_shifted_dp[00:56]);
//--------------------------------------------------------------------------------------------------------------------------------
// Round after the denormalizing step (ex4_denormalizing_result_rounding).
// DP: fixed lsb / guard / round positions 52 / 53 / 54 of the fraction latch.
assign ex4_divsqrt_fract_dnr_p0 = {1'b0, exx_divsqrt_fract_q[00:52]};
assign ex4_divsqrt_fract_dnr_p1 = {1'b0, exx_divsqrt_fract_q[00:52]} + 54'd1;
assign lsb_dnr    = exx_divsqrt_fract_q[52];
assign guard_dnr  = exx_divsqrt_fract_q[53];
assign round_dnr  = sticky_dnr | exx_divsqrt_fract_q[54];
assign sticky_dnr = denorm_sticky;
// The RTZmode (01) case contributes no term, so it never rounds up.
assign ex4_round_up_dnr =
      (RNEmode & guard_dnr & (lsb_dnr | sticky_dnr | round_dnr)) |
      (RPImode & (guard_dnr | round_dnr) & (~sign)) |
      (RNImode & (guard_dnr | round_dnr) & sign);
assign ex4_divsqrt_fract_dnr_dp =
      ({54{(~ex4_round_up_dnr)}} & ex4_divsqrt_fract_dnr_p0) |
      ({54{ex4_round_up_dnr}}    & ex4_divsqrt_fract_dnr_p1);

// sp denorm rounding ----------------
// SP: lsb / guard / round positions are picked out by the shifted one-hot masks, and the
// increment is the shifted lsb mask itself.
assign ex4_divsqrt_fract_dnr_sp_p0 = {1'b0, exx_divsqrt_fract_q[00:52]};
assign ex4_divsqrt_fract_dnr_sp_p1 = {1'b0, exx_divsqrt_fract_q[00:52]} +
                                     {1'b0, ex4_spdenorm_mask_lsb_shifted_00to48[00:52]};
assign lsb_dnr_sp    = |(exx_divsqrt_fract_q[00:25] & ex4_spdenorm_mask_lsb_shifted_00to48[00:25]);
assign guard_dnr_sp  = |(exx_divsqrt_fract_q[00:26] & ex4_spdenorm_mask_guard_shifted_00to48[00:26]);
assign round_dnr_sp  = sticky_dnr_sp |
                       (|(exx_divsqrt_fract_q[00:27] & ex4_spdenorm_mask_round_shifted_00to48[00:27]));
assign sticky_dnr_sp = denorm_sticky_sp;
assign ex4_round_up_dnr_sp =
      (RNEmode & guard_dnr_sp & (lsb_dnr_sp | sticky_dnr_sp | round_dnr_sp)) |
      (RPImode & (guard_dnr_sp | round_dnr_sp) & (~sign)) |
      (RNImode & (guard_dnr_sp | round_dnr_sp) & sign);
assign ex4_divsqrt_fract_dnr_sp_prem =
      ({54{(~ex4_round_up_dnr_sp)}} & ex4_divsqrt_fract_dnr_sp_p0) |
      ({54{ex4_round_up_dnr_sp}}    & ex4_divsqrt_fract_dnr_sp_p1);
// Clear the guard and round positions out of the rounded SP value.
assign ex4_divsqrt_fract_dnr_sp = ex4_divsqrt_fract_dnr_sp_prem &
                                  (~{1'b0, ex4_spdenorm_mask_guard_shifted_00to48[00:52]}) &
                                  (~{1'b0, ex4_spdenorm_mask_round_shifted_00to48[00:52]});

// Precision select of the rounded denormal fraction.
assign ex4_divsqrt_fract_dnr = ({54{exx_sp}} & ex4_divsqrt_fract_dnr_sp) |
                               ({54{exx_dp}} & ex4_divsqrt_fract_dnr_dp);

// Rounded up past the implicit bit (which is bit 1 here) while finishing a denormal result.
assign ex4_dnr_roundup_incexp = ex4_divsqrt_fract_dnr[0] &
                                ex4_denormalizing_result_done &
                                (~exx_hard_spec_case) &
                                (~ex4_force);
// Everything was shifted off and rounding did not bring it back.
assign ex4_denorm_res_shiftoff_zero = denorm_res_shiftoff_q & (~ex4_round_up_dnr) & ex4_denormalizing_result_done;
//------------------------------------------------------------------------------------------------------------------
// 27-bit state latch for the denormalize / round control. din and dout are matched by position;
// three of the entries (count and the two shift amounts) are 6 bits wide, the other nine are
// single bits (9 + 3*6 = 27). Always clocked (act tied to tiup).
tri_rlmreg_p #(.WIDTH(27), .INIT(0), .NEEDS_SRESET(1)) exx_div_denorm_lat(
   // power
   .vd(vdd),
   .gd(gnd),
   // clocking / clock control
   .nclk(nclk),
   .act(tiup),
   .force_t(force_t),
   .d_mode(tiup),
   .delay_lclkr(delay_lclkr),
   .mpw1_b(mpw1_b),
   .mpw2_b(mpw2_b),
   .thold_b(thold_0_b),
   .sg(sg_0),
   // scan chain
   .scin(exx_div_denorm_lat_scin),
   .scout(exx_div_denorm_lat_scout),
   // data in
   .din({denorm_sticky_sp_din,
         ex4_norm_shl1_d,
         denorm_res_shiftoff_din,
         denorm_sticky_din,
         denorm_count_din,                    // 6 bits
         denorm_shift_amt_din,                // 6 bits
         sp_denorm_shift_amt_din,             // 6 bits
         ex3_norm_shl1,
         ex4_div_special_case,
         ex4_round_up,
         ex4_denormalizing_result_done_din,
         ex4_force_inf}),
   // data out
   .dout({denorm_sticky_sp_q,
          ex4_norm_shl1_q,
          denorm_res_shiftoff_q,
          denorm_sticky_q,
          denorm_count_q,                     // 6 bits
          denorm_shift_amt_q,                 // 6 bits
          sp_denorm_shift_amt_q,              // 6 bits
          ex4_norm_shl1,
          ex5_div_special_case,
          ex5_round_up,
          ex5_denormalizing_result_done,
          ex5_force_inf})
);
//-----------------------------------------------------------------------
//-----------------------------------------------------------------------
// Special cases: NaN, etc
// Operand classification from the latched operand attributes:
//   NAN = exponent at max and fraction non-zero
//   INF = exponent at max and fraction zero
//   ZER = operand zero
//   SPoverflowINF / SPunderflowZER = taken directly from the *_SPoverflow_expo_max_q
//   and *_SPunderflow_zero_q latches.
// NOTE(review): the SP* names suggest a DP value outside SP range used by an
// SP operation - confirm.
// Operand-A terms are qualified with exx_div_q[3] throughout.
// NOTE(review): presumably because sqrt uses only operand B - confirm.
assign exx_a_NAN = exx_a_expo_max_q & (~exx_a_frac_zero_q);
assign exx_b_NAN = exx_b_expo_max_q & (~exx_b_frac_zero_q);
assign exx_a_INF = exx_a_expo_max_q & exx_a_frac_zero_q;
assign exx_b_INF = exx_b_expo_max_q & exx_b_frac_zero_q;
assign exx_a_SPoverflowINF = exx_a_SPoverflow_expo_max_q;
assign exx_b_SPoverflowINF = exx_b_SPoverflow_expo_max_q;
assign exx_b_ZER = exx_b_zero_q;
assign exx_a_ZER = exx_a_zero_q;
assign exx_a_SPunderflowZER = exx_a_SPunderflow_zero_q;
assign exx_b_SPunderflowZER = exx_b_SPunderflow_zero_q;
// An SP under/overflow indication on B (or on A when exx_div_q[3]) while none
// of the ordinary NaN/Inf/zero conditions is present on a participating operand.
assign exx_invalid_mixed_precision = ((exx_b_SPunderflowZER | exx_b_SPoverflowINF) | (exx_div_q[3] & (exx_a_SPunderflowZER | exx_a_SPoverflowINF))) & (~((exx_div_q[3] & (exx_a_NAN | exx_a_INF | exx_a_ZER)) | (exx_b_NAN | exx_b_INF | exx_b_ZER)));
// Any operand condition that bypasses the normal datapath result: a special
// value on B, a special value on A when exx_div_q[3], or sqrt with B's sign set.
assign exx_hard_spec_case = (exx_div_q[3] & (exx_a_NAN | exx_a_INF | exx_a_ZER | exx_a_SPunderflowZER | exx_a_SPoverflowINF)) | (exx_b_NAN | exx_b_INF | exx_b_ZER | exx_b_SPunderflowZER | exx_b_SPoverflowINF) | (exx_sqrt_q[3] & exx_b_sign_q);
// Divide-by-zero flag: B is zero, the op is not a sqrt, and the case is not
// Inf/0, 0/0, a NaN pass-through or an SP-range condition on A.
assign ex4_div_by_zero_zx = exx_b_ZER & (~(exx_div_q[3] & exx_a_INF)) & (~(exx_a_SPunderflowZER | exx_a_SPoverflowINF)) & (~exx_sqrt_q[3]) & (~ex4_zero_div_zero) & (~ex4_pass_nan);
assign ex4_zero_div_zero = (exx_a_ZER & exx_b_ZER) & exx_div_q[3];
assign ex4_inf_div_inf = (exx_a_INF & exx_b_INF) & exx_div_q[3];
// sqrt with B's sign set, excluding a zero operand and NaN pass-through.
assign ex4_sqrt_neg = exx_sqrt_q[3] & exx_b_sign_q & (~exx_b_ZER) & (~ex4_pass_nan);
// Result-forcing controls.
// ex4_div_special_case selects the "special" sign/exponent/fraction instead of
// the datapath result. ex4_force_inf is deliberately not part of it: it is
// latched separately (ex5_force_inf) and handled on its own in the ex5 muxes.
assign ex4_div_special_case = ex4_pass_a_nan |
ex4_pass_b_nan |
ex4_force_qnan |
ex4_force_zero |
ex4_force_zeroone |
ex4_force_36A |
ex4_force_maxnorm;
// Underflow handling when the UE bit is off and no hard special case applies.
// With exp_gt_cap set, the result is forced to zero when not rounding up, or
// to the "zeroone" (DP) / "36A" (SP) pattern when rounding up.
assign underflow_force_zero = underflow & exp_gt_cap & (~ex4_round_up_underflow) & (~UE) & (~exx_hard_spec_case);
assign underflow_force_zeroone = underflow_dp & exp_gt_cap & ex4_round_up_underflow & exx_dp & (~UE) & (~exx_hard_spec_case);
assign underflow_force_36A = underflow_sp & exp_gt_cap & ex4_round_up_underflow & exx_sp & (~UE) & (~exx_hard_spec_case);
// SP denormal rounding corner cases, decoded from exp_eq_367to9 / exp_eq_380.
assign sp_denorm_0x369roundup = ex4_denormalizing_result_rounding & ex4_round_up_dnr_sp & exx_sp & exp_eq_367to9;
assign sp_denorm_0x380roundup = ex4_denormalizing_result_rounding & ex4_divsqrt_fract_dnr[0] & exx_sp & exp_eq_380;
assign sp_denorm_underflow_zero = ex4_denormalizing_result_rounding & (~ex4_round_up_dnr_sp) & exx_sp & exp_eq_367to9 & (~UE);
assign underflow_sp_denorm = underflow & (~exp_gt_cap) & (~UE) & (~exx_hard_spec_case) & ex4_sp;
// Overflow with OE off: the rounding mode and result sign choose between
// infinity and the maximum normal number. The two equations cover
// complementary mode/sign combinations.
assign overflow_force_inf = ((overflow & RNEmode) | (overflow & RPImode & (~ex4_divsqrt_sign)) | (overflow & RNImode & ex4_divsqrt_sign)) & (~exx_hard_spec_case) & (~OE);
assign overflow_force_maxnorm = ((overflow & RTZmode) | (overflow & RPImode & ex4_divsqrt_sign) | (overflow & RNImode & (~ex4_divsqrt_sign))) & (~exx_hard_spec_case) & (~OE);
assign ex4_maxnorm_sign = ex4_divsqrt_sign;
// Operand-driven zero/infinity results.
//   zero: x/Inf, 0/x (non-sqrt), sqrt(0)
//   inf : Inf/x, x/0 (non-sqrt), sqrt(Inf)
// Conflicting combinations (e.g. 0/0) are resolved by the qnan/pass-nan
// masking applied to the ex4_force_* terms below.
assign special_force_zero = (exx_b_INF & (~exx_sqrt_q[3])) | (exx_a_ZER & (~exx_sqrt_q[3])) | (exx_b_ZER & exx_sqrt_q[3]);
assign special_force_inf = (exx_a_INF & (~exx_sqrt_q[3])) | (exx_b_ZER & (~exx_sqrt_q[3])) | (exx_b_INF & exx_sqrt_q[3]);
// Final force terms: every one yields to a forced QNaN or a NaN pass-through.
assign ex4_force_36A = (sp_denorm_0x369roundup | underflow_force_36A) & (~(ex4_force_qnan | ex4_pass_nan));
assign ex4_force_zeroone = underflow_force_zeroone & (~(ex4_force_qnan | ex4_pass_nan));
assign ex4_force_zero = (underflow_force_zero | special_force_zero | sp_denorm_underflow_zero | ex4_deno_force_zero) & (~(ex4_force_qnan | ex4_pass_nan));
assign ex4_force_inf = (overflow_force_inf | special_force_inf) & (~(ex4_force_qnan | ex4_pass_nan));
assign ex4_force_maxnorm = overflow_force_maxnorm & (~(ex4_force_qnan | ex4_pass_nan));
assign ex4_force_maxnorm_dp = ex4_force_maxnorm & ex4_dp;
assign ex4_force_maxnorm_sp = ex4_force_maxnorm & ex4_sp;
// Generated QNaN: 0/0, Inf/Inf, sqrt of a negative, or an SP-range condition
// on B (or on A when exx_div_q[3]).
assign ex4_force_qnan = ex4_zero_div_zero | ex4_inf_div_inf | ex4_sqrt_neg | exx_b_SPunderflowZER | exx_b_SPoverflowINF | ((exx_a_SPunderflowZER | exx_a_SPoverflowINF) & exx_div_q[3]);
// OR of the force terms used to block ex4_dnr_roundup_incexp. It does not
// include ex4_force_inf or the NaN pass-through terms.
assign ex4_force = ex4_force_36A | ex4_force_zeroone | ex4_force_zero | ex4_force_maxnorm | ex4_force_qnan;
// f_dcd_axucr0_deno forces a finished denormalized result to zero.
assign ex4_deno_force_zero = ex4_denormalizing_result_done & f_dcd_axucr0_deno;
// NaN pass-through priority: operand A first (never for sqrt), then operand B.
assign ex4_pass_a_nan = exx_a_NAN & (~exx_sqrt_q[3]);
assign ex4_pass_b_nan = exx_b_NAN & (~ex4_pass_a_nan);
assign ex4_pass_a_nan_sp = ex4_pass_a_nan & exx_sp;
assign ex4_pass_b_nan_sp = ex4_pass_b_nan & exx_sp;
assign ex4_pass_a_nan_dp = ex4_pass_a_nan & exx_dp;
assign ex4_pass_b_nan_dp = ex4_pass_b_nan & exx_dp;
// Signalling NaN = NaN with fraction bit 1 clear. Operand A is ignored for sqrt.
assign ex4_a_snan = exx_a_NAN & (~exx_a_fract_q[1]) & (~exx_sqrt_q[3]);
assign ex4_b_snan = exx_b_NAN & (~exx_b_fract_q[1]);
assign ex4_pass_nan = ex4_pass_a_nan | ex4_pass_b_nan;
assign ex4_snan = ex4_a_snan | ex4_b_snan;
// Sign of a special result. A passed NaN keeps its own operand sign. The
// forced-QNaN term is ANDed with 1'b0, so it never sets the sign. Every other
// case uses the computed result sign.
assign ex4_divsqrt_sign_special = (exx_a_sign_q & ex4_pass_a_nan) |
(exx_b_sign_q & ex4_pass_b_nan) |
(1'b0 & ex4_force_qnan) |
(ex4_divsqrt_sign & ex4_force_zero) |
(ex4_divsqrt_sign & ex4_force_zeroone) |
(ex4_divsqrt_sign & ex4_force_36A) |
(ex4_divsqrt_sign & ex4_force_inf) |
(ex4_divsqrt_sign & ex4_dnr_roundup_incexp) |
(ex4_divsqrt_sign & ex4_roundup_incexp) |
(ex4_maxnorm_sign & ex4_force_maxnorm);
// Exponent of a special result (13 bits, AND-OR mux):
//   force_zero    -> all zeros (term contributes nothing)
//   force_zeroone -> 1
//   NaN / QNaN    -> {2'b00, ones[03:13]}
//   maxnorm DP    -> {2'b00, ones[03:12], 1'b0}
//   force_36A     -> 13'b0001101101010 (0x36A)
//   maxnorm SP    -> 13'b0010001111110 (0x47E)
assign ex4_divsqrt_exp_special[01:13] = ({13{1'b0}} & {13{ex4_force_zero}}) |
({{12{1'b0}}, 1'b1} & {13{ex4_force_zeroone}}) |
({2'b00, ones[03:13]} & {13{ex4_pass_nan}}) |
({2'b00, ones[03:13]} & {13{ex4_force_qnan}}) |
({2'b00, ones[03:12], 1'b0} & {13{ex4_force_maxnorm_dp}}) |
(13'b0001101101010 & {13{ex4_force_36A}}) |
(13'b0010001111110 & {13{ex4_force_maxnorm_sp}});
// Fraction of a special result (53 bits, AND-OR mux):
//   force_zero    -> all zeros
//   force_zeroone -> only the last bit set
//   force_36A     -> only bit 0 set
//   force_qnan    -> top two bits set, rest zero
//   pass NaN      -> top two bits forced to 1, then the operand fraction
//                    (SP keeps bits [2:23] and zero-fills [24:52])
//   maxnorm DP    -> all ones
//   maxnorm SP    -> leading 24 ones, then 29 zeros
assign ex4_divsqrt_fract_special[00:52] = ({53{1'b0}} & {53{ex4_force_zero}}) |
({{52{1'b0}}, 1'b1} & {53{ex4_force_zeroone}}) |
({1'b1, {52{1'b0}}} & {53{ex4_force_36A}}) |
(({2'b11, zeros[2:52]}) & {53{ex4_force_qnan}}) |
(({2'b11, exx_a_fract_q[2:23], zeros[24:52]}) & {53{ex4_pass_a_nan_sp}}) |
(({2'b11, exx_b_fract_q[2:23], zeros[24:52]}) & {53{ex4_pass_b_nan_sp}}) |
(({2'b11, exx_a_fract_q[2:52]}) & {53{ex4_pass_a_nan_dp}}) |
(({2'b11, exx_b_fract_q[2:52]}) & {53{ex4_pass_b_nan_dp}}) |
({53{1'b1}} & {53{ex4_force_maxnorm_dp}}) |
({{24{1'b1}}, {29{1'b0}}} & {53{ex4_force_maxnorm_sp}});
//-----------------------------------------------------------------------
// some final result muxing
//-----------------------------------------------------------------------
// Computed sign is the XOR of the operand signs. The latched sign takes the
// special-case sign whenever ex4_div_special_case is set.
assign ex4_divsqrt_sign = exx_a_sign_q ^ exx_b_sign_q;
assign exx_divsqrt_sign_d = (ex4_divsqrt_sign & (~ex4_div_special_case)) | (ex4_divsqrt_sign_special & ex4_div_special_case);
// Exponent: exx_exp_addres for a normal completion or an SP denormalized
// completion; the constant 1 for a non-SP denormalized completion.
assign ex4_divsqrt_exp = ((exx_exp_addres) & {13{(ex4_divsqrt_done_q & (~ex4_denormalizing_result_done))}}) |
((exx_exp_addres) & {13{(ex4_denormalizing_result_done & ex4_sp)}}) |
(13'b0000000000001 & {13{(ex4_denormalizing_result_done & (~ex4_sp))}});
assign exx_divsqrt_exp_d = ex4_divsqrt_exp;
assign ex4_divsqrt_fract = ex4_divsqrt_fract_p0[01:53];
// generate the remainder
// The 57-bit fraction register is reused across phases (AND-OR mux):
//   ex3 done            -> remainder
//   ex4 done            -> un-incremented fraction, padded with 4 zero bits
//   denorm rounding     -> rounded denormal fraction [01:53], padded with 4 zero bits
//   denorm shifting     -> shifted fraction
//   start denorm (DP)   -> pre-round fraction
//   start denorm (SP)   -> pre-round fraction bits [28:56], zero-filled on the right
// The first four terms are blocked while ex4_start_a_denorm_result is set.
assign exx_divsqrt_fract_d = (ex3_divsqrt_remainder[00:56] & {57{(ex3_divsqrt_done & (~ex4_denormalizing_result) & (~ex4_start_a_denorm_result))}}) |
({ex4_divsqrt_fract[00:52], 4'b0000} & {57{(ex4_divsqrt_done_q & (~ex4_denormalizing_result) & (~ex4_start_a_denorm_result))}}) |
({ex4_divsqrt_fract_dnr[01:53], 4'b0000} & {57{(ex4_denormalizing_result_rounding & (~ex4_denormalizing_result_shifting) & (~ex4_start_a_denorm_result))}}) |
(ex4_divsqrt_fract_shifted[00:56] & {57{(ex4_denormalizing_result_shifting & (~ex4_denormalizing_result_rounding) & (~ex4_start_a_denorm_result))}}) |
(ex4_divsqrt_fract_preround[00:56] & {57{(ex4_start_denorm_result)}}) |
(({ex4_divsqrt_fract_preround[28:56], zeros[0:27]}) & {57{(ex4_start_sp_denorm_result)}}); // grab the rounded/corrected result
//-----------------------------------------------------------------------
//-----------------------------------------------------------------------
////#------------------------------------------------------------------------
////# decode fprf field for pipe settings
////#------------------------------------------------------------------------
// NOTE(review): the FPRF table below appears to hold two independent lists
// printed side by side - a 5-bit FPRF encoding with its class name on the left,
// and a bracketed per-bit term list on the right. The left/right pairing of
// rows does not look meaningful; confirm against the Power ISA FPSCR[FPRF]
// definition.
// FPRF
// 10001 QNAN [0] qnan | den | (sign*zero)
// 01001 -INF [1] sign * !zero
// 01000 -norm [2] !sign * !zero * !qnan
// 11000 -den [3] zero
// 10010 -zero [4] inf | qnan
// 00010 +zero
// 10100 +den
// 00100 +norm
// 00101 +inf
// FPSCR status bits
// [ 0] ox 0
// [ 1] ux 0
// [ 2] zx 0
// [ 3] xx 1 (not needed, comes from FI)
// [ 4] FR 1
// [ 5] FI 1
// [ 6] sign
// [ 7] not sign and not zero, redundant in rnd?
// [ 8] zer
// [ 9] inf
// [10] den
// [11] vxidi
// [12] vxzdz
// [13] vxsqrt
// [14] nan
// [15] vxsnan
// [0]..[5]: exception and rounding status. Hard special cases suppress every
// bit except [2]; the forced-zero denormal mode (ex4_deno_force_zero)
// additionally suppresses [1], [3], [4] and [5].
assign exx_divsqrt_flag_fpscr_d[0] = overflow & (~exx_hard_spec_case);
assign exx_divsqrt_flag_fpscr_d[1] = underflow_fi & (~exx_hard_spec_case) & (~ex4_deno_force_zero);
assign exx_divsqrt_flag_fpscr_d[2] = ex4_div_by_zero_zx;
assign exx_divsqrt_flag_fpscr_d[3] = ex4_rem_nonzero_fi & (~exx_hard_spec_case) & (~ex4_deno_force_zero);
// [4]: before denormalization completes, set by a normal round-up (unless
// underflow with UE off) or by a forced zeroone/36A result; once it has
// completed, set by the denormal round-up for the active precision. Also set
// on overflow with OE off.
assign exx_divsqrt_flag_fpscr_d[4] = ((((ex4_round_up & (~(underflow & (~UE)))) | ex4_force_zeroone | ex4_force_36A) & (~ex4_denormalizing_result_done)) | (((ex4_round_up_dnr & exx_dp) | (ex4_round_up_dnr_sp & exx_sp)) & ex4_denormalizing_result_done) | (overflow & (~OE))) & (~exx_hard_spec_case) & (~ex4_deno_force_zero); // and not underflow_fi;
// [5]: non-zero remainder, overflow with OE off, or underflow with UE off.
assign exx_divsqrt_flag_fpscr_d[5] = (ex4_rem_nonzero_fi | (overflow & (~OE)) | (underflow_fi & (~UE))) & (~exx_hard_spec_case) & (~ex4_deno_force_zero);
// [6]..[10]: result class indicators.
assign exx_divsqrt_flag_fpscr_d[6] = exx_divsqrt_sign_d; // and not (ex4_pass_nan or ex4_force_qnan);
assign exx_divsqrt_flag_fpscr_d[7] = ((~exx_divsqrt_sign_d)) & (~ex4_force_zero) & (~(ex4_pass_nan | ex4_force_qnan));
assign exx_divsqrt_flag_fpscr_d[8] = (ex4_force_zero | ex4_denorm_res_shiftoff_zero) & (~(ex4_pass_nan | ex4_force_qnan));
assign exx_divsqrt_flag_fpscr_d[9] = ex4_force_inf;
// [10]: DP denormalized completion with fraction bit 0 clear, or an SP
// denormal underflow (excluding the 0x380 round-up case), or a forced
// zeroone/36A result; never while ex4_deno_force_zero is set.
assign exx_divsqrt_flag_fpscr_d[10] = ((((ex4_denormalizing_result_done & ((~exx_divsqrt_fract_d[0]) & exx_dp)) | underflow_sp_denorm) & (~sp_denorm_0x380roundup)) | ex4_force_zeroone | ex4_force_36A) & (~ex4_deno_force_zero);
// [11]..[15]: invalid-operation causes and NaN indicators.
assign exx_divsqrt_flag_fpscr_d[11] = ex4_inf_div_inf;
assign exx_divsqrt_flag_fpscr_d[12] = ex4_zero_div_zero;
// [13] is not reported when B carries an SP under/overflow indication.
assign exx_divsqrt_flag_fpscr_d[13] = ex4_sqrt_neg & (~exx_b_SPunderflow_zero_q) & (~exx_b_SPoverflow_expo_max_q);
assign exx_divsqrt_flag_fpscr_d[14] = ex4_force_qnan | ex4_pass_nan;
assign exx_divsqrt_flag_fpscr_d[15] = ex4_snan;
// Suppress indication: zezx or vevx, except for the invalid mixed-precision case.
assign exx_divsqrt_v_suppress_d = (zezx | vevx) & (~exx_invalid_mixed_precision);
// Bit 0 of the remainder and its complement. Four copies of each are latched
// in ex5_div_result_lat.
// NOTE(review): bit 0 looks like the remainder sign bit - confirm.
assign ex3_rem_neg = ex3_divsqrt_remainder[0];
assign ex3_rem_neg_b = (~ex3_divsqrt_remainder[0]);
// Result register, 96 bits: sign (1), exponent (13), fraction (57),
// the 16 FPSCR flag bits, the suppress bit, and four copies each of
// ex3_rem_neg and ex3_rem_neg_b.
// Clocked only while exx_running_act_q is active.
// din and dout are paired positionally; the order below must not change.
tri_rlmreg_p #(.WIDTH(96), .INIT(0), .NEEDS_SRESET(0)) ex5_div_result_lat(
   // power and clock
   .vd(vdd),
   .gd(gnd),
   .nclk(nclk),
   // clock control
   .force_t(force_t),
   .thold_b(thold_0_b),
   .sg(sg_0),
   .d_mode(tiup),
   .delay_lclkr(delay_lclkr),
   .mpw1_b(mpw1_b),
   .mpw2_b(mpw2_b),
   // activity
   .act(exx_running_act_q),
   // scan ring
   .scin(ex5_div_result_lat_scin),
   .scout(ex5_div_result_lat_scout),
   // data in
   .din({exx_divsqrt_sign_d,
         exx_divsqrt_exp_d,
         exx_divsqrt_fract_d,
         exx_divsqrt_flag_fpscr_d,
         exx_divsqrt_v_suppress_d,
         ex3_rem_neg,
         ex3_rem_neg,
         ex3_rem_neg,
         ex3_rem_neg,
         ex3_rem_neg_b,
         ex3_rem_neg_b,
         ex3_rem_neg_b,
         ex3_rem_neg_b}),
   // data out (same order as din)
   .dout({exx_divsqrt_sign_q,
          exx_divsqrt_exp_q[1:13],
          exx_divsqrt_fract_q[00:56],
          exx_divsqrt_flag_fpscr_q,
          exx_divsqrt_v_suppress_q,
          ex4_rem_neg[0],
          ex4_rem_neg[1],
          ex4_rem_neg[2],
          ex4_rem_neg[3],
          ex4_rem_neg_b[0],
          ex4_rem_neg_b[1],
          ex4_rem_neg_b[2],
          ex4_rem_neg_b[3]})
);
// Special-case staging register, 163 bits. It carries the special fraction
// and exponent, the incremented fraction, the three candidate "+1" exponents
// with their select signals, and the round-up/increment-exponent indication
// from ex4 into ex5.
// Clocked only while exx_running_act_q is active.
// din and dout are paired positionally; the order below must not change.
tri_rlmreg_p #(.WIDTH(163), .INIT(0), .NEEDS_SRESET(0)) ex5_special_case_lat(
   // power and clock
   .vd(vdd),
   .gd(gnd),
   .nclk(nclk),
   // clock control
   .force_t(force_t),
   .thold_b(thold_0_b),
   .sg(sg_0),
   .d_mode(tiup),
   .delay_lclkr(delay_lclkr),
   .mpw1_b(mpw1_b),
   .mpw2_b(mpw2_b),
   // activity
   .act(exx_running_act_q),
   // scan ring
   .scin(ex5_special_case_lat_scin),
   .scout(ex5_special_case_lat_scout),
   // data in
   .din({ex4_divsqrt_fract_special,
         ex4_divsqrt_fract_p1,
         ex4_divsqrt_exp_special,
         exx_exp_addres_x0_p1,
         exx_exp_addres_ux_p1,
         exx_exp_addres_ox_p1,
         ueux,
         oeox,
         not_ueux_or_oeox,
         ex4_x_roundup_incexp}),
   // data out (same order as din)
   .dout({ex5_divsqrt_fract_special,
          ex5_divsqrt_fract_p1,
          ex5_divsqrt_exp_special,
          exy_exp_addres_x0_p1,
          exy_exp_addres_ux_p1,
          exy_exp_addres_ox_p1,
          exy_ueux,
          exy_oeox,
          exy_not_ueux_or_oeox,
          ex5_x_roundup_incexp})
);
// Pick the incremented exponent that matches the latched select signals
// exy_not_ueux_or_oeox / exy_ueux / exy_oeox.
// NOTE(review): the selects are assumed to be mutually exclusive - confirm.
assign exy_exp_addres_p1 = (exy_exp_addres_x0_p1 & {13{exy_not_ueux_or_oeox}}) |
(exy_exp_addres_ux_p1 & {13{exy_ueux}}) |
(exy_exp_addres_ox_p1 & {13{exy_oeox}});
// ex5 fraction mux (53 bits):
//   latched fraction   -> no special case / inf / round-up / incexp, or denormalization done
//   special fraction   -> special case without force_inf
//   {1, 52 zeros}      -> round-up that increments the exponent, or forced infinity
//   incremented (_p1)  -> plain round-up on a non-denormalized result
// NOTE(review): the first term is enabled by ex5_denormalizing_result_done
// alone, so it can be OR-ed with another enabled term - confirm this overlap
// is intended or cannot occur.
assign ex5_divsqrt_fract_d = (exx_divsqrt_fract_q[00:52] & {53{(((~(ex5_div_special_case | ex5_force_inf | ex5_round_up | ex5_x_roundup_incexp))) | ex5_denormalizing_result_done)}}) |
(ex5_divsqrt_fract_special[00:52] & {53{(ex5_div_special_case & (~ex5_force_inf))}}) |
({1'b1, {52{1'b0}}} & {53{ex5_x_roundup_incexp}}) |
({1'b1, {52{1'b0}}} & {53{(ex5_force_inf)}}) |
(ex5_divsqrt_fract_p1[01:53] & {53{((~(ex5_div_special_case | ex5_force_inf | ex5_x_roundup_incexp)) & ex5_round_up & (~ex5_denormalizing_result_done))}});
// ex5 exponent mux (13 bits). The first two terms select the same latched
// exponent for the denorm-done and not-denorm-done cases; together they cover
// "no special case, no forced infinity, no exponent increment".
assign ex5_divsqrt_exp_d = (exx_divsqrt_exp_q[01:13] & {13{((~ex5_denormalizing_result_done) & (~(ex5_div_special_case | ex5_force_inf | ex5_x_roundup_incexp)))}}) |
(exx_divsqrt_exp_q[01:13] & {13{(ex5_denormalizing_result_done & (~(ex5_div_special_case | ex5_force_inf | ex5_x_roundup_incexp)))}}) |
(exy_exp_addres_p1[0:12] & {13{(ex5_x_roundup_incexp & (~ex5_force_inf))}}) |
({2'b00, ones[03:13]} & {13{(ex5_force_inf)}}) |
(ex5_divsqrt_exp_special[01:13] & {13{(ex5_div_special_case & (~ex5_force_inf))}});
// Final result register, 66 bits: the ex5 fraction mux output (53) followed
// by the ex5 exponent mux output (13). The act pin is driven by tiup rather
// than exx_running_act_q.
tri_rlmreg_p #(.WIDTH(66), .INIT(0), .NEEDS_SRESET(0)) ex6_div_result_lat(
   // power and clock
   .vd(vdd),
   .gd(gnd),
   .nclk(nclk),
   // clock control
   .force_t(force_t),
   .thold_b(thold_0_b),
   .sg(sg_0),
   .d_mode(tiup),
   .delay_lclkr(delay_lclkr),
   .mpw1_b(mpw1_b),
   .mpw2_b(mpw2_b),
   // activity
   .act(tiup),
   // scan ring
   .scin(ex6_div_result_lat_scin),
   .scout(ex6_div_result_lat_scout),
   // data: fraction first, exponent second, on both sides
   .din({ex5_divsqrt_fract_d, ex5_divsqrt_exp_d}),
   .dout({ex6_divsqrt_fract_q, ex6_divsqrt_exp_q})
);
// Completion in ex4: either the normal done or the denormalization done,
// held off while a denormalized result is just being started.
assign ex4_divsqrt_done = (ex4_divsqrt_done_q | ex4_denormalizing_result_done) & (~ex4_start_a_denorm_result);
// Per-thread valid outputs: the stage's done signal gated with the thread-id
// bit of the instruction in flight.
assign f_dsq_ex5_divsqrt_v[0] = ex5_divsqrt_done & exx_instr_tid_q[0];
assign f_dsq_ex5_divsqrt_v[1] = ex5_divsqrt_done & exx_instr_tid_q[1];
assign f_dsq_ex6_divsqrt_v[0] = ex6_divsqrt_done & exx_instr_tid_q[0];
assign f_dsq_ex6_divsqrt_v[1] = ex6_divsqrt_done & exx_instr_tid_q[1];
assign f_dsq_ex6_divsqrt_record_v = exx_record_v_q & ex6_divsqrt_done;
// The remaining outputs are straight copies of latched state.
assign f_dsq_ex6_divsqrt_v_suppress = exx_divsqrt_v_suppress_q;
assign f_dsq_ex5_divsqrt_itag = exx_itag_q;
assign f_dsq_ex6_divsqrt_fpscr_addr = exx_fpscr_addr_q;
assign f_dsq_ex6_divsqrt_instr_frt = exx_instr_frt_q;
assign f_dsq_ex6_divsqrt_instr_tid = exx_instr_tid_q;
assign f_dsq_ex6_divsqrt_cr_bf = exx_cr_bf_q;
assign f_dsq_ex6_divsqrt_sign = exx_divsqrt_sign_q;
// Exponent and fraction come from the ex6 register (after the ex5 special-case
// muxes), not directly from the ex5 result register.
assign f_dsq_ex6_divsqrt_exp[01:13] = ex6_divsqrt_exp_q; //exx_divsqrt_exp_q;
assign f_dsq_ex6_divsqrt_fract[00:52] = ex6_divsqrt_fract_q; //exx_divsqrt_fract_q(00 to 52);
assign f_dsq_ex6_divsqrt_flag_fpscr = exx_divsqrt_flag_fpscr_q;
// Debug bus inputs (64 bits), registered by f_dsq_debug_lat.
// Per the original note: bits 0:11 are on trigger group 2, bits 12:23 on group 3.

// bits 0:11 - sequencer state and per-stage done indications
assign f_dsq_debug_din[00]    = ex1_cycles_init;
assign f_dsq_debug_din[01]    = ex1_cycles_hold;
assign f_dsq_debug_din[02]    = ex1_divsqrt_done;
assign f_dsq_debug_din[03]    = ex2_divsqrt_done;
assign f_dsq_debug_din[04]    = ex3_divsqrt_done;
assign f_dsq_debug_din[05]    = ex4_divsqrt_done;
assign f_dsq_debug_din[06]    = ex5_divsqrt_done;
assign f_dsq_debug_din[07]    = ex6_divsqrt_done;
assign f_dsq_debug_din[08]    = ex1_cycles_clear;
assign f_dsq_debug_din[09]    = exx_divsqrt_running_q;
assign f_dsq_debug_din[10]    = exx_running_act_q;
assign f_dsq_debug_din[11]    = ex1_sqrt;

// bits 12:19 - cycle counter bits 0..7, in order
assign f_dsq_debug_din[12:19] = {ex2_cycles_q[0], ex2_cycles_q[1], ex2_cycles_q[2], ex2_cycles_q[3],
                                 ex2_cycles_q[4], ex2_cycles_q[5], ex2_cycles_q[6], ex2_cycles_q[7]};

// bits 20:23 - hold/init repeated on this group, plus precision and sqrt selects
assign f_dsq_debug_din[20]    = ex1_cycles_hold;
assign f_dsq_debug_din[21]    = ex1_cycles_init;
assign f_dsq_debug_din[22]    = exx_single_precision_d;
assign f_dsq_debug_din[23]    = exx_sqrt_d;

// bits 24:27 - exx_sum4 bits 0..3, in order
assign f_dsq_debug_din[24:27] = {exx_sum4[0], exx_sum4[1], exx_sum4[2], exx_sum4[3]};

// bit 28 and bits 29:63 - quotient carry-in and exx_Q_q[22:56]
assign f_dsq_debug_din[28]    = exx_q_bit0_cin;
assign f_dsq_debug_din[29:63] = exx_Q_q[22:56];
// Debug bus register, 64 bits: registers f_dsq_debug_din and drives the
// f_dsq_debug output. The act pin is driven by tiup.
tri_rlmreg_p #(.WIDTH(64), .INIT(0), .NEEDS_SRESET(0)) f_dsq_debug_lat(
   // power and clock
   .vd(vdd),
   .gd(gnd),
   .nclk(nclk),
   // clock control
   .force_t(force_t),
   .thold_b(thold_0_b),
   .sg(sg_0),
   .d_mode(tiup),
   .delay_lclkr(delay_lclkr),
   .mpw1_b(mpw1_b),
   .mpw2_b(mpw2_b),
   // activity
   .act(tiup),
   // scan ring
   .scin(f_dsq_debug_lat_scin),
   .scout(f_dsq_debug_lat_scout),
   // data
   .din(f_dsq_debug_din),
   .dout(f_dsq_debug_q)
);

// Registered debug bus to the module output.
assign f_dsq_debug = f_dsq_debug_q;
//------------------------------------------------------------------------------
// sinkless
//------------------------------------------------------------------------------
// Every signal (or slice) listed here is copied onto a bit of spare_unused.
// NOTE(review): presumably these nets have no other load and this gives them a
// sink for lint/synthesis - confirm.
// Indices 0 and 1 of spare_unused are not driven in this section.
// The index ranges are contiguous from 2 to 880; keep them non-overlapping
// when adding entries.
assign spare_unused[2] = exx_Qin_lev0[0];
assign spare_unused[3] = exx_QMin_lev0[0];
assign spare_unused[4] = exx_bQ_q_t[56];
assign spare_unused[5] = exx_bQM_q_t[56];
assign spare_unused[6] = exx_lev0_csaout_carryout[0];
assign spare_unused[7] = fpu_enable;
assign spare_unused[8] = exx_lev0_csaoutsh_carry[0];
assign spare_unused[9] = exx_lev1_div_csaout_carryout[0];
assign spare_unused[10] = exx_lev1_sqrt_csaout_carryout[0];
// For the CSA outputs only bit 4 and bits 6:56 are sunk here.
assign spare_unused[11] = exx_lev1_div_csaout_carry[4];
assign spare_unused[12:62] = exx_lev1_div_csaout_carry[6:56];
assign spare_unused[63] = exx_lev1_sqrt_csaout_carry[4];
assign spare_unused[64:114] = exx_lev1_sqrt_csaout_carry[6:56];
assign spare_unused[115] = exx_lev1_div_csaout_sum[4];
assign spare_unused[116:166] = exx_lev1_div_csaout_sum[6:56];
assign spare_unused[167] = exx_lev1_sqrt_csaout_sum[4];
assign spare_unused[168:218] = exx_lev1_sqrt_csaout_sum[6:56];
assign spare_unused[219] = exx_q_bit1;
assign spare_unused[220] = exx_nq_bit1;
assign spare_unused[221] = exx_lev2_csaout_sum[4];
assign spare_unused[222:272] = exx_lev2_csaout_sum[6:56];
assign spare_unused[273] = exx_lev2_csaout_carryout[0];
assign spare_unused[274] = exx_lev2_csaout_carry[4];
assign spare_unused[275:325] = exx_lev2_csaout_carry[6:56];
assign spare_unused[326] = exx_lev3_div_csaout_carryout[0];
assign spare_unused[327] = exx_lev3_sqrt_csaout_carryout[0];
assign spare_unused[328] = exx_lev3_div_csaout_carry[4];
assign spare_unused[329:379] = exx_lev3_div_csaout_carry[6:56];
assign spare_unused[380] = exx_lev3_sqrt_csaout_carry[4];
assign spare_unused[381:431] = exx_lev3_sqrt_csaout_carry[6:56];
assign spare_unused[432] = exx_lev3_div_csaout_sum[4];
assign spare_unused[433:483] = exx_lev3_div_csaout_sum[6:56];
assign spare_unused[484] = exx_lev3_sqrt_csaout_sum[4];
assign spare_unused[485:535] = exx_lev3_sqrt_csaout_sum[6:56];
assign spare_unused[536] = exx_q_bit3;
assign spare_unused[537] = exx_nq_bit3;
assign spare_unused[538] = exx_nq_bit22;
assign spare_unused[539] = exx_lev0_csaoutsh_sum[0];
assign spare_unused[540] = exx_lev22_csaout_carryout_div[0];
assign spare_unused[541] = exx_lev22_csaout_carryout_sqrt[0];
assign spare_unused[542:594] = ex4_divsqrt_fract_rounded[1:53];
assign spare_unused[595] = ex4_incexp_to_overflow;
assign spare_unused[596] = ex4_norm_shl1_test;
assign spare_unused[597] = denorm_exp_addres[13];
assign spare_unused[598:610] = denorm_exp_addres_sp[1:13];
assign spare_unused[611] = ex4_divsqrt_denorm_hold;
assign spare_unused[612] = dnsp_lv3sh10;
assign spare_unused[613] = dnsp_lv3sh11;
// Upper (unused) portions of the shifted SP denormal masks.
assign spare_unused[614:676] = ex4_spdenorm_mask_shifted_00to48[57:119];
assign spare_unused[677:743] = ex4_spdenorm_mask_lsb_shifted_00to48[53:119];
assign spare_unused[744:810] = ex4_spdenorm_mask_guard_shifted_00to48[53:119];
assign spare_unused[811:877] = ex4_spdenorm_mask_round_shifted_00to48[53:119];
assign spare_unused[878] = ex5_divsqrt_fract_p1[0];
assign spare_unused[879] = ex4_act;
assign spare_unused[880] = ex2_record_v;
//------------------------------------------------------------------------------
// scan chain
//------------------------------------------------------------------------------
// Each register's scan input is its own scan output shifted by one position,
// with the previous register's scout[0] entering at the highest index.
// f_dsq_si enters the chain at ex1_div_ctr_lat; f_dsq_so leaves it from
// f_dsq_debug_lat_scout[0].
// Chain order:
//   f_dsq_si -> ex1_div_ctr -> ex3_div_hangcounter -> ex2_div_b_stage
//   -> ex2_div_exp -> ex2_div_a_stage -> ex1_div_instr -> ex2_div_instr
//   -> ex2_itag -> ex2_fpscr_addr -> exx_div_denorm -> ex3_div_PR_sumcarry
//   -> ex3_div_PR_sum4carry4 -> ex3_div_Q_QM -> ex3_div_bQ_QM
//   -> ex3_sqrt_bitmask -> ex3_denom -> ex5_div_result -> ex6_div_result
//   -> ex5_special_case -> ex5_div_done -> act_si/act_so -> f_dsq_debug
//   -> f_dsq_so
// The scin range of each entry must match the WIDTH of the corresponding
// register instance (e.g. 27, 96, 66, 163 and 64 for the instances whose
// declarations carry those WIDTH values).
assign ex1_div_ctr_lat_scin[0:18] = {ex1_div_ctr_lat_scout[1:18], f_dsq_si};
assign ex3_div_hangcounter_lat_scin[0:7] = {ex3_div_hangcounter_lat_scout[1:7], ex1_div_ctr_lat_scout[0]};
assign ex2_div_b_stage_lat_scin[0:70] = {ex2_div_b_stage_lat_scout[1:70], ex3_div_hangcounter_lat_scout[0]};
assign ex2_div_exp_lat_scin[0:51] = {ex2_div_exp_lat_scout[1:51], ex2_div_b_stage_lat_scout[0]};
assign ex2_div_a_stage_lat_scin[0:70] = {ex2_div_a_stage_lat_scout[1:70], ex2_div_exp_lat_scout[0]};
assign ex1_div_instr_lat_scin[0:14] = {ex1_div_instr_lat_scout[1:14], ex2_div_a_stage_lat_scout[0]};
assign ex2_div_instr_lat_scin[0:5] = {ex2_div_instr_lat_scout[1:5], ex1_div_instr_lat_scout[0]};
assign ex2_itag_lat_scin[0:8] = {ex2_itag_lat_scout[1:8], ex2_div_instr_lat_scout[0]};
assign ex2_fpscr_addr_lat_scin[0:27] = {ex2_fpscr_addr_lat_scout[1:27], ex2_itag_lat_scout[0]};
assign exx_div_denorm_lat_scin[0:26] = {exx_div_denorm_lat_scout[1:26], ex2_fpscr_addr_lat_scout[0]};
assign ex3_div_PR_sumcarry_lat_scin[0:113] = {ex3_div_PR_sumcarry_lat_scout[1:113], exx_div_denorm_lat_scout[0]};
assign ex3_div_PR_sum4carry4_lat_scin[0:7] = {ex3_div_PR_sum4carry4_lat_scout[1:7], ex3_div_PR_sumcarry_lat_scout[0]};
assign ex3_div_Q_QM_lat_scin[0:113] = {ex3_div_Q_QM_lat_scout[1:113], ex3_div_PR_sum4carry4_lat_scout[0]};
assign ex3_div_bQ_QM_lat_scin[0:113] = {ex3_div_bQ_QM_lat_scout[1:113], ex3_div_Q_QM_lat_scout[0]};
assign ex3_sqrt_bitmask_lat_scin[0:167] = {ex3_sqrt_bitmask_lat_scout[1:167], ex3_div_bQ_QM_lat_scout[0]};
assign ex3_denom_lat_scin[0:55] = {ex3_denom_lat_scout[1:55], ex3_sqrt_bitmask_lat_scout[0]};
assign ex5_div_result_lat_scin[00:95] = {ex5_div_result_lat_scout[01:95], ex3_denom_lat_scout[0]};
assign ex6_div_result_lat_scin[00:65] = {ex6_div_result_lat_scout[01:65], ex5_div_result_lat_scout[0]};
assign ex5_special_case_lat_scin[00:162] = {ex5_special_case_lat_scout[01:162], ex6_div_result_lat_scout[0]};
assign ex5_div_done_lat_scin[0:3] = {ex5_div_done_lat_scout[1:3], ex5_special_case_lat_scout[0]};
assign act_si[0:7] = {act_so[1:7], ex5_div_done_lat_scout[0]};
assign f_dsq_debug_lat_scin[0:63] = {f_dsq_debug_lat_scout[1:63], act_so[0]};
assign f_dsq_so = f_dsq_debug_lat_scout[0];
endmodule