diff --git a/divider.vhdl b/divider.vhdl index a2a35b0..cfadc51 100644 --- a/divider.vhdl +++ b/divider.vhdl @@ -22,6 +22,7 @@ architecture behaviour of divider is signal quot : std_ulogic_vector(63 downto 0); signal result : std_ulogic_vector(63 downto 0); signal sresult : std_ulogic_vector(63 downto 0); + signal oresult : std_ulogic_vector(63 downto 0); signal qbit : std_ulogic; signal running : std_ulogic; signal signcheck : std_ulogic; @@ -34,7 +35,9 @@ architecture behaviour of divider is signal rc : std_ulogic; signal write_reg : std_ulogic_vector(4 downto 0); signal overflow : std_ulogic; + signal ovf32 : std_ulogic; signal did_ovf : std_ulogic; + signal cr_data : std_ulogic_vector(2 downto 0); begin divider_0: process(clk) @@ -64,6 +67,7 @@ begin count <= "1111111"; running <= '1'; overflow <= '0'; + ovf32 <= '0'; signcheck <= d_in.is_signed and (d_in.dividend(63) or d_in.divisor(63)); elsif signcheck = '1' then signcheck <= '0'; @@ -84,16 +88,19 @@ begin end if; overflow <= quot(63); if dend(128) = '1' or unsigned(dend(127 downto 64)) >= div then + ovf32 <= ovf32 or quot(31); dend <= std_ulogic_vector(unsigned(dend(127 downto 64)) - div) & dend(63 downto 0) & '0'; quot <= quot(62 downto 0) & '1'; count <= count + 1; elsif dend(128 downto 57) = x"000000000000000000" and count(6 downto 3) /= "0111" then -- consume 8 bits of zeroes in one cycle + ovf32 <= or (ovf32 & quot(31 downto 24)); dend <= dend(120 downto 0) & x"00"; quot <= quot(55 downto 0) & x"00"; count <= count + 8; else + ovf32 <= ovf32 or quot(31); dend <= dend(127 downto 0) & '0'; quot <= quot(62 downto 0) & '0'; count <= count + 1; @@ -106,8 +113,8 @@ begin divider_1: process(all) begin - d_out <= DividerToWritebackInit; d_out.write_reg_nr <= write_reg; + d_out.write_cr_mask <= num_to_fxm(0); if is_modulus = '1' then result <= dend(128 downto 65); @@ -123,36 +130,43 @@ begin if is_32bit = '0' then did_ovf <= overflow or (is_signed and (sresult(63) xor neg_result)); elsif is_signed = '1' then - if overflow = '1' or - (sresult(63 downto 31) /= x"00000000" & '0' and - sresult(63 downto 31) /= x"ffffffff" & '1') then + if ovf32 = '1' or sresult(32) /= sresult(31) then did_ovf <= '1'; end if; else - did_ovf <= overflow or (or (sresult(63 downto 32))); + did_ovf <= ovf32; end if; if did_ovf = '1' then - d_out.write_reg_data <= (others => '0'); + oresult <= (others => '0'); elsif (is_32bit = '1') and (is_modulus = '0') then -- 32-bit divisions set the top 32 bits of the result to 0 - d_out.write_reg_data <= x"00000000" & sresult(31 downto 0); + oresult <= x"00000000" & sresult(31 downto 0); else - d_out.write_reg_data <= sresult; + oresult <= sresult; end if; - if count = "1000000" then - d_out.valid <= '1'; - d_out.write_reg_enable <= '1'; - if rc = '1' then - d_out.write_cr_enable <= '1'; - d_out.write_cr_mask <= num_to_fxm(0); - if (did_ovf = '1') or (or (sresult) = '0') then - d_out.write_cr_data <= x"20000000"; - elsif (sresult(63) = '1') and not ((is_32bit = '1') and (is_modulus = '0')) then - d_out.write_cr_data <= x"80000000"; - else - d_out.write_cr_data <= x"40000000"; - end if; + if (did_ovf = '1') or (or (sresult) = '0') then + cr_data <= "001"; + elsif (sresult(63) = '1') and not ((is_32bit = '1') and (is_modulus = '0')) then + cr_data <= "100"; + else + cr_data <= "010"; + end if; + end process; + + divider_out: process(clk) + begin + if rising_edge(clk) then + d_out.write_reg_data <= oresult; + d_out.write_cr_data <= cr_data & '0' & x"0000000"; + if count = "1000000" then + d_out.valid <= '1'; + d_out.write_reg_enable <= '1'; + d_out.write_cr_enable <= rc; + else + d_out.valid <= '0'; + d_out.write_reg_enable <= '0'; + d_out.write_cr_enable <= '0'; end if; end if; end process;