diff --git a/divider.vhdl b/divider.vhdl
index a2a35b0..cfadc51 100644
--- a/divider.vhdl
+++ b/divider.vhdl
@@ -22,6 +22,7 @@ architecture behaviour of divider is
     signal quot       : std_ulogic_vector(63 downto 0);
     signal result     : std_ulogic_vector(63 downto 0);
     signal sresult    : std_ulogic_vector(63 downto 0);
+    signal oresult    : std_ulogic_vector(63 downto 0);
     signal qbit       : std_ulogic;
     signal running    : std_ulogic;
     signal signcheck  : std_ulogic;
@@ -34,7 +35,9 @@ architecture behaviour of divider is
     signal rc         : std_ulogic;
     signal write_reg  : std_ulogic_vector(4 downto 0);
     signal overflow   : std_ulogic;
+    signal ovf32      : std_ulogic;
     signal did_ovf    : std_ulogic;
+    signal cr_data    : std_ulogic_vector(2 downto 0);
 
 begin
     divider_0: process(clk)
@@ -64,6 +67,7 @@ begin
                 count <= "1111111";
                 running <= '1';
                 overflow <= '0';
+                ovf32 <= '0';
                 signcheck <= d_in.is_signed and (d_in.dividend(63) or d_in.divisor(63));
             elsif signcheck = '1' then
                 signcheck <= '0';
@@ -84,16 +88,19 @@ begin
                 end if;
                 overflow <= quot(63);
                 if dend(128) = '1' or unsigned(dend(127 downto 64)) >= div then
+                    ovf32 <= ovf32 or quot(31);
                     dend <= std_ulogic_vector(unsigned(dend(127 downto 64)) - div) &
                             dend(63 downto 0) & '0';
                     quot <= quot(62 downto 0) & '1';
                     count <= count + 1;
                 elsif dend(128 downto 57) = x"000000000000000000" and count(6 downto 3) /= "0111" then
                     -- consume 8 bits of zeroes in one cycle
+                    ovf32 <= or (ovf32 & quot(31 downto 24));
                     dend <= dend(120 downto 0) & x"00";
                     quot <= quot(55 downto 0) & x"00";
                     count <= count + 8;
                 else
+                    ovf32 <= ovf32 or quot(31);
                     dend <= dend(127 downto 0) & '0';
                     quot <= quot(62 downto 0) & '0';
                     count <= count + 1;
@@ -106,8 +113,8 @@ begin
 
     divider_1: process(all)
     begin
-        d_out <= DividerToWritebackInit;
         d_out.write_reg_nr <= write_reg;
+        d_out.write_cr_mask <= num_to_fxm(0);
 
         if is_modulus = '1' then
             result <= dend(128 downto 65);
@@ -123,36 +130,43 @@ begin
         if is_32bit = '0' then
             did_ovf <= overflow or (is_signed and (sresult(63) xor neg_result));
         elsif is_signed = '1' then
-            if overflow = '1' or
-                (sresult(63 downto 31) /= x"00000000" & '0' and
-                 sresult(63 downto 31) /= x"ffffffff" & '1') then
+            if ovf32 = '1' or sresult(32) /= sresult(31) then
                 did_ovf <= '1';
             end if;
         else
-            did_ovf <= overflow or (or (sresult(63 downto 32)));
+            did_ovf <= ovf32;
         end if;
         if did_ovf = '1' then
-            d_out.write_reg_data <= (others => '0');
+            oresult <= (others => '0');
         elsif (is_32bit = '1') and (is_modulus = '0') then
             -- 32-bit divisions set the top 32 bits of the result to 0
-            d_out.write_reg_data <= x"00000000" & sresult(31 downto 0);
+            oresult <= x"00000000" & sresult(31 downto 0);
         else
-            d_out.write_reg_data <= sresult;
+            oresult <= sresult;
         end if;
 
-        if count = "1000000" then
-            d_out.valid <= '1';
-            d_out.write_reg_enable <= '1';
-            if rc = '1' then
-                d_out.write_cr_enable <= '1';
-                d_out.write_cr_mask <= num_to_fxm(0);
-                if (did_ovf = '1') or (or (sresult) = '0') then
-                    d_out.write_cr_data <= x"20000000";
-                elsif (sresult(63) = '1') and not ((is_32bit = '1') and (is_modulus = '0')) then
-                    d_out.write_cr_data <= x"80000000";
-                else
-                    d_out.write_cr_data <= x"40000000";
-                end if;
+        if (did_ovf = '1') or (or (sresult) = '0') then
+            cr_data <= "001";
+        elsif (sresult(63) = '1') and not ((is_32bit = '1') and (is_modulus = '0')) then
+            cr_data <= "100";
+        else
+            cr_data <= "010";
+        end if;
+    end process;
+
+    divider_out: process(clk)
+    begin
+        if rising_edge(clk) then
+            d_out.write_reg_data <= oresult;
+            d_out.write_cr_data <= cr_data & '0' & x"0000000";
+            if count = "1000000" then
+                d_out.valid <= '1';
+                d_out.write_reg_enable <= '1';
+                d_out.write_cr_enable <= rc;
+            else
+                d_out.valid <= '0';
+                d_out.write_reg_enable <= '0';
+                d_out.write_cr_enable <= '0';
             end if;
         end if;
     end process;