diff --git a/Makefile b/Makefile index 720e8d5..939f48e 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,7 @@ common.o: decode_types.o control.o: gpr_hazard.o cr_hazard.o common.o sim_jtag.o: sim_jtag_socket.o core_tb.o: common.o wishbone_types.o core.o soc.o sim_jtag.o -core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o loadstore1.o dcache.o writeback.o core_debug.o divider.o +core.o: common.o wishbone_types.o fetch1.o fetch2.o icache.o decode1.o decode2.o register_file.o cr_file.o execute1.o loadstore1.o dcache.o writeback.o core_debug.o core_debug.o: common.o countzero.o: countzero_tb.o: common.o glibc_random.o countzero.o @@ -26,7 +26,7 @@ crhelpers.o: common.o decode1.o: common.o decode_types.o decode2.o: decode_types.o common.o helpers.o insn_helpers.o control.o decode_types.o: -execute1.o: decode_types.o common.o helpers.o crhelpers.o insn_helpers.o ppc_fx_insns.o rotator.o logical.o countzero.o multiply.o +execute1.o: decode_types.o common.o helpers.o crhelpers.o insn_helpers.o ppc_fx_insns.o rotator.o logical.o countzero.o multiply.o divider.o fetch1.o: common.o fetch2.o: common.o wishbone_types.o glibc_random_helpers.o: diff --git a/common.vhdl b/common.vhdl index 9c18230..1d0bbac 100644 --- a/common.vhdl +++ b/common.vhdl @@ -145,7 +145,7 @@ package common is oe => '0', is_32bit => '0', xerc => xerc_init, others => (others => '0')); - type Decode2ToDividerType is record + type Execute1ToDividerType is record valid: std_ulogic; write_reg: gpr_index_t; dividend: std_ulogic_vector(63 downto 0); @@ -154,14 +154,15 @@ package common is is_32bit: std_ulogic; is_extended: std_ulogic; is_modulus: std_ulogic; + neg_result: std_ulogic; rc: std_ulogic; oe: std_ulogic; xerc: xer_common_t; end record; - constant Decode2ToDividerInit: Decode2ToDividerType := (valid => '0', is_signed => '0', is_32bit => '0', - is_extended => '0', is_modulus => '0', - rc => '0', oe => '0', xerc => xerc_init, - others => (others => '0')); + constant Execute1ToDividerInit: Execute1ToDividerType := (valid => '0', is_signed => '0', is_32bit => '0', + is_extended => '0', is_modulus => '0', + rc => '0', oe => '0', xerc => xerc_init, + neg_result => '0', others => (others => '0')); type Decode2ToRegisterFileType is record read1_enable : std_ulogic; @@ -275,20 +276,19 @@ package common is xerc => xerc_init, others => (others => '0')); - type DividerToWritebackType is record + type DividerToExecute1Type is record valid: std_ulogic; - write_reg_enable : std_ulogic; write_reg_nr: gpr_index_t; write_reg_data: std_ulogic_vector(63 downto 0); write_xerc_enable : std_ulogic; xerc : xer_common_t; rc: std_ulogic; end record; - constant DividerToWritebackInit : DividerToWritebackType := (valid => '0', write_reg_enable => '0', - rc => '0', write_xerc_enable => '0', - xerc => xerc_init, - others => (others => '0')); + constant DividerToExecute1Init : DividerToExecute1Type := (valid => '0', + rc => '0', write_xerc_enable => '0', + xerc => xerc_init, + others => (others => '0')); type WritebackToRegisterFileType is record write_reg : gspr_index_t; diff --git a/core.vhdl b/core.vhdl index 71c10b3..a38cf36 100644 --- a/core.vhdl +++ b/core.vhdl @@ -63,10 +63,6 @@ architecture behave of core is signal loadstore1_to_dcache: Loadstore1ToDcacheType; signal dcache_to_writeback: DcacheToWritebackType; - -- divider signals - signal decode2_to_divider: Decode2ToDividerType; - signal divider_to_writeback: DividerToWritebackType; - -- local signals signal fetch1_stall_in : std_ulogic; signal icache_stall_out : std_ulogic; @@ -111,7 +107,6 @@ architecture behave of core is attribute keep_hierarchy of register_file_0 : label is keep_h(DISABLE_FLATTEN); attribute keep_hierarchy of cr_file_0 : label is keep_h(DISABLE_FLATTEN); attribute keep_hierarchy of execute1_0 : label is keep_h(DISABLE_FLATTEN); - attribute keep_hierarchy of divider_0 : label is keep_h(DISABLE_FLATTEN); attribute keep_hierarchy of loadstore1_0 : label is keep_h(DISABLE_FLATTEN); attribute keep_hierarchy of dcache_0 : label is keep_h(DISABLE_FLATTEN); attribute keep_hierarchy of writeback_0 : label is keep_h(DISABLE_FLATTEN); @@ -192,7 +187,6 @@ begin d_in => decode1_to_decode2, e_out => decode2_to_execute1, l_out => decode2_to_loadstore1, - d_out => decode2_to_divider, r_in => register_file_to_decode2, r_out => decode2_to_register_file, c_in => cr_file_to_decode2, @@ -228,6 +222,7 @@ begin execute1_0: entity work.execute1 port map ( clk => clk, + rst => core_rst, flush_out => flush, stall_out => ex1_stall_out, e_in => decode2_to_execute1, @@ -259,20 +254,11 @@ begin wishbone_out => wishbone_data_out ); - divider_0: entity work.divider - port map ( - clk => clk, - rst => core_rst, - d_in => decode2_to_divider, - d_out => divider_to_writeback - ); - writeback_0: entity work.writeback port map ( clk => clk, e_in => execute1_to_writeback, l_in => dcache_to_writeback, - d_in => divider_to_writeback, w_out => writeback_to_register_file, c_out => writeback_to_cr_file, complete_out => complete diff --git a/decode1.vhdl b/decode1.vhdl index 4e1d063..6ac3f01 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -160,22 +160,22 @@ architecture behaviour of decode1 is 2#0100010110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbt 2#0011110110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbtst -- 2#1111110110# dcbz - 2#0110001001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdeu - 2#1110001001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdeuo - 2#0110001011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divweu - 2#1110001011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divweuo - 2#0110101001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divde - 2#1110101001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdeo - 2#0110101011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divwe - 2#1110101011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divweo - 2#0111001001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdu - 2#1111001001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divduo - 2#0111001011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divwu - 2#1111001011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divwuo - 2#0111101001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divd - 2#1111101001# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divdo - 2#0111101011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divw - 2#1111101011# => (DIV, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- divwo + 2#0110001001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divdeu + 2#1110001001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divdeuo + 2#0110001011# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- divweu + 2#1110001011# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- divweuo + 2#0110101001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- divde + 2#1110101001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- divdeo + 2#0110101011# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- divwe + 2#1110101011# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- divweo + 2#0111001001# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divdu + 2#1111001001# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divduo + 2#0111001011# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- divwu + 2#1111001011# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- divwuo + 2#0111101001# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- divd + 2#1111101001# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- divdo + 2#0111101011# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- divw + 2#1111101011# => (ALU, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- divwo 2#0100011100# => (ALU, OP_XOR, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- eqv 2#1110111010# => (ALU, OP_EXTS, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- extsb 2#1110011010# => (ALU, OP_EXTS, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- extsh @@ -238,10 +238,10 @@ architecture behaviour of decode1 is -- 2#1001000000# mcrxrx 2#0000010011# => (ALU, OP_MFCR, NONE, NONE, NONE, RT, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mfcr/mfocrf 2#0101010011# => (ALU, OP_MFSPR, SPR, NONE, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mfspr - 2#0100001001# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- modud - 2#0100001011# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- moduw - 2#1100001001# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- modsd - 2#1100001011# => (DIV, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- modsw + 2#0100001001# => (ALU, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- modud + 2#0100001011# => (ALU, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '0'), -- moduw + 2#1100001001# => (ALU, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0'), -- modsd + 2#1100001011# => (ALU, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', NONE, '0', '0'), -- modsw 2#0010010000# => (ALU, OP_MTCRF, NONE, NONE, RS, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mtcrf/mtocrf 2#0111010011# => (ALU, OP_MTSPR, NONE, NONE, RS, SPR, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mtspr 2#0001001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulhd diff --git a/decode2.vhdl b/decode2.vhdl index 2da5c41..a95dae3 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -24,7 +24,6 @@ entity decode2 is d_in : in Decode1ToDecode2Type; e_out : out Decode2ToExecute1Type; - d_out : out Decode2ToDividerType; l_out : out Decode2ToLoadstore1Type; r_in : in RegisterFileToDecode2Type; @@ -38,7 +37,6 @@ end entity decode2; architecture behaviour of decode2 is type reg_type is record e : Decode2ToExecute1Type; - d : Decode2ToDividerType; l : Decode2ToLoadstore1Type; end record; @@ -236,7 +234,7 @@ begin decode2_0: process(clk) begin if rising_edge(clk) then - if rin.e.valid = '1' or rin.l.valid = '1' or rin.d.valid = '1' then + if rin.e.valid = '1' or rin.l.valid = '1' then report "execute " & to_hstring(rin.e.nia); end if; r <= rin; @@ -257,14 +255,12 @@ begin variable decoded_reg_b : decode_input_reg_t; variable decoded_reg_c : decode_input_reg_t; variable decoded_reg_o : decode_output_reg_t; - variable signed_division: std_ulogic; variable length : std_ulogic_vector(3 downto 0); begin v := r; v.e := Decode2ToExecute1Init; v.l := Decode2ToLoadStore1Init; - v.d := Decode2ToDividerInit; mul_a := (others => '0'); mul_b := (others => '0'); @@ -319,51 +315,6 @@ begin v.e.insn := d_in.insn; v.e.data_len := length; - -- divide unit - -- PPC divide and modulus instruction words have these bits in - -- the bottom 11 bits: o1dns 010t1 r - -- where o = OE for div instrs, signedness for mod instrs - -- d = 1 for div*, 0 for mod* - -- n = 1 for normal, 0 for extended (dividend << 32/64) - -- s = 1 for signed, 0 for unsigned (for div*) - -- t = 1 for 32-bit, 0 for 64-bit - -- r = RC bit (record condition code) - v.d.write_reg := gspr_to_gpr(decoded_reg_o.reg); - v.d.is_modulus := not d_in.insn(8); - v.d.is_32bit := d_in.insn(2); - if d_in.insn(8) = '1' then - signed_division := d_in.insn(6); - else - signed_division := d_in.insn(10); - end if; - v.d.is_signed := signed_division; - if d_in.insn(2) = '0' then - -- 64-bit forms - if d_in.insn(8) = '1' and d_in.insn(7) = '0' then - v.d.is_extended := '1'; - end if; - v.d.dividend := decoded_reg_a.data; - v.d.divisor := decoded_reg_b.data; - else - -- 32-bit forms - if d_in.insn(8) = '1' and d_in.insn(7) = '0' then -- extended forms - v.d.dividend := decoded_reg_a.data(31 downto 0) & x"00000000"; - elsif signed_division = '1' and decoded_reg_a.data(31) = '1' then - -- sign extend to 64 bits - v.d.dividend := x"ffffffff" & decoded_reg_a.data(31 downto 0); - else - v.d.dividend := x"00000000" & decoded_reg_a.data(31 downto 0); - end if; - if signed_division = '1' and decoded_reg_b.data(31) = '1' then - v.d.divisor := x"ffffffff" & decoded_reg_b.data(31 downto 0); - else - v.d.divisor := x"00000000" & decoded_reg_b.data(31 downto 0); - end if; - end if; - v.d.rc := decode_rc(d_in.decode.rc, d_in.insn); - v.d.xerc := c_in.read_xerc_data; - v.d.oe := decode_oe(d_in.decode.rc, d_in.insn); - -- load/store unit v.l.update_reg := gspr_to_gpr(decoded_reg_a.reg); v.l.addr1 := decoded_reg_a.data; @@ -402,15 +353,12 @@ begin cr_write_valid <= d_in.decode.output_cr or decode_rc(d_in.decode.rc, d_in.insn); v.e.valid := '0'; - v.d.valid := '0'; v.l.valid := '0'; case d_in.decode.unit is when ALU => v.e.valid := control_valid_out; when LDST => v.l.valid := control_valid_out; - when DIV => - v.d.valid := control_valid_out; when NONE => v.e.valid := control_valid_out; v.e.insn_type := OP_ILLEGAL; @@ -419,7 +367,6 @@ begin if rst = '1' then v.e := Decode2ToExecute1Init; v.l := Decode2ToLoadStore1Init; - v.d := Decode2ToDividerInit; end if; -- Update registers @@ -428,6 +375,5 @@ begin -- Update outputs e_out <= r.e; l_out <= r.l; - d_out <= r.d; end process; end architecture behaviour; diff --git a/decode_types.vhdl b/decode_types.vhdl index 9860406..fdc1e6e 100644 --- a/decode_types.vhdl +++ b/decode_types.vhdl @@ -8,7 +8,7 @@ package decode_types is OP_CNTZ, OP_CRAND, OP_CRANDC, OP_CREQV, OP_CRNAND, OP_CRNOR, OP_CROR, OP_CRORC, OP_CRXOR, OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST, - OP_DCBZ, OP_DIV, OP_EXTS, + OP_DCBZ, OP_DIV, OP_DIVE, OP_EXTS, OP_EXTSWSLI, OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC, OP_LOAD, OP_STORE, OP_MADDHD, OP_MADDHDU, OP_MADDLD, OP_MCRF, OP_MCRXR, OP_MCRXRX, OP_MFCR, OP_MFSPR, OP_MOD, @@ -46,7 +46,7 @@ package decode_types is constant TOO_OFFSET : integer := 0; - type unit_t is (NONE, ALU, LDST, DIV); + type unit_t is (NONE, ALU, LDST); type length_t is (NONE, is1B, is2B, is4B, is8B); type decode_rom_t is record diff --git a/divider.vhdl b/divider.vhdl index affab85..33d2a0d 100644 --- a/divider.vhdl +++ b/divider.vhdl @@ -10,8 +10,8 @@ entity divider is port ( clk : in std_logic; rst : in std_logic; - d_in : in Decode2ToDividerType; - d_out : out DividerToWritebackType + d_in : in Execute1ToDividerType; + d_out : out DividerToExecute1Type ); end entity divider; @@ -23,7 +23,6 @@ architecture behaviour of divider is signal sresult : std_ulogic_vector(64 downto 0); signal oresult : std_ulogic_vector(63 downto 0); signal running : std_ulogic; - signal signcheck : std_ulogic; signal count : unsigned(6 downto 0); signal neg_result : std_ulogic; signal is_modulus : std_ulogic; @@ -48,7 +47,7 @@ begin running <= '0'; count <= "0000000"; elsif d_in.valid = '1' then - if d_in.is_extended = '1' and not (d_in.is_signed = '1' and d_in.dividend(63) = '1') then + if d_in.is_extended = '1' then dend <= '0' & d_in.dividend & x"0000000000000000"; else dend <= '0' & x"0000000000000000" & d_in.dividend; @@ -56,7 +55,7 @@ begin div <= unsigned(d_in.divisor); quot <= (others => '0'); write_reg <= d_in.write_reg; - neg_result <= '0'; + neg_result <= d_in.neg_result; is_modulus <= d_in.is_modulus; extended <= d_in.is_extended; is_32bit <= d_in.is_32bit; @@ -68,20 +67,6 @@ begin running <= '1'; overflow <= '0'; ovf32 <= '0'; - signcheck <= d_in.is_signed and (d_in.dividend(63) or d_in.divisor(63)); - elsif signcheck = '1' then - signcheck <= '0'; - neg_result <= dend(63) xor (div(63) and not is_modulus); - if dend(63) = '1' then - if extended = '1' then - dend <= '0' & std_ulogic_vector(- signed(dend(63 downto 0))) & x"0000000000000000"; - else - dend <= '0' & x"0000000000000000" & std_ulogic_vector(- signed(dend(63 downto 0))); - end if; - end if; - if div(63) = '1' then - div <= unsigned(- signed(div)); - end if; elsif running = '1' then if count = "0111111" then running <= '0'; @@ -151,12 +136,10 @@ begin if rising_edge(clk) then d_out.valid <= '0'; d_out.write_reg_data <= oresult; - d_out.write_reg_enable <= '0'; d_out.write_xerc_enable <= '0'; d_out.xerc <= xerc; if count = "1000000" then d_out.valid <= '1'; - d_out.write_reg_enable <= '1'; d_out.write_xerc_enable <= oe; -- We must test oe because the RC update code in writeback diff --git a/divider_tb.vhdl b/divider_tb.vhdl index 5f809bb..8151315 100644 --- a/divider_tb.vhdl +++ b/divider_tb.vhdl @@ -16,8 +16,8 @@ architecture behave of divider_tb is signal rst : std_ulogic; constant clk_period : time := 10 ns; - signal d1 : Decode2ToDividerType; - signal d2 : DividerToWritebackType; + signal d1 : Execute1ToDividerType; + signal d2 : DividerToExecute1Type; begin divider_0: entity work.divider port map (clk => clk, rst => rst, d_in => d1, d_out => d2); @@ -50,6 +50,7 @@ begin d1.is_32bit <= '0'; d1.is_extended <= '0'; d1.is_modulus <= '0'; + d1.neg_result <= '0'; d1.rc <= '0'; wait for clk_period; @@ -65,7 +66,6 @@ begin end loop; assert d2.valid = '1'; - assert d2.write_reg_enable = '1'; assert d2.write_reg_nr = "10001"; assert d2.write_reg_data = x"000000000000f001" report "result " & to_hstring(d2.write_reg_data); assert d2.rc = '0'; @@ -89,7 +89,6 @@ begin end loop; assert d2.valid = '1'; - assert d2.write_reg_enable = '1'; assert d2.write_reg_nr = "10001"; assert d2.write_reg_data = x"000000000000f001" report "result " & to_hstring(d2.write_reg_data); assert d2.rc = '1'; @@ -105,9 +104,10 @@ begin ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64)); rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64)); - d1.dividend <= ra; - d1.divisor <= rb; + d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra)); + d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb)); d1.is_signed <= '1'; + d1.neg_result <= ra(63) xor rb(63); d1.valid <= '1'; wait for clk_period; @@ -142,6 +142,7 @@ begin d1.dividend <= ra; d1.divisor <= rb; d1.is_signed <= '0'; + d1.neg_result <= '0'; d1.valid <= '1'; wait for clk_period; @@ -173,9 +174,10 @@ begin ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64)); rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64)); - d1.dividend <= ra; - d1.divisor <= rb; + d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra)); + d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb)); d1.is_signed <= '1'; + d1.neg_result <= ra(63) xor rb(63); d1.is_extended <= '1'; d1.valid <= '1'; @@ -216,6 +218,7 @@ begin d1.dividend <= ra; d1.divisor <= rb; d1.is_signed <= '0'; + d1.neg_result <= '0'; d1.is_extended <= '1'; d1.valid <= '1'; @@ -250,9 +253,10 @@ begin ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64)); rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64)); - d1.dividend <= ra; - d1.divisor <= rb; + d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra)); + d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb)); d1.is_signed <= '1'; + d1.neg_result <= ra(63) xor rb(63); d1.is_extended <= '0'; d1.is_32bit <= '1'; d1.valid <= '1'; @@ -289,6 +293,7 @@ begin d1.dividend <= ra; d1.divisor <= rb; d1.is_signed <= '0'; + d1.neg_result <= '0'; d1.is_extended <= '0'; d1.is_32bit <= '1'; d1.valid <= '1'; @@ -322,9 +327,10 @@ begin ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 32)) & x"00000000"; rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64)); - d1.dividend <= ra; - d1.divisor <= rb; + d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra)); + d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb)); d1.is_signed <= '1'; + d1.neg_result <= ra(63) xor rb(63); d1.is_extended <= '0'; d1.is_32bit <= '1'; d1.valid <= '1'; @@ -365,6 +371,7 @@ begin d1.dividend <= ra; d1.divisor <= rb; d1.is_signed <= '0'; + d1.neg_result <= '0'; d1.is_extended <= '0'; d1.is_32bit <= '1'; d1.valid <= '1'; @@ -398,9 +405,10 @@ begin ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64)); rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64)); - d1.dividend <= ra; - d1.divisor <= rb; + d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra)); + d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb)); d1.is_signed <= '1'; + d1.neg_result <= ra(63); d1.is_extended <= '0'; d1.is_32bit <= '0'; d1.is_modulus <= '1'; @@ -438,6 +446,7 @@ begin d1.dividend <= ra; d1.divisor <= rb; d1.is_signed <= '0'; + d1.neg_result <= '0'; d1.is_extended <= '0'; d1.is_32bit <= '0'; d1.is_modulus <= '1'; @@ -472,9 +481,10 @@ begin ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64)); rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64)); - d1.dividend <= ra; - d1.divisor <= rb; + d1.dividend <= ra when ra(63) = '0' else std_ulogic_vector(- signed(ra)); + d1.divisor <= rb when rb(63) = '0' else std_ulogic_vector(- signed(rb)); d1.is_signed <= '1'; + d1.neg_result <= ra(63); d1.is_extended <= '0'; d1.is_32bit <= '1'; d1.is_modulus <= '1'; @@ -517,6 +527,7 @@ begin d1.dividend <= ra; d1.divisor <= rb; d1.is_signed <= '0'; + d1.neg_result <= '0'; d1.is_extended <= '0'; d1.is_32bit <= '1'; d1.is_modulus <= '1'; diff --git a/execute1.vhdl b/execute1.vhdl index 710044f..7bcffdc 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -13,6 +13,7 @@ use work.ppc_fx_insns.all; entity execute1 is port ( clk : in std_ulogic; + rst : in std_ulogic; -- asynchronous flush_out : out std_ulogic; @@ -36,6 +37,7 @@ architecture behaviour of execute1 is lr_update : std_ulogic; next_lr : std_ulogic_vector(63 downto 0); mul_in_progress : std_ulogic; + div_in_progress : std_ulogic; end record; signal r, rin : reg_type; @@ -53,6 +55,10 @@ architecture behaviour of execute1 is signal x_to_multiply: Execute1ToMultiplyType; signal multiply_to_x: MultiplyToExecute1Type; + -- divider signals + signal x_to_divider: Execute1ToDividerType; + signal divider_to_x: DividerToExecute1Type; + procedure set_carry(e: inout Execute1ToWritebackType; carry32 : in std_ulogic; carry : in std_ulogic) is @@ -135,6 +141,14 @@ begin m_out => multiply_to_x ); + divider_0: entity work.divider + port map ( + clk => clk, + rst => rst, + d_in => x_to_divider, + d_out => divider_to_x + ); + execute1_0: process(clk) begin if rising_edge(clk) then @@ -171,6 +185,8 @@ begin variable l : std_ulogic; variable next_nia : std_ulogic_vector(63 downto 0); variable carry_32, carry_64 : std_ulogic; + variable sign1, sign2 : std_ulogic; + variable abs1, abs2 : signed(63 downto 0); begin result := (others => '0'); result_with_carry := (others => '0'); @@ -217,6 +233,7 @@ begin v.lr_update := '0'; v.mul_in_progress := '0'; + v.div_in_progress := '0'; -- signals to multiply unit x_to_multiply <= Execute1ToMultiplyInit; @@ -249,6 +266,59 @@ begin end if; end if; + -- signals to divide unit + sign1 := '0'; + sign2 := '0'; + if e_in.is_signed = '1' then + if e_in.is_32bit = '1' then + sign1 := e_in.read_data1(31); + sign2 := e_in.read_data2(31); + else + sign1 := e_in.read_data1(63); + sign2 := e_in.read_data2(63); + end if; + end if; + -- take absolute values + if sign1 = '0' then + abs1 := signed(e_in.read_data1); + else + abs1 := - signed(e_in.read_data1); + end if; + if sign2 = '0' then + abs2 := signed(e_in.read_data2); + else + abs2 := - signed(e_in.read_data2); + end if; + + x_to_divider <= Execute1ToDividerInit; + x_to_divider.write_reg <= gspr_to_gpr(e_in.write_reg); + x_to_divider.is_signed <= e_in.is_signed; + x_to_divider.is_32bit <= e_in.is_32bit; + if e_in.insn_type = OP_MOD then + x_to_divider.is_modulus <= '1'; + end if; + x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus); + x_to_divider.rc <= e_in.rc; + x_to_divider.oe <= e_in.oe; + x_to_divider.xerc <= v.e.xerc; + if e_in.is_32bit = '0' then + -- 64-bit forms + if e_in.insn_type = OP_DIVE then + x_to_divider.is_extended <= '1'; + end if; + x_to_divider.dividend <= std_ulogic_vector(abs1); + x_to_divider.divisor <= std_ulogic_vector(abs2); + else + -- 32-bit forms + x_to_divider.is_extended <= '0'; + if e_in.insn_type = OP_DIVE then -- extended forms + x_to_divider.dividend <= std_ulogic_vector(abs1(31 downto 0)) & x"00000000"; + else + x_to_divider.dividend <= x"00000000" & std_ulogic_vector(abs1(31 downto 0)); + end if; + x_to_divider.divisor <= x"00000000" & std_ulogic_vector(abs2(31 downto 0)); + end if; + ctrl_tmp <= ctrl; -- FIXME: run at 512MHz not core freq ctrl_tmp.tb <= std_ulogic_vector(unsigned(ctrl.tb) + 1); @@ -550,13 +620,19 @@ begin when OP_ICBI => icache_inval <= '1'; - when OP_MUL_L64 | OP_MUL_H64 | OP_MUL_H32 => + when OP_MUL_L64 | OP_MUL_H64 | OP_MUL_H32 => v.e.valid := '0'; v.mul_in_progress := '1'; stall_out <= '1'; x_to_multiply.valid <= '1'; - when others => + when OP_DIV | OP_DIVE | OP_MOD => + v.e.valid := '0'; + v.div_in_progress := '1'; + stall_out <= '1'; + x_to_divider.valid <= '1'; + + when others => terminate_out <= '1'; report "illegal"; end case; @@ -603,6 +679,21 @@ begin stall_out <= '1'; v.mul_in_progress := '1'; end if; + elsif r.div_in_progress = '1' then + if divider_to_x.valid = '1' then + v.e.write_reg := gpr_to_gspr(divider_to_x.write_reg_nr); + result := divider_to_x.write_reg_data; + result_en := '1'; + v.e.rc := divider_to_x.rc; + v.e.xerc := divider_to_x.xerc; + v.e.write_xerc_enable := divider_to_x.write_xerc_enable; + v.e.valid := '1'; + v.e.write_len := x"8"; + v.e.sign_extend := '0'; + else + stall_out <= '1'; + v.div_in_progress := '1'; + end if; end if; v.e.write_data := result; diff --git a/writeback.vhdl b/writeback.vhdl index 1323f71..08efe91 100644 --- a/writeback.vhdl +++ b/writeback.vhdl @@ -12,7 +12,6 @@ entity writeback is e_in : in Execute1ToWritebackType; l_in : in DcacheToWritebackType; - d_in : in DividerToWritebackType; w_out : out WritebackToRegisterFileType; c_out : out WritebackToCrFileType; @@ -66,28 +65,21 @@ begin begin x := "" & e_in.valid; y := "" & l_in.valid; - z := "" & d_in.valid; - assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z))) <= 1 severity failure; + assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure; x := "" & e_in.write_enable; y := "" & l_in.write_enable; - z := "" & d_in.write_reg_enable; - assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(z))) <= 1 severity failure; + assert (to_integer(unsigned(x)) + to_integer(unsigned(y))) <= 1 severity failure; w := "" & e_in.write_cr_enable; x := "" & (e_in.write_enable and e_in.rc); - z := "" & (d_in.valid and d_in.rc); - assert (to_integer(unsigned(w)) + to_integer(unsigned(x)) + to_integer(unsigned(z))) <= 1 severity failure; - - x := "" & e_in.write_xerc_enable; - z := "" & D_in.write_xerc_enable; - assert (to_integer(unsigned(x)) + to_integer(unsigned(z))) <= 1 severity failure; + assert (to_integer(unsigned(w)) + to_integer(unsigned(x))) <= 1 severity failure; w_out <= WritebackToRegisterFileInit; c_out <= WritebackToCrFileInit; complete_out <= '0'; - if e_in.valid = '1' or l_in.valid = '1' or d_in.valid = '1' then + if e_in.valid = '1' or l_in.valid = '1' then complete_out <= '1'; end if; @@ -138,19 +130,6 @@ begin xe := l_in.xerc; end if; - if d_in.write_reg_enable = '1' then - w_out.write_enable <= '1'; - w_out.write_reg <= gpr_to_gspr(d_in.write_reg_nr); - data_in <= d_in.write_reg_data; - rc <= d_in.rc; - xe := d_in.xerc; - end if; - - if d_in.write_xerc_enable = '1' then - c_out.write_xerc_enable <= '1'; - c_out.write_xerc_data <= d_in.xerc; - end if; - -- shift and byte-reverse data bytes for i in 0 to 7 loop k := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset);