execute: Do comparisons using the main adder

This handles OP_CMP like a subtraction; the main adder computes ~RA + RB + 1, and the condition codes are computed from the results. A direct comparison of the two input operands is used to calculate the EQ bit of the condition result. The LT and GT bits are computed from the carry out from the subtraction and the MSBs of the operands; when the operand MSBs differ, the ordering follows from the MSBs alone, and when they are equal the subtraction cannot overflow, so the carry out decides. For a 32-bit comparison, the 32-bit carry and bit 31 of the input operands are used; for a 64-bit comparison, the 64-bit carry and bit 63 of the operands are used. It turns out to be more convenient to use the 'signed' field of the decode table to distinguish signed from unsigned comparisons, rather than the insn_type. Therefore this uses OP_CMP for both cmp and cmpl, which also has the benefit of reducing the number of values in insn_type_t. Doing this saves over 200 slice LUTs on the Arty A7-100 and improves timing slightly as well. Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
6 years ago · d2ca625b3b
parent d956846667
commit d2ca625b3b
3 changed files with 60 additions and 37 deletions
--- a/decode1.vhdl
+++ b/decode1.vhdl
@@ -44,8 +44,8 @@ architecture behaviour of decode1 is
 		29 =>       (ALU,    OP_AND,       NONE,       CONST_UI_HI, RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE,  '0', '0'), -- andis.
 		18 =>       (ALU,    OP_B,         NONE,       CONST_LI,    NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0'), -- b
 		16 =>       (ALU,    OP_BC,        SPR,        CONST_BD,    NONE, SPR , '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0'), -- bc
-		11 =>       (ALU,    OP_CMP,       RA,         CONST_SI,    NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpi
-		10 =>       (ALU,    OP_CMPL,      RA,         CONST_UI,    NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpli
+		11 =>       (ALU,    OP_CMP,       RA,         CONST_SI,    NONE, NONE, '0', '1', '1', '0', ONE,  '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0'), -- cmpi
+		10 =>       (ALU,    OP_CMP,       RA,         CONST_UI,    NONE, NONE, '0', '1', '1', '0', ONE,  '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpli
 		34 =>       (LDST,   OP_LOAD,      RA_OR_ZERO, CONST_SI,    NONE, RT,   '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- lbz
 		35 =>       (LDST,   OP_LOAD,      RA_OR_ZERO, CONST_SI,    NONE, RT,   '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '1'), -- lbzu
 		42 =>       (LDST,   OP_LOAD,      RA_OR_ZERO, CONST_SI,    NONE, RT,   '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '1'), -- lha
@@ -145,10 +145,10 @@ architecture behaviour of decode1 is
 		2#0000011100#  =>       (ALU,    OP_AND,       NONE,       RB,          RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0'), -- and
 		2#0000111100#  =>       (ALU,    OP_AND,       NONE,       RB,          RS,   RA,   '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0'), -- andc
 		-- 2#0011111100# bperm
-		2#0000000000#  =>       (ALU,    OP_CMP,       RA,         RB,          NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmp
+		2#0000000000#  =>       (ALU,    OP_CMP,       RA,         RB,          NONE, NONE, '0', '1', '1', '0', ONE,  '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0'), -- cmp
 		2#0111111100#  =>       (ALU,    OP_CMPB,      NONE,       RB,          RS,   RA,   '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpb
 		-- 2#0011100000# cmpeqb
-		2#0000100000#  =>       (ALU,    OP_CMPL,      RA,         RB,          NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpl
+		2#0000100000#  =>       (ALU,    OP_CMP,       RA,         RB,          NONE, NONE, '0', '1', '1', '0', ONE,  '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpl
 		-- 2#0011000000# cmprb
 		2#0000111010#  =>       (ALU,    OP_CNTZ,      NONE,       NONE,        RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC,   '0', '0'), -- cntlzd
 		2#0000011010#  =>       (ALU,    OP_CNTZ,      NONE,       NONE,        RS,   RA,   '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC,   '0', '0'), -- cntlzw
--- a/decode_types.vhdl
+++ b/decode_types.vhdl
@@ -4,7 +4,7 @@ use ieee.std_logic_1164.all;
 package decode_types is
    type insn_type_t is (OP_ILLEGAL, OP_NOP, OP_ADD,
 			 OP_ADDPCIS, OP_AND, OP_ATTN, OP_B, OP_BC, OP_BCREG,
-			 OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPL, OP_CMPRB,
+			 OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB,
 			 OP_CNTZ, OP_CRAND,
 			 OP_CRANDC, OP_CREQV, OP_CRNAND, OP_CRNOR, OP_CROR, OP_CRORC,
 			 OP_CRXOR, OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST,
--- a/execute1.vhdl
+++ b/execute1.vhdl
@@ -193,6 +193,9 @@ begin
        variable abs1, abs2 : signed(63 downto 0);
 	variable overflow : std_ulogic;
 	variable negative : std_ulogic;
+        variable zerohi, zerolo : std_ulogic;
+        variable msb_a, msb_b : std_ulogic;
+        variable a_lt : std_ulogic;
    begin
 	result := (others => '0');
 	result_with_carry := (others => '0');
@@ -348,7 +351,7 @@ begin
 		report "illegal";
 	    when OP_NOP =>
 		-- Do nothing
-	    when OP_ADD =>
+	    when OP_ADD | OP_CMP =>
 		if e_in.invert_a = '0' then
 		    a_inv := e_in.read_data1;
 		else
@@ -359,15 +362,57 @@ begin
 		result := result_with_carry(63 downto 0);
                carry_32 := result(32) xor a_inv(32) xor e_in.read_data2(32);
                carry_64 := result_with_carry(64);
-		if e_in.output_carry = '1' then
-		    set_carry(v.e, carry_32, carry_64);
-		end if;
-		if e_in.oe = '1' then
-		    set_ov(v.e,
-			   calc_ov(a_inv(63), e_in.read_data2(63), carry_64, result_with_carry(63)),
-			   calc_ov(a_inv(31), e_in.read_data2(31), carry_32, result_with_carry(31)));
-		end if;
-		result_en := '1';
+                if e_in.insn_type = OP_ADD then
+                    if e_in.output_carry = '1' then
+                        set_carry(v.e, carry_32, carry_64);
+                    end if;
+                    if e_in.oe = '1' then
+                        set_ov(v.e,
+                               calc_ov(a_inv(63), e_in.read_data2(63), carry_64, result_with_carry(63)),
+                               calc_ov(a_inv(31), e_in.read_data2(31), carry_32, result_with_carry(31)));
+                    end if;
+                    result_en := '1';
+                else
+                    -- CMP and CMPL instructions
+                    -- Note, we have done RB - RA, not RA - RB
+                    bf := insn_bf(e_in.insn);
+                    l := insn_l(e_in.insn);
+                    v.e.write_cr_enable := '1';
+                    crnum := to_integer(unsigned(bf));
+                    v.e.write_cr_mask := num_to_fxm(crnum);
+                    zerolo := not (or (e_in.read_data1(31 downto 0) xor e_in.read_data2(31 downto 0)));
+                    zerohi := not (or (e_in.read_data1(63 downto 32) xor e_in.read_data2(63 downto 32)));
+                    if zerolo = '1' and (l = '0' or zerohi = '1') then
+                        -- values are equal
+                        newcrf := "001" & v.e.xerc.so;
+                    else
+                        if l = '1' then
+                            -- 64-bit comparison
+                            msb_a := e_in.read_data1(63);
+                            msb_b := e_in.read_data2(63);
+                        else
+                            -- 32-bit comparison
+                            msb_a := e_in.read_data1(31);
+                            msb_b := e_in.read_data2(31);
+                        end if;
+                        if msb_a /= msb_b then
+                            -- Subtraction might overflow, but
+                            -- comparison is clear from MSB difference.
+                            -- for signed, 0 is greater; for unsigned, 1 is greater
+                            a_lt := msb_a xnor e_in.is_signed;
+                        else
+                            -- Subtraction cannot overflow since MSBs are equal.
+                            -- carry = 1 indicates RA is smaller (signed or unsigned)
+                            a_lt := (not l and carry_32) or (l and carry_64);
+                        end if;
+                        newcrf := a_lt & not a_lt & '0' & v.e.xerc.so;
+                    end if;
+                    for i in 0 to 7 loop
+                        lo := i*4;
+                        hi := lo + 3;
+                        v.e.write_cr_data(hi downto lo) := newcrf;
+                    end loop;
+                end if;
 	    when OP_AND | OP_OR | OP_XOR =>
 		result := logical_result;
 		result_en := '1';
@@ -412,28 +457,6 @@ begin
 	    when OP_CMPB =>
 		result := ppc_cmpb(e_in.read_data3, e_in.read_data2);
 		result_en := '1';
-	    when OP_CMP =>
-		bf := insn_bf(e_in.insn);
-		l := insn_l(e_in.insn);
-		v.e.write_cr_enable := '1';
-		crnum := to_integer(unsigned(bf));
-		v.e.write_cr_mask := num_to_fxm(crnum);
-		for i in 0 to 7 loop
-		    lo := i*4;
-		    hi := lo + 3;
-		    v.e.write_cr_data(hi downto lo) := ppc_cmp(l, e_in.read_data1, e_in.read_data2, v.e.xerc.so);
-		end loop;
-	    when OP_CMPL =>
-		bf := insn_bf(e_in.insn);
-		l := insn_l(e_in.insn);
-		v.e.write_cr_enable := '1';
-		crnum := to_integer(unsigned(bf));
-		v.e.write_cr_mask := num_to_fxm(crnum);
-		for i in 0 to 7 loop
-		    lo := i*4;
-		    hi := lo + 3;
-		    v.e.write_cr_data(hi downto lo) := ppc_cmpl(l, e_in.read_data1, e_in.read_data2, v.e.xerc.so);
-		end loop;
 	    when OP_CNTZ =>
 		result := countzero_result;
 		result_en := '1';