From 290b05f97da6255734b4ff3c7c7a913cf99301f9 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 4 Aug 2020 20:02:30 +1000 Subject: [PATCH] core: Implement the maddhd, maddhdu and maddld instructions These instructions use major opcode 4 and have a third GPR input operand, so we need a decode table for major opcode 4 and some plumbing to get the RC register operand read. The multiply-add instructions use the same insn_type_t values as the regular multiply instructions, and we distinguish in execute1 by looking at the major opcode. This turns out to be convenient because we don't have to add any cases in the code that handles the output of the multiplier, and it frees up some insn_type_t values. Signed-off-by: Paul Mackerras --- decode1.vhdl | 30 ++++++++++++++++++++++++++++++ decode2.vhdl | 5 ++++- decode_types.vhdl | 4 ++-- execute1.vhdl | 15 ++++++++++++++- insn_helpers.vhdl | 6 ++++++ scripts/fmt_log/fmt_log.c | 10 +++++----- 6 files changed, 61 insertions(+), 9 deletions(-) diff --git a/decode1.vhdl b/decode1.vhdl index a95cfad..eceee40 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -34,6 +34,8 @@ architecture behaviour of decode1 is subtype major_opcode_t is unsigned(5 downto 0); type major_rom_array_t is array(0 to 63) of decode_rom_t; type minor_valid_array_t is array(0 to 1023) of std_ulogic; + type minor_valid_array_2t is array(0 to 2047) of std_ulogic; + type op_4_subop_array_t is array(0 to 63) of decode_rom_t; type op_19_subop_array_t is array(0 to 7) of decode_rom_t; type op_30_subop_array_t is array(0 to 15) of decode_rom_t; type op_31_subop_array_t is array(0 to 1023) of decode_rom_t; @@ -85,6 +87,24 @@ architecture behaviour of decode1 is others => illegal_inst ); + -- indexed by bits 5..0 and 10..6 of instruction word + constant decode_op_4_valid : minor_valid_array_2t := ( + 2#11000000000# to 2#11000011111# => '1', -- maddhd + 2#11000100000# to 2#11000111111# => '1', -- maddhdu + 2#11001100000# to 2#11001111111# => '1', -- maddld + others => '0' + ); + + -- indexed by bits 5..0 of instruction word + constant decode_op_4_array : op_4_subop_array_t := ( + -- unit internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl + -- op in out A out in out len ext pipe + 2#110000# => (ALU, OP_MUL_H64, RA, RB, RCR, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- maddhd + 2#110001# => (ALU, OP_MUL_H64, RA, RB, RCR, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- maddhdu + 2#110011# => (ALU, OP_MUL_L64, RA, RB, RCR, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- maddld + others => decode_rom_init + ); + -- indexed by bits 10..1 of instruction word constant decode_op_19_valid : minor_valid_array_t := ( -- addpcis, 5 upper bits are part of constant @@ -390,6 +410,7 @@ begin variable v : Decode1ToDecode2Type; variable f : Decode1ToFetch1Type; variable majorop : major_opcode_t; + variable minor4op : std_ulogic_vector(10 downto 0); variable op_19_bits: std_ulogic_vector(2 downto 0); variable sprn : spr_num_t; variable br_nia : std_ulogic_vector(61 downto 0); @@ -418,6 +439,15 @@ begin end if; v.decode := fetch_fail_inst; + elsif majorop = "000100" then + -- major opcode 4, mostly VMX/VSX stuff but also some integer ops (madd*) + minor4op := f_in.insn(5 downto 0) & f_in.insn(10 downto 6); + if decode_op_4_valid(to_integer(unsigned(minor4op))) = '1' then + v.decode := decode_op_4_array(to_integer(unsigned(f_in.insn(5 downto 0)))); + else + v.decode := illegal_inst; + end if; + elsif majorop = "011111" then -- major opcode 31, lots of things v.decode := decode_op_31_array(to_integer(unsigned(f_in.insn(10 downto 1)))); diff --git a/decode2.vhdl b/decode2.vhdl index 62c574c..b1531f1 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -135,6 +135,8 @@ architecture behaviour of decode2 is case t is when RS => return ('1', gpr_to_gspr(insn_rs(insn_in)), reg_data); + when RCR => + return ('1', gpr_to_gspr(insn_rcreg(insn_in)), reg_data); when NONE => return ('0', (others => '0'), (others => '0')); end case; @@ -282,7 +284,8 @@ begin else gpr_to_gspr(insn_ra(d_in.insn)); r_out.read2_reg <= d_in.ispr2 when d_in.decode.input_reg_b = SPR else gpr_to_gspr(insn_rb(d_in.insn)); - r_out.read3_reg <= insn_rs(d_in.insn); + r_out.read3_reg <= insn_rcreg(d_in.insn) when d_in.decode.input_reg_c = RCR + else insn_rs(d_in.insn); c_out.read <= d_in.decode.input_cr; diff --git a/decode_types.vhdl b/decode_types.vhdl index fac593e..e5ae8c1 100644 --- a/decode_types.vhdl +++ b/decode_types.vhdl @@ -9,7 +9,7 @@ package decode_types is OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST, OP_DCBZ, OP_DIV, OP_DIVE, OP_EXTS, OP_EXTSWSLI, OP_ICBI, OP_ICBT, OP_ISEL, OP_ISYNC, - OP_LOAD, OP_STORE, OP_MADDHD, OP_MADDHDU, OP_MADDLD, + OP_LOAD, OP_STORE, OP_MCRXRX, OP_MFCR, OP_MFMSR, OP_MFSPR, OP_MOD, OP_MTCRF, OP_MTMSRD, OP_MTSPR, OP_MUL_L64, OP_MUL_H64, OP_MUL_H32, OP_OR, @@ -23,7 +23,7 @@ package decode_types is type input_reg_a_t is (NONE, RA, RA_OR_ZERO, SPR, CIA); type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD, CONST_DXHI4, CONST_DS, CONST_M1, CONST_SH, CONST_SH32, SPR); - type input_reg_c_t is (NONE, RS); + type input_reg_c_t is (NONE, RS, RCR); type output_reg_a_t is (NONE, RT, RA, SPR); type rc_t is (NONE, ONE, RC); type carry_in_t is (ZERO, CA, ONE); diff --git a/execute1.vhdl b/execute1.vhdl index b836e33..a620a50 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -309,6 +309,7 @@ begin variable taken_branch : std_ulogic; variable abs_branch : std_ulogic; variable spr_val : std_ulogic_vector(63 downto 0); + variable addend : std_ulogic_vector(127 downto 0); begin result := (others => '0'); result_with_carry := (others => '0'); @@ -408,8 +409,20 @@ begin x_to_divider.is_modulus <= '1'; end if; + addend := (others => '0'); + if e_in.insn(26) = '0' then + -- integer multiply-add, major op 4 (if it is a multiply) + addend(63 downto 0) := c_in; + if e_in.is_signed = '1' then + addend(127 downto 64) := (others => c_in(63)); + end if; + end if; + if (sign1 xor sign2) = '1' then + addend := not addend; + end if; + x_to_multiply.not_result <= sign1 xor sign2; - x_to_multiply.addend <= (others => sign1 xor sign2); + x_to_multiply.addend <= addend; x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus); if e_in.is_32bit = '0' then -- 64-bit forms diff --git a/insn_helpers.vhdl b/insn_helpers.vhdl index acd2f72..592acb0 100644 --- a/insn_helpers.vhdl +++ b/insn_helpers.vhdl @@ -6,6 +6,7 @@ package insn_helpers is function insn_rt (insn_in : std_ulogic_vector) return std_ulogic_vector; function insn_ra (insn_in : std_ulogic_vector) return std_ulogic_vector; function insn_rb (insn_in : std_ulogic_vector) return std_ulogic_vector; + function insn_rcreg (insn_in : std_ulogic_vector) return std_ulogic_vector; function insn_si (insn_in : std_ulogic_vector) return std_ulogic_vector; function insn_ui (insn_in : std_ulogic_vector) return std_ulogic_vector; function insn_l (insn_in : std_ulogic_vector) return std_ulogic; @@ -59,6 +60,11 @@ package body insn_helpers is return insn_in(15 downto 11); end; + function insn_rcreg (insn_in : std_ulogic_vector) return std_ulogic_vector is + begin + return insn_in(10 downto 6); + end; + function insn_si (insn_in : std_ulogic_vector) return std_ulogic_vector is begin return insn_in(15 downto 0); diff --git a/scripts/fmt_log/fmt_log.c b/scripts/fmt_log/fmt_log.c index 3a003f7..9b6775b 100644 --- a/scripts/fmt_log/fmt_log.c +++ b/scripts/fmt_log/fmt_log.c @@ -90,11 +90,11 @@ const char *ops[64] = "illegal", "nop ", "add ", "and ", "attn ", "b ", "bc ", "bcreg ", "bperm ", "cmp ", "cmpb ", "cmpeqb ", "cmprb ", "cntz ", "crop ", "darn ", "dcbf ", "dcbst ", "dcbt ", "dcbtst ", "dcbz ", "div ", "dive ", "exts ", - "extswsl", "icbi ", "icbt ", "isel ", "isync ", "ld ", "st ", "maddhd ", - "maddhdu", "maddld ", "mcrxrx ", "mfcr ", "mfmsr ", "mfspr ", "mod ", "mtcrf ", - "mtmsr ", "mtspr ", "mull64 ", "mulh64 ", "mulh32 ", "or ", "popcnt ", "prty ", - "rfid ", "rlc ", "rlcl ", "rlcr ", "sc ", "setb ", "shl ", "shr ", - "sync ", "tlbie ", "trap ", "xor ", "ffail ", "?61 ", "?62 ", "?63 " + "extswsl", "icbi ", "icbt ", "isel ", "isync ", "ld ", "st ", "mcrxrx ", + "mfcr ", "mfmsr ", "mfspr ", "mod ", "mtcrf ", "mtmsr ", "mtspr ", "mull64 ", + "mulh64 ", "mulh32 ", "or ", "popcnt ", "prty ", "rfid ", "rlc ", "rlcl ", + "rlcr ", "sc ", "setb ", "shl ", "shr ", "sync ", "tlbie ", "trap ", + "xor ", "ffail ", "?58 ", "?59 ", "?60 ", "?61 ", "?62 ", "?63 " }; const char *spr_names[13] =