From 658feabfd40fa4d4e3048334d11036fc1c1c959b Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 26 Sep 2020 17:19:57 +1000 Subject: [PATCH] core: Make result multiplexing explicit This adds an explicit multiplexer feeding v.e.write_data in execute1, with the select lines determined in the previous cycle based on the insn_type. Similarly, for multiply and divide instructions, there is now an explicit multiplexer. Signed-off-by: Paul Mackerras --- common.vhdl | 6 +- decode2.vhdl | 52 +++++++++++++++++ execute1.vhdl | 150 +++++++++++++++++++++++++++++++------------------- logical.vhdl | 6 +- 4 files changed, 154 insertions(+), 60 deletions(-) diff --git a/common.vhdl b/common.vhdl index 8b9380c..44f63bd 100644 --- a/common.vhdl +++ b/common.vhdl @@ -210,6 +210,7 @@ package common is rc: std_ulogic; oe: std_ulogic; invert_a: std_ulogic; + addm1 : std_ulogic; invert_out: std_ulogic; input_carry: carry_in_t; output_carry: std_ulogic; @@ -224,18 +225,21 @@ package common is update : std_ulogic; -- is this an update instruction? reserve : std_ulogic; -- set for larx/stcx br_pred : std_ulogic; + result_sel : std_ulogic_vector(2 downto 0); -- select source of result + sub_select : std_ulogic_vector(2 downto 0); -- sub-result selection repeat : std_ulogic; -- set if instruction is cracked into two ops second : std_ulogic; -- set if this is the second op end record; constant Decode2ToExecute1Init : Decode2ToExecute1Type := (valid => '0', unit => NONE, fac => NONE, insn_type => OP_ILLEGAL, bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0', - bypass_cr => '0', lr => '0', rc => '0', oe => '0', invert_a => '0', + bypass_cr => '0', lr => '0', rc => '0', oe => '0', invert_a => '0', addm1 => '0', invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0', is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', br_pred => '0', byte_reverse => '0', sign_extend => '0', update => '0', nia => (others => '0'), read_data1 => (others => '0'), read_data2 => (others => '0'), read_data3 => (others => '0'), cr => (others => '0'), insn => (others => '0'), data_len => (others => '0'), + result_sel => "000", sub_select => "000", repeat => '0', second => '0', others => (others => '0')); type MultiplyInputType is record diff --git a/decode2.vhdl b/decode2.vhdl index 8b4633a..561fd79 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -221,6 +221,52 @@ architecture behaviour of decode2 is end case; end; + -- control signals that are derived from insn_type + type mux_select_array_t is array(insn_type_t) of std_ulogic_vector(2 downto 0); + + constant result_select : mux_select_array_t := ( + OP_AND => "001", -- logical_result + OP_OR => "001", + OP_XOR => "001", + OP_POPCNT => "001", + OP_PRTY => "001", + OP_CMPB => "001", + OP_EXTS => "001", + OP_BPERM => "001", + OP_BCD => "001", + OP_MTSPR => "001", + OP_RLC => "010", -- rotator_result + OP_RLCL => "010", + OP_RLCR => "010", + OP_SHL => "010", + OP_SHR => "010", + OP_EXTSWSLI => "010", + OP_MUL_L64 => "011", -- muldiv_result + OP_MUL_H64 => "011", + OP_MUL_H32 => "011", + OP_DIV => "011", + OP_DIVE => "011", + OP_MOD => "011", + OP_CNTZ => "100", -- countzero_result + OP_MFSPR => "101", -- spr_result + OP_ISEL => "111", -- misc_result + OP_DARN => "111", + OP_MFMSR => "111", + OP_MFCR => "111", + OP_SETB => "111", + others => "000" -- default to adder_result + ); + + constant subresult_select : mux_select_array_t := ( + OP_MUL_L64 => "000", -- muldiv_result + OP_MUL_H64 => "001", + OP_MUL_H32 => "010", + OP_DIV => "011", + OP_DIVE => "011", + OP_MOD => "011", + others => "000" + ); + -- issue control signals signal control_valid_in : std_ulogic; signal control_valid_out : std_ulogic; @@ -400,6 +446,10 @@ begin v.e.bypass_cr := cr_bypass; v.e.xerc := c_in.read_xerc_data; v.e.invert_a := d_in.decode.invert_a; + v.e.addm1 := '0'; + if d_in.decode.insn_type = OP_BC or d_in.decode.insn_type = OP_BCREG then + v.e.addm1 := '1'; + end if; v.e.invert_out := d_in.decode.invert_out; v.e.input_carry := d_in.decode.input_carry; v.e.output_carry := d_in.decode.output_carry; @@ -415,6 +465,8 @@ begin v.e.update := d_in.decode.update; v.e.reserve := d_in.decode.reserve; v.e.br_pred := d_in.br_pred; + v.e.result_sel := result_select(d_in.decode.insn_type); + v.e.sub_select := subresult_select(d_in.decode.insn_type); -- issue control control_valid_in <= d_in.valid; diff --git a/execute1.vhdl b/execute1.vhdl index 4ea2680..6d2eb04 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -60,6 +60,8 @@ architecture behaviour of execute1 is prev_op : insn_type_t; lr_update : std_ulogic; next_lr : std_ulogic_vector(63 downto 0); + resmux : std_ulogic_vector(2 downto 0); + submux : std_ulogic_vector(2 downto 0); mul_in_progress : std_ulogic; mul_finish : std_ulogic; div_in_progress : std_ulogic; @@ -103,6 +105,13 @@ architecture behaviour of execute1 is signal rotator_carry: std_ulogic; signal logical_result: std_ulogic_vector(63 downto 0); signal countzero_result: std_ulogic_vector(63 downto 0); + signal alu_result: std_ulogic_vector(63 downto 0); + signal adder_result: std_ulogic_vector(63 downto 0); + signal misc_result: std_ulogic_vector(63 downto 0); + signal muldiv_result: std_ulogic_vector(63 downto 0); + signal spr_result: std_ulogic_vector(63 downto 0); + signal result_mux_sel: std_ulogic_vector(2 downto 0); + signal sub_mux_sel: std_ulogic_vector(2 downto 0); -- multiply signals signal x_to_multiply: MultiplyInputType; @@ -285,6 +294,18 @@ begin terminate_out <= r.terminate; + -- Result mux + result_mux_sel <= e_in.result_sel when r.busy = '0' else r.resmux; + sub_mux_sel <= e_in.sub_select when r.busy = '0' else r.submux; + with result_mux_sel select alu_result <= + adder_result when "000", + logical_result when "001", + rotator_result when "010", + muldiv_result when "011", + countzero_result when "100", + spr_result when "101", + misc_result when others; + execute1_0: process(clk) begin if rising_edge(clk) then @@ -310,7 +331,8 @@ begin execute1_1: process(all) variable v : reg_type; variable a_inv : std_ulogic_vector(63 downto 0); - variable result : std_ulogic_vector(63 downto 0); + variable b_or_m1 : std_ulogic_vector(63 downto 0); + variable addg6s : std_ulogic_vector(63 downto 0); variable newcrf : std_ulogic_vector(3 downto 0); variable sum_with_carry : std_ulogic_vector(64 downto 0); variable result_en : std_ulogic; @@ -348,16 +370,17 @@ begin variable spr_val : std_ulogic_vector(63 downto 0); variable addend : std_ulogic_vector(127 downto 0); variable do_trace : std_ulogic; + variable hold_wr_data : std_ulogic; variable f : Execute1ToFetch1Type; variable fv : Execute1ToFPUType; begin - result := (others => '0'); sum_with_carry := (others => '0'); result_en := '0'; newcrf := (others => '0'); is_branch := '0'; taken_branch := '0'; abs_branch := '0'; + hold_wr_data := '0'; v := r; v.e := Execute1ToWritebackInit; @@ -399,14 +422,24 @@ begin v.cntz_in_progress := '0'; v.mul_finish := '0'; + misc_result <= (others => '0'); + spr_result <= (others => '0'); + spr_val := (others => '0'); + -- Main adder if e_in.invert_a = '0' then a_inv := a_in; else a_inv := not a_in; end if; - sum_with_carry := ppc_adde(a_inv, b_in, + if e_in.addm1 = '0' then + b_or_m1 := b_in; + else + b_or_m1 := (others => '1'); + end if; + sum_with_carry := ppc_adde(a_inv, b_or_m1, decode_input_carry(e_in.input_carry, v.e.xerc)); + adder_result <= sum_with_carry(63 downto 0); -- signals to multiply and divide units sign1 := '0'; @@ -432,6 +465,7 @@ begin abs2 := - signed(b_in); end if; + -- Interface to multiply and divide units x_to_multiply <= MultiplyInputInit; x_to_multiply.is_32bit <= e_in.is_32bit; @@ -479,6 +513,18 @@ begin x_to_divider.divisor <= x"00000000" & std_ulogic_vector(abs2(31 downto 0)); end if; + case sub_mux_sel(1 downto 0) is + when "00" => + muldiv_result <= multiply_to_x.result(63 downto 0); + when "01" => + muldiv_result <= multiply_to_x.result(127 downto 64); + when "10" => + muldiv_result <= multiply_to_x.result(63 downto 32) & + multiply_to_x.result(63 downto 32); + when others => + muldiv_result <= divider_to_x.write_reg_data; + end case; + ctrl_tmp <= ctrl; -- FIXME: run at 512MHz not core freq ctrl_tmp.tb <= std_ulogic_vector(unsigned(ctrl.tb) + 1); @@ -611,6 +657,8 @@ begin v.slow_op_rc := e_in.rc; v.slow_op_oe := e_in.oe; v.slow_op_xerc := v.e.xerc; + v.resmux := e_in.result_sel; + v.submux := e_in.sub_select; case_0: case e_in.insn_type is @@ -642,8 +690,7 @@ begin when OP_NOP | OP_DCBF | OP_DCBST | OP_DCBT | OP_DCBTST | OP_ICBT => -- Do nothing when OP_ADD | OP_CMP | OP_TRAP => - result := sum_with_carry(63 downto 0); - carry_32 := result(32) xor a_inv(32) xor b_in(32); + carry_32 := sum_with_carry(32) xor a_inv(32) xor b_in(32); carry_64 := sum_with_carry(64); if e_in.insn_type = OP_ADD then if e_in.output_carry = '1' then @@ -724,17 +771,18 @@ begin end if; end if; when OP_ADDG6S => - result := (others => '0'); + addg6s := (others => '0'); for i in 0 to 14 loop lo := i * 4; hi := (i + 1) * 4; if (a_in(hi) xor b_in(hi) xor sum_with_carry(hi)) = '0' then - result(lo + 3 downto lo) := "0110"; + addg6s(lo + 3 downto lo) := "0110"; end if; end loop; if sum_with_carry(64) = '0' then - result(63 downto 60) := "0110"; + addg6s(63 downto 60) := "0110"; end if; + misc_result <= addg6s; result_en := '1'; when OP_CMPRB => newcrf := ppc_cmprb(a_in, b_in, insn_l(e_in.insn)); @@ -754,7 +802,6 @@ begin newcrf & newcrf & newcrf & newcrf; when OP_AND | OP_OR | OP_XOR | OP_POPCNT | OP_PRTY | OP_CMPB | OP_EXTS | OP_BPERM | OP_BCD => - result := logical_result; result_en := '1'; when OP_B => is_branch := '1'; @@ -765,12 +812,11 @@ begin end if; when OP_BC => -- read_data1 is CTR + v.e.write_reg := fast_spr_num(SPR_CTR); bo := insn_bo(e_in.insn); bi := insn_bi(e_in.insn); if bo(4-2) = '0' then - result := std_ulogic_vector(unsigned(a_in) - 1); result_en := '1'; - v.e.write_reg := fast_spr_num(SPR_CTR); end if; is_branch := '1'; taken_branch := ppc_bc_taken(bo, bi, cr_in, a_in); @@ -781,12 +827,11 @@ begin when OP_BCREG => -- read_data1 is CTR -- read_data2 is target register (CTR, LR or TAR) + v.e.write_reg := fast_spr_num(SPR_CTR); bo := insn_bo(e_in.insn); bi := insn_bi(e_in.insn); if bo(4-2) = '0' and e_in.insn(10) = '0' then - result := std_ulogic_vector(unsigned(a_in) - 1); result_en := '1'; - v.e.write_reg := fast_spr_num(SPR_CTR); end if; is_branch := '1'; taken_branch := ppc_bc_taken(bo, bi, cr_in, a_in); @@ -825,9 +870,9 @@ begin when OP_ISEL => crbit := to_integer(unsigned(insn_bc(e_in.insn))); if cr_in(31-crbit) = '1' then - result := a_in; + misc_result <= a_in; else - result := b_in; + misc_result <= b_in; end if; result_en := '1'; when OP_CROP => @@ -885,38 +930,38 @@ begin if random_err = '0' then case e_in.insn(17 downto 16) is when "00" => - result := x"00000000" & random_cond(31 downto 0); + misc_result <= x"00000000" & random_cond(31 downto 0); when "10" => - result := random_raw; + misc_result <= random_raw; when others => - result := random_cond; + misc_result <= random_cond; end case; else - result := (others => '1'); + misc_result <= (others => '1'); end if; result_en := '1'; when OP_MFMSR => - result := ctrl.msr; + misc_result <= ctrl.msr; result_en := '1'; when OP_MFSPR => report "MFSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) & "=" & to_hstring(a_in); result_en := '1'; if is_fast_spr(e_in.read_reg1) then - result := a_in; - if decode_spr_num(e_in.insn) = SPR_XER then + spr_val := a_in; + if decode_spr_num(e_in.insn) = SPR_XER then -- bits 0:31 and 35:43 are treated as reserved and return 0s when read using mfxer - result(63 downto 32) := (others => '0'); - result(63-32) := v.e.xerc.so; - result(63-33) := v.e.xerc.ov; - result(63-34) := v.e.xerc.ca; - result(63-35 downto 63-43) := "000000000"; - result(63-44) := v.e.xerc.ov32; - result(63-45) := v.e.xerc.ca32; - end if; + spr_val(63 downto 32) := (others => '0'); + spr_val(63-32) := v.e.xerc.so; + spr_val(63-33) := v.e.xerc.ov; + spr_val(63-34) := v.e.xerc.ca; + spr_val(63-35 downto 63-43) := "000000000"; + spr_val(63-44) := v.e.xerc.ov32; + spr_val(63-45) := v.e.xerc.ca32; + end if; else spr_val := c_in; - case decode_spr_num(e_in.insn) is + case decode_spr_num(e_in.insn) is when SPR_TB => spr_val := ctrl.tb; when SPR_TBU => @@ -940,22 +985,23 @@ begin if ctrl.msr(MSR_PR) = '1' then illegal := '1'; end if; - end case; - result := spr_val; - end if; + end case; + end if; + spr_result <= spr_val; + when OP_MFCR => if e_in.insn(20) = '0' then -- mfcr - result := x"00000000" & cr_in; + misc_result <= x"00000000" & cr_in; else -- mfocrf crnum := fxm_to_num(insn_fxm(e_in.insn)); - result := (others => '0'); + misc_result <= (others => '0'); for i in 0 to 7 loop lo := (7-i)*4; hi := lo + 3; if crnum = i then - result(hi downto lo) := cr_in(hi downto lo); + misc_result(hi downto lo) <= cr_in(hi downto lo); end if; end loop; end if; @@ -999,7 +1045,6 @@ begin report "MTSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) & "=" & to_hstring(c_in); if is_fast_spr(e_in.write_reg) then - result := c_in; result_en := '1'; if decode_spr_num(e_in.insn) = SPR_XER then v.e.xerc.so := c_in(63-32); @@ -1025,7 +1070,6 @@ begin end case; end if; when OP_RLC | OP_RLCL | OP_RLCR | OP_SHL | OP_SHR | OP_EXTSWSLI => - result := rotator_result; if e_in.output_carry = '1' then set_carry(v.e, rotator_carry, rotator_carry); end if; @@ -1033,11 +1077,11 @@ begin when OP_SETB => bfa := insn_bfa(e_in.insn); crbit := to_integer(unsigned(bfa)) * 4; - result := (others => '0'); + misc_result <= (others => '0'); if cr_in(31 - crbit) = '1' then - result := (others => '1'); + misc_result <= (others => '1'); elsif cr_in(30 - crbit) = '1' then - result(0) := '1'; + misc_result(0) <= '1'; end if; when OP_ISYNC => @@ -1130,10 +1174,9 @@ begin v.e.valid := '1'; -- Keep r.e.write_data unchanged next cycle in case it is needed -- for a forwarded result (e.g. for CTR). - result := r.e.write_data; + hold_wr_data := '1'; elsif r.cntz_in_progress = '1' then -- cnt[lt]z always takes two cycles - result := countzero_result; result_en := '1'; v.e.write_reg := gpr_to_gspr(r.slow_op_dest); v.e.rc := r.slow_op_rc; @@ -1144,18 +1187,7 @@ begin (r.div_in_progress = '1' and divider_to_x.valid = '1') then if r.mul_in_progress = '1' then overflow := '0'; - case r.slow_op_insn is - when OP_MUL_H32 => - result := multiply_to_x.result(63 downto 32) & - multiply_to_x.result(63 downto 32); - when OP_MUL_H64 => - result := multiply_to_x.result(127 downto 64); - when others => - -- i.e. OP_MUL_L64 - result := multiply_to_x.result(63 downto 0); - end case; else - result := divider_to_x.write_reg_data; overflow := divider_to_x.overflow; end if; if r.mul_in_progress = '1' and r.slow_op_oe = '1' then @@ -1184,7 +1216,7 @@ begin v.div_in_progress := r.div_in_progress; end if; elsif r.mul_finish = '1' then - result := r.e.write_data; + hold_wr_data := '1'; result_en := '1'; v.e.write_reg := gpr_to_gspr(r.slow_op_dest); v.e.rc := r.slow_op_rc; @@ -1225,7 +1257,11 @@ begin v.trace_next := '1'; end if; - v.e.write_data := result; + if hold_wr_data = '0' then + v.e.write_data := alu_result; + else + v.e.write_data := r.e.write_data; + end if; v.e.write_enable := result_en and not exception; -- generate DSI or DSegI for load/store exceptions diff --git a/logical.vhdl b/logical.vhdl index d008e47..6b6f202 100644 --- a/logical.vhdl +++ b/logical.vhdl @@ -197,8 +197,7 @@ begin tmp := x"00" & dpd_to_bcd(rs(51 downto 42)) & dpd_to_bcd(rs(41 downto 32)) & x"00" & dpd_to_bcd(rs(19 downto 10)) & dpd_to_bcd(rs(9 downto 0)); end if; - when others => - -- EXTS + when OP_EXTS => -- note datalen is a 1-hot encoding negative := (datalen(0) and rs(7)) or (datalen(1) and rs(15)) or @@ -211,6 +210,9 @@ begin tmp(15 downto 8) := rs(15 downto 8); end if; tmp(7 downto 0) := rs(7 downto 0); + when others => + -- e.g. OP_MTSPR + tmp := rs; end case; result <= tmp;