From b0510fd1bbfe50ab7f61e6be4a4643c9d5dd87b1 Mon Sep 17 00:00:00 2001
From: Paul Mackerras
Date: Sat, 26 Sep 2020 19:58:46 +1000
Subject: [PATCH] core: Reorganize execute1

This breaks up the enormous if .. elsif .. case .. elsif statement in
execute1 in order to try to make it simpler and more understandable.

We now have decode2 deciding whether the instruction has a value to be
written back to a register (GPR, GSPR, FPR, etc.) rather than
individual cases in execute1 setting result_en.  The computation of
the data to be written back is now independent of detection of various
exception conditions.  We now have an if block determining if any
exception condition exists which prevents the next instruction from
being executed, then the case statement which performs actions such as
setting carry/overflow bits, determining if a trap exception exists,
doing branches, etc., then an if statement for all the r.busy = 1
cases (continuing execution of an instruction which was started in a
previous cycle, or writing SRR1 for an interrupt).

Signed-off-by: Paul Mackerras --- common.vhdl | 3 +- decode2.vhdl | 18 +- execute1.vhdl | 556 ++++++++++++++++++++++++-------------------------- 3 files changed, 289 insertions(+), 288 deletions(-) diff --git a/common.vhdl b/common.vhdl index 44f63bd..d085199 100644 --- a/common.vhdl +++ b/common.vhdl @@ -195,6 +195,7 @@ package common is insn_type: insn_type_t; nia: std_ulogic_vector(63 downto 0); write_reg: gspr_index_t; + write_reg_enable: std_ulogic; read_reg1: gspr_index_t; read_reg2: gspr_index_t; read_data1: std_ulogic_vector(63 downto 0); @@ -232,7 +233,7 @@ package common is end record; constant Decode2ToExecute1Init : Decode2ToExecute1Type := (valid => '0', unit => NONE, fac => NONE, insn_type => OP_ILLEGAL, - bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0', + write_reg_enable => '0', bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0', bypass_cr => '0', lr => '0', rc => '0', oe => '0', invert_a => '0', addm1 => '0', invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0', is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', br_pred => '0', diff --git a/decode2.vhdl b/decode2.vhdl index 561fd79..e00a05d 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -249,7 +249,8 @@ architecture behaviour of decode2 is OP_MOD => "011", OP_CNTZ => "100", -- countzero_result OP_MFSPR => "101", -- spr_result - OP_ISEL => "111", -- misc_result + OP_ADDG6S => "111", -- misc_result + OP_ISEL => "111", OP_DARN => "111", OP_MFMSR => "111", OP_MFCR => "111", @@ -264,6 +265,12 @@ architecture behaviour of decode2 is OP_DIV => "011", OP_DIVE => "011", OP_MOD => "011", + OP_ADDG6S => "001", -- misc_result + OP_ISEL => "010", + OP_DARN => "011", + OP_MFMSR => "100", + OP_MFCR => "101", + OP_SETB => "110", others => "000" ); @@ -438,6 +445,7 @@ begin v.e.read_data3 := decoded_reg_c.data; v.e.bypass_data3 := gpr_c_bypass; v.e.write_reg := decoded_reg_o.reg; + v.e.write_reg_enable := decoded_reg_o.reg_valid; 
v.e.rc := decode_rc(d_in.decode.rc, d_in.insn); if not (d_in.decode.insn_type = OP_MUL_H32 or d_in.decode.insn_type = OP_MUL_H64) then v.e.oe := decode_oe(d_in.decode.rc, d_in.insn); @@ -448,7 +456,13 @@ begin v.e.invert_a := d_in.decode.invert_a; v.e.addm1 := '0'; if d_in.decode.insn_type = OP_BC or d_in.decode.insn_type = OP_BCREG then + -- add -1 to CTR v.e.addm1 := '1'; + if d_in.insn(23) = '1' or + (d_in.decode.insn_type = OP_BCREG and d_in.insn(10) = '0') then + -- don't write decremented CTR if BO(2) = 1 or bcctr + v.e.write_reg_enable := '0'; + end if; end if; v.e.invert_out := d_in.decode.invert_out; v.e.input_carry := d_in.decode.input_carry; @@ -472,7 +486,7 @@ begin control_valid_in <= d_in.valid; control_sgl_pipe <= d_in.decode.sgl_pipe; - gpr_write_valid <= decoded_reg_o.reg_valid; + gpr_write_valid <= v.e.write_reg_enable; gpr_write <= decoded_reg_o.reg; gpr_bypassable <= '0'; if EX1_BYPASS and d_in.decode.unit = ALU then diff --git a/execute1.vhdl b/execute1.vhdl index 6d2eb04..6a27ee8 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -53,6 +53,7 @@ end entity execute1; architecture behaviour of execute1 is type reg_type is record e : Execute1ToWritebackType; + cur_instr : Decode2ToExecute1Type; busy: std_ulogic; terminate: std_ulogic; fp_exception_next : std_ulogic; @@ -60,17 +61,10 @@ architecture behaviour of execute1 is prev_op : insn_type_t; lr_update : std_ulogic; next_lr : std_ulogic_vector(63 downto 0); - resmux : std_ulogic_vector(2 downto 0); - submux : std_ulogic_vector(2 downto 0); mul_in_progress : std_ulogic; mul_finish : std_ulogic; div_in_progress : std_ulogic; cntz_in_progress : std_ulogic; - slow_op_insn : insn_type_t; - slow_op_dest : gpr_index_t; - slow_op_rc : std_ulogic; - slow_op_oe : std_ulogic; - slow_op_xerc : xer_common_t; last_nia : std_ulogic_vector(63 downto 0); redirect : std_ulogic; abs_br : std_ulogic; @@ -82,10 +76,10 @@ architecture behaviour of execute1 is end record; constant reg_type_init : reg_type := (e => 
Execute1ToWritebackInit, + cur_instr => Decode2ToExecute1Init, busy => '0', lr_update => '0', terminate => '0', fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL, mul_in_progress => '0', mul_finish => '0', div_in_progress => '0', cntz_in_progress => '0', - slow_op_insn => OP_ILLEGAL, slow_op_rc => '0', slow_op_oe => '0', slow_op_xerc => xerc_init, next_lr => (others => '0'), last_nia => (others => '0'), redirect => '0', abs_br => '0', do_intr => '0', vector => 0, br_offset => (others => '0'), redir_mode => "0000", @@ -112,6 +106,7 @@ architecture behaviour of execute1 is signal spr_result: std_ulogic_vector(63 downto 0); signal result_mux_sel: std_ulogic_vector(2 downto 0); signal sub_mux_sel: std_ulogic_vector(2 downto 0); + signal current: Decode2ToExecute1Type; -- multiply signals signal x_to_multiply: MultiplyInputType; @@ -294,10 +289,10 @@ begin terminate_out <= r.terminate; + current <= e_in when r.busy = '0' else r.cur_instr; + -- Result mux - result_mux_sel <= e_in.result_sel when r.busy = '0' else r.resmux; - sub_mux_sel <= e_in.sub_select when r.busy = '0' else r.submux; - with result_mux_sel select alu_result <= + with current.result_sel select alu_result <= adder_result when "000", logical_result when "001", rotator_result when "010", @@ -333,9 +328,12 @@ begin variable a_inv : std_ulogic_vector(63 downto 0); variable b_or_m1 : std_ulogic_vector(63 downto 0); variable addg6s : std_ulogic_vector(63 downto 0); + variable isel_result : std_ulogic_vector(63 downto 0); + variable darn : std_ulogic_vector(63 downto 0); + variable mfcr_result : std_ulogic_vector(63 downto 0); + variable setb_result : std_ulogic_vector(63 downto 0); variable newcrf : std_ulogic_vector(3 downto 0); variable sum_with_carry : std_ulogic_vector(64 downto 0); - variable result_en : std_ulogic; variable crnum : crnum_t; variable crbit : integer range 0 to 31; variable scrnum : crnum_t; @@ -375,7 +373,6 @@ begin variable fv : Execute1ToFPUType; begin sum_with_carry 
:= (others => '0'); - result_en := '0'; newcrf := (others => '0'); is_branch := '0'; taken_branch := '0'; @@ -400,7 +397,7 @@ begin -- (SO, OV[32] and CA[32]) are only modified by instructions that are -- handled here, we can just forward the result being sent to -- writeback. - if r.e.write_xerc_enable = '1' then + if r.e.write_xerc_enable = '1' or r.busy = '1' then v.e.xerc := r.e.xerc; else v.e.xerc := e_in.xerc; @@ -422,7 +419,6 @@ begin v.cntz_in_progress := '0'; v.mul_finish := '0'; - misc_result <= (others => '0'); spr_result <= (others => '0'); spr_val := (others => '0'); @@ -440,6 +436,8 @@ begin sum_with_carry := ppc_adde(a_inv, b_or_m1, decode_input_carry(e_in.input_carry, v.e.xerc)); adder_result <= sum_with_carry(63 downto 0); + carry_32 := sum_with_carry(32) xor a_inv(32) xor b_in(32); + carry_64 := sum_with_carry(64); -- signals to multiply and divide units sign1 := '0'; @@ -513,7 +511,7 @@ begin x_to_divider.divisor <= x"00000000" & std_ulogic_vector(abs2(31 downto 0)); end if; - case sub_mux_sel(1 downto 0) is + case current.sub_select(1 downto 0) is when "00" => muldiv_result <= multiply_to_x.result(63 downto 0); when "01" => @@ -525,6 +523,117 @@ begin muldiv_result <= divider_to_x.write_reg_data; end case; + -- Compute misc_result + case current.sub_select is + when "000" => + misc_result <= (others => '0'); + when "001" => + -- addg6s + addg6s := (others => '0'); + for i in 0 to 14 loop + lo := i * 4; + hi := (i + 1) * 4; + if (a_in(hi) xor b_in(hi) xor sum_with_carry(hi)) = '0' then + addg6s(lo + 3 downto lo) := "0110"; + end if; + end loop; + if sum_with_carry(64) = '0' then + addg6s(63 downto 60) := "0110"; + end if; + misc_result <= addg6s; + when "010" => + -- isel + crbit := to_integer(unsigned(insn_bc(e_in.insn))); + if cr_in(31-crbit) = '1' then + isel_result := a_in; + else + isel_result := b_in; + end if; + misc_result <= isel_result; + when "011" => + -- darn + darn := (others => '1'); + if random_err = '0' then + case e_in.insn(17 
downto 16) is + when "00" => + darn := x"00000000" & random_cond(31 downto 0); + when "10" => + darn := random_raw; + when others => + darn := random_cond; + end case; + end if; + misc_result <= darn; + when "100" => + -- mfmsr + misc_result <= ctrl.msr; + when "101" => + if e_in.insn(20) = '0' then + -- mfcr + mfcr_result := x"00000000" & cr_in; + else + -- mfocrf + crnum := fxm_to_num(insn_fxm(e_in.insn)); + mfcr_result := (others => '0'); + for i in 0 to 7 loop + lo := (7-i)*4; + hi := lo + 3; + if crnum = i then + mfcr_result(hi downto lo) := cr_in(hi downto lo); + end if; + end loop; + end if; + misc_result <= mfcr_result; + when "110" => + -- setb + bfa := insn_bfa(e_in.insn); + crbit := to_integer(unsigned(bfa)) * 4; + setb_result := (others => '0'); + if cr_in(31 - crbit) = '1' then + setb_result := (others => '1'); + elsif cr_in(30 - crbit) = '1' then + setb_result(0) := '1'; + end if; + misc_result <= setb_result; + when others => + misc_result <= (others => '0'); + end case; + + -- compute comparison results + -- Note, we have done RB - RA, not RA - RB + if e_in.insn_type = OP_CMP then + l := insn_l(e_in.insn); + else + l := not e_in.is_32bit; + end if; + zerolo := not (or (a_in(31 downto 0) xor b_in(31 downto 0))); + zerohi := not (or (a_in(63 downto 32) xor b_in(63 downto 32))); + if zerolo = '1' and (l = '0' or zerohi = '1') then + -- values are equal + trapval := "00100"; + else + if l = '1' then + -- 64-bit comparison + msb_a := a_in(63); + msb_b := b_in(63); + else + -- 32-bit comparison + msb_a := a_in(31); + msb_b := b_in(31); + end if; + if msb_a /= msb_b then + -- Subtraction might overflow, but + -- comparison is clear from MSB difference. + -- for signed, 0 is greater; for unsigned, 1 is greater + trapval := msb_a & msb_b & '0' & msb_b & msb_a; + else + -- Subtraction cannot overflow since MSBs are equal. 
+ -- carry = 1 indicates RA is smaller (signed or unsigned) + a_lt := (not l and carry_32) or (l and carry_64); + trapval := a_lt & not a_lt & '0' & a_lt & not a_lt; + end if; + end if; + ctrl_tmp <= ctrl; -- FIXME: run at 512MHz not core freq ctrl_tmp.tb <= std_ulogic_vector(unsigned(ctrl.tb) + 1); @@ -577,38 +686,20 @@ begin v.prev_op := e_in.insn_type; end if; - if ctrl.irq_state = WRITE_SRR1 then - v.e.exc_write_reg := fast_spr_num(SPR_SRR1); - v.e.exc_write_data := ctrl.srr1; - v.e.exc_write_enable := '1'; - ctrl_tmp.msr(MSR_SF) <= '1'; - ctrl_tmp.msr(MSR_EE) <= '0'; - ctrl_tmp.msr(MSR_PR) <= '0'; - ctrl_tmp.msr(MSR_SE) <= '0'; - ctrl_tmp.msr(MSR_BE) <= '0'; - ctrl_tmp.msr(MSR_FP) <= '0'; - ctrl_tmp.msr(MSR_FE0) <= '0'; - ctrl_tmp.msr(MSR_FE1) <= '0'; - ctrl_tmp.msr(MSR_IR) <= '0'; - ctrl_tmp.msr(MSR_DR) <= '0'; - ctrl_tmp.msr(MSR_RI) <= '0'; - ctrl_tmp.msr(MSR_LE) <= '1'; - v.e.valid := '1'; - v.trace_next := '0'; - v.fp_exception_next := '0'; - report "Writing SRR1: " & to_hstring(ctrl.srr1); - - elsif valid_in = '1' and e_in.second = '0' and - ((HAS_FPU and r.fp_exception_next = '1') or r.trace_next = '1') then + -- Determine if there is any exception to be taken + -- before/instead of executing this instruction + if valid_in = '1' and e_in.second = '0' then if HAS_FPU and r.fp_exception_next = '1' then -- This is used for FP-type program interrupts that -- become pending due to MSR[FE0,FE1] changing from 00 to non-zero. 
+ exception := '1'; v.vector := 16#700#; ctrl_tmp.srr1(63 - 43) <= '1'; ctrl_tmp.srr1(63 - 47) <= '1'; - else + elsif r.trace_next = '1' then -- Generate a trace interrupt rather than executing the next instruction -- or taking any asynchronous interrupt + exception := '1'; v.vector := 16#d00#; ctrl_tmp.srr1(63 - 33) <= '1'; if r.prev_op = OP_LOAD or r.prev_op = OP_ICBI or r.prev_op = OP_ICBT or @@ -617,48 +708,38 @@ begin elsif r.prev_op = OP_STORE or r.prev_op = OP_DCBZ or r.prev_op = OP_DCBTST then ctrl_tmp.srr1(63 - 36) <= '1'; end if; - end if; - exception := '1'; - - elsif irq_valid = '1' and valid_in = '1' and e_in.second = '0' then - -- we need two cycles to write srr0 and 1 - -- will need more when we have to write HEIR - -- Don't deliver the interrupt until we have a valid instruction - -- coming in, so we have a valid NIA to put in SRR0. - exception := '1'; - elsif valid_in = '1' and ctrl.msr(MSR_PR) = '1' and - instr_is_privileged(e_in.insn_type, e_in.insn) then - -- generate a program interrupt - exception := '1'; - v.vector := 16#700#; - -- set bit 45 to indicate privileged instruction type interrupt - ctrl_tmp.srr1(63 - 45) <= '1'; - report "privileged instruction"; + elsif irq_valid = '1' then + -- Don't deliver the interrupt until we have a valid instruction + -- coming in, so we have a valid NIA to put in SRR0. + exception := '1'; - elsif not HAS_FPU and valid_in = '1' and e_in.fac = FPU then - -- make lfd/stfd/lfs/stfs etc. 
illegal in no-FPU implementations - illegal := '1'; + elsif ctrl.msr(MSR_PR) = '1' and instr_is_privileged(e_in.insn_type, e_in.insn) then + -- generate a program interrupt + exception := '1'; + v.vector := 16#700#; + -- set bit 45 to indicate privileged instruction type interrupt + ctrl_tmp.srr1(63 - 45) <= '1'; + report "privileged instruction"; - elsif HAS_FPU and valid_in = '1' and ctrl.msr(MSR_FP) = '0' and e_in.fac = FPU then - -- generate a floating-point unavailable interrupt - exception := '1'; - v.vector := 16#800#; - report "FP unavailable interrupt"; + elsif not HAS_FPU and e_in.fac = FPU then + -- make lfd/stfd/lfs/stfs etc. illegal in no-FPU implementations + illegal := '1'; - elsif valid_in = '1' and e_in.unit = ALU then + elsif HAS_FPU and ctrl.msr(MSR_FP) = '0' and e_in.fac = FPU then + -- generate a floating-point unavailable interrupt + exception := '1'; + v.vector := 16#800#; + report "FP unavailable interrupt"; + end if; + end if; + if valid_in = '1' and exception = '0' and illegal = '0' and e_in.unit = ALU then report "execute nia " & to_hstring(e_in.nia); + v.cur_instr := e_in; + v.next_lr := next_nia; v.e.valid := '1'; - v.e.write_reg := e_in.write_reg; - v.slow_op_insn := e_in.insn_type; - v.slow_op_dest := gspr_to_gpr(e_in.write_reg); - v.slow_op_rc := e_in.rc; - v.slow_op_oe := e_in.oe; - v.slow_op_xerc := v.e.xerc; - v.resmux := e_in.result_sel; - v.submux := e_in.sub_select; case_0: case e_in.insn_type is @@ -689,101 +770,48 @@ begin end if; when OP_NOP | OP_DCBF | OP_DCBST | OP_DCBT | OP_DCBTST | OP_ICBT => -- Do nothing - when OP_ADD | OP_CMP | OP_TRAP => - carry_32 := sum_with_carry(32) xor a_inv(32) xor b_in(32); - carry_64 := sum_with_carry(64); - if e_in.insn_type = OP_ADD then - if e_in.output_carry = '1' then - if e_in.input_carry /= OV then - set_carry(v.e, carry_32, carry_64); - else - v.e.xerc.ov := carry_64; - v.e.xerc.ov32 := carry_32; - v.e.write_xerc_enable := '1'; - end if; - end if; - if e_in.oe = '1' then - set_ov(v.e, 
- calc_ov(a_inv(63), b_in(63), carry_64, sum_with_carry(63)), - calc_ov(a_inv(31), b_in(31), carry_32, sum_with_carry(31))); - end if; - result_en := '1'; - else - -- trap, CMP and CMPL instructions - -- Note, we have done RB - RA, not RA - RB - if e_in.insn_type = OP_CMP then - l := insn_l(e_in.insn); - else - l := not e_in.is_32bit; - end if; - zerolo := not (or (a_in(31 downto 0) xor b_in(31 downto 0))); - zerohi := not (or (a_in(63 downto 32) xor b_in(63 downto 32))); - if zerolo = '1' and (l = '0' or zerohi = '1') then - -- values are equal - trapval := "00100"; + when OP_ADD => + if e_in.output_carry = '1' then + if e_in.input_carry /= OV then + set_carry(v.e, carry_32, carry_64); else - if l = '1' then - -- 64-bit comparison - msb_a := a_in(63); - msb_b := b_in(63); - else - -- 32-bit comparison - msb_a := a_in(31); - msb_b := b_in(31); - end if; - if msb_a /= msb_b then - -- Subtraction might overflow, but - -- comparison is clear from MSB difference. - -- for signed, 0 is greater; for unsigned, 1 is greater - trapval := msb_a & msb_b & '0' & msb_b & msb_a; - else - -- Subtraction cannot overflow since MSBs are equal. 
- -- carry = 1 indicates RA is smaller (signed or unsigned) - a_lt := (not l and carry_32) or (l and carry_64); - trapval := a_lt & not a_lt & '0' & a_lt & not a_lt; - end if; - end if; - if e_in.insn_type = OP_CMP then - if e_in.is_signed = '1' then - newcrf := trapval(4 downto 2) & v.e.xerc.so; - else - newcrf := trapval(1 downto 0) & trapval(2) & v.e.xerc.so; - end if; - bf := insn_bf(e_in.insn); - crnum := to_integer(unsigned(bf)); - v.e.write_cr_enable := '1'; - v.e.write_cr_mask := num_to_fxm(crnum); - for i in 0 to 7 loop - lo := i*4; - hi := lo + 3; - v.e.write_cr_data(hi downto lo) := newcrf; - end loop; - else - -- trap instructions (tw, twi, td, tdi) - v.vector := 16#700#; - -- set bit 46 to say trap occurred - ctrl_tmp.srr1(63 - 46) <= '1'; - if or (trapval and insn_to(e_in.insn)) = '1' then - -- generate trap-type program interrupt - exception := '1'; - report "trap"; - end if; + v.e.xerc.ov := carry_64; + v.e.xerc.ov32 := carry_32; + v.e.write_xerc_enable := '1'; end if; end if; - when OP_ADDG6S => - addg6s := (others => '0'); - for i in 0 to 14 loop - lo := i * 4; - hi := (i + 1) * 4; - if (a_in(hi) xor b_in(hi) xor sum_with_carry(hi)) = '0' then - addg6s(lo + 3 downto lo) := "0110"; - end if; + if e_in.oe = '1' then + set_ov(v.e, + calc_ov(a_inv(63), b_in(63), carry_64, sum_with_carry(63)), + calc_ov(a_inv(31), b_in(31), carry_32, sum_with_carry(31))); + end if; + when OP_CMP => + -- CMP and CMPL instructions + if e_in.is_signed = '1' then + newcrf := trapval(4 downto 2) & v.e.xerc.so; + else + newcrf := trapval(1 downto 0) & trapval(2) & v.e.xerc.so; + end if; + bf := insn_bf(e_in.insn); + crnum := to_integer(unsigned(bf)); + v.e.write_cr_enable := '1'; + v.e.write_cr_mask := num_to_fxm(crnum); + for i in 0 to 7 loop + lo := i*4; + hi := lo + 3; + v.e.write_cr_data(hi downto lo) := newcrf; end loop; - if sum_with_carry(64) = '0' then - addg6s(63 downto 60) := "0110"; + when OP_TRAP => + -- trap instructions (tw, twi, td, tdi) + v.vector := 16#700#; 
+ -- set bit 46 to say trap occurred + ctrl_tmp.srr1(63 - 46) <= '1'; + if or (trapval and insn_to(e_in.insn)) = '1' then + -- generate trap-type program interrupt + exception := '1'; + report "trap"; end if; - misc_result <= addg6s; - result_en := '1'; + when OP_ADDG6S => when OP_CMPRB => newcrf := ppc_cmprb(a_in, b_in, insn_l(e_in.insn)); bf := insn_bf(e_in.insn); @@ -802,7 +830,6 @@ begin newcrf & newcrf & newcrf & newcrf; when OP_AND | OP_OR | OP_XOR | OP_POPCNT | OP_PRTY | OP_CMPB | OP_EXTS | OP_BPERM | OP_BCD => - result_en := '1'; when OP_B => is_branch := '1'; taken_branch := '1'; @@ -812,12 +839,8 @@ begin end if; when OP_BC => -- read_data1 is CTR - v.e.write_reg := fast_spr_num(SPR_CTR); bo := insn_bo(e_in.insn); bi := insn_bi(e_in.insn); - if bo(4-2) = '0' then - result_en := '1'; - end if; is_branch := '1'; taken_branch := ppc_bc_taken(bo, bi, cr_in, a_in); abs_branch := insn_aa(e_in.insn); @@ -827,12 +850,8 @@ begin when OP_BCREG => -- read_data1 is CTR -- read_data2 is target register (CTR, LR or TAR) - v.e.write_reg := fast_spr_num(SPR_CTR); bo := insn_bo(e_in.insn); bi := insn_bi(e_in.insn); - if bo(4-2) = '0' and e_in.insn(10) = '0' then - result_en := '1'; - end if; is_branch := '1'; taken_branch := ppc_bc_taken(bo, bi, cr_in, a_in); abs_branch := '1'; @@ -868,13 +887,6 @@ begin v.cntz_in_progress := '1'; v.busy := '1'; when OP_ISEL => - crbit := to_integer(unsigned(insn_bc(e_in.insn))); - if cr_in(31-crbit) = '1' then - misc_result <= a_in; - else - misc_result <= b_in; - end if; - result_en := '1'; when OP_CROP => cr_op := insn_cr(e_in.insn); report "CR OP " & to_hstring(cr_op); @@ -927,27 +939,11 @@ begin v.e.write_cr_data := newcrf & newcrf & newcrf & newcrf & newcrf & newcrf & newcrf & newcrf; when OP_DARN => - if random_err = '0' then - case e_in.insn(17 downto 16) is - when "00" => - misc_result <= x"00000000" & random_cond(31 downto 0); - when "10" => - misc_result <= random_raw; - when others => - misc_result <= random_cond; - end case; 
- else - misc_result <= (others => '1'); - end if; - result_en := '1'; when OP_MFMSR => - misc_result <= ctrl.msr; - result_en := '1'; when OP_MFSPR => report "MFSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) & "=" & to_hstring(a_in); - result_en := '1'; - if is_fast_spr(e_in.read_reg1) then + if is_fast_spr(e_in.read_reg1) = '1' then spr_val := a_in; if decode_spr_num(e_in.insn) = SPR_XER then -- bits 0:31 and 35:43 are treated as reserved and return 0s when read using mfxer @@ -982,7 +978,7 @@ begin when others => -- mfspr from unimplemented SPRs should be a nop in -- supervisor mode and a program interrupt for user mode - if ctrl.msr(MSR_PR) = '1' then + if is_fast_spr(e_in.read_reg1) = '0' and ctrl.msr(MSR_PR) = '1' then illegal := '1'; end if; end case; @@ -990,22 +986,6 @@ begin spr_result <= spr_val; when OP_MFCR => - if e_in.insn(20) = '0' then - -- mfcr - misc_result <= x"00000000" & cr_in; - else - -- mfocrf - crnum := fxm_to_num(insn_fxm(e_in.insn)); - misc_result <= (others => '0'); - for i in 0 to 7 loop - lo := (7-i)*4; - hi := lo + 3; - if crnum = i then - misc_result(hi downto lo) <= cr_in(hi downto lo); - end if; - end loop; - end if; - result_en := '1'; when OP_MTCRF => v.e.write_cr_enable := '1'; if e_in.insn(20) = '0' then @@ -1045,7 +1025,6 @@ begin report "MTSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) & "=" & to_hstring(c_in); if is_fast_spr(e_in.write_reg) then - result_en := '1'; if decode_spr_num(e_in.insn) = SPR_XER then v.e.xerc.so := c_in(63-32); v.e.xerc.ov := c_in(63-33); @@ -1073,16 +1052,7 @@ begin if e_in.output_carry = '1' then set_carry(v.e, rotator_carry, rotator_carry); end if; - result_en := '1'; when OP_SETB => - bfa := insn_bfa(e_in.insn); - crbit := to_integer(unsigned(bfa)) * 4; - misc_result <= (others => '0'); - if cr_in(31 - crbit) = '1' then - misc_result <= (others => '1'); - elsif cr_in(30 - crbit) = '1' then - misc_result(0) <= '1'; - end if; when OP_ISYNC => v.redirect := '1'; @@ -1108,8 +1078,6 
@@ begin report "illegal"; end case; - v.e.rc := e_in.rc and valid_in; - -- Mispredicted branches cause a redirect if is_branch = '1' then if taken_branch = '1' then @@ -1126,26 +1094,7 @@ begin end if; end if; - -- Update LR on the next cycle after a branch link - -- If we're not writing back anything else, we can write back LR - -- this cycle, otherwise we take an extra cycle. We use the - -- exc_write path since next_nia is written through that path - -- in other places. - if e_in.lr = '1' then - if result_en = '0' then - v.e.exc_write_enable := '1'; - v.e.exc_write_data := next_nia; - v.e.exc_write_reg := fast_spr_num(SPR_LR); - else - v.lr_update := '1'; - v.next_lr := next_nia; - v.e.valid := '0'; - report "Delayed LR update to " & to_hstring(next_nia); - v.busy := '1'; - end if; - end if; - - elsif valid_in = '1' then + elsif valid_in = '1' and exception = '0' and illegal = '0' then -- instruction for other units, i.e. LDST if e_in.unit = LDST then lv.valid := '1'; @@ -1164,23 +1113,28 @@ begin -- valid_in = 0. Hence they don't happen in the same cycle as any of -- the cases above which depend on valid_in = 1. - if r.redirect = '1' then - v.e.valid := '1'; - end if; - if r.lr_update = '1' then + if ctrl.irq_state = WRITE_SRR1 then + v.e.exc_write_reg := fast_spr_num(SPR_SRR1); + v.e.exc_write_data := ctrl.srr1; v.e.exc_write_enable := '1'; - v.e.exc_write_data := r.next_lr; - v.e.exc_write_reg := fast_spr_num(SPR_LR); - v.e.valid := '1'; - -- Keep r.e.write_data unchanged next cycle in case it is needed - -- for a forwarded result (e.g. for CTR). 
- hold_wr_data := '1'; + ctrl_tmp.msr(MSR_SF) <= '1'; + ctrl_tmp.msr(MSR_EE) <= '0'; + ctrl_tmp.msr(MSR_PR) <= '0'; + ctrl_tmp.msr(MSR_SE) <= '0'; + ctrl_tmp.msr(MSR_BE) <= '0'; + ctrl_tmp.msr(MSR_FP) <= '0'; + ctrl_tmp.msr(MSR_FE0) <= '0'; + ctrl_tmp.msr(MSR_FE1) <= '0'; + ctrl_tmp.msr(MSR_IR) <= '0'; + ctrl_tmp.msr(MSR_DR) <= '0'; + ctrl_tmp.msr(MSR_RI) <= '0'; + ctrl_tmp.msr(MSR_LE) <= '1'; + v.trace_next := '0'; + v.fp_exception_next := '0'; + report "Writing SRR1: " & to_hstring(ctrl.srr1); + elsif r.cntz_in_progress = '1' then -- cnt[lt]z always takes two cycles - result_en := '1'; - v.e.write_reg := gpr_to_gspr(r.slow_op_dest); - v.e.rc := r.slow_op_rc; - v.e.xerc := r.slow_op_xerc; v.e.valid := '1'; elsif r.mul_in_progress = '1' or r.div_in_progress = '1' then if (r.mul_in_progress = '1' and multiply_to_x.valid = '1') or @@ -1190,23 +1144,21 @@ begin else overflow := divider_to_x.overflow; end if; - if r.mul_in_progress = '1' and r.slow_op_oe = '1' then + if r.mul_in_progress = '1' and current.oe = '1' then -- have to wait until next cycle for overflow indication v.mul_finish := '1'; v.busy := '1'; else - result_en := '1'; - v.e.write_reg := gpr_to_gspr(r.slow_op_dest); - v.e.rc := r.slow_op_rc; - v.e.xerc := r.slow_op_xerc; - v.e.write_xerc_enable := r.slow_op_oe; + v.e.write_xerc_enable := current.oe; -- We must test oe because the RC update code in writeback -- will use the xerc value to set CR0:SO so we must not clobber -- xerc if OE wasn't set. 
- if r.slow_op_oe = '1' then + if current.oe = '1' then v.e.xerc.ov := overflow; v.e.xerc.ov32 := overflow; - v.e.xerc.so := r.slow_op_xerc.so or overflow; + if overflow = '1' then + v.e.xerc.so := '1'; + end if; end if; v.e.valid := '1'; end if; @@ -1217,16 +1169,19 @@ begin end if; elsif r.mul_finish = '1' then hold_wr_data := '1'; - result_en := '1'; - v.e.write_reg := gpr_to_gspr(r.slow_op_dest); - v.e.rc := r.slow_op_rc; - v.e.xerc := r.slow_op_xerc; - v.e.write_xerc_enable := r.slow_op_oe; + v.e.write_xerc_enable := current.oe; v.e.xerc.ov := multiply_to_x.overflow; v.e.xerc.ov32 := multiply_to_x.overflow; - v.e.xerc.so := r.slow_op_xerc.so or multiply_to_x.overflow; + if multiply_to_x.overflow = '1' then + v.e.xerc.so := '1'; + end if; v.e.valid := '1'; end if; + -- When doing delayed LR update, keep r.e.write_data unchanged + -- next cycle in case it is needed for a forwarded result (e.g. CTR). + if r.lr_update = '1' then + hold_wr_data := '1'; + end if; -- Generate FP-type program interrupt. fp_in.interrupt will only -- be set during the execution of a FP instruction. 
@@ -1253,17 +1208,6 @@ begin end if; end if; - if do_trace = '1' then - v.trace_next := '1'; - end if; - - if hold_wr_data = '0' then - v.e.write_data := alu_result; - else - v.e.write_data := r.e.write_data; - end if; - v.e.write_enable := result_en and not exception; - -- generate DSI or DSegI for load/store exceptions -- or ISI or ISegI for instruction fetch exceptions if l_in.exception = '1' then @@ -1297,10 +1241,52 @@ begin v.do_intr := '1'; end if; + if do_trace = '1' then + v.trace_next := '1'; + end if; + + if hold_wr_data = '0' then + v.e.write_data := alu_result; + else + v.e.write_data := r.e.write_data; + end if; + v.e.write_reg := current.write_reg; + v.e.write_enable := current.write_reg_enable and v.e.valid and not exception; + v.e.rc := current.rc and v.e.valid and not exception; + + -- Update LR on the next cycle after a branch link + -- If we're not writing back anything else, we can write back LR + -- this cycle, otherwise we take an extra cycle. We use the + -- exc_write path since next_nia is written through that path + -- in other places. + if v.e.valid = '1' and exception = '0' and current.lr = '1' then + if current.write_reg_enable = '0' then + v.e.exc_write_enable := '1'; + v.e.exc_write_data := next_nia; + v.e.exc_write_reg := fast_spr_num(SPR_LR); + else + v.lr_update := '1'; + v.e.valid := '0'; + report "Delayed LR update to " & to_hstring(next_nia); + v.busy := '1'; + end if; + end if; + if r.lr_update = '1' then + v.e.exc_write_enable := '1'; + v.e.exc_write_data := r.next_lr; + v.e.exc_write_reg := fast_spr_num(SPR_LR); + v.e.valid := '1'; + end if; + + -- Defer completion for one cycle when redirecting. + -- This also ensures r.busy = 1 when ctrl.irq_state = WRITE_SRR1 if v.redirect = '1' then v.busy := '1'; v.e.valid := '0'; end if; + if r.redirect = '1' then + v.e.valid := '1'; + end if; -- Outputs to fetch1 f.redirect := r.redirect;