diff --git a/common.vhdl b/common.vhdl index d376ac3..18378d5 100644 --- a/common.vhdl +++ b/common.vhdl @@ -31,6 +31,7 @@ package common is constant SPR_DEC : spr_num_t := 22; constant SPR_SRR0 : spr_num_t := 26; constant SPR_SRR1 : spr_num_t := 27; + constant SPR_CFAR : spr_num_t := 28; constant SPR_HSRR0 : spr_num_t := 314; constant SPR_HSRR1 : spr_num_t := 315; constant SPR_SPRG0 : spr_num_t := 272; @@ -94,8 +95,8 @@ package common is tb: std_ulogic_vector(63 downto 0); dec: std_ulogic_vector(63 downto 0); msr: std_ulogic_vector(63 downto 0); + cfar: std_ulogic_vector(63 downto 0); irq_state : irq_state_t; - irq_nia: std_ulogic_vector(63 downto 0); srr1: std_ulogic_vector(63 downto 0); end record; @@ -150,6 +151,7 @@ package common is bypass_data2: std_ulogic; bypass_data3: std_ulogic; cr: std_ulogic_vector(31 downto 0); + bypass_cr : std_ulogic; xerc: xer_common_t; lr: std_ulogic; rc: std_ulogic; @@ -172,7 +174,7 @@ package common is end record; constant Decode2ToExecute1Init : Decode2ToExecute1Type := (valid => '0', unit => NONE, insn_type => OP_ILLEGAL, bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0', - lr => '0', rc => '0', oe => '0', invert_a => '0', + bypass_cr => '0', lr => '0', rc => '0', oe => '0', invert_a => '0', invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0', is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', br_pred => '0', byte_reverse => '0', sign_extend => '0', update => '0', nia => (others => '0'), read_data1 => (others => '0'), read_data2 => (others => '0'), read_data3 => (others => '0'), cr => (others => '0'), insn => (others => '0'), data_len => (others => '0'), others => (others => '0')); @@ -232,8 +234,8 @@ package common is priv_mode: std_ulogic; redirect_nia: std_ulogic_vector(63 downto 0); end record; - constant Execute1ToFetch1TypeInit : Execute1ToFetch1Type := (redirect => '0', virt_mode => '0', - priv_mode => '0', others => (others => '0')); + constant Execute1ToFetch1Init : Execute1ToFetch1Type := (redirect => '0', virt_mode => '0', + priv_mode => '0', others => (others => '0')); type Execute1ToLoadstore1Type is record valid : std_ulogic; diff --git a/control.vhdl b/control.vhdl index 5e557c4..d04576a 100644 --- a/control.vhdl +++ b/control.vhdl @@ -38,6 +38,7 @@ entity control is cr_read_in : in std_ulogic; cr_write_in : in std_ulogic; + cr_bypassable : in std_ulogic; valid_out : out std_ulogic; stall_out : out std_ulogic; @@ -45,7 +46,8 @@ entity control is gpr_bypass_a : out std_ulogic; gpr_bypass_b : out std_ulogic; - gpr_bypass_c : out std_ulogic + gpr_bypass_c : out std_ulogic; + cr_bypass : out std_ulogic ); end entity control; @@ -161,8 +163,10 @@ begin cr_read_in => cr_read_in, cr_write_in => cr_write_valid, + bypassable => cr_bypassable, - stall_out => cr_stall_out + stall_out => cr_stall_out, + use_bypass => cr_bypass ); control0: process(clk) diff --git a/cr_hazard.vhdl b/cr_hazard.vhdl index 4b79020..a6203a8 100644 --- a/cr_hazard.vhdl +++ b/cr_hazard.vhdl @@ -16,15 +16,18 @@ entity cr_hazard is cr_read_in : in std_ulogic; cr_write_in : in std_ulogic; + bypassable : in std_ulogic; - stall_out : out std_ulogic + stall_out : out std_ulogic; + use_bypass : out std_ulogic ); end entity cr_hazard; architecture behaviour of cr_hazard is type pipeline_entry_type is record - valid : std_ulogic; + valid : std_ulogic; + bypass : std_ulogic; end record; - constant pipeline_entry_init : pipeline_entry_type := (valid => '0'); + constant pipeline_entry_init : pipeline_entry_type := (valid => '0', bypass => '0'); type pipeline_t is array(0 to PIPELINE_DEPTH) of pipeline_entry_type; constant pipeline_t_init : pipeline_t := (others => pipeline_entry_init); @@ -47,7 +50,20 @@ begin if complete_in = '1' then v(1).valid := '0'; end if; - stall_out <= cr_read_in and (v(0).valid or v(1).valid); + + use_bypass <= '0'; + stall_out <= '0'; + if cr_read_in = '1' then + loop_0: for i in 0 to PIPELINE_DEPTH loop + if v(i).valid = '1' then + if r(i).bypass = '1' then + use_bypass <= '1'; + else + stall_out <= '1'; + end if; + end if; + end loop; + end if; -- XXX assumes PIPELINE_DEPTH = 1 if busy_in = '0' then @@ -56,6 +72,7 @@ begin end if; if deferred = '0' and issuing = '1' then v(0).valid := cr_write_in; + v(0).bypass := bypassable; end if; if flush_in = '1' then v(0).valid := '0'; diff --git a/decode1.vhdl b/decode1.vhdl index 2060e64..29b7a05 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -60,7 +60,7 @@ architecture behaviour of decode1 is 41 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- lhzu 32 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- lwz 33 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '0'), -- lwzu - 7 => (ALU, OP_MUL_L64, RA, CONST_SI, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0'), -- mulli + 7 => (ALU, OP_MUL_L64, RA, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0'), -- mulli 24 => (ALU, OP_OR, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- ori 25 => (ALU, OP_OR, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- oris 20 => (ALU, OP_RLC, RA, CONST_SH32, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- rlwimi @@ -262,19 +262,19 @@ architecture behaviour of decode1 is 2#0010010000# => (ALU, OP_MTCRF, NONE, NONE, RS, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mtcrf/mtocrf 2#0010110010# => (ALU, OP_MTMSRD, NONE, NONE, RS, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- mtmsrd # ignore top bits and d 2#0111010011# => (ALU, OP_MTSPR, NONE, NONE, RS, SPR, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mtspr - 2#0001001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulhd - 2#0000001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- mulhdu - 2#0001001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mulhw - 2#0000001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- mulhwu + 2#0001001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulhd + 2#0000001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- mulhdu + 2#0001001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mulhw + 2#0000001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- mulhwu -- next 4 have reserved bit set - 2#1001001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulhd - 2#1000001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- mulhdu - 2#1001001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mulhw - 2#1000001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- mulhwu - 2#0011101001# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulld - 2#1011101001# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulldo - 2#0011101011# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mullw - 2#1011101011# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mullwo + 2#1001001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulhd + 2#1000001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- mulhdu + 2#1001001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mulhw + 2#1000001011# => (ALU, OP_MUL_H32, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- mulhwu + 2#0011101001# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulld + 2#1011101001# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulldo + 2#0011101011# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mullw + 2#1011101011# => (ALU, OP_MUL_L64, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '0'), -- mullwo 2#0111011100# => (ALU, OP_AND, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- nand 2#0001101000# => (ALU, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- neg 2#1001101000# => (ALU, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- nego @@ -473,8 +473,8 @@ begin end if; else -- Could be OP_RFID - v.ispr1 := fast_spr_num(SPR_SRR0); - v.ispr2 := fast_spr_num(SPR_SRR1); + v.ispr1 := fast_spr_num(SPR_SRR1); + v.ispr2 := fast_spr_num(SPR_SRR0); end if; elsif majorop = "011110" then diff --git a/decode2.vhdl b/decode2.vhdl index 80687a0..d724874 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -213,7 +213,10 @@ architecture behaviour of decode2 is signal gpr_c_read : gpr_index_t; signal gpr_c_bypass : std_ulogic; - signal cr_write_valid : std_ulogic; + signal cr_write_valid : std_ulogic; + signal cr_bypass : std_ulogic; + signal cr_bypass_avail : std_ulogic; + begin control_0: entity work.control generic map ( @@ -248,7 +251,9 @@ begin gpr_c_read_in => gpr_c_read, cr_read_in => d_in.decode.input_cr, - cr_write_in => cr_write_valid, + cr_write_in => cr_write_valid, + cr_bypass => cr_bypass, + cr_bypassable => cr_bypass_avail, valid_out => control_valid_out, stall_out => stall_out, @@ -342,6 +347,7 @@ begin v.e.oe := decode_oe(d_in.decode.rc, d_in.insn); end if; v.e.cr := c_in.read_cr_data; + v.e.bypass_cr := cr_bypass; v.e.xerc := c_in.read_xerc_data; v.e.invert_a := d_in.decode.invert_a; v.e.invert_out := d_in.decode.invert_out; @@ -388,6 +394,10 @@ begin gpr_c_read <= gspr_to_gpr(decoded_reg_c.reg); cr_write_valid <= d_in.decode.output_cr or decode_rc(d_in.decode.rc, d_in.insn); + cr_bypass_avail <= '0'; + if EX1_BYPASS then + cr_bypass_avail <= d_in.decode.output_cr; + end if; v.e.valid := control_valid_out; if d_in.decode.unit = NONE then diff --git a/execute1.vhdl b/execute1.vhdl index c585f78..a1cd008 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -48,6 +48,7 @@ end entity execute1; architecture behaviour of execute1 is type reg_type is record e : Execute1ToWritebackType; + f : Execute1ToFetch1Type; busy: std_ulogic; terminate: std_ulogic; lr_update : std_ulogic; @@ -64,7 +65,8 @@ architecture behaviour of execute1 is log_addr_spr : std_ulogic_vector(31 downto 0); end record; constant reg_type_init : reg_type := - (e => Execute1ToWritebackInit, busy => '0', lr_update => '0', terminate => '0', + (e => Execute1ToWritebackInit, f => Execute1ToFetch1Init, + busy => '0', lr_update => '0', terminate => '0', mul_in_progress => '0', div_in_progress => '0', cntz_in_progress => '0', slow_op_insn => OP_ILLEGAL, slow_op_rc => '0', slow_op_oe => '0', slow_op_xerc => xerc_init, next_lr => (others => '0'), last_nia => (others => '0'), others => (others => '0')); @@ -72,6 +74,7 @@ architecture behaviour of execute1 is signal r, rin : reg_type; signal a_in, b_in, c_in : std_ulogic_vector(63 downto 0); + signal cr_in : std_ulogic_vector(31 downto 0); signal valid_in : std_ulogic; signal ctrl: ctrl_t := (irq_state => WRITE_SRR0, others => (others => '0')); @@ -316,6 +319,7 @@ begin v := r; v.e := Execute1ToWritebackInit; lv := Execute1ToLoadstore1Init; + v.f.redirect := '0'; -- XER forwarding. To avoid having to track XER hazards, we -- use the previously latched value. @@ -352,6 +356,16 @@ begin v.e.xerc := e_in.xerc; end if; + -- CR forwarding + cr_in <= e_in.cr; + if EX1_BYPASS and e_in.bypass_cr = '1' and r.e.write_cr_enable = '1' then + for i in 0 to 7 loop + if r.e.write_cr_mask(i) = '1' then + cr_in(i * 4 + 3 downto i * 4) <= r.e.write_cr_data(i * 4 + 3 downto i * 4); + end if; + end loop; + end if; + v.lr_update := '0'; v.mul_in_progress := '0'; v.div_in_progress := '0'; @@ -423,11 +437,11 @@ begin irq_valid := '0'; if ctrl.msr(MSR_EE) = '1' then if ctrl.dec(63) = '1' then - ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#900#, 64)); + v.f.redirect_nia := std_logic_vector(to_unsigned(16#900#, 64)); report "IRQ valid: DEC"; irq_valid := '1'; elsif ext_irq_in = '1' then - ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#500#, 64)); + v.f.redirect_nia := std_logic_vector(to_unsigned(16#500#, 64)); report "IRQ valid: External"; irq_valid := '1'; end if; @@ -436,10 +450,9 @@ begin v.terminate := '0'; icache_inval <= '0'; v.busy := '0'; - f_out <= Execute1ToFetch1TypeInit; -- send MSR[IR] and ~MSR[PR] up to fetch1 - f_out.virt_mode <= ctrl.msr(MSR_IR); - f_out.priv_mode <= not ctrl.msr(MSR_PR); + v.f.virt_mode := ctrl.msr(MSR_IR); + v.f.priv_mode := not ctrl.msr(MSR_PR); -- Next insn adder used in a couple of places next_nia := std_ulogic_vector(unsigned(e_in.nia) + 4); @@ -450,6 +463,7 @@ begin rot_clear_right <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCR else '0'; rot_sign_ext <= '1' when e_in.insn_type = OP_EXTSWSLI else '0'; + ctrl_tmp.srr1 <= msr_copy(ctrl.msr); ctrl_tmp.irq_state <= WRITE_SRR0; exception := '0'; illegal := '0'; @@ -472,10 +486,6 @@ begin ctrl_tmp.msr(MSR_DR) <= '0'; ctrl_tmp.msr(MSR_RI) <= '0'; ctrl_tmp.msr(MSR_LE) <= '1'; - f_out.redirect <= '1'; - f_out.virt_mode <= '0'; - f_out.priv_mode <= '1'; - f_out.redirect_nia <= ctrl.irq_nia; v.e.valid := '1'; report "Writing SRR1: " & to_hstring(ctrl.srr1); @@ -485,14 +495,12 @@ begin -- Don't deliver the interrupt until we have a valid instruction -- coming in, so we have a valid NIA to put in SRR0. exception := '1'; - ctrl_tmp.srr1 <= msr_copy(ctrl.msr); elsif valid_in = '1' and ctrl.msr(MSR_PR) = '1' and instr_is_privileged(e_in.insn_type, e_in.insn) then -- generate a program interrupt exception := '1'; - ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#700#, 64)); - ctrl_tmp.srr1 <= msr_copy(ctrl.msr); + v.f.redirect_nia := std_logic_vector(to_unsigned(16#700#, 64)); -- set bit 45 to indicate privileged instruction type interrupt ctrl_tmp.srr1(63 - 45) <= '1'; report "privileged instruction"; @@ -522,8 +530,7 @@ begin if e_in.insn(1) = '1' then exception := '1'; exception_nextpc := '1'; - ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#C00#, 64)); - ctrl_tmp.srr1 <= msr_copy(ctrl.msr); + v.f.redirect_nia := std_logic_vector(to_unsigned(16#C00#, 64)); report "sc"; else illegal := '1'; @@ -615,8 +622,7 @@ begin if or (trapval and insn_to(e_in.insn)) = '1' then -- generate trap-type program interrupt exception := '1'; - ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#700#, 64)); - ctrl_tmp.srr1 <= msr_copy(ctrl.msr); + v.f.redirect_nia := std_logic_vector(to_unsigned(16#700#, 64)); -- set bit 46 to say trap occurred ctrl_tmp.srr1(63 - 46) <= '1'; report "trap"; @@ -640,7 +646,7 @@ begin v.e.write_reg := fast_spr_num(SPR_CTR); end if; is_branch := '1'; - taken_branch := ppc_bc_taken(bo, bi, e_in.cr, a_in); + taken_branch := ppc_bc_taken(bo, bi, cr_in, a_in); abs_branch := insn_aa(e_in.insn); when OP_BCREG => -- read_data1 is CTR @@ -652,26 +658,27 @@ begin result_en := '1'; v.e.write_reg := fast_spr_num(SPR_CTR); end if; - if ppc_bc_taken(bo, bi, e_in.cr, a_in) = '1' then - f_out.redirect <= '1'; - f_out.redirect_nia <= b_in(63 downto 2) & "00"; - end if; + is_branch := '1'; + taken_branch := ppc_bc_taken(bo, bi, cr_in, a_in); + abs_branch := '1'; when OP_RFID => - f_out.redirect <= '1'; - f_out.virt_mode <= b_in(MSR_IR) or b_in(MSR_PR); - f_out.priv_mode <= not b_in(MSR_PR); - f_out.redirect_nia <= a_in(63 downto 2) & "00"; -- srr0 + v.f.virt_mode := a_in(MSR_IR) or a_in(MSR_PR); + v.f.priv_mode := not a_in(MSR_PR); -- Can't use msr_copy here because the partial function MSR -- bits should be left unchanged, not zeroed. - ctrl_tmp.msr(63 downto 31) <= b_in(63 downto 31); - ctrl_tmp.msr(26 downto 22) <= b_in(26 downto 22); - ctrl_tmp.msr(15 downto 0) <= b_in(15 downto 0); - if b_in(MSR_PR) = '1' then + ctrl_tmp.msr(63 downto 31) <= a_in(63 downto 31); + ctrl_tmp.msr(26 downto 22) <= a_in(26 downto 22); + ctrl_tmp.msr(15 downto 0) <= a_in(15 downto 0); + if a_in(MSR_PR) = '1' then ctrl_tmp.msr(MSR_EE) <= '1'; ctrl_tmp.msr(MSR_IR) <= '1'; ctrl_tmp.msr(MSR_DR) <= '1'; end if; + -- mark this as a branch so CFAR gets updated + is_branch := '1'; + taken_branch := '1'; + abs_branch := '1'; when OP_CNTZ => v.e.valid := '0'; @@ -679,7 +686,7 @@ begin v.busy := '1'; when OP_ISEL => crbit := to_integer(unsigned(insn_bc(e_in.insn))); - if e_in.cr(31-crbit) = '1' then + if cr_in(31-crbit) = '1' then result := a_in; else result := b_in; @@ -699,7 +706,7 @@ begin lo := (7-i)*4; hi := lo + 3; if i = scrnum then - newcrf := e_in.cr(hi downto lo); + newcrf := cr_in(hi downto lo); end if; end loop; for i in 0 to 7 loop @@ -717,14 +724,14 @@ begin bbnum := 31 - to_integer(unsigned(bb)); -- Bits 5-8 of cr_op give the truth table of the requested -- logical operation - cr_operands := e_in.cr(banum) & e_in.cr(bbnum); + cr_operands := cr_in(banum) & cr_in(bbnum); crresult := cr_op(5 + to_integer(unsigned(cr_operands))); v.e.write_cr_mask := num_to_fxm((31-btnum) / 4); for i in 0 to 31 loop if i = btnum then v.e.write_cr_data(i) := crresult; else - v.e.write_cr_data(i) := e_in.cr(i); + v.e.write_cr_data(i) := cr_in(i); end if; end loop; end if; @@ -757,6 +764,8 @@ begin spr_val(31 downto 0) := ctrl.tb(63 downto 32); when SPR_DEC => spr_val := ctrl.dec; + when SPR_CFAR => + spr_val := ctrl.cfar; when 724 => -- LOG_ADDR SPR spr_val := log_wr_addr & r.log_addr_spr; when 725 => -- LOG_DATA SPR @@ -774,7 +783,7 @@ begin when OP_MFCR => if e_in.insn(20) = '0' then -- mfcr - result := x"00000000" & e_in.cr; + result := x"00000000" & cr_in; else -- mfocrf crnum := fxm_to_num(insn_fxm(e_in.insn)); @@ -783,7 +792,7 @@ begin lo := (7-i)*4; hi := lo + 3; if crnum = i then - result(hi downto lo) := e_in.cr(hi downto lo); + result(hi downto lo) := cr_in(hi downto lo); end if; end loop; end if; @@ -853,8 +862,8 @@ begin result_en := '1'; when OP_ISYNC => - f_out.redirect <= '1'; - f_out.redirect_nia <= next_nia; + v.f.redirect := '1'; + v.f.redirect_nia := next_nia; when OP_ICBI => icache_inval <= '1'; @@ -879,16 +888,21 @@ begin v.e.rc := e_in.rc and valid_in; -- Mispredicted branches cause a redirect - if is_branch = '1' and taken_branch /= e_in.br_pred then - f_out.redirect <= '1'; + if is_branch = '1' then if taken_branch = '1' then + ctrl_tmp.cfar <= e_in.nia; + end if; + if e_in.br_pred = '0' then if abs_branch = '1' then - f_out.redirect_nia <= b_in; + v.f.redirect_nia := b_in; else - f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in)); + v.f.redirect_nia := std_ulogic_vector(signed(e_in.nia) + signed(b_in)); end if; else - f_out.redirect_nia <= next_nia; + v.f.redirect_nia := next_nia; + end if; + if taken_branch /= e_in.br_pred then + v.f.redirect := '1'; end if; end if; @@ -917,6 +931,8 @@ begin lv.valid := '1'; end if; + elsif r.f.redirect = '1' then + v.e.valid := '1'; elsif r.lr_update = '1' then v.e.exc_write_enable := '1'; v.e.exc_write_data := r.next_lr; @@ -973,8 +989,7 @@ begin if illegal = '1' then exception := '1'; - ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#700#, 64)); - ctrl_tmp.srr1 <= msr_copy(ctrl.msr); + v.f.redirect_nia := std_logic_vector(to_unsigned(16#700#, 64)); -- Since we aren't doing Hypervisor emulation assist (0xe40) we -- set bit 44 to indicate we have an illegal ctrl_tmp.srr1(63 - 44) <= '1'; @@ -985,23 +1000,19 @@ begin if exception_nextpc = '1' then v.e.exc_write_data := next_nia; end if; - ctrl_tmp.irq_state <= WRITE_SRR1; - v.busy := '1'; - v.e.valid := '0'; end if; v.e.write_data := result; - v.e.write_enable := result_en; + v.e.write_enable := result_en and not exception; -- generate DSI or DSegI for load/store exceptions -- or ISI or ISegI for instruction fetch exceptions if l_in.exception = '1' then - ctrl_tmp.srr1 <= msr_copy(ctrl.msr); if l_in.instr_fault = '0' then if l_in.segment_fault = '0' then - ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#300#, 64)); + v.f.redirect_nia := std_logic_vector(to_unsigned(16#300#, 64)); else - ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#380#, 64)); + v.f.redirect_nia := std_logic_vector(to_unsigned(16#380#, 64)); end if; else if l_in.segment_fault = '0' then @@ -1009,16 +1020,27 @@ begin ctrl_tmp.srr1(63 - 35) <= l_in.perm_error; -- noexec fault ctrl_tmp.srr1(63 - 44) <= l_in.badtree; ctrl_tmp.srr1(63 - 45) <= l_in.rc_error; - ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#400#, 64)); + v.f.redirect_nia := std_logic_vector(to_unsigned(16#400#, 64)); else - ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#480#, 64)); + v.f.redirect_nia := std_logic_vector(to_unsigned(16#480#, 64)); end if; end if; v.e.exc_write_enable := '1'; v.e.exc_write_reg := fast_spr_num(SPR_SRR0); v.e.exc_write_data := r.last_nia; report "ldst exception writing srr0=" & to_hstring(r.last_nia); + end if; + + if exception = '1' or l_in.exception = '1' then ctrl_tmp.irq_state <= WRITE_SRR1; + v.f.redirect := '1'; + v.f.virt_mode := '0'; + v.f.priv_mode := '1'; + end if; + + if v.f.redirect = '1' then + v.busy := '1'; + v.e.valid := '0'; end if; -- Outputs to loadstore1 (async) @@ -1049,7 +1071,7 @@ begin rin <= v; -- update outputs - --f_out <= r.f; + f_out <= r.f; l_out <= lv; e_out <= r.e; flush_out <= f_out.redirect; diff --git a/fetch1.vhdl b/fetch1.vhdl index 0d9c6f7..a56f33d 100644 --- a/fetch1.vhdl +++ b/fetch1.vhdl @@ -83,11 +83,11 @@ begin v.priv_mode := '1'; v_int.stop_state := RUNNING; elsif e_in.redirect = '1' then - v.nia := e_in.redirect_nia; + v.nia := e_in.redirect_nia(63 downto 2) & "00"; v.virt_mode := e_in.virt_mode; v.priv_mode := e_in.priv_mode; elsif d_in.redirect = '1' then - v.nia := d_in.redirect_nia; + v.nia := d_in.redirect_nia(63 downto 2) & "00"; elsif stall_in = '0' then -- For debug stop/step to work properly we need a little bit of diff --git a/logical.vhdl b/logical.vhdl index 5e6abfa..0f53544 100644 --- a/logical.vhdl +++ b/logical.vhdl @@ -87,12 +87,19 @@ begin end if; case op is - when OP_AND => - tmp := rs and rb_adj; - when OP_OR => - tmp := rs or rb_adj; - when OP_XOR => - tmp := rs xor rb_adj; + when OP_AND | OP_OR | OP_XOR => + case op is + when OP_AND => + tmp := rs and rb_adj; + when OP_OR => + tmp := rs or rb_adj; + when others => + tmp := rs xor rb_adj; + end case; + if invert_out = '1' then + tmp := not tmp; + end if; + when OP_POPCNT => tmp := popcnt; when OP_PRTY => @@ -115,9 +122,6 @@ begin tmp(7 downto 0) := rs(7 downto 0); end case; - if invert_out = '1' then - tmp := not tmp; - end if; result <= tmp; end process;