From 3cd3449b4b88e025ff9412f82737747b0c6d938a Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 23 Dec 2020 11:13:21 +1100 Subject: [PATCH] core: Move redirect and interrupt delivery logic to writeback This moves the logic for redirecting fetching and writing SRR0 and SRR1 to writeback. The aim is that ultimately units other than execute1 can send their interrupts to writeback along with their instruction completions, so that there can be multiple instructions in flight without needing execute1 to keep track of the address of each outstanding instruction. Signed-off-by: Paul Mackerras --- common.vhdl | 55 ++++++------ control.vhdl | 3 +- core.vhdl | 15 +++- execute1.vhdl | 225 ++++++++++++++++--------------------------------- fetch1.vhdl | 34 ++++---- writeback.vhdl | 72 +++++++++++++++- 6 files changed, 198 insertions(+), 206 deletions(-) diff --git a/common.vhdl b/common.vhdl index a3a95f4..b2d6b13 100644 --- a/common.vhdl +++ b/common.vhdl @@ -139,8 +139,6 @@ package common is constant instr_tag_init : instr_tag_t := (tag => 0, valid => '0'); function tag_match(tag1 : instr_tag_t; tag2 : instr_tag_t) return boolean; - type irq_state_t is (WRITE_SRR0, WRITE_SRR1); - -- For now, fixed 16 sources, make this either a parametric -- package of some sort or an unconstrainted array. type ics_to_icp_t is record @@ -157,8 +155,6 @@ package common is dec: std_ulogic_vector(63 downto 0); msr: std_ulogic_vector(63 downto 0); cfar: std_ulogic_vector(63 downto 0); - irq_state : irq_state_t; - srr1: std_ulogic_vector(63 downto 0); end record; type Fetch1ToIcacheType is record @@ -329,22 +325,6 @@ package common is read_xerc_data : xer_common_t; end record; - type Execute1ToFetch1Type is record - redirect: std_ulogic; - virt_mode: std_ulogic; - priv_mode: std_ulogic; - big_endian: std_ulogic; - mode_32bit: std_ulogic; - redirect_nia: std_ulogic_vector(63 downto 0); - br_nia : std_ulogic_vector(63 downto 0); - br_last : std_ulogic; - br_taken : std_ulogic; - end record; - constant Execute1ToFetch1Init : Execute1ToFetch1Type := (redirect => '0', virt_mode => '0', - priv_mode => '0', big_endian => '0', - mode_32bit => '0', br_taken => '0', - br_last => '0', others => (others => '0')); - type Execute1ToLoadstore1Type is record valid : std_ulogic; op : insn_type_t; -- what ld/st or m[tf]spr or TLB op to do @@ -492,17 +472,26 @@ package common is write_cr_data : std_ulogic_vector(31 downto 0); write_xerc_enable : std_ulogic; xerc : xer_common_t; - exc_write_enable : std_ulogic; - exc_write_reg : gspr_index_t; - exc_write_data : std_ulogic_vector(63 downto 0); + interrupt : std_ulogic; + intr_vec : integer range 0 to 16#fff#; + redirect: std_ulogic; + redir_mode: std_ulogic_vector(3 downto 0); + last_nia: std_ulogic_vector(63 downto 0); + br_offset: std_ulogic_vector(63 downto 0); + br_last: std_ulogic; + br_taken: std_ulogic; + abs_br: std_ulogic; + srr1: std_ulogic_vector(63 downto 0); end record; constant Execute1ToWritebackInit : Execute1ToWritebackType := (valid => '0', instr_tag => instr_tag_init, rc => '0', mode_32bit => '0', - write_enable => '0', write_cr_enable => '0', exc_write_enable => '0', + write_enable => '0', write_cr_enable => '0', write_xerc_enable => '0', xerc => xerc_init, write_data => (others => '0'), write_cr_mask => (others => '0'), write_cr_data => (others => '0'), write_reg => (others => '0'), - exc_write_reg => (others => '0'), exc_write_data => (others => '0')); + interrupt => '0', intr_vec => 0, redirect => '0', redir_mode => "0000", + last_nia => (others => '0'), br_offset => (others => '0'), + br_last => '0', br_taken => '0', abs_br => '0', srr1 => (others => '0')); type Execute1ToFPUType is record valid : std_ulogic; @@ -556,6 +545,22 @@ package common is constant DividerToExecute1Init : DividerToExecute1Type := (valid => '0', overflow => '0', others => (others => '0')); + type WritebackToFetch1Type is record + redirect: std_ulogic; + virt_mode: std_ulogic; + priv_mode: std_ulogic; + big_endian: std_ulogic; + mode_32bit: std_ulogic; + redirect_nia: std_ulogic_vector(63 downto 0); + br_nia : std_ulogic_vector(63 downto 0); + br_last : std_ulogic; + br_taken : std_ulogic; + end record; + constant WritebackToFetch1Init : WritebackToFetch1Type := + (redirect => '0', virt_mode => '0', priv_mode => '0', big_endian => '0', + mode_32bit => '0', redirect_nia => (others => '0'), + br_last => '0', br_taken => '0', br_nia => (others => '0')); + type WritebackToRegisterFileType is record write_reg : gspr_index_t; write_data : std_ulogic_vector(63 downto 0); diff --git a/control.vhdl b/control.vhdl index 5c83f78..f14e350 100644 --- a/control.vhdl +++ b/control.vhdl @@ -235,8 +235,7 @@ begin stall_tmp := '0'; if flush_in = '1' then - -- expect to see complete_in next cycle - v_int.outstanding := 1; + v_int.outstanding := 0; elsif complete_in.valid = '1' then v_int.outstanding := r_int.outstanding - 1; end if; diff --git a/core.vhdl b/core.vhdl index 7dafd1c..e2a93b9 100644 --- a/core.vhdl +++ b/core.vhdl @@ -46,6 +46,7 @@ end core; architecture behave of core is -- icache signals signal fetch1_to_icache : Fetch1ToIcacheType; + signal writeback_to_fetch1: WritebackToFetch1Type; signal icache_to_decode1 : IcacheToDecode1Type; signal mmu_to_icache : MmuToIcacheType; @@ -66,7 +67,6 @@ architecture behave of core is -- execute signals signal execute1_to_writeback: Execute1ToWritebackType; - signal execute1_to_fetch1: Execute1ToFetch1Type; signal execute1_bypass: bypass_data_t; signal execute1_cr_bypass: cr_bypass_data_t; @@ -108,6 +108,7 @@ architecture behave of core is signal terminate: std_ulogic; signal core_rst: std_ulogic; signal icache_inv: std_ulogic; + signal do_interrupt: std_ulogic; -- Delayed/Latched resets and alt_reset signal rst_fetch1 : std_ulogic := '1'; @@ -119,6 +120,7 @@ architecture behave of core is signal rst_ex1 : std_ulogic := '1'; signal rst_fpu : std_ulogic := '1'; signal rst_ls1 : std_ulogic := '1'; + signal rst_wback : std_ulogic := '1'; signal rst_dbg : std_ulogic := '1'; signal alt_reset_d : std_ulogic; @@ -182,6 +184,7 @@ begin rst_ex1 <= core_rst; rst_fpu <= core_rst; rst_ls1 <= core_rst; + rst_wback <= core_rst; rst_dbg <= rst; alt_reset_d <= alt_reset; end if; @@ -202,7 +205,7 @@ begin inval_btc => ex1_icache_inval or mmu_to_icache.tlbie, stop_in => dbg_core_stop, d_in => decode1_to_fetch1, - e_in => execute1_to_fetch1, + w_in => writeback_to_fetch1, i_out => fetch1_to_icache, log_out => log_data(42 downto 0) ); @@ -324,14 +327,14 @@ begin port map ( clk => clk, rst => rst_ex1, - flush_out => flush, + flush_in => flush, busy_out => ex1_busy_out, e_in => decode2_to_execute1, l_in => loadstore1_to_execute1, fp_in => fpu_to_execute1, ext_irq_in => ext_irq, + interrupt_in => do_interrupt, l_out => execute1_to_loadstore1, - f_out => execute1_to_fetch1, fp_out => execute1_to_fpu, e_out => execute1_to_writeback, bypass_data => execute1_bypass, @@ -416,11 +419,15 @@ begin writeback_0: entity work.writeback port map ( clk => clk, + rst => rst_wback, + flush_out => flush, e_in => execute1_to_writeback, l_in => loadstore1_to_writeback, fp_in => fpu_to_writeback, w_out => writeback_to_register_file, c_out => writeback_to_cr_file, + f_out => writeback_to_fetch1, + interrupt_out => do_interrupt, complete_out => complete ); diff --git a/execute1.vhdl b/execute1.vhdl index 0b9ba0e..875e22c 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -22,7 +22,7 @@ entity execute1 is rst : in std_ulogic; -- asynchronous - flush_out : out std_ulogic; + flush_in : in std_ulogic; busy_out : out std_ulogic; e_in : in Decode2ToExecute1Type; @@ -30,10 +30,10 @@ entity execute1 is fp_in : in FPUToExecute1Type; ext_irq_in : std_ulogic; + interrupt_in : std_ulogic; -- asynchronous l_out : out Execute1ToLoadstore1Type; - f_out : out Execute1ToFetch1Type; fp_out : out Execute1ToFPUType; e_out : out Execute1ToWritebackType; @@ -61,21 +61,11 @@ architecture behaviour of execute1 is fp_exception_next : std_ulogic; trace_next : std_ulogic; prev_op : insn_type_t; - next_lr : std_ulogic_vector(63 downto 0); br_taken : std_ulogic; mul_in_progress : std_ulogic; mul_finish : std_ulogic; div_in_progress : std_ulogic; cntz_in_progress : std_ulogic; - last_nia : std_ulogic_vector(63 downto 0); - redirect : std_ulogic; - abs_br : std_ulogic; - taken_br : std_ulogic; - br_last : std_ulogic; - do_intr : std_ulogic; - vector : integer range 0 to 16#fff#; - br_offset : std_ulogic_vector(63 downto 0); - redir_mode : std_ulogic_vector(3 downto 0); log_addr_spr : std_ulogic_vector(31 downto 0); end record; constant reg_type_init : reg_type := @@ -84,9 +74,6 @@ architecture behaviour of execute1 is busy => '0', terminate => '0', fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL, br_taken => '0', mul_in_progress => '0', mul_finish => '0', div_in_progress => '0', cntz_in_progress => '0', - next_lr => (others => '0'), last_nia => (others => '0'), - redirect => '0', abs_br => '0', taken_br => '0', br_last => '0', do_intr => '0', vector => 0, - br_offset => (others => '0'), redir_mode => "0000", others => (others => '0')); signal r, rin : reg_type; @@ -96,8 +83,8 @@ architecture behaviour of execute1 is signal xerc_in : xer_common_t; signal valid_in : std_ulogic; - signal ctrl: ctrl_t := (irq_state => WRITE_SRR0, others => (others => '0')); - signal ctrl_tmp: ctrl_t := (irq_state => WRITE_SRR0, others => (others => '0')); + signal ctrl: ctrl_t := (others => (others => '0')); + signal ctrl_tmp: ctrl_t := (others => (others => '0')); signal right_shift, rot_clear_left, rot_clear_right: std_ulogic; signal rot_sign_ext: std_ulogic; signal rotator_result: std_ulogic_vector(63 downto 0); @@ -307,7 +294,7 @@ begin xerc_in <= r.e.xerc when r.e.write_xerc_enable = '1' or r.busy = '1' else e_in.xerc; busy_out <= l_in.busy or r.busy or fp_in.busy; - valid_in <= e_in.valid and not busy_out; + valid_in <= e_in.valid and not busy_out and not flush_in; terminate_out <= r.terminate; @@ -332,7 +319,6 @@ begin ctrl.tb <= (others => '0'); ctrl.dec <= (others => '0'); ctrl.msr <= (MSR_SF => '1', MSR_LE => '1', others => '0'); - ctrl.irq_state <= WRITE_SRR0; else r <= rin; ctrl <= ctrl_tmp; @@ -673,7 +659,6 @@ begin variable lv : Execute1ToLoadstore1Type; variable irq_valid : std_ulogic; variable exception : std_ulogic; - variable exception_nextpc : std_ulogic; variable illegal : std_ulogic; variable is_branch : std_ulogic; variable is_direct_branch : std_ulogic; @@ -682,7 +667,6 @@ begin variable spr_val : std_ulogic_vector(63 downto 0); variable do_trace : std_ulogic; variable hold_wr_data : std_ulogic; - variable f : Execute1ToFetch1Type; variable fv : Execute1ToFPUType; begin is_branch := '0'; @@ -693,15 +677,8 @@ begin v := r; v.e := Execute1ToWritebackInit; - v.redirect := '0'; - v.abs_br := '0'; - v.do_intr := '0'; - v.vector := 0; - v.br_offset := (others => '0'); - v.redir_mode := ctrl.msr(MSR_IR) & not ctrl.msr(MSR_PR) & - not ctrl.msr(MSR_LE) & not ctrl.msr(MSR_SF); - v.taken_br := '0'; - v.br_last := '0'; + v.e.redir_mode := ctrl.msr(MSR_IR) & not ctrl.msr(MSR_PR) & + not ctrl.msr(MSR_LE) & not ctrl.msr(MSR_SF); v.e.xerc := xerc_in; lv := Execute1ToLoadstore1Init; @@ -725,11 +702,11 @@ begin irq_valid := '0'; if ctrl.msr(MSR_EE) = '1' then if ctrl.dec(63) = '1' then - v.vector := 16#900#; + v.e.intr_vec := 16#900#; report "IRQ valid: DEC"; irq_valid := '1'; elsif ext_irq_in = '1' then - v.vector := 16#500#; + v.e.intr_vec := 16#500#; report "IRQ valid: External"; irq_valid := '1'; end if; @@ -748,18 +725,13 @@ begin rot_clear_right <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCR else '0'; rot_sign_ext <= '1' when e_in.insn_type = OP_EXTSWSLI else '0'; - ctrl_tmp.srr1 <= msr_copy(ctrl.msr); - ctrl_tmp.irq_state <= WRITE_SRR0; + v.e.srr1 := msr_copy(ctrl.msr); exception := '0'; illegal := '0'; - exception_nextpc := '0'; - v.e.exc_write_enable := '0'; - v.e.exc_write_reg := fast_spr_num(SPR_SRR0); if valid_in = '1' then - v.e.exc_write_data := e_in.nia; - v.last_nia := e_in.nia; + v.e.last_nia := e_in.nia; else - v.e.exc_write_data := r.last_nia; + v.e.last_nia := r.e.last_nia; end if; v.e.mode_32bit := not ctrl.msr(MSR_SF); @@ -777,20 +749,20 @@ begin -- This is used for FP-type program interrupts that -- become pending due to MSR[FE0,FE1] changing from 00 to non-zero. exception := '1'; - v.vector := 16#700#; - ctrl_tmp.srr1(63 - 43) <= '1'; - ctrl_tmp.srr1(63 - 47) <= '1'; + v.e.intr_vec := 16#700#; + v.e.srr1(63 - 43) := '1'; + v.e.srr1(63 - 47) := '1'; elsif r.trace_next = '1' then -- Generate a trace interrupt rather than executing the next instruction -- or taking any asynchronous interrupt exception := '1'; - v.vector := 16#d00#; - ctrl_tmp.srr1(63 - 33) <= '1'; + v.e.intr_vec := 16#d00#; + v.e.srr1(63 - 33) := '1'; if r.prev_op = OP_LOAD or r.prev_op = OP_ICBI or r.prev_op = OP_ICBT or r.prev_op = OP_DCBT or r.prev_op = OP_DCBST or r.prev_op = OP_DCBF then - ctrl_tmp.srr1(63 - 35) <= '1'; + v.e.srr1(63 - 35) := '1'; elsif r.prev_op = OP_STORE or r.prev_op = OP_DCBZ or r.prev_op = OP_DCBTST then - ctrl_tmp.srr1(63 - 36) <= '1'; + v.e.srr1(63 - 36) := '1'; end if; elsif irq_valid = '1' then @@ -801,9 +773,9 @@ begin elsif ctrl.msr(MSR_PR) = '1' and instr_is_privileged(e_in.insn_type, e_in.insn) then -- generate a program interrupt exception := '1'; - v.vector := 16#700#; + v.e.intr_vec := 16#700#; -- set bit 45 to indicate privileged instruction type interrupt - ctrl_tmp.srr1(63 - 45) <= '1'; + v.e.srr1(63 - 45) := '1'; report "privileged instruction"; elsif not HAS_FPU and e_in.fac = FPU then @@ -813,14 +785,13 @@ begin elsif HAS_FPU and ctrl.msr(MSR_FP) = '0' and e_in.fac = FPU then -- generate a floating-point unavailable interrupt exception := '1'; - v.vector := 16#800#; + v.e.intr_vec := 16#800#; report "FP unavailable interrupt"; end if; end if; if valid_in = '1' and exception = '0' and illegal = '0' and e_in.unit = ALU then v.cur_instr := e_in; - v.next_lr := next_nia; v.e.valid := '1'; case_0: case e_in.insn_type is @@ -835,8 +806,8 @@ begin -- we need two cycles to write srr0 and 1 if e_in.insn(1) = '1' then exception := '1'; - exception_nextpc := '1'; - v.vector := 16#C00#; + v.e.intr_vec := 16#C00#; + v.e.last_nia := next_nia; report "sc"; else illegal := '1'; @@ -867,9 +838,9 @@ begin when OP_CMP => when OP_TRAP => -- trap instructions (tw, twi, td, tdi) - v.vector := 16#700#; + v.e.intr_vec := 16#700#; -- set bit 46 to say trap occurred - ctrl_tmp.srr1(63 - 46) <= '1'; + v.e.srr1(63 - 46) := '1'; if or (trapval and insn_to(e_in.insn)) = '1' then -- generate trap-type program interrupt exception := '1'; @@ -916,8 +887,8 @@ begin end if; when OP_RFID => - v.redir_mode := (a_in(MSR_IR) or a_in(MSR_PR)) & not a_in(MSR_PR) & - not a_in(MSR_LE) & not a_in(MSR_SF); + v.e.redir_mode := (a_in(MSR_IR) or a_in(MSR_PR)) & not a_in(MSR_PR) & + not a_in(MSR_LE) & not a_in(MSR_SF); -- Can't use msr_copy here because the partial function MSR -- bits should be left unchanged, not zeroed. ctrl_tmp.msr(63 downto 31) <= a_in(63 downto 31); @@ -1051,8 +1022,8 @@ begin when OP_SETB => when OP_ISYNC => - v.redirect := '1'; - v.br_offset := std_ulogic_vector(to_unsigned(4, 64)); + v.e.redirect := '1'; + v.e.br_offset := std_ulogic_vector(to_unsigned(4, 64)); when OP_ICBI => icache_inval <= '1'; @@ -1080,16 +1051,16 @@ begin ctrl_tmp.cfar <= e_in.nia; end if; if taken_branch = '1' then - v.br_offset := b_in; - v.abs_br := abs_branch; + v.e.br_offset := b_in; + v.e.abs_br := abs_branch; else - v.br_offset := std_ulogic_vector(to_unsigned(4, 64)); + v.e.br_offset := std_ulogic_vector(to_unsigned(4, 64)); end if; if taken_branch /= e_in.br_pred then - v.redirect := '1'; + v.e.redirect := '1'; end if; - v.br_last := is_direct_branch; - v.taken_br := taken_branch; + v.e.br_last := is_direct_branch; + v.e.br_taken := taken_branch; end if; elsif valid_in = '1' and exception = '0' and illegal = '0' then @@ -1110,28 +1081,7 @@ begin -- The following cases all occur when r.busy = 1 and therefore -- valid_in = 0. Hence they don't happen in the same cycle as any of -- the cases above which depend on valid_in = 1. - - if ctrl.irq_state = WRITE_SRR1 then - v.e.exc_write_reg := fast_spr_num(SPR_SRR1); - v.e.exc_write_data := ctrl.srr1; - v.e.exc_write_enable := '1'; - ctrl_tmp.msr(MSR_SF) <= '1'; - ctrl_tmp.msr(MSR_EE) <= '0'; - ctrl_tmp.msr(MSR_PR) <= '0'; - ctrl_tmp.msr(MSR_SE) <= '0'; - ctrl_tmp.msr(MSR_BE) <= '0'; - ctrl_tmp.msr(MSR_FP) <= '0'; - ctrl_tmp.msr(MSR_FE0) <= '0'; - ctrl_tmp.msr(MSR_FE1) <= '0'; - ctrl_tmp.msr(MSR_IR) <= '0'; - ctrl_tmp.msr(MSR_DR) <= '0'; - ctrl_tmp.msr(MSR_RI) <= '0'; - ctrl_tmp.msr(MSR_LE) <= '1'; - v.trace_next := '0'; - v.fp_exception_next := '0'; - report "Writing SRR1: " & to_hstring(ctrl.srr1); - - elsif r.cntz_in_progress = '1' then + if r.cntz_in_progress = '1' then -- cnt[lt]z always takes two cycles v.e.valid := '1'; elsif r.mul_in_progress = '1' or r.div_in_progress = '1' then @@ -1179,63 +1129,67 @@ begin -- The case where MSR[FE0,FE1] goes from zero to non-zero is -- handled above by mtmsrd and rfid setting v.fp_exception_next. if HAS_FPU and fp_in.interrupt = '1' then - v.vector := 16#700#; - ctrl_tmp.srr1(63 - 43) <= '1'; + v.e.intr_vec := 16#700#; + v.e.srr1(63 - 43) := '1'; exception := '1'; end if; if illegal = '1' or (HAS_FPU and fp_in.illegal = '1') then exception := '1'; - v.vector := 16#700#; + v.e.intr_vec := 16#700#; -- Since we aren't doing Hypervisor emulation assist (0xe40) we -- set bit 44 to indicate we have an illegal - ctrl_tmp.srr1(63 - 44) <= '1'; + v.e.srr1(63 - 44) := '1'; report "illegal"; end if; - if exception = '1' then - v.e.exc_write_enable := '1'; - if exception_nextpc = '1' then - v.e.exc_write_data := next_nia; - end if; - end if; -- generate DSI or DSegI for load/store exceptions -- or ISI or ISegI for instruction fetch exceptions if l_in.exception = '1' then if l_in.alignment = '1' then - v.vector := 16#600#; + v.e.intr_vec := 16#600#; elsif l_in.instr_fault = '0' then if l_in.segment_fault = '0' then - v.vector := 16#300#; + v.e.intr_vec := 16#300#; else - v.vector := 16#380#; + v.e.intr_vec := 16#380#; end if; else if l_in.segment_fault = '0' then - ctrl_tmp.srr1(63 - 33) <= l_in.invalid; - ctrl_tmp.srr1(63 - 35) <= l_in.perm_error; -- noexec fault - ctrl_tmp.srr1(63 - 44) <= l_in.badtree; - ctrl_tmp.srr1(63 - 45) <= l_in.rc_error; - v.vector := 16#400#; + v.e.srr1(63 - 33) := l_in.invalid; + v.e.srr1(63 - 35) := l_in.perm_error; -- noexec fault + v.e.srr1(63 - 44) := l_in.badtree; + v.e.srr1(63 - 45) := l_in.rc_error; + v.e.intr_vec := 16#400#; else - v.vector := 16#480#; + v.e.intr_vec := 16#480#; end if; end if; - v.e.exc_write_enable := '1'; - v.e.exc_write_reg := fast_spr_num(SPR_SRR0); - report "ldst exception writing srr0=" & to_hstring(r.last_nia); end if; - if exception = '1' or l_in.exception = '1' then - ctrl_tmp.irq_state <= WRITE_SRR1; - v.redirect := '1'; - v.do_intr := '1'; - end if; + v.e.interrupt := exception or l_in.exception; if do_trace = '1' then v.trace_next := '1'; end if; + if interrupt_in = '1' then + ctrl_tmp.msr(MSR_SF) <= '1'; + ctrl_tmp.msr(MSR_EE) <= '0'; + ctrl_tmp.msr(MSR_PR) <= '0'; + ctrl_tmp.msr(MSR_SE) <= '0'; + ctrl_tmp.msr(MSR_BE) <= '0'; + ctrl_tmp.msr(MSR_FP) <= '0'; + ctrl_tmp.msr(MSR_FE0) <= '0'; + ctrl_tmp.msr(MSR_FE1) <= '0'; + ctrl_tmp.msr(MSR_IR) <= '0'; + ctrl_tmp.msr(MSR_DR) <= '0'; + ctrl_tmp.msr(MSR_RI) <= '0'; + ctrl_tmp.msr(MSR_LE) <= '1'; + v.trace_next := '0'; + v.fp_exception_next := '0'; + end if; + if hold_wr_data = '0' then v.e.write_data := alu_result; else @@ -1263,41 +1217,6 @@ begin end if; end loop; - -- Defer completion for one cycle when redirecting. - -- This also ensures r.busy = 1 when ctrl.irq_state = WRITE_SRR1 - if v.redirect = '1' then - v.busy := '1'; - v.e.valid := '0'; - end if; - if r.redirect = '1' then - v.e.valid := '1'; - end if; - - -- Outputs to fetch1 - f.redirect := r.redirect; - f.br_nia := r.last_nia; - f.br_last := r.br_last and not r.do_intr; - f.br_taken := r.taken_br; - if r.do_intr = '1' then - f.redirect_nia := std_ulogic_vector(to_unsigned(r.vector, 64)); - f.virt_mode := '0'; - f.priv_mode := '1'; - -- XXX need an interrupt LE bit here, e.g. from LPCR - f.big_endian := '0'; - f.mode_32bit := '0'; - else - if r.abs_br = '1' then - f.redirect_nia := r.br_offset; - else - f.redirect_nia := std_ulogic_vector(unsigned(r.last_nia) + unsigned(r.br_offset)); - end if; - -- send MSR[IR], ~MSR[PR], ~MSR[LE] and ~MSR[SF] up to fetch1 - f.virt_mode := r.redir_mode(3); - f.priv_mode := r.redir_mode(2); - f.big_endian := r.redir_mode(1); - f.mode_32bit := r.redir_mode(0); - end if; - -- Outputs to loadstore1 (async) lv.op := e_in.insn_type; lv.nia := e_in.nia; @@ -1344,11 +1263,9 @@ begin rin <= v; -- update outputs - f_out <= f; l_out <= lv; e_out <= r.e; fp_out <= fv; - flush_out <= f_out.redirect; exception_log <= exception; irq_valid_log <= irq_valid; @@ -1364,13 +1281,13 @@ begin ctrl.msr(MSR_IR) & ctrl.msr(MSR_DR) & exception_log & irq_valid_log & - std_ulogic_vector(to_unsigned(irq_state_t'pos(ctrl.irq_state), 1)) & + interrupt_in & "000" & r.e.write_enable & r.e.valid & - f_out.redirect & + (r.e.redirect or r.e.interrupt) & r.busy & - flush_out; + flush_in; end if; end process; log_out <= log_data; diff --git a/fetch1.vhdl b/fetch1.vhdl index 8ca7e57..788a76d 100644 --- a/fetch1.vhdl +++ b/fetch1.vhdl @@ -22,8 +22,8 @@ entity fetch1 is stop_in : in std_ulogic; alt_reset_in : in std_ulogic; - -- redirect from execution unit - e_in : in Execute1ToFetch1Type; + -- redirect from writeback unit + w_in : in WritebackToFetch1Type; -- redirect from decode1 d_in : in Decode1ToFetch1Type; @@ -70,12 +70,12 @@ begin " P:" & std_ulogic'image(r_next.priv_mode) & " E:" & std_ulogic'image(r_next.big_endian) & " 32:" & std_ulogic'image(r_next_int.mode_32bit) & - " R:" & std_ulogic'image(e_in.redirect) & std_ulogic'image(d_in.redirect) & + " R:" & std_ulogic'image(w_in.redirect) & std_ulogic'image(d_in.redirect) & " S:" & std_ulogic'image(stall_in) & " T:" & std_ulogic'image(stop_in) & " nia:" & to_hstring(r_next.nia); end if; - if rst = '1' or e_in.redirect = '1' or d_in.redirect = '1' or stall_in = '0' then + if rst = '1' or w_in.redirect = '1' or d_in.redirect = '1' or stall_in = '0' then r.virt_mode <= r_next.virt_mode; r.priv_mode <= r_next.priv_mode; r.big_endian <= r_next.big_endian; @@ -109,11 +109,11 @@ begin signal btc_wr_addr : std_ulogic_vector(BTC_ADDR_BITS - 1 downto 0); signal btc_wr_v : std_ulogic; begin - btc_wr_data <= e_in.br_nia(63 downto BTC_ADDR_BITS + 2) & - e_in.redirect_nia(63 downto 2); - btc_wr_addr <= e_in.br_nia(BTC_ADDR_BITS + 1 downto 2); - btc_wr <= e_in.br_last; - btc_wr_v <= e_in.br_taken; + btc_wr_data <= w_in.br_nia(63 downto BTC_ADDR_BITS + 2) & + w_in.redirect_nia(63 downto 2); + btc_wr_addr <= w_in.br_nia(BTC_ADDR_BITS + 1 downto 2); + btc_wr <= w_in.br_last; + btc_wr_v <= w_in.br_taken; btc_ram : process(clk) variable raddr : unsigned(BTC_ADDR_BITS - 1 downto 0); @@ -158,15 +158,15 @@ begin v.big_endian := '0'; v_int.mode_32bit := '0'; v_int.predicted_nia := (others => '0'); - elsif e_in.redirect = '1' then - v.nia := e_in.redirect_nia(63 downto 2) & "00"; - if e_in.mode_32bit = '1' then + elsif w_in.redirect = '1' then + v.nia := w_in.redirect_nia(63 downto 2) & "00"; + if w_in.mode_32bit = '1' then v.nia(63 downto 32) := (others => '0'); end if; - v.virt_mode := e_in.virt_mode; - v.priv_mode := e_in.priv_mode; - v.big_endian := e_in.big_endian; - v_int.mode_32bit := e_in.mode_32bit; + v.virt_mode := w_in.virt_mode; + v.priv_mode := w_in.priv_mode; + v.big_endian := w_in.big_endian; + v_int.mode_32bit := w_in.mode_32bit; elsif d_in.redirect = '1' then v.nia := d_in.redirect_nia(63 downto 2) & "00"; if r_int.mode_32bit = '1' then @@ -191,7 +191,7 @@ begin -- If the last NIA value went down with a stop mark, it didn't get -- executed, and hence we shouldn't increment NIA. - advance_nia <= rst or e_in.redirect or d_in.redirect or (not r.stop_mark and not stall_in); + advance_nia <= rst or w_in.redirect or d_in.redirect or (not r.stop_mark and not stall_in); r_next <= v; r_next_int <= v_int; diff --git a/writeback.vhdl b/writeback.vhdl index 044b1fb..c7632ea 100644 --- a/writeback.vhdl +++ b/writeback.vhdl @@ -9,6 +9,7 @@ use work.crhelpers.all; entity writeback is port ( clk : in std_ulogic; + rst : in std_ulogic; e_in : in Execute1ToWritebackType; l_in : in Loadstore1ToWritebackType; @@ -16,12 +17,24 @@ entity writeback is w_out : out WritebackToRegisterFileType; c_out : out WritebackToCrFileType; + f_out : out WritebackToFetch1Type; + flush_out : out std_ulogic; + interrupt_out: out std_ulogic; complete_out : out instr_tag_t ); end entity writeback; architecture behaviour of writeback is + type irq_state_t is (WRITE_SRR0, WRITE_SRR1); + + type reg_type is record + state : irq_state_t; + srr1 : std_ulogic_vector(63 downto 0); + end record; + + signal r, rin : reg_type; + begin writeback_0: process(clk) variable x : std_ulogic_vector(0 downto 0); @@ -29,6 +42,13 @@ begin variable w : std_ulogic_vector(0 downto 0); begin if rising_edge(clk) then + if rst = '1' then + r.state <= WRITE_SRR0; + r.srr1 <= (others => '0'); + else + r <= rin; + end if; + -- Do consistency checks only on the clock edge x(0) := e_in.valid; y(0) := l_in.valid; @@ -36,7 +56,7 @@ begin assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + to_integer(unsigned(w))) <= 1 severity failure; - x(0) := e_in.write_enable or e_in.exc_write_enable; + x(0) := e_in.write_enable; y(0) := l_in.write_enable; w(0) := fp_in.write_enable; assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) + @@ -55,6 +75,8 @@ begin end process; writeback_1: process(all) + variable v : reg_type; + variable f : WritebackToFetch1Type; variable cf: std_ulogic_vector(3 downto 0); variable zero : std_ulogic; variable sign : std_ulogic; @@ -62,6 +84,9 @@ begin begin w_out <= WritebackToRegisterFileInit; c_out <= WritebackToCrFileInit; + f := WritebackToFetch1Init; + interrupt_out <= '0'; + v := r; complete_out <= instr_tag_init; if e_in.valid = '1' then @@ -72,10 +97,19 @@ begin complete_out <= fp_in.instr_tag; end if; - if e_in.exc_write_enable = '1' then - w_out.write_reg <= e_in.exc_write_reg; - w_out.write_data <= e_in.exc_write_data; + if r.state = WRITE_SRR1 then + w_out.write_reg <= fast_spr_num(SPR_SRR1); + w_out.write_data <= r.srr1; + w_out.write_enable <= '1'; + interrupt_out <= '1'; + v.state := WRITE_SRR0; + + elsif e_in.interrupt = '1' then + w_out.write_reg <= fast_spr_num(SPR_SRR0); + w_out.write_data <= e_in.last_nia; w_out.write_enable <= '1'; + v.state := WRITE_SRR1; + v.srr1 := e_in.srr1; else if e_in.write_enable = '1' then w_out.write_reg <= e_in.write_reg; @@ -142,5 +176,35 @@ begin c_out.write_cr_data(31 downto 28) <= cf; end if; end if; + + -- Outputs to fetch1 + f.redirect := e_in.redirect or e_in.interrupt; + f.br_nia := e_in.last_nia; + f.br_last := e_in.br_last and not e_in.interrupt; + f.br_taken := e_in.br_taken; + if e_in.interrupt = '1' then + f.redirect_nia := std_ulogic_vector(to_unsigned(e_in.intr_vec, 64)); + f.virt_mode := '0'; + f.priv_mode := '1'; + -- XXX need an interrupt LE bit here, e.g. from LPCR + f.big_endian := '0'; + f.mode_32bit := '0'; + else + if e_in.abs_br = '1' then + f.redirect_nia := e_in.br_offset; + else + f.redirect_nia := std_ulogic_vector(unsigned(e_in.last_nia) + unsigned(e_in.br_offset)); + end if; + -- send MSR[IR], ~MSR[PR], ~MSR[LE] and ~MSR[SF] up to fetch1 + f.virt_mode := e_in.redir_mode(3); + f.priv_mode := e_in.redir_mode(2); + f.big_endian := e_in.redir_mode(1); + f.mode_32bit := e_in.redir_mode(0); + end if; + + f_out <= f; + flush_out <= f_out.redirect; + + rin <= v; end process; end;