Reviewer notes (free text before the first "diff --git" line is ignored by patch tools):
  * Purpose of the patch: add a Loadstore1ToExecute1Type record so loadstore1 can
    report a dcache error to execute1, which then raises a DSI (vector 0x300);
    dcache gains an r0_valid signal that also cancels the request held in r0
    when r1.error_done is set.
  * This patch was recovered from a copy whose newlines had been collapsed.
    Line breaks were rebuilt so that every hunk matches the line counts in its
    "@@" header; leading indentation is a best guess -- verify against the
    target tree (or apply with whitespace-insensitive matching).
  * execute1.vhdl hunk "-446,9 +448,9" removes and re-adds three textually
    identical lines; presumably a whitespace-only change whose difference was
    lost in the collapse -- TODO confirm.
  * Fix added in review (loadstore1.vhdl, FIRST_ACK_WAIT error path): set
    "addr := r.addr" before raising the exception, so v.dar is loaded from the
    latched request address, matching what LAST_ACK_WAIT already does, instead
    of from the default "addr := lsu_sum".  Hunk header adjusted from +319,43
    to +319,44 and the following hunk from +398 to +399; the post-image blob
    id on the loadstore1.vhdl "index" line is therefore stale.

diff --git a/common.vhdl b/common.vhdl
index 6741044..59b3744 100644
--- a/common.vhdl
+++ b/common.vhdl
@@ -238,6 +238,10 @@ package common is
         reserve => '0', rc => '0', virt_mode => '0',
         spr_num => 0, others => (others => '0'));
 
+    type Loadstore1ToExecute1Type is record
+        exception : std_ulogic;
+    end record;
+
     type Loadstore1ToDcacheType is record
         valid : std_ulogic;
         load : std_ulogic; -- is this a load
diff --git a/core.vhdl b/core.vhdl
index acb37cc..0cb2ecd 100644
--- a/core.vhdl
+++ b/core.vhdl
@@ -63,6 +63,7 @@ architecture behave of core is
 
     -- load store signals
     signal execute1_to_loadstore1: Execute1ToLoadstore1Type;
+    signal loadstore1_to_execute1: Loadstore1ToExecute1Type;
     signal loadstore1_to_writeback: Loadstore1ToWritebackType;
 
     -- dcache signals
@@ -251,6 +252,7 @@ begin
             stall_out => ex1_stall_out,
             e_in => decode2_to_execute1,
             i_in => xics_in,
+            l_in => loadstore1_to_execute1,
             l_out => execute1_to_loadstore1,
             f_out => execute1_to_fetch1,
             e_out => execute1_to_writeback,
@@ -264,6 +266,7 @@ begin
             clk => clk,
             rst => core_rst,
             l_in => execute1_to_loadstore1,
+            e_out => loadstore1_to_execute1,
             l_out => loadstore1_to_writeback,
             d_out => loadstore1_to_dcache,
             d_in => dcache_to_loadstore1,
diff --git a/dcache.vhdl b/dcache.vhdl
index 3464c0d..7895877 100644
--- a/dcache.vhdl
+++ b/dcache.vhdl
@@ -147,6 +147,7 @@ architecture rtl of dcache is
     attribute ram_style of dtlb_ptes : signal is "distributed";
 
     signal r0 : Loadstore1ToDcacheType;
+    signal r0_valid : std_ulogic;
 
     -- Type of operation on a "valid" input
     type op_t is (OP_NONE,
@@ -406,6 +407,10 @@ begin
         end if;
     end process;
 
+    -- Hold off the request in r0 when stalling,
+    -- and cancel it if we get an error in a previous request.
+    r0_valid <= r0.valid and not stall_out and not r1.error_done;
+
     -- TLB
     -- Operates in the second cycle on the request latched in r0.
     -- TLB updates write the entry at the end of the second cycle.
@@ -478,7 +483,7 @@ begin
                 hit := '1';
             end if;
         end loop;
-        tlb_hit <= hit and r0.valid;
+        tlb_hit <= hit and r0_valid;
         tlb_hit_way <= hitway;
         pte <= read_tlb_pte(hitway, tlb_pte_way);
         valid_ra <= tlb_hit or not r0.virt_mode;
@@ -503,7 +508,7 @@ begin
         tlbie := '0';
         tlbia := '0';
         tlbwe := '0';
-        if r0.valid = '1' and stall_out = '0' and r0.tlbie = '1' then
+        if r0_valid = '1' and r0.tlbie = '1' then
             if r0.addr(11 downto 10) /= "00" then
                 tlbia := '1';
             elsif r0.addr(9) = '1' then
@@ -596,7 +601,7 @@ begin
         req_tag <= get_tag(ra);
 
         -- Only do anything if not being stalled by stage 1
-        go := r0.valid and not stall_out and not r0.tlbie;
+        go := r0_valid and not r0.tlbie;
 
         -- Calculate address of beginning of cache line, will be
         -- used for cache miss processing if needed
@@ -697,7 +702,7 @@ begin
         cancel_store <= '0';
         set_rsrv <= '0';
         clear_rsrv <= '0';
-        if stall_out = '0' and r0.valid = '1' and r0.reserve = '1' then
+        if r0_valid = '1' and r0.reserve = '1' then
             -- XXX generate alignment interrupt if address is not aligned
             -- XXX or if r0.nc = '1'
             if r0.load = '1' then
@@ -920,7 +925,7 @@ begin
             end if;
 
             -- complete tlbies in the third cycle
-            r1.tlbie_done <= r0.valid and r0.tlbie and not stall_out;
+            r1.tlbie_done <= r0_valid and r0.tlbie;
         end if;
     end process;
 
diff --git a/execute1.vhdl b/execute1.vhdl
index 98b95dc..e2cb651 100644
--- a/execute1.vhdl
+++ b/execute1.vhdl
@@ -23,6 +23,7 @@ entity execute1 is
         stall_out : out std_ulogic;
 
         e_in : in Decode2ToExecute1Type;
+        l_in : in Loadstore1ToExecute1Type;
 
         i_in : in XicsToExecute1Type;
 
@@ -51,6 +52,7 @@ architecture behaviour of execute1 is
         slow_op_rc : std_ulogic;
         slow_op_oe : std_ulogic;
         slow_op_xerc : xer_common_t;
+        ldst_nia : std_ulogic_vector(63 downto 0);
     end record;
     constant reg_type_init : reg_type :=
         (e => Execute1ToWritebackInit, lr_update => '0',
@@ -446,9 +448,9 @@ begin
             v.e.exc_write_reg := fast_spr_num(SPR_SRR0);
             v.e.exc_write_data := e_in.nia;
 
-            if ctrl.irq_state = WRITE_SRR1 then
-                v.e.exc_write_reg := fast_spr_num(SPR_SRR1);
-                v.e.exc_write_data := ctrl.srr1;
+            if ctrl.irq_state = WRITE_SRR1 then
+                v.e.exc_write_reg := fast_spr_num(SPR_SRR1);
+                v.e.exc_write_data := ctrl.srr1;
                 v.e.exc_write_enable := '1';
                 ctrl_tmp.msr(MSR_SF) <= '1';
                 ctrl_tmp.msr(MSR_EE) <= '0';
@@ -899,6 +901,7 @@ begin
 
         elsif e_in.valid = '1' then
             -- instruction for other units, i.e. LDST
+            v.ldst_nia := e_in.nia;
             v.e.valid := '0';
             if e_in.unit = LDST then
                 lv.valid := '1';
@@ -969,6 +972,17 @@ begin
         v.e.write_data := result;
         v.e.write_enable := result_en;
 
+        -- generate DSI for load/store exceptions
+        if l_in.exception = '1' then
+            ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#300#, 64));
+            ctrl_tmp.srr1 <= msr_copy(ctrl.msr);
+            v.e.exc_write_enable := '1';
+            v.e.exc_write_reg := fast_spr_num(SPR_SRR0);
+            v.e.exc_write_data := r.ldst_nia;
+            ctrl_tmp.irq_state <= WRITE_SRR1;
+            v.e.valid := '1'; -- complete the original load or store
+        end if;
+
         -- Outputs to loadstore1 (async)
         lv.op := e_in.insn_type;
         lv.addr1 := a_in;
diff --git a/loadstore1.vhdl b/loadstore1.vhdl
index d5a59e8..6ab18f5 100644
--- a/loadstore1.vhdl
+++ b/loadstore1.vhdl
@@ -16,6 +16,7 @@ entity loadstore1 is
         rst : in std_ulogic;
 
         l_in : in Execute1ToLoadstore1Type;
+        e_out : out Loadstore1ToExecute1Type;
         l_out : out Loadstore1ToWritebackType;
 
         d_out : out Loadstore1ToDcacheType;
@@ -142,6 +143,9 @@ begin
         variable mfspr : std_ulogic;
         variable sprn : std_ulogic_vector(9 downto 0);
         variable sprval : std_ulogic_vector(63 downto 0);
+        variable exception : std_ulogic;
+        variable next_addr : std_ulogic_vector(63 downto 0);
+        variable dsisr : std_ulogic_vector(31 downto 0);
     begin
         v := r;
         req := '0';
@@ -151,6 +155,8 @@ begin
         addr := lsu_sum;
         mfspr := '0';
         sprval := (others => '0'); -- avoid inferred latches
+        exception := '0';
+        dsisr := (others => '0');
 
         write_enable := '0';
         do_update := '0';
@@ -204,6 +210,9 @@ begin
             end case;
         end loop;
 
+        -- compute (addr + 8) & ~7 for the second doubleword when unaligned
+        next_addr := std_ulogic_vector(unsigned(r.addr(63 downto 3)) + 1) & "000";
+
         case r.state is
         when IDLE =>
             if l_in.valid = '1' then
@@ -301,8 +310,7 @@ begin
             end if;
 
         when SECOND_REQ =>
-            -- compute (addr + 8) & ~7 for the second doubleword when unaligned
-            addr := std_ulogic_vector(unsigned(r.addr(63 downto 3)) + 1) & "000";
+            addr := next_addr;
             byte_sel := r.second_bytes;
             req := '1';
             stall := '1';
@@ -311,25 +319,44 @@ begin
         when FIRST_ACK_WAIT =>
             stall := '1';
             if d_in.valid = '1' then
-                v.state := LAST_ACK_WAIT;
-                if r.load = '1' then
-                    v.load_data := data_permuted;
+                if d_in.error = '1' then
+                    -- dcache will discard the second request
+                    addr := r.addr; -- fault is on the first doubleword; this is what goes into DAR
+                    exception := '1';
+                    dsisr(30) := d_in.tlb_miss;
+                    v.state := IDLE;
+                else
+                    v.state := LAST_ACK_WAIT;
+                    if r.load = '1' then
+                        v.load_data := data_permuted;
+                    end if;
                 end if;
             end if;
 
         when LAST_ACK_WAIT =>
             stall := '1';
             if d_in.valid = '1' then
-                write_enable := r.load;
-                if r.load = '1' and r.update = '1' then
-                    -- loads with rA update need an extra cycle
-                    v.state := LD_UPDATE;
-                else
-                    -- stores write back rA update in this cycle
-                    do_update := r.update;
-                    stall := '0';
-                    done := '1';
+                if d_in.error = '1' then
+                    if two_dwords = '1' then
+                        addr := next_addr;
+                    else
+                        addr := r.addr;
+                    end if;
+                    exception := '1';
+                    dsisr(30) := d_in.tlb_miss;
                     v.state := IDLE;
+                else
+                    write_enable := r.load;
+                    if r.load = '1' and r.update = '1' then
+                        -- loads with rA update need an extra cycle
+                        v.state := LD_UPDATE;
+                    else
+                        -- stores write back rA update in this cycle
+                        do_update := r.update;
+                        stall := '0';
+                        done := '1';
+                        v.state := IDLE;
+                    end if;
                 end if;
             end if;
 
@@ -372,6 +399,13 @@ begin
         l_out.rc <= r.rc and done;
         l_out.store_done <= d_in.store_done;
 
+        -- update exception info back to execute1
+        e_out.exception <= exception;
+        if exception = '1' then
+            v.dar := addr;
+            v.dsisr := dsisr;
+        end if;
+
         stall_out <= stall;
 
         -- Update registers