From b55c9cc2987d30974adb06d2130ad774944252fd Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 28 Apr 2020 20:28:20 +1000 Subject: [PATCH 01/10] execute1: Improve architecture compliance of MSR and related instructions This makes our treatment of the MSR conform better with the ISA. - On reset, initialize the MSR to have the SF and LE bits set and all the others reset. For good measure initialize r properly too. - Fix the bit numbering in msr_copy (the code was using big-endian bit numbers, not little-endian). - Use constants like MSR_EE to index MSR bits instead of expressions like '63 - 48', for readability. - Set MSR[SF, LE] and clear MSR[PR, IR, DR, RI] on interrupts. - Copy the relevant fields for rfid instead of using msr_copy, because the partial function fields of the MSR should be left unchanged, not zeroed. Our implementation of rfid is like the architecture description of hrfid, because we don't implement hypervisor mode. - Return the whole MSR for mfmsr. - Implement the L field for mtmsrd (L=1 copies just EE and RI). - For mtmsrd with L=0, leave out the HV, ME and LE bits as per the arch. - For mtmsrd and rfid, if PR ends up set, then also set EE, IR and DR as per the arch. - A few other minor tidyups (no semantic change). 
Signed-off-by: Paul Mackerras --- common.vhdl | 9 ++++++ execute1.vhdl | 83 ++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 71 insertions(+), 21 deletions(-) diff --git a/common.vhdl b/common.vhdl index 9f6e96d..9041d32 100644 --- a/common.vhdl +++ b/common.vhdl @@ -7,6 +7,15 @@ use work.decode_types.all; package common is + -- MSR bit numbers + constant MSR_SF : integer := (63 - 0); -- Sixty-Four bit mode + constant MSR_EE : integer := (63 - 48); -- External interrupt Enable + constant MSR_PR : integer := (63 - 49); -- PRoblem state + constant MSR_IR : integer := (63 - 58); -- Instruction Relocation + constant MSR_DR : integer := (63 - 59); -- Data Relocation + constant MSR_RI : integer := (63 - 62); -- Recoverable Interrupt + constant MSR_LE : integer := (63 - 63); -- Little Endian + -- SPR numbers subtype spr_num_t is integer range 0 to 1023; diff --git a/execute1.vhdl b/execute1.vhdl index 2c0a558..0f4eea9 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -48,6 +48,11 @@ architecture behaviour of execute1 is slow_op_oe : std_ulogic; slow_op_xerc : xer_common_t; end record; + constant reg_type_init : reg_type := + (e => Execute1ToWritebackInit, lr_update => '0', + mul_in_progress => '0', div_in_progress => '0', cntz_in_progress => '0', + slow_op_rc => '0', slow_op_oe => '0', slow_op_xerc => xerc_init, + others => (others => '0')); signal r, rin : reg_type; @@ -124,11 +129,11 @@ architecture behaviour of execute1 is -- tion MSR bits are not saved or restored. -- Full function MSR bits lie in the range 0:32, 37:41, and -- 48:63, and partial function MSR bits lie in the range - -- 33:36 and 42:47. + -- 33:36 and 42:47. (Note this is IBM bit numbering). 
msr_out := (others => '0'); - msr_out(32 downto 0) := msr(32 downto 0); - msr_out(41 downto 37) := msr(41 downto 37); - msr_out(63 downto 48) := msr(63 downto 48); + msr_out(63 downto 31) := msr(63 downto 31); + msr_out(26 downto 22) := msr(26 downto 22); + msr_out(15 downto 0) := msr(15 downto 0); return msr_out; end; @@ -193,14 +198,20 @@ begin execute1_0: process(clk) begin if rising_edge(clk) then - r <= rin; - ctrl <= ctrl_tmp; - assert not (r.lr_update = '1' and e_in.valid = '1') - report "LR update collision with valid in EX1" - severity failure; - if r.lr_update = '1' then - report "LR update to " & to_hstring(r.next_lr); - end if; + if rst = '1' then + r <= reg_type_init; + ctrl.msr <= (MSR_SF => '1', MSR_LE => '1', others => '0'); + ctrl.irq_state <= WRITE_SRR0; + else + r <= rin; + ctrl <= ctrl_tmp; + assert not (r.lr_update = '1' and e_in.valid = '1') + report "LR update collision with valid in EX1" + severity failure; + if r.lr_update = '1' then + report "LR update to " & to_hstring(r.next_lr); + end if; + end if; end if; end process; @@ -370,7 +381,7 @@ begin ctrl_tmp.dec <= std_ulogic_vector(unsigned(ctrl.dec) - 1); irq_valid := '0'; - if ctrl.msr(63 - 48) = '1' and ctrl.dec(63) = '1' then + if ctrl.msr(MSR_EE) = '1' and ctrl.dec(63) = '1' then report "IRQ valid"; irq_valid := '1'; end if; @@ -400,7 +411,13 @@ begin v.e.exc_write_reg := fast_spr_num(SPR_SRR1); v.e.exc_write_data := ctrl.srr1; v.e.exc_write_enable := '1'; - ctrl_tmp.msr(63 - 48) <= '0'; -- clear EE + ctrl_tmp.msr(MSR_SF) <= '1'; + ctrl_tmp.msr(MSR_EE) <= '0'; + ctrl_tmp.msr(MSR_PR) <= '0'; + ctrl_tmp.msr(MSR_IR) <= '0'; + ctrl_tmp.msr(MSR_DR) <= '0'; + ctrl_tmp.msr(MSR_RI) <= '0'; + ctrl_tmp.msr(MSR_LE) <= '1'; f_out.redirect <= '1'; f_out.redirect_nia <= ctrl.irq_nia; v.e.valid := e_in.valid; @@ -545,7 +562,7 @@ begin when OP_B => f_out.redirect <= '1'; if (insn_aa(e_in.insn)) then - f_out.redirect_nia <= std_ulogic_vector(signed(b_in)); + f_out.redirect_nia <= b_in; else 
f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in)); end if; @@ -561,7 +578,7 @@ begin if ppc_bc_taken(bo, bi, e_in.cr, a_in) = 1 then f_out.redirect <= '1'; if (insn_aa(e_in.insn)) then - f_out.redirect_nia <= std_ulogic_vector(signed(b_in)); + f_out.redirect_nia <= b_in; else f_out.redirect_nia <= std_ulogic_vector(signed(e_in.nia) + signed(b_in)); end if; @@ -584,7 +601,17 @@ begin when OP_RFID => f_out.redirect <= '1'; f_out.redirect_nia <= a_in(63 downto 2) & "00"; -- srr0 - ctrl_tmp.msr <= msr_copy(std_ulogic_vector(signed(b_in))); -- srr1 + -- Can't use msr_copy here because the partial function MSR + -- bits should be left unchanged, not zeroed. + ctrl_tmp.msr(63 downto 31) <= b_in(63 downto 31); + ctrl_tmp.msr(26 downto 22) <= b_in(26 downto 22); + ctrl_tmp.msr(15 downto 0) <= b_in(15 downto 0); + if b_in(MSR_PR) = '1' then + ctrl_tmp.msr(MSR_EE) <= '1'; + ctrl_tmp.msr(MSR_IR) <= '1'; + ctrl_tmp.msr(MSR_DR) <= '1'; + end if; + when OP_CMPB => result := ppc_cmpb(c_in, b_in); result_en := '1'; @@ -658,7 +685,7 @@ begin end loop; end if; when OP_MFMSR => - result := msr_copy(ctrl.msr); + result := ctrl.msr; result_en := '1'; when OP_MFSPR => report "MFSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) & @@ -714,9 +741,23 @@ begin v.e.write_cr_mask := num_to_fxm(crnum); end if; v.e.write_cr_data := c_in(31 downto 0); - when OP_MTMSRD => - -- FIXME handle just the bits we need to. 
- ctrl_tmp.msr <= msr_copy(c_in); + when OP_MTMSRD => + if e_in.insn(16) = '1' then + -- just update EE and RI + ctrl_tmp.msr(MSR_EE) <= c_in(MSR_EE); + ctrl_tmp.msr(MSR_RI) <= c_in(MSR_RI); + else + -- Architecture says to leave out bits 3 (HV), 51 (ME) + -- and 63 (LE) (IBM bit numbering) + ctrl_tmp.msr(63 downto 61) <= c_in(63 downto 61); + ctrl_tmp.msr(59 downto 13) <= c_in(59 downto 13); + ctrl_tmp.msr(11 downto 1) <= c_in(11 downto 1); + if c_in(MSR_PR) = '1' then + ctrl_tmp.msr(MSR_EE) <= '1'; + ctrl_tmp.msr(MSR_IR) <= '1'; + ctrl_tmp.msr(MSR_DR) <= '1'; + end if; + end if; when OP_MTSPR => report "MTSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) & "=" & to_hstring(c_in); From 74db0710678d4871843a783edfa602ed621c91d1 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 28 Apr 2020 19:38:58 +1000 Subject: [PATCH 02/10] execute1: Generate privileged instruction interrupts when MSR[PR] = 1 This adds logic to execute1 to check, when MSR[PR] = 1, whether each instruction arriving to be executed is a privileged instruction. If it is, a privileged-instruction type program interrupt is generated. For the mtspr and mfspr instructions, we need to look at bit 20 of the instruction (bit 4 of the SPR number) to determine if the SPR is privileged. 
Signed-off-by: Paul Mackerras --- execute1.vhdl | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/execute1.vhdl b/execute1.vhdl index 0f4eea9..9153b37 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -76,6 +76,28 @@ architecture behaviour of execute1 is signal x_to_divider: Execute1ToDividerType; signal divider_to_x: DividerToExecute1Type; + type privilege_level is (USER, SUPER); + type op_privilege_array is array(insn_type_t) of privilege_level; + constant op_privilege: op_privilege_array := ( + OP_ATTN => SUPER, + OP_MFMSR => SUPER, + OP_MTMSRD => SUPER, + OP_RFID => SUPER, + others => USER + ); + + function instr_is_privileged(op: insn_type_t; insn: std_ulogic_vector(31 downto 0)) + return boolean is + begin + if op_privilege(op) = SUPER then + return true; + elsif op = OP_MFSPR or op = OP_MTSPR then + return insn(20) = '1'; + else + return false; + end if; + end; + procedure set_carry(e: inout Execute1ToWritebackType; carry32 : in std_ulogic; carry : in std_ulogic) is @@ -432,6 +454,16 @@ begin ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#900#, 64)); ctrl_tmp.srr1 <= msr_copy(ctrl.msr); + elsif e_in.valid = '1' and ctrl.msr(MSR_PR) = '1' and + instr_is_privileged(e_in.insn_type, e_in.insn) then + -- generate a program interrupt + exception := '1'; + ctrl_tmp.irq_nia <= std_logic_vector(to_unsigned(16#700#, 64)); + ctrl_tmp.srr1 <= msr_copy(ctrl.msr); + -- set bit 45 to indicate privileged instruction type interrupt + ctrl_tmp.srr1(63 - 45) <= '1'; + report "privileged instruction"; + elsif e_in.valid = '1' then v.e.valid := '1'; From 167e37d6675136d26acdb6f7aba0a7f7ad1e60d8 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 3 Apr 2020 14:50:17 +1100 Subject: [PATCH 03/10] Plumb insn_type through to loadstore1 In preparation for adding a TLB to the dcache, this plumbs the insn_type from execute1 through to loadstore1, so that we can have other operations besides loads and stores (e.g. 
tlbie) going to loadstore1 and thence to the dcache. This also plumbs the unit field of the decode ROM from decode2 through to execute1 to simplify the logic around which ops need to go to loadstore1. The load and store data formatting are now not conditional on the op being OP_LOAD or OP_STORE. This eliminates the inferred latches clocked by each of the bits of r.op that we were getting previously. Signed-off-by: Paul Mackerras --- Makefile | 2 +- common.vhdl | 7 +-- decode2.vhdl | 1 + execute1.vhdl | 19 ++++---- loadstore1.vhdl | 116 ++++++++++++++++++++++++------------------------ 5 files changed, 74 insertions(+), 71 deletions(-) diff --git a/Makefile b/Makefile index 8c3133d..c09696a 100644 --- a/Makefile +++ b/Makefile @@ -58,7 +58,7 @@ icache_tb.o: common.o wishbone_types.o icache.o wishbone_bram_wrapper.o dcache.o: utils.o common.o wishbone_types.o plru.o cache_ram.o utils.o dcache_tb.o: common.o wishbone_types.o dcache.o wishbone_bram_wrapper.o insn_helpers.o: -loadstore1.o: common.o helpers.o +loadstore1.o: common.o helpers.o decode_types.o logical.o: decode_types.o multiply_tb.o: decode_types.o common.o glibc_random.o ppc_fx_insns.o multiply.o multiply.o: common.o decode_types.o diff --git a/common.vhdl b/common.vhdl index 9041d32..65e40c1 100644 --- a/common.vhdl +++ b/common.vhdl @@ -118,6 +118,7 @@ package common is type Decode2ToExecute1Type is record valid: std_ulogic; + unit : unit_t; insn_type: insn_type_t; nia: std_ulogic_vector(63 downto 0); write_reg: gspr_index_t; @@ -150,7 +151,7 @@ package common is reserve : std_ulogic; -- set for larx/stcx end record; constant Decode2ToExecute1Init : Decode2ToExecute1Type := - (valid => '0', insn_type => OP_ILLEGAL, bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0', + (valid => '0', unit => NONE, insn_type => OP_ILLEGAL, bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0', lr => '0', rc => '0', oe => '0', invert_a => '0', invert_out => '0', input_carry => ZERO, output_carry => '0', 
input_cr => '0', output_cr => '0', is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', @@ -213,7 +214,7 @@ package common is type Execute1ToLoadstore1Type is record valid : std_ulogic; - load : std_ulogic; -- is this a load or store + op : insn_type_t; -- what ld/st op to do addr1 : std_ulogic_vector(63 downto 0); addr2 : std_ulogic_vector(63 downto 0); data : std_ulogic_vector(63 downto 0); -- data to write, unused for read @@ -228,7 +229,7 @@ package common is reserve : std_ulogic; -- set for larx/stcx. rc : std_ulogic; -- set for stcx. end record; - constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', load => '0', ci => '0', byte_reverse => '0', + constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0', sign_extend => '0', update => '0', xerc => xerc_init, reserve => '0', rc => '0', others => (others => '0')); diff --git a/decode2.vhdl b/decode2.vhdl index ff773aa..edcc50c 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -304,6 +304,7 @@ begin -- execute unit v.e.nia := d_in.nia; + v.e.unit := d_in.decode.unit; v.e.insn_type := d_in.decode.insn_type; v.e.read_reg1 := decoded_reg_a.reg; v.e.read_data1 := decoded_reg_a.data; diff --git a/execute1.vhdl b/execute1.vhdl index 9153b37..abd4a18 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -464,7 +464,7 @@ begin ctrl_tmp.srr1(63 - 45) <= '1'; report "privileged instruction"; - elsif e_in.valid = '1' then + elsif e_in.valid = '1' and e_in.unit = ALU then v.e.valid := '1'; v.e.write_reg := e_in.write_reg; @@ -844,11 +844,6 @@ begin stall_out <= '1'; x_to_divider.valid <= '1'; - when OP_LOAD | OP_STORE => - -- loadstore/dcache has its own port to writeback - v.e.valid := '0'; - lv.valid := '1'; - when others => terminate_out <= '1'; report "illegal"; @@ -874,6 +869,14 @@ begin report "Delayed LR update to " & to_hstring(next_nia); stall_out <= '1'; end if; + + elsif e_in.valid = '1' then + -- instruction 
for other units, i.e. LDST + v.e.valid := '0'; + if e_in.unit = LDST then + lv.valid := '1'; + end if; + elsif r.lr_update = '1' then result_en := '1'; result := r.next_lr; @@ -940,9 +943,7 @@ begin v.e.write_enable := result_en; -- Outputs to loadstore1 (async) - if e_in.insn_type = OP_LOAD then - lv.load := '1'; - end if; + lv.op := e_in.insn_type; lv.addr1 := a_in; lv.addr2 := b_in; lv.data := c_in; diff --git a/loadstore1.vhdl b/loadstore1.vhdl index 518feee..664e396 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -3,6 +3,7 @@ use ieee.std_logic_1164.all; use ieee.numeric_std.all; library work; +use work.decode_types.all; use work.common.all; use work.helpers.all; @@ -41,7 +42,7 @@ architecture behave of loadstore1 is type reg_stage_t is record -- latch most of the input request - load : std_ulogic; + load : std_ulogic; addr : std_ulogic_vector(63 downto 0); store_data : std_ulogic_vector(63 downto 0); load_data : std_ulogic_vector(63 downto 0); @@ -146,59 +147,60 @@ begin two_dwords := or (r.second_bytes); -- load data formatting - if r.load = '1' then - byte_offset := unsigned(r.addr(2 downto 0)); - brev_lenm1 := "000"; - if r.byte_reverse = '1' then - brev_lenm1 := unsigned(r.length(2 downto 0)) - 1; - end if; + byte_offset := unsigned(r.addr(2 downto 0)); + brev_lenm1 := "000"; + if r.byte_reverse = '1' then + brev_lenm1 := unsigned(r.length(2 downto 0)) - 1; + end if; - -- shift and byte-reverse data bytes - for i in 0 to 7 loop - kk := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset); - use_second(i) := kk(3); - j := to_integer(kk(2 downto 0)) * 8; - data_permuted(i * 8 + 7 downto i * 8) := d_in.data(j + 7 downto j); - end loop; - - -- Work out the sign bit for sign extension. - -- Assumes we are not doing both sign extension and byte reversal, - -- in that for unaligned loads crossing two dwords we end up - -- using a bit from the second dword, whereas for a byte-reversed - -- (i.e. 
big-endian) load the sign bit would be in the first dword. - negative := (r.length(3) and data_permuted(63)) or - (r.length(2) and data_permuted(31)) or - (r.length(1) and data_permuted(15)) or - (r.length(0) and data_permuted(7)); - - -- trim and sign-extend - for i in 0 to 7 loop - if i < to_integer(unsigned(r.length)) then - if two_dwords = '1' then - trim_ctl(i) := '1' & not use_second(i); - else - trim_ctl(i) := not use_second(i) & '0'; - end if; + -- shift and byte-reverse data bytes + for i in 0 to 7 loop + kk := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset); + use_second(i) := kk(3); + j := to_integer(kk(2 downto 0)) * 8; + data_permuted(i * 8 + 7 downto i * 8) := d_in.data(j + 7 downto j); + end loop; + + -- Work out the sign bit for sign extension. + -- Assumes we are not doing both sign extension and byte reversal, + -- in that for unaligned loads crossing two dwords we end up + -- using a bit from the second dword, whereas for a byte-reversed + -- (i.e. big-endian) load the sign bit would be in the first dword. 
+ negative := (r.length(3) and data_permuted(63)) or + (r.length(2) and data_permuted(31)) or + (r.length(1) and data_permuted(15)) or + (r.length(0) and data_permuted(7)); + + -- trim and sign-extend + for i in 0 to 7 loop + if i < to_integer(unsigned(r.length)) then + if two_dwords = '1' then + trim_ctl(i) := '1' & not use_second(i); else - trim_ctl(i) := '0' & (negative and r.sign_extend); + trim_ctl(i) := not use_second(i) & '0'; end if; - case trim_ctl(i) is - when "11" => - data_trimmed(i * 8 + 7 downto i * 8) := r.load_data(i * 8 + 7 downto i * 8); - when "10" => - data_trimmed(i * 8 + 7 downto i * 8) := data_permuted(i * 8 + 7 downto i * 8); - when "01" => - data_trimmed(i * 8 + 7 downto i * 8) := x"FF"; - when others => - data_trimmed(i * 8 + 7 downto i * 8) := x"00"; - end case; - end loop; - end if; + else + trim_ctl(i) := '0' & (negative and r.sign_extend); + end if; + case trim_ctl(i) is + when "11" => + data_trimmed(i * 8 + 7 downto i * 8) := r.load_data(i * 8 + 7 downto i * 8); + when "10" => + data_trimmed(i * 8 + 7 downto i * 8) := data_permuted(i * 8 + 7 downto i * 8); + when "01" => + data_trimmed(i * 8 + 7 downto i * 8) := x"FF"; + when others => + data_trimmed(i * 8 + 7 downto i * 8) := x"00"; + end case; + end loop; case r.state is when IDLE => if l_in.valid = '1' then - v.load := l_in.load; + v.load := '0'; + if l_in.op = OP_LOAD then + v.load := '1'; + end if; v.addr := lsu_sum; v.write_reg := l_in.write_reg; v.length := l_in.length; @@ -229,18 +231,16 @@ begin v.addr := lsu_sum; -- Do byte reversing and rotating for stores in the first cycle - if v.load = '0' then - byte_offset := unsigned(lsu_sum(2 downto 0)); - brev_lenm1 := "000"; - if l_in.byte_reverse = '1' then - brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1; - end if; - for i in 0 to 7 loop - k := (to_unsigned(i, 3) xor brev_lenm1) + byte_offset; - j := to_integer(k) * 8; - v.store_data(j + 7 downto j) := l_in.data(i * 8 + 7 downto i * 8); - end loop; + byte_offset := 
unsigned(lsu_sum(2 downto 0)); + brev_lenm1 := "000"; + if l_in.byte_reverse = '1' then + brev_lenm1 := unsigned(l_in.length(2 downto 0)) - 1; end if; + for i in 0 to 7 loop + k := (to_unsigned(i, 3) xor brev_lenm1) + byte_offset; + j := to_integer(k) * 8; + v.store_data(j + 7 downto j) := l_in.data(i * 8 + 7 downto i * 8); + end loop; req := '1'; stall := '1'; From 041d6bef60956849364c1540e7eecb6fdca77497 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 28 Apr 2020 18:11:52 +1000 Subject: [PATCH 04/10] dcache: Implement the dcbz instruction This adds logic to dcache and loadstore1 to implement dcbz. For now it zeroes a single cache line (by default 64 bytes), not 128 bytes like IBM Power processors do. The dcbz operation is performed much like a load miss, except that we are writing zeroes to memory instead of reading. As each ack comes back, we write zeroes to the BRAM instead of data from memory. In this way we zero the line in memory and also zero the line of cache memory, establishing the line in the cache if it wasn't already resident. If it was already resident then we overwrite the existing line in the cache. 
Signed-off-by: Paul Mackerras --- common.vhdl | 1 + dcache.vhdl | 66 +++++++++++++++++++++++++++++++++++++++---------- decode1.vhdl | 2 +- loadstore1.vhdl | 5 ++++ 4 files changed, 60 insertions(+), 14 deletions(-) diff --git a/common.vhdl b/common.vhdl index 65e40c1..61252bd 100644 --- a/common.vhdl +++ b/common.vhdl @@ -236,6 +236,7 @@ package common is type Loadstore1ToDcacheType is record valid : std_ulogic; load : std_ulogic; + dcbz : std_ulogic; nc : std_ulogic; reserve : std_ulogic; addr : std_ulogic_vector(63 downto 0); diff --git a/dcache.vhdl b/dcache.vhdl index 7e553bf..550298b 100644 --- a/dcache.vhdl +++ b/dcache.vhdl @@ -581,8 +581,12 @@ begin wr_data <= r0.data; wr_sel <= r0.byte_sel; else - -- Otherwise, we might be doing a reload - wr_data <= wishbone_in.dat; + -- Otherwise, we might be doing a reload or a DCBZ + if r1.req.dcbz = '1' then + wr_data <= (others => '0'); + else + wr_data <= wishbone_in.dat; + end if; wr_sel <= (others => '1'); wr_addr <= std_ulogic_vector(to_unsigned(r1.store_row, ROW_BITS)); end if; @@ -718,18 +722,54 @@ begin r1.wb.we <= '0'; r1.state <= NC_LOAD_WAIT_ACK; - when OP_STORE_HIT | OP_STORE_MISS => - r1.wb.sel <= r0.byte_sel; - r1.wb.adr <= r0.addr(r1.wb.adr'left downto 3) & "000"; - r1.wb.dat <= r0.data; - if cancel_store = '0' then + when OP_STORE_HIT | OP_STORE_MISS => + if r0.dcbz = '0' then + r1.wb.sel <= r0.byte_sel; + r1.wb.adr <= r0.addr(r1.wb.adr'left downto 3) & "000"; + r1.wb.dat <= r0.data; + if cancel_store = '0' then + r1.wb.cyc <= '1'; + r1.wb.stb <= '1'; + r1.wb.we <= '1'; + r1.state <= STORE_WAIT_ACK; + else + r1.stcx_fail <= '1'; + r1.state <= IDLE; + end if; + else + -- dcbz is handled much like a load miss except + -- that we are writing to memory instead of reading + r1.store_index <= req_index; + r1.store_row <= get_row(req_laddr); + + if req_op = OP_STORE_HIT then + r1.store_way <= req_hit_way; + else + r1.store_way <= replace_way; + + -- Force misses on the victim way while zeroing + 
cache_valids(req_index)(replace_way) <= '0'; + + -- Store new tag in selected way + for i in 0 to NUM_WAYS-1 loop + if i = replace_way then + tagset := cache_tags(req_index); + write_tag(i, tagset, req_tag); + cache_tags(req_index) <= tagset; + end if; + end loop; + end if; + + -- Set up for wishbone writes + r1.wb.adr <= req_laddr(r1.wb.adr'left downto 0); + r1.wb.sel <= (others => '1'); + r1.wb.we <= '1'; + r1.wb.dat <= (others => '0'); r1.wb.cyc <= '1'; r1.wb.stb <= '1'; - r1.wb.we <= '1'; - r1.state <= STORE_WAIT_ACK; - else - r1.stcx_fail <= '1'; - r1.state <= IDLE; + + -- Handle the rest like a load miss + r1.state <= RELOAD_WAIT_ACK; end if; -- OP_NONE and OP_BAD do nothing @@ -766,7 +806,7 @@ begin -- not idle, which we don't currently know how to deal -- with. -- - if r1.store_row = get_row(r1.req.addr) then + if r1.store_row = get_row(r1.req.addr) and r1.req.dcbz = '0' then r1.slow_data <= wishbone_in.dat; end if; diff --git a/decode1.vhdl b/decode1.vhdl index 8c7d5f2..785b669 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -164,7 +164,7 @@ architecture behaviour of decode1 is 2#0000110110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbst 2#0100010110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbt 2#0011110110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbtst - -- 2#1111110110# dcbz + 2#1111110110# => (LDST, OP_DCBZ, RA_OR_ZERO, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- dcbz 2#0110001001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divdeu 2#1110001001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', 
'0'), -- divdeuo 2#0110001011# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- divweu diff --git a/loadstore1.vhdl b/loadstore1.vhdl index 664e396..90650db 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -43,6 +43,7 @@ architecture behave of loadstore1 is type reg_stage_t is record -- latch most of the input request load : std_ulogic; + dcbz : std_ulogic; addr : std_ulogic_vector(63 downto 0); store_data : std_ulogic_vector(63 downto 0); load_data : std_ulogic_vector(63 downto 0); @@ -198,8 +199,11 @@ begin when IDLE => if l_in.valid = '1' then v.load := '0'; + v.dcbz := '0'; if l_in.op = OP_LOAD then v.load := '1'; + elsif l_in.op = OP_DCBZ then + v.dcbz := '1'; end if; v.addr := lsu_sum; v.write_reg := l_in.write_reg; @@ -293,6 +297,7 @@ begin -- Update outputs to dcache d_out.valid <= req; d_out.load <= v.load; + d_out.dcbz <= v.dcbz; d_out.nc <= v.nc; d_out.reserve <= v.reserve; d_out.addr <= addr; From 10f4be4309667ba5fa42c52edbe5132607cbdcbb Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Apr 2020 09:09:23 +1000 Subject: [PATCH 05/10] tests: Add a test for privileged instruction interrupts This adds a test that tries to execute various privileged instructions with MSR[PR] = 1. This also incidentally tests some of the MSR bit manipulations. 
Signed-off-by: Paul Mackerras --- tests/privileged/Makefile | 3 + tests/privileged/head.S | 91 ++++++++++++++++++ tests/privileged/powerpc.lds | 13 +++ tests/privileged/privileged.c | 152 ++++++++++++++++++++++++++++++ tests/test_privileged.bin | Bin 0 -> 9900 bytes tests/test_privileged.console_out | 6 ++ tests/update_console_tests | 2 +- 7 files changed, 266 insertions(+), 1 deletion(-) create mode 100644 tests/privileged/Makefile create mode 100644 tests/privileged/head.S create mode 100644 tests/privileged/powerpc.lds create mode 100644 tests/privileged/privileged.c create mode 100755 tests/test_privileged.bin create mode 100644 tests/test_privileged.console_out diff --git a/tests/privileged/Makefile b/tests/privileged/Makefile new file mode 100644 index 0000000..7c24998 --- /dev/null +++ b/tests/privileged/Makefile @@ -0,0 +1,3 @@ +TEST=privileged + +include ../Makefile.test diff --git a/tests/privileged/head.S b/tests/privileged/head.S new file mode 100644 index 0000000..9b76234 --- /dev/null +++ b/tests/privileged/head.S @@ -0,0 +1,91 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#define STACK_TOP 0x8000 + +/* Load an immediate 64-bit value into a register */ +#define LOAD_IMM64(r, e) \ + lis r,(e)@highest; \ + ori r,r,(e)@higher; \ + rldicr r,r, 32, 31; \ + oris r,r, (e)@h; \ + ori r,r, (e)@l; + + .section ".head","ax" + + /* + * Microwatt currently enters in LE mode at 0x0, so we don't need to + * do any endian fix ups + */ + . = 0 +.global _start +_start: + b boot_entry + +.global boot_entry +boot_entry: + /* setup stack */ + LOAD_IMM64(%r1, STACK_TOP - 0x100) + LOAD_IMM64(%r12, main) + mtctr %r12 + bctrl + attn // terminate on exit + b . + + /* Call a function with a specified MSR value */ + .global call_with_msr +call_with_msr: + mtsrr0 %r4 + mr %r12,%r4 + mtsrr1 %r5 + rfid + +#define EXCEPTION(nr) \ + .= nr ;\ + li %r3,nr ;\ + blr + + EXCEPTION(0x300) + EXCEPTION(0x380) + EXCEPTION(0x400) + EXCEPTION(0x480) + EXCEPTION(0x500) + EXCEPTION(0x600) + EXCEPTION(0x700) + EXCEPTION(0x800) + EXCEPTION(0x900) + EXCEPTION(0x980) + EXCEPTION(0xa00) + EXCEPTION(0xb00) + + /* + * System call - used to exit from tests where MSR[PR] + * may have been set. + */ + . = 0xc00 + blr + + EXCEPTION(0xd00) + EXCEPTION(0xe00) + EXCEPTION(0xe20) + EXCEPTION(0xe40) + EXCEPTION(0xe60) + EXCEPTION(0xe80) + EXCEPTION(0xf00) + EXCEPTION(0xf20) + EXCEPTION(0xf40) + EXCEPTION(0xf60) + EXCEPTION(0xf80) diff --git a/tests/privileged/powerpc.lds b/tests/privileged/powerpc.lds new file mode 100644 index 0000000..8c8c65b --- /dev/null +++ b/tests/privileged/powerpc.lds @@ -0,0 +1,13 @@ +SECTIONS +{ + _start = .; + . = 0; + .head : { + KEEP(*(.head)) + } + . = 0x2000; + .text : { *(.text) } + . 
= 0x4000; + .data : { *(.data) } + .bss : { *(.bss) } +} diff --git a/tests/privileged/privileged.c b/tests/privileged/privileged.c new file mode 100644 index 0000000..073dc07 --- /dev/null +++ b/tests/privileged/privileged.c @@ -0,0 +1,152 @@ +#include <stddef.h> +#include <stdbool.h> +#include <stdint.h> + +#include "console.h" + +#define MSR_EE 0x8000 +#define MSR_PR 0x4000 +#define MSR_IR 0x0020 +#define MSR_DR 0x0010 + +extern int call_with_msr(unsigned long arg, int (*fn)(unsigned long), unsigned long msr); + +#define SRR0 26 +#define SRR1 27 + +static inline unsigned long mfspr(int sprnum) +{ + long val; + + __asm__ volatile("mfspr %0,%1" : "=r" (val) : "i" (sprnum)); + return val; +} + +static inline void mtspr(int sprnum, unsigned long val) +{ + __asm__ volatile("mtspr %0,%1" : : "i" (sprnum), "r" (val)); +} + +void print_string(const char *str) +{ + for (; *str; ++str) + putchar(*str); +} + +void print_hex(unsigned long val, int ndigits) +{ + int i, x; + + for (i = (ndigits - 1) * 4; i >= 0; i -= 4) { + x = (val >> i) & 0xf; + if (x >= 10) + putchar(x + 'a' - 10); + else + putchar(x + '0'); + } +} + +// i < 100 +void print_test_number(int i) +{ + print_string("test "); + putchar(48 + i/10); + putchar(48 + i%10); + putchar(':'); +} + +int priv_fn_1(unsigned long x) +{ + __asm__ volatile("attn"); + __asm__ volatile("li 3,0; sc"); + return 0; +} + +int priv_fn_2(unsigned long x) +{ + __asm__ volatile("mfmsr 3"); + __asm__ volatile("sc"); + return 0; +} + +int priv_fn_3(unsigned long x) +{ + __asm__ volatile("mtmsrd 3"); + __asm__ volatile("li 3,0; sc"); + return 0; +} + +int priv_fn_4(unsigned long x) +{ + __asm__ volatile("rfid"); + __asm__ volatile("li 3,0; sc"); + return 0; +} + +int priv_fn_5(unsigned long x) +{ + __asm__ volatile("mfsrr0 3"); + __asm__ volatile("sc"); + return 0; +} + +int priv_fn_6(unsigned long x) +{ + __asm__ volatile("mtsrr0 3"); + __asm__ volatile("sc"); + return 0; +} + +int priv_test(int (*fn)(unsigned long)) +{ + unsigned long msr; + int vec; + + __asm__
volatile ("mtdec %0" : : "r" (0x7fffffff)); + __asm__ volatile ("mfmsr %0" : "=r" (msr)); + /* this should fail */ + vec = call_with_msr(0, fn, msr | MSR_PR); + if (vec != 0x700) + return vec | 1; + /* SRR1 should be set correctly */ + msr |= MSR_PR | MSR_EE | MSR_IR | MSR_DR; + if (mfspr(SRR1) != (msr | 0x40000)) + return 2; + return 0; +} + +int fail = 0; + +void do_test(int num, int (*fn)(unsigned long)) +{ + int ret; + + print_test_number(num); + ret = priv_test(fn); + if (ret == 0) { + print_string("PASS\r\n"); + } else { + fail = 1; + print_string("FAIL "); + print_hex(ret, 4); + print_string(" SRR0="); + print_hex(mfspr(SRR0), 16); + print_string(" SRR1="); + print_hex(mfspr(SRR1), 16); + print_string("\r\n"); + } +} + +int main(void) +{ + potato_uart_init(); + + do_test(1, priv_fn_1); + do_test(2, priv_fn_2); + do_test(3, priv_fn_3); + do_test(4, priv_fn_4); + do_test(5, priv_fn_5); + do_test(6, priv_fn_6); + + return fail; +} diff --git a/tests/test_privileged.bin b/tests/test_privileged.bin new file mode 100755 index 0000000000000000000000000000000000000000..5b8ce63ab5e843bf8ff40c84d36833ad52abd0ca GIT binary patch literal 9900 zcmeHNUuauZ9RA%TP13B38T6sr>24aqWfY7c<7Ua~z3ZQ)bZBWeCirr*w7B`CxQDdf z^j^bWlst3~z6h>9U3?JnVGjfU5D^A~FMbC>YJlYvDqdJjwny8e$9VpGAolVC|S=;+wA?mYU zCJ*r|V|za|^0AU(d%rNAB|0a(?uVNv?a*a8j88s?`8$kHUC#1%Xxr_< z`a7I|pu_nGJDh*Fv-~ZG@hRji|Gk6Aq28g+=I`WpABR?aKkVVoUVjQ7%wKL3SKGw( zHgT&>eE(uS2J4A7{M*FUHgUa8+-ehl42<29t^%$Ct^%$Ct^%$Ct^%$Ct^%$Ct^%$C zt^%$C9j3r0`{sUxr1c70xo>-ehWI^D{J*a&Xhw)0{e8qH-Dnk?k#vKPX%(-&C(t@> zd*)l#w!N)m5~*n_pHXa2&;XZh##U3{Rr!HMD-xL{qS#;$71abql?Y=KSucO@f%G-> zYfCO!5>aCvXtQ0dwU)~FFBqpH>|(v<%>Xlrj-jR{YTkOtCQj~rXAT8UT#y6txo!## zCg_UNL+`2=DV`ki$1XhZR55oLOO+dOA=ZE4a!kf9r9-rw3eM(~I`-2T`}6igGeM>j zB6e)ZpF4lOR(o>OG!m3kxUW$2ox*y&RqJ#9J(%k*=O%1s%BMN!eE$rv?pO|ZYd)1u9n7Muv`?IKWO(M`ktYyRA^&wXxImy?R=ky_mtbywf zFdrXT3a=AK4v)!*{Uqi&4=hVLQu7~&|105tNBFnU=lKcv7Y~>>pYICx+6Ur&8jfE~ zOW@k8UsK{3$8FC2Mc7Z)Y%lCd*xfbT2m4LfTVgK7bQkPJ*!OC-ANHrPe}OGSXU@}O 
zU0MhB`tGw^U0BINTi;VumhmnR=_JRqeMCu_;&VZ3VCyMTlN)Ep3(fxl?2f}2OuEL6H5cAl0=8D1h*ui7Z)W)7Dtp4$w7)w~6jaS~No-@_CiqASGe}4C# zyK4$#-==y8R?Yf;P%B@fe*^n%q4W`S+r-WHfwH}SPjj5lGQ$O;_n+?(`SB#r%}>N0 z;map*b@uJ6GTKRPdirpPXiOVF$8C9fYAONsc>4lc9^)4@5tlSSMZUZy_4i1^2Ysph zH)C=&EPR9Le-Gc$2A_)lk4<^!;ahI8}PK&Qw~&y#pUV)M%tzqa|rL zoH(K_jD*6Z-SumSh>jz43-?i+j(MIpio6@1C26!F^NvU*UULoiW#n?a-P|#eJIRY8 zm1KlKoxVh#g+7go_>wfShs$dvkkf~QkK^x~Uj})@e(f$M$n|*@d7mKfW1cq}mcih) JFpU6-=wFGzEoA@z literal 0 HcmV?d00001 diff --git a/tests/test_privileged.console_out b/tests/test_privileged.console_out new file mode 100644 index 0000000..a49bb9b --- /dev/null +++ b/tests/test_privileged.console_out @@ -0,0 +1,6 @@ +test 01:PASS +test 02:PASS +test 03:PASS +test 04:PASS +test 05:PASS +test 06:PASS diff --git a/tests/update_console_tests b/tests/update_console_tests index c17c12b..bd012d9 100755 --- a/tests/update_console_tests +++ b/tests/update_console_tests @@ -3,7 +3,7 @@ # Script to update console related tests from source # -for i in sc illegal decrementer ; do +for i in sc illegal decrementer privileged ; do cd $i make cd - From a05ee9fc7f3da2a61a358a172a2c1c44cc03a1c5 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Apr 2020 11:11:22 +1000 Subject: [PATCH 06/10] Makefile: fix typo Fix a typo which meant that the console tests weren't getting executed by 'make check'. 
Signed-off-by: Paul Mackerras --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c09696a..a13fdcc 100644 --- a/Makefile +++ b/Makefile @@ -130,7 +130,7 @@ dmi_dtm_tb: dmi_dtm_tb.o sim_vhpi_c.o sim_bram_helpers_c.o tests = $(sort $(patsubst tests/%.out,%,$(wildcard tests/*.out))) tests_console = $(sort $(patsubst tests/%.console_out,%,$(wildcard tests/*.console_out))) -check: $(tests) $(test_console) test_micropython test_micropython_long +check: $(tests) $(tests_console) test_micropython test_micropython_long check_light: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 test_micropython test_micropython_long $(tests_console) From cf4dfeca3645fb3f43785536deaa2cc8643b0e48 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Apr 2020 11:37:02 +1000 Subject: [PATCH 07/10] Change the default cross compiler prefix to powerpc64le-linux-gnu- That is what is used by the packaged cross-compilers on (at least) Fedora and Ubuntu. Signed-off-by: Paul Mackerras --- README.md | 5 ++++- hello_world/Makefile | 2 +- rust_lib_demo/Makefile | 2 +- tests/Makefile.test | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 8bf4622..98f2140 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,10 @@ You can try out Microwatt/Micropython without hardware by using the ghdl simulat - Build micropython. If you aren't building on a ppc64le box you will need a cross compiler. If it isn't available on your distro - grab the powerpc64le-power8 toolchain from https://toolchains.bootlin.com + grab the powerpc64le-power8 toolchain from https://toolchains.bootlin.com. + You may need to set the CROSS_COMPILE environment variable + to the prefix used for your cross compilers. The default is + powerpc64le-linux-gnu-. 
``` git clone https://github.com/micropython/micropython.git diff --git a/hello_world/Makefile b/hello_world/Makefile index 674095e..a609199 100644 --- a/hello_world/Makefile +++ b/hello_world/Makefile @@ -1,7 +1,7 @@ ARCH = $(shell uname -m) ifneq ("$(ARCH)", "ppc64") ifneq ("$(ARCH)", "ppc64le") - CROSS_COMPILE ?= powerpc64le-linux- + CROSS_COMPILE ?= powerpc64le-linux-gnu- endif endif diff --git a/rust_lib_demo/Makefile b/rust_lib_demo/Makefile index 26aebf8..fdbb18b 100644 --- a/rust_lib_demo/Makefile +++ b/rust_lib_demo/Makefile @@ -1,7 +1,7 @@ ARCH = $(shell uname -m) ifneq ("$(ARCH)", "ppc64") ifneq ("$(ARCH)", "ppc64le") - CROSS_COMPILE ?= powerpc64le-linux- + CROSS_COMPILE ?= powerpc64le-linux-gnu- endif endif diff --git a/tests/Makefile.test b/tests/Makefile.test index 9676370..250135d 100644 --- a/tests/Makefile.test +++ b/tests/Makefile.test @@ -1,7 +1,7 @@ ARCH = $(shell uname -m) ifneq ("$(ARCH)", "ppc64") ifneq ("$(ARCH)", "ppc64le") - CROSS_COMPILE ?= powerpc64le-linux- + CROSS_COMPILE ?= powerpc64le-linux-gnu- endif endif From 4db1676ef8b37fe7f36abe14b3255e4b92fbc5bd Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 4 May 2020 08:31:18 +1000 Subject: [PATCH 08/10] dcache: Don't assert on dcbz cache hit We can hit the assert for req_op = OP_STORE_HIT and reloading in the case of dcbz, since it looks like a store. Therefore we need to exclude that case from the assert. 
Signed-off-by: Paul Mackerras --- dcache.vhdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dcache.vhdl b/dcache.vhdl index 550298b..7d61a85 100644 --- a/dcache.vhdl +++ b/dcache.vhdl @@ -597,7 +597,8 @@ begin if reloading and wishbone_in.ack = '1' and r1.store_way = i then do_write <= '1'; end if; - if req_op = OP_STORE_HIT and req_hit_way = i and cancel_store = '0' then + if req_op = OP_STORE_HIT and req_hit_way = i and cancel_store = '0' and + r1.req.dcbz = '0' then assert not reloading report "Store hit while in state:" & state_t'image(r1.state) severity FAILURE; From fe789190e40fe160d129f0504d1f69fec54cf4d9 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 1 May 2020 09:00:21 +1000 Subject: [PATCH 09/10] wishbone_debug_master: Fix address auto-increment for memory writes Signed-off-by: Paul Mackerras --- wishbone_debug_master.vhdl | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/wishbone_debug_master.vhdl b/wishbone_debug_master.vhdl index 11b9ee3..ddf6923 100644 --- a/wishbone_debug_master.vhdl +++ b/wishbone_debug_master.vhdl @@ -49,6 +49,7 @@ architecture behaviour of wishbone_debug_master is type state_t is (IDLE, WB_CYCLE, DMI_WAIT); signal state : state_t; + signal do_inc : std_ulogic; begin @@ -84,16 +85,16 @@ begin reg_addr <= (others => '0'); reg_ctrl <= (others => '0'); else -- Standard register writes - if dmi_req and dmi_wr then + if do_inc = '1' then + -- Address register auto-increment + reg_addr <= std_ulogic_vector(unsigned(reg_addr) + + decode_autoinc(reg_ctrl(10 downto 9))); + elsif dmi_req and dmi_wr then if dmi_addr = DBG_WB_ADDR then reg_addr <= dmi_din; elsif dmi_addr = DBG_WB_CTRL then reg_ctrl <= dmi_din(10 downto 0); end if; - elsif state = WB_CYCLE and (wb_in.ack and reg_ctrl(8))= '1' then - -- Address register auto-increment - reg_addr <= std_ulogic_vector(unsigned(reg_addr) + - decode_autoinc(reg_ctrl(10 downto 9))); end if; end if; end if; @@ -145,6 +146,7 @@ begin if 
(rst) then state <= IDLE; wb_out.stb <= '0'; + do_inc <= '0'; else case state is when IDLE => @@ -162,11 +164,13 @@ begin -- wb_out.stb <= '0'; state <= DMI_WAIT; + do_inc <= reg_ctrl(8); end if; when DMI_WAIT => if dmi_req = '0' then state <= IDLE; end if; + do_inc <= '0'; end case; end if; end if; From 102fbcfe9a3d8e054fdb0ad050512944051e4844 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 4 May 2020 15:17:04 +1000 Subject: [PATCH 10/10] execute1: Fix interrupt delivery during slow instructions During slow instructions such as multiply or divide, if a decrementer (or other asynchronous) interrupt becomes pending, it disrupts the logic that keeps stall asserted until the end of the slow instruction, and the interrupt logic starts trying to deliver the interrupt before the slow instruction has finished. To fix that, make the interrupt logic wait until it sees e_in.valid set before setting exception to 1. Signed-off-by: Paul Mackerras --- execute1.vhdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/execute1.vhdl b/execute1.vhdl index 1846488..8286d30 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -454,12 +454,12 @@ begin v.e.valid := e_in.valid; report "Writing SRR1: " & to_hstring(ctrl.srr1); - elsif irq_valid = '1' then + elsif irq_valid = '1' and e_in.valid = '1' then -- we need two cycles to write srr0 and 1 -- will need more when we have to write DSISR, DAR and HIER -- Don't deliver the interrupt until we have a valid instruction -- coming in, so we have a valid NIA to put in SRR0. - exception := e_in.valid; + exception := '1'; ctrl_tmp.srr1 <= msr_copy(ctrl.msr); elsif e_in.valid = '1' and ctrl.msr(MSR_PR) = '1' and