From 6a80825e705af736e689cb2421b64453838d189e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 26 Aug 2020 19:19:34 +1000 Subject: [PATCH 1/3] decode1: Avoid overriding fields of v.decode in decode1 In the cases where we need to override the values from the decode ROMs, we now do that overriding after the clock edge (eating into decode2's cycle) rather than before. This helps timing a little. Signed-off-by: Paul Mackerras --- decode1.vhdl | 154 ++++++++++++++++++++++++++++---------------------- decode2.vhdl | 3 - execute1.vhdl | 2 + 3 files changed, 90 insertions(+), 69 deletions(-) diff --git a/decode1.vhdl b/decode1.vhdl index 21fea4a..9544637 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -31,6 +31,21 @@ architecture behaviour of decode1 is signal r, rin : Decode1ToDecode2Type; signal s : Decode1ToDecode2Type; + constant illegal_inst : decode_rom_t := + (NONE, OP_ILLEGAL, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'); + + type reg_internal_t is record + override : std_ulogic; + override_decode: decode_rom_t; + override_unit: std_ulogic; + force_single: std_ulogic; + end record; + constant reg_internal_t_init : reg_internal_t := + (override => '0', override_decode => illegal_inst, override_unit => '0', force_single => '0'); + + signal ri, ri_in : reg_internal_t; + signal si : reg_internal_t; + subtype major_opcode_t is unsigned(5 downto 0); type major_rom_array_t is array(0 to 63) of decode_rom_t; type minor_valid_array_t is array(0 to 1023) of std_ulogic; @@ -41,9 +56,6 @@ architecture behaviour of decode1 is type op_31_subop_array_t is array(0 to 1023) of decode_rom_t; type minor_rom_array_2_t is array(0 to 3) of decode_rom_t; - constant illegal_inst : decode_rom_t := - (ALU, OP_ILLEGAL, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'); - constant major_decode_rom_array : major_rom_array_t := ( -- unit internal in1 in2 in3 out CR CR inv 
inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl -- op in out A out in out len ext pipe @@ -107,25 +119,21 @@ architecture behaviour of decode1 is -- indexed by bits 10..1 of instruction word constant decode_op_19_valid : minor_valid_array_t := ( - -- addpcis, 5 upper bits are part of constant - 2#0000000010# => '1', 2#0000100010# => '1', 2#0001000010# => '1', 2#0001100010# => '1', 2#0010000010# => '1', 2#0010100010# => '1', 2#0011000010# => '1', 2#0011100010# => '1', - 2#0100000010# => '1', 2#0100100010# => '1', 2#0101000010# => '1', 2#0101100010# => '1', 2#0110000010# => '1', 2#0110100010# => '1', 2#0111000010# => '1', 2#0111100010# => '1', - 2#1000000010# => '1', 2#1000100010# => '1', 2#1001000010# => '1', 2#1001100010# => '1', 2#1010000010# => '1', 2#1010100010# => '1', 2#1011000010# => '1', 2#1011100010# => '1', - 2#1100000010# => '1', 2#1100100010# => '1', 2#1101000010# => '1', 2#1101100010# => '1', 2#1110000010# => '1', 2#1110100010# => '1', 2#1111000010# => '1', 2#1111100010# => '1', + 2#0001000000# to 2#0001011111# => '1', -- addpcis, 5 upper bits are part of constant 2#1000010000# => '1', -- bcctr - 2#0000010000# => '1', -- bclr - 2#1000110000# => '1', -- bctar - 2#0100000001# => '1', -- crand - 2#0010000001# => '1', -- crandc - 2#0100100001# => '1', -- creqv - 2#0011100001# => '1', -- crnand + 2#1000000000# => '1', -- bclr + 2#1000010001# => '1', -- bctar + 2#0000101000# => '1', -- crand + 2#0000100100# => '1', -- crandc + 2#0000101001# => '1', -- creqv + 2#0000100111# => '1', -- crnand 2#0000100001# => '1', -- crnor - 2#0111000001# => '1', -- cror - 2#0110100001# => '1', -- crorc - 2#0011000001# => '1', -- crxor - 2#0010010110# => '1', -- isync + 2#0000101110# => '1', -- cror + 2#0000101101# => '1', -- crorc + 2#0000100110# => '1', -- crxor + 2#1011000100# => '1', -- isync 2#0000000000# => '1', -- mcrf - 2#0000010010# => '1', -- rfid + 2#1001000000# => '1', -- rfid others => '0' ); @@ -401,19 +409,24 @@ begin if rst = '1' then r <= 
Decode1ToDecode2Init; s <= Decode1ToDecode2Init; + ri <= reg_internal_t_init; + si <= reg_internal_t_init; elsif flush_in = '1' then r.valid <= '0'; s.valid <= '0'; elsif s.valid = '1' then if stall_in = '0' then r <= s; + ri <= si; s.valid <= '0'; end if; else s <= rin; + si <= ri_in; s.valid <= rin.valid and r.valid and stall_in; if r.valid = '0' or stall_in = '0' then r <= rin; + ri <= ri_in; end if; end if; end if; @@ -422,6 +435,7 @@ begin decode1_1: process(all) variable v : Decode1ToDecode2Type; + variable vi : reg_internal_t; variable f : Decode1ToFetch1Type; variable majorop : major_opcode_t; variable minor4op : std_ulogic_vector(10 downto 0); @@ -432,37 +446,30 @@ begin variable br_offset : signed(23 downto 0); begin v := Decode1ToDecode2Init; + vi := reg_internal_t_init; v.valid := f_in.valid; v.nia := f_in.nia; v.insn := f_in.insn; v.stop_mark := f_in.stop_mark; - v.ispr1 := (others => '0'); - v.ispr2 := (others => '0'); if f_in.valid = '1' then report "Decode insn " & to_hstring(f_in.insn) & " at " & to_hstring(f_in.nia); end if; + br_offset := (others => '0'); + majorop := unsigned(f_in.insn(31 downto 26)); - if f_in.fetch_failed = '1' then - v.valid := '1'; - -- Only send down a single OP_FETCH_FAILED - if r.decode.insn_type = OP_FETCH_FAILED then - v.valid := '0'; - end if; - v.decode := fetch_fail_inst; + v.decode := major_decode_rom_array(to_integer(majorop)); - elsif majorop = "000100" then + case to_integer(unsigned(majorop)) is + when 4 => -- major opcode 4, mostly VMX/VSX stuff but also some integer ops (madd*) minor4op := f_in.insn(5 downto 0) & f_in.insn(10 downto 6); - if decode_op_4_valid(to_integer(unsigned(minor4op))) = '1' then - v.decode := decode_op_4_array(to_integer(unsigned(f_in.insn(5 downto 0)))); - else - v.decode := illegal_inst; - end if; + vi.override := not decode_op_4_valid(to_integer(unsigned(minor4op))); + v.decode := decode_op_4_array(to_integer(unsigned(f_in.insn(5 downto 0)))); - elsif majorop = "011111" then + when 31 
=> -- major opcode 31, lots of things v.decode := decode_op_31_array(to_integer(unsigned(f_in.insn(10 downto 1)))); @@ -474,32 +481,35 @@ begin -- mfspr or mtspr -- Make slow SPRs single issue if is_fast_spr(v.ispr1) = '0' then - v.decode.sgl_pipe := '1'; + vi.force_single := '1'; -- send MMU-related SPRs to loadstore1 case sprn is when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PRTBL => - v.decode.unit := LDST; + vi.override_decode.unit := LDST; + vi.override_unit := '1'; when others => end case; end if; end if; - elsif majorop = "010000" then + when 16 => -- CTR may be needed as input to bc - v.decode := major_decode_rom_array(to_integer(majorop)); if f_in.insn(23) = '0' then v.ispr1 := fast_spr_num(SPR_CTR); end if; + -- Predict backward branches as taken, forward as untaken + v.br_pred := f_in.insn(15); + br_offset := resize(signed(f_in.insn(15 downto 2)), 24); - elsif majorop = "010011" then - if decode_op_19_valid(to_integer(unsigned(f_in.insn(10 downto 1)))) = '0' then - report "op 19 illegal subcode"; - v.decode := illegal_inst; - else - op_19_bits := f_in.insn(5) & f_in.insn(3) & f_in.insn(2); - v.decode := decode_op_19_array(to_integer(unsigned(op_19_bits))); - report "op 19 sub " & to_hstring(op_19_bits); - end if; + when 18 => + -- Unconditional branches are always taken + v.br_pred := '1'; + br_offset := signed(f_in.insn(25 downto 2)); + + when 19 => + vi.override := not decode_op_19_valid(to_integer(unsigned(f_in.insn(5 downto 1) & f_in.insn(10 downto 6)))); + op_19_bits := f_in.insn(5) & f_in.insn(3) & f_in.insn(2); + v.decode := decode_op_19_array(to_integer(unsigned(op_19_bits))); -- Work out ispr1/ispr2 independent of v.decode since they seem to be critical path if f_in.insn(2) = '0' then @@ -523,36 +533,39 @@ begin v.ispr2 := fast_spr_num(SPR_SRR0); end if; - elsif majorop = "011110" then + when 30 => v.decode := decode_op_30_array(to_integer(unsigned(f_in.insn(4 downto 1)))); - elsif majorop = "111010" then + when 24 => + -- ori (primary opcode 24), special-case the
standard NOP + if std_match(f_in.insn, "01100000000000000000000000000000") then + report "PPC_nop"; + vi.override := '1'; + vi.override_decode := nop_instr; + end if; + + when 58 => v.decode := decode_op_58_array(to_integer(unsigned(f_in.insn(1 downto 0)))); - elsif majorop = "111110" then + when 62 => v.decode := decode_op_62_array(to_integer(unsigned(f_in.insn(1 downto 0)))); - elsif std_match(f_in.insn, "01100000000000000000000000000000") then - report "PPC_nop"; - v.decode := nop_instr; + when others => + end case; - else - v.decode := major_decode_rom_array(to_integer(majorop)); + if f_in.fetch_failed = '1' then + v.valid := '1'; + vi.override := '1'; + vi.override_decode := fetch_fail_inst; + -- Only send down a single OP_FETCH_FAILED + if ri.override = '1' and ri.override_decode.insn_type = OP_FETCH_FAILED then + v.valid := '0'; + end if; end if; -- Branch predictor -- Note bclr, bcctr and bctar are predicted not taken as we have no -- count cache or link stack. - br_offset := (others => '0'); - if majorop = 18 then - -- Unconditional branches are always taken - v.br_pred := '1'; - br_offset := signed(f_in.insn(25 downto 2)); - elsif majorop = 16 then - -- Predict backward branches as taken, forward as untaken - v.br_pred := f_in.insn(15); - br_offset := resize(signed(f_in.insn(15 downto 2)), 24); - end if; br_nia := f_in.nia(63 downto 2); if f_in.insn(1) = '1' then br_nia := (others => '0'); @@ -563,9 +576,18 @@ begin -- Update registers rin <= v; + ri_in <= vi; -- Update outputs d_out <= r; + if ri.override = '1' then + d_out.decode <= ri.override_decode; + elsif ri.override_unit = '1' then + d_out.decode.unit <= ri.override_decode.unit; + end if; + if ri.force_single = '1' then + d_out.decode.sgl_pipe <= '1'; + end if; f_out <= f; flush_out <= f.redirect; end process; diff --git a/decode2.vhdl b/decode2.vhdl index b1531f1..a2a602c 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -403,9 +403,6 @@ begin end if; v.e.valid := control_valid_out; - if 
d_in.decode.unit = NONE then - v.e.insn_type := OP_ILLEGAL; - end if; if rst = '1' or flush_in = '1' then v.e := Decode2ToExecute1Init; diff --git a/execute1.vhdl b/execute1.vhdl index 51ea5b0..076c4ae 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -1044,6 +1044,8 @@ begin -- instruction for other units, i.e. LDST if e_in.unit = LDST then lv.valid := '1'; + elsif e_in.unit = NONE then + illegal := '1'; end if; elsif r.f.redirect = '1' then From b589d2d472be58d81e01aad6de467119ee15e1a4 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 28 Aug 2020 20:34:09 +1000 Subject: [PATCH 2/3] execute1: Implement trace interrupts Trace interrupts occur when the MSR[TE] field is non-zero and an instruction other than rfid has been successfully completed. A trace interrupt occurs before the next instruction is executed or any asynchronous interrupt is taken. Since the trace interrupt is defined to set SRR1 bits depending on whether the traced instruction is a load or an instruction treated as a load, or a store or an instruction treated as a store, we need to make sure the treated-as-a-load instructions (icbi, icbt, dcbt, dcbst, dcbf) and the treated-as-a-store instructions (dcbtst, dcbz) have the correct opcodes in decode1. Several of them were previously marked as OP_NOP. We don't yet implement the SIAR or SDAR registers, which should be set by trace interrupts. 
Signed-off-by: Paul Mackerras --- common.vhdl | 2 ++ decode1.vhdl | 10 +++++----- execute1.vhdl | 42 ++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 47 insertions(+), 7 deletions(-) diff --git a/common.vhdl b/common.vhdl index 03211ce..1ca1178 100644 --- a/common.vhdl +++ b/common.vhdl @@ -13,6 +13,8 @@ package common is constant MSR_SF : integer := (63 - 0); -- Sixty-Four bit mode constant MSR_EE : integer := (63 - 48); -- External interrupt Enable constant MSR_PR : integer := (63 - 49); -- PRoblem state + constant MSR_SE : integer := (63 - 53); -- Single-step bit of TE field + constant MSR_BE : integer := (63 - 54); -- Branch trace bit of TE field constant MSR_IR : integer := (63 - 58); -- Instruction Relocation constant MSR_DR : integer := (63 - 59); -- Data Relocation constant MSR_RI : integer := (63 - 62); -- Recoverable Interrupt diff --git a/decode1.vhdl b/decode1.vhdl index 9544637..a7d5910 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -201,10 +201,10 @@ architecture behaviour of decode1 is 2#1000111010# => (ALU, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- cnttzd 2#1000011010# => (ALU, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- cnttzw 2#1011110011# => (ALU, OP_DARN, NONE, NONE, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- darn - 2#0001010110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbf - 2#0000110110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbst - 2#0100010110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbt - 2#0011110110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', 
NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbtst + 2#0001010110# => (ALU, OP_DCBF, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbf + 2#0000110110# => (ALU, OP_DCBST, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbst + 2#0100010110# => (ALU, OP_DCBT, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbt + 2#0011110110# => (ALU, OP_DCBTST, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- dcbtst 2#1111110110# => (LDST, OP_DCBZ, RA_OR_ZERO, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- dcbz 2#0110001001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divdeu 2#1110001001# => (ALU, OP_DIVE, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- divdeuo @@ -230,7 +230,7 @@ architecture behaviour of decode1 is 2#1101111010# => (ALU, OP_EXTSWSLI, NONE, CONST_SH, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- extswsli 2#1101111011# => (ALU, OP_EXTSWSLI, NONE, CONST_SH, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- extswsli 2#1111010110# => (ALU, OP_ICBI, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- icbi - 2#0000010110# => (ALU, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- icbt + 2#0000010110# => (ALU, OP_ICBT, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- icbt 2#0000001111# => (ALU, OP_ISEL, RA_OR_ZERO, RB, NONE, RT, '1', '0', '0', '0', 
ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- isel 2#0000101111# => (ALU, OP_ISEL, RA_OR_ZERO, RB, NONE, RT, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- isel 2#0001001111# => (ALU, OP_ISEL, RA_OR_ZERO, RB, NONE, RT, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- isel diff --git a/execute1.vhdl b/execute1.vhdl index 076c4ae..04cc970 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -53,6 +53,8 @@ architecture behaviour of execute1 is f : Execute1ToFetch1Type; busy: std_ulogic; terminate: std_ulogic; + trace_next : std_ulogic; + prev_op : insn_type_t; lr_update : std_ulogic; next_lr : std_ulogic_vector(63 downto 0); mul_in_progress : std_ulogic; @@ -69,7 +71,7 @@ architecture behaviour of execute1 is end record; constant reg_type_init : reg_type := (e => Execute1ToWritebackInit, f => Execute1ToFetch1Init, - busy => '0', lr_update => '0', terminate => '0', + busy => '0', lr_update => '0', terminate => '0', trace_next => '0', prev_op => OP_ILLEGAL, mul_in_progress => '0', mul_finish => '0', div_in_progress => '0', cntz_in_progress => '0', slow_op_insn => OP_ILLEGAL, slow_op_rc => '0', slow_op_oe => '0', slow_op_xerc => xerc_init, next_lr => (others => '0'), last_nia => (others => '0'), others => (others => '0')); @@ -330,6 +332,7 @@ begin variable abs_branch : std_ulogic; variable spr_val : std_ulogic_vector(63 downto 0); variable addend : std_ulogic_vector(127 downto 0); + variable do_trace : std_ulogic; begin result := (others => '0'); sum_with_carry := (others => '0'); @@ -525,6 +528,11 @@ begin v.e.mode_32bit := not ctrl.msr(MSR_SF); + do_trace := valid_in and ctrl.msr(MSR_SE); + if valid_in = '1' then + v.prev_op := e_in.insn_type; + end if; + if ctrl.irq_state = WRITE_SRR1 then v.e.exc_write_reg := fast_spr_num(SPR_SRR1); v.e.exc_write_data := ctrl.srr1; @@ -532,13 +540,29 @@ begin ctrl_tmp.msr(MSR_SF) <= '1'; ctrl_tmp.msr(MSR_EE) <= '0'; ctrl_tmp.msr(MSR_PR) 
<= '0'; + ctrl_tmp.msr(MSR_SE) <= '0'; + ctrl_tmp.msr(MSR_BE) <= '0'; ctrl_tmp.msr(MSR_IR) <= '0'; ctrl_tmp.msr(MSR_DR) <= '0'; ctrl_tmp.msr(MSR_RI) <= '0'; ctrl_tmp.msr(MSR_LE) <= '1'; v.e.valid := '1'; + v.trace_next := '0'; report "Writing SRR1: " & to_hstring(ctrl.srr1); + elsif r.trace_next = '1' and valid_in = '1' then + -- Generate a trace interrupt rather than executing the next instruction + -- or taking any asynchronous interrupt + v.f.redirect_nia := std_logic_vector(to_unsigned(16#d00#, 64)); + ctrl_tmp.srr1(63 - 33) <= '1'; + if r.prev_op = OP_LOAD or r.prev_op = OP_ICBI or r.prev_op = OP_ICBT or + r.prev_op = OP_DCBT or r.prev_op = OP_DCBST or r.prev_op = OP_DCBF then + ctrl_tmp.srr1(63 - 35) <= '1'; + elsif r.prev_op = OP_STORE or r.prev_op = OP_DCBZ or r.prev_op = OP_DCBTST then + ctrl_tmp.srr1(63 - 36) <= '1'; + end if; + exception := '1'; + elsif irq_valid = '1' and valid_in = '1' then -- we need two cycles to write srr0 and 1 -- will need more when we have to write HEIR @@ -594,7 +618,7 @@ begin else illegal := '1'; end if; - when OP_NOP => + when OP_NOP | OP_DCBF | OP_DCBST | OP_DCBT | OP_DCBTST | OP_ICBT => -- Do nothing when OP_ADD | OP_CMP | OP_TRAP => result := sum_with_carry(63 downto 0); @@ -715,6 +739,9 @@ begin is_branch := '1'; taken_branch := '1'; abs_branch := insn_aa(e_in.insn); + if ctrl.msr(MSR_BE) = '1' then + do_trace := '1'; + end if; when OP_BC => -- read_data1 is CTR bo := insn_bo(e_in.insn); @@ -727,6 +754,9 @@ begin is_branch := '1'; taken_branch := ppc_bc_taken(bo, bi, cr_in, a_in); abs_branch := insn_aa(e_in.insn); + if ctrl.msr(MSR_BE) = '1' then + do_trace := '1'; + end if; when OP_BCREG => -- read_data1 is CTR -- read_data2 is target register (CTR, LR or TAR) @@ -740,6 +770,9 @@ begin is_branch := '1'; taken_branch := ppc_bc_taken(bo, bi, cr_in, a_in); abs_branch := '1'; + if ctrl.msr(MSR_BE) = '1' then + do_trace := '1'; + end if; when OP_RFID => v.f.virt_mode := a_in(MSR_IR) or a_in(MSR_PR); @@ -760,6 +793,7 @@ begin 
is_branch := '1'; taken_branch := '1'; abs_branch := '1'; + do_trace := '0'; when OP_CNTZ => v.e.valid := '0'; @@ -1135,6 +1169,10 @@ begin end if; end if; + if do_trace = '1' then + v.trace_next := '1'; + end if; + v.e.write_data := result; v.e.write_enable := result_en and not exception; From e1672ea7097b18ab4620afcf651be996ac3a0d00 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 29 Aug 2020 19:30:56 +1000 Subject: [PATCH 3/3] tests: Add a test for trace interrupts Signed-off-by: Paul Mackerras --- tests/test_trace.bin | Bin 0 -> 11596 bytes tests/test_trace.console_out | 7 ++ tests/trace/Makefile | 3 + tests/trace/head.S | 216 ++++++++++++++++++++++++++++++++++ tests/trace/powerpc.lds | 27 +++++ tests/trace/trace.c | 222 +++++++++++++++++++++++++++++++++++ tests/update_console_tests | 2 +- 7 files changed, 476 insertions(+), 1 deletion(-) create mode 100755 tests/test_trace.bin create mode 100644 tests/test_trace.console_out create mode 100644 tests/trace/Makefile create mode 100644 tests/trace/head.S create mode 100644 tests/trace/powerpc.lds create mode 100644 tests/trace/trace.c diff --git a/tests/test_trace.bin b/tests/test_trace.bin new file mode 100755 index 0000000000000000000000000000000000000000..b9a612d6b6df4450a701117064157bb6fd10b545 GIT binary patch literal 11596 zcmeHNZ)_9i9sZqd;>%x5mk-%s)=xc?prxcAU@*z9 z&h;gWG>{@KO#9G&7>q9`RG}%8_<;J!tP`TrKD1I*Ax#>jrBZCuEv)|>8+vKhAL5hF_rCA%x%YjZ=lyf`>myPIh=ya=yu%4xk5X_rMx>%_5@nMpn>?xqCu1YO zIF{T^lcC>yIGO0A>4@QeEqUiKX$lR8@+L)zq%e_Rz2^1DaUIu>`&A;6@0vp17<@`2 z(oyFu>YPQLvq#g<%*G7&nWW#NA0QWyL_@~l_cSBQSShfyd0Mf{tzhG$;lLAdz)kY$ z`{s*Jr0zN@)zQAVj_s2N`tPlyezY&HWBarT`itwQzm6gDWFbw^U)aFXLiwtrUQa-zX~nr0*v8Rtpy5En zfrbMO2O17E9B4SuaG>Ep!-0ka4F~>j4hW=#X+A3RK0w}FDDqywRqO>Al*aGePjJqjEcO1stvwT!+IRNgeE`L9y{mc1?5CdH8owVP(3pJLJBIfI z{PK7U{@Zu;C%4CYe@%)q;A&T88tK(;o70L!{)`)Sx~Vtt5%N9AyO6$)H2)S)o8`S3 zK^KTt^L3LHV$Od7=OFo)9o}6Pyn{4V=Dnp(-Xq|BskwspNNwI|8#?}H-e(=&-K%)F 
z+J3+CuloFE-d~CpyoVmnyVdp^yr){J`LX>DuHx;ClMA2M&pTYnyNch!3hy=f?R{{Z zl>H8`;_djIxl=#y8SmQs_O8M2)$?)4;p@7lK8N|}@C`kLZ)cVH_)${@-zxLb86P|! zpIgJYUd?yB3g0ui@A~Vj!nb>ke0hx2yMCDFr~ScLrW(1IQyblJNzdCxTaPuT5UM|(k(V-wPOH$~9#o>b-f=d{ahn&y#k3*G9!_{JFT zX7q#9d9MrU#l+SW_Eve{^GJ9e-1vC_ev7z2$NR4FZ_O74%uV)~F;{>g zeaG1t%HzdPQyz@*KO)VsG-9=$2dsMm{TQ+}n&xzv*C9GSBfkWGoaepSdE^J|JU@FT zPRz(ZvhyC~rC-g+>>cAbA%6q;c01pM{M^YI`M$+zI_yPWIW;4Hjy#@a*yknHr5{;j zjn2L@8;C>D`NMwRBB}cH9hPoUkK)>UhAPN^^XSW1JPxYXzB%)) zx?;bp!ADoXje@C<{;Iro=ER#FYw=MK^56{td zlBH^1q-j7RF@`f?`L>Uel$Hjl zNlZW!%6m)Y@R_~nVvi74R>dgI?^Krl5Af-q$RrKp-r)rLd3hVZZ;AZx4~a=nzAy3K z=HY~HeP6;HzN{-D$x6HaDkeP67&%xfbCVKkE0-x2&gqwT z4wmv>V4$T`FUqVwVU6^{+omaox}*x7sJB|5vA)vy?GBJNcF+8n%a~`7&PBSR$%VFf zJ;nbX_&zusTj6WBi})?{1;<@9rovN;SGGK^P@{dEI(qVyg!D+{(4qDgqJ5E{?dLqF zF>Z2|YYKDrA;yRB#>?c1M})0ySY?p!!rAtfY0l$H`#O{@kvGo^hgv!)a>zn}gbzrqh>^=)S z!lnXhc{&GN8n{0&&hdRe+8u0)+=TJlP<$DfV8Pa~w?hK6S377AY9M-8e{vc3=Ox^8 z8dpBJ3p$5$MtW(@_l@8ojT4NC<}li PA6&*-mhjNFd_wxa!bJfU literal 0 HcmV?d00001 diff --git a/tests/test_trace.console_out b/tests/test_trace.console_out new file mode 100644 index 0000000..340756c --- /dev/null +++ b/tests/test_trace.console_out @@ -0,0 +1,7 @@ +test 01:PASS +test 02:PASS +test 03:PASS +test 04:PASS +test 05:PASS +test 06:PASS +test 07:PASS diff --git a/tests/trace/Makefile b/tests/trace/Makefile new file mode 100644 index 0000000..03f9874 --- /dev/null +++ b/tests/trace/Makefile @@ -0,0 +1,3 @@ +TEST=trace + +include ../Makefile.test diff --git a/tests/trace/head.S b/tests/trace/head.S new file mode 100644 index 0000000..d23aeb7 --- /dev/null +++ b/tests/trace/head.S @@ -0,0 +1,216 @@ +/* Copyright 2020 Paul Mackerras, IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Load an immediate 64-bit value into a register */ +#define LOAD_IMM64(r, e) \ + lis r,(e)@highest; \ + ori r,r,(e)@higher; \ + rldicr r,r, 32, 31; \ + oris r,r, (e)@h; \ + ori r,r, (e)@l; + + .section ".head","ax" + + /* + * Microwatt currently enters in LE mode at 0x0, so we don't need to + * do any endian fix ups + */ + . = 0 +.global _start +_start: + LOAD_IMM64(%r10,__bss_start) + LOAD_IMM64(%r11,__bss_end) + subf %r11,%r10,%r11 + addi %r11,%r11,63 + srdi. %r11,%r11,6 + beq 2f + mtctr %r11 +1: dcbz 0,%r10 + addi %r10,%r10,64 + bdnz 1b + +2: LOAD_IMM64(%r1,__stack_top) + li %r0,0 + stdu %r0,-16(%r1) + mtsprg2 %r0 + LOAD_IMM64(%r12, main) + mtctr %r12 + bctrl + attn // terminate on exit + b . + +exception: + mtsprg3 %r0 + mfsprg2 %r0 + cmpdi %r0,0 + bne call_ret + attn + +#define EXCEPTION(nr) \ + .= nr ;\ + li %r0,nr ;\ + b exception + + EXCEPTION(0x300) + EXCEPTION(0x380) + EXCEPTION(0x400) + EXCEPTION(0x480) + EXCEPTION(0x500) + EXCEPTION(0x600) + EXCEPTION(0x700) + EXCEPTION(0x800) + EXCEPTION(0x900) + EXCEPTION(0x980) + EXCEPTION(0xa00) + EXCEPTION(0xb00) + EXCEPTION(0xc00) + EXCEPTION(0xd00) + EXCEPTION(0xe00) + EXCEPTION(0xe20) + EXCEPTION(0xe40) + EXCEPTION(0xe60) + EXCEPTION(0xe80) + EXCEPTION(0xf00) + EXCEPTION(0xf20) + EXCEPTION(0xf40) + EXCEPTION(0xf60) + EXCEPTION(0xf80) + + . = 0x1000 + /* + * Call a function in a context with a given MSR value. + * r3, r4 = args; r5 = function, r6 = MSR, + * r7 = array in which to return r3 and r4 + * Return value is trap number or 0. 
+ */ + .globl callit +callit: + mflr %r0 + std %r0,16(%r1) + stdu %r1,-256(%r1) + mfcr %r8 + stw %r8,100(%r1) + std %r13,104(%r1) + std %r14,112(%r1) + std %r15,120(%r1) + std %r16,128(%r1) + std %r17,136(%r1) + std %r18,144(%r1) + std %r19,152(%r1) + std %r20,160(%r1) + std %r21,168(%r1) + std %r22,176(%r1) + std %r23,184(%r1) + std %r24,192(%r1) + std %r25,200(%r1) + std %r26,208(%r1) + std %r27,216(%r1) + std %r28,224(%r1) + std %r29,232(%r1) + std %r30,240(%r1) + std %r31,248(%r1) + li %r10,call_ret@l + mtlr %r10 + mtsprg0 %r7 + mtsprg1 %r1 + mtsprg2 %r2 + li %r11,0 + mtsprg3 %r11 + mtsrr0 %r5 + mtsrr1 %r6 + rfid +call_ret: + mfsprg0 %r7 /* restore regs in case of trap */ + mfsprg1 %r1 + mfsprg2 %r2 + li %r0,0 + mtsprg2 %r0 + std %r3,0(%r7) + std %r4,8(%r7) + mfsprg3 %r3 + lwz %r8,100(%r1) + mtcr %r8 + ld %r13,104(%r1) + ld %r14,112(%r1) + ld %r15,120(%r1) + ld %r16,128(%r1) + ld %r17,136(%r1) + ld %r18,144(%r1) + ld %r19,152(%r1) + ld %r20,160(%r1) + ld %r21,168(%r1) + ld %r22,176(%r1) + ld %r23,184(%r1) + ld %r24,192(%r1) + ld %r25,200(%r1) + ld %r26,208(%r1) + ld %r27,216(%r1) + ld %r28,224(%r1) + ld %r29,232(%r1) + ld %r30,240(%r1) + ld %r31,248(%r1) + addi %r1,%r1,256 + ld %r0,16(%r1) + mtlr %r0 + blr + + .global test1 +test1: + addi %r3,%r4,1 + li %r3,0 + blr + + .global test2 +test2: + ld %r3,0(%r4) + li %r3,-1 + blr + + .global test3 +test3: + stw %r3,0(%r4) + li %r3,-1 + blr + + .global test4 +test4: + dcbt 0,%r3 + li %r3,-1 + blr + + .global test5 +test5: + dcbtst 0,%r3 + li %r3,-1 + blr + + .global test6 +test6: + nop + nop + b 1f + li %r3,2 + blr +1: li %r3,1 + blr + + .global test7 +test7: + li %r4,1 + cmpwi %r4,0 + bne 1f + li %r3,-1 +1: blr diff --git a/tests/trace/powerpc.lds b/tests/trace/powerpc.lds new file mode 100644 index 0000000..99611ab --- /dev/null +++ b/tests/trace/powerpc.lds @@ -0,0 +1,27 @@ +SECTIONS +{ + . = 0; + _start = .; + .head : { + KEEP(*(.head)) + } + . 
= ALIGN(0x1000); + .text : { *(.text) *(.text.*) *(.rodata) *(.rodata.*) } + . = ALIGN(0x1000); + .data : { *(.data) *(.data.*) *(.got) *(.toc) } + . = ALIGN(0x80); + __bss_start = .; + .bss : { + *(.dynsbss) + *(.sbss) + *(.scommon) + *(.dynbss) + *(.bss) + *(.common) + *(.bss.*) + } + . = ALIGN(0x80); + __bss_end = .; + . = . + 0x4000; + __stack_top = .; +} diff --git a/tests/trace/trace.c b/tests/trace/trace.c new file mode 100644 index 0000000..5f7ae9f --- /dev/null +++ b/tests/trace/trace.c @@ -0,0 +1,222 @@ +#include <stddef.h> +#include <stdbool.h> +#include <stdint.h> + +#include "console.h" + +extern unsigned long callit(unsigned long arg1, unsigned long arg2, + unsigned long (*fn)(unsigned long, unsigned long), + unsigned long msr, unsigned long *regs); + +#define MSR_SE 0x400 +#define MSR_BE 0x200 + +#define SRR0 26 +#define SRR1 27 +#define SPRG0 272 +#define SPRG1 273 + +static inline unsigned long mfmsr(void) +{ + unsigned long msr; + + __asm__ volatile ("mfmsr %0" : "=r" (msr)); + return msr; +} + +static inline unsigned long mfspr(int sprnum) +{ + long val; + + __asm__ volatile("mfspr %0,%1" : "=r" (val) : "i" (sprnum)); + return val; +} + +static inline void mtspr(int sprnum, unsigned long val) +{ + __asm__ volatile("mtspr %0,%1" : : "i" (sprnum), "r" (val)); +} + +void print_string(const char *str) +{ + for (; *str; ++str) + putchar(*str); +} + +void print_hex(unsigned long val, int ndigits) +{ + int i, x; + + for (i = (ndigits - 1) * 4; i >= 0; i -= 4) { + x = (val >> i) & 0xf; + if (x >= 10) + putchar(x + 'a' - 10); + else + putchar(x + '0'); + } +} + +// i < 100 +void print_test_number(int i) +{ + print_string("test "); + putchar(48 + i/10); + putchar(48 + i%10); + putchar(':'); +} + +extern unsigned long test1(unsigned long, unsigned long); + +int trace_test_1(void) +{ + unsigned long ret; + unsigned long regs[2]; + + ret = callit(1, 2, test1, mfmsr() | MSR_SE, regs); + if (ret != 0xd00 || mfspr(SRR0) != (unsigned long)&test1 + 4) + return ret + 1; + if ((mfspr(SRR1) &
0x781f0000) != 0x40000000) + return ret + 2; + if (regs[0] != 3 || regs[1] != 2) + return 3; + return 0; +} + +extern unsigned long test2(unsigned long, unsigned long); + +int trace_test_2(void) +{ + unsigned long x = 3; + unsigned long ret; + unsigned long regs[2]; + + ret = callit(1, (unsigned long)&x, test2, mfmsr() | MSR_SE, regs); + if (ret != 0xd00 || mfspr(SRR0) != (unsigned long)&test2 + 4) + return ret + 1; + if ((mfspr(SRR1) & 0x781f0000) != 0x50000000) + return ret + 2; + if (regs[0] != 3 || x != 3) + return 3; + return 0; +} + +extern unsigned long test3(unsigned long, unsigned long); + +int trace_test_3(void) +{ + unsigned int x = 3; + unsigned long ret; + unsigned long regs[2]; + + ret = callit(11, (unsigned long)&x, test3, mfmsr() | MSR_SE, regs); + if (ret != 0xd00 || mfspr(SRR0) != (unsigned long)&test3 + 4) + return ret + 1; + if ((mfspr(SRR1) & 0x781f0000) != 0x48000000) + return ret + 2; + if (regs[0] != 11 || x != 11) + return 3; + return 0; +} + +extern unsigned long test4(unsigned long, unsigned long); + +int trace_test_4(void) +{ + unsigned long x = 3; + unsigned long ret; + unsigned long regs[2]; + + ret = callit(1, (unsigned long)&x, test4, mfmsr() | MSR_SE, regs); + if (ret != 0xd00 || mfspr(SRR0) != (unsigned long)&test4 + 4) + return ret + 1; + if ((mfspr(SRR1) & 0x781f0000) != 0x50000000) + return ret + 2; + if (regs[0] != 1 || x != 3) + return 3; + return 0; +} + +extern unsigned long test5(unsigned long, unsigned long); + +int trace_test_5(void) +{ + unsigned int x = 7; + unsigned long ret; + unsigned long regs[2]; + + ret = callit(11, (unsigned long)&x, test5, mfmsr() | MSR_SE, regs); + if (ret != 0xd00 || mfspr(SRR0) != (unsigned long)&test5 + 4) + return ret + 1; + if ((mfspr(SRR1) & 0x781f0000) != 0x48000000) + return ret + 2; + if (regs[0] != 11 || x != 7) + return 3; + return 0; +} + +extern unsigned long test6(unsigned long, unsigned long); + +int trace_test_6(void) +{ + unsigned long ret; + unsigned long regs[2]; + + ret = 
callit(11, 55, test6, mfmsr() | MSR_BE, regs); + if (ret != 0xd00 || mfspr(SRR0) != (unsigned long)&test6 + 20) + return ret + 1; + if ((mfspr(SRR1) & 0x781f0000) != 0x40000000) + return ret + 2; + if (regs[0] != 11 || regs[1] != 55) + return 3; + return 0; +} + +extern unsigned long test7(unsigned long, unsigned long); + +int trace_test_7(void) +{ + unsigned long ret; + unsigned long regs[2]; + + ret = callit(11, 55, test7, mfmsr() | MSR_BE, regs); + if (ret != 0xd00 || mfspr(SRR0) != (unsigned long)&test7 + 16) + return ret + 1; + if ((mfspr(SRR1) & 0x781f0000) != 0x40000000) + return ret + 2; + if (regs[0] != 11 || regs[1] != 1) + return 3; + return 0; +} + +int fail = 0; + +void do_test(int num, int (*test)(void)) +{ + int ret; + + print_test_number(num); + ret = test(); + if (ret == 0) { + print_string("PASS\r\n"); + } else { + fail = 1; + print_string("FAIL "); + print_hex(ret, 4); + print_string("\r\n"); + } +} + +int main(void) +{ + console_init(); + + do_test(1, trace_test_1); + do_test(2, trace_test_2); + do_test(3, trace_test_3); + do_test(4, trace_test_4); + do_test(5, trace_test_5); + do_test(6, trace_test_6); + do_test(7, trace_test_7); + + return fail; +} diff --git a/tests/update_console_tests b/tests/update_console_tests index ffb30c7..906b0cc 100755 --- a/tests/update_console_tests +++ b/tests/update_console_tests @@ -3,7 +3,7 @@ # Script to update console related tests from source # -for i in sc illegal decrementer xics privileged mmu misc modes reservation ; do +for i in sc illegal decrementer xics privileged mmu misc modes reservation trace ; do cd $i make cd -