From 205c0e2c787590139d8a8d93521029c6e5c9c947 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 18 Sep 2023 22:15:07 +1000 Subject: [PATCH] Implement the wait instruction This implements the behaviour of the 'wait 0' instruction of pausing execution of instructions until an exception arises. The exceptions that terminate a wait are a pending trace exception, external interrupt request, PMU interrupt request, or decrementer negative exception. These exception conditions terminate a wait even if not enabled to generate an interrupt (e.g. if MSR[EE] is zero). This is implemented by having execute1 assert its busy_out signal while the wait state exists. The wait state is set by the completion of the wait instruction and cleared by a pending exception. If the WC operand of the wait instruction is non-zero, indicating wait for reservation loss or wait for a short period, then the wait instruction does not wait, but just acts as a no-op. In order to make space in the insn_type_t type without going over 64 elements, this combines OP_DCBT and OP_ICBT into a single OP_XCBT, since they were both no-ops (except for their influence on how SRR1 is set on a trace interrupt, where they were identical). Signed-off-by: Paul Mackerras --- common.vhdl | 3 ++- decode1.vhdl | 6 +++--- decode_types.vhdl | 5 +++-- execute1.vhdl | 28 +++++++++++++++++++++++----- scripts/fmt_log/fmt_log.c | 12 ++++++------ 5 files changed, 37 insertions(+), 17 deletions(-) diff --git a/common.vhdl b/common.vhdl index 9f38874..58033d1 100644 --- a/common.vhdl +++ b/common.vhdl @@ -248,6 +248,7 @@ package common is -- This needs to die... type ctrl_t is record + wait_state: std_ulogic; run: std_ulogic; tb: std_ulogic_vector(63 downto 0); dec: std_ulogic_vector(63 downto 0); @@ -264,7 +265,7 @@ package common is heir: std_ulogic_vector(63 downto 0); end record; constant ctrl_t_init : ctrl_t := - (run => '1', xer_low => 18x"0", + (wait_state => '0', run => '1', xer_low => 18x"0", fscr_ic => x"0", fscr_pref => '1', fscr_tar => '1', hfscr_ic => x"0", hfscr_pref => '1', hfscr_tar => '1', hfscr_fp => '1', others => (others => '0')); diff --git a/decode1.vhdl b/decode1.vhdl index 09f9f77..252a21f 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -128,7 +128,7 @@ architecture behaviour of decode1 is INSN_darn => (ALU, NONE, OP_DARN, NONE, NONE, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_dcbf => (ALU, NONE, OP_DCBF, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_dcbst => (ALU, NONE, OP_DCBST, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), - INSN_dcbt => (ALU, NONE, OP_DCBT, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), + INSN_dcbt => (ALU, NONE, OP_XCBT, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_dcbtst => (ALU, NONE, OP_DCBTST, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_dcbz => (LDST, NONE, OP_DCBZ, RA_OR_ZERO, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_divd => (DVU, NONE, OP_DIV, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RCOE, '0', '0', NONE), @@ -197,7 +197,7 @@ architecture behaviour of decode1 is INSN_ftdiv => (FPU, FPU, OP_FP_CMP, FRA, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_ftsqrt => (FPU, FPU, OP_FP_CMP, NONE, FRB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_icbi => (ALU, NONE, OP_ICBI, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), - INSN_icbt => (ALU, NONE, OP_ICBT, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), + INSN_icbt => (ALU, NONE, OP_XCBT, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_isel => (ALU, NONE, OP_ISEL, RA_OR_ZERO, RB, NONE, RT, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_isync => (ALU, NONE, OP_ISYNC, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_lbarx => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '1', '0', '0', NONE, '0', '0', NONE), @@ -373,7 +373,7 @@ architecture behaviour of decode1 is INSN_tlbsync => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_tw => (ALU, NONE, OP_TRAP, RA, RB, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE), INSN_twi => (ALU, NONE, OP_TRAP, RA, CONST_SI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE), - INSN_wait => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), + INSN_wait => (ALU, NONE, OP_WAIT, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), INSN_xor => (ALU, NONE, OP_XOR, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), INSN_xori => (ALU, NONE, OP_XOR, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_xoris => (ALU, NONE, OP_XOR, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), diff --git a/decode_types.vhdl b/decode_types.vhdl index 5b21fff..8cb732a 100644 --- a/decode_types.vhdl +++ b/decode_types.vhdl @@ -7,8 +7,8 @@ package decode_types is OP_BCD, OP_BPERM, OP_BREV, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB, OP_CNTZ, OP_CROP, - OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST, - OP_DCBZ, OP_ICBI, OP_ICBT, + OP_DARN, OP_DCBF, OP_DCBST, OP_XCBT, OP_DCBTST, + OP_DCBZ, OP_ICBI, OP_FP_CMP, OP_FP_ARITH, OP_FP_MOVE, OP_FP_MISC, OP_DIV, OP_DIVE, OP_MOD, OP_EXTS, OP_EXTSWSLI, @@ -24,6 +24,7 @@ package decode_types is OP_SYNC, OP_TLBIE, OP_TRAP, OP_XOR, OP_ADDG6S, + OP_WAIT, OP_FETCH_FAILED ); diff --git a/execute1.vhdl b/execute1.vhdl index ed79a3d..b1087ba 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -94,6 +94,7 @@ architecture behaviour of execute1 is write_heir : std_ulogic; set_heir : std_ulogic; write_ctrl : std_ulogic; + enter_wait : std_ulogic; end record; constant side_effect_init : side_effect_type := (others => '0'); @@ -551,7 +552,7 @@ begin -- N.B. the busy signal from each source includes the -- stage2 stall from that source in it. - busy_out <= l_in.busy or ex1.busy or fp_in.busy; + busy_out <= l_in.busy or ex1.busy or fp_in.busy or ctrl.wait_state; valid_in <= e_in.valid and not (busy_out or flush_in or ex1.e.redirect or ex1.e.interrupt); @@ -1146,7 +1147,7 @@ begin else illegal := '1'; end if; - when OP_NOP | OP_DCBF | OP_DCBST | OP_DCBT | OP_DCBTST | OP_ICBT => + when OP_NOP | OP_DCBF | OP_DCBST | OP_XCBT | OP_DCBTST => -- Do nothing when OP_ADD => if e_in.output_carry = '1' then @@ -1398,6 +1399,11 @@ begin owait := '1'; end if; + when OP_WAIT => + if e_in.insn(22 downto 21) = "00" then + v.se.enter_wait := '1'; + end if; + when OP_FETCH_FAILED => -- Handling an ITLB miss doesn't count as having executed an instruction v.do_trace := '0'; @@ -1513,7 +1519,7 @@ begin variable bypass_valid : std_ulogic; begin v := ex1; - if (ex1.busy or l_in.busy or fp_in.busy) = '0' then + if busy_out = '0' then v.e := actions.e; v.e.valid := '0'; v.oe := e_in.oe; @@ -1577,8 +1583,8 @@ begin v.e.srr1 := (others => '0'); v.e.srr1(47 - 33) := '1'; v.e.srr1(47 - 34) := ex1.prev_prefixed; - if ex1.prev_op = OP_LOAD or ex1.prev_op = OP_ICBI or ex1.prev_op = OP_ICBT or - ex1.prev_op = OP_DCBT or ex1.prev_op = OP_DCBST or ex1.prev_op = OP_DCBF then + if ex1.prev_op = OP_LOAD or ex1.prev_op = OP_ICBI or + ex1.prev_op = OP_XCBT or ex1.prev_op = OP_DCBST or ex1.prev_op = OP_DCBF then v.e.srr1(47 - 35) := '1'; elsif ex1.prev_op = OP_STORE or ex1.prev_op = OP_DCBZ or ex1.prev_op = OP_DCBTST then @@ -1802,6 +1808,7 @@ begin variable cr_mask : std_ulogic_vector(7 downto 0); variable sign, zero : std_ulogic; variable rcnz_hi, rcnz_lo : std_ulogic; + variable irq_exc : std_ulogic; begin -- Next insn adder used in a couple of places next_nia <= std_ulogic_vector(unsigned(ex1.e.last_nia) + 4); @@ -1960,6 +1967,17 @@ begin if ex1.se.write_ctrl = '1' then ctrl_tmp.run <= ex1.e.write_data(0); end if; + if ex1.se.enter_wait = '1' then + ctrl_tmp.wait_state <= '1'; + end if; + end if; + + -- pending exceptions clear any wait state + -- ex1.fp_exception_next is not tested because it is not possible to + -- get into wait state with a pending FP exception. + irq_exc := pmu_to_x.intr or ctrl.dec(63) or ext_irq_in; + if ex1.trace_next = '1' or irq_exc = '1' or interrupt_in.intr = '1' then + ctrl_tmp.wait_state <= '0'; end if; if interrupt_in.intr = '1' then diff --git a/scripts/fmt_log/fmt_log.c b/scripts/fmt_log/fmt_log.c index e15d42d..226cfbe 100644 --- a/scripts/fmt_log/fmt_log.c +++ b/scripts/fmt_log/fmt_log.c @@ -88,12 +88,12 @@ const char *ops[64] = { "illegal", "nop ", "add ", "attn ", "b ", "bc ", "bcreg ", "bcd ", "bperm ", "brev ", "cmp ", "cmpb ", "cmpeqb ", "cmprb ", "cntz ", "crop ", - "darn ", "dcbf ", "dcbst ", "dcbt ", "dcbtst ", "dcbz ", "icbi ", "icbt ", - "fpcmp ", "fparith", "fpmove ", "fpmisc ", "div ", "dive ", "mod ", "exts ", - "extswsl", "isel ", "isync ", "logic ", "ld ", "st ", "mcrxrx ", "mfcr ", - "mfmsr ", "mfspr ", "mtcrf ", "mtmsr ", "mtspr ", "mull64 ", "mulh64 ", "mulh32 ", - "popcnt ", "prty ", "rfid ", "rlc ", "rlcl ", "rlcr ", "sc ", "setb ", - "shl ", "shr ", "sync ", "tlbie ", "trap ", "xor ", "addg6s ", "ffail ", + "darn ", "dcbf ", "dcbst ", "xcbt ", "dcbtst ", "dcbz ", "icbi ", "fpcmp ", + "fparith", "fpmove ", "fpmisc ", "div ", "dive ", "mod ", "exts ", "extswsl", + "isel ", "isync ", "logic ", "ld ", "st ", "mcrxrx ", "mfcr ", "mfmsr ", + "mfspr ", "mtcrf ", "mtmsr ", "mtspr ", "mull64 ", "mulh64 ", "mulh32 ", "popcnt ", + "prty ", "rfid ", "rlc ", "rlcl ", "rlcr ", "sc ", "setb ", "shl ", + "shr ", "sync ", "tlbie ", "trap ", "xor ", "addg6s ", "wait ", "ffail ", }; const char *spr_names[13] =