From 722f239c025e55bb45e477ca70f8f6500d7801b8 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 26 Dec 2024 22:09:51 +1100 Subject: [PATCH] Reimplement quadword loads and stores This adds implementations of lq, plq, stq, pstq, lqarx and stqcx. Because register file addresses are now computed in decode1 before we have the decode table entry for the instruction, we have to check the icode directly to know when to read register RS|1 before RS (i.e. for stq and stqcx in LE mode, but not pstq). For the second instance of the instruction, loadstore1 uses the EA from the first instance + 8. It generates an alignment interrupt for unaligned lqarx and stqcx and for lq in LE mode with an unaligned address. (The reason for the latter case is that it writes RT|1 before RT, and if we have RA = RT|1 and the second instance traps, we will have overwritten RA.) Signed-off-by: Paul Mackerras --- common.vhdl | 5 +++- dcache.vhdl | 4 ++-- decode1.vhdl | 19 +++++++++++++++ decode2.vhdl | 25 +++++++++++++++++++- decode_types.vhdl | 60 ++++++++++++++++++++++++++++------------------- execute1.vhdl | 5 ++-- loadstore1.vhdl | 36 +++++++++++++++++++++++++--- predecode.vhdl | 15 +++++++++++- 8 files changed, 135 insertions(+), 34 deletions(-) diff --git a/common.vhdl b/common.vhdl index 7c79ccf..3af1d7b 100644 --- a/common.vhdl +++ b/common.vhdl @@ -427,6 +427,7 @@ package common is prefix : std_ulogic_vector(25 downto 0); illegal_suffix : std_ulogic; misaligned_prefix : std_ulogic; + illegal_form : std_ulogic; uses_tar : std_ulogic; uses_dscr : std_ulogic; end record; @@ -450,7 +451,7 @@ package common is dbg_spr_access => '0', dec_ctr => '0', prefixed => '0', prefix => (others => '0'), illegal_suffix => '0', - misaligned_prefix => '0', uses_tar => '0', uses_dscr => '0', + misaligned_prefix => '0', illegal_form => '0', uses_tar => '0', uses_dscr => '0', others => (others => '0')); type MultiplyInputType is record @@ -604,6 +605,8 @@ package common is dcbz : std_ulogic; nc : 
std_ulogic; reserve : std_ulogic; + atomic_qw : std_ulogic; -- part of a quadword atomic op + atomic_last : std_ulogic; virt_mode : std_ulogic; priv_mode : std_ulogic; addr : std_ulogic_vector(63 downto 0); diff --git a/dcache.vhdl b/dcache.vhdl index c9541e5..807a2dc 100644 --- a/dcache.vhdl +++ b/dcache.vhdl @@ -1112,10 +1112,10 @@ begin -- XXX or if r0.req.nc = '1' if r0.req.load = '1' then -- load with reservation - set_rsrv <= '1'; + set_rsrv <= not r0.req.atomic_qw or r0.req.atomic_last; else -- store conditional - clear_rsrv <= '1'; + clear_rsrv <= not r0.req.atomic_qw or r0.req.atomic_last; if reservation.valid = '0' or r0.req.addr(63 downto LINE_OFF_BITS) /= reservation.addr then cancel_store <= '1'; diff --git a/decode1.vhdl b/decode1.vhdl index ebc5993..643523b 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -237,6 +237,8 @@ architecture behaviour of decode1 is INSN_lhzu => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD), INSN_lhzux => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD), INSN_lhzx => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), + INSN_lq => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_DQ, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', DRTP), + INSN_lqarx => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '1', '0', '0', NONE, '0', '0', DRTP), INSN_lwa => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_DS, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_lwarx => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '1', '0', '0', NONE, '0', '0', NONE), INSN_lwaux => (LDST, NONE, OP_LOAD, 
RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '1', '0', '0', '0', NONE, '0', '0', DUPD), @@ -292,6 +294,7 @@ architecture behaviour of decode1 is INSN_plfs => (LDST, FPU, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE), INSN_plha => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_plhz => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), + INSN_plq => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', DRTP), INSN_plwa => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_plwz => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_pnop => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), @@ -300,6 +303,7 @@ architecture behaviour of decode1 is INSN_pstfd => (LDST, FPU, OP_STORE, RA0_OR_CIA, CONST_PSI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_pstfs => (LDST, FPU, OP_STORE, RA0_OR_CIA, CONST_PSI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE), INSN_psth => (LDST, NONE, OP_STORE, RA0_OR_CIA, CONST_PSI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), + INSN_pstq => (LDST, NONE, OP_STORE, RA0_OR_CIA, CONST_PSI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', DRSP), INSN_pstw => (LDST, NONE, 
OP_STORE, RA0_OR_CIA, CONST_PSI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_popcntb => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_popcntd => (ALU, NONE, OP_COUNTB, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), @@ -358,6 +362,8 @@ architecture behaviour of decode1 is INSN_sthu => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_SI, RS, RA, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), INSN_sthux => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), INSN_sthx => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), + INSN_stq => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_DS, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', DRSP), + INSN_stqcx => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '1', '0', '0', ONE, '0', '0', DRSP), INSN_stw => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_stwbrx => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '1', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_stwcix => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '1', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), @@ -509,6 +515,7 @@ begin end if; else r.second <= '1'; + r.reg_c <= rin.reg_c; end if; end if; if rst = '1' then @@ -679,6 +686,12 @@ begin end if; end if; end if; + -- See if this is an instruction where repeat_t = DRSP and we need + -- to read RS|1 followed by RS, i.e. 
stq or stqcx. in LE mode + -- (note we don't have access to the decode for the current instruction) + if (icode = INSN_stq or icode = INSN_stqcx) and f_in.big_endian = '0' then + vr.reg_3_addr(0) := '1'; + end if; vr.read_1_enable := f_in.valid; vr.read_2_enable := f_in.valid and maybe_rb; vr.read_3_enable := f_in.valid; @@ -690,6 +703,12 @@ begin vr.read_1_enable := '0'; -- (not actually used) vr.read_2_enable := '0'; vr.read_3_enable := '1'; -- (not actually used) + -- For pstq, and for stq and stqcx in BE mode, + -- we need to read register RS|1 in the cycle after we read RS; + -- stq and stqcx in LE mode read RS. + if decode.repeat = DRSP then + vr.reg_3_addr(0) := r.prefixed or f_in.big_endian; + end if; end if; v.reg_a := vr.reg_1_addr; diff --git a/decode2.vhdl b/decode2.vhdl index 4a020da..7e993d5 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -348,7 +348,8 @@ begin elsif deferred = '0' then if dc2in.e.valid = '1' then report "execute " & to_hstring(dc2in.e.nia) & - " tag=" & integer'image(dc2in.e.instr_tag.tag) & std_ulogic'image(dc2in.e.instr_tag.valid); + " tag=" & integer'image(dc2in.e.instr_tag.tag) & std_ulogic'image(dc2in.e.instr_tag.valid) & + " rpt=" & std_ulogic'image(dc2in.e.repeat) & " 2nd=" & std_ulogic'image(dc2in.e.second) & " wr=" & to_hstring(dc2in.e.write_reg); end if; dc2 <= dc2in; elsif dc2.read_rspr = '0' then @@ -383,6 +384,16 @@ begin -- update-form loads, 2nd instruction writes RA dec_o.reg := dec_a.reg; end if; + when DRSP => + -- non-prefixed stq, stqcx do RS|1, RS in LE mode; others do RS, RS|1 + if d_in.second = (d_in.big_endian or d_in.prefixed) then + dec_c.reg(0) := '1'; -- do RS, RS|1 + end if; + when DRTP => + -- non-prefixed lq, lqarx do RT|1, RT in LE mode; others do RT, RT|1 + if d_in.second = (d_in.big_endian or d_in.prefixed) then + dec_o.reg(0) := '1'; + end if; when others => end case; -- For the second instance of a doubled instruction, we ignore the RA @@ -642,6 +653,18 @@ begin v.e.prefix := d_in.prefix; 
v.e.illegal_suffix := d_in.illegal_suffix; v.e.misaligned_prefix := d_in.misaligned_prefix; + + -- check for invalid forms that cause an illegal instruction interrupt + -- Does RA = RT for a load quadword instr, or RB = RT for lqarx? + if d_in.decode.repeat = DRTP and + (insn_ra(d_in.insn) = insn_rt(d_in.insn) or + (d_in.decode.reserve = '1' and insn_rb(d_in.insn) = insn_rt(d_in.insn))) then + v.e.illegal_form := '1'; + end if; + -- Is RS/RT odd for a load/store quadword instruction? + if (d_in.decode.repeat = DRSP or d_in.decode.repeat = DRTP) and d_in.insn(21) = '1' then + v.e.illegal_form := '1'; + end if; end if; -- issue control diff --git a/decode_types.vhdl b/decode_types.vhdl index dc104cd..03e958b 100644 --- a/decode_types.vhdl +++ b/decode_types.vhdl @@ -125,8 +125,9 @@ package decode_types is INSN_std, INSN_stdu, INSN_sthu, - INSN_stwu, - INSN_subfic, -- 90 + INSN_stq, + INSN_stwu, -- 90 + INSN_subfic, INSN_subfme, INSN_subfze, INSN_sync, @@ -135,23 +136,23 @@ package decode_types is INSN_twi, INSN_wait, INSN_xori, - INSN_xoris, - -- pad to 104 - INSN_064, INSN_065, INSN_066, INSN_067, + INSN_xoris, -- 100 + -- pad to 102 + INSN_065, -- Non-prefixed instructions that have a MLS:D prefixed form and -- their corresponding prefixed instructions. 
-- The non-prefixed versions have even indexes so that we can -- convert them to the prefixed version by setting bit 0 - INSN_addi, -- 104 + INSN_addi, -- 102 INSN_paddi, INSN_lbz, INSN_plbz, INSN_lha, INSN_plha, - INSN_lhz, -- 110 + INSN_lhz, INSN_plhz, - INSN_lwz, + INSN_lwz, -- 110 INSN_plwz, INSN_stb, INSN_pstb, @@ -161,15 +162,18 @@ package decode_types is INSN_pstw, -- Slots for non-prefixed opcodes that are 8LS:D when prefixed - INSN_lhzu, -- 120 + INSN_lhzu, INSN_plwa, + INSN_lq, -- 120 + INSN_plq, INSN_op57, INSN_pld, + INSN_op60, + INSN_pstq, INSN_op61, INSN_pstd, -- pad to 128 to simplify comparison logic - INSN_07e, INSN_07f, -- The following instructions have an RB operand but don't access FPRs INSN_add, @@ -219,12 +223,13 @@ package decode_types is INSN_lhzcix, INSN_lhzx, INSN_lhzux, + INSN_lqarx, INSN_lwarx, INSN_lwax, INSN_lwaux, INSN_lwbrx, - INSN_lwzcix, - INSN_lwzx, -- 180 + INSN_lwzcix, -- 180 + INSN_lwzx, INSN_lwzux, INSN_modsd, INSN_modsw, @@ -233,8 +238,8 @@ package decode_types is INSN_mulhw, INSN_mulhwu, INSN_mulhd, - INSN_mulhdu, - INSN_mullw, -- 190 + INSN_mulhdu, -- 190 + INSN_mullw, INSN_mulld, INSN_nand, INSN_nor, @@ -243,8 +248,8 @@ package decode_types is INSN_pdepd, INSN_pextd, INSN_rldcl, - INSN_rldcr, - INSN_rlwnm, -- 200 + INSN_rldcr, -- 200 + INSN_rlwnm, INSN_slw, INSN_sld, INSN_sraw, @@ -253,8 +258,8 @@ package decode_types is INSN_srd, INSN_stbcix, INSN_stbcx, - INSN_stbx, - INSN_stbux, -- 210 + INSN_stbx, -- 210 + INSN_stbux, INSN_stdbrx, INSN_stdcix, INSN_stdcx, @@ -263,8 +268,9 @@ package decode_types is INSN_sthbrx, INSN_sthcix, INSN_sthcx, - INSN_sthx, - INSN_sthux, -- 220 + INSN_sthx, -- 220 + INSN_sthux, + INSN_stqcx, INSN_stwbrx, INSN_stwcix, INSN_stwcx, @@ -272,15 +278,14 @@ package decode_types is INSN_stwux, INSN_subf, INSN_subfc, - INSN_subfe, + INSN_subfe, -- 230 INSN_td, - INSN_tlbie, -- 230 + INSN_tlbie, INSN_tlbiel, INSN_tw, INSN_xor, -- pad to 240 to simplify comparison logic - INSN_234, INSN_235, INSN_236, 
INSN_237, INSN_238, INSN_239, -- The following instructions have a third input addressed by RC @@ -439,7 +444,9 @@ package decode_types is type length_t is (NONE, is1B, is2B, is4B, is8B); type repeat_t is (NONE, -- instruction is not repeated - DUPD); -- update-form load + DUPD, -- update-form load + DRSP, -- double RS (RS, RS+1) + DRTP); -- double RT (RT, RT+1, or RT+1, RT) type decode_rom_t is record unit : unit_t; @@ -523,6 +530,7 @@ package body decode_types is when INSN_lhau => return "101011"; when INSN_lhz => return "101000"; when INSN_lhzu => return "101001"; + when INSN_lq => return "111000"; when INSN_lwz => return "100000"; when INSN_lwzu => return "100001"; when INSN_mulli => return "000111"; @@ -542,6 +550,7 @@ package body decode_types is when INSN_sth => return "101100"; when INSN_sthu => return "101101"; when INSN_stw => return "100100"; + when INSN_stq => return "111110"; when INSN_stwu => return "100101"; when INSN_subfic => return "001000"; when INSN_tdi => return "000010"; @@ -587,6 +596,7 @@ package body decode_types is when INSN_fnmadd => return "111111"; when INSN_prefix => return "000001"; when INSN_op57 => return "111001"; + when INSN_op60 => return "111100"; when INSN_op61 => return "111101"; when INSN_add => return "011111"; when INSN_addc => return "011111"; @@ -654,6 +664,7 @@ package body decode_types is when INSN_lhzcix => return "011111"; when INSN_lhzux => return "011111"; when INSN_lhzx => return "011111"; + when INSN_lqarx => return "011111"; when INSN_lwarx => return "011111"; when INSN_lwaux => return "011111"; when INSN_lwax => return "011111"; @@ -719,6 +730,7 @@ package body decode_types is when INSN_sthcx => return "011111"; when INSN_sthux => return "011111"; when INSN_sthx => return "011111"; + when INSN_stqcx => return "011111"; when INSN_stwbrx => return "011111"; when INSN_stwcix => return "011111"; when INSN_stwcx => return "011111"; diff --git a/execute1.vhdl b/execute1.vhdl index 2cc9c35..ecb1e63 100644 --- 
a/execute1.vhdl +++ b/execute1.vhdl @@ -704,7 +704,8 @@ begin if valid_in = '1' then report "execute " & to_hstring(e_in.nia) & " op=" & insn_type_t'image(e_in.insn_type) & " wr=" & to_hstring(ex1in.e.write_reg) & " we=" & std_ulogic'image(ex1in.e.write_enable) & - " tag=" & integer'image(ex1in.e.instr_tag.tag) & std_ulogic'image(ex1in.e.instr_tag.valid); + " tag=" & integer'image(ex1in.e.instr_tag.tag) & std_ulogic'image(ex1in.e.instr_tag.valid) & + " 2nd=" & std_ulogic'image(e_in.second); end if; -- We mustn't get stalled on a cycle where execute2 is -- completing an instruction or generating an interrupt @@ -1147,7 +1148,7 @@ begin slow_op := '0'; owait := '0'; - if e_in.illegal_suffix = '1' then + if e_in.illegal_suffix = '1' or e_in.illegal_form = '1' then illegal := '1'; elsif ex1.msr(MSR_PR) = '1' and instr_is_privileged(e_in.insn_type, e_in.insn) then privileged := '1'; diff --git a/loadstore1.vhdl b/loadstore1.vhdl index fc8c158..dcacc75 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -84,6 +84,8 @@ architecture behave of loadstore1 is update : std_ulogic; xerc : xer_common_t; reserve : std_ulogic; + atomic_qw : std_ulogic; + atomic_last : std_ulogic; rc : std_ulogic; nc : std_ulogic; -- non-cacheable access virt_mode : std_ulogic; @@ -108,6 +110,7 @@ architecture behave of loadstore1 is elt_length => x"0", byte_reverse => '0', brev_mask => "000", sign_extend => '0', update => '0', xerc => xerc_init, reserve => '0', + atomic_qw => '0', atomic_last => '0', rc => '0', nc => '0', virt_mode => '0', priv_mode => '0', load_sp => '0', sprsel => "00", ric => "00", is_slbia => '0', align_intr => '0', @@ -447,7 +450,10 @@ begin if l_in.second = '1' then -- for an update-form load, use the previous address -- as the value to write back to RA. - addr := r1.addr0; + -- for a quadword load or store, use the previous + -- address + 8. 
+ addr := std_ulogic_vector(unsigned(r1.addr0(63 downto 3)) + not l_in.update) & + r1.addr0(2 downto 0); end if; if l_in.mode_32bit = '1' then addr(63 downto 32) := (others => '0'); @@ -474,12 +480,32 @@ begin misaligned := or (addr_mask and addr(2 downto 0)); v.align_intr := l_in.reserve and misaligned; + -- is this a quadword load or store? i.e. lq plq stq pstq lqarx stqcx. + if l_in.repeat = '1' and l_in.update = '0' then + -- is the access aligned? + if misaligned = '0' and addr(3) = l_in.second then + -- Since the access is aligned we have to do it atomically + v.atomic_qw := '1'; + v.atomic_last := l_in.second; + else + -- lqarx/stqcx have to be aligned + if l_in.reserve = '1' then + v.align_intr := '1'; + end if; + -- We require non-prefixed lq in LE mode to be aligned in order + -- to avoid the case where RA = RT+1 and the second access faults + -- after the first has overwritten RA. + if l_in.op = OP_LOAD and l_in.byte_reverse = '0' and l_in.prefixed = '0' then + v.align_intr := '1'; + end if; + end if; + end if; + case l_in.op is when OP_STORE => v.store := '1'; when OP_LOAD => - -- Note: only RA updates have l_in.second = 1 - if l_in.second = '0' then + if l_in.update = '0' or l_in.second = '0' then v.load := '1'; if HAS_FPU and l_in.is_32bit = '1' then -- Allow an extra cycle for SP->DP precision conversion @@ -952,6 +978,8 @@ begin d_out.dcbz <= stage1_req.dcbz; d_out.nc <= stage1_req.nc; d_out.reserve <= stage1_req.reserve; + d_out.atomic_qw <= stage1_req.atomic_qw; + d_out.atomic_last <= stage1_req.atomic_last; d_out.addr <= stage1_req.addr; d_out.byte_sel <= stage1_req.byte_sel; d_out.virt_mode <= stage1_req.virt_mode; @@ -962,6 +990,8 @@ begin d_out.dcbz <= r2.req.dcbz; d_out.nc <= r2.req.nc; d_out.reserve <= r2.req.reserve; + d_out.atomic_qw <= r2.req.atomic_qw; + d_out.atomic_last <= r2.req.atomic_last; d_out.addr <= r2.req.addr; d_out.byte_sel <= r2.req.byte_sel; d_out.virt_mode <= r2.req.virt_mode; diff --git a/predecode.vhdl b/predecode.vhdl 
index 65cb751..e8689ef 100644 --- a/predecode.vhdl +++ b/predecode.vhdl @@ -121,6 +121,8 @@ architecture behaviour of predecoder is 2#011110_01110# to 2#011110_01111# => INSN_rldimi, 2#011110_10000# to 2#011110_10001# => INSN_rldcl, 2#011110_10010# to 2#011110_10011# => INSN_rldcr, + -- major opcode 56 + 2#111000_00000# to 2#111000_11111# => INSN_lq, -- major opcode 58 2#111010_00000# => INSN_ld, 2#111010_00001# => INSN_ldu, @@ -161,20 +163,28 @@ architecture behaviour of predecoder is -- major opcode 62 2#111110_00000# => INSN_std, 2#111110_00001# => INSN_stdu, + 2#111110_00010# => INSN_stq, 2#111110_00100# => INSN_std, 2#111110_00101# => INSN_stdu, + 2#111110_00110# => INSN_stq, 2#111110_01000# => INSN_std, 2#111110_01001# => INSN_stdu, + 2#111110_01010# => INSN_stq, 2#111110_01100# => INSN_std, 2#111110_01101# => INSN_stdu, + 2#111110_01110# => INSN_stq, 2#111110_10000# => INSN_std, 2#111110_10001# => INSN_stdu, + 2#111110_10010# => INSN_stq, 2#111110_10100# => INSN_std, 2#111110_10101# => INSN_stdu, + 2#111110_10110# => INSN_stq, 2#111110_11000# => INSN_std, 2#111110_11001# => INSN_stdu, + 2#111110_11010# => INSN_stq, 2#111110_11100# => INSN_std, 2#111110_11101# => INSN_stdu, + 2#111110_11110# => INSN_stq, -- major opcode 63 2#111111_00100# to 2#111111_00101# => INSN_fdiv, 2#111111_01000# to 2#111111_01001# => INSN_fsub, @@ -190,8 +200,9 @@ architecture behaviour of predecoder is 2#111111_11110# to 2#111111_11111# => INSN_fnmadd, -- prefix word, PO1 2#000001_00000# to 2#000001_11111# => INSN_prefix, - -- Major opcodes 57 and 61 are SFFS load/store instructions when prefixed + -- Major opcodes 57, 60 and 61 are SFFS load/store instructions when prefixed 2#111001_00000# to 2#111001_11111# => INSN_op57, + 2#111100_00000# to 2#111100_11111# => INSN_op60, 2#111101_00000# to 2#111101_11111# => INSN_op61, others => INSN_illegal ); @@ -317,6 +328,7 @@ architecture behaviour of predecoder is 2#0_11001_10101# => INSN_lhzcix, 2#0_01001_10111# => INSN_lhzux, 
2#0_01000_10111# => INSN_lhzx, + 2#0_01000_10100# => INSN_lqarx, 2#0_00000_10100# => INSN_lwarx, 2#0_01011_10101# => INSN_lwaux, 2#0_01010_10101# => INSN_lwax, @@ -405,6 +417,7 @@ architecture behaviour of predecoder is 2#0_10110_10110# => INSN_sthcx, 2#0_01101_10111# => INSN_sthux, 2#0_01100_10111# => INSN_sthx, + 2#0_00101_10110# => INSN_stqcx, 2#0_10100_10110# => INSN_stwbrx, 2#0_11100_10101# => INSN_stwcix, 2#0_00100_10110# => INSN_stwcx,