From 5a28f76b6fa1d6b809ca043c068d973a6015136f Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 13 Jan 2025 19:42:57 +1100 Subject: [PATCH 1/8] execute1: Implement CIABR CIABR (Completed Instruction Address Breakpoint Register) is an SPR that contains an instruction address. When the instruction at that address completes, the CPU takes a Trace interrupt before executing the next instruction (provided the instruction doesn't cause some other interrupt and isn't an rfid, hrfid or rfscv instruction). Signed-off-by: Paul Mackerras --- common.vhdl | 3 +++ decode1.vhdl | 2 ++ execute1.vhdl | 29 ++++++++++++++++++++++++----- 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/common.vhdl b/common.vhdl index 76eaec2..d182a32 100644 --- a/common.vhdl +++ b/common.vhdl @@ -64,6 +64,7 @@ package common is constant SPR_DSCR : spr_num_t := 17; constant SPR_VRSAVE : spr_num_t := 256; constant SPR_PIR : spr_num_t := 1023; + constant SPR_CIABR : spr_num_t := 187; -- PMU registers constant SPR_UPMC1 : spr_num_t := 771; @@ -174,6 +175,7 @@ package common is constant SPRSEL_CTRL : spr_selector := 4x"a"; constant SPRSEL_DSCR : spr_selector := 4x"b"; constant SPRSEL_PIR : spr_selector := 4x"c"; + constant SPRSEL_CIABR : spr_selector := 4x"d"; constant SPRSEL_XER : spr_selector := 4x"f"; -- FSCR and HFSCR bit numbers @@ -275,6 +277,7 @@ package common is hfscr_fp: std_ulogic; heir: std_ulogic_vector(63 downto 0); dscr: std_ulogic_vector(24 downto 0); + ciabr: std_ulogic_vector(63 downto 0); end record; constant ctrl_t_init : ctrl_t := (wait_state => '0', run => '1', xer_low => 18x"0", diff --git a/decode1.vhdl b/decode1.vhdl index 0ea9ed1..8d2d2fb 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -488,6 +488,8 @@ architecture behaviour of decode1 is i.sel := SPRSEL_DSCR; when SPR_PIR => i.sel := SPRSEL_PIR; + when SPR_CIABR => + i.sel := SPRSEL_CIABR; when others => i.valid := '0'; end case; diff --git a/execute1.vhdl b/execute1.vhdl index 3b7ec2f..5240063 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -96,6 +96,7 @@ architecture behaviour of execute1 is set_heir : std_ulogic; write_ctrl : std_ulogic; write_dscr : std_ulogic; + write_ciabr : std_ulogic; enter_wait : std_ulogic; scv_trap : std_ulogic; end record; @@ -116,6 +117,7 @@ architecture behaviour of execute1 is start_div : std_ulogic; start_bsort : std_ulogic; do_trace : std_ulogic; + ciabr_trace : std_ulogic; fp_intr : std_ulogic; res2_sel : std_ulogic_vector(1 downto 0); bypass_valid : std_ulogic; @@ -133,6 +135,7 @@ architecture behaviour of execute1 is busy: std_ulogic; fp_exception_next : std_ulogic; trace_next : std_ulogic; + trace_ciabr : std_ulogic; prev_op : insn_type_t; prev_prefixed : std_ulogic; oe : std_ulogic; @@ -165,8 +168,8 @@ architecture behaviour of execute1 is constant reg_stage1_type_init : reg_stage1_type := (e => Execute1ToWritebackInit, se => side_effect_init, busy => '0', - fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL, - prev_prefixed => '0', + fp_exception_next => '0', trace_next => '0', trace_ciabr => '0', + prev_op => OP_ILLEGAL, prev_prefixed => '0', oe => '0', mul_select => "000", res2_sel => "00", spr_select => spr_id_init, pmu_spr_num => 5x"0", redir_to_next => '0', advance_nia => '0', lr_from_next => '0', @@ -1157,6 +1160,13 @@ begin end if; v.do_trace := ex1.msr(MSR_SE); + -- see if we have a CIABR map + if ctrl.ciabr(0) = '1' and ctrl.ciabr(1) = not ex1.msr(MSR_PR) and + ctrl.ciabr(63 downto 2) = e_in.nia(63 downto 2) then + v.do_trace := '1'; + v.ciabr_trace := '1'; + end if; + case_0: case e_in.insn_type is when OP_ILLEGAL => illegal := '1'; @@ -1392,6 +1402,8 @@ begin v.se.write_ctrl := '1'; when SPRSEL_DSCR => v.se.write_dscr := '1'; + when SPRSEL_CIABR => + v.se.write_ciabr := '1'; when others => end case; end if; @@ -1655,12 +1667,14 @@ begin v.e.srr1 := (others => '0'); v.e.srr1(47 - 33) := '1'; v.e.srr1(47 - 34) := ex1.prev_prefixed; - if ex1.prev_op = OP_LOAD or ex1.prev_op = OP_ICBI or ex1.prev_op = OP_ICBT or - ex1.prev_op = OP_DCBF then + if (ex1.prev_op = OP_LOAD or ex1.prev_op = OP_ICBI or ex1.prev_op = OP_ICBT or + ex1.prev_op = OP_DCBF) and ex1.trace_ciabr = '0' then v.e.srr1(47 - 35) := '1'; - elsif ex1.prev_op = OP_STORE or ex1.prev_op = OP_DCBZ then + elsif (ex1.prev_op = OP_STORE or ex1.prev_op = OP_DCBZ) and + ex1.trace_ciabr = '0' then v.e.srr1(47 - 36) := '1'; end if; + v.e.srr1(47 - 43) := ex1.trace_ciabr; elsif irq_valid = '1' then -- Don't deliver the interrupt until we have a valid instruction @@ -1694,6 +1708,7 @@ begin bypass_valid := actions.bypass_valid; v.taken_branch_event := actions.take_branch; v.trace_next := actions.do_trace; + v.trace_ciabr := actions.ciabr_trace; v.fp_exception_next := actions.fp_intr; v.res2_sel := actions.res2_sel; v.msr := actions.new_msr; @@ -1877,6 +1892,7 @@ begin assemble_ctrl(ctrl, ex1.msr(MSR_PR)) when SPRSEL_CTRL, 39x"0" & ctrl.dscr when SPRSEL_DSCR, 56x"0" & std_ulogic_vector(to_unsigned(CPU_INDEX, 8)) when SPRSEL_PIR, + ctrl.ciabr when SPRSEL_CIABR, assemble_xer(ex1.e.xerc, ctrl.xer_low) when others; stage2_stall <= l_in.l2stall or fp_in.f2stall; @@ -2057,6 +2073,9 @@ begin if ex1.se.write_dscr = '1' then ctrl_tmp.dscr <= ex1.e.write_data(24 downto 0); end if; + if ex1.se.write_ciabr = '1' then + ctrl_tmp.ciabr <= ex1.e.write_data; + end if; if ex1.se.enter_wait = '1' then ctrl_tmp.wait_state <= '1'; end if; From f64ab6569d4c5dd94a3be86a0e56ad932c178d84 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 13 Jan 2025 22:00:00 +1100 Subject: [PATCH 2/8] tests/trace: Add a couple of tests of CIABR function Signed-off-by: Paul Mackerras --- tests/test_trace.bin | Bin 11948 -> 12348 bytes tests/test_trace.console_out | 1 + tests/trace/head.S | 13 ++++++++++++ tests/trace/trace.c | 38 ++++++++++++++++++++++++++++++++++- 4 files changed, 51 insertions(+), 1 deletion(-) diff --git a/tests/test_trace.bin b/tests/test_trace.bin index 3c7c7bc962f6acf707f6b04ec268e4dd73c3453e..9ad843bdfd2582c39f0f03c773e4481590844869 100755 GIT binary patch literal 12348 zcmeHNe{2)?6@Pa&_T?vGLs~kza>mh=WS*2sXg!kz&IwQxTIvEDo5r*Y30i16X%$p+ zCcD05mIhL!!PGxmse&=J7DLs9Pr7`+@B7}p_xZeg-|t<%XCl3usIPz0+c$#tAnASmLj-`6oRt?Oo9@Fh{OIiI>&A=nc zfQz)F{}nqQiSH#iH4$G~#`xq0|GUe`AMurCj8Ch;KfiqZmrx>)*2Dw;b1Rs?7yM^e zFnikha-3Yt5dQPYa%Hp319B(MDanAP7KlM*`Lz+*(goTnk3B~CEwg}3eCMr{d2#e z(YZ5pdhQ&ZpL?68<|atWUm#!pGKKQ*Q-A(CjpnE6bbf};=W{fbKiZ@RQ?774)jJbQ z^?nQIZv2051XI0-<0;plV=33wSPRk9sUGNipzmQlX@xz5ALkVMWiHioI0js!>aEpr zU`qpA8ragnmI1a5uw{TP18f;!%K%#j*fPMD1-2}(Wq~aVY*}E-0$Uc?vLyL5aGy@O zl$c89E{O&;VAHryG&n6pv47m@2fj)%r+@RY`93fkeAZ(hL$-ZL#nyyhcR-;r-MN;? zb%Enx>tkyn_JAf+QghRQrl2L&AFGEw`{Z-}I!M){nt^Htsu`$epqhbd2C5mTW}upZ zY6hwqsAiy=fd|ZhM0yuZ^k}>fkP}mR-U~R7y?_)=;Q!ozY^i@vbgDF^k$cjR$<+9N z+GzftiHrZS_Dl?F@VDak0D4N^6nB&8r0|v)|2{y%|FJsxv_E!g_Pt}@(CzSB3C zj@w;|dkOjWK0v<9=VRExD^D6vVm>-}gA3qYyU2XpTT_zfMdqVZA3PtQe1N(x$2+(P z-ecK&^V_rt-Zzv7!^<_Y)b*nrZxndNRW- zZsfHOYbD!R>txHM!FFu_rellR<8>1Ie<2yy6xKz5{F0Z#_HupC+Qzu96xZS3cA3w; zS@3UR;r&EI+vfPMFJR&QMf`Ruh&hdQ*B^(E;@)O44m#y)D6jKJ!FxuW&wFW$wVqnf zfNylhnVV#DV_rP3|CYY@-YN8>AcfoQwSS)D4G-VDtBLi|4}aEV)f#dU*=(=-Mg;3eBa17gm!$6m0WH=(o# zF+GU0-+JDJ(sk%Z(3dL2{9xMV%=eP~0ac8Jyol3W`h3D>?!CGFsMEQZ5+%m5?~BLE zn9ld;(kcqCaalEB+$i-^_MGGU!p~+QZgfW1y&T)rB=$KziPpoPY59x7-?#5xOl*oA%fy)Q->YGye79eQPXSNA%|nQ>E}j!)B()vD$TIK$ zpMxBQ8EIw(N$i9!P4B+43sL??bM!We?=z14(Vx!Zg<9g{%)IwX`iOkY^x& zV9Pa-#}6j8i?&<~`3~eiLB{(P)(SFY^T0Ic#rf8;(0agjP3DGU+$v@d>bxeTyqPO2obfYcdXU(hVx~s}Zv>n$-c}e#jyV_2oFnMgKUu_x8n;R*uQwxq|$^#@EzCX9|86DpstsPpNUm++gQh zxBBv5`bOMNZ9H2TbCDjbFB@}e;;e1sg$;NVY6@fa{lIswK0ChJ-K>`IoVNk|!r|OA zNOsNC1h~%_TP!~QKx<<%%FkVilEyqv)2@{k{w5caX}~}ZEi9R09oaz&jdA4Q;Ug;g zkknfs`fZ0VZAT$-TcL;Cq{az{hbx_V?5ax>2HkA*mgmbN$QYeFxd zl6KcNQqMI{sE#a~F1EWZ#M^B1RAa=l+4Dsk1FE;~W0Qo9ey6gZGq8E@J~o$NGxSMi z%rmgLQ^5wcMr$P^^HU*SN59B@e}OvdH$s0G`XSbDYwQ5;U5(W12|;YdxflAap9!%a zEp4p{(J2WTJSoF2VrAz;D}~eowF#ReIU&O6+qS)y$E~mo6>`l!&%o{#*hSfH8+@LW zPf3N?y_RK(&-Y+^9k#Jj+wGRu2s?$T75gsiHqHw1D%%xs7Gm#&N6@*38_j%M@XUJy zcGuZ%TcZjd!;Q4n;=54&@K`g>$ea*-Z~5RcF8T?KnP~Z7UOLVIRxkst#p{%GQl8I1 n4}(RyS=YaS)_^Tv#|Qf~aUQ`~09^Rsu~%@8;{{@f2XOokod6j3nrBkTZd8^&W8Lyj_HoRcf1 zjPahg%J@NcN^#>Cx!K9gonXH1G&XqG)~!SoHi=gv@`L1*cR?PLR66tIMOT*RvgCpF zgp0~nl-K5Cc^$qbdHc$i$&;3du8_@w3? zd9j{wQj4kJlX~g#9jq4X?YPlhyHpk1pE4F_uAnM(s{#wE?^k)N5a%O__;-p-Gl~)& zO%kOPvX92NoWgW2`_u1YC&9pYmP-ABb=0p()T1nxEBIyyi%OF+rFL^g^Lpl@+69g; zto3@L`dv(-mfgVslxe8jt-}s~-xXm7Kn#*n{2?_$d17xqx+>=*DNE>rKnMlji&lU~ z{N=$wxhZmiA4RA@q%l?Zd{f>n6ykOZ=Yr`a(my5ghp=>upWzk&OOw_q$^GmHJsi?V{hP@Lsb@SGxqAX#w;>pHw8i?@@6Z{TmCa6 z(~0a6u;YLCInuI&MW6I}?Wx@u&mODRad7=AD;kFy)wmg`u48Kg7NvXx@rPn(B6nN2 z<5<7OnCGT3XTCJ~H8q1g?zQ9z(I1ics1K)=R}q?R8|)11R?FsR?7)$<(rDRs*z)^n zg%iQ}V%QU~-IiSf`{V~{<$)=pKjMI0r+%1L{rM}8 zQze>>{|3L%wIMp zH~aM@MiW3LOk|lxXKZJsb`P)EZf5G1nND-n5U{oPg~0oOs1|mIi3Phh1t`gw8V4=` z+-b%wp3N?>ysNEwY;5)|2Ew#b)zd~@vxvfDDq0hsY!k=3J lT}6)>lUo{^PUnPR4>v}q(1+j)fu9@uSjPyi{sDHN{SV}mszU$( diff --git a/tests/test_trace.console_out b/tests/test_trace.console_out index 2fe36d2..3e84260 100644 --- a/tests/test_trace.console_out +++ b/tests/test_trace.console_out @@ -7,3 +7,4 @@ test 06:PASS test 07:PASS test 08:PASS test 09:PASS +test 10:PASS diff --git a/tests/trace/head.S b/tests/trace/head.S index cd57e3a..14026e4 100644 --- a/tests/trace/head.S +++ b/tests/trace/head.S @@ -224,3 +224,16 @@ test8: test9: sc blr + + .global test10 +test10: + addi %r3,%r3,1 + addi %r4,%r4,2 + addi %r3,%r3,4 + addi %r4,%r4,8 + cmpd %r3,%r4 + bne 1f + nop + nop +1: li %r3,-1 + blr diff --git a/tests/trace/trace.c b/tests/trace/trace.c index 908d299..764b3dd 100644 --- a/tests/trace/trace.c +++ b/tests/trace/trace.c @@ -7,7 +7,6 @@ extern unsigned long callit(unsigned long arg1, unsigned long arg2, unsigned long (*fn)(unsigned long, unsigned long), unsigned long msr, unsigned long *regs); - #define MSR_FP 0x2000 #define MSR_SE 0x400 #define MSR_BE 0x200 @@ -16,6 +15,7 @@ extern unsigned long callit(unsigned long arg1, unsigned long arg2, #define SRR1 27 #define SPRG0 272 #define SPRG1 273 +#define CIABR 187 static inline unsigned long mfmsr(void) { @@ -218,6 +218,41 @@ int trace_test_9(void) return 0; } +extern unsigned long test10(unsigned long, unsigned long); + +int trace_test_10(void) +{ + unsigned long ret; + unsigned long regs[2]; + + mtspr(CIABR, (unsigned long)&test10 + 4 + 3); + ret = callit(1, 1, test10, mfmsr(), regs); + if (ret != 0xd00 || mfspr(SRR0) != (unsigned long)&test10 + 8) + return ret + 1; + if ((mfspr(SRR1) & 0x781f0000) != 0x40100000) + return ret + 2; + if (regs[0] != 2 || regs[1] != 3) + return 3; + + /* test CIABR on a taken branch */ + mtspr(CIABR, (unsigned long)&test10 + 20 + 3); + ret = callit(1, 1, test10, mfmsr(), regs); + if (ret != 0xd00 || mfspr(SRR0) != (unsigned long)&test10 + 32) + return ret + 4; + if ((mfspr(SRR1) & 0x781f0000) != 0x40100000) + return ret + 5; + if (regs[0] != 6 || regs[1] != 11) + return 6; + + /* test CIABR with PRIV = problem state */ + mtspr(CIABR, (unsigned long)&test10 + 1); + ret = callit(1, 1, test10, mfmsr(), regs); + if (ret != 0) + return ret + 7; + /* don't have page tables so can't actually run in problem state */ + return 0; +} + int fail = 0; void do_test(int num, int (*test)(void)) @@ -249,6 +284,7 @@ int main(void) do_test(7, trace_test_7); do_test(8, trace_test_8); do_test(9, trace_test_9); + do_test(10, trace_test_10); return fail; } From 622f8c81cc69c61edb6732021b29a80ea0407f53 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 16 Jan 2025 18:57:33 +1100 Subject: [PATCH 3/8] loadstore1: Fix setting of SRR0 on alignment interrupt When an alignment interrupt was being generated, loadstore1 was setting the l_out.valid signal in one cycle and l_out.interrupt in the next, for the same instruction. This meant that the offending instruction completed and the interrupt was applied to the next instruction, meaning that SRR0 ended up pointing to the following instruction. To fix this, when an access causing an alignment interrupt is going into r2, we set r2.busy for one cycle and set r2.one_cycle to 0 so that the complete signal doesn't get asserted. Signed-off-by: Paul Mackerras --- loadstore1.vhdl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/loadstore1.vhdl b/loadstore1.vhdl index 485947b..77b7060 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -702,8 +702,13 @@ begin v.wait_dc := r1.req.valid and r1.req.dc_req and not r1.req.load_sp and not r1.req.incomplete; v.wait_mmu := r1.req.valid and r1.req.mmu_op; - v.busy := r1.req.valid and r1.req.mmu_op; - v.one_cycle := r1.req.valid and not (r1.req.dc_req or r1.req.mmu_op); + if r1.req.valid = '1' and r1.req.align_intr = '1' then + v.busy := '1'; + v.one_cycle := '0'; + else + v.busy := r1.req.valid and r1.req.mmu_op; + v.one_cycle := r1.req.valid and not (r1.req.dc_req or r1.req.mmu_op); + end if; if r1.req.do_update = '1' or r1.req.store = '1' or r1.req.read_spr = '1' then v.wr_sel := "00"; elsif r1.req.load_sp = '1' then From 23b183fb168a50e2e0599b4905bdcef1761e65db Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 16 Jan 2025 19:06:00 +1100 Subject: [PATCH 4/8] tests/reservation: Check that SRR0 is set correctly on alignment interrupt The tests that intentionally generate alignment interrupts now also check that SRR0 is pointing to a l*arx or st*cx instruction. Signed-off-by: Paul Mackerras --- tests/reservation/reservation.c | 7 +++++++ tests/test_reservation.bin | Bin 11588 -> 11692 bytes 2 files changed, 7 insertions(+) diff --git a/tests/reservation/reservation.c b/tests/reservation/reservation.c index 502b285..9ae2921 100644 --- a/tests/reservation/reservation.c +++ b/tests/reservation/reservation.c @@ -157,6 +157,7 @@ int resv_test_2(void) { unsigned long x[3]; unsigned long offset, j, size, ret; + unsigned int instr; x[0] = 1234; x[1] = x[2] = 0; @@ -169,6 +170,9 @@ int resv_test_2(void) if (ret == 0x600) { if ((offset & (size - 1)) == 0) return j + 0x10; + instr = *(unsigned int *)mfspr(SRR0); + if ((instr & 0xfc00073f) != 0x7c000028) + return j + 0x40; } else if (ret) return ret; ret = callit(size, (unsigned long)&x[0] + offset, do_stcx); @@ -177,6 +181,9 @@ int resv_test_2(void) if (ret == 0x600) { if ((offset & (size - 1)) == 0) return j + 0x30; + instr = *(unsigned int *)mfspr(SRR0); + if ((instr & 0xfc00033f) != 0x7c00012d) + return j + 0x50; } else if (ret) return ret; } diff --git a/tests/test_reservation.bin b/tests/test_reservation.bin index 9c9ad8f957c83a99af0adbe2666c392eeb2f3f6f..7d3f0a522263d2e076be74792f392e87d53bc9fa 100755 GIT binary patch delta 762 zcmZ8dT}V@57=FLgHl0pUTqM}Vw&Ui}9pOY>I8UiFi=jaavdp}g+Jz8-l@~`?J81?L zUIefGSgRI6G>A&{qSC^Px`{57%yF>sa*rL~4Xr$YcNqO`zQ3^p?6ke=YY?xs+F44}B{j-|N1)?8c5u%U~{x9T=5pr)OBuIJ_=?u7< z1C>%CU1sn|HNh6iM4UdXY_sw3O0nZMeCR4+FEQyl$eMANSipMlkhqSO;z@C<@MR94 zi7w&k9DWsB488EKFffaPdk-7IeeRvC4dd=sHilWAI@5g<1Tf*w#}3aXHidCd5li5> zr-04iyl1aqJ36?%=m=vCgAb%>mLB6>sayPd3Zo^8ahRpUEIuzez}j(xH&0i-bKbDJ zW)Af#LI=&X)mec0Zo(AJ+8B`1GcH&!ruU~^vRpPtO;^@j6|S*df5e|1 zW1);}sQ&oj{kyksUQfN6d^ut9XVO)L5F>p1!Fjttq}GbVUblIIsDeqaXkI23#4NeU zEKybzeX?i?5?ffp7TIOih=%aAym4(b#d$B8jWX7j33eQp^Vlg&%3k_yxutoIv0t3q Sjisp!S07<)ne!%NK=m6FHwY{M delta 655 zcmYjLUr1AN82!Gn&AB87Em|6M=iNWuEiPoSc~Q68!-XM1L`oS%`cU+uQgDOW9X|Qc z!{UqZ!H1-mgh9ymm=I+zQ4qB3kG70n3=NF5+zNhuzcTQ^;huZG^E;g4x%smN3t{%U z5wyQ}z#I|5Eg>8|LOkvUcw`_bMrAl)Xd?@q3IuBaN>YJOojU6nQ(y10t?x^Axna%* zST=+hGyL^F+j`=%tydY_TD@m)eic~&xB#GP2Vgvv+`4RQFrG%pB?=Hslp6+fQy#EQ zaI3L(@h{NCK_3T}g~(c^r#JqW&Fm=iZfCT2f3k&Xyzc~)^hki_BON$FwMY~T6pXguBy~mm91llJ zccP<+W~s65Gtx%8PFj2jU(&7ko@0egA^QJL0`T!S#jLPrZF}3&#WaX*TGp*G%%+jK7+L5+$U5J|;mXM1y$zMWDNP`h$rkHoB_fWi}!WZdC$aJ2$ U!k%AlWk~NfbLF=X1JYpp0Y;40^#A|> From ff00dc1505efa4258dc456e3d5109bbee7d5f7ea Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 15 Jan 2025 21:47:51 +1100 Subject: [PATCH 5/8] PMU: Fix setting of SIAR and SDAR on trace interrupt This arranges for SIAR and SDAR to be set when a trace interrupt is triggered by a non-zero setting of the MSR[TE] field. According to the ISA, SIAR should be set to the address of the instruction and SDAR should be set to the effective address of its storage operand if any. This also fixes setting of SDAR by the PMU when an alert occurs; previously it was always just set to zero. Signed-off-by: Paul Mackerras --- common.vhdl | 3 +++ execute1.vhdl | 10 ++++++---- loadstore1.vhdl | 12 ++++++++++-- pmu.vhdl | 4 ++-- 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/common.vhdl b/common.vhdl index d182a32..dc5348a 100644 --- a/common.vhdl +++ b/common.vhdl @@ -529,6 +529,7 @@ package common is nia : std_ulogic_vector(63 downto 0); addr : std_ulogic_vector(63 downto 0); addr_v : std_ulogic; + trace : std_ulogic; occur : PMUEventType; end record; @@ -601,6 +602,8 @@ package common is type Loadstore1ToExecute1Type is record busy : std_ulogic; l2stall : std_ulogic; + ea_for_pmu : std_ulogic_vector(63 downto 0); + ea_valid : std_ulogic; end record; type Loadstore1ToDcacheType is record diff --git a/execute1.vhdl b/execute1.vhdl index 5240063..a3b9522 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -254,6 +254,7 @@ architecture behaviour of execute1 is -- PMU signals signal x_to_pmu : Execute1ToPMUType; signal pmu_to_x : PMUToExecute1Type; + signal pmu_trace : std_ulogic; -- signals for logging signal exception_log : std_ulogic; @@ -560,11 +561,12 @@ begin br_mispredict => ex2.br_mispredict, others => '0'); x_to_pmu.nia <= e_in.nia; - x_to_pmu.addr <= (others => '0'); - x_to_pmu.addr_v <= '0'; + x_to_pmu.addr <= l_in.ea_for_pmu; + x_to_pmu.addr_v <= l_in.ea_valid; x_to_pmu.spr_num <= ex1.pmu_spr_num; x_to_pmu.spr_val <= ex1.e.write_data; x_to_pmu.run <= ctrl.run; + x_to_pmu.trace <= pmu_trace; -- XER forwarding. The CA and CA32 bits are only modified by instructions -- that are handled here, so for them we can just use the result most @@ -1163,7 +1165,6 @@ begin -- see if we have a CIABR map if ctrl.ciabr(0) = '1' and ctrl.ciabr(1) = not ex1.msr(MSR_PR) and ctrl.ciabr(63 downto 2) = e_in.nia(63 downto 2) then - v.do_trace := '1'; v.ciabr_trace := '1'; end if; @@ -1707,7 +1708,7 @@ begin v.e.valid := actions.complete; bypass_valid := actions.bypass_valid; v.taken_branch_event := actions.take_branch; - v.trace_next := actions.do_trace; + v.trace_next := actions.do_trace or actions.ciabr_trace; v.trace_ciabr := actions.ciabr_trace; v.fp_exception_next := actions.fp_intr; v.res2_sel := actions.res2_sel; @@ -1740,6 +1741,7 @@ begin end if; is_scv := go and actions.se.scv_trap; bsort_start <= go and actions.start_bsort; + pmu_trace <= go and actions.do_trace; if not HAS_FPU and ex1.div_in_progress = '1' then v.div_in_progress := not divider_to_x.valid; diff --git a/loadstore1.vhdl b/loadstore1.vhdl index 77b7060..85fb129 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -102,6 +102,7 @@ architecture behave of loadstore1 is dword_index : std_ulogic; two_dwords : std_ulogic; incomplete : std_ulogic; + ea_valid : std_ulogic; end record; constant request_init : request_t := (valid => '0', dc_req => '0', load => '0', store => '0', flush => '0', touch => '0', sync => '0', tlbie => '0', @@ -119,7 +120,8 @@ architecture behave of loadstore1 is rc => '0', nc => '0', virt_mode => '0', priv_mode => '0', load_sp => '0', sprsel => "00", ric => "00", is_slbia => '0', align_intr => '0', - dword_index => '0', two_dwords => '0', incomplete => '0'); + dword_index => '0', two_dwords => '0', incomplete => '0', + ea_valid => '0'); type reg_stage1_t is record req : request_t; @@ -464,6 +466,7 @@ begin addr(63 downto 32) := (others => '0'); end if; v.addr := addr; + v.ea_valid := l_in.valid; -- XXX Temporary hack. Mark the op as non-cachable if the address -- is the form 0xc------- for a real-mode access. @@ -509,6 +512,7 @@ begin case l_in.op is when OP_SYNC => v.sync := '1'; + v.ea_valid := '0'; when OP_STORE => v.store := '1'; if l_in.length = "0000" then @@ -536,14 +540,15 @@ begin v.align_intr := v.nc; when OP_TLBIE => v.tlbie := '1'; - v.addr := l_in.addr2; -- address from RB for tlbie v.is_slbia := l_in.insn(7); v.mmu_op := '1'; when OP_MFSPR => v.read_spr := '1'; + v.ea_valid := '0'; when OP_MTSPR => v.write_spr := '1'; v.mmu_op := not sprn(1); + v.ea_valid := '0'; when OP_FETCH_FAILED => -- send it to the MMU to do the radix walk v.instr_fault := '1'; @@ -1067,6 +1072,9 @@ begin e_out.busy <= busy; e_out.l2stall <= dc_stall or d_in.error or r2.busy; + e_out.ea_for_pmu <= req_in.addr; + e_out.ea_valid <= req_in.ea_valid; + events <= r3.events; flush <= exception; diff --git a/pmu.vhdl b/pmu.vhdl index 928d6c2..2afa1eb 100644 --- a/pmu.vhdl +++ b/pmu.vhdl @@ -183,12 +183,12 @@ begin end if; if p_in.mtspr = '1' and p_in.spr_num(3 downto 0) = "1100" then siar <= p_in.spr_val; - elsif doalert = '1' then + elsif doalert = '1' or p_in.trace = '1' then siar <= p_in.nia; end if; if p_in.mtspr = '1' and p_in.spr_num(3 downto 0) = "1101" then sdar <= p_in.spr_val; - elsif doalert = '1' then + elsif doalert = '1' or p_in.trace = '1' then sdar <= p_in.addr; end if; if p_in.mtspr = '1' and p_in.spr_num(3 downto 0) = "0000" then From 09de0738de7b607b22fe914efc8741141821ddba Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 16 Jan 2025 19:44:28 +1100 Subject: [PATCH 6/8] tests/trace: Add checks for SIAR and SDAR being set correctly Signed-off-by: Paul Mackerras --- tests/test_trace.bin | Bin 12348 -> 12492 bytes tests/trace/trace.c | 12 ++++++++++++ 2 files changed, 12 insertions(+) diff --git a/tests/test_trace.bin b/tests/test_trace.bin index 9ad843bdfd2582c39f0f03c773e4481590844869..8139a81460205a9dffaca390cc4aa1569758e21e 100755 GIT binary patch delta 1080 zcmb7CT}V@582-L(N1dWfn3SlvU$^Og>Blo2=mK%&MG9UhBu=N&g^7m5>lMLUOa>b-23x??O>Wn`^3<0&9J2nIueeFMwjQQ+u%j%ly88}plpUf?6AoJ@rDNc;U!X~NWVe;1 z4n#zfFb%eSWew8$>$rv_#B<0Q>kQn7d`Epe6_!Om<4#{djW;mdA&U-0^ZQM7%3`6S z1=M0h7PX4bK>Z1I`g_N|Kql(RE?HbubQWrNu|! zG)s)7pvk?Z)q;Ox05OctOMH@0USodn=tb2f+MJz7cD;C>5zIlGpX)-jEN_0!fM@y< zI#_U(3S-EwaHf5NOk81XnoorRmqxpa&aC1T3B{BU2@bl8gaES7Fou{d0CVXyDJ3yh24FL4iR%b>|ZHvWH@X#$?Ox zL(z!Ev50yKRD9^cVK2f0eF*}42qxrPMn0@RCS?!h=-0V=eToizxu4JBp2Oj{lB(+E zF{)I?S3BXpe9*0t0DIv~=RY<>M^J)0q8AidfwJW5@Lk}2B3Wm|@k&pSTZ|H&jqPj{ zTED?fIYygdQzE9F}^lr3v~BmZcc2!dK}Y zt;0C(alnjxTkZYAhX^4$X^|sdGO5klPE2?S?;~di^5}z4LNA zfi8|1tle?}{T;f?@+S0h+F-x_?9DsTwW|iJTYdn23w_OU7kX*bV6QEA6Wz|XM;AuA zp4f%bch2X5s%>r&U$!6P`vH&CqfI*E^Ia4SL=CQ~ox&Va%_P?fQs$>M+ajWEVYYWW z>#ASyQbLMA<9;ED)PMleFw!X)jE99HQVz0lS(rnbf`{0vA-#uX>@|>*P)Aa1giJ$g z;&}A@Z_dp-%<^EmF-zDh;@7MQM#FSNHW5qeJDi&xv&_GXm>l-HakE)wZwgataGo(6 GSbqV0m-@T_ diff --git a/tests/trace/trace.c b/tests/trace/trace.c index 764b3dd..e7d58ba 100644 --- a/tests/trace/trace.c +++ b/tests/trace/trace.c @@ -16,6 +16,8 @@ extern unsigned long callit(unsigned long arg1, unsigned long arg2, #define SPRG0 272 #define SPRG1 273 #define CIABR 187 +#define SIAR 780 +#define SDAR 781 static inline unsigned long mfmsr(void) { @@ -80,6 +82,8 @@ int trace_test_1(void) return ret + 2; if (regs[0] != 3 || regs[1] != 2) return 3; + if (mfspr(SIAR) != (unsigned long)&test1) + return 4; return 0; } @@ -98,6 +102,8 @@ int trace_test_2(void) return ret + 2; if (regs[0] != 3 || x != 3) return 3; + if (mfspr(SIAR) != (unsigned long)&test2 || mfspr(SDAR) != (unsigned long)&x) + return 4; return 0; } @@ -116,6 +122,8 @@ int trace_test_3(void) return ret + 2; if (regs[0] != 11 || x != 11) return 3; + if (mfspr(SIAR) != (unsigned long)&test3 || mfspr(SDAR) != (unsigned long)&x) + return 4; return 0; } @@ -169,6 +177,8 @@ int trace_test_6(void) return ret + 2; if (regs[0] != 11 || regs[1] != 55) return 3; + if (mfspr(SIAR) != (unsigned long)&test6 + 8) + return 4; return 0; } @@ -186,6 +196,8 @@ int trace_test_7(void) return ret + 2; if (regs[0] != 11 || regs[1] != 1) return 3; + if (mfspr(SIAR) != (unsigned long)&test7 + 8) + return 4; return 0; } From 5ddd8884fafd881aa3639c031dbc4b0d68f8ef6d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 15 Jan 2025 15:18:23 +1100 Subject: [PATCH 7/8] core: Implement two data watchpoints This implements the DAWR0, DAWRX0, DAWR1, and DAWRX1 registers, which provide the ability to set watchpoints on two ranges of data addresses and take an interrupt when an access is made to either range. The address comparisons are done in loadstore1 in the second cycle (doing it in the first cycle turned out to have poor timing). If a match is detected, a signal is sent to the dcache which causes the access to fail and generate an error signal back to loadstore1, in much the same way that a protection violation would, whereupon a data storage interrupt is generated. Signed-off-by: Paul Mackerras --- common.vhdl | 5 ++ dcache.vhdl | 11 +++- decode2.vhdl | 6 +- loadstore1.vhdl | 149 +++++++++++++++++++++++++++++++++++++++--------- 4 files changed, 142 insertions(+), 29 deletions(-) diff --git a/common.vhdl b/common.vhdl index dc5348a..1c8642b 100644 --- a/common.vhdl +++ b/common.vhdl @@ -65,6 +65,10 @@ package common is constant SPR_VRSAVE : spr_num_t := 256; constant SPR_PIR : spr_num_t := 1023; constant SPR_CIABR : spr_num_t := 187; + constant SPR_DAWR0 : spr_num_t := 180; + constant SPR_DAWR1 : spr_num_t := 181; + constant SPR_DAWRX0 : spr_num_t := 188; + constant SPR_DAWRX1 : spr_num_t := 189; -- PMU registers constant SPR_UPMC1 : spr_num_t := 771; @@ -624,6 +628,7 @@ package common is addr : std_ulogic_vector(63 downto 0); data : std_ulogic_vector(63 downto 0); -- valid the cycle after .valid = 1 byte_sel : std_ulogic_vector(7 downto 0); + dawr_match : std_ulogic; -- valid the cycle after .valid = 1 end record; constant Loadstore1ToDcacheInit : Loadstore1ToDcacheType := (addr => (others => '0'), data => (others => '0'), byte_sel => x"00", diff --git a/dcache.vhdl b/dcache.vhdl index ce7b351..ff7383c 100644 --- a/dcache.vhdl +++ b/dcache.vhdl @@ -316,6 +316,7 @@ architecture rtl of dcache is hit_way : way_t; same_tag : std_ulogic; mmu_req : std_ulogic; + dawr_m : std_ulogic; end record; -- First stage register, contains state for stage 1 of load hits @@ -635,6 +636,8 @@ begin -- put directly into req.data in the dcache_slow process below. r0.req.data <= d_in.data; r0.d_valid <= r0.req.valid; + -- the dawr_match signal has the same timing as the data + r0.req.dawr_match <= d_in.dawr_match; end if; end if; end process; @@ -953,12 +956,18 @@ begin variable snp_matches : std_ulogic_vector(TLB_NUM_WAYS - 1 downto 0); variable snoop_match : std_ulogic; variable hit_reload : std_ulogic; + variable dawr_match : std_ulogic; begin -- Extract line, row and tag from request rindex := get_index(r0.req.addr); req_index <= rindex; req_row := get_row(r0.req.addr); req_tag <= get_tag(ra); + if r0.d_valid = '0' then + dawr_match := d_in.dawr_match; + else + dawr_match := r0.req.dawr_match; + end if; go := r0_valid and not (r0.tlbie or r0.tlbld) and not r1.ls_error; if is_X(r0.req.addr) then @@ -1135,7 +1144,7 @@ begin rc_ok <= perm_attr.reference and (r0.req.load or perm_attr.changed); perm_ok <= (r0.req.priv_mode or not perm_attr.priv) and (perm_attr.wr_perm or (r0.req.load and perm_attr.rd_perm)); - access_ok <= valid_ra and perm_ok and rc_ok; + access_ok <= valid_ra and perm_ok and rc_ok and not dawr_match; -- Combine the request and cache hit status to decide what -- operation needs to be done diff --git a/decode2.vhdl b/decode2.vhdl index 7e993d5..cc241a2 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -477,7 +477,8 @@ begin case decode_spr_num(d_in.insn) is when SPR_XER => v.input_ov := '1'; - when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR => + when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR | + SPR_DAWR0 | SPR_DAWR1 | SPR_DAWRX0 | SPR_DAWRX1 => unit := LDST; when SPR_TAR => v.e.uses_tar := '1'; @@ -499,7 +500,8 @@ begin when SPR_XER => v.e.output_xer := '1'; v.output_ov := '1'; - when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR => + when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PTCR | + SPR_DAWR0 | SPR_DAWR1 | SPR_DAWRX0 | SPR_DAWRX1 => unit := LDST; if d_in.valid = '1' then v.sgl_pipe := '1'; diff --git a/loadstore1.vhdl b/loadstore1.vhdl index 85fb129..0816931 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -95,10 +95,11 @@ architecture behave of loadstore1 is virt_mode : std_ulogic; priv_mode : std_ulogic; load_sp : std_ulogic; - sprsel : std_ulogic_vector(1 downto 0); + sprsel : std_ulogic_vector(2 downto 0); ric : std_ulogic_vector(1 downto 0); is_slbia : std_ulogic; align_intr : std_ulogic; + dawr_intr : std_ulogic; dword_index : std_ulogic; two_dwords : std_ulogic; incomplete : std_ulogic; @@ -119,7 +120,8 @@ architecture behave of loadstore1 is atomic_qw => '0', atomic_first => '0', atomic_last => '0', rc => '0', nc => '0', virt_mode => '0', priv_mode => '0', load_sp => '0', - sprsel => "00", ric => "00", is_slbia => '0', align_intr => '0', + sprsel => "000", ric => "00", is_slbia => '0', align_intr => '0', + dawr_intr => '0', dword_index => '0', two_dwords => '0', incomplete => '0', ea_valid => '0'); @@ -140,11 +142,15 @@ architecture behave of loadstore1 is one_cycle : std_ulogic; wr_sel : std_ulogic_vector(1 downto 0); addr0 : std_ulogic_vector(63 downto 0); - sprsel : std_ulogic_vector(1 downto 0); + sprsel : std_ulogic_vector(2 downto 0); dbg_spr : std_ulogic_vector(63 downto 0); dbg_spr_ack: std_ulogic; end record; + constant num_dawr : positive := 2; + type dawr_array_t is array(0 to num_dawr - 1) of std_ulogic_vector(63 downto 3); + type dawrx_array_t is array(0 to num_dawr - 1) of std_ulogic_vector(15 downto 0); + type reg_stage3_t is record state : state_t; complete : std_ulogic; @@ -166,6 +172,10 @@ architecture behave of loadstore1 is intr_vec : integer range 0 to 16#fff#; srr1 : std_ulogic_vector(15 downto 0); events : Loadstore1EventType; + dawr : dawr_array_t; + dawrx : dawrx_array_t; + dawr_uplim : dawr_array_t; + dawr_upd : std_ulogic; end record; signal req_in : request_t; @@ -185,6 +195,7 @@ architecture behave of loadstore1 is signal stage1_req : request_t; signal stage1_dcreq : std_ulogic; signal stage1_dreq : std_ulogic; + signal stage1_dawr_match : std_ulogic; -- Generate byte enables from sizes function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is @@ -287,6 +298,25 @@ architecture behave of loadstore1 is return fs2; end; + function dawrx_match_enable(dawrx : std_ulogic_vector(15 downto 0); virt_mode : std_ulogic; + priv_mode : std_ulogic; is_store : std_ulogic) + return boolean is + begin + -- check PRIVM field; note priv_mode = '1' implies hypervisor mode + if (priv_mode = '0' and dawrx(0) = '0') or (priv_mode = '1' and dawrx(2) = '0') then + return false; + end if; + -- check WT/WTI fields + if dawrx(3) = '0' and virt_mode /= dawrx(4) then + return false; + end if; + -- check DW/DR fields + if (is_store = '0' and dawrx(5) = '0') or (is_store = '1' and dawrx(6) = '0') then + return false; + end if; + return true; + end; + begin loadstore1_reg: process(clk) begin @@ -302,7 +332,7 @@ begin r1.req.instr_fault <= '0'; r1.req.load <= '0'; r1.req.priv_mode <= '0'; - r1.req.sprsel <= "00"; + r1.req.sprsel <= "000"; r1.req.ric <= "00"; r1.req.xerc <= xerc_init; @@ -313,7 +343,7 @@ begin r2.req.instr_fault <= '0'; r2.req.load <= '0'; r2.req.priv_mode <= '0'; - r2.req.sprsel <= "00"; + r2.req.sprsel <= "000"; r2.req.ric <= "00"; r2.req.xerc <= xerc_init; @@ -330,12 +360,19 @@ begin r3.stage1_en <= '1'; r3.events.load_complete <= '0'; r3.events.store_complete <= '0'; + for i in 0 to num_dawr - 1 loop + r3.dawr(i) <= (others => '0'); + r3.dawrx(i) <= (others => '0'); + r3.dawr_uplim(i) <= (others => '0'); + end loop; + r3.dawr_upd <= '0'; flushing <= '0'; else r1 <= r1in; r2 <= r2in; r3 <= r3in; - flushing <= (flushing or (r1in.req.valid and r1in.req.align_intr)) and + flushing <= (flushing or (r1in.req.valid and + (r1in.req.align_intr or r1in.req.dawr_intr))) and not flush; end if; stage1_dreq <= stage1_dcreq; @@ -437,12 +474,15 @@ begin v.virt_mode := l_in.virt_mode; v.priv_mode := l_in.priv_mode; v.ric := l_in.insn(19 downto 18); - if sprn(1) = '1' then + if sprn(8 downto 7) = "01" then + -- debug registers DAWR[X][01] + v.sprsel := '1' & sprn(3) & sprn(0); + elsif sprn(1) = '1' then -- DSISR and DAR - v.sprsel := '1' & sprn(0); + v.sprsel := "01" & sprn(0); else -- PID and PTCR - v.sprsel := '0' & sprn(8); + v.sprsel := "00" & sprn(8); end if; lsu_sum := std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2)); @@ -547,7 +587,7 @@ begin v.ea_valid := '0'; when OP_MTSPR => v.write_spr := '1'; - v.mmu_op := not sprn(1); + v.mmu_op := not (sprn(1) or sprn(2)); v.ea_valid := '0'; when OP_FETCH_FAILED => -- send it to the MMU to do the radix walk @@ -659,8 +699,12 @@ begin variable byte_offset : unsigned(2 downto 0); variable interrupt : std_ulogic; variable dbg_spr_rd : std_ulogic; - variable sprsel : std_ulogic_vector(1 downto 0); + variable sprsel : std_ulogic_vector(2 downto 0); variable sprval : std_ulogic_vector(63 downto 0); + variable dawr_match : std_ulogic; + variable addr : std_ulogic_vector(63 downto 3); + variable addl : unsigned(64 downto 3); + variable addu : unsigned(64 downto 3); begin v := r2; @@ -677,21 +721,47 @@ begin end if; end loop; + -- Test for DAWR0/1 matches + dawr_match := '0'; + for i in 0 to 1 loop + addr := r1.req.addr(63 downto 3); + if r1.req.priv_mode = '1' and r3.dawrx(i)(7) = '1' then + -- HRAMMC=1 => trim top bit from address + addr(63) := '0'; + end if; + addl := unsigned('0' & addr) - unsigned('0' & r3.dawr(i)); + addu := unsigned('0' & r3.dawr_uplim(i)) - unsigned('0' & addr); + if addl(64) = '0' and addu(64) = '0' and + dawrx_match_enable(r3.dawrx(i), r1.req.virt_mode, + r1.req.priv_mode, r1.req.store) then + dawr_match := r1.req.valid and r1.req.dc_req and not r3.dawr_upd and + not (r1.req.touch or r1.req.sync or r1.req.flush); + end if; + end loop; + stage1_dawr_match <= dawr_match; + dbg_spr_rd := dbg_spr_req and not (r1.req.valid and r1.req.read_spr); if dbg_spr_rd = '0' then sprsel := r1.req.sprsel; else - sprsel := dbg_spr_addr; + sprsel := '0' & dbg_spr_addr; end if; - if sprsel(1) = '1' then - if sprsel(0) = '0' then + case sprsel is + when "100" => + sprval := r3.dawr(0) & "000"; + when "101" => + sprval := r3.dawr(1) & "000"; + when "110" => + sprval := 48x"0" & r3.dawrx(0); + when "111" => + sprval := 48x"0" & r3.dawrx(1); + when "010" => sprval := x"00000000" & r3.dsisr; - else + when "011" => sprval := r3.dar; - end if; - else - sprval := m_in.sprval; - end if; + when others => + sprval := m_in.sprval; -- MMU regs + end case; if dbg_spr_req = '0' then v.dbg_spr_ack := '0'; elsif dbg_spr_rd = '1' and r2.dbg_spr_ack = '0' then @@ -704,6 +774,7 @@ begin v.req := r1.req; v.addr0 := r1.addr0; v.req.store_data := store_data; + v.req.dawr_intr := dawr_match; v.wait_dc := r1.req.valid and r1.req.dc_req and not r1.req.load_sp and not r1.req.incomplete; v.wait_mmu := r1.req.valid and r1.req.mmu_op; @@ -751,7 +822,7 @@ begin end if; interrupt := (r2.req.valid and r2.req.align_intr) or - (d_in.error and (d_in.cache_paradox or d_in.reserve_nc)) or + (d_in.error and (d_in.cache_paradox or d_in.reserve_nc or r2.req.dawr_intr)) or m_in.err; if interrupt = '1' then v.req.valid := '0'; @@ -808,6 +879,15 @@ begin v.srr1 := (others => '0'); v.events := (others => '0'); + -- Evaluate DAWR upper limits after a clock edge + v.dawr_upd := '0'; + if r3.dawr_upd = '1' then + for i in 0 to num_dawr - 1 loop + v.dawr_uplim(i) := std_ulogic_vector(unsigned(r3.dawr(i)) + + unsigned(r3.dawrx(i)(15 downto 10))); + end loop; + end if; + -- load data formatting -- shift and byte-reverse data bytes for i in 0 to 7 loop @@ -887,12 +967,25 @@ begin if r2.req.load_sp = '1' and r2.req.dc_req = '0' then write_enable := '1'; end if; - if r2.req.write_spr = '1' and r2.req.mmu_op = '0' then - if r2.req.sprsel(0) = '0' then - v.dsisr := r2.req.store_data(31 downto 0); - else - v.dar := r2.req.store_data; + if r2.req.write_spr = '1' then + if r2.req.sprsel(2) = '1' then + v.dawr_upd := '1'; end if; + case r2.req.sprsel is + when "100" => + v.dawr(0) := r2.req.store_data(63 downto 3); + when "101" => + v.dawr(1) := r2.req.store_data(63 downto 3); + when "110" => + v.dawrx(0) := r2.req.store_data(15 downto 0); + when "111" => + v.dawrx(1) := r2.req.store_data(15 downto 0); + when "010" => + v.dsisr := r2.req.store_data(31 downto 0); + when "011" => + v.dar := r2.req.store_data; + when others => + end case; end if; end if; @@ -915,9 +1008,10 @@ begin end if; end if; if d_in.error = '1' then - if d_in.cache_paradox = '1' then + if d_in.cache_paradox = '1' or d_in.reserve_nc = '1' or r2.req.dawr_intr = '1' then -- signal an interrupt straight away exception := '1'; + dsisr(63 - 41) := r2.req.dawr_intr; dsisr(63 - 38) := not r2.req.load; dsisr(63 - 37) := d_in.reserve_nc; -- XXX there is no architected bit for this @@ -970,6 +1064,7 @@ begin v.srr1(47 - 34) := r2.req.prefixed; v.dar := r2.req.addr; if m_in.segerr = '0' then + dsisr(63 - 38) := not r2.req.load; v.intr_vec := 16#300#; v.dsisr := dsisr; else @@ -1036,8 +1131,10 @@ begin end if; if stage1_dreq = '1' then d_out.data <= store_data; + d_out.dawr_match <= stage1_dawr_match; else d_out.data <= r2.req.store_data; + d_out.dawr_match <= r2.req.dawr_intr; end if; d_out.hold <= l_in.e2stall; From 80bc9d5098f924b911e645f3970c5339b5ca3148 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 16 Jan 2025 22:37:58 +1100 Subject: [PATCH 8/8] tests/trace: Add a few tests of DAWR (data watchpoint) functionality Signed-off-by: Paul Mackerras --- tests/test_trace.bin | Bin 12492 -> 13352 bytes tests/test_trace.console_out | 1 + tests/trace/head.S | 14 ++++++ tests/trace/trace.c | 93 ++++++++++++++++++++++++++++++++++- 4 files changed, 107 insertions(+), 1 deletion(-) diff --git a/tests/test_trace.bin b/tests/test_trace.bin index 8139a81460205a9dffaca390cc4aa1569758e21e..44538d83668af16e48d936f4008d8da975306bc7 100755 GIT binary patch delta 1506 zcmZvbUrbw79LIk>v;|7(O0xI|y5;s#p)FmhN|wT4YuTJt76L9R4 ze7?W)JLk^4HPSPbV8em2_xF|s{O&HzO5H;Nm9$ZXw0mxR&7=`^XAnJy)Y{ZVGK|biY z$QDarE}647it#Hx7xz@9nbXFXs#f)dB8q(^+*yUD_oZrP^+Tl8r)nDeQZ%Ir@$-S} z?P6bq7Q%^#X`SeJIFY-f|9V%n5F9F~^zNjAsENA5adL-m6n1GN;H6kYAH@!64C^yu zAd+Gi^axX$DAuUW<^30t->=Px0X^cSs}R1SOF=1+7lVit?B60}hfo28hJ|R60%kf^ zZ^n9x`L)43^o2f}_i2fL1)8)|(#D}YTMqT4hMv}UZ4)8-=^TEHaog`nqh-mpaWiX;ZMnz*KX?j3w^H_Q*ppW z{gF4xSFcdz*{}H-@5^i?&cE^Q4z)9ifpENSMb|=5m(RP&h&4dvdBaA3!$pAdnQHu6 zz7V?}=WqC`*hHM4^Oaks;$7iMKBiP~rOIqsz`u5ePy4Qw3Ng@F0?gdogW=D|Pxz0S zOyTkGYVMladQZIDt3bIedh}?ijYv5hedP!O(w-Onxs14)TM^T_tXRl7$%KD5*^2lw z>vK z*dGXy=BD7&Hd#d$bMyhf9aNi-JQU&*TsrKClts^7Hg!6v`6p}Cg;1l3=zHXQWd0>b z2>DhA9kfQ_dNDhZcmBbfYt@DT-1BfFe+qGr!8uqQrRXK|rv;y}%O-4$I^yj%iN5{g z57uaj2TLZfe(f)Qx>mK!!*ubFYpX4GZ0_OHwUyR7Sd&@4RIBdngcLG_Jfb_d*Azh{{Qt%YwB?c zmM^S4xEQH#M$Yy=sX0|X4TUfcoHKBqFT>Lw{9zcFi>*TA1fDX!LR+Wg16VpP#os~s zI8`%PNi0GmM0QJUb+e#q_9IU=q1}aB$pie7;^CIYg2*uzFvcd^xWePeO|F#CHwTqB zc+0#FwIVK=QQL?ArLmun$~Mb;@4NHm^ig(+O?t5-Z?l-!R9S$np?>VKhM`f!t5y;| z7V(ZXpf!tmZV9eUbk*smj zBKvG#hz*)z*Lr!xe|Q(m*+eQh5TYVOLLbp0HYl4Tl{inlLQEXuS~fMDI}}1j60$x= UEKp|AQVE$Y5-Wd&n2`bDAL`Gg{Qv*} diff --git a/tests/test_trace.console_out b/tests/test_trace.console_out index 3e84260..3a5a601 100644 --- a/tests/test_trace.console_out +++ b/tests/test_trace.console_out @@ -8,3 +8,4 @@ test 07:PASS test 08:PASS test 09:PASS test 10:PASS +test 11:PASS diff --git a/tests/trace/head.S b/tests/trace/head.S index 14026e4..fd444b6 100644 --- a/tests/trace/head.S +++ b/tests/trace/head.S @@ -237,3 +237,17 @@ test10: nop 1: li %r3,-1 blr + + .global test11 +test11: + stdx %r3,%r3,%r4 + stw %r3,6(%r4) + dcbt 0,%r4 + dcbf 0,%r4 + dcbtst 0,%r4 + sync + ld %r3,0(%r4) + lwz %r3,6(%r4) + lwz %r3,27(%r4) + stb %r3,26(%r4) + blr diff --git a/tests/trace/trace.c b/tests/trace/trace.c index e7d58ba..a88c751 100644 --- a/tests/trace/trace.c +++ b/tests/trace/trace.c @@ -11,11 +11,17 @@ extern unsigned long callit(unsigned long arg1, unsigned long arg2, #define MSR_SE 0x400 #define MSR_BE 0x200 +#define DSISR 18 +#define DAR 19 #define SRR0 26 #define SRR1 27 #define SPRG0 272 #define SPRG1 273 #define CIABR 187 +#define DAWR0 180 +#define DAWR1 181 +#define DAWRX0 188 +#define DAWRX1 189 #define SIAR 780 #define SDAR 781 @@ -232,6 +238,7 @@ int trace_test_9(void) extern unsigned long test10(unsigned long, unsigned long); +/* test CIABR */ int trace_test_10(void) { unsigned long ret; @@ -265,6 +272,89 @@ int trace_test_10(void) return 0; } +/* test DAWR[X]{0,1} */ +#define MRD_SHIFT 10 +#define HRAMMC 0x80 +#define DW 0x40 +#define DR 0x20 +#define WT 0x10 +#define WTI 0x08 +#define PRIVM_HYP 0x04 +#define PRIVM_PNH 0x02 +#define PRIVM_PRO 0x01 + +extern unsigned long test11(unsigned long, unsigned long); + +int trace_test_11(void) +{ + unsigned long ret; + unsigned long regs[2]; + unsigned long x[4]; + + mtspr(DAWR0, (unsigned long)&x[0]); + mtspr(DAWRX0, (0 << MRD_SHIFT) + DW + PRIVM_HYP); + ret = callit(0, (unsigned long) &x, test11, mfmsr(), regs); + if (ret != 0x300) + return ret + 1; + if (mfspr(SRR0) != (unsigned long) &test11 || mfspr(DSISR) != 0x02400000 || + mfspr(DAR) != (unsigned long)&x[0]) + return 2; + + mtspr(DAWR0, (unsigned long)&x[1]); + ret = callit(0, (unsigned long) &x, test11, mfmsr(), regs); + if (ret != 0x300) + return ret + 3; + if (mfspr(SRR0) != (unsigned long) &test11 + 4 || mfspr(DSISR) != 0x02400000 || + mfspr(DAR) != (unsigned long)&x[1]) + return 4; + + mtspr(DAWR0, (unsigned long)&x[0]); + mtspr(DAWRX0, (0 << MRD_SHIFT) + DR + PRIVM_HYP); + ret = callit(0, (unsigned long) &x, test11, mfmsr(), regs); + if (ret != 0x300) + return ret + 5; + if (mfspr(SRR0) != (unsigned long) &test11 + 24 || mfspr(DSISR) != 0x00400000) + return 6; + + mtspr(DAWR0, (unsigned long)&x[1]); + ret = callit(0, (unsigned long) &x, test11, mfmsr(), regs); + if (ret != 0x300) + return ret + 7; + if (mfspr(SRR0) != (unsigned long) &test11 + 28 || mfspr(DSISR) != 0x00400000) + return 8; + + mtspr(DAWR0, (unsigned long)&x[3]); + ret = callit(0, (unsigned long) &x, test11, mfmsr(), regs); + if (ret != 0x300) + return ret + 9; + if (mfspr(SRR0) != (unsigned long) &test11 + 32 || mfspr(DSISR) != 0x00400000) + return 10; + + mtspr(DAWR0, (unsigned long)&x[2]); + mtspr(DAWRX0, (1 << MRD_SHIFT) + DW + PRIVM_HYP); + ret = callit(0, (unsigned long) &x, test11, mfmsr(), regs); + if (ret != 0x300) + return ret + 11; + if (mfspr(SRR0) != (unsigned long) &test11 + 36 || mfspr(DSISR) != 0x02400000) + return 12; + + mtspr(DAWR0, (unsigned long)&x[0]); + mtspr(DAWRX0, (3 << MRD_SHIFT) + DR + DW + WT + PRIVM_HYP); + ret = callit(0, (unsigned long) &x, test11, mfmsr(), regs); + if (ret != 0) + return ret + 13; + + mtspr(DAWR0, (unsigned long)&x[0]); + mtspr(DAWRX0, (3 << MRD_SHIFT) + DR + DW + WT + WTI + PRIVM_HYP); + ret = callit(0, (unsigned long) &x, test11, mfmsr(), regs); + if (ret != 0x300) + return ret + 14; + if (mfspr(SRR0) != (unsigned long) &test11 || mfspr(DSISR) != 0x02400000) + return 15; + + return 0; +} + int fail = 0; void do_test(int num, int (*test)(void)) @@ -277,7 +367,7 @@ void do_test(int num, int (*test)(void)) print_string("PASS\r\n"); } else { fail = 1; - print_string("FAIL "); + print_string(" FAIL "); print_hex(ret, 4); print_string("\r\n"); } @@ -297,6 +387,7 @@ int main(void) do_test(8, trace_test_8); do_test(9, trace_test_9); do_test(10, trace_test_10); + do_test(11, trace_test_11); return fail; }