From b0f7237b7f5cbdbf132085f5517d84f6cd19f3c6 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 3 Oct 2020 20:08:11 +1000 Subject: [PATCH 1/6] execute1: Fix bug in trace interrupt vs. ITLB miss If an instruction fetch results in an instruction TLB miss, an OP_FETCH_FAILED instruction is sent down the pipe. If the MSR[TE] field is set for instruction tracing, the core currently considers that executing the OP_FETCH_FAILED counts as having executed one instruction and so generates a trace interrupt on the next valid instruction, meaning that the trace interrupt happens before the desired instruction rather than after it. Fix this by not tracing OP_FETCH_FAILED instructions. Signed-off-by: Paul Mackerras --- execute1.vhdl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/execute1.vhdl b/execute1.vhdl index 29713b2..b6387b9 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -1124,6 +1124,10 @@ begin elsif HAS_FPU and e_in.unit = FPU then fv.valid := '1'; end if; + -- Handling an ITLB miss doesn't count as having executed an instruction + if e_in.insn_type = OP_FETCH_FAILED then + do_trace := '0'; + end if; elsif r.f.redirect = '1' then v.e.valid := '1'; From 144433218f4e2a68a8ee9a23efc65d781b0f9c3f Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 25 Sep 2020 18:14:18 +1000 Subject: [PATCH 2/6] tests/trace: Test trace interrupt vs. FP unavailable interrupt Signed-off-by: Paul Mackerras --- tests/test_trace.bin | Bin 11596 -> 11812 bytes tests/test_trace.console_out | 1 + tests/trace/head.S | 5 +++++ tests/trace/trace.c | 18 ++++++++++++++++++ 4 files changed, 24 insertions(+) diff --git a/tests/test_trace.bin b/tests/test_trace.bin index b9a612d6b6df4450a701117064157bb6fd10b545..61fa48b3e6e4a2553a30e0e14727533689f4fc7b 100755 GIT binary patch delta 663 zcmXX=QAkr!82-=Qb{&(ANxr0W+r8VavwMj>_;8C+TOdq?qzHoWVQUupP#6eZ+Gun5 z;KKstueX9Ml|YcdFdz0{FXDrQLH1IC(u)Mej8&Hwp8jL|;XCL1zyJK-`L?eujLcfn zSnvG34%x94hVsSLsg9BldFX}|Xvdr3HEGm?L1hgCiWm2lW!At4>T9g3Uc3}}E>+v` zS7e2K!o}#-k{_7;hAZ|GY!vy&@EOjZC9T||8HRn+%xo3`$|g8BvjAB$0rU3mS3U&P z5I+ESBp!fDnnB)t)s`=&KrzYhV9DVwide!^>Xq_EdmqCj-dXoWCFr1HM#8`u=5)4=%{*bbFMfewy#t_(NCa z5u$Y)y9~`WLo|s4M#S}!*eu>MLY?o4!5_{y=pv^uIpy)G5$&szI3q+VKfw^B$@#Rn msp5nbVFJGydjA4Vgd$43E)&*CGlV^r&>j+Y{&H>!f$AUQ7Sm_| delta 398 zcmXYqJ4gdT6h&ty*@TFSiyu@l*`FIBv#^Cll&DxJDFiJ9K?EZSVqqf)h9zpDg@r=& zS=b5@0)`M#)N%_g6iW*`O=p=1@h|IJ^uiquXXd@h&D>3A)6CWnE%9_iTy5Mf1@@#A zOVz==bj}I^2+HTMtu#PcKBh6)Q7+h~A4b(8yZ6DXTBI5rX;YBvmV|;p;$^t$9;cJg zp@--a^yxz^Z-J!;O_7l}AtW(TST7#So@EoVsP zr{k1xfQ&Rs;_%umqz$dLfO5-~m?kX8N^oK{(=1#Y5qbqxqgi_RavY8;+_ Date: Sat, 21 Nov 2020 12:45:24 +1100 Subject: [PATCH 3/6] core: Implement mtmsr instruction This is like mtmsrd except it only alters the lower 32 bits of the MSR. Signed-off-by: Paul Mackerras --- decode1.vhdl | 1 + execute1.vhdl | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/decode1.vhdl b/decode1.vhdl index 5d6a557..6624a53 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -313,6 +313,7 @@ architecture behaviour of decode1 is 2#1100001001# => (ALU, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0'), -- modsd 2#1100001011# => (ALU, OP_MOD, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', NONE, '0', '0'), -- modsw 2#0010010000# => (ALU, OP_MTCRF, NONE, NONE, RS, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mtcrf/mtocrf + 2#0010010010# => (ALU, OP_MTMSRD, NONE, NONE, RS, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '1'), -- mtmsr 2#0010110010# => (ALU, OP_MTMSRD, NONE, NONE, RS, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- mtmsrd # ignore top bits and d 2#0111010011# => (ALU, OP_MTSPR, NONE, NONE, RS, SPR, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- mtspr 2#0001001001# => (ALU, OP_MUL_H64, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0'), -- mulhd diff --git a/execute1.vhdl b/execute1.vhdl index b6387b9..de6bd25 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -993,8 +993,11 @@ begin else -- Architecture says to leave out bits 3 (HV), 51 (ME) -- and 63 (LE) (IBM bit numbering) - ctrl_tmp.msr(63 downto 61) <= c_in(63 downto 61); - ctrl_tmp.msr(59 downto 13) <= c_in(59 downto 13); + if e_in.is_32bit = '0' then + ctrl_tmp.msr(63 downto 61) <= c_in(63 downto 61); + ctrl_tmp.msr(59 downto 32) <= c_in(59 downto 32); + end if; + ctrl_tmp.msr(31 downto 13) <= c_in(31 downto 13); ctrl_tmp.msr(11 downto 1) <= c_in(11 downto 1); if c_in(MSR_PR) = '1' then ctrl_tmp.msr(MSR_EE) <= '1'; From 27ac74a3415d894437d5a3b146d270615c637f6f Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 21 Nov 2020 13:54:14 +1100 Subject: [PATCH 4/6] execute1: Fix writing LR for bdnzl/bdzl instructions Branch instructions which do a redirect and write both CTR and LR were not doing the write to LR due to a logic error. This fixes it. Signed-off-by: Paul Mackerras --- execute1.vhdl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/execute1.vhdl b/execute1.vhdl index de6bd25..61df974 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -1131,10 +1131,16 @@ begin if e_in.insn_type = OP_FETCH_FAILED then do_trace := '0'; end if; + end if; + + -- The following cases all occur when r.busy = 1 and therefore + -- valid_in = 0. Hence they don't happen in the same cycle as any of + -- the cases above which depend on valid_in = 1. - elsif r.f.redirect = '1' then + if r.f.redirect = '1' then v.e.valid := '1'; - elsif r.lr_update = '1' then + end if; + if r.lr_update = '1' then v.e.exc_write_enable := '1'; v.e.exc_write_data := r.next_lr; v.e.exc_write_reg := fast_spr_num(SPR_LR); From e49192cb5bbaf259aa66268a46fb1c6da9a54b59 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 24 Nov 2020 11:53:17 +1100 Subject: [PATCH 5/6] execute1: Fix forwarding of result when doing delayed LR update Random execution testcases showed that a bdnzl which doesn't branch, followed immediately by a bdnz, uses the wrong value for CTR for the bdnz. Decode2 detects the read-after-write hazard on CTR and tells execute1 to use the bypass path. However, the bdnzl takes two cycles because it has to write back both CTR and LR, meaning that by the time the bdnz starts to execute, r.e.write_data no longer contains the CTR value, but instead contains zero. To fix this, we make execute1 maintain the written-back value of CTR in r.e.write_data across the cycle where LR is written back (this is possible because the LR writeback uses the exc_write_data path). Signed-off-by: Paul Mackerras --- execute1.vhdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/execute1.vhdl b/execute1.vhdl index 61df974..4f85f3d 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -1145,6 +1145,9 @@ begin v.e.exc_write_data := r.next_lr; v.e.exc_write_reg := fast_spr_num(SPR_LR); v.e.valid := '1'; + -- Keep r.e.write_data unchanged next cycle in case it is needed + -- for a forwarded result (e.g. for CTR). + result := r.e.write_data; elsif r.cntz_in_progress = '1' then -- cnt[lt]z always takes two cycles result := countzero_result; From 29fabeb12ec90dc0278957441688951f87ed7936 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 24 Nov 2020 12:00:48 +1100 Subject: [PATCH 6/6] tests/misc: Add a test for correct CTR and LR updating by branches This adds a test with a bdnzl followed immediately by a bdnz, to check that CTR and LR both get evaluated and written back correctly in this situation. Signed-off-by: Paul Mackerras --- tests/misc/head.S | 25 +++++++++++++++++++++++++ tests/misc/misc.c | 8 ++++++++ tests/test_misc.bin | Bin 5424 -> 5740 bytes tests/test_misc.console_out | 1 + 4 files changed, 34 insertions(+) diff --git a/tests/misc/head.S b/tests/misc/head.S index b0acb7f..d490a61 100644 --- a/tests/misc/head.S +++ b/tests/misc/head.S @@ -137,3 +137,28 @@ test_mtpvr: mtlr %r0 blr + +/* Test that bdnz and bdnzl update CTR and LR correctly */ + .global test_bdnzl +test_bdnzl: + mflr %r10 + mfcr %r11 + li %r0,0xf8 + mtctr %r0 + lis %r0,0x2000 + mtcr %r0 + addpcis %r9,0 +1: bdnztl 27,3f +2: bdnzt 14,4f +3: nop +4: li %r3,1 + addi %r9,%r9,2b-1b + mflr %r8 + cmpd %r8,%r9 + bne 9f + mfctr %r7 + cmpdi %r7,0xf6 + bne 9f + li %r3,0 +9: mtlr %r10 + blr diff --git a/tests/misc/misc.c b/tests/misc/misc.c index 283ca7f..73745d9 100644 --- a/tests/misc/misc.c +++ b/tests/misc/misc.c @@ -14,6 +14,7 @@ extern long test_addpcis_1(void); extern long test_addpcis_2(void); extern long test_mfpvr(void); extern long test_mtpvr(void); +extern long test_bdnzl(void); // i < 100 void print_test_number(int i) @@ -58,5 +59,12 @@ int main(void) } else puts(PASS); + print_test_number(5); + if (test_bdnzl() != 0) { + fail = 1; + puts(FAIL); + } else + puts(PASS); + return fail; } diff --git a/tests/test_misc.bin b/tests/test_misc.bin index 7e68e1cca7d67ba1d7e134d6bb8dd93a9c3640fe..2264686c06a630733a71d56de90ea1a1f3925baf 100755 GIT binary patch literal 5740 zcmeHLK};M~82)B=3j$D~Llsw`K!rvR9tIZBWWiZL-;sXV`bW)?Op)uWer$;+Gfz3;vE z{ong{llKxp4lsIwq9C!~(WDJ{eZa!hjmCun+nGyk3sdFzd@-5uMiGOQTsfw@ zZO|%rRI+lw?_YEixICg`E;DY-xCW9Lj<1*FI={Wq{rKOgFMYmJbG`pH2e7F#Pn~(^ z+O7Qb{V(!%d7$*K`|LaCRLR|cUat5g>|zDTl;ce<4D=em9b!zz=_YhoI$ABpZELVI z33O)}PZ7B;_b_J7a%{u>11C~Cze(ZzTv1{sKPr&}kpqzfkpqzfkpqzfkpqzfkpqzf zkpqzf|GNWQ>Z^NlN3Wp-qpRD;=x{W|DNHT_wM`iY~Xn6N6cEHPout9sqbvKevI5Kx$oC-zpdkThuk>1 z2g&^)fQj` z?s=dom8Nc>XX7y6L9Qm?UKuqgt^3Vil;G#ree8(s_BZ4wgLjagjkb`xxyLnyt1~}@ zze)agdpdkM)m(U#j?_Lnj9nc)8@`S)+kbx&y|o(jfM@nMoSOuOeD3Be#y{R|sv6|? zAo(c238n6ytmdVLwGV;*R2$!ZS8xmXjUpYKk` zc~|Cq*4|STZw<_z*67LBqr6x}p8*s||XAF%j<$aQZA>w2Ma^H|xf* zYWDfQ`e*!e2F4KQ4i$LL51s(W*#CG!FD5EIh3kig3Le+dU}R-p3&(&HOTzb*5##{9 z)4gM%=kUK{U+V>VD+|scz~V5oExVZM^Qz}^Wi047lEYZSd1i1X5+hz*QcBOZ8#Qr` zAsA;c)lxk(_QU+`Ou^6H`KJXFooOsvos22Qz2rZHZ+DDuXlhSyxG%?V^DJaM#`i~h z>g(Fv&8zCJz~szZZyW3mS}%<>vmLi4CZug+m7=}-G7^rZb+l;cWWSwt@lT~Zws@h* z!g6dy8*EKKZe2Lj3Ty3Fg9U>_ChIzRa~eN_@fFQ&ZpFY_gZ0?{>&u?9{<9C2y2w_F zU^D{EPcGGD`pe delta 785 zcmZ8fOK1~O6g@Mhb_}TvX#-hQeDMR0woVWi8Dq$dg)}x&3T^GSHM$U#Dk?ORFoPl( zL9`tx6eL}wE720rjVq&ByKv)9sCLtcVBJNcdES?qLdjxy_nmvrxo7Uo&*AFHYCw83 zFRdMPZb>uC!L#WT_j2(dq~>QHZN8R~0WkNa)wW`=;%|F%g|LVDVGT|0FLnf}ydG^sSo)vR|iuR(5+wgq% z5x0-HPd#o9EQw6Tvb*^2Jid?kU)wypWOWIyb_q(JAckmsv9)DACy@vX3d<<$b+127 z!v!v|84tU$2QxkF@g8i1upaSd>&}7DNKZ+Nq3k3=52+fasvf$T+TkBsP5!OLc|Eba zcM0uhzfhU^iFJ_Mx8SDuO zs=n@7JV@LdmPNwE4KV+2;@ok+FfYf9 zqGDfYbn02wspXGq-iL9wKg*nj!Bogd0==u8t9pdFk(|H)b2bP4$Jb8-Y6&Q5V%ds) uq(O@|k731>#bEla6k2zG_^`C%XyIriZC