From 25f93fc17e609509ade1d6a1bcb977fe8a075fb6 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 9 Aug 2022 20:32:52 +1000 Subject: [PATCH 1/2] Add branch alias test Signed-off-by: Anton Blanchard --- tests/branch_alias/Makefile | 3 + tests/branch_alias/branch_alias.c | 238 ++++++++++++++++++++++++++++ tests/branch_alias/head.S | 210 ++++++++++++++++++++++++ tests/branch_alias/powerpc.lds | 27 ++++ tests/test_branch_alias.bin | Bin 0 -> 16416 bytes tests/test_branch_alias.console_out | 1 + tests/test_branch_alias.metavalue | 1 + tests/update_console_tests | 2 +- 8 files changed, 481 insertions(+), 1 deletion(-) create mode 100644 tests/branch_alias/Makefile create mode 100644 tests/branch_alias/branch_alias.c create mode 100644 tests/branch_alias/head.S create mode 100644 tests/branch_alias/powerpc.lds create mode 100755 tests/test_branch_alias.bin create mode 100644 tests/test_branch_alias.console_out create mode 100644 tests/test_branch_alias.metavalue diff --git a/tests/branch_alias/Makefile b/tests/branch_alias/Makefile new file mode 100644 index 0000000..867f4d7 --- /dev/null +++ b/tests/branch_alias/Makefile @@ -0,0 +1,3 @@ +TEST=branch_alias + +include ../Makefile.test diff --git a/tests/branch_alias/branch_alias.c b/tests/branch_alias/branch_alias.c new file mode 100644 index 0000000..1a01e6b --- /dev/null +++ b/tests/branch_alias/branch_alias.c @@ -0,0 +1,238 @@ +#include +#include +#include + +#include "console.h" + +#define MSR_LE 0x1 +#define MSR_DR 0x10 +#define MSR_IR 0x20 +#define MSR_SF 0x8000000000000000ul + +extern unsigned long callit(unsigned long arg1, unsigned long arg2, + unsigned long fn, unsigned long msr); + +static inline void do_tlbie(unsigned long rb, unsigned long rs) +{ + __asm__ volatile("tlbie %0,%1" : : "r" (rb), "r" (rs) : "memory"); +} + +#define SRR0 26 +#define SRR1 27 +#define PID 48 +#define SPRG0 272 +#define SPRG1 273 +#define PTCR 464 + +static inline unsigned long mfspr(int sprnum) +{ + long val; + + __asm__ volatile("mfspr %0,%1" : "=r" (val) : "i" (sprnum)); + return val; +} + +static inline void mtspr(int sprnum, unsigned long val) +{ + __asm__ volatile("mtspr %0,%1" : : "i" (sprnum), "r" (val)); +} + +static inline void store_pte(unsigned long *p, unsigned long pte) +{ + __asm__ volatile("stdbrx %1,0,%0" : : "r" (p), "r" (pte) : "memory"); +} + +void print_string(const char *str) +{ + for (; *str; ++str) + putchar(*str); +} + +void print_hex(unsigned long val, int ndigit) +{ + int i, x; + + for (i = (ndigit - 1) * 4; i >= 0; i -= 4) { + x = (val >> i) & 0xf; + if (x >= 10) + putchar(x + 'a' - 10); + else + putchar(x + '0'); + } +} + +// i < 100 +void print_test_number(int i) +{ + print_string("test "); + putchar(48 + i/10); + putchar(48 + i%10); + putchar(':'); +} + +#define CACHE_LINE_SIZE 64 + +void zero_memory(void *ptr, unsigned long nbytes) +{ + unsigned long nb, i, nl; + void *p; + + for (; nbytes != 0; nbytes -= nb, ptr += nb) { + nb = -((unsigned long)ptr) & (CACHE_LINE_SIZE - 1); + if (nb == 0 && nbytes >= CACHE_LINE_SIZE) { + nl = nbytes / CACHE_LINE_SIZE; + p = ptr; + for (i = 0; i < nl; ++i) { + __asm__ volatile("dcbz 0,%0" : : "r" (p) : "memory"); + p += CACHE_LINE_SIZE; + } + nb = nl * CACHE_LINE_SIZE; + } else { + if (nb > nbytes) + nb = nbytes; + for (i = 0; i < nb; ++i) + ((unsigned char *)ptr)[i] = 0; + } + } +} + +#define PERM_EX 0x001 +#define PERM_WR 0x002 +#define PERM_RD 0x004 +#define PERM_PRIV 0x008 +#define ATTR_NC 0x020 +#define CHG 0x080 +#define REF 0x100 + +#define DFLT_PERM (PERM_EX | PERM_WR | PERM_RD | REF | CHG) + +/* + * Set up an MMU translation tree using memory starting at the 64k point. + * We use 3 levels, mapping 512GB, with 4kB PGD/PMD/PTE pages. + */ +unsigned long *part_tbl = (unsigned long *) 0x10000; +unsigned long *proc_tbl = (unsigned long *) 0x11000; +unsigned long *pgdir = (unsigned long *) 0x12000; +unsigned long free_ptr = 0x13000; + +void init_mmu(void) +{ + /* set up partition table */ + store_pte(&part_tbl[1], (unsigned long)proc_tbl); + /* set up process table */ + zero_memory(proc_tbl, 512 * sizeof(unsigned long)); + mtspr(PTCR, (unsigned long)part_tbl); + mtspr(PID, 1); + zero_memory(pgdir, 512 * sizeof(unsigned long)); + /* RTS = 8 (512GB address space), RPDS = 9 (512-entry top level) */ + store_pte(&proc_tbl[2 * 1], (unsigned long) pgdir | 0x2000000000000009); + do_tlbie(0xc00, 0); /* invalidate all TLB entries */ +} + +static unsigned long *read_pd(unsigned long *pdp, unsigned long i) +{ + unsigned long ret; + + __asm__ volatile("ldbrx %0,%1,%2" : "=r" (ret) : "b" (pdp), + "r" (i * sizeof(unsigned long))); + return (unsigned long *) (ret & 0x00ffffffffffff00); +} + +void map(unsigned long ea, unsigned long pa, unsigned long perm_attr) +{ + unsigned long epn = ea >> 12; + unsigned long h, i, j; + unsigned long *ptep; + unsigned long *pmdp; + + h = (epn >> 18) & 0x1ff; + i = (epn >> 9) & 0x1ff; + j = epn & 0x1ff; + if (pgdir[h] == 0) { + zero_memory((void *)free_ptr, 512 * sizeof(unsigned long)); + store_pte(&pgdir[h], 0x8000000000000000 | free_ptr | 9); + free_ptr += 512 * sizeof(unsigned long); + } + pmdp = read_pd(pgdir, h); + if (pmdp[i] == 0) { + zero_memory((void *)free_ptr, 512 * sizeof(unsigned long)); + store_pte(&pmdp[i], 0x8000000000000000 | free_ptr | 9); + free_ptr += 512 * sizeof(unsigned long); + } + ptep = read_pd(pmdp, i); + if (ptep[j]) { + ptep[j] = 0; + do_tlbie(ea & ~0xfff, 0); + } + store_pte(&ptep[j], 0xc000000000000000 | (pa & 0x00fffffffffff000) | + perm_attr); +} + +void unmap(void *ea) +{ + unsigned long epn = (unsigned long) ea >> 12; + unsigned long h, i, j; + unsigned long *ptep, *pmdp; + + h = (epn >> 18) & 0x1ff; + i = (epn >> 9) & 0x1ff; + j = epn & 0x1ff; + if (pgdir[h] == 0) + return; + pmdp = read_pd(pgdir, h); + if (pmdp[i] == 0) + return; + ptep = read_pd(pmdp, i); + ptep[j] = 0; + do_tlbie(((unsigned long)ea & ~0xfff), 0); +} + +extern unsigned long test_code(unsigned long sel, unsigned long addr); + +int mode_test_1(void) +{ + unsigned long ret, msr; + + map(0, (unsigned long) &test_code, DFLT_PERM); + msr = MSR_SF | MSR_IR | MSR_DR | MSR_LE; + ret = callit(2, 0, 0x0, msr); + + return ret; +} + +int fail = 0; + +void do_test(int num, int (*test)(void)) +{ + int ret; + + print_test_number(num); + ret = test(); + if (ret == 0) { + print_string("PASS\r\n"); + } else { + fail = 1; + print_string("FAIL "); + print_hex(ret, 16); + if (ret != 0 && (ret & ~0xfe0ul) == 0) { + print_string(" SRR0="); + print_hex(mfspr(SPRG0), 16); + print_string(" SRR1="); + print_hex(mfspr(SPRG1), 16); + } + print_string("\r\n"); + } +} + +int main(void) +{ + console_init(); + init_mmu(); + + // Prime the branch caches + __asm__ __volatile__("sc"); + + do_test(1, mode_test_1); + + return fail; +} diff --git a/tests/branch_alias/head.S b/tests/branch_alias/head.S new file mode 100644 index 0000000..f221d27 --- /dev/null +++ b/tests/branch_alias/head.S @@ -0,0 +1,210 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Load an immediate 64-bit value into a register */ +#define LOAD_IMM64(r, e) \ + lis r,(e)@highest; \ + ori r,r,(e)@higher; \ + rldicr r,r, 32, 31; \ + oris r,r, (e)@h; \ + ori r,r, (e)@l; + + .section ".head","ax" + + /* + * Microwatt currently enters in LE mode at 0x0, so we don't need to + * do any endian fix ups + */ + . = 0 +.global _start +_start: + LOAD_IMM64(%r10,__bss_start) + LOAD_IMM64(%r11,__bss_end) + subf %r11,%r10,%r11 + addi %r11,%r11,63 + srdi. %r11,%r11,6 + beq 2f + mtctr %r11 +1: dcbz 0,%r10 + addi %r10,%r10,64 + bdnz 1b + +2: LOAD_IMM64(%r1,__stack_top) + li %r0,0 + stdu %r0,-16(%r1) + mtsprg2 %r0 + LOAD_IMM64(%r12, main) + mtctr %r12 + bctrl + attn // terminate on exit + b . + +#define EXCEPTION(nr) \ + .= nr; \ + b . + + EXCEPTION(0x300) + EXCEPTION(0x380) + +. = 0x400 + b call_ret + + EXCEPTION(0x480) + EXCEPTION(0x500) + EXCEPTION(0x600) + EXCEPTION(0x700) + EXCEPTION(0x800) + EXCEPTION(0x900) + EXCEPTION(0x980) + EXCEPTION(0xa00) + EXCEPTION(0xb00) + + . = 0xc00 + nop + nop + nop + nop + b 1f + nop +1: rfid + + EXCEPTION(0xd00) + EXCEPTION(0xe00) + EXCEPTION(0xe20) + EXCEPTION(0xe40) + EXCEPTION(0xe60) + EXCEPTION(0xe80) + EXCEPTION(0xf00) + EXCEPTION(0xf20) + EXCEPTION(0xf40) + EXCEPTION(0xf60) + EXCEPTION(0xf80) + + . = 0x1000 + /* + * This page gets mapped at virtual address 0 + */ + .globl test_code +test_code: + b 1f + . = 0x1c00 +1: nop + nop + nop + nop + li %r3,1 + li %r3,0 + // Exit via 0x400 exception + ba -4 + + .globl test_code_end +test_code_end: + + . = 0x2000 + /* + * Call a function in a context with a given MSR value. + * r3, r4 = args; r5 = function; r6 = MSR + */ + .globl callit +callit: + mflr %r0 + std %r0,16(%r1) + stdu %r1,-256(%r1) + mfcr %r8 + stw %r8,100(%r1) + std %r13,104(%r1) + std %r14,112(%r1) + std %r15,120(%r1) + std %r16,128(%r1) + std %r17,136(%r1) + std %r18,144(%r1) + std %r19,152(%r1) + std %r20,160(%r1) + std %r21,168(%r1) + std %r22,176(%r1) + std %r23,184(%r1) + std %r24,192(%r1) + std %r25,200(%r1) + std %r26,208(%r1) + std %r27,216(%r1) + std %r28,224(%r1) + std %r29,232(%r1) + std %r30,240(%r1) + std %r31,248(%r1) + li %r0,restore@l + mtsprg0 %r0 + mtsprg1 %r1 + mtsprg2 %r2 + mfmsr %r9 + mtsprg3 %r9 + li %r10,call_ret@l + mtlr %r10 + mtsrr0 %r5 + mtsrr1 %r6 + mr %r12,%r5 + rfid +call_ret: + tdi 0,%r0,0x48 /* b .+8 if wrong endian */ + b 2f /* if endian OK */ + /* reverse-endian version of instructions from 2: on */ + .long 0xa642107c + .long 0xa642937c + .long 0xa602ba7c + .long 0xa602db7c + .long 0xa643b07c + .long 0xa643d37c + .long 0xa6031a7c + .long 0xa6039b7c + .long 0x2400004c +2: mfsprg0 %r0 + mfsprg3 %r4 + mfsrr0 %r5 + mfsrr1 %r6 + mtsprg0 %r5 + mtsprg3 %r6 + mtsrr0 %r0 + mtsrr1 %r4 + rfid +restore: + mfsprg1 %r1 + mfsprg2 %r2 + li %r7,0 + mtsprg2 %r7 + lwz %r8,100(%r1) + mtcr %r8 + ld %r13,104(%r1) + ld %r14,112(%r1) + ld %r15,120(%r1) + ld %r16,128(%r1) + ld %r17,136(%r1) + ld %r18,144(%r1) + ld %r19,152(%r1) + ld %r20,160(%r1) + ld %r21,168(%r1) + ld %r22,176(%r1) + ld %r23,184(%r1) + ld %r24,192(%r1) + ld %r25,200(%r1) + ld %r26,208(%r1) + ld %r27,216(%r1) + ld %r28,224(%r1) + ld %r29,232(%r1) + ld %r30,240(%r1) + ld %r31,248(%r1) + addi %r1,%r1,256 + ld %r0,16(%r1) + mtlr %r0 + blr diff --git a/tests/branch_alias/powerpc.lds b/tests/branch_alias/powerpc.lds new file mode 100644 index 0000000..99611ab --- /dev/null +++ b/tests/branch_alias/powerpc.lds @@ -0,0 +1,27 @@ +SECTIONS +{ + . = 0; + _start = .; + .head : { + KEEP(*(.head)) + } + . = ALIGN(0x1000); + .text : { *(.text) *(.text.*) *(.rodata) *(.rodata.*) } + . = ALIGN(0x1000); + .data : { *(.data) *(.data.*) *(.got) *(.toc) } + . = ALIGN(0x80); + __bss_start = .; + .bss : { + *(.dynsbss) + *(.sbss) + *(.scommon) + *(.dynbss) + *(.bss) + *(.common) + *(.bss.*) + } + . = ALIGN(0x80); + __bss_end = .; + . = . + 0x4000; + __stack_top = .; +} diff --git a/tests/test_branch_alias.bin b/tests/test_branch_alias.bin new file mode 100755 index 0000000000000000000000000000000000000000..f4476662f418aa5a9200a34243a37147244c29d5 GIT binary patch literal 16416 zcmeHOQEU{~6}_`-ykp04w-u3GOyjY=R9<&KjM`MY!G_sgL+ynG7idA1Dt57v0Otb) zmB#T*c5p2fkfoZYAE6L19~ni6Mtty*Mv7!;Wue_X{*g(CJpI-0nTS)^^%e&v0-buPjiO8&=7?ILSRG%F4)oVES(9!xN5rsb!(ySH# znoeZG<`8TS!RFB3ju(bfmiM?(UuM2c9%vGE*nizWE(1@Of#uG-Wu1R`^|G(KWyOzF zTkiRvZni%-n~VQJPQFYMH~z~T?d9fwx#wSA_TBoQL;x0=?C_Bc5N%oRz&xe--TZ$v z(U0Tj^X`ANcTe8?dHesQ_MaT#qbyW?>A(LziXFGlWx!>?Wx!>?Wx!>?Wx!>?W#Eg( zKoWo!nncMkZo~J4*!)EU>f&)3a2Z(2faQ^OKS{HasGt2Nk$tD4M@Psr)z{%}>$!{0v>m z&(cJGT%oWP_v==CE!sBSBE_)c-!_I7HDY-?49okTZh3F&+P5z1yyu9`61fR`kh(u+ z>93=`=dtvgXqV!a{t?%TG*TnSTWd*y{=mx*znwh{Yz*c!QLvg)3!Y;hIwP# z@cV;t`0)=N_>L-{eZ>IA3@~PZF$0WQV9Wwz78tX@m<7fxFlK=<3ye8n%mHH#7<0gw z1I8RM=72FrQhgX4PFY@AR|vmna1WOOmjRanmjRanmjRanmjRanmjRanmjRc7|1$<8 zQd?;>7Uq3`Suv3p;`Tb;54eu~0EyB=MBY#p4X7gd3ii8Z(s~PZhgr6!AbSz}pkFUsGyZn?S_q$HI`1q*uniGXXyB!56vz7!i9_E3 zR~snM6atQc>nAj=c0cwaI*sYA{H{dMepjNW{J7So9M@MSy7f``q;>^#t?{@nV~n=F z(~u~GJy_l!$>=wP8hhvaUdR%Wf0nS@ZrqO@!X$If=Nhvi<2Pd%Cj%U+Ls^K^H zOp)IVZ-gz@u@1Muwi2om@2R*fIX4Ywg-)-KPl)h*>C!^X!nd` zce2iC9^VbS%^`kIhWWWu_I`i!#BQS}s~UG=zv~x5iFmFKYT6$L_3D{GU(-fYPlCU9 zom_*mAVGQKa)o6{dtb-1UP)l_H(qAXe2!58v}1cP(+*z> zY(syFbNjwMD#_%@3N-+iHn)?{9nb=GiN5Bp3gBq6^SfzB(6|#1_Jx0PV29^w@Zo*$ z@3Z?H%-vdZXNS?VN;U4^6D!VFtT|faIeQP1b2M^SY)0OvYbZLa<)_p2cs~f&Q(T9z z&QxC4vslxdZ;T1~o?}dEUToHC(>mue<6u4N>kgY{!^t(=m%p9%gPW`8MSlwSQd0Eu zT4g;MdJ<$->>&Iw!%1WripA$VF@*~EaE1)Ah=%J2E$oTW@ zS=_e)+hyNx(=XtBt%TFA?OeBc%sVxmSOb;z8j!%tOi6DkAEQbeu%^)ZZ0^7-jpE(9 z@?f8g+M!Uwk^hj_L*kI#r#NyI{*V(_)GO7_-(fR$YBj%`2c2J_?y!DML0^bM?25dM zQk#m}tdQ)?v5Ab^sMkfCB|DwQNFg_PogjXv2xF@hIc1_I6~|xro?;wBjdNn9+Si_| ziN3O8o^5qxUieydpRAZy7YaH)=wjuCrL*Utlz_X3QcN<>kJU{eu(91zD2fZBha=f0QYu6^| z=eSO>E?;wBv2L-pgN;VMH__P5u9lw^v-(DNgHj=xUKm&Th+Gt3RFIXE|R;XyUEt=Vxv$c_UD&q z%D;s$apr^lc^hpAvJsqZf6jVh=OkzFuEJn0XxGrnW`$5CT(CdUH{?^kUzML;ai;7| zFFOYevFfC!-5&JUPj7zmmm8ROHPiu}(fH9ww&U#AGiZQVWmpz_iFalj3#|PUq(Jn(~b5@XX494*#nDyPm ZWx!>?W#E6#Knsc@A4=iuFPzCa{|EK|T-5*o literal 0 HcmV?d00001 diff --git a/tests/test_branch_alias.console_out b/tests/test_branch_alias.console_out new file mode 100644 index 0000000..ceba7e3 --- /dev/null +++ b/tests/test_branch_alias.console_out @@ -0,0 +1 @@ +test 01:PASS diff --git a/tests/test_branch_alias.metavalue b/tests/test_branch_alias.metavalue new file mode 100644 index 0000000..a29644e --- /dev/null +++ b/tests/test_branch_alias.metavalue @@ -0,0 +1 @@ +144 diff --git a/tests/update_console_tests b/tests/update_console_tests index d0613c8..ee82658 100755 --- a/tests/update_console_tests +++ b/tests/update_console_tests @@ -3,7 +3,7 @@ # Script to update console related tests from source # -for i in sc illegal decrementer xics privileged mmu misc modes pmu reservation trace fpu spr_read ; do +for i in sc illegal decrementer xics privileged mmu misc modes pmu reservation trace fpu spr_read branch_alias ; do cd $i make cd - From 769f5c3a5318a1af95086c8ca693abadb493aaf9 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 10 Aug 2022 10:13:50 +1000 Subject: [PATCH 2/2] fetch1: Fix bug where BTC entries don't match on MSR[IR] This fixes a bug in the BTC where entries created for a given address when MSR[IR] = 0 are used when MSR[IR] = 1 and vice-versa. The fix is to include r.virt_mode (which mirrors MSR[IR]) in the tag portion of the BTC. Fixes: 0fb207be6069 ("fetch1: Implement a simple branch target cache", 2020-12-19) Reported-by: Anton Blanchard Signed-off-by: Paul Mackerras --- fetch1.vhdl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fetch1.vhdl b/fetch1.vhdl index c6d26d7..13f2a7f 100644 --- a/fetch1.vhdl +++ b/fetch1.vhdl @@ -53,7 +53,7 @@ architecture behaviour of fetch1 is constant BTC_TAG_BITS : integer := 62 - BTC_ADDR_BITS; constant BTC_TARGET_BITS : integer := 62; constant BTC_SIZE : integer := 2 ** BTC_ADDR_BITS; - constant BTC_WIDTH : integer := BTC_TAG_BITS + BTC_TARGET_BITS + 1; + constant BTC_WIDTH : integer := BTC_TAG_BITS + BTC_TARGET_BITS + 2; type btc_mem_type is array (0 to BTC_SIZE - 1) of std_ulogic_vector(BTC_WIDTH - 1 downto 0); signal btc_rd_data : std_ulogic_vector(BTC_WIDTH - 1 downto 0) := (others => '0'); @@ -111,6 +111,7 @@ begin signal btc_wr_addr : std_ulogic_vector(BTC_ADDR_BITS - 1 downto 0); begin btc_wr_data <= w_in.br_taken & + r.virt_mode & w_in.br_nia(63 downto BTC_ADDR_BITS + 2) & w_in.redirect_nia(63 downto 2); btc_wr_addr <= w_in.br_nia(BTC_ADDR_BITS + 1 downto 2); @@ -193,7 +194,8 @@ begin v.nia(63 downto 32) := x"00000000"; end if; if btc_rd_valid = '1' and r_int.rd_is_niap4 = '1' and - btc_rd_data(BTC_WIDTH - 2 downto BTC_TARGET_BITS) + btc_rd_data(BTC_WIDTH - 2) = r.virt_mode and + btc_rd_data(BTC_WIDTH - 3 downto BTC_TARGET_BITS) = v.nia(BTC_TAG_BITS + BTC_ADDR_BITS + 1 downto BTC_ADDR_BITS + 2) then v_int.predicted_taken := btc_rd_data(BTC_WIDTH - 1); v_int.pred_not_taken := not btc_rd_data(BTC_WIDTH - 1);