From 6fe0b6e444164c4d673e3e2dd1b1c15a74063f23 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 19 Sep 2025 11:16:33 +1000 Subject: [PATCH 01/12] execute1: Fix no-op behaviour of reading undefined SPRs In privileged mode, mfspr from an undefined or unimplemented SPR number should be a no-op, which is implemented here by writing back the same value that the destination register previously had. However, we ended up writing back 0 because ex1.res2_sel was not set correctly. To fix this, set res2_sel to 10 in the undefined SPR case. Signed-off-by: Paul Mackerras --- execute1.vhdl | 1 + 1 file changed, 1 insertion(+) diff --git a/execute1.vhdl b/execute1.vhdl index 34fd03a..85b4a7a 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -1456,6 +1456,7 @@ begin end if; slow_op := '1'; v.se.noop_spr_read := '1'; + v.res2_sel := "10"; if ex1.msr(MSR_PR) = '1' then illegal := '1'; end if; From 4073aa5ffd915be31a67c735484be8608d714be5 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 29 Sep 2025 16:12:40 +1000 Subject: [PATCH 02/12] execute1: Fix setting HEIR and FSCR[IC] on interrupts Code in the execute1_actions process that handles illegal and facility unavailable interrupts was setting actions.se.set_heir or actions.se.write_ic, but then because actions.exception was also set, the contents of actions.se were ignored, meaning that HEIR or FSCR[IC] were not getting updated. To fix this, execute1_1 now conditions use of those fields on valid_in rather than go. 
Signed-off-by: Paul Mackerras --- execute1.vhdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/execute1.vhdl b/execute1.vhdl index 85b4a7a..fe608c4 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -1747,6 +1747,8 @@ begin if valid_in = '1' then v.prev_op := e_in.insn_type; v.prev_prefixed := e_in.prefixed; + v.se.set_heir := actions.se.set_heir; + v.se.write_ic := actions.se.write_ic; end if; -- Determine if there is any interrupt to be taken From 9f9f9046ee890567ae3f364e364972b555c5c01f Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 17 Sep 2025 17:57:08 +1000 Subject: [PATCH 03/12] tests/spr_read: Add a check for no-op behaviour of mtspr and mfspr Signed-off-by: Paul Mackerras --- tests/spr_read/spr_read.c | 32 ++++++++++++++++++++++++++++++-- tests/test_spr_read.bin | Bin 6344 -> 6540 bytes tests/test_spr_read.console_out | 2 ++ 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/tests/spr_read/spr_read.c b/tests/spr_read/spr_read.c index 6d2859e..a2a8a84 100644 --- a/tests/spr_read/spr_read.c +++ b/tests/spr_read/spr_read.c @@ -47,6 +47,7 @@ void print_test(char *str) int main(void) { unsigned long tmp; + int fail = 0; console_init(); @@ -86,7 +87,34 @@ int main(void) DO_ONE(SPR_PTCR); DO_ONE(SPR_PVR); - puts(PASS); + /* + * Test no-op behaviour of reserved no-op SPRs, + * and of accesses to undefined SPRs in privileged mode. 
+ */ + print_test("reserved no-op"); + __asm__ __volatile__("mtspr 811,%0" : : "r" (7838)); + __asm__ __volatile__("li %0,%1; mfspr %0,811" : "=r" (tmp) : "i" (2398)); + if (tmp == 2398) { + puts(PASS); + } else { + puts(FAIL); + fail = 1; + } + + print_test("undefined SPR"); + __asm__ __volatile__("mtspr 179,%0" : : "r" (7738)); + __asm__ __volatile__("li %0,%1; mfspr %0,179" : "=r" (tmp) : "i" (2498)); + if (tmp == 2498) { + puts(PASS); + } else { + puts(FAIL); + fail = 1; + } + + if (!fail) + puts(PASS); + else + puts(FAIL); - return 0; + return fail; } diff --git a/tests/test_spr_read.bin b/tests/test_spr_read.bin index 68f6cd8f1d40ad5c59a98ccb0b2b2ceeeb6f7472..8c7a7d6a2d5a89f14fd7fcb083f10ee50c663c1a 100755 GIT binary patch literal 6540 zcmeI0O=whC6vxk-Op{5PXaW`jk@QWfkkN!Drk2T<&P*mIGa4Nxqjgc+iOG}(B&M;g zlR8tMsX~Q7Kj=pA6C|`!ikm`R6rzF)?LrqWx@et@B6>t@B6>t@B6>t@B6>t@B6>t@B6>t^!KPWI~UVd0f+WmR+?!&1f zUrr}uyGEf7IvWX2d3-t87qT^nbEdhsqx#@r5We%bO#l2Nul)W`XBX}rzH86hC>`vx z=S}1feruXrZ4ER-BU1BX-v!R?%!)BIy|{Xw&x_ZhcyvriB{ z1L9MkXSPRSA+T;?0byZ<^@0VMU4LEJT7@ORDkN@$uu_Gk!5;s^HMIzPR8*+c1m)-DeQ#846q}@+Jxm5_9a+GSi7(Rg`EY93F{Ent+0z= zjm+X!&Va(E!K#E=IV%))6YR-zK3|jMeC#Ql{SMf#!kUHMR@fZa&%!J{FDZ<&gTZsc zEIm&ttP<>)u-7E#5ryes!_4AVEg6M1f%OTqYKbW<1lG)~%!+GNSTEQs%*HHMrLf&# zPoMGGH%l!~w8GibV84N#uGL$oZmiG0Q;P9AkAeCjnh5c|B${r&ti)sy^BrQ^IA(oI z{tCye<1tX*YsY+L`xDhRL~UE%w()y)r{wCPUgY5SKtujfd-0FO4IR- z=(a1A_mSR8#qn{P4p=tm%v!91Mj>X3-8&x`C!JcauEaR)%UEakJY7xHNxE;~>#lg- z5N#=~Ar^PwL^=Eolk@l_|AihsPlls!Namg^aeS8Td$Gf^6`{`ZWj5z7T(Esg;Dcvj z-6z;=?bD_im%XcXrTzk6d^pat;dA(mh<#t{9`}@lj7q)LpTD&Wj6u(s18l(NOejsV=7q#uF zh~=M(>=gfKcbAO)$z+q0n*FhER@Yrykic%tdzAoZ?F2}CBH804bk@uH1|=VSdRmsR zcUlH3e<9)G+D^#pL@X-jNTj-!)S1|oq_NC_%-F}7A$??j3 zBLK*Nb)~((_DPbw*|1hqj#~&@HTvjZrn#GWv@}d-wC)v6RpC!#D=0iu99{!+2l{#V zoGikmN9bwK8O?K}Yz0MTelHHc{Nnt)C+GL#t)9fljcpC+IoLjD16n4?Z`|x7mR9FFSAv3jhEB literal 6344 zcmeI0-)oyy7{{MC4Q*Cy?Z81sWxm;V(T4u^O(@AP7Bb2nYLi-8I$T|IuE=Y8My80sG&yax{F 
zoaggA&w0-GJ?{&Nm#B}(j1mR%cNzi(jHl_z9Q^vh1RK}Qg20PE{uiYTm4xDsvTbW{hu$aJ(>#G^TU)6e^F=I zLlg+lD|Ot~z$hC;wRxRf6LviB@hSGGAa2eRC@J;(k;@lU7w>V)&dWa6;4>~hZ6#*c zH8u~{FRWeIC5}m>;Z@ zS=!2ZR%2~oJA_#|Piia-wz|&e3rWsH8tVhQC#+N0PK}L#EeW&qTsJn){x;YRVV0f` zHC6zt2-_n$f6&+r*jZ+2tCp`cb{XuXFsqi2H8u}+h*_f*cTQsqU^|(;XR%?8ErP8- z=d&M?S|T>%JJ(^)`JNN*{de%&>aN3<_my{7c%*VNLbNRyqUi+DzQ-?9d*o{`6>koj zC4<_}JymRfXIX5!^2P4@u^hjT?Av1du59|o+1E#7VfJ5v|6SZ)y9ODY_l@_its%HY z4dzA-`yNXTC-U+=sNLUZ`Ng~ICvrq-@m+btELpW~R^aUKd`IxDSySp!m-F5mbpI2g z?qCZ|#~mHtT~o&!X;)hlY%fCBUYx1$$L*;YM(un?8UH~RS|^IE{Wt#p5>aE&r0D?z zdOZ6t9Twv;lhUK;lcL$Fc!?Ix7}cjIaVFT?Eju)3E&Qwzc)W&rtL>AtNU_^5VKjY+ z^Wi^Ie!ia7SBPS4`>ZEjx+oe`-XJpq{lDaxCa|(HBOD_dA2Ul--fioRdNB8VpPuwO zcjJU*tD|_NsmA8qiJP`h1AK4}>rTODKaadpX?uS&<~Ox-Zu+2nt2xgMEBnsu4H8xM z^O|c5(UrtroXdl~i1q}1m>bYkuh-f)9(jQ0z}!P)dFUy1t!&&WdqaE3jlX~1#5_Bc zd2^&ULqmydw$Wa{p0Vsbxs32!!WvWA{w$C*j!W70T+;H-C5Od7)!QRuHj@e2?#{YX zU-ukE0=sd(J}tmmI{~~#KjtKK*0ubAmXG}p)~<&w1IPDi2{+buLS7H`r=;$oT+hGi z96FVu7CW!;VTQbggwfiBTL+sTJyca{gn4RPg0339&7tjykNTS_albxc!W>0E4WFm{ zzy497R}IfXGbP~w(^k3J#<~4?qc39Q#x`6Qnt`p1ky{&j9Q{w&@))UCYPjO*k$qB) pyvuL(i5Gx4{tSBCbEP Date: Mon, 22 Sep 2025 09:15:11 +1000 Subject: [PATCH 04/12] control: Fix forwarding when previous result write is suppressed If we have two successive instructions that write the same result register and then a third that uses the same register as an input, and the second instruction suppresses the write of its result, we can currently end up with the third instruction using the wrong value, because it uses the register value from before the first instruction rather than the result of the first instruction. (An example of an instruction suppressing the write of its result is a floating-point instruction that generates an enabled invalid operation exception but not an interrupt.) 
To fix this, the control module now uses any forwarded value for the register we want, not just the most recent value, but still stalls until it has the most recent value, or the previous instruction completes. Thus in the case described above, decode2 will have latched the value from the first instruction and so the third instruction gets the correct value. Signed-off-by: Paul Mackerras --- control.vhdl | 112 ++++++++++++++++++++++++++++++++++----------------- decode2.vhdl | 1 + 2 files changed, 77 insertions(+), 36 deletions(-) diff --git a/control.vhdl b/control.vhdl index b75fcc1..c34184a 100644 --- a/control.vhdl +++ b/control.vhdl @@ -36,6 +36,7 @@ entity control is execute_next_cr_tag : in instr_tag_t; execute2_next_tag : in instr_tag_t; execute2_next_cr_tag : in instr_tag_t; + writeback_tag : in instr_tag_t; cr_read_in : in std_ulogic; cr_write_in : in std_ulogic; @@ -163,70 +164,109 @@ begin variable byp_cr : std_ulogic_vector(1 downto 0); variable tag_ov : instr_tag_t; variable tag_prev : instr_tag_t; + variable rma : std_ulogic_vector(TAG_COUNT-1 downto 0); + variable rmb : std_ulogic_vector(TAG_COUNT-1 downto 0); + variable rmc : std_ulogic_vector(TAG_COUNT-1 downto 0); + variable tag_a_stall : std_ulogic; + variable tag_b_stall : std_ulogic; + variable tag_c_stall : std_ulogic; begin tag_a := instr_tag_init; + tag_a_stall := '0'; + rma := (others => '0'); for i in tag_number_t loop - if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_a_read_in then - tag_a.valid := '1'; - tag_a.tag := i; - end if; - end loop; - tag_b := instr_tag_init; - for i in tag_number_t loop - if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_b_read_in then - tag_b.valid := '1'; - tag_b.tag := i; - end if; - end loop; - tag_c := instr_tag_init; - for i in tag_number_t loop - if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_c_read_in then - tag_c.valid := '1'; - tag_c.tag := i; + if 
tag_regs(i).valid = '1' and tag_regs(i).wr_gpr = '1' and + tag_regs(i).reg = gpr_a_read_in and gpr_a_read_valid_in = '1' then + rma(i) := '1'; + if tag_regs(i).recent = '1' then + tag_a_stall := '1'; + end if; end if; end loop; - byp_a := "0000"; - if EX1_BYPASS and tag_match(execute_next_tag, tag_a) then + if EX1_BYPASS and execute_next_tag.valid = '1' and + rma(execute_next_tag.tag) = '1' then byp_a(1) := '1'; - end if; - if EX1_BYPASS and tag_match(execute2_next_tag, tag_a) then + tag_a := execute_next_tag; + elsif EX1_BYPASS and execute2_next_tag.valid = '1' and + rma(execute2_next_tag.tag) = '1' then byp_a(2) := '1'; - end if; - if tag_match(complete_in, tag_a) then + tag_a := execute2_next_tag; + elsif writeback_tag.valid = '1' and rma(writeback_tag.tag) = '1' then byp_a(3) := '1'; + tag_a := writeback_tag; end if; byp_a(0) := gpr_a_read_valid_in and (byp_a(1) or byp_a(2) or byp_a(3)); + if tag_a.valid = '1' and tag_regs(tag_a.tag).valid = '1' and + tag_regs(tag_a.tag).recent = '1' then + tag_a_stall := '0'; + end if; + + tag_b := instr_tag_init; + tag_b_stall := '0'; + rmb := (others => '0'); + for i in tag_number_t loop + if tag_regs(i).valid = '1' and tag_regs(i).wr_gpr = '1' and + tag_regs(i).reg = gpr_b_read_in and gpr_b_read_valid_in = '1' then + rmb(i) := '1'; + if tag_regs(i).recent = '1' then + tag_b_stall := '1'; + end if; + end if; + end loop; byp_b := "0000"; - if EX1_BYPASS and tag_match(execute_next_tag, tag_b) then + if EX1_BYPASS and execute_next_tag.valid = '1' and + rmb(execute_next_tag.tag) = '1' then byp_b(1) := '1'; - end if; - if EX1_BYPASS and tag_match(execute2_next_tag, tag_b) then + tag_b := execute_next_tag; + elsif EX1_BYPASS and execute2_next_tag.valid = '1' and + rmb(execute2_next_tag.tag) = '1' then byp_b(2) := '1'; - end if; - if tag_match(complete_in, tag_b) then + tag_b := execute2_next_tag; + elsif writeback_tag.valid = '1' and rmb(writeback_tag.tag) = '1' then byp_b(3) := '1'; + tag_b := writeback_tag; end if; byp_b(0) := 
gpr_b_read_valid_in and (byp_b(1) or byp_b(2) or byp_b(3)); + if tag_b.valid = '1' and tag_regs(tag_b.tag).valid = '1' and + tag_regs(tag_b.tag).recent = '1' then + tag_b_stall := '0'; + end if; + + tag_c := instr_tag_init; + tag_c_stall := '0'; + rmc := (others => '0'); + for i in tag_number_t loop + if tag_regs(i).valid = '1' and tag_regs(i).wr_gpr = '1' and + tag_regs(i).reg = gpr_c_read_in and gpr_c_read_valid_in = '1' then + rmc(i) := '1'; + if tag_regs(i).recent = '1' then + tag_c_stall := '1'; + end if; + end if; + end loop; byp_c := "0000"; - if EX1_BYPASS and tag_match(execute_next_tag, tag_c) then + if EX1_BYPASS and execute_next_tag.valid = '1' and rmc(execute_next_tag.tag) = '1' then byp_c(1) := '1'; - end if; - if EX1_BYPASS and tag_match(execute2_next_tag, tag_c) then + tag_c := execute_next_tag; + elsif EX1_BYPASS and execute2_next_tag.valid = '1' and rmc(execute2_next_tag.tag) = '1' then byp_c(2) := '1'; - end if; - if tag_match(complete_in, tag_c) then + tag_c := execute2_next_tag; + elsif writeback_tag.valid = '1' and rmc(writeback_tag.tag) = '1' then byp_c(3) := '1'; + tag_c := writeback_tag; end if; byp_c(0) := gpr_c_read_valid_in and (byp_c(1) or byp_c(2) or byp_c(3)); + if tag_c.valid = '1' and tag_regs(tag_c.tag).valid = '1' and + tag_regs(tag_c.tag).recent = '1' then + tag_c_stall := '0'; + end if; gpr_bypass_a <= byp_a; gpr_bypass_b <= byp_b; gpr_bypass_c <= byp_c; - gpr_tag_stall <= (tag_a.valid and gpr_a_read_valid_in and not byp_a(0)) or - (tag_b.valid and gpr_b_read_valid_in and not byp_b(0)) or - (tag_c.valid and gpr_c_read_valid_in and not byp_c(0)); + gpr_tag_stall <= tag_a_stall or tag_b_stall or tag_c_stall; incr_tag := curr_tag; instr_tag.tag <= curr_tag; diff --git a/decode2.vhdl b/decode2.vhdl index 0290840..eb701e3 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -283,6 +283,7 @@ begin execute_next_cr_tag => execute_cr_bypass.tag, execute2_next_tag => execute2_bypass.tag, execute2_next_cr_tag => execute2_cr_bypass.tag, + 
writeback_tag => writeback_bypass.tag, cr_read_in => cr_read_valid, cr_write_in => cr_write_valid, From f2166d326cb2432dd4c99aec60d9e02f9ce1a00e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 20 Sep 2025 15:19:33 +1000 Subject: [PATCH 05/12] tests/fpu: Add a test for result writing being suppressed When an arithmetic instruction generates an invalid operation exception or a divide by zero exception, and that exception is enabled in the FPSCR, the writing of the result to the destination register should be suppressed, leaving whatever value was last written in the destination. Add a check that this occurs correctly, for the cases of square root of a negative number (invalid operation exception) and division by zero (zero divide exception). Signed-off-by: Paul Mackerras --- tests/fpu/fpu.c | 44 +++++++++++++++++++++++++++++++++++++ tests/test_fpu.bin | Bin 32896 -> 32896 bytes tests/test_fpu.console_out | 1 + 3 files changed, 45 insertions(+) diff --git a/tests/fpu/fpu.c b/tests/fpu/fpu.c index c13110f..5e45038 100644 --- a/tests/fpu/fpu.c +++ b/tests/fpu/fpu.c @@ -1795,6 +1795,49 @@ int fpu_test_26(void) return trapit(0, test26); } +/* Check for enabled invalid exception suppressing write of result */ +int test27(long arg) +{ + unsigned long operands[4]; + unsigned long result; + + operands[0] = 0xabcd1234ef895670; + operands[1] = 0xbff0000000000000; + operands[2] = 0xef895670abcd1234; + operands[3] = 0; + set_fpscr(FPS_VE); + asm("lfd 3,0(%0); isync; lfd 4,8(%0); lfd 3,16(%0); fsqrt 3,4; stfd 3,0(%1)" + : : "b" (&operands), "b" (&result) : "memory"); + if (result != 0xef895670abcd1234) { + if (result == 0x7ffc000000000000) + return 1; + if (result == 0xabcd1234ef895670) + return 2; + print_hex(result, 16, " "); + return 3; + } + + set_fpscr(FPS_ZE); + asm("lfd 3,0(%0); isync; lfd 4,8(%0); lfd 5,24(%0); lfd 3,16(%0); fdiv 3,4,5; stfd 3,0(%1)" + : : "b" (&operands), "b" (&result) : "memory"); + if (result != 0xef895670abcd1234) { + if (result == 
0x7ffc000000000000) + return 4; + if (result == 0xabcd1234ef895670) + return 5; + print_hex(result, 16, " "); + return 6; + } + + return 0; +} + +int fpu_test_27(void) +{ + enable_fp(); + return trapit(0, test27); +} + int fail = 0; void do_test(int num, int (*test)(void)) @@ -1846,6 +1889,7 @@ int main(void) do_test(24, fpu_test_24); do_test(25, fpu_test_25); do_test(26, fpu_test_26); + do_test(27, fpu_test_27); return fail; } diff --git a/tests/test_fpu.bin b/tests/test_fpu.bin index 24878af7125b68dfa588e23782443a3a7f773f65..b2a293c223beb4f42d028d6301a081fde8194082 100755 GIT binary patch delta 4903 zcmai14NO#57QXMn44_B>L9y~PFhBtXr`Df03OK``149*Cv6`(d)M~X`x72Q%p$yKT zQ(HG^w5KL*)>`RSYqOfwsLk5cET%u9S#?ps+FjdX1sv*6>8iW9THk(Wc*q-*rjxv! z^L_W+bI(2J-22{mn`Cd3yro&*TX0}=VXS=ILMLN4$TkZt8hhu#-aO$rrlt-G(LHP%eK0l1&yrxZLH@vx-Px%aoxJ$NutKMNn_eD%XPsQV0!6r+@rBKZu8uA z`9jnvrr-FKrb~2kTpqP2hEipGc+7{o;kbu=%5+*D?=*y0`V>ru=7qM64C9=kwp`($ zf!G9kZd`cm##*((JJ|&}%)+PiQyH@&?Z+ZisBXq_kt5AzxikdJp=078zqo4ID4-XmF z>uB+)Pcih;j$q?+V2}2|^Hj1WhRXdn=;Xwgg;H& zvt1QM-1a?mB^KKgHtV2z{?>qc?AGp(@^Ns75JXiXE=+=}A| z>33cG)FWZdCSYEZcgbs7v^5j3=1yu%F_Swr(x|LYv%Z=}kC+NQt!s|X=E<&0yF9%( za4b1^*M;?-EftAcPy!xCT)$0RN^ za5(I@6kJd^Z6=zR_AE7gKiUdT{Ml4cx2U3aT5NXh#vesN zl*p-9N1Lqj&g}ZSMd7u)_5pN2mPozHwkBb}$+l4k*)TcSyL~B)s#Y!K3-^2BBX(L42^FjRQ!} zdRhyh*3VfMHnaPX%0aAjg#xQ7Sy}Hht}?p~z;8j<2f-Jh5xiGbon(nmK)K>l+t(~Z zhe?B{kg8l5oVj(!V?iNC2{na}!eKgQ9y_7se&OKbH4BGto2Dai%5}P8o+DLLZu)C- zWDa0ecNjSv$9sR&oi@~K)s^5I zr$xh@7`~OZGREz}$DFU*CSVDbk=qx3gDWLR{BA z_k?nsh_3}WuqHSz{8nH|%n6*3Lt*B|ENZbhrA>6j^2b`fdIEdoXgi-e)UCOncIhI%e4C zx@og=^>Hq_80@ngSUuHUj4XkgZ3BdYBiU8(D(ii^5_Z(xPwMJ|H zW-72YNGIrsb>z@qIUjszGcafPJ)TuhBqkZ{B!S*;JV*~dxzxjp( zP`c@A?ETa{K7z*N&+*(Y!G8>V0rDRl1Fo!+QheTO*a*C`41X0g90t4}_#F+00}ldk z(r^T@ZLUvg)bJ?a*MV1Scrmqk#7S=W6&q-~r$y z4Mzhj-sZkb#IH3{oFxUxvRKw$d z{lNP)JRaEMLunYF9H9Qfx6?iGI2~`pYj}$y6Nv#KC z=3JCQxhD<|+Cn8y{&7{s>p1Yk2cDRWC;Eh{Rt-^tIUAlzaC>ze{yvBva3&qM68ZqR zbd6)_X8ok9cCsvqu#0!4k|;}zeQnLCC{gqsH584J_nxFb6y+ox|A(p`VzXkH*|&$j 
z6S6~oSAwZM%C?YB6^)gv+NrN7S$4IPu{c?FpQ22N_fJt7#PL(K0^<59+5nMwns!5! zoTe`zo;yu_5O1C)V@b07$!W@j=s8Vg5Ro0U0%BSRZGd>ZgLXssI_L|Cy&cpC(b+-9 z(q#E|2W3K}cTyQdWhbpDP4@i0Q&su~;OdwtiJ%Lm@kRyQ&MvxBnr5tl+S)~#WjV$z zP!rG4qO!~<(qY>QxaEwh_VC2YKp~2wYVH=oXdf*^7KZ$=tNcn;_pn`gz;1_J9A$&$ z!a&Ks4VVgiMTzJnv9 z1FQ1({t8>yKUHL*>rPROZYv zwnm_7uF%U+2cSNFl@?Xx7)_(F@2hminUh!rb^nj5I>4zM>Pq=}%W}Harfht(+9^{{w?m73}4Qvtvo5Z%4#j&CTTT3U3hMCke@kP;_OTUeIac(3j9dv9OyXVB)j?3hSsd#}hUGeX^Q%glH0C?41q zGgc=(TPsUjD0Om?`GAf+zDSlzsByB#_^wBmU+OyT)sw@76+9#=;+L6WXX7ywudT@f$jc6^*sFDxR1S6vh;@_y;DbTTOvy{ zf^@BpKE6tp1_xCqW*y!3E9^s%{@~kj-%qTTrJ5kUNk{K}RF+~mJ)7Dr2GRK~SuKAQ zj#08D!&K*EQ4Mvo1{DuODNwb=L1Px5VQ)IN_r}BoYD>07HvtH#tWwqZIK}diqR7&) zJy?L=q!yny`#oP_?2hG(rR1cufXl^FCGR)ylaid2@Rq+cVtP#!i^{REO|Hk-bf=Bk zH*B`iZ0l@sg`HMeOU1D!+GCwB#@T7qYNh=y14XA~3I`}Zr6%tmWg-hmV@(%}csYB& zIpNR0r+6H!eu&Gln9Bebm|v>KTMtDv_?}3k_fk@5UfhEg6LUHDge_>{kvq1gIZt2} zylZ&3P^``Vu;#ow==|DlzuG(Byzloy7T|m{dWk#dOXALVrP3-}3dIyYNZV|&qB)iJ z*h>9lP4J*u4Woy+H4n{ut(o>`!Ay6j{oNmn6-Be> zU3`TJkiE<1w(x{MgXUw*lEe8-)E=LMo||KC!*$%yDa9OP$*SJ{I(%)(s-GlvUSjou z`*fzL>HHqO%hTDW)!dy#D$H6#M=~>o=`@)6(^0nE`uQkpq4RT5=Bs{wGRk;0N;8!7 zQO1!o>pS5T&C33(u=3!!>?sBzpT3@%y_7cx!{(hD4Q&kLSL2n*o97mv=RJ;U-9%8c-bVr!FAFN8i`91UZX9J-`8@#V@*oY$cW;x@iXD}VDL3$=85md6=<$Hk&k*AXAi90OwGaT$l&SSg)ULWr%V12yZ!1{Q* zfO)*Z60x@&p^k_BLJTG6{nD*JE8qE2%ZnD{Zeac%wQli#9#=a!ecb!$aNbj**+ka7 zM9Ryr3~_j1IMm@z;QJl6g3~+POWpZT@!;kxo1qV`dnnXb7qH&$L+Y5bL@1$=Im>eO zb|Y6q?JfZ8?e084YYVD`M(QqD67s^-`wY$Z89K-*EfIH4p!W)rXt(rmh;hvqp>LfB ztbglDGCG%teNn3MI%lZy7JOjU`qcJe1F+usRXXilB1BPY;cx5Jn&2N3)J4aDW~bF{ zE3m#!T7dO+(g@7!M8Cha;CQVBKNj{DFMVCuEF7lBqU;4f^ybnlp}o09!~D$Rb-RIM zU2pyxSl^p6T`GD=OpQ=$CcRh+$=vD7>LLQXdAP%4!1~4a)0W~F{I=lvJ~XCWy0#}Q z(${;5YuTLq+wD#A`xNEr;!NlS_NTQU#vEUw5#AiUwGi3}xIN-?(8G}oUIKqk<(hxw zT;NWCbDvc_`KUJjE#UfSgGuV4h7Zh-0la>RwwI&{^>nPH%zv|7mQLZK!SBpQ974tO zaJ$oRIB?y3S?bbo1n`@{9U6}0hZbQF$k*+J&J-)t@^`7gXQ4)3|w`P zVw`i~iTg-V_UbrXQrTW`HXXMb^1I;DHI9!yJSRV*tnz8%`j6?+@+{-F)9^%_%BRFX 
zI>E#q`S-;O=fm9(Sh$VASnMaXuiPq1pHL5m;xjagVdWVz&9jOxpP^g~ht5zHhVy4= zEe1m`ZN-q&OZzY^@1-6LzFr!|u&b9$^R42Cy_AdLW-nD?u=LSd4CQ^a6~nWAv=76! zKI*}6q>n~14D^wy!YW3erCbboXK8IkV))T8#$G;4TPqSv`ymdTRTMR7#BIZK6Wn!h zO^{i6l$-4k=9*k)_r_|z$Z$7LMuLGc=~McmA}!nn@$pZ|RGAgt1o6ho&2 z1O4Ua=w9XQ2^>mBz<+UA;s9LF;pd9d3z^M_H*=+5@349W9F}9l!rkGO2q7J6Z^Gs# zZC+rX`Z1)+^Wzo4Jq~b>bZo)Q@Pu&G(FH18SV)eA2ICWe1LRy-Hlq{LOa0@yvVBZu9z@FrHiw|+rUesv=-t~h@GRV`3;D!uc&8n)@0iRr0Xk1=|`!v zc_te+O8gDQRM~}2npHJva_oN;C7`V#ldl1fk7}#3r|-uMXZVk=IDqFkxE$^YKhE)t boIluARg31XnlDhl@HTC%jy!0p{+;80r>D;E diff --git a/tests/test_fpu.console_out b/tests/test_fpu.console_out index 987a633..6913d7e 100644 --- a/tests/test_fpu.console_out +++ b/tests/test_fpu.console_out @@ -24,3 +24,4 @@ test 23:PASS test 24:PASS test 25:PASS test 26:PASS +test 27:PASS From 788f7a1755ab702c2789843642f7112a984c2af0 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 27 Sep 2025 08:52:18 +1000 Subject: [PATCH 06/12] core: Improve timing on bypass control paths In order to improve timing, the bypass paths now carry the register number being written as well as the tag. The decisions about which bypasses to use for which operands are then made by comparing the register numbers rather than by determining a tag from the register number and then comparing tags. 
Signed-off-by: Paul Mackerras --- common.vhdl | 4 +- control.vhdl | 107 ++++++++++++++++++++----------------------------- decode2.vhdl | 6 +-- execute1.vhdl | 6 ++- writeback.vhdl | 1 + 5 files changed, 55 insertions(+), 69 deletions(-) diff --git a/common.vhdl b/common.vhdl index 41969e2..ec38dfb 100644 --- a/common.vhdl +++ b/common.vhdl @@ -420,9 +420,11 @@ package common is type bypass_data_t is record tag : instr_tag_t; + reg : gspr_index_t; data : std_ulogic_vector(63 downto 0); end record; - constant bypass_data_init : bypass_data_t := (tag => instr_tag_init, data => (others => '0')); + constant bypass_data_init : bypass_data_t := + (tag => instr_tag_init, reg => (others => '0'), data => (others => '0')); type cr_bypass_data_t is record tag : instr_tag_t; diff --git a/control.vhdl b/control.vhdl index c34184a..a760377 100644 --- a/control.vhdl +++ b/control.vhdl @@ -32,11 +32,11 @@ entity control is gpr_c_read_valid_in : in std_ulogic; gpr_c_read_in : in gspr_index_t; - execute_next_tag : in instr_tag_t; - execute_next_cr_tag : in instr_tag_t; - execute2_next_tag : in instr_tag_t; + execute_next_bypass : in bypass_data_t; + execute2_next_bypass : in bypass_data_t; + writeback_bypass : in bypass_data_t; + execute_next_cr_tag : in instr_tag_t; execute2_next_cr_tag : in instr_tag_t; - writeback_tag : in instr_tag_t; cr_read_in : in std_ulogic; cr_write_in : in std_ulogic; @@ -164,109 +164,90 @@ begin variable byp_cr : std_ulogic_vector(1 downto 0); variable tag_ov : instr_tag_t; variable tag_prev : instr_tag_t; - variable rma : std_ulogic_vector(TAG_COUNT-1 downto 0); - variable rmb : std_ulogic_vector(TAG_COUNT-1 downto 0); - variable rmc : std_ulogic_vector(TAG_COUNT-1 downto 0); - variable tag_a_stall : std_ulogic; - variable tag_b_stall : std_ulogic; - variable tag_c_stall : std_ulogic; begin tag_a := instr_tag_init; - tag_a_stall := '0'; - rma := (others => '0'); for i in tag_number_t loop - if tag_regs(i).valid = '1' and tag_regs(i).wr_gpr = '1' and + if 
tag_regs(i).valid = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_a_read_in and gpr_a_read_valid_in = '1' then - rma(i) := '1'; - if tag_regs(i).recent = '1' then - tag_a_stall := '1'; + tag_a.valid := '1'; + tag_a.tag := i; + if (EX1_BYPASS and tag_match(execute_next_bypass.tag, tag_a)) or + (EX1_BYPASS and tag_match(execute2_next_bypass.tag, tag_a)) or + tag_match(complete_in, tag_a) then + tag_a.valid := '0'; end if; end if; end loop; byp_a := "0000"; - if EX1_BYPASS and execute_next_tag.valid = '1' and - rma(execute_next_tag.tag) = '1' then + if EX1_BYPASS and execute_next_bypass.tag.valid = '1' and + execute_next_bypass.reg = gpr_a_read_in then byp_a(1) := '1'; - tag_a := execute_next_tag; - elsif EX1_BYPASS and execute2_next_tag.valid = '1' and - rma(execute2_next_tag.tag) = '1' then + elsif EX1_BYPASS and execute2_next_bypass.tag.valid = '1' and + execute2_next_bypass.reg = gpr_a_read_in then byp_a(2) := '1'; - tag_a := execute2_next_tag; - elsif writeback_tag.valid = '1' and rma(writeback_tag.tag) = '1' then + elsif writeback_bypass.tag.valid = '1' and + writeback_bypass.reg = gpr_a_read_in then byp_a(3) := '1'; - tag_a := writeback_tag; end if; byp_a(0) := gpr_a_read_valid_in and (byp_a(1) or byp_a(2) or byp_a(3)); - if tag_a.valid = '1' and tag_regs(tag_a.tag).valid = '1' and - tag_regs(tag_a.tag).recent = '1' then - tag_a_stall := '0'; - end if; tag_b := instr_tag_init; - tag_b_stall := '0'; - rmb := (others => '0'); for i in tag_number_t loop - if tag_regs(i).valid = '1' and tag_regs(i).wr_gpr = '1' and + if tag_regs(i).valid = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_b_read_in and gpr_b_read_valid_in = '1' then - rmb(i) := '1'; - if tag_regs(i).recent = '1' then - tag_b_stall := '1'; + tag_b.valid := '1'; + tag_b.tag := i; + if (EX1_BYPASS and tag_match(execute_next_bypass.tag, tag_b)) or + (EX1_BYPASS and tag_match(execute2_next_bypass.tag, tag_b)) or + tag_match(complete_in, tag_b) then + tag_b.valid := '0'; end if; end 
if; end loop; byp_b := "0000"; - if EX1_BYPASS and execute_next_tag.valid = '1' and - rmb(execute_next_tag.tag) = '1' then + if EX1_BYPASS and execute_next_bypass.tag.valid = '1' and + execute_next_bypass.reg = gpr_b_read_in then byp_b(1) := '1'; - tag_b := execute_next_tag; - elsif EX1_BYPASS and execute2_next_tag.valid = '1' and - rmb(execute2_next_tag.tag) = '1' then + elsif EX1_BYPASS and execute2_next_bypass.tag.valid = '1' and + execute2_next_bypass.reg = gpr_b_read_in then byp_b(2) := '1'; - tag_b := execute2_next_tag; - elsif writeback_tag.valid = '1' and rmb(writeback_tag.tag) = '1' then + elsif writeback_bypass.tag.valid = '1' and + writeback_bypass.reg = gpr_b_read_in then byp_b(3) := '1'; - tag_b := writeback_tag; end if; byp_b(0) := gpr_b_read_valid_in and (byp_b(1) or byp_b(2) or byp_b(3)); - if tag_b.valid = '1' and tag_regs(tag_b.tag).valid = '1' and - tag_regs(tag_b.tag).recent = '1' then - tag_b_stall := '0'; - end if; tag_c := instr_tag_init; - tag_c_stall := '0'; - rmc := (others => '0'); for i in tag_number_t loop - if tag_regs(i).valid = '1' and tag_regs(i).wr_gpr = '1' and + if tag_regs(i).valid = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_c_read_in and gpr_c_read_valid_in = '1' then - rmc(i) := '1'; - if tag_regs(i).recent = '1' then - tag_c_stall := '1'; + tag_c.valid := '1'; + tag_c.tag := i; + if (EX1_BYPASS and tag_match(execute_next_bypass.tag, tag_c)) or + (EX1_BYPASS and tag_match(execute2_next_bypass.tag, tag_c)) or + tag_match(complete_in, tag_c) then + tag_c.valid := '0'; end if; end if; end loop; byp_c := "0000"; - if EX1_BYPASS and execute_next_tag.valid = '1' and rmc(execute_next_tag.tag) = '1' then + if EX1_BYPASS and execute_next_bypass.tag.valid = '1' and + execute_next_bypass.reg = gpr_c_read_in then byp_c(1) := '1'; - tag_c := execute_next_tag; - elsif EX1_BYPASS and execute2_next_tag.valid = '1' and rmc(execute2_next_tag.tag) = '1' then + elsif EX1_BYPASS and execute2_next_bypass.tag.valid = '1' and + 
execute2_next_bypass.reg = gpr_c_read_in then byp_c(2) := '1'; - tag_c := execute2_next_tag; - elsif writeback_tag.valid = '1' and rmc(writeback_tag.tag) = '1' then + elsif writeback_bypass.tag.valid = '1' and + writeback_bypass.reg = gpr_c_read_in then byp_c(3) := '1'; - tag_c := writeback_tag; end if; byp_c(0) := gpr_c_read_valid_in and (byp_c(1) or byp_c(2) or byp_c(3)); - if tag_c.valid = '1' and tag_regs(tag_c.tag).valid = '1' and - tag_regs(tag_c.tag).recent = '1' then - tag_c_stall := '0'; - end if; gpr_bypass_a <= byp_a; gpr_bypass_b <= byp_b; gpr_bypass_c <= byp_c; - gpr_tag_stall <= tag_a_stall or tag_b_stall or tag_c_stall; + gpr_tag_stall <= tag_a.valid or tag_b.valid or tag_c.valid; incr_tag := curr_tag; instr_tag.tag <= curr_tag; diff --git a/decode2.vhdl b/decode2.vhdl index eb701e3..b292080 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -279,11 +279,11 @@ begin gpr_c_read_valid_in => gpr_c_read_valid, gpr_c_read_in => gpr_c_read, - execute_next_tag => execute_bypass.tag, + execute_next_bypass => execute_bypass, execute_next_cr_tag => execute_cr_bypass.tag, - execute2_next_tag => execute2_bypass.tag, + execute2_next_bypass => execute2_bypass, execute2_next_cr_tag => execute2_cr_bypass.tag, - writeback_tag => writeback_bypass.tag, + writeback_bypass => writeback_bypass, cr_read_in => cr_read_valid, cr_write_in => cr_write_valid, diff --git a/execute1.vhdl b/execute1.vhdl index fe608c4..ecc9cac 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -1938,8 +1938,9 @@ begin v.fp_exception_next := '0'; end if; - bypass_data.tag.valid <= e_in.write_reg_enable and bypass_valid; - bypass_data.tag.tag <= e_in.instr_tag.tag; + bypass_data.tag.valid <= v.e.write_enable and bypass_valid; + bypass_data.tag.tag <= v.e.instr_tag.tag; + bypass_data.reg <= v.e.write_reg; bypass_data.data <= alu_result; bypass_cr_data.tag.valid <= e_in.output_cr and bypass_valid; @@ -2250,6 +2251,7 @@ begin bypass2_data.tag.valid <= ex1.e.write_enable and bypass_valid; 
bypass2_data.tag.tag <= ex1.e.instr_tag.tag; + bypass2_data.reg <= ex1.e.write_reg; bypass2_data.data <= ex_result; bypass2_cr_data.tag.valid <= (ex1.e.write_cr_enable or (ex1.e.rc and ex1.e.write_enable)) diff --git a/writeback.vhdl b/writeback.vhdl index 49a53cc..944bae5 100644 --- a/writeback.vhdl +++ b/writeback.vhdl @@ -203,6 +203,7 @@ begin -- Register write data bypass to decode2 wb_bypass.tag.tag <= complete_out.tag; wb_bypass.tag.valid <= complete_out.valid and w_out.write_enable; + wb_bypass.reg <= w_out.write_reg; wb_bypass.data <= w_out.write_data; end process; From 1d758f1d74418ac44b96d7b35fe04693506dd17e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 17 Sep 2025 09:47:30 +1000 Subject: [PATCH 07/12] execute1: Simplify no-op behaviour of mfspr When mfspr is performed to one of the reserved no-op SPRs, or to an undefined SPR in privileged state, the behaviour is a no-op, that is, the destination register is not written. Previously this was done by writing back the same value that the register had before the instruction, but in fact it can be done simply by negating the write enable signal so that the result GPR is not written. This gives a small reduction in logic complexity. 
Signed-off-by: Paul Mackerras --- execute1.vhdl | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/execute1.vhdl b/execute1.vhdl index ecc9cac..5d0f602 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -106,7 +106,6 @@ architecture behaviour of execute1 is scv_trap : std_ulogic; write_tbl : std_ulogic; write_tbu : std_ulogic; - noop_spr_read : std_ulogic; send_hmsg : std_ulogic_vector(NCPUS-1 downto 0); clr_hmsg : std_ulogic; end record; @@ -1434,7 +1433,9 @@ begin report "MFSPR to slow SPR " & integer'image(decode_spr_num(e_in.insn)); end if; slow_op := '1'; - v.se.noop_spr_read := e_in.spr_select.noop; + if e_in.spr_select.noop = '1' then + v.e.write_enable := '0'; + end if; if e_in.spr_select.ispmu = '0' then case e_in.spr_select.sel is when SPRSEL_LOGR => @@ -1455,8 +1456,7 @@ begin " invalid"; end if; slow_op := '1'; - v.se.noop_spr_read := '1'; - v.res2_sel := "10"; + v.e.write_enable := '0'; if ex1.msr(MSR_PR) = '1' then illegal := '1'; end if; @@ -2114,9 +2114,7 @@ begin else rcresult := countbits_result; end if; - if ex1.se.noop_spr_read = '1' then - sprres := ex1.spr_write_data; - elsif ex1.res2_sel(0) = '0' then + if ex1.res2_sel(0) = '0' then sprres := spr_result; else sprres := pmu_to_x.spr_val; From 9c40ddffd2ccf541ed028de890a2a62c23d1e171 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 25 Sep 2025 09:01:19 +1000 Subject: [PATCH 08/12] execute1: Implement LPCR[EVIRT] bit This implements the EVIRT bit in the LPCR register. When set to 1, EVIRT causes mfspr and mtspr to an undefined SPR number in privileged mode (i.e. hypervisor mode) to cause a hypervisor emulation assistance interrupt. When set to 0, such instructions are executed as no-ops. 
Signed-off-by: Paul Mackerras --- common.vhdl | 4 +++- execute1.vhdl | 12 ++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/common.vhdl b/common.vhdl index ec38dfb..91f9ebd 100644 --- a/common.vhdl +++ b/common.vhdl @@ -249,6 +249,7 @@ package common is -- LPCR bit numbers constant LPCR_HAIL : integer := 63 - 37; constant LPCR_UPRT : integer := 63 - 41; + constant LPCR_EVIRT : integer := 63 - 42; constant LPCR_HR : integer := 63 - 43; constant LPCR_LD : integer := 63 - 46; constant LPCR_HEIC : integer := 63 - 59; @@ -322,6 +323,7 @@ package common is hdexcr_hyp: aspect_bits_t; hdexcr_enf: aspect_bits_t; lpcr_hail: std_ulogic; + lpcr_evirt: std_ulogic; lpcr_ld: std_ulogic; lpcr_heic: std_ulogic; lpcr_lpes: std_ulogic; @@ -333,7 +335,7 @@ package common is dscr => (others => '0'), dexcr_pnh => aspect_bits_init, dexcr_pro => aspect_bits_init, hdexcr_hyp => aspect_bits_init, hdexcr_enf => aspect_bits_init, - lpcr_hail => '0', lpcr_ld => '1', lpcr_heic => '0', + lpcr_hail => '0', lpcr_evirt => '0', lpcr_ld => '1', lpcr_heic => '0', lpcr_lpes => '0', lpcr_hvice => '0', others => (others => '0')); diff --git a/execute1.vhdl b/execute1.vhdl index 5d0f602..7046d16 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -425,6 +425,7 @@ architecture behaviour of execute1 is begin ret := (others => '0'); ret(LPCR_HAIL) := c.lpcr_hail; + ret(LPCR_EVIRT) := c.lpcr_evirt; ret(LPCR_UPRT) := '1'; ret(LPCR_HR) := '1'; ret(LPCR_LD) := c.lpcr_ld; @@ -1450,14 +1451,15 @@ begin end if; else -- mfspr from unimplemented SPRs should be a nop in - -- supervisor mode and a program interrupt for user mode + -- supervisor mode and a program or HEAI interrupt for user mode + -- LPCR[EVIRT] = 1 makes it HEAI in privileged mode if e_in.valid = '1' and not is_X(e_in.insn) then report "MFSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) & " invalid"; end if; slow_op := '1'; v.e.write_enable := '0'; - if ex1.msr(MSR_PR) = '1' then + if ex1.msr(MSR_PR) = '1' or 
ctrl.lpcr_evirt = '1' then illegal := '1'; end if; end if; @@ -1544,8 +1546,9 @@ begin end if; if e_in.spr_select.valid = '0' and e_in.spr_is_ram = '0' then -- mtspr to unimplemented SPRs should be a nop in - -- supervisor mode and a program interrupt for user mode - if ex1.msr(MSR_PR) = '1' then + -- supervisor mode and a program interrupt or HEAI for user mode + -- LPCR[EVIRT] = 1 makes it HEAI in privileged mode + if ex1.msr(MSR_PR) = '1' or ctrl.lpcr_evirt = '1' then illegal := '1'; end if; end if; @@ -2185,6 +2188,7 @@ begin end if; if ex1.se.write_lpcr = '1' then ctrl_tmp.lpcr_hail <= ex1.spr_write_data(LPCR_HAIL); + ctrl_tmp.lpcr_evirt <= ex1.spr_write_data(LPCR_EVIRT); ctrl_tmp.lpcr_ld <= ex1.spr_write_data(LPCR_LD); ctrl_tmp.lpcr_heic <= ex1.spr_write_data(LPCR_HEIC); ctrl_tmp.lpcr_lpes <= ex1.spr_write_data(LPCR_LPES); From 5548a5ba267f4a56fe8b0067c59b20a2e66bbfbd Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 25 Sep 2025 22:37:47 +1000 Subject: [PATCH 09/12] execute1: Make mfspr/mtspr to SPRs 0,4,5,6 generate HEAI The ISA specifies that mfspr or mtspr to SPR 0, 4, 5 or 6 should generate a hypervisor emulation assistance interrupt in privileged mode, so this adds logic to do that. 
Signed-off-by: Paul Mackerras --- execute1.vhdl | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/execute1.vhdl b/execute1.vhdl index 7046d16..9f7acaa 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -1216,6 +1216,7 @@ begin variable owait : std_ulogic; variable srr1 : std_ulogic_vector(63 downto 0); variable c32, c64 : std_ulogic; + variable sprnum : spr_num_t; begin v := actions_type_init; v.e.write_data := alu_result; @@ -1424,14 +1425,15 @@ begin when OP_DARN => when OP_MFMSR => when OP_MFSPR => + sprnum := decode_spr_num(e_in.insn); if e_in.spr_is_ram = '1' then if e_in.valid = '1' and not is_X(e_in.insn) then - report "MFSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) & + report "MFSPR to SPR " & integer'image(sprnum) & "=" & to_hstring(alu_result); end if; elsif e_in.spr_select.valid = '1' and e_in.spr_select.wonly = '0' then if e_in.valid = '1' and not is_X(e_in.insn) then - report "MFSPR to slow SPR " & integer'image(decode_spr_num(e_in.insn)); + report "MFSPR to slow SPR " & integer'image(sprnum); end if; slow_op := '1'; if e_in.spr_select.noop = '1' then @@ -1454,12 +1456,12 @@ begin -- supervisor mode and a program or HEAI interrupt for user mode -- LPCR[EVIRT] = 1 makes it HEAI in privileged mode if e_in.valid = '1' and not is_X(e_in.insn) then - report "MFSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) & - " invalid"; + report "MFSPR to SPR " & integer'image(sprnum) & " invalid"; end if; slow_op := '1'; v.e.write_enable := '0'; - if ex1.msr(MSR_PR) = '1' or ctrl.lpcr_evirt = '1' then + if ex1.msr(MSR_PR) = '1' or ctrl.lpcr_evirt = '1' or + sprnum = 0 or sprnum = 4 or sprnum = 5 or sprnum = 6 then illegal := '1'; end if; end if; @@ -1505,8 +1507,9 @@ begin end if; end if; when OP_MTSPR => + sprnum := decode_spr_num(e_in.insn); if e_in.valid = '1' and not is_X(e_in.insn) then - report "MTSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) & + report "MTSPR to SPR " & integer'image(sprnum) & "=" & 
to_hstring(c_in); end if; v.se.write_pmuspr := e_in.spr_select.ispmu; @@ -1548,7 +1551,8 @@ begin -- mtspr to unimplemented SPRs should be a nop in -- supervisor mode and a program interrupt or HEAI for user mode -- LPCR[EVIRT] = 1 makes it HEAI in privileged mode - if ex1.msr(MSR_PR) = '1' or ctrl.lpcr_evirt = '1' then + if ex1.msr(MSR_PR) = '1' or ctrl.lpcr_evirt = '1' or + sprnum = 0 or sprnum = 4 or sprnum = 5 or sprnum = 6 then illegal := '1'; end if; end if; From 0255283159ed8b696589f63e64273130eb9bc13f Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 24 Sep 2025 22:10:48 +1000 Subject: [PATCH 10/12] tests/spr_read: Test that mfspr/mtspr to SPRs 0,4,5,6 generate HEAI Signed-off-by: Paul Mackerras --- tests/spr_read/head.S | 88 ++++++++++++++++++++++++++++++++ tests/spr_read/spr_read.c | 38 ++++++++++++-- tests/test_spr_read.bin | Bin 6540 -> 7072 bytes tests/test_spr_read.console_out | 1 + 4 files changed, 123 insertions(+), 4 deletions(-) diff --git a/tests/spr_read/head.S b/tests/spr_read/head.S index 92d69bb..1c84b4f 100644 --- a/tests/spr_read/head.S +++ b/tests/spr_read/head.S @@ -44,3 +44,91 @@ boot_entry: bctrl attn // terminate on exit b . 
+ + .globl read_sprn +read_sprn: + nop + nop + mr %r0,%r3 + mr %r3,%r4 + cmpdi %r0,0 + beq 0f + cmpdi %r0,1 + beq 1f + cmpdi %r0,4 + beq 4f + cmpdi %r0,5 + beq 5f + cmpdi %r0,6 + beq 6f + mfspr %r3,179 + blr +0: mfspr %r3,0 + blr +1: mfspr %r3,1 + blr +4: mfspr %r3,4 + blr +5: mfspr %r3,5 + blr +6: mfspr %r3,6 + blr + + .globl write_sprn +write_sprn: + nop + nop + mr %r0,%r3 + li %r3,0 + cmpdi %r0,0 + beq 0f + cmpdi %r0,1 + beq 1f + cmpdi %r0,4 + beq 4f + cmpdi %r0,5 + beq 5f + cmpdi %r0,6 + beq 6f + mtspr 179,%r3 + blr +0: mtspr 0,%r3 + blr +1: mtspr 1,%r3 + blr +4: mtspr 4,%r3 + blr +5: mtspr 5,%r3 + blr +6: mtspr 6,%r3 + blr + +#define EXCEPTION(nr) \ + .= nr ;\ + li %r3,nr ;\ + blr + + EXCEPTION(0x300) + EXCEPTION(0x380) + EXCEPTION(0x400) + EXCEPTION(0x480) + EXCEPTION(0x500) + EXCEPTION(0x600) + EXCEPTION(0x700) + EXCEPTION(0x800) + EXCEPTION(0x900) + EXCEPTION(0x980) + EXCEPTION(0xa00) + EXCEPTION(0xb00) + EXCEPTION(0xc00) + EXCEPTION(0xd00) + EXCEPTION(0xe00) + EXCEPTION(0xe20) + EXCEPTION(0xe40) + EXCEPTION(0xe60) + EXCEPTION(0xe80) + EXCEPTION(0xf00) + EXCEPTION(0xf20) + EXCEPTION(0xf40) + EXCEPTION(0xf60) + EXCEPTION(0xf80) diff --git a/tests/spr_read/spr_read.c b/tests/spr_read/spr_read.c index a2a8a84..2bf183a 100644 --- a/tests/spr_read/spr_read.c +++ b/tests/spr_read/spr_read.c @@ -8,6 +8,9 @@ #define PASS "PASS\n" #define FAIL "FAIL\n" +extern long read_sprn(long, long); +extern long write_sprn(long); + // i < 100 void print_test(char *str) { @@ -44,9 +47,23 @@ void print_test(char *str) #define __stringify_1(x...) #x #define __stringify(x...) 
__stringify_1(x) +void print_hex(unsigned long val, int ndigits, const char *str) +{ + int i, x; + + for (i = (ndigits - 1) * 4; i >= 0; i -= 4) { + x = (val >> i) & 0xf; + if (x >= 10) + putchar(x + 'a' - 10); + else + putchar(x + '0'); + } + puts(str); +} + int main(void) { - unsigned long tmp; + unsigned long tmp, r; int fail = 0; console_init(); @@ -102,9 +119,22 @@ int main(void) } print_test("undefined SPR"); - __asm__ __volatile__("mtspr 179,%0" : : "r" (7738)); - __asm__ __volatile__("li %0,%1; mfspr %0,179" : "=r" (tmp) : "i" (2498)); - if (tmp == 2498) { + r = write_sprn(179); + tmp = read_sprn(179, 2498); + if (r == 0 && tmp == 2498) { + puts(PASS); + } else { + puts(FAIL); + fail = 1; + } + + print_test("read SPR 0/4/5/6"); + if (read_sprn(0, 1234) == 0xe40 && read_sprn(2, 1234) == 1234 && + read_sprn(4, 1234) == 0xe40 && read_sprn(5, 1234) == 0xe40 && + read_sprn(6, 1234) == 0xe40 && + write_sprn(0) == 0xe40 && write_sprn(2) == 0 && + write_sprn(4) == 0xe40 && write_sprn(5) == 0xe40 && + write_sprn(6) == 0xe40) { puts(PASS); } else { puts(FAIL); diff --git a/tests/test_spr_read.bin b/tests/test_spr_read.bin index 8c7a7d6a2d5a89f14fd7fcb083f10ee50c663c1a..96a95bcfc46d9ed32aea7d9eae7735f15df470f8 100755 GIT binary patch literal 7072 zcmeI0YiJZ#6vxl(uAAK$V+0E+5uKO{iLSWu#l}RP-6ZNpYtzm8C}=w|S*g%!>!WOR zSH~9^5L!wL6&fGZhE__cl!AT`qPEi77OGINP^+yJ5@W%FmS}9J|8wVV_8M%rUt~X| z1LK+d|DAgtcka%Z@enN`((8!I%?kzPN!WIpRBjSk^N9|&UMM)&0h=ftcd)hF-I*@; zSj)*pd|)1#d?w4xRI*}k3ULAf+7s5K^+-EPTCcR-Q@5mavX*zdOle!CZI?D7?RsI= zJV+P%&v~ZFJZ_oCE%Uf#9=FWn9?FAM{~sf21>9zc_hPt~&>nemtzYZ@7KbTks`cKK2qBU!d9W!vlOt|!|<9Pg4{KG5lPz7-?~Z|q9T>v{M) zzHgMH2dO;tfo(69y=s<5q{g-FeWB;T&mvZ`Y-cJt?M!N3$&onGq(CWIMza6CT~lOS z1F;PF&VBIJt_1~EEBxdSOrYJyDypi{siI?>%=y5!YkS}qgl>8(I(pUo?uYYV{maQ6 zl{vV-Ui3ExbGlvSY1{r~;q9bd%l&o9`0PU+ZJ|EQ?RHhBneA4X5A0jPssu|YtOV>N zvx_qXiz=)X>}@e`reM<*RteT2*et=uDJ%%KmRTeq*jJ_d#zym74{<7CBf;Tvy!JL@Y z3VR1E%`6fWF`rP_2Vkv&RSR~{o$dV;SfgOhdR|r7mta+bIqP{=VOPM$2v#9tzOS%e 
zu-_kW4^Aze3L60XUNEPYmlfvQ-WEE|tk9YFtinpbx|nTt*c^rFV7r+`7K&P)R9Gcg z0_^Zay{7x?Y3b+vuou}@RlcI_F?e4R_0<0A%C3olIn|i+DSo5-Z;9WfdCqTzl4a=> zYksK_%?~sn#yNjQpiai>QsOitPT!!t^s>joH#;+Qso#lZ6jubEfEesiRD^dFvEc$S(zYjP)BvLZEv=iqF7n@AEzZw(_AinCH?#g&TgTr=Z6Tg>6LY@F)&iV;hO+PQT7r+LMSrN4$$g@h zXPj@h!|U)lzQzLY2D}GEkHY`vDn0Gg%@sI@`Yz#j6u<9m`$lc%{iM;g*ZJO+;NJ1O z2;oBG@7YTWp%p4UP4}w!~t=OlbDUxLO_8FG2u$Fkhlf$c!@+#Qf&i z;!MEIcsQS_#DmY`S;NOG9Kng74LCHG0eRdUZ4iAl$Lk-evw3BVHYc_uHoul=*EepO zzUdXBMd9c&UdL@4+Y{?IYy?0Aj5jA*#ibscF?+_G8FM*%3IBsb+a4oNGOU#qVP|4o zinHggZLfx<27j22XpP0eap9fOVhUgLh69l6aNdEB1uJ~QUGBrKBO1KI#YVJ{J%76D z4UYz9|CiY958FQF!9_h{KIHE%)-WR?XFRGd*0`=y_ig(iEFY|K>>!>R2Ux&SiPOM& z9GVGBjiUa;u6ofygOkRoi|2D)SK!O@_+ZV#c?;Tj4Hr0Adkg0+{O5EH3poA;&K?zA delta 1224 zcmajePe>GT6bJC%%x;E;vI|OKCA*}un=7qZHmTr_DebNp9X({MsZh{Cy0}r}tbtJA z!PqW=|AJWP5C}_q0~AH>p`+s`}KWqM*Q9S!P^<$=e;+--!MCWWwLoH zRC(VwIgs-_vx45t4WSEK@w{IvX!(G)^`DlC~REa%3HY^92&U^{`P;4HSU~6aV zR%`**NMFRND3yeo5I!1zn}4nA5r|qJDEM zQ6bEY60;WRcEoGcOnY#1opn~9)AZG{3emaoy~I2rvc>ANVQ*2&NtDp?AtH#!$UUV5 oRtOP86td>&hD){kQc{K%Rc80Iw|C3;+NC diff --git a/tests/test_spr_read.console_out b/tests/test_spr_read.console_out index a677b29..9722dfb 100644 --- a/tests/test_spr_read.console_out +++ b/tests/test_spr_read.console_out @@ -24,4 +24,5 @@ Test SPR_PTCR:PASS Test SPR_PVR:PASS Test reserved no-op:PASS Test undefined SPR:PASS +Test read SPR 0/4/5/6:PASS PASS From 9326fc7f1828fae460486d4ace67879d7a5fb265 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 25 Sep 2025 11:43:24 +1000 Subject: [PATCH 11/12] tests/modes: Test that mfspr/mtspr to unimplemented SPR in user mode causes HEAI Signed-off-by: Paul Mackerras --- tests/modes/modes.c | 38 +++++++++++++++++++++++++++++++++++ tests/test_modes.bin | Bin 20520 -> 20520 bytes tests/test_modes.console_out | 1 + 3 files changed, 39 insertions(+) diff --git a/tests/modes/modes.c b/tests/modes/modes.c index f37e70b..b083671 100644 --- a/tests/modes/modes.c +++ b/tests/modes/modes.c @@ -7,6 +7,8 @@ #define 
MSR_LE 0x1 #define MSR_DR 0x10 #define MSR_IR 0x20 +#define MSR_PR 0x4000 +#define MSR_EE 0x8000 #define MSR_SF 0x8000000000000000ul extern unsigned long callit(unsigned long arg1, unsigned long arg2, @@ -27,6 +29,7 @@ static inline void do_tlbie(unsigned long rb, unsigned long rs) #define DSISR 18 #define DAR 19 +#define DEC 22 #define SRR0 26 #define SRR1 27 #define PID 48 @@ -35,6 +38,7 @@ static inline void do_tlbie(unsigned long rb, unsigned long rs) #define SPRG3 275 #define HSRR0 314 #define HSRR1 315 +#define HEIR 339 #define PTCR 464 static inline unsigned long mfspr(int sprnum) @@ -464,6 +468,39 @@ int mode_test_8(void) return 0; } +int test_9_mf(void) +{ + mfspr(2); + return 0; +} + +int test_9_mt(unsigned long arg) +{ + mtspr(2, arg); + return 0; +} + +int mode_test_9(void) +{ + unsigned long ret, msr; + + /* + * Test that mfspr/mtspr to unimplemented SPRs in user mode + * causes an HEAI and sets HEIR. + */ + msr = MSR_SF | MSR_LE | MSR_PR | MSR_EE | MSR_IR | MSR_DR; + mtspr(DEC, 0x7fffffff); + ret = callit(0, 0, (unsigned long) test_9_mf, msr); + if (ret != 0xe40 || + (mfspr(HEIR) & 0xfc1fffff) != ((31ul << 26) | (2 << 16) | (339 << 1))) + return 1; + ret = callit(0, 0, (unsigned long) test_9_mt, msr); + if (ret != 0xe40 || + (mfspr(HEIR) & 0xfc1fffff) != ((31ul << 26) | (2 << 16) | (467 << 1))) + return 2; + return 0; +} + int fail = 0; void do_test(int num, int (*test)(void)) @@ -510,6 +547,7 @@ int main(void) do_test(6, mode_test_6); do_test(7, mode_test_7); do_test(8, mode_test_8); + do_test(9, mode_test_9); return fail; } diff --git a/tests/test_modes.bin b/tests/test_modes.bin index d0c24d70a6026017cb9837afa378b6144a1da476..0649686c181fa23af18db9d3e1b734e07d3c1833 100755 GIT binary patch delta 2257 zcmZuxeN0nV6hH43N>Oa8lC1)B4~iRQI13^?Xe+OLuc*Nf+;Cgcf;u$DsllY3uWM!9 z23yQ@*CnuJT8uWjWQmNK%w&InTg*(hM30Mud|Q;h;|(QU>1pXqglh^8x@KS%T0SNLo={n@TtvTA9N#Z23YzE4Ho2lfj8B z4}`kpVi=MIq?ka6UE;a10_cxPus^osK5x=E8ZU1gu`$$*f!V6&v%reygLq>ssxzqM 
zw69M>i%LabpViya0!||o`deBIUI3{gbKMEm2TO`1I;VkRjDpY|MQcj(v{c19a0 z8x|sYpd=ioEeWf{L3s>9c@+h#q<}IzOc}|;#Clhh7{Y31;Wz5!I6LCZGWY(^ScGvb z*bbXH$hlNO-S>diKRd-yU0I+#G$eC=%u)E{ybnAlWPlKEkGy+4o}!RQaTm^7blv*E z2p_SFDmbN+d|y;Fc;%Qo07{|SjQV6-<|JE7#<^4bOvKrGC5(xxu0}g|=1^XsP(ky=bDjJzmX=XWPQmTT&Q|2mb5+>MzvTWfCcB<@C@yu;jR$eI@ z_OdjYMb1v!*BQ(QyrY-Mp)kI$FAUB%b~6R(4+fEx=8 z)tk$#ewJdtC>&>(?G|C_;A8tjK~UH{=V#*T>)5D`Z*tz{03|HLlt=zph{zJO`(}GW zgKz~Wh6aFi)Qbx!!&F3jg(NuPqaM+B|LsV;M-&o`28PVODt>`=tPBfV*yKt}(7d;u zzC%rm-M>pO(bPlURl>vCh<(T4e>q=->ub0$Z05#q3ln{29n zK`1~hLK_KT9UdCfU2s%5BD#&9grVwdSM{#Bs3K#(;*Q2Z-G6|r9s)6%KcN>igH)f7+q8Wx7ia$JN(&>7?W3EiwWDpfI?aZ1`Og9Oj>xe zk7eqHB#L(=XfijHEMSO%3`j_(fyEg~G+~J*U|?=Yps+2Bh*BJQJ?E_ujEOI~yXTzy zo$r44o^!jyUBclm;iIs4$aj3_dkfj9Z#wh3<_QBVx;i3{dRC#E=? zTGPSL?Cv?~-)L5_M2Iq@x>F;{F;?rIB^+h$Md{)J&ej)Yv;FRDp^Y7PS5EK!)KTV1 zQ_98%^Fqv4kXf`W4gC9Xa}v0!eo!Vs4w!qdx7ShTfMMC=$qOm4Te8BkNO=IOL(+eirepwqdTZ844m{>)5d8iRhp_jkc*P*MbioZKbV3o9;tT2&Oy| zOoSwM#>ix$W?(@St8@N5%B^18lnjHi6=TF_!x*4#eqdm+rXc7^_y-g(oUf3&MY9%v znGa9kLaoH~-ZcCAl?@Cgp#+}AK5Cz(`iPjO-D4YsB@F#FOB@GrW5E?Pb%?*$83=G$Sn@)G2gIKUa% z&CV8oApRI*%cW9rXgk{@JtzFYPD=|#cZ}VX-V(pQ&eoUg6xHkOmy+d#$S>U^Zo0-! 
zm2MD)Yb;BCO^j`4@5yr(beOl9}9q?V-%f{Q&>{-L<(3OiN)A{(kmc5{9O zA0n4wJSe>{bV&|qz`9<2^ay{(dQqHWs7mT*WxiQyXNM;XKq5$>-Ne39vskN77q&2q z;+#3Yy@C2&!ivS>e0Zcg$*kRBs*=7gjw`yJv+v@cJU|U(16}Gn$akgQu36s@izQ*( z28%L!zEu|s!rmTBB^Qzjz|2dIMeS&1;@CXGc^GG?u{u)KcLG+ZG^yz3;*fBh!SxF` z!xD=MpNXA9wbg~V>Zw+!_;t{vPQxYQ+K6N8DCa?(q1xc8v_h4^H6KGQIF91Fj;|JK zEX!3;JCcYG$y94&{UqPzNy2%EIf3K;EzXBaA zhuA>XtO^H0GDU#8yNOF$Z0PVCI%??#OsWkrSY$Yam`1eeBkDSgL7YA9pRs0H87JEQqZnpNBqw& l(Es9`YDORN=s4Ojrr=@dqgXqRGZUO|?Frce=8x-g{{hgsiD3W$ diff --git a/tests/test_modes.console_out b/tests/test_modes.console_out index 25e791c..2fe36d2 100644 --- a/tests/test_modes.console_out +++ b/tests/test_modes.console_out @@ -6,3 +6,4 @@ test 05:PASS test 06:PASS test 07:PASS test 08:PASS +test 09:PASS From 79e69d2a231c0336b3585b9e1dff7eab1c24813b Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 29 Sep 2025 19:53:43 +1000 Subject: [PATCH 12/12] execute2: Simplify execute2 logic to improve timing This aims to simplify the logic in the execute2_1 process. It is not really necessary to preserve the contents of ex2 when stalled, except for ex2.e.last_nia; but when stalled, bits which would initiate downstream actions, such as ex2.e.valid, ex2.e.interrupt and ex2.se, should be cleared. Also, the path through stage2_stall to the bypass valid signal has shown up as a critical path. This dependency is there because the mfspr instruction to a slow SPR or a PMU SPR should not forward a result before the instruction is about to complete, because the result might change (for example when reading the timebase). To avoid this dependency, we simply don't forward results for mfspr to slow/PMU SPRs. 
Signed-off-by: Paul Mackerras --- execute1.vhdl | 48 ++++++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/execute1.vhdl b/execute1.vhdl index 9f7acaa..43831f3 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -1225,7 +1225,7 @@ begin v.e.rc := e_in.rc; v.e.write_cr_data := write_cr_data; v.e.write_cr_mask := write_cr_mask; - v.e.write_cr_enable := e_in.output_cr; + v.e.write_cr_enable := e_in.output_cr or e_in.rc; v.e.write_xerc_enable := e_in.output_xer; v.e.xerc := xerc_in; v.new_msr := ex1.msr; @@ -2058,16 +2058,22 @@ begin -- Next insn adder used in a couple of places next_nia <= std_ulogic_vector(unsigned(ex1.e.last_nia) + 4); - v := ex2; - if stage2_stall = '0' then - v.e := ex1.e; - v.se := ex1.se; - v.ext_interrupt := ex1.ext_interrupt; - v.taken_branch_event := ex1.taken_branch_event; - v.br_mispredict := ex1.br_mispredict; - if ex1.advance_nia = '1' then - v.e.last_nia := next_nia; - end if; + v.log_addr_spr := ex2.log_addr_spr; + + v.e := ex1.e; + v.se := ex1.se; + v.ext_interrupt := ex1.ext_interrupt and not stage2_stall; + v.taken_branch_event := ex1.taken_branch_event and not stage2_stall; + v.br_mispredict := ex1.br_mispredict and not stage2_stall; + if stage2_stall = '1' then + v.e.last_nia := ex2.e.last_nia; + elsif ex1.advance_nia = '1' then + v.e.last_nia := next_nia; + end if; + if stage2_stall = '1' then + v.e.valid := '0'; + v.e.interrupt := '0'; + v.se := side_effect_init; end if; if ex1.se.mult_32s = '1' and ex1.oe = '1' then @@ -2153,14 +2159,11 @@ begin cr_mask(7) := '1'; end if; - if stage2_stall = '0' then - v.e.write_data := ex_result; - v.e.write_cr_data := cr_res; - v.e.write_cr_mask := cr_mask; - if ex1.e.rc = '1' and ex1.e.write_enable = '1' and v.e.valid = '1' then - v.e.write_cr_enable := '1'; - end if; + v.e.write_data := ex_result; + v.e.write_cr_data := cr_res; + v.e.write_cr_mask := cr_mask; + if stage2_stall = '0' then if ex1.se.write_msr = '1' then ctrl_tmp.msr <= 
ex1.msr; end if; @@ -2250,10 +2253,11 @@ begin end if; end if; - bypass_valid := ex1.e.valid; - if stage2_stall = '1' and ex1.res2_sel(1) = '1' then - bypass_valid := '0'; - end if; + -- Don't bypass the result from mfspr to slow SPRs or PMU SPRs: + -- the value might change while we are stalled, so it must not be + -- forwarded then, and we don't want bypass_valid to depend on + -- stage2_stall for timing reasons. + bypass_valid := ex1.e.valid and not ex1.res2_sel(1); bypass2_data.tag.valid <= ex1.e.write_enable and bypass_valid; bypass2_data.tag.tag <= ex1.e.instr_tag.tag;