From b8f7cbd894b64410e3f639da2da923d78465b0cf Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 10 Dec 2025 20:14:35 +1100 Subject: [PATCH] FPU: Record bits shifted out of addend in fmadd-family instructions If the addend is smaller than the product and thus needs to be shifted right, record if any bits are lost from the right end in r.x, so that the result gets rounded correctly. Also add a test that checks one such case. Signed-off-by: Paul Mackerras --- fpu.vhdl | 1 + tests/fpu/fpu.c | 72 ++++++++++++++++++++++++--------------------- tests/test_fpu.bin | Bin 33136 -> 33464 bytes 3 files changed, 39 insertions(+), 34 deletions(-) diff --git a/fpu.vhdl b/fpu.vhdl index 8124dad..d120cd8 100644 --- a/fpu.vhdl +++ b/fpu.vhdl @@ -1970,6 +1970,7 @@ begin -- r.shift = addend exp - product exp + 64, r.r = r.b.mantissa set_s := '1'; opsel_s <= S_SHIFT; + set_x := '1'; -- set shift to r.shift - 64 rs_sel1 <= RSH1_S; rs_con2 <= RSCON2_64; diff --git a/tests/fpu/fpu.c b/tests/fpu/fpu.c index 824e764..5c46b6f 100644 --- a/tests/fpu/fpu.c +++ b/tests/fpu/fpu.c @@ -1504,110 +1504,114 @@ struct fmavals { unsigned long ra; unsigned long rc; unsigned long rb; + unsigned long fpscr; unsigned long fma; unsigned long fms; unsigned long nfma; unsigned long nfms; } fmavals[] = { /* +0 * +0 +- +0 -> +0, +0, -0, -0 */ - { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, FPS_RN_NEAR, 0x0000000000000000, 0x0000000000000000, 0x8000000000000000, 0x8000000000000000 }, /* +0 * NaNC +- +0 -> NaNC, NaNC, NaNC, NaNC */ - { 0x0000000000000000, 0x7ffc000000000000, 0x0000000000000000, + { 0x0000000000000000, 0x7ffc000000000000, 0x0000000000000000, FPS_RN_NEAR, 0x7ffc000000000000, 0x7ffc000000000000, 0x7ffc000000000000, 0x7ffc000000000000 }, /* +0 * NaNC +- NaNB -> NaNB, NaNB, NaNB, NaNB */ - { 0x0000000000000000, 0x7ffc000000000000, 0x7ffb000000000000, + { 0x0000000000000000, 0x7ffc000000000000, 0x7ffb000000000000, FPS_RN_NEAR, 0x7ffb000000000000, 0x7ffb000000000000, 0x7ffb000000000000, 0x7ffb000000000000 }, /* NaNA * NaNC +- NaNB -> NaNA, NaNA, NaNA, NaNA */ - { 0x7ffa000000000000, 0x7ffc000000000000, 0x7ffb000000000000, + { 0x7ffa000000000000, 0x7ffc000000000000, 0x7ffb000000000000, FPS_RN_NEAR, 0x7ffa000000000000, 0x7ffa000000000000, 0x7ffa000000000000, 0x7ffa000000000000 }, /* +1.0 * -0 +- +finite B -> +B, -B, -B, +B */ - { 0x3ff0000000000000, 0x8000000000000000, 0x678123456789abcd, + { 0x3ff0000000000000, 0x8000000000000000, 0x678123456789abcd, FPS_RN_NEAR, 0x678123456789abcd, 0xe78123456789abcd, 0xe78123456789abcd, 0x678123456789abcd }, /* +1.0 * -1.0 +- (B = +3.818e+190) -> +B, -B, -B, +B */ - { 0x3ff0000000000000, 0xbff0000000000000, 0x678123456789abcd, + { 0x3ff0000000000000, 0xbff0000000000000, 0x678123456789abcd, FPS_RN_NEAR, 0x678123456789abcd, 0xe78123456789abcd, 0xe78123456789abcd, 0x678123456789abcd }, /* +inf * -1.0 +- +finite B -> -inf, -inf, +inf, +inf */ - { 0x7ff0000000000000, 0xbff0000000000000, 0x678123456789abcd, + { 0x7ff0000000000000, 0xbff0000000000000, 0x678123456789abcd, FPS_RN_NEAR, 0xfff0000000000000, 0xfff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000 }, /* +inf * +0 +- +finite B -> NaNQ, NaNQ, NaNQ, NaNQ */ - { 0x7ff0000000000000, 0x0000000000000000, 0x678123456789abcd, + { 0x7ff0000000000000, 0x0000000000000000, 0x678123456789abcd, FPS_RN_NEAR, 0x7ff8000000000000, 0x7ff8000000000000, 0x7ff8000000000000, 0x7ff8000000000000 }, /* +1.0 * +1.0 +- 1.00000012 -> +2.00000012, +1.2e-7, -2.00000012, -1.2e-7 */ - { 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000020000000, + { 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000020000000, FPS_RN_NEAR, 0x4000000010000000, 0xbe80000000000000, 0xc000000010000000, 0x3e80000000000000 }, /* +(1 + 2^-52) * +(1 + 2^-52) +- +1.0 -> +(2 + 2^-51), +2^-51, -(2 + 2^-51), -2^-51 */ - { 0x3ff0000000000001, 0x3ff0000000000001, 0x3ff0000000000000, + { 0x3ff0000000000001, 0x3ff0000000000001, 0x3ff0000000000000, FPS_RN_NEAR, 0x4000000000000001, 0x3cc0000000000000, 0xc000000000000001, 0xbcc0000000000000 }, /* +(1 + 3*2^-52) * +(1 + 2^-51) +- +1.0 -> +(2 + 2^-50), +5 * 2^-52 + 2^-101, -, - */ - { 0x3ff0000000000003, 0x3ff0000000000002, 0x3ff0000000000000, + { 0x3ff0000000000003, 0x3ff0000000000002, 0x3ff0000000000000, FPS_RN_NEAR, 0x4000000000000002, 0x3cd4000000000002, 0xc000000000000002, 0xbcd4000000000002 }, /* +2.443e-77 * 2.828 +- 6.909e-77 -> -1.402e-93, +1.382e-76, +1.402e-93, -1.382e-76 */ - { 0x3006a09e667f3bcc, 0x4006a09e667f3bcd, 0xb020000000000000, + { 0x3006a09e667f3bcc, 0x4006a09e667f3bcd, 0xb020000000000000, FPS_RN_NEAR, 0xaca765753908cd20, 0x3030000000000000, 0x2ca765753908cd20, 0xb030000000000000 }, /* +2.443e-77 * 2.828 +- 6.909e-77 -> +9.446e-93, +1.382e-76, -9.446e-93, -1.382e-76 */ - { 0x3006a09e667f3bcd, 0x4006a09e667f3bcd, 0xb020000000000000, + { 0x3006a09e667f3bcd, 0x4006a09e667f3bcd, 0xb020000000000000, FPS_RN_NEAR, 0x2cd3b3efbf5e2229, 0x3030000000000000, 0xacd3b3efbf5e2229, 0xb030000000000000 }, /* +2.443e-77 * 2.828 +- -1.1055e-75 -> -1.0364e-75, +1.1746e-75, +1.0364e-75, -1.1746e-75 */ - { 0x3006a09e667f3bcc, 0x4006a09e667f3bcd, 0xb060003450000000, + { 0x3006a09e667f3bcc, 0x4006a09e667f3bcd, 0xb060003450000000, FPS_RN_NEAR, 0xb05e0068a0000000, 0x3061003450000000, 0x305e0068a0000000, 0xb061003450000000 }, /* +2 * +3 +- 3 -> +9, +3, -9, -3 */ - { 0x4000000000000000, 0x4008000000000000, 0x4008000000000000, + { 0x4000000000000000, 0x4008000000000000, 0x4008000000000000, FPS_RN_NEAR, 0x4022000000000000, 0x4008000000000000, 0xc022000000000000, 0xc008000000000000 }, /* +2 * +3 +- 5 -> +11, +1, -11, -1 */ - { 0x4000000000000000, 0x4008000000000000, 0x4014000000000000, + { 0x4000000000000000, 0x4008000000000000, 0x4014000000000000, FPS_RN_NEAR, 0x4026000000000000, 0x3ff0000000000000, 0xc026000000000000, 0xbff0000000000000 }, /* +2 * +3 +- 7 -> +13, -1, -13, +1 */ - { 0x4000000000000000, 0x4008000000000000, 0x401c000000000000, + { 0x4000000000000000, 0x4008000000000000, 0x401c000000000000, FPS_RN_NEAR, 0x402a000000000000, 0xbff0000000000000, 0xc02a000000000000, 0x3ff0000000000000 }, /* +2 * +3 +- 9 -> +15, -3, -15, +3 */ - { 0x4000000000000000, 0x4008000000000000, 0x4022000000000000, + { 0x4000000000000000, 0x4008000000000000, 0x4022000000000000, FPS_RN_NEAR, 0x402e000000000000, 0xc008000000000000, 0xc02e000000000000, 0x4008000000000000 }, /* +2 * +3 +- -3 -> +3, +9, -3, -9 */ - { 0x4000000000000000, 0x4008000000000000, 0xc008000000000000, + { 0x4000000000000000, 0x4008000000000000, 0xc008000000000000, FPS_RN_NEAR, 0x4008000000000000, 0x4022000000000000, 0xc008000000000000, 0xc022000000000000 }, /* +2 * +3 +- -5 -> +1, +11, -1, -11 */ - { 0x4000000000000000, 0x4008000000000000, 0xc014000000000000, + { 0x4000000000000000, 0x4008000000000000, 0xc014000000000000, FPS_RN_NEAR, 0x3ff0000000000000, 0x4026000000000000, 0xbff0000000000000, 0xc026000000000000 }, /* +2 * +3 +- -7 -> -1, +13, +1, -13 */ - { 0x4000000000000000, 0x4008000000000000, 0xc01c000000000000, + { 0x4000000000000000, 0x4008000000000000, 0xc01c000000000000, FPS_RN_NEAR, 0xbff0000000000000, 0x402a000000000000, 0x3ff0000000000000, 0xc02a000000000000 }, /* +2 * +3 +- -9 -> -3, +15, +3, -15 */ - { 0x4000000000000000, 0x4008000000000000, 0xc022000000000000, + { 0x4000000000000000, 0x4008000000000000, 0xc022000000000000, FPS_RN_NEAR, 0xc008000000000000, 0x402e000000000000, 0x4008000000000000, 0xc02e000000000000 }, /* +2 * -3 +- 3 -> -3, -9, +3, +9 */ - { 0x4000000000000000, 0xc008000000000000, 0x4008000000000000, + { 0x4000000000000000, 0xc008000000000000, 0x4008000000000000, FPS_RN_NEAR, 0xc008000000000000, 0xc022000000000000, 0x4008000000000000, 0x4022000000000000 }, /* +2 * -3 +- 5 -> -1, -11, +1, +11 */ - { 0x4000000000000000, 0xc008000000000000, 0x4014000000000000, + { 0x4000000000000000, 0xc008000000000000, 0x4014000000000000, FPS_RN_NEAR, 0xbff0000000000000, 0xc026000000000000, 0x3ff0000000000000, 0x4026000000000000 }, /* +2 * -3 +- 7 -> +1, -13, -1, +13 */ - { 0x4000000000000000, 0xc008000000000000, 0x401c000000000000, + { 0x4000000000000000, 0xc008000000000000, 0x401c000000000000, FPS_RN_NEAR, 0x3ff0000000000000, 0xc02a000000000000, 0xbff0000000000000, 0x402a000000000000 }, /* +2 * -3 +- 9 -> +3, -15, -3, +15 */ - { 0x4000000000000000, 0xc008000000000000, 0x4022000000000000, + { 0x4000000000000000, 0xc008000000000000, 0x4022000000000000, FPS_RN_NEAR, 0x4008000000000000, 0xc02e000000000000, 0xc008000000000000, 0x402e000000000000 }, /* -2 * +3 +- -3 -> -9, -3, +9, +3 */ - { 0xc000000000000000, 0x4008000000000000, 0xc008000000000000, + { 0xc000000000000000, 0x4008000000000000, 0xc008000000000000, FPS_RN_NEAR, 0xc022000000000000, 0xc008000000000000, 0x4022000000000000, 0x4008000000000000 }, /* -2 * +3 +- -5 -> -11, -1, +11, +1 */ - { 0xc000000000000000, 0x4008000000000000, 0xc014000000000000, + { 0xc000000000000000, 0x4008000000000000, 0xc014000000000000, FPS_RN_NEAR, 0xc026000000000000, 0xbff0000000000000, 0x4026000000000000, 0x3ff0000000000000 }, /* -2 * +3 +- -7 -> -13, +1, +13, -1 */ - { 0xc000000000000000, 0x4008000000000000, 0xc01c000000000000, + { 0xc000000000000000, 0x4008000000000000, 0xc01c000000000000, FPS_RN_NEAR, 0xc02a000000000000, 0x3ff0000000000000, 0x402a000000000000, 0xbff0000000000000 }, /* -2 * +3 +- -9 -> -15, +3, +15, -3 */ - { 0xc000000000000000, 0x4008000000000000, 0xc022000000000000, + { 0xc000000000000000, 0x4008000000000000, 0xc022000000000000, FPS_RN_NEAR, 0xc02e000000000000, 0x4008000000000000, 0x402e000000000000, 0xc008000000000000 }, /* -2 * +3 +- +0 -> -6, -6, +6, +6 */ - { 0xc000000000000000, 0x4008000000000000, 0x0000000000000000, + { 0xc000000000000000, 0x4008000000000000, 0x0000000000000000, FPS_RN_NEAR, 0xc018000000000000, 0xc018000000000000, 0x4018000000000000, 0x4018000000000000 }, /* +2 * -3 +- -0 -> -6, -6, +6, +6 */ - { 0x4000000000000000, 0xc008000000000000, 0x8000000000000000, + { 0x4000000000000000, 0xc008000000000000, 0x8000000000000000, FPS_RN_NEAR, 0xc018000000000000, 0xc018000000000000, 0x4018000000000000, 0x4018000000000000 }, /* 2^-1026 * (1.5 * 2^1023) +- -0 -> (1.5 * 2^-3), ditto, -ditto, -ditto */ - { 0x0001000000000000, 0x7fe8000000000000, 0x8000000000000000, + { 0x0001000000000000, 0x7fe8000000000000, 0x8000000000000000, FPS_RN_NEAR, 0x3fc8000000000000, 0x3fc8000000000000, 0xbfc8000000000000, 0xbfc8000000000000 }, + /* 1 * -1 + tiny -> -1 + delta, -1, 1 - delta, 1 */ + { 0x3ff0000000000000, 0xbff0000000000000, 0x00000000b2200102, FPS_RN_CEIL, + 0xbfefffffffffffff, 0xbff0000000000000, 0x3fefffffffffffff, 0x3ff0000000000000 }, }; int test23(long arg) @@ -1617,8 +1621,8 @@ int test23(long arg) struct fmavals *vp = fmavals; unsigned long fpscr; - set_fpscr(FPS_RN_NEAR); for (i = 0; i < sizeof(fmavals) / sizeof(fmavals[0]); ++i, ++vp) { + set_fpscr(vp->fpscr); asm("lfd 6,0(%0); lfd 7,8(%0); lfd 8,16(%0); fmadd 0,6,7,8; stfd 0,0(%1)" : : "b" (&vp->ra), "b" (results) : "memory"); asm("fmsub 1,6,7,8; fnmadd 2,6,7,8; fnmsub 3,6,7,8; stfd 1,8(%0); stfd 2,16(%0); stfd 3,24(%0)" diff --git a/tests/test_fpu.bin b/tests/test_fpu.bin index 4280dd2468e42c411dccace1f3377962c0b207f4..f68ea11be39a32ba6dab54ab64e3109f8f4b185a 100755 GIT binary patch delta 1143 zcmZute`r%z6h7}XuCGDsvzZz?%+iYK4}ZmM>Wk05zCP&goZ@d>ZY!kqt=dncx+mTALF+KuRy9)QgBt^d)U zM$=sTVtU|>R%|VDFq%}h;p_-oR31o9nIX?3_R+&K6JCYPmqJg+OFRV zy+-QQF@*S7QIh+lDD(3QmPh0_~gSDkaJDg>eZ-3zA z>#5T-hvS#1?cG64eZ|oGa)lLYlPTeEL!{UESXb4Ug!vzsUek#*D%4xW z8_x{L&{)k~)ey4of1cmpPd{lcNu~SR=2Zq1v`Wyo%r5pC1N1!j4pr~HjWhl1(cV%C zfwB3z2qH4l4WHA^Dldo#3Zo9=^s<)C8*vFa9SxVGo6$$%@&<*!_%#P~Weyr=M2$a3 zPC1|^xST5jBe?QEobs=pCc+*uaV{Jvwbz$yu_LhBeSbO2_LYPn^j*;)UY6E`x$YF|B}2@iPtN%&~I(rJKP5_nt1phE$t1Hb~z#c(F$^h`W2kk z*RtUWHKTva>7ojwJlZ`>uJf~~g7te9gYT$$+y%K+Xpyi zlyPCl5|gj1^L314bAz~0Kg}lN_oo z--=H~182uKV)YsUvSK;7Y8EpQNycx^GPE{6X7Zpq(^$;0u%ga4c^Hc*=#tDjS*f_B zos27ZLOQo%ROh82{e^;FcPWzb&1%ZHxPgHo^7vuB>}e6TrJa(N&*SO3H5 z|C({i&1)NH-Hzhr^F=kw5!9+ZUv&8WfS!d;-nMuPaI4RPZ9W0VeR_fwpZE$fO?{eb z#{%!3TZ|e)bzN?I+E@0m}~8Vc@WUGT}YJ7!dIY!P`=ZcLR1z1_b0= z1Z*S3f`vJLyv2%1n)x8{gqY?Hl9zh^2yuiVLXyBArI|29ND}x~6Tah0)N8G_%3r@Z BT=4(^