From 0ad2aa30149d0a6e2d3082e841f6fe5079209067 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 22 Jul 2020 16:13:12 +1000 Subject: [PATCH] FPU: Implement floating round-to-integer instructions This implements frin, friz, frip and frim, and adds tests for them. Signed-off-by: Paul Mackerras --- decode1.vhdl | 4 +++ fpu.vhdl | 40 +++++++++++++++++++-- tests/fpu/fpu.c | 71 +++++++++++++++++++++++++++++++++++++ tests/test_fpu.bin | Bin 14032 -> 21208 bytes tests/test_fpu.console_out | 1 + 5 files changed, 114 insertions(+), 2 deletions(-) diff --git a/decode1.vhdl b/decode1.vhdl index c659e3e..a42899d 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -441,6 +441,10 @@ architecture behaviour of decode1 is 2#100000010# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 2/8=fmr 2#100000100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/8=fnabs 2#100001000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 8/8=fabs + 2#100001100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 12/8=frin + 2#100001101# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 13/8=friz + 2#100001110# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 14/8=frip + 2#100001111# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 15/8=frim 2#110000000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- 0/12=frsp 2#111000000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 0/14=fctiw 2#111000100# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- 4/14=fctiwu diff --git a/fpu.vhdl b/fpu.vhdl index 6301fa7..371fdc5 100644 --- a/fpu.vhdl +++ b/fpu.vhdl @@ -39,7 +39,8 @@ architecture behaviour of fpu is DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF, DO_FMR, DO_FCFID, DO_FCTI, - DO_FRSP, + DO_FRSP, DO_FRI, + FRI_1, INT_SHIFT, INT_ROUND, INT_ISHIFT, INT_FINAL, INT_CHECK, INT_OFLOW, FINISH, NORMALIZE, @@ -461,7 +462,11 @@ begin v.state := DO_MTFSF; end if; when "01000" => - v.state := DO_FMR; + if e_in.insn(9 downto 8) /= "11" then + v.state := DO_FMR; + else + v.state := DO_FRI; + end if; when "01100" => v.state := DO_FRSP; when "01110" => @@ -587,6 +592,31 @@ begin v.instr_done := '1'; v.state := IDLE; + when DO_FRI => -- fri[nzpm] + opsel_a <= AIN_B; + v.result_class := r.b.class; + v.result_sign := r.b.negative; + v.result_exp := r.b.exponent; + v.fpscr(FPSCR_FR) := '0'; + v.fpscr(FPSCR_FI) := '0'; + if r.b.class = NAN and r.b.mantissa(53) = '0' then + -- Signalling NAN + v.fpscr(FPSCR_VXSNAN) := '1'; + invalid := '1'; + end if; + if r.b.class = FINITE then + if r.b.exponent >= to_signed(52, EXP_BITS) then + -- integer already, no rounding required + arith_done := '1'; + else + v.shift := r.b.exponent - to_signed(52, EXP_BITS); + v.state := FRI_1; + v.round_mode := '1' & r.insn(7 downto 6); + end if; + else + arith_done := '1'; + end if; + when DO_FRSP => opsel_a <= AIN_B; v.result_class := r.b.class; @@ -749,6 +779,12 @@ begin invalid := '1'; arith_done := '1'; + when FRI_1 => + opsel_r <= RES_SHIFT; + set_x := '1'; + v.shift := to_signed(-2, EXP_BITS); + v.state := ROUNDING; + when FINISH => if r.r(63 downto 54) /= "0000000001" then renormalize := '1'; diff --git a/tests/fpu/fpu.c b/tests/fpu/fpu.c index 3c6a9bd..d24fe14 100644 --- a/tests/fpu/fpu.c +++ b/tests/fpu/fpu.c @@ -753,6 +753,76 @@ int fpu_test_10(void) return trapit(0, test10); } +struct frivals { + unsigned long val; + unsigned long nval; + unsigned long zval; + unsigned long pval; + unsigned long mval; +} frivals[] = { + { 0x0000000000000000, 0, 0, 0, 0 }, + { 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x8000000000000000 }, + { 0x3fdfffffffffffff, 0, 0, 0x3ff0000000000000, 0 }, + { 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000, 0x3ff0000000000000 }, + { 0xbff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0xbff0000000000000, 0xbff0000000000000 }, + { 0x402123456789abcd, 0x4022000000000000, 0x4020000000000000, 0x4022000000000000, 0x4020000000000000 }, + { 0x406123456789abcd, 0x4061200000000000, 0x4061200000000000, 0x4061400000000000, 0x4061200000000000 }, + { 0x409123456789abcd, 0x4091240000000000, 0x4091200000000000, 0x4091240000000000, 0x4091200000000000 }, + { 0x41c123456789abcd, 0x41c1234567800000, 0x41c1234567800000, 0x41c1234568000000, 0x41c1234567800000 }, + { 0x41d123456789abcd, 0x41d1234567800000, 0x41d1234567800000, 0x41d1234567c00000, 0x41d1234567800000 }, + { 0x41e123456789abcd, 0x41e1234567800000, 0x41e1234567800000, 0x41e1234567a00000, 0x41e1234567800000 }, + { 0x41f123456789abcd, 0x41f1234567900000, 0x41f1234567800000, 0x41f1234567900000, 0x41f1234567800000 }, + { 0xc1f123456789abcd, 0xc1f1234567900000, 0xc1f1234567800000, 0xc1f1234567800000, 0xc1f1234567900000 }, + { 0xc1f1234567880000, 0xc1f1234567900000, 0xc1f1234567800000, 0xc1f1234567800000, 0xc1f1234567900000 }, + { 0x432123456789abcd, 0x432123456789abce, 0x432123456789abcc, 0x432123456789abce, 0x432123456789abcc }, + { 0x433123456789abcd, 0x433123456789abcd, 0x433123456789abcd, 0x433123456789abcd, 0x433123456789abcd }, + { 0x434123456789abcd, 0x434123456789abcd, 0x434123456789abcd, 0x434123456789abcd, 0x434123456789abcd }, + { 0x43c123456789abcd, 0x43c123456789abcd, 0x43c123456789abcd, 0x43c123456789abcd, 0x43c123456789abcd }, + { 0x43d123456789abcd, 0x43d123456789abcd, 0x43d123456789abcd, 0x43d123456789abcd, 0x43d123456789abcd }, + { 0x43e123456789abcd, 0x43e123456789abcd, 0x43e123456789abcd, 0x43e123456789abcd, 0x43e123456789abcd }, + { 0x43f123456789abcd, 0x43f123456789abcd, 0x43f123456789abcd, 0x43f123456789abcd, 0x43f123456789abcd }, + { 0xc3f123456789abcd, 0xc3f123456789abcd, 0xc3f123456789abcd, 0xc3f123456789abcd, 0xc3f123456789abcd }, + { 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000, 0x7ff0000000000000 }, + { 0xfff0000000000000, 0xfff0000000000000, 0xfff0000000000000, 0xfff0000000000000, 0xfff0000000000000 }, + { 0x7ff123456789abcd, 0x7ff923456789abcd, 0x7ff923456789abcd, 0x7ff923456789abcd, 0x7ff923456789abcd }, + { 0xfff923456789abcd, 0xfff923456789abcd, 0xfff923456789abcd, 0xfff923456789abcd, 0xfff923456789abcd }, +}; + +int test11(long arg) +{ + long i; + unsigned long results[4]; + struct frivals *vp = frivals; + + for (i = 0; i < sizeof(frivals) / sizeof(frivals[0]); ++i, ++vp) { + set_fpscr(FPS_RN_FLOOR); + asm("lfd 3,0(%0); frin 4,3; stfd 4,0(%1)" + : : "b" (&vp->val), "b" (results) : "memory"); + set_fpscr(FPS_RN_NEAR); + asm("friz 5,3; stfd 5,8(%0)" : : "b" (results) : "memory"); + set_fpscr(FPS_RN_ZERO); + asm("frip 5,3; stfd 5,16(%0)" : : "b" (results) : "memory"); + set_fpscr(FPS_RN_CEIL); + asm("frim 5,3; stfd 5,24(%0)" : : "b" (results) : "memory"); + if (results[0] != vp->nval || results[1] != vp->zval || + results[2] != vp->pval || results[3] != vp->mval) { + print_hex(i, 2, "\r\n"); + print_hex(results[0], 16, " "); + print_hex(results[1], 16, " "); + print_hex(results[2], 16, " "); + print_hex(results[3], 16, " "); + return i + 1; + } + } + return 0; +} + +int fpu_test_11(void) +{ + enable_fp(); + return trapit(0, test11); +} + int fail = 0; void do_test(int num, int (*test)(void)) @@ -788,6 +858,7 @@ int main(void) do_test(8, fpu_test_8); do_test(9, fpu_test_9); do_test(10, fpu_test_10); + do_test(11, fpu_test_11); return fail; } diff --git a/tests/test_fpu.bin b/tests/test_fpu.bin index 81d18542064550fb7064a3683cbcdc7f7048c285..d2320cd960e8c39417367d3b8d7fc730494c9eca 100755 GIT binary patch literal 21208 zcmeHP4RBP~bv~=#6+hAvlUV#1o)7^^QBu(%&J&W*TS;Ic{IlUFV8?M*UNJ^Mrg%cy zm3hmq$hL+IC1C6pJ75bBR&7G5(~_w@1F;m_2_#MnDNaR9u!KpfACoL;n=DA?^*i_8 zCq1nsMBK(q2fdl6bMHCdJ@?#m&pq$HyAOzrGNPK=AEehbfUcqCHMK;3@YaL39=!Ey zBFop;`c|(CmDBp-ub*DuFqyV`o0IE9@2?`$pqk>iMHNI^DN&C92kALM(7MQ;96u51 zmn~PE68hMS{+cU&A)*NS*@1p`pr0LU4$s|D+nl^Tl#>#d{Qa$VgLS6`$9S6`;qtFKVU)fRH~o}irGQ{?S! zquSnfYV7T#1HCb7?Tu4M?;Ij?ExC3j+BVxIZ42h^Fky?ywlr7FGZS(47+_Bq?CF9% z-LR({_H@IZZrIZed%9syH|*(#Jw33e2ln*9o*vlK1ABU4PY>+rfh`g3a+c|FxjM;} zqcTgbd#+*%bH)Hq~-WvP!2YaBo!h?CvFe%xu zYaDkU@n!cPxyk)g%1s{sP41skZ_fBX;&2<%ogIIac8iZc{T3g;`xYO6#?2YOz5XIM zxqljWbH+a;A$`OQ+y6BF79anFTYUTzZ}IVG4nKZ-eQ2Y|jZx(OQRI;kNv&+<1ZJ%O8JyRxwf6k5Sas`>5ZuNATWx-?El6hHuZFieia9 z3G9#FIh7!zZL^Ax+}ji;I`q!bVPwC)Hv-u=-r1{UvJdiSL+_Eh{oy04{9#kd(4D;O zF{0+bI42PFJW)~XrrhGmH3gxV<$~QW{fOwrpMEBLof)%o8iO?fb3O&kEoTEOTohQF z_z2 zy|6_;njR460&SMJD>!Rzi1Y%p)7omZTOFMS6*Q1a$U_-yAi&UW@=&Vs!cF}Hqw;;&`AxY)n$uaPY4v6{e3nf4wMWzQLq(LM2bboCTBcC7t?LB`b^@>|z@F)ks|qo< z@yC<8a4yzGMuBj$4f5y_Lc1a0P)dtFdY2P*Ta{3VNsZ5YpS;iBZib0YS`r5yin!7dk_FBwk@^Y((@;&U9k4S4Hu1c2 z%xZzCzEbs}p>JS1?K3BWuZCM_;?4e*Gq81pF_k zvHhX_I_*w;7ACH5qIU6&59==!-oE9>JpZcD;z+G>-&CZU4EO%DObdJ_q}xQskD;#CQw% ze5sovdfWOJ^ddSC&li0i=+vmT(*yn#Rd-WJ)H6_@{96lpYdifIn;XyF;M9ENv-%>> zl7P7ZcJ$|{1RL4!Q^+W8PF{lijJ@a%-uJeb1eeYYaa^+f8o7$|=*xE;S@DRuZ$?Ys zzL~rSJb^u6XWPCR+9eO=o!vLnxP(XkSvQ@8ofuD10e+*}iaq(mrS0nj`W@I$uzfr> z9s`ffz}W5qAG9-1jjc@8i&Xv7sOu;*hTE|o`#H&DJ?x=s5BkhgeeS~ZFB|nDRga*q zp{}d?M)BOVO7bvwg=){J_8RP>Avv7=bGySov`hMjeZzYt_RR$1-w^#ek^uZ%f5J`M zPwc#du4QX2!cQJrf;sN!%toKQ=dE`5B~?6!pD#^OJ_+`|7v?$tEcom%l*PC|iMl;* z-d`QXT5yq75_QI3!dhV4rxIaqGsN7w@qWT{n@sIG_9w_sO?ndg<+1D*WAPRn)Dg!m zavqPxGcAHKe51{v)_%TkT2r5!4uN-|pSKGiF)Ij7f)4djhp_Hl697OdHaOUJ>torrbzCqGB>@tplx z?={S4SRY5$Ox_zejFb7phjm`0-ig*I-U7tX{i@74@2!cBb&KDW@eWkHGsXA-`>3-q zs}?>T{F|RMe@{W%TG+sAn)jjZ#E$wh!zxbOvDfmybtDb9A%!LikW+BQsTyD!SHI!#@xjn<2jB+lQJ2Ol-%DivS!#bWy3kx@0+iGwg8MLOy zBPlud-QMwo=hYLvv3Y$+&FgWIR|e1PLuy_R^vee3b%vVPzwDO{%l z#uB1FvG!j;uFAvr92CnH=QT&u-wX2^NXek;gIOmx^RvdH{5)>=mepv)Vg}xE$sdb7 z5Xj1+@~o`(h3V9^ke@FtPs}RO=z+~yS=o^N{$b?yl60$OL0{~<4_m#F1!2p-pzijr z&7V0@Z(>a&N3jiu`t(!h!+J}=3{HDJ>_V>Kyvpw?25ge&3t|0EEwF;~XdUD`%T{~< zd%E;j0;XQQBpjTU8#;;o0k?Y_If2{p+0&icd#ffP*PcL|Z>F^L&B5M%)?N7x7v`)B z+UHY@`4}(F&#R1Y=_fx8+bga3FG+$FmdQ( z8{^eD-LPUr2wp8&otJrHzJ$p2l=hn)ld$%gzz$akjb&`~0E=r5kc zxmA9x(??T)ivSm)34IZYSaZnOQsIAnb5iv9=DNRQ-2wk1XIgh#1Qa|MF5F;K_n5<((WrZ)x5 z4m|4_cM0p?iJJ~wJ9OL)9Hj*OuPWS|3Aj6edj&YWWLWy%fVo@Y_9x(G0JjUcdw`om zP2mW}w?<(fRal(K7)gtdZ%&#U2nMGf4`*0MIKR;n}f8U#c6QkqtCG1OiY#kU|Lz3JJ@mUYQE3aJ^)p^}>ozDxa zmPhr=m~*c4nPFr_RPRuAJ`2oS8P!j#I-lunM7>qj`MmGRFGTh4sCqi;=TJYO>U^Fz zdsS55t?C)5Z$Q0K)yJWJ6!k|{eLU)YsMn&7wWhOAllDA}edqPh${06YoF|gsjdOmS zDQ+jcR~CtVPeIpYyb~NwA@k6^S_o_H(f4uINVKj~@MieUg=v8mGOrhOc@(yL0BbPT zjTmEmH}L8=3FZ2MxEjfh!eQ*t`t2y_5_a6@N1X9qbFd9=i8B_a)vw@lW?{psp=a#Q z=Mcv>@0%^l`L6%&Hwn*4tQqmH50~ku5F6uYMjr16C!)@KN6w@8p<=0HjIIdw#ldOo zOm1@|yCB5A<>w)vi^hQMcLWNy9n?Z)8o6t6Hwc>-Q;1r$GD>qdU`*iWOFsO}zTh)g zmKzB&YB9>7*3b9!Jr{J+YS6Z(0rPxiI)68c`V-6Dp_F(*!|Pc!4H4)O>k;iHKt?q8 zX~st^O~wFx=iW0*`J6;K>HF>mob5~beUg`q_t6RcJVW5@*f?~btfvEmv8Z+i#)0|c zG16U6Tb8>xTXT#Pdgc6LB|O53;YL{rU^nm@ABo0 zS?pbcZ%M&lzHC_$M#yv?)5X5!mCMR#mxb?aEqrHd;X7Lk-`QID&epwC_Rt9<=Y_e86W) ze3k?~;#`S!aG7OxZe||Nyv5~uY#F5ly{>5!T*SO=+_T@atcy}CcyK=b7kEAg&p^58 zqGiP#JS;?-@Jq|m96X#$P&Rn86_2TjI}f~b%&W@q(sQn=%%Vx&E%%Shq>6Xb@SY)l zFFgqHIpC0>i7J8f?oWO;={XmW-b~-;0rtLo{ucr+@Mi$){FPBsG*Fk>t zI`Syw=dL3^2zg4^$ok)ayzn~mi;%CljywhV%Y;0Lv0(f>1Ise0GR=#s7(e7^Mv!x? zR6$r{ zqZ;7FX&eC@FL{8R0lp7-j+-Iz;(suc7Kv~nwt8GX&b8L#?hGkiLNA`Q!+wg_?A7hS98HXFbaS5QpwLbpRk1t&l zeC|bD5%JgM#N|}}>g_lF>G|FGmkElZNf>v6zIl~W?<6(;Ni;mpTlvhY+k_izIy|nm zl5BvY!{bg@()@pzBmAZ4@VJgj@c9bU+K298zj2uv2p6Yc`f)5%SC}c>;>5i!T_%n? zR8f{~oMgsCTQs*RimG3wPvPu1j@ZgTMcLLT{d*;>S-|5$sj{hs-v};(M z>;TKUdHKMtta4Jj%p&RM8_x&TDN>=6+T{U#H!mLqRX(sOoFC>HNX!S@#`1yi%dMIZ zWE98CU2k8rbBAQz*x!hPjYv7>Mqc-^^1x8j6iuv4oV{Ndje6C-Vl>*SaHG-F z3O5?)#B&w;d4TT#@rT<39Nxx|7J;|t)?S|@07CmYt@rLr}P#jl~4pP zp1aJ)73>B^;nnk&dw!CTW>{yt$1xoJ!?B&CTBOmiw$%28kD^Rdc8&F5F&pE~m g{>yjTewU!8JAd=Nzd!%XBeQSgIzJD7@@*;kAE#Kl9{>OV delta 2116 zcmZvde@xRy6u|F$lv)s2`Bkx$TH0D^!SZ7Zei4+~p|YZtEsDq%Q>=)SnJi9JT&2}j zs>`y3J+n9xHzmp+#u@yTxI`CR)G+oBF4qKGxp#N( zy}NIF`;^eX-9U(C2_d%no3m^!s2j*4TRkBTWKGDLkTo@U7PTAR7IwGS718V2=f2+k zR&uvhj3v693bKD0W7Z{k4p7L8+{B|TA!Ot?TrX;f$4=0Y`@(GCA!AkCk1%UVB=%Q~ z?HlNSZBB;!oV6c!6LNas;v441>>ix^a^PrmE^-Emab)I+m3aLEDvV)BEl#YZVCCPn5rHmaYqgXa>u_M)#)Q@3=l#fI=IGt*Zmw7r%B^VL! zfoG{s!3T9|x!KWOk0q1JYvElx%WnSjtE@bSN?<^z( zbE=m)TqMrv*f#>9jPxxcKMH74)OoAXV}FH>wmg_yz1z*;06T>=bWSXPG1t`n$?R zH^)Orm5HHy)k=Fm#!R>A@Fc7Hb0Xv}AZ{_9uz43}IbGjLBehls+^z!R6gfK>WsC6w zTB3FV$rcCWU`TC-9&JNHv|s^W^G{Hy%cScV*mVkeT?SipMf7qTbm(mKY6$M@is``+ zsP$WDRtVZqU7CV1y`45p!P5EP(;Jg;cfNrhn*^!BK$|DQjB52H)EG=OdlC*9)=W2- ze;&LN{z5fme{lH(j2aB?+mQl?ar!}5jLdcUIjiDn@jGB_Q`z4*Pkh?;6Ucd^VzE)f z&8NgY6aTpXK-NX~x~6*_Kz=REKA&MhWIrOi6kt{J@nu@D5j?&Zxr}(+yt^}pdogh4 zif3MXQVck<#VekR`m@jd|hXaW}ye}7;DikHmy(e4T(f!tJe@)3$E-il`i*bN6tuy z>+~A3Plh)AMb;uA#*&v=46&3X%rHwhPX3Ay%G{Ow7vlfS^m&9B4-;}YpEEqGHFw81 zd4uj%BaHbGagpP|Frpe|teOK9&MY`;kp)n5;cP9AcOEdNLCuA;tvD>m^cD)~sq~{cIL?@gAA_R75k%%62X($$s-lDx1i+fF zl~$lx9e{25Ch1Nz&jg?&-yl7Xrv5S9#DHEjRZn0H%>bI+Phe?*)|!F8J(8!d)|-b+ zj%@nSaIcVMAgkb6psd2N7Fp#}C@Qo`PojD985}4yr1hg&`yBs_c-ZnJ4fezdR|@qe zKf*&CqVO&GmDJ+DmX`yR92^!Ly+MdCGMOfEiX$#O+{5+1jOAhZTsS*|Bj?41jYY!} N2(2u-U|Drk`X5<{!7%^; diff --git a/tests/test_fpu.console_out b/tests/test_fpu.console_out index 3e84260..3a5a601 100644 --- a/tests/test_fpu.console_out +++ b/tests/test_fpu.console_out @@ -8,3 +8,4 @@ test 07:PASS test 08:PASS test 09:PASS test 10:PASS +test 11:PASS