From 4ad5ab92038412d46ef0dc2477e079219b8d7ced Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Jul 2020 17:34:03 +1000 Subject: [PATCH] FPU: Implement fre[s] This just returns the value from the inverse lookup table. The result is accurate to better than one part in 512 (the architecture requires 1/256). This also adds a simple test, which relies on the particular values in the inverse lookup table, so it is not a general test. Signed-off-by: Paul Mackerras --- decode1.vhdl | 2 ++ fpu.vhdl | 48 ++++++++++++++++++++++++++++++++++++- tests/fpu/fpu.c | 38 +++++++++++++++++++++++++++++ tests/test_fpu.bin | Bin 24416 -> 24512 bytes tests/test_fpu.console_out | 1 + 5 files changed, 88 insertions(+), 1 deletion(-) diff --git a/decode1.vhdl b/decode1.vhdl index ddcbb3c..c0c3465 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -419,6 +419,7 @@ architecture behaviour of decode1 is 2#10010# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fdivs 2#10100# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fsubs 2#10101# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fadds + 2#11000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fres 2#11001# => (FPU, OP_FPOP, FRA, NONE, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- fmuls others => illegal_inst ); @@ -473,6 +474,7 @@ architecture behaviour of decode1 is 2#0010# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fdiv 2#0100# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fsub 2#0101# => (FPU, OP_FPOP, FRA, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fadd + 2#1000# => (FPU, OP_FPOP, NONE, FRB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fre 2#1001# => (FPU, OP_FPOP, FRA, NONE, FRC, FRT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- fmul others => illegal_inst ); diff --git a/fpu.vhdl b/fpu.vhdl index 2584e1c..fee1776 100644 --- a/fpu.vhdl +++ b/fpu.vhdl @@ -41,11 +41,13 @@ architecture behaviour of fpu is DO_FCFID, DO_FCTI, DO_FRSP, DO_FRI, DO_FADD, DO_FMUL, DO_FDIV, + DO_FRE, FRI_1, ADD_SHIFT, ADD_2, ADD_3, MULT_1, LOOKUP, DIV_2, DIV_3, DIV_4, DIV_5, DIV_6, + FRE_1, INT_SHIFT, INT_ROUND, INT_ISHIFT, INT_FINAL, INT_CHECK, INT_OFLOW, FINISH, NORMALIZE, @@ -639,6 +641,8 @@ begin v.state := DO_FDIV; when "10100" | "10101" => v.state := DO_FADD; + when "11000" => + v.state := DO_FRE; when "11001" => v.is_multiply := '1'; v.state := DO_FMUL; @@ -1041,6 +1045,36 @@ begin arith_done := '1'; end if; + when DO_FRE => + opsel_a <= AIN_B; + v.result_class := r.b.class; + v.result_sign := r.b.negative; + v.fpscr(FPSCR_FR) := '0'; + v.fpscr(FPSCR_FI) := '0'; + if r.b.class = NAN and r.b.mantissa(53) = '0' then + v.fpscr(FPSCR_VXSNAN) := '1'; + invalid := '1'; + end if; + case r.b.class is + when FINITE => + v.result_exp := - r.b.exponent; + if r.b.mantissa(54) = '0' then + v.state := RENORM_B; + else + v.state := FRE_1; + end if; + when NAN => + -- result is B + arith_done := '1'; + when INFINITY => + v.result_class := ZERO; + arith_done := '1'; + when ZERO => + v.result_class := INFINITY; + zero_divide := '1'; + arith_done := '1'; + end case; + when RENORM_A => renormalize := '1'; v.state := RENORM_A2; @@ -1149,7 +1183,11 @@ begin opsel_a <= AIN_B; -- wait one cycle for inverse_table[B] lookup v.first := '1'; - v.state := DIV_2; + if r.insn(4) = '0' then + v.state := DIV_2; + else + v.state := FRE_1; + end if; when DIV_2 => -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y @@ -1221,6 +1259,12 @@ begin end if; v.state := FINISH; + when FRE_1 => + opsel_r <= RES_MISC; + misc_sel <= "0111"; + v.shift := to_signed(1, EXP_BITS); + v.state := NORMALIZE; + when INT_SHIFT => opsel_r <= RES_SHIFT; set_x := '1'; @@ -1609,6 +1653,8 @@ begin when "0110" => -- fmrgew result misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32); + when "0111" => + misc := 10x"000" & inverse_est & 35x"000000000"; when "1000" => -- max positive result for fctiw[z] misc := x"000000007fffffff"; diff --git a/tests/fpu/fpu.c b/tests/fpu/fpu.c index cbb0ee2..e62ce27 100644 --- a/tests/fpu/fpu.c +++ b/tests/fpu/fpu.c @@ -1111,6 +1111,43 @@ int fpu_test_17(void) return trapit(0, test17); } +struct recipvals { + unsigned long val; + unsigned long inv; +} recipvals[] = { + { 0x0000000000000000, 0x7ff0000000000000 }, + { 0xfff0000000000000, 0x8000000000000000 }, + { 0x3ff0000000000000, 0x3feff00400000000 }, + { 0xbff0000000000000, 0xbfeff00400000000 }, + { 0x4008000000000000, 0x3fd54e3800000000 }, + { 0xc03ffffffdffffbf, 0xbfa0040000000000 }, +}; + +int test18(long arg) +{ + long i; + unsigned long result; + struct recipvals *vp = recipvals; + + set_fpscr(FPS_RN_NEAR); + for (i = 0; i < sizeof(recipvals) / sizeof(recipvals[0]); ++i, ++vp) { + asm("lfd 6,0(%0); fre 7,6; stfd 7,0(%1)" + : : "b" (&vp->val), "b" (&result) : "memory"); + if (result != vp->inv) { + print_hex(i, 2, " "); + print_hex(result, 16, " "); + return i + 1; + } + } + return 0; +} + +int fpu_test_18(void) +{ + enable_fp(); + return trapit(0, test18); +} + int fail = 0; void do_test(int num, int (*test)(void)) @@ -1153,6 +1190,7 @@ int main(void) do_test(15, fpu_test_15); do_test(16, fpu_test_16); do_test(17, fpu_test_17); + do_test(18, fpu_test_18); return fail; } diff --git a/tests/test_fpu.bin b/tests/test_fpu.bin index dc5af293a27cd0d08eda8272695afbe7e9ba89c5..572aad0ef8df74ffd3de84f0d0435c5bf1706b02 100755 GIT binary patch delta 2463 zcmai0eQcBE7C-O1wsyGGZftE=y6#=K;aail#^~OCv2M&!IvtA_KQK-=U}hJ%+~soh z0(T$9R0G~XhUZ3!!B~irX+$O6HPb(25@IGI!9Os&0d*UNbSPuQO?nl4?m6v(yIvAb za(aHx@Ak%o^+g#f-9fI$0acLlan?0~jFJ3t$8n~2*)+@_A;zQUza zaNpXca@>7%@9BF=jbTRxz~s3fglGa%vm3+p71Mi||NIMpljqL-&m8VGS{*EjlVqwn z*az^^x&3pJyR4c^uwE)9aWvv*XFO%j@gX`CW|zUHKx^46!*~YllYBkwOpNpZu4{6(bkaqAG~HR$1!u76vr#(mD(qt z38{Zfa9x%(xvUd=Esfg2r$g$$F+IEd3ay;2)p7g9gM5^Fgs z)->q=-fvy0U9%yiI+@F;iHj!6w3Xl~Exu_7owmvNN5V}v&;vsgQEma!jfrYq3InJpp z&vVi4Z>cwb7EKcLse^Z|?{0xS_00_I&3i`b#EW?=Sm$!MvRIe?JKYfWx{g_$1#Yt46$>{}PX$D{!R-?@6XbX|qk6pJFjw@?9mWF(l_Ic=}3qa)ZX(tzepYv#$?(6oz`a}iJ&T*C03!l$n4^>6QlENzK z1^iQCq;G(-N`vQpd>0=O=4^J(@+(obgt;QR%iC#MNwb{h0ICzZ-)r==PDNs!n#8;c z?ZMkCH*At72@$V0(=(}%R;RlsOYY4HfdYI-R?AUjacrL4! z$}=+85ZP0r>9D+`n|Ut(8RE4G?(es_QR1%Av(Bh!dX}2SN0nkdH#3zwoqOZf<|8CT zk6Do>Bk69lWaIBa2KB&!Cqn8qdhZl^n0rMIuZz8GQVfi3iElBq>BG3LxOc(4;pO5( z(ka~IdEa0QPpF^-+7$?FCtaYVvQZy)Ounmu?sry04`cbbe!GU6c0QLvuSnW0&X>?} zV<$B{#*-z@rsS3dO8}g^0d^O2Ry5@I6J*SFlE;WsD6%ovO+G_h0TW~#`^h!m3UM9)?=H)ehrUI3nOi>b z9X>^&dJOlL+2svm_#TDMaXd|-e;lVM42`2hri1U6IfOyCv@k5AxU3c(3{ zkHWwNo~CeS0;ed%C(z)v%gP0Gd+m}RA6jP9$^cy#u-R+XSyBK-F5nigb-7AVi3(xk zT*(yj2Z<{pu8WC=)H--WT4C^|*Y&m-p#D2u9UrNSG=GiaXI_VHKf!-Samwq`jS!rU zqP={vZiZmzMGTZXbT$p$!6p0)!3Kgem+-Z6hifyzCG>%h@-8b4Ewug{QpW-V)X^4q zCo}0M%?~lWRBqP|5*(aFw`$jo67*kAz#9b5C!kZ&QdZv!F*tv{*5B4EAijY#fX}M= z*_#Qip??ettH~P3gT%GrQFVoGgwXUA{8_Dc8+9~s^|u1?9TyO{i#UFW?Ll&txS^}K zu_B<|MKBXTs<_uSNND6oA=)HTJYZ1hjB=9!8gzA(M%PlCYi34>UAzPtugu-pIV(hn z39=$`ANlAk)>pc+(tZ+xDK?e|$rapLxlpTK7h+rFU}e1qi`P0MExr#;=x#{G+6H3< v{okPeo4%Tebg*M{<3t3|`*^H2pRGM6#7!Y;hj|Q|@G<;KvP6uJwdnr~ok z{*;dIguW9(1Y@QQ#`4R4oRMFJbxr+>{4&PO$gM|iJ#y>UupJRe0^6}9Nt}vp+y3(l zi8i~=z}V=;8=My~R8!jQFB-!#hP} zA&}$!t%yBDpG5W9V3v2%DLk-%^M(bCbr^3fg?yhh@pv$7@mZ%RUu%&LUxP!waZsKh zgDADP&;l9iBB2Sk;nf>1tBb^WK@jJ=Np4$ak^X{PzNygXs}x${rf;s?yv8D}b#mo? zN>O?fYW;HL8&+7P3*hOW`k8-;yxVM%7SRH1{&d$~|6F;~N{e(15&}{|8=w?QAT}UZ zu3T-AqLE8d{DO`%CgEXOT{RpBIouHm#`$(F;o08}faCt)f|~yf;|4 zXzE{$sP!?K_z+(a8E!u4W@2ULf} zL3f0AXd}LdiV2f6-h@ASLGQf>b@AxLjqwn2a{+`%FaJ|+(?d;TX6e{N9FIO@AU4t{ zRKeOvN8LU&r(_K#nn(*`nq2Y(okL>`aZxzYXK6f9GavUDOLyv?*0^Gyn|G*BhI3zH zW%bX{%#zNaNMN-1L{ndJS~54yP4w2sl;TKoZFX3kfpAI1H8 zyKcvBnpfvQA|b}>@Yt*(yht)gU8HPJG>oV`}lGq{(#T}yA!_l zXtEDTOq*Y(W%*U8D@{zxvD$T`U&>g`p(3`Ku-NHZC1cTs$2k{fGAJK$CZ|sI#2R`N z7ptcogvT%zy_K=8bIBY&B`v8`M$lnPk_mMiFL6GEO9~Yxs#*4B&Wnk_AkKtYkK8y` z3gSvIm;TCm3lSK^y@ffBoE9quaW$Az$b}PuLDhz-yu$e<#=t8%RP49{DamnScOSfg zCA%MXC2PbD{m_o3d;of}G!DQ7mW}~Xr)b2H0Z7538iZ0T(jaWYVjhHDSgeE4j-_=F zda;}zgb6Hc2-K+>F=_}>QZ=Q;L!4KVnS~zlo|(0;c(5!*CN6_8Cq>3u{$ros$Ik^X z!<>_rLW>!Loa-=jrD_z#2up@xELEqdM7VJn)RJD&g0N>8a->MbIfQw?L4~AKOd&K~ zg*^zv@r#_h3Y`cG5h_RFnxs*bBkUdp4}(U&6X8y%GK46OB8s{W)dr2?62jvI17?Pk z7dd0aaMBRwABE@({7*~fD6%o}kAbzqeM6SQg2*%m4-E4Y>5aN^+4K89Z$w-la_5nw z8z~AgZzJbA4()0A@@xfTt3hs@?Ng4(?=~J60Tl~9v^Y*N7cz}u`gwOax6-{B)RGQX z3vw1BFrqL`n0^zm-KdW)oZy@&3X<1jnvsj4#u&uy#H@7m8*^kw(hCLnbV;