From dfa363ccaa46042f237bdda4a3403edc2ffab49e Mon Sep 17 00:00:00 2001 From: Jeff Scheel Date: Wed, 2 Aug 2017 11:41:56 -0500 Subject: [PATCH] Inital port from SJM R4 document --- Intrinsic-porting-guide-draft-R4.odt | Bin 0 -> 100413 bytes Intrinsic-porting-guide-draft-R4_hack.odt | Bin 0 -> 80388 bytes Intrinsic-porting-guide-draft-R4_hack.xml | 55 + LICENSE | 176 ++ README.md | 93 + Vector_Intrinsics/app_intel_suffixes.xml | 318 ++++ Vector_Intrinsics/app_references.xml | 70 + Vector_Intrinsics/bk_main.xml | 103 ++ Vector_Intrinsics/ch_howto_start.xml | 115 ++ .../ch_intel_intrinsic_porting.xml | 46 + Vector_Intrinsics/pom.xml | 148 ++ Vector_Intrinsics/sec_api_implemented.xml | 35 + Vector_Intrinsics/sec_crossing_lanes.xml | 111 ++ Vector_Intrinsics/sec_differences.xml | 40 + Vector_Intrinsics/sec_extra_attributes.xml | 137 ++ .../sec_floatingpoint_exceptions.xml | 73 + .../sec_floatingpoint_rounding.xml | 33 + .../sec_gcc_vector_extensions.xml | 113 ++ Vector_Intrinsics/sec_handling_avx.xml | 91 + Vector_Intrinsics/sec_handling_mmx.xml | 72 + Vector_Intrinsics/sec_how_findout.xml | 60 + .../sec_intel_intrinsic_functions.xml | 122 ++ .../sec_intel_intrinsic_includes.xml | 82 + .../sec_intel_intrinsic_types.xml | 89 + Vector_Intrinsics/sec_more_examples.xml | 76 + .../sec_other_intrinsic_examples.xml | 68 + .../sec_packed_vs_scalar_intrinsics.xml | 302 ++++ Vector_Intrinsics/sec_performance.xml | 49 + Vector_Intrinsics/sec_performance_mmx.xml | 41 + Vector_Intrinsics/sec_performance_sse.xml | 44 + .../sec_power_vector_permute_format.xml | 147 ++ Vector_Intrinsics/sec_power_vmx.xml | 67 + Vector_Intrinsics/sec_power_vsx.xml | 186 ++ Vector_Intrinsics/sec_powerisa.xml | 33 + .../sec_powerisa_vector_facilities.xml | 46 + .../sec_powerisa_vector_intrinsics.xml | 79 + .../sec_powerisa_vector_size_type.xml | 119 ++ Vector_Intrinsics/sec_prefered_methods.xml | 57 + Vector_Intrinsics/sec_prepare.xml | 66 + Vector_Intrinsics/sec_review_source.xml | 64 + 
Vector_Intrinsics/sec_simple_examples.xml | 62 + Vector_Intrinsics/sec_vec_or_not.xml | 134 ++ intrinsic.xml | 1518 +++++++++++++++++ pom.xml | 22 + 44 files changed, 5362 insertions(+) create mode 100644 Intrinsic-porting-guide-draft-R4.odt create mode 100644 Intrinsic-porting-guide-draft-R4_hack.odt create mode 100644 Intrinsic-porting-guide-draft-R4_hack.xml create mode 100644 LICENSE create mode 100644 README.md create mode 100644 Vector_Intrinsics/app_intel_suffixes.xml create mode 100644 Vector_Intrinsics/app_references.xml create mode 100644 Vector_Intrinsics/bk_main.xml create mode 100644 Vector_Intrinsics/ch_howto_start.xml create mode 100644 Vector_Intrinsics/ch_intel_intrinsic_porting.xml create mode 100644 Vector_Intrinsics/pom.xml create mode 100644 Vector_Intrinsics/sec_api_implemented.xml create mode 100644 Vector_Intrinsics/sec_crossing_lanes.xml create mode 100644 Vector_Intrinsics/sec_differences.xml create mode 100644 Vector_Intrinsics/sec_extra_attributes.xml create mode 100644 Vector_Intrinsics/sec_floatingpoint_exceptions.xml create mode 100644 Vector_Intrinsics/sec_floatingpoint_rounding.xml create mode 100644 Vector_Intrinsics/sec_gcc_vector_extensions.xml create mode 100644 Vector_Intrinsics/sec_handling_avx.xml create mode 100644 Vector_Intrinsics/sec_handling_mmx.xml create mode 100644 Vector_Intrinsics/sec_how_findout.xml create mode 100644 Vector_Intrinsics/sec_intel_intrinsic_functions.xml create mode 100644 Vector_Intrinsics/sec_intel_intrinsic_includes.xml create mode 100644 Vector_Intrinsics/sec_intel_intrinsic_types.xml create mode 100644 Vector_Intrinsics/sec_more_examples.xml create mode 100644 Vector_Intrinsics/sec_other_intrinsic_examples.xml create mode 100644 Vector_Intrinsics/sec_packed_vs_scalar_intrinsics.xml create mode 100644 Vector_Intrinsics/sec_performance.xml create mode 100644 Vector_Intrinsics/sec_performance_mmx.xml create mode 100644 Vector_Intrinsics/sec_performance_sse.xml create mode 100644 
Vector_Intrinsics/sec_power_vector_permute_format.xml create mode 100644 Vector_Intrinsics/sec_power_vmx.xml create mode 100644 Vector_Intrinsics/sec_power_vsx.xml create mode 100644 Vector_Intrinsics/sec_powerisa.xml create mode 100644 Vector_Intrinsics/sec_powerisa_vector_facilities.xml create mode 100644 Vector_Intrinsics/sec_powerisa_vector_intrinsics.xml create mode 100644 Vector_Intrinsics/sec_powerisa_vector_size_type.xml create mode 100644 Vector_Intrinsics/sec_prefered_methods.xml create mode 100644 Vector_Intrinsics/sec_prepare.xml create mode 100644 Vector_Intrinsics/sec_review_source.xml create mode 100644 Vector_Intrinsics/sec_simple_examples.xml create mode 100644 Vector_Intrinsics/sec_vec_or_not.xml create mode 100644 intrinsic.xml create mode 100644 pom.xml diff --git a/Intrinsic-porting-guide-draft-R4.odt b/Intrinsic-porting-guide-draft-R4.odt new file mode 100644 index 0000000000000000000000000000000000000000..bb965c7c195025ff5f22d3ae3d0266fb8273c5be GIT binary patch literal 100413 zcmd3uWm8-Yu&uG+ZV3|H-QC@NaCZp7-Q5We!QBS;5L|-01q<%(E_c7T?)`B7!TB)7 z6i`!a>t4Oq)2PTp!(c-|z(YVNyFber4zeLrLqI_M_X55J@!jUTg{zmNg^8o1osF4^ ztBr#_v%9@HlY@zijSG{5qlLY>gPGfR3wu{4R|`*9mH+W!g5bsj>@Wz3|6X5!57V%A z`)+D)Vq@pR?E3%TGCA5?MX4xBqaflV0>6SHDd>w>V}M#Dyj9fE9w}V#13D!oY`mMIwtq;+AXd-CMnN`At1{`WTU|g$~_L5<9%D zU*vA^zMQ@Z`dqM|-4wXD8=#bJHSNwutlL0^&%H3Bh}-foZgC)2Y&C_>u@r~TVJh_} z;48`V{O|n19dw0z`#;-eTn6oFu==|q%+LL9w%3uoS6L#d*e^4w>=|#n8|F6-v}+q5 zkMSRuUA}kHN3!(}@0FI~V&X!~#kgdhx6>32Pv0M>?Ka84-&*+cJelx1uDC7sjgBTb zb`4UWgETzT7fn6XAwUzq97);AZB4Kj(~T-KJxspQ7S}Dkp!pq&2booHBn{V=WS43P zynyEyipwH9WJSO~cI7%d$u7Kg5E(dI2r5#t(b~OLN6_~hX+P5u!Nt(;ZsFy; zeTrJh_q^@Ew=ko&`~BHU_(jLd|Lw#`_SlBQ z+Zk!$rx9hb5ozIP)oriMAh`F_sdohfIc>`f3v+T~iT9W8kC%Ql;nxu2*Uh|-r@S{l zt4YFU8pSg>V&BVv7l#j&d5-Tfj4hW^PkA+S++A03-H$QdDhg|W8-z7dvAtS zq*~-y5T@$=pGz=lZr;c10bla@Cvggot3RJVEQUKYrOsG~dq3=K=Rk zqvZsd|7rWxAi>p%!^geD2XJDmejkIoKi)1r4nAJ<#+bVW1_)iJML%`|UgKwqq=}sd zFg_c4lHzzCSCoMd1EEoS-qT{EokNO 
zdz|l%BwuvB*voBCi=&m$;_#adBJbHXj8Hi$ABUpMac}tE2*36V_dwzY?p%*CfBfYu zIQVJyV}`KU|H5a(=X!*ilz{CA|2YpD_di#x>8U*7kKJVDMLv#L0&di>Ovl%&p%c4< zY!}qf0u=-6ke5_VUD+-A+KvX)B6q>#MLmaqUI#e^uZ1y8vjn~MOSfRn61I3kUd#JC*@|(CEIjhFb$8+sRW_W%^c^YsTdi(iKs*32jU5-;$9l8bA4&Q!;?PwXe;hae# zDCP;h^%K8ewgR{3?c_iA)wp5Vl<=P%HnRsp4{ZUjLjf?@d+7I?df%I;gkD6}r3k)D z5IMqDG-1H!`rR)Z9-jzld=`a5slEg*$m1}1yaX6CKTZO8NW51nV__qc8#KbQ{d3f* zSuDKGwOuASO)zBhU;kbB|MfB0tLsM%LF*PnJ3J(n{_2)u?{e1s7kJ2WPdN|CaYi}s z*L^PT7*{MOvnyIvri9;)l=nmzjNbQ*wnuV2)@2uZElkpnX}F&GvAbHcZ~tUUOiylS zAFKJI8l*AJoY5e|TU!p2^o0iixkweWz9ZY1w!Q2iUmF1y&8Q+F6 z`v$$*x1VuYzpVW2k^&L>oYq6Yb?v5$9R_;Zy+3DHld_1IXp<9RmPiwO9~QjbucGyS zoCLfvYfkw*gLtti2#$mHRgfGboV|ajJ^f+!?;U5|Tg#uNvBkt24pH@ALZHs>8Sjlw zdV=6X&>P2AvjO*Gxoc9xsE;ZHOgE`3@qYSF66Qd&Vb?hEiZr3Y!-r@3565@oAYs%q zYvD4m3?ei{d+>|um&)oEd<(2WP_v)F)c{ugk@7KxFvS)_xER_e7R&K3_rXWNGwt7p z``cxH%uZZj3|_uO0t+nyB|CJk4gi$XeQ z=*wlbAF34Aj0YapeH=h9(abn-*y7sbt`x@CJ#Vv#w=@Z2?8G%cyD3S}3uOG1$Q}tM z@+9g{q%K3-wDZy*zm%N)wcR|$;7YZZ7U#HOM_2W!tZIm!g&%wwM?Py9W2%bs-ok|P z9K|zCY<=xXOw`R_P~mJ%$L_u@VWzDHS1+aM+1qG94!u*pgLkTAp8i5#RhBCtinK&O z#!T<`ZftK{;>*W{QP3`gNxn~9lEtNB4F|5ia8Ue%&_+^*_@=(fKBl`=k% z;uSj1m)7gMka0qATcEHDBj|OQ3Bv~S&7)LgHHWqy%-*RZu%>1yrGUo~W~s6d>t^xo zVO&HL`al4Qa7-iuxhmK1J%{7jz(6W0jq4Fn??%inAw)UV?yBm zd>U=atR&8^oPKvSk?>F*R5?FE2U}A8kgTRfTQ@xivb@BjAyf!U>3 zD6`b$O(ZJ{V`9~G4by5;EiU>h`;uobOZ65PI7$%N_m6H1``(L0n^K_5V z0v`LTbQ(4CWXby7z2-lKv$bT)x zVrj$tB!;=`aSJVADdKF>A1C(ysSGG5i#<*{qIz3DZdW(n@8h`F9|x#~Sly22zq&6K zfIoYvyCpZi|JPmv?yUkeHA_36-Yx=!%?4x7S~t2Xpaou6yWdtX{2$sa(sZbb#%62S z5j{s574is8JKsv+=Tn}4j zeGiXKH}YRLYBEXm4gf@j+1P*j;`@kcV>-OGdl^HNX>HnM4nKEOH6Vi?HUd7Rg|0Ce z4FF++L$HotO0o%jqI`Rj4&%=?!iX-A=Ph&vg?YszP~Hq}D@tm*o^8LA|H*M*vDqGo zIuu7NwD`$pG0N0_di{J-yWxFOjo$`HD51+9=(=*s&`^G%bWtuZ4oo=C*kV7_<}t?B zHK6?jlT&9q_W3>hi--rS93?9Y(L&SEn@?u*{@kk^pRBxH&@#Gv1&WVa$$#1nXj5;8 zMe&na+{`y3Fd^T53H#iiTBI8+wi3S`N+U$^aIZOvGrM7;bO6OP&vP4Ld%z-CB6bR4 zuu<;6=8A?U^q8*;A;}2C;(dFVs%7{TPGCQchAb-GEjb= 
zB+rzHf-fS+F6aRCVf^#{q91&6;r!>nc1qF>S3Gy(s1-9q5j@tt4ge8EejP#N4S&T$ z!tPGHJO*ebr6-Y!&dQqK$I1cE>brhhNJa@*&{uFbS~?auDfy$V%Z$Oop21?xWo0N~ zX>_pg+?+&Kvgcae&kH6IU&Al7TL8%g-PIzBPIE4851*N5w-bwz8 z>>MbOC}|!gB0$}K3xrD4p-xl}=s8zl2g9#)XrZ1bLwbn>mOjNm*g6!&am}Ke7in(% z8*_5-m6w5%t$~}@dGl4`ow#^Hgb0w|ImIMol zVWo*Vs38$wvZPFyZB3NkFFU&D%eHP3++eGgorb>Bo@obZcBUbt5W)LYH!q4c`3IIQ z6BXkwe6Vgg?)+l>$Xg_NiPJqk=!Ay8c z*Z6~+vBg1CGxXU4ru>`aE4c-Uq>eBtO<^Q6@olUXJ{O(b*AQ=|S@!sxwkhXIueXJ0 z2)R`1`L#_yGM^s!Dkcd!#Y%Q}jj+j-aiR<9n)=wCY~h3bV!o^evI|PrKSPsMoVg;{ znoS8lz+l7V$2rfUOZKAl&6hLRgEt7H6#$z*WiA$9m*D$4YRFnXbWi5jJkDd2bwT0fD`l$)yxo8YM&H$HcDMk&&5x2?NxUkfo z9p_wWvG)N0zsszp&RZnPAa4RdMI_5+NUbda=srOT`2-K#^5>I zu*1QDJjt-qKn=L6t+qThTBZ_qXuoHX#j>J=`I3-70e_;^=Jmp)n|XAPo92{4{Njdk z?BZc)!NJJoYb4_@+K^~Xf@jC#UuBGnD_LVU1)+n)1jZ`ZF{lG%vQk4nNt_r34pRwD zEQYkvYD?w$)XvI?Deck3-Crkpi--IuZ$CMSF62syZ(GN*5-2`ZETK(BcTmv#@_B$J zk889`7;j!O8eD!na_-)VU+x<$YO)mE0+g8R$DMd9>G?v&FNIsc;KA}uzJ2qH-bmeZ z)bB8a(5Fhmd=tu`LFz58$HDxF`fHYr%%U<{!rn)b%0J%WHV_3&9kYqP1e^V^WZU28R!)%SE?v6!$qPQnJuNMiDibM&rlsV~;tW)(?@* zlgI@0K_M<5BtT;6I{(4ZN?>)j1Ewv{(b;5<>_5sw7$}3&3ck|NL77(LdtHQ{n?X)g z4MV-P`4A>Yy^gW4xwy;F+^mo3T3fYtziJbJ@o7z20H?c;uNlZ`X8A!tI{$@OA!zKXoOMd^m#6?t{TS$25aNZx_|4(L(Wzcer95PxHdT7Ob_ z-TvGSA+$u6XpF&k1w3ucWuN1s=fenegNQ&IKw%uEZVy4_D;GAtMWbytO*;+>5&D}! 
zGEN{us+BX+y<04OOlB-$G7N35EkbA44f{e!&&lS34vd>Z6kcdyq1gTDqWo0TU4Z?j0$Go^dWg0t3TyzN($0UQzyE%uV3_pjPwz&SurDy?tF(R zbcTpzVN1FeLd_6UvA9GN$%rMv`6D$lQbd?d%8ozja!&}&?cMQk>TaTrk~#L0AqqVD?w zmg6kTntT{N7Gx&6mdPPW7zwIfyGz5U3fU>kcHCUfKULOsM{&&gE~=vxkXf#ySWfg# z2?%eiUteGdoVzv#zr68n;*xGPTy?2tU zP@pyUOW20KewG_CMijx9F|pEC?44G2@;>}?x^U$X%^2ZW?5VL8uhj0E!vR-e!PhjN z*L)m@CK^!caBG!;5g)#P{faIM{_KDDqvYV-%g{ z@krslgx8h!G(ohsPd~%24`b%+)_BjF8ZG1t8D00WtnDtRPeNQ~F9ZIcU$jD=_m18w zrG+fqDlzG>Oe(q6NxB#ZQ%3G(;LaBaN>s)CWg^d51{V#ley$2`A7oBBoKuY6!#C$N zSygOr*?3!W9L!JXd62=hu_WXwmZP*pnIVlOFbw~Ij#=n|qsEDA+Z2P{mkq&f?_-_D zyUv&NYC%658*slUrUw3jg?FE3iTY2&N+OH|BBFR4-9OWCbTcI^9Q|-yS$kg@PCI>&5^dM>na7!^ZRD0^wIC5D|95uzRcb+VUgTv> z`?*>lFuZpjG;gvgPO2qDNBk#7Ud~#L_6_QOzRE;q=j)UR1ywirQBK-f!Y|as+^~PK z@21mHh9F5H5ht}%hdp*4Mfnmj5#;g_aN>Jygn%H znLt2BGPWS|a2Uc-isfK7zDE14kh4b#-Oa0;-#g}rsG!{pDhI|GQIs)-PbDO4TuvHg zKkqRIjC72Sn@QrBg~C2(+i!N6H^mBs$EqwKf$&mB6fmS99}(md;C#~!-0=utrseX5 zMJOh#z(r*vuw<%kv$E6`N(T6a^+G{(UMaE(fDWofCZBEQz3h4{7@z3dz=hE)oee{|}-oyVi^fRzkPCWL3ZL`>tEU^2N$fdL| zUe{s9%=W~eUR#=G4bX-8g7Gb0D#_nA?K3w%#(%Gkm&_b_UUdifaDPPYN zO5snIPn7ZwgjE|0v_gpJ<^kMFZ(cbmN4x{mH*~4V4dY7R-*Oq867+jfH^g!9HG)!s zs>!=_s*p}9BnR6_LQ__As+Ga6S}diQFZ>E#2u0EBvZ(s+u%HSB0JT4I3L)q0h=!}< z==OwTt+C}Gz$;uLufls(xXGhL@C>OWse$3>8Q;$nuFAorDZQJ~9GzCuLQ87E%HFE8 zH(%lDc1!c?n|}w?o4|=T(R2nSewBzw!26u3C|%kY@sZlsRq(@RxSS&?G70@{Grk^k zBn8@Ya(AfciE-%o7Sf=qyyuyP4(}vgLN4|J-&{d)(oqQ^wv@{_wyygiFW zk^G6;Vr}@b0S=u(hr7=MteG+-`Lf}Q79kzn%Y}vu?DZoFzjbpax-6i$9OQ10Dl*0x zFDfS`Xz=G_7^n5A>|8Xf+T=TJ#~W-lDqTk5wVEXZBGII243ejw2)#g7nXOf&-EP<@ zz!2|dSwYB8(&I9ijoGWhn=v$Jvpa%KBc*=&tVs2J@3!{dy86CRofVP~g~^zb-Sr^H z<2q;?Flhjvj8O|a>bPAeHKt2dWAHw@@MbTbcTOF!TQy5r%;*`y&3YG;aVZ})hVlBt$I#m@h7+DRx7VUu#nh?-2Pa{fZK@(G_!I0_S-5+w>- zT$s14=Km^YmuRHwiqm}DSx>EX3b>@j*@5JT5?{o#Z<06=4-X~TiZt7?YiJ1bGn(YX z#~bVBwc#2xw3b~GCyh9t4V!nl+8!?C4IFzvEyoA8eJMlv*#HP<_9?tAmrgfR1&T6p zG{Katio|7mNOaz~AEJmOBC-0=w7wmHG2JP!6U!bLg86Ko6N?`3xACREpP6qHtda z?nHTEz9r`Uv@D;XVeZ`SLt>scgjW|L4hw}>X1B1ks_nejeT&9&2dvS4`jrmpYBc7~ 
zTea$G&?}7N>$%G$FC6MmOUj;T2WZai2!WLH^o%(gZPs6HfKSScmccg!CxybAfh1YR zEfgV1?sT8n3C(#wAWq2et4yUW1Z1|f6gtD7U!r@r&bsmm$uU}{ftK>7dzdnnExz?> zKg%UT(EMlSjT+lGu5h(4KTGylwkUheYY6L7(5f;>VW70Pf=sx}5aZb(W%SJd;Ws)G zr{k=G<1h9URcg>P5<|t<#C^FC{wS*Q$Q>2{lQ9B}$&X-j!ly~ZD-+Pg?iJN4{WKK; z`ZP`EKJ`F;beY(3^tA(MDof&<<$*$)oCta!dV&ebT0W!~MJHe<)8nQK8I~!q4hK@d zn>L%E<2S&-kl7X#-s-*1&r>auC!J&?%nyied65QB#a#B30oPC*PLdJ2*Y@o91) zjmnCBC);6t^t%#A`ib8*~$J&8w8BhQx&o*7SU>6*GRQ|dxVkQ3U$a0R z0upp;Yb3uJ&2XFNMAp7~ld(tBXowAJB5rtW`0Yt6L^}Mn^2N#0;Xu>DzNzGvdIud? zqvVJx?`lzERm0(J@0Pfo4c3G0I1;)UP8*ADHUX;;{fFOGj1dD;=V3&(0{AI3K7PbM zX%4;{JOr1qGhF;FTgJ;X?|P`8UVWWc=%~y(sBh|Z~16OZ+CxB zY9%7egyNg;7(vRa7SNhkYI{~=+w~UFysIR0(zU;~5#7rccp+r?)wrCz*+PrCn7Ut` z9Yg2K)23@hxL92X@zz*Hbeu-}WEw0QgkSumQKsVZ;*EtuW=+&RA+1T3d+Gq^YyGTSN@pk-&lThkX6`|Z z|AGX;cT}8wnWDB^IfFYL%VKUmMg0%qd^%4NX4xzwM~Vm#6MR309Zo3Ny+|;bqEHY= zV%;=r2ifwQFvMx~xY91E$nEwB-Iq*sNH>k1N2sl*(S;ILp5|@;Gj1Wt*80E4ZP!># zXL@QB=i^$K;-A}OV*d2B-Lo!GdIEB+I# zfq7*uvPklzyd<+Jk8Qjaa{7{ zlVy424zj4bjO7vZ$rVSIWRzUc%=&ei(RRvg!}FlHm|=KCwRfu~7pph)s0>KUlB_Tq zo~pjIfp1_Q3b8@(C2DdSof5kK9y44yh4`eD(K^y;MyjSMn#}}5lVUt!_L4e_^HS7} zrL5UvyaM!V#QWNKP{f}HtX56Pb&_!`fC4}R(xNJ+WIkc5KB#5WlLvn@2I2n}7d;a+&FOB_+f0us!8+k49^5DKnmk~;* zPj2XES0C7#;X*AS`ygG6>uFfW9&bLGs2wNVODtgUfx*Mi=B3N4JAYpgQ2H{~w}_rf zl#KH(!K$z!hg?701^zo`4NnzoA_n0Q!uoqJO%FT?+jEM+xvR<$oRRgdrcEkdtKnEA z^d~P3-?ub-V@2txH*J16CT5fd`E+2lNYxLkGf4hC zQk=-(aoX;BIj{U}hbMUq*q#W>Y*v2h;1KHYsqS2i+Kkwp|1v8D8jf>yJN=ggiPx2} zM71vD@@oimo!jy7+1PZ|G5Uw>EvQ;TdJSpX!Q_y}n@RpDZRP%*JIltlhbv;Xxl$ZO0 z6cAyau!YYn4IJ!f+b*1PO9ej363{V#7bJv zCAx6~61(96GkhA>cby)ETcco|*JVGSP%}{N>=K6X9mGQgQ`HM+U#5mrzzAy~`Kt_( zWdX*4Ep@XuVmUHvA;u2J&YN5ukv5Wu-`y^^J6!q8g__<9!dwQRgR3=ps#xIQ51J?#{3I zkA2S9sPdf#lAJpA`55y3J#~&G{;u~U*g~$L`QnZR-X+6r5{)=VuDF*-|HkBgC=y8=g(BDD!Ir#P14YsoT&o{HCRv5QLaqUvDcZUklfqW)S$x%$kcK zMS8|Y4$j`6rcoq$13TR>96rl$N+oJ9WY$Et$)bs^bzN6XvTsxJS0z0E>i)d!@JN3* zH*g0;^2tTVq@y=+vCxjZMG;=I9Xuk4^0#C9&CB_zRR7U;#W!MOBbA}=HTa92_+Lsr 
z27Hc{g0`B9)$(+~#{(W?Sz!eS)!P-{?hAxLe;JB-rF^()CYe^erLQ|Bm@=tj9-CWh z!kXVE4t&{d>YFqc*lW)5&zP&egW~J5_6`boLsaf!4}6waUMqCmZrUoNMPZ%{QSl~ffVIC^sY&vEVx&G}A6=AY+&`{SpS#Mt0vR(xQw0W3jsx6bEP z$9@#Sn>iN-o}ub@0D)-Y<8IP)%SY<=f1b;mbOu5)MJDKecR})WL4wh?V{l~);2_w^ zKOag9ANO}~XDfIH14$I`Dy`mKTlc`AjKP0tlN^1uE8vn?P3`_iK|K8PAh$_Y-(Q~qfMp6WL=qXfldgd= zX9b9!T8~fnP+~^+MSXLDIvE5-elDK7CR$ZA7fV zK%!IWGH3IySR57acWNeg**5d6B2Kscm#)Jf05RSc<|WMV1-&`k)4w|{44DMIdQyCO@SQ_RSF6vlq(F-luTL4z%aD*WpB z#N2+`P=#mZq%{ZRfybe}6S+e0bN4_hs_ph%0M*DX>@?M~WQrspDs_+ZOv|4;Qv^Nz zsrz-0*>9V=xtx2a`j3L&;Bll@q!gmWGDrE781fwKl*8>R&5xtam37{>2ib5{EIROki)Bpt0wC~xVuUm87 zM<*g)fCEn@Wjl=)_62cBXv+WI{c8wF^4JgHVMr|jHQNIdY?$=?lhw@iFoWI7@}a^# zsN#TddJM8B)LP9Lb<4cS^hY8keAO#{rSN~;74Lj%m|imtMXZ`upzDUv1MXbISM~B|eT}Hw-GZ;Pjo!PSHEH8XCz-yYIPTa^vAUsPAnu>RU7(Ao2NYqm_` zi;E#mRera+8EV-A7%u{!$nCw$YR8=d%2yK`1hA zi|WPklWeB%I^0u)MfyqiZoi-W2M-zlXd_(5siocDh|xLqy{RJ;fe9~>)5)$2!hYL@ zEThI$R?;mIF3~BuIt#U##(KztxP z*OO&=pONnruj_em5LB+SQqiX?3?a-?Y9}E~Nx-_T9g;J>Q5P_UkBQHtq1o&C3YEWD zOQ&W~x&9a?@MnKi()RR~jr)_90zqD!4NJnTL8f}XB z{Rj|IYss^uFXEc!jv#b~FKmEPrcbEYB%$4)7>JYgABO8p#eiwl46-8*l zvGzn~z)YP!R|aVrdQr9kG|RpO3jcWt26WAKotSyeu|fVjlzt&K1!B4;i3odv`o`EI z`BxHym4lU^@JN~a;xLb`;AS_n^xHGlyEeP3?N}!kN-8Q*=0b`l-1eZTg)tKbUs6mm z1;@N>roT$#(Z9D*Xy_vJoXz={SKJ_&pz@zQM^z-tqwuhK25HI@RZuqHU~HFBV{)@g z6hfgoH4&V*X=g?iW}{jYMLH~2DbVW*G{kg8>7xF8{g=GzP!Y7GNJ)KP=U-x*1DBYg zKP>x!eBCUc@jkJMF%ej!igw*Y}~j&YXCHg z9hJ8oPL1tz??tv=2Qderlg8Kz2s0clC2xlvcTYVAU2$ur86aj9oFYVJ#FW(Hu&dW9?nCC5U$NX8e) zb~;-(chkGF^5;)415VaxyTWx&WaJ#qEj|6D-QshGd zf!Wn^)^5uX>voqds|?f|L(p zdlu?93RA&G8$ZB4%k&#g&w6yZ(W*#g0gpAGJwm8>06~eOG?1Y&Ju_Q;byQ(srl#UX zGy<^jq^AGqTwi?Xl3`k`;Kd-f-||kh10CDBi<$iM>0%$IB~dy8X91hpi}h(yyin%- zz8zG#5iNp+vug?(lRlilotlHvPg3h-UecSsX_2&lSL-Y)O202-*(X(a7DXJ%M%5oF zMt==YOWxC?uiCdy^r*WD6iYOVC-t#U4QPMEE+X73F8`00MYgywr|OboM~|MCz00KGKP zx_4lVYPP@|B2$@S7Vc0=mjd!7cQ5va&~5M~g6K&^s)D zE5~IFW^0v=E1GvCk3~6;(_neGxj>r041z7`j7HdYsp!miT@6i7cK;&NZr3|Gqu|5l zFiaFmTMj#?;NfATOsNP!5Ta!_D$ouTf5Gl@7DWi$f 
zL>VJ$AC_MH^pLmeRGbZ<%;izu@h^)CK6JR6&UBf!a7KS-^PT3KA~BRTs=>MDY<#LO zk5@bWY5MkuqFdM$?3v5S*^%SAIyF|&sgZ3}u=ctRDEcJCGi|Z17p1>M`+5T!N{b*+$M&D8xON zl8cVTBb`u>D^J;Aw_gfPDV$H# z9GriwtsV9vM{uW2HpcO+@}jmR-P(Fw{^eO8R~!?D(1O?Ohl;{2mJcy|x{ui_K{Cct zulhZG2wvHQ7^w@zEqM zzXY!v0DJo(3b|c&JYZ44h&`rvK>i%M#KDrTW62%Uhb5Eqvd4Q+#yJQ7BT1%;D?I*1 zd8rLT4SFHf0D|3W(!8ug)h=zkNS`x%)9;4>%z+T2#s2RheacLv*h|sBax@J6MT8F0 z1^e{b9(kTvDnDrtd7#&OJnoM*nu)8-d*W`=e|@(d_YZAgV=pG}3bl(#l#86K>ex2u z*sp@HypUULZ1^iVW`o}`hun!3@<`M5$NFpfd~REd;|C^6$oM@@>y3YjKbarD&R>X+^#=bS^UpFPY2cR zGbtnA{!goO02@{IIhfcJooyjqdYe7CfL@P&gkB5liEqf%pq-Zw$1j@v`?z z;H~DU&TM-gzC|pHG#$HrRha46w;5dQG0QMWCI(s2VYW-pn_S0Ot#q{Uty;>&jE^?B z>ZB`KEFvS z14&oNKhk`Zy4?n?gEZ4QlVd}8g)Ju8mtA2uZ-i7a8>{rre=0Cm^WA#ccMW&CP@b## zf3g>*B~3KH#+1dEpR;l*lqvr8ZGxjXf$q`x6S}m?KDcHrupj@}QF}ZpG46dh6>=#B z34itO69Yc3@R9i4!trKd``j)>N*Gc`g*_~N0S~)NNEG?8$__$ZZYAZa+iAHzZfYQp z&Y|KZT6BU+BfkSMG*6+>ty638A=mS`rfNruCq#n5PVPPd;r)ku0D@Iv24;51~ z4lOg6HD0#&8NiS>W^z}~6~L<%+^jknh@7+&Yr~Co{cj=2*&3=;e>!AsTlXD) zW%0#N^DBQ`2NxxiqRvG2cRgy)<7A71TI{G32$_;CxUMbpRG5%Q=OK>h2DBb4$HybL zb1bQ&6>IoL3q!9Pcev8(K|?7@9Ynt7>AjRIl&!XVxwz+&+hrDIL&1F8URV=dv!lL7j_t?iXIq5$)Ti7Hue^r&Hk6S&c&Fx2k=G>#^QKiCH zSp^q;a>8w_i~vv8yuRb!0d}`4@-C}MAIt~UrVA>(d4Taf&2fOmebqnSO_c?8+V^vy zr{GbS;kEhGswci%XgX5xw~wtw(P?Mv|4USEurZJxr!tfK!Jqoet?dXse02SNEpVZx zb$*NFSJO!jy9Fadm#$9pa9WZ3*Y{(9Wn^xj)yN!HTOLZ6>;>T;+OSUX=4${1AE= z{-!m^>crY;?Fr<|1JM@LPH6{hf>}NP{+ow0W)=f}0C2c#pd$&tE+=o(OjpLAiFqg7 zJBuQVRX2qmOj~{r9E`llwACW#yv^;YtOtaO?WaMn*(h3Q5}JtY23tMO#q}&K*X2>Q zg*o&ImZUvj`S&=%;6}Tg#jl^|AH>Odg%T;x66F| zsH&|nCBsMkv*+>Z4Va!E0CmqiL*9+8q&zPsud30&?Opc()7m4V$De{@^M$2^V7eZxJm6{PV*H}^C z>mmTzN)SiAx+s=Sp6BttwN@D>G{BXhrLfJTQ&ZJ!utr5oNiZd4XjJqrIR9ZRMVH9# zH>P`KyVJfKEOo7p%I`KU3g_g;el-kXQK~VA{556p(}2X5?W>-(cMAmz{W30TnDcs zM%@iM3H_ERk@ju--wQ?7SVL}RPV%q#REd~>*EGV^R%|aNa8{Kya2iiY=4-x01YD$Q zTcf+lEhj9mL=C4sBg3VN%aM7T>>?r6FwdZ|PoMQgxM}j@r*24$p%eALj~4E+U{R5% z)w|j<#lWFT(8&?)NjTV)`uE+%aWTu;+8 ztHl{(o}C5X6Z)S$I(E@YKf1H|tr~x{04Wr;M%}n#ddA9_RHekT{le7_0<#NBC?Xyq 
zQg;%oh@n-MO|ZO zymoLSvs}gm8=bZJkhO}F;#%Y=L6G_2a25`Nrlu_eyJ?9T5#bwgN*odyE{?FicMoJE9*Vi z3_V}R#Xt*(0X>;-*&_)yQ|qFNf8(M+ou=BFU5y*`)ij@3OWUm)2sj$^8hs5r24NbR z35+Tyl_r_kGFxQpS8|(+n7Er#*7S91*SXAZbDr~#8E$s|WTzX$P+DbVeD_e(A~k?w z8f}Y>&FtRyt7ZAUo-zzyGT2bts=-Rr8u;X{>a?sQ4^((PG)1(G;oa>IN1ST;uL$ZKYq$ z|I-yQy9!mYbu0S+)}R?pWKj67btQ5o&3)QOz9>1U9|vHXh#o+Nl|rxDS6O<*g@6Zh zka#^7W8wY^G|n(oBZOIQDVib*T4+gePPd)Y1$;4d_;xH@XPUSXEG06QUrgU(DrEOp z9&*pmg{=@Fe?PJ!Wzp9t5QNID1sy`7DM(^Cjiuwr)9F{C%y<_e{JJ$H{C;&cM4U&! z>EeE@{Bf&{RdNJ$)Z`n$eP;+lcFEra3_M^=K1rk{<)1q@M;yCyn7nujL8?cKb*&H8C(b>vT?@v=}%Ln8R!2Rt2EJ*yfI?U9zo3vK$y5aYb_@FBNm6H7u*s=U~ z^U(Fs32gdV0KsTPvD41r1hDwYzwtB)^!i6&aaZtmS{%I&&;b9f3d_A81N$Wa|1Bz8 z(>{@S+2(Q}wu&K5=(MO~wMR1abdcBm2UpJ*SQQEYn3qze97MJMB9Z`LJ=PD%8*;h6 zXM(2$ZpMLisMoO1hCg|n$e>biUw%>O@MOc5zW=*1m54l}2TXim)A+sG!4IirQs~7l zxEok8CXF>EnfZ^q`SaN*ThQm8D$6eETdy;jEwJhnLG1sy@qV@e2#$uHTVQ*M5_Xna z=n4Ve^KmQU|8Cmu2)`GNYv5}OJZ^>nULtTQ1g?Q|j=O&k$gj^DaJ9zNKr+DhxeT6@Na1RIaxMqAfFL~)(Y1uT|7m(i-1(nN>L!zY+{9G@xq z)m$awhNXO?!z!lg!q%dtt^|9VnOClFxdFM+0qHkve_+A(uA8bnG5{+fIXQ(t&ty0& zlzqve`>i48fm--UHfm#AFx2blrrvBQ*bcOz>JRJ_Tz0$fpJ4v32nbfNTiZ?-I9FDp$N#fcZyhPAMmvU10v7S}vo%^^u!wpgg~P=XEH+ZB_F zT<*y`NvNERB%M)GcjLtdxj&`yk4DU`<~+yKtmz*(OroXggoCNqoJOYg&8uPcyuQw~ z=@toad~@(>$G`o*C(Sn(&*{DAZ?wSKPZ&zvjDc0> z`YZY9TDt$_Nj4U8yDTg_EOESbHa}Qu!wjVjH-J7!$K;^FL9Kdj2J{Q|`Vx!2jrmUW z-s+eJKFspu@;sxZ9t$Ad%EqO?dzGBBXU-RhJek9X&n>NIA-_Ysm?j+D{|{ql8IFyTgPj{zM!aeWIz27gNoMFa!=jD0M*?aA^ ze!EG+sc|fty3)W-M02K+XzhRI z-z0?jBW31Sz>!p{1^Kxuv{mCYADgv5)N3i>#=G(DgoT7rXjn1c1YcLn5%1?WDF6HJZ2LflJ;r+!$_6f#hvF@lnt=VM_%cdMdP4JWj9Fn`waxLsC zrTobmnwWqj4sB<$HQ$fR+YnmgukjRWy2JBR9A}rHCdA15iacAY=7(QE;?|%Qn1C3# zp;=44Z}!1p(BH16VHh%P1*JXF3}j*l;S^wRIWE`eKytBV`={Qff+f&$Of8FG3dk4m z0)ZrG{KoEGXp4Gzx?NDhF+A_R6R?b(g@)ewV*De#2r}*H%Rjj$C#y4z7Gnfv6 zupl2$!1Ton;=afZ3ZwPKZeKnu$xO^7d!IdCV65x(OX=BiDHlFN+53Ie> z^N_hN%lE^g+iFDi(q-c*7Bl?$LQ=p51Hf}_kBOY{B@$Yd_1 zyBrLC{JX~Cye%$xMZGX221n;->)=Om+uTviM` 
z<48W`*v|Zy7DKpT(;8t{+Np#Ri4Q2y1MQKzI!u*c6K=S6R^)d14l#1rBZhc_7*2?`+cqn4$P7I#O9% zX-KWvEt$X+h^NCKYJl)7QdD=MyLH#oSN?~bxZfbka(xCk4(;ITw@PHwxX7Jq{;+0+aO%#97 z7>hU@*e)UD`v0uGNR>wPqyHrGukdEV=*6WP9Vf|x>eH6%Infj^lxs=?DYWH}Qc?l-`o(v5)J@FYpn}*vk76zCS?uwx)tcLl!eoIC3aEZ8i>{eb zk5dr~^4m>(2iK^qwj@Fa=Jn|B6B7>UU2HL8=?iyTmUS6^n9CFVd`LYihoHoWUI)t`ZyKDMln~Z4r;OAa}#nmir;K>Q$Yni_|fA$4gHryK0DzMD@#(or&4EPwFYaGiSiB zc1|^sA;7ZUsf>PEPdF#Vi8VWjxzCy-|{&{czyf@*PWVU5>fo^;C{STMi zE_H$HX7yyONz8?suCeAfgIqLi&CWl)ZPr)@jR4jH3;Pqf$_Jc0PLm8Aaikk(QV}!KsQP;HhQ-Aqm1t2J(eeEc zg8oWk1fD+(YK4i&rBhR?J>syi>l$t4e;!r}hcLoM!wn5=4^mCooDVJ)EV1fRxS~Oy zoUeRtU6oWDi;Bg09jrJ85yO4>qm2$@^c8dIcI{P{Xu(8!zy7B_+~yzd6W#hgtlpTewU>4)o)_?b<5z)S3j6>U=F4?*e!UU0ydXxt zo!5EAP&AhNGh4Kj7`E!%(`C?U^3S$Rx*cIM+*(lZ=&{|ahIGVBtl+h2Bdaastjb9)*_CtChQ zLabk7Y@g*<3BK6bG8Yb*zKEZGWMK*7l>XSp_Qc9IIIyj+y-CR1xb>4mbDXeOMUmpz zYdfR=GRRlD-zLSHGKO$v5D?CV2a zBJ!2A`)oZLcJ|9^S*%O>+{n+jhjX@bIN06HCSUiimYUq7J7f@b^<@PG6k^_$$g5NF zBQ{eZlCBq})#C^$H7w$dw*k|P)8D*+G(BgI+_E6r5tER1xZsui)XAlr^|}w+5<~42 z!Z=P`ZC$paqNjUBt)ZlT)xC!#szZuq;x`_F9{yoImj3nd3RIzf*OYlivb~MliMx&j z4Fx}hxxYx(Dvlm>EEL*ZGhrS2SnaAutKm?~N5)Bq>vQ5!RgqM~IJoeLpD(DY>?R`R zt#Ko^L=CUnmkEA*+>X*e$myUOo6xU(vo?0LQS10S#NbmROr{(zebXR~B0TGLQdhPS z6{<%1fNl}pNDO^?hfE$y%(zy^NJDkbI?{Su)}!Z46Alj#a-!*_xEYmGilK#D84iid zD4C0;D(Q;aiFXIxpq9q_DrJ_*&p93C(KKuhYrL!MFtXY~sCN^Ou`o8`2K4Dq;2mTh zZB&+?g(n}ELh7K-QWAZ5>!@YWLFNT6EdGy7qR$=q6z^e?U2%&DuZlQ(o`2zL=(tt! 
zbNm;=+^PUZ~=XOQIdK5ijuF??GcRb?qj}xj8_w&vpZ68je&#*AZ&M zcsFWIA-42i&_2r5Sv+md7;#fAFcI$p*AFY#58!5sl7Rv6c45Q z`%gaP6z1sx2ve_?$le^co~1(nxjxeMd48n4{`6bPQYU}z3w2Hp4D|hUR4C zm1Ljqd3u0Fv1r%as6K_}(n1mvQ#zf!g{6jwqh@+h#xrV`HEHD5u;ur!^r~`R<1ph; z+K8KcQ$QR|#_gC^vWfCJD|I%_T&#dUN3-Ydk_&70c{}ZUrm`ZYwc_ZhIf6sX?fRDL z(x&C|j9||{xrBQ4TOLOeN^#ZUi|yv)P6CmT3wzgiHk;Wnr60GL@n&v84-$5cP7Z3) zSS-CPv84|Rqxpn;^%lk6mMdXnOL{5KvYhxes<3PgzgN-gA*J`cKSvNVO-dkXF)-VpX61^kOY6Ne2bs3@* zDyls(he7o5FHtqFczAcZ*8ZgwwN{$jtRd>Xvtbt({I?sD(y2lsG=%g63Yt(ZD}0^B ze`$mq_@XmS*TP6};{_J3i+i>b_X@GkdfD~x8$xK?VoBS-T=ERqQ zLH7gY>c~BmpVWI7U~D7w7C%$SWT(1$dj2I|jn0;lDSdMEEH8eps%HPtveM_%Nt!yG z!0)o!XmK(azuRb1B%1%$vXK67+x|3jLQy8Wf0Bs@bsT5lmtUaP(W%v+MylK$%$1!n z`{uHt`)sFCVesV2V5E8aFB*PRjWj)`S{ys4>YZa3n~IJr8wxYdZ(yw)>9SOXs-#Nr z*|+fyA{bAx%6f{T$&(iRGPs+jf>DwkhBCZJOBTZAsBSBaUUR5L)X%;fYL+A!kOQ6X zgy!0ZNo~z;%Ffo8a`2YHksC0us^Y2qKq?Iu?kANv&5Ku*qQ+=tK4rY;_CV3wsdWb8 zI@YZ|1d^0O110kUwFM_qt*^V-dl~WV$Q{cX-1dwsM_p$Lvi-qua&aRe-^^hV)^|0R zLB9~eHt&)!&SeT$MuRo5SgF>u9=y_VC)|XDEyfX5VB+=xZA&YkA#^9z*9==t!)Qdg z{|4tR4&H7hObDyo%Af*3pUGu{QZM_t<4B_W7EXQdnoT=aH<~`xb>y)~1(TBUZvQ%1 zUpuS>JDe!@+Uf_J$TwHA->ttLV#u_N{JGdp=%K_DV=FEK)M{p;(oS936O}pK&sR_# zHDu2UKeh6*nf!JaKZ#YXezd&g>N!Jk5Qj$>-I6g*s zu|Mn*sJhd zr6ZzK94`dFST2arUNNB^JD$@_HyXG4$sqaG49kDn_aS|?t^6w+e-|M~RgsGXziqG0 z3#_ld@AOI8-!A^m6A`?|F67f1DHKoR}`-T9+xPf&K#G=B{J_$d^c$liU_@w6n#I;>fufr5@)_#KF@=SSp8xQB@yb=aY|PBKE`J?>Ronm zYszx0_F9hJ`;<|vk0L4AA%nHccRZ=HuM*8AFM#N#ti_#iaK6{L7sLELG6SbN4^Yg` z6C22CSgi&nk7@F}$SXPI*OUsj)U1%+&|3VN-;f`{>CiQ;-?j{n|A4`hFkTNNKQ*oC zeXjb^vn~jc&R4{uu^%y{LmcILC4)^M(7*9+S&w1r>AtB$#~X|Tj^m8iRUT93*=U15wLW(t-@BKlzJF+$1*| zi(c;iU{Qt==X0GYdIlFn_u1pG(Vtk%-sj@dOjTm2gH9GM4+OlAJms5}W3kVKI(#-S zZ`?OsX#e6Mi#!7nHLS{h=hU~zkxi}IHM02O!k6I=iB5i{uxVh{^{vlQ9x%vSm0J{9 ztKYp**RaB-`n%*aYR|iha|~SEo7A)V0F8K99&7yjF3vKcb&HAog90%awESQESN>hH2=XQD3>B@fm`uQ%lB7H5m1<*hzn@?=9*o zOACh@R#s*jN|deq6W$jo(LMnsFG}z55jZY7&1c=G>-~P;O8(B3h{CqEP!6d7kN3^5 zuGL1q;{Ws{9-Ibyi%X`IvOd!xR=;%&v*TrQkPr2k>AS45T9Cl;keF1!aGO<8;&KvR 
z)c2*om^kPjVW|#+*2}uL=1Til9mIP=#EeI(8Q0QLd;BYos-t#Y)!k{sm%1sYeQ5&0Yia? znU_8nj6QzHTNvHkf5HZMsT>!j>&(~ixzw~_oDEBiR^mG~o}F0K9YkJKYBwwU&^NXP zt<02rH?>^TP|-vP3C7SAfKZklhl2G5jEwP7b0{A5Ah5+SoiP-&2nQiue&D9H5-1;BPU|K4Qs#Y>mYsQ9=DULZj0UKxykW=!-%)&PNOTlfP zE;D7I6aEHB+mQ#VTIr&}J=7*t16|%Xu`wKX`Pe=!ET%r*c{AA;A=dyz7LAhV-&C5; zT%ja-Ff0D26Fo6aR`oC7P#A$JKCtue&FW}wHlcO_4e381w+JT}=iS#rzl0Hp-9jRU zV91cq{aJT;$K`_vECf<$+6kg%oa>m^FHaXn0Pi(Ab6_rJZ1{*DC@T>4qUv*sCW{(j;;0TLGea&S*8pd*mnX5owO;W{y>d*y7KjsI-R;${ z1D?kFv&Z!ja5`**DXTZQz!Tem%SthqAMbO$%gMJk+fx94$!o$uoWT2fZRNQFcx)~> zO_NzpK(Z`K5KlD8sqQsS9JTNQ6;3meZP_$z|yw!cdXC=R+-DOLJ5+$VJDwYaC0@ZrJBXkScO>B_;8Lyq(YemvwR6I<&Nf z3d;!-Q8CGGFQ&&XFJruK-D=6+>V|#h>>|+ARtgNh7|&Z?k7ik*^VoV!zu6tOrwYNn zf-1tw$Cv^BiZ{n5?4O=52LN3a-wB{MKY^aoZN5YXNUd_Uz~S78tj?9P!&pFIZ2Fcn zVFOSv?|`0RhtfxjmX6mlfG&_wq$|y~f>%6sKzoH<=Lj5Sb^d2|hSaE`Jm#61+5vY7 z!hAgXRkz@Qp=r&@ueFVVC9E%1om#BOITHj47?&DatGA6m&_4q`SJuH)@`LJ@Dw0>{ zR`@$xcQ8+kLrDB0A^es~qg)sS>w4b{c+~V;O=dJ~e0eg_Rm?b2i5`DOH^o*d(v78S zcnN{tBfwGLBKq?g$XVZQQsB0o;8NjGU%Y~D?b$}z1>)_T&MoiqDmY>L&0$f3gIFD1 zo)PQZ#{s`C4@x@+RP@cq94o(AFitwaILX}NmlhNvNX)FW9v325FnUTfq| zUF#l;bPMBzo#`n{vdVI=dzn4LD!IfpSjH3csoo?qxwaNmVlld`^UIBHZhss;mLsXd zQS_-a+cb30SVAC`k5K3JQC4iSjlAA+1bunMe0thR3Nl!r{_==hLIB9PyS96eHC>A1e5~Y*yszHT=)1q>PX~^kAhVkH=m2 zwW|4}eHKy;30Cdh=-^JbXGA2?DYqH}3Pl=Tm|6l2nuwFE?@%jMsdLbD9hdly0zcU! 
ze@po6zuN}%zKH^!*5fC?Yy|e-xo>vvW9sudog2EVqj7(LL!NWvaK`!g_MO_QT)Kz+@n2wCsPk)xCqV$&oQd> z%liWHp53>za5YdAZ`c-vCJ^ zt43Xo|B;+g)vu(W&Sxju(cOEHvX{7!4@@3(z^?~rT&lb8)e^AfrTZHKF7R5tB|GO zPm(LbcAE-?Q|#5lo3%&QJU4FL-qoCc(;oU{ zzD-NeMb-W}pI0@=vp0PE_Oeg0Pc8e_fDMKd7_Kn>S^b^8gvyl5842Tjp;9xGh9_+Oe&-n;sp zLBPjgBsvU~`w5=$<8jbgfs^=z`0Vq_(>~`5n6vX7%H7Tb$Zm{fyJenlnhqG~Krc{8 z#ZMZ}5(ivrhWgh|#C^-E9x4Wdl$nQ#mubN*y)gBVNivMj0nn?&) z68PxS505F1+aZ>HPN3s#2%R>&1A+4stgT=a##N7($RxTADryB5@WIp#`Uo#3>u zHwDBjsqIWc;4HOiWsD##nc#mLXr;^uScXP4EDSHw(U@+MM=099* zK3`_%F*^>vJ$ea1@9?@O8P{|q1u}Yz3cDVarq_Z&pK||4c0o!$ezEeIb&44kM>s+` zg!EON)PZZ)laz#qB&7nfP9>ZY{z9`Y_Cdjaw+lKWQ(R>0yTt#%JDQG|qyKT4ue<^x zz!ENMFX__Dy&>zYDz{pKagDm5utLucU|pDfP=)FnBD)^Mq?m?-jlU`qmHhgl8&ovd z0mQ|js#9B?p*k;X2SOuMEFYX#7a9SoebAb{hRr;4HpWVgo2a*sZG;%dJ*gYp==E_W zJ1@BzeE&TEsxZMch(`U+BezUIgNu5y%I6=il(ws~Gh+K=t%8`3o%Nr4%kf00qK4bM zQ{-Qutb6TNjKWJ>@VMEZl4F$Aja&8xJ_0jxEp3yqMr8ov<57haVH*8jPHqIG6|%yq zaB=TdHc-Q%xZdW=w61+fJ{hWzKeh!G;+_jzE^CSt1~P^zZW7lNC(ZA8oc~S$Lw&}u zV#sJLpst*Z-H-2nB5G{767r$tk^BH;mV$DZ1uf-Uw|E{?kLQ2x_+0QF9sS0@qq4Uq zpUGaTksrn**afN(sFag|zH!YekUp#hFEhY1P?LffD+ToUPLh?_jsnkrw}2m(DMxby zOdu}5Y|7`foh|`5pE~pzG9vZLtM9K9mES&e1jDXV-9JsRuY*!=z z>&xA=fZyyq$f0%T)Ze}6M<#STVx}b!xM6tug2G~gV0Hqa^kJ!g%Dy0P^^y2dilfib zi`z5?X%%JC#RY1eSL+tZcz+w+1Z3-=#Q6Ni< z!*Jc%LBA_iPE2Dn&f8Ox0FEAaWXK)?U#;BVaR#lcDP5rc=02dW&imZ=Pzm=^N2G>5 zhHg zM$$J(0B4K`f;)}SuT4ine?v_zPaeO|D2(nkR}j^lx6aO~@9$YwcvC7J<|%5AWhkn* z0V-omCvbHOqtZ39;Hq{x2JXmB0iJL~k$FLd1E^sKH7SokO^%)?9HEf11+uyO^eP;( zfdre?$ALwI8?|PaeW6NF=>iyaKOk!eKs=SB_YGIYcK&}xG&Ah^&clT3vstuT>mu)n zX`F1Q$A>O8?kXkGN+u!b_MeOi0mfX*vxGltpbz$$6y!lML&Kh}(xMUW;I$LNtY8gUJkV{Yxpl>xO?;S`nJGFTP-1HfIU}7t7LHpm@{f3<|c{15O}T+ zZTa1b(IEKQ)aap-jg!>-qpzL@cBTB_^+=-j7IcQ_?Z5a2#y^ zSCu!SRijdpG{*q|YYFreAsCER;nV5lP{CWMSnHrG{j+LCg6#wPu?l9E4mwDV(r@-- zqY4f2%^~adfkcpcuzVne(Q#qK2U%@bk2vKZM<7ZAo>EZA+6h7c-{+h$8F-YgM$ADy zm~psM9(LvT16l9H3y*=l?glRlf>tJQ(1d9$UKVq>ft;lXepj<1QhbC?Pz-giAzegj zXjj3LV3?eo{Z%GK5E0L`@eCc&8oyS)wRlv}q(9Wd|J{2NSAQ4hL+}x@#!i4CA#(0> 
zL4xbK`Wu6Oi!=_`lSLW7V*LQp@~SwrKIw*yr}e*$rZ~d8oTiFTgI~N0n!9GmcEn94 z#h4vp1*nu9^xzKlMW00!#lDj!cT{-1R~78!v8iE9s;34etiC>us)@sR*iUiAkCfk0 zzE&+or75PXa~aZ=td(hTw~cC|FYVqcau1gr^UABrhJ5SAy{8q-NHYOUf(rU|joP|6 zz6s5=d2$Ua9Gm|6be-x#^V3dxw4?pE@XSpL#)Hikt|&-}a55n<4?~CdaGQv!A=@-8 zo*-;)?=ZGby~FZpPZIa7+Cz7?9`+4EQlq#6M?)&`)R?cXT{q?bn|uoU>~JM%Q~yR4 zzR)j#rTX#DA5DKr3W_OxeCbaRFoD2Xvs$LeeQ%MiG*8>%R7YL8ooh^wxhv?_Zx&<{ zZ{Xgpt~CPZ4Yg{=f&EblYMOz^>e^M(6jR5~ogjX!v&csnjC}L*#e!@-<4A`I2O7M} zxp%aG$&_NEQgAXbWo)kU7q7vYuA13saYdunnE6)~t68O#DCrE3WOoWt>{u@GFC?MR z!**)BGAB_Le^a|~0iK2&G9RjJJr@(n>iAmdw>nb4fum{XN6HjaUeoPK5Vc%FYzpzm zL=H0N*m$AG!~OlVo`>un8LuH;Re9|jhgGcH6R{=)hR-&?THp}pDkk+uHH8RaYJb=* zBs{8qd(2ATA3p1%DbHv{WPBRWra^qp8Y}cTU^HAx(BFTLj2^wBkJd%b7T{_twC=fX zE?LlWc|sstzqQ_jr_OaRs?tTi;>Tf7(n|T~bzbZ{+pMt+7!@iPAN^TYFw)G{Kr^ zFnD4@@c)ur-GMT(tS8|(tvb>9d9R&N0aysd+DHfdKu-D^_S~wg$;R64Kv{U4bS>}% zw1N|ROiKv93p)Zyjl;6Y!;6ld297Kxy19rVW+U-j^B*)-E{si|fZOwZqgSWE&F|V$ zQGe5oAOE?dmGdO8JM3Kk5v}68{Lt!lVNVY`Z%YRIn=1A3D~pB)5m%d;=h?x9t?sE@OHa|J^lOTV*UnXU?-e|L?RR`>2-wpJfomQKz17E62JaarSa zqaH;c>Xw;hk<*&Ie@mU)7McJ;^LVU#rxqPG<^Q!4mfKG9wea&?GZT0@D!OhOT$_(~ z63VxY#neI(d{dN;Wmj}C98`;DYRHl+Ek&Y$?<}q0J9cna42rj2)6r;bAzHXCEU(6! 
zs+gNfm@EBBJtAvy{l_U6{#p4V=XJs|2(%>YZ=cW}C<(*vK<4QN8Fm}sfg(P}1G-H+A93iVU|!_`hM z%nYj|%B_mD1#dpQ8d6v)PbprX58WqyTTjyw{F?Oe&^DH?%l?9FB6Emg_W55w{ts5W zWyUvuG*x(S(6EOd<##$ErY-~PO!D)GE~3(SMOBPWr7 zypJKi7E8!+2_ptk1sPwgjP}5QZp%@zz6;RhA*~J}W{;!e_oFgKN(kfr>&HI?w@@Px zXg1hFa|U=5(c|5#s}^_Q?jHXG7AA%MH?9vvE-ig$33-(#$fThvA7CgUTX66#@1MbL zf}k%f0QK?!w)tR|VO1i-dHn!+cWD`*4j32eu z9_=`+vmr2$@qO80z+5$rVFDCQ^|>YFr8uy^Muz3b{Wh>Ce>M9FYR<2a4Lt0QVBF?I zr1P@R(}VQm_0H(k9jp|{;c+yaKD%dKxjqH2L)*rePE=5qf~kU;16a?r_O%FE91_Qo z1#J(55LjxmpLDclROfMVC2~ga=Jp;w#VH(~7@o8xL5}-PD(o6$w^Hirxt(ouz+YnC z;18eWQogUfx`E@~+^z#5P~fm2#BKX%p&YXNX%~U`OK=q6X4?QW&>g^)vMwipyTJN4 za6HSuT971Nzl5VAT2(@l-PV zS|#np8xzu3NMBwb5vdET_cN|w%t5S6@#t> zQyL|X;jbiLdvtz-(d<-9km>9%u<^haph2wXZCD)uTRe7)L}i1sBqk77AVbVGMJhhQ z4ucM60W5Neo9Ot6kd4h+p;32C8^mAO?aXbJNM#Ej>n2sC0dpNMcr9(dTjiO{br9vB z{TY4s&%;bW-mdwwCss0))^|pvI3h0OQI|Qymc^>J|GX(U*m0-BElF$~#6_Wm4<9?C zgboI9Bk$KdQchsL@@^x}bkNXtdZuiaL)PqgkB^0C=A2!rk$*d)`BJokQ%v^KJDN!0 zN|;M;u1R{hDa_sZD90Je2g01Q7tl0?;v2Kbje%tLa|SIcTUk-C?ddIzz)ec{s}aTJ zaLsk9cdE)2R`HbR?I9uiAzT&_frk66tJ_4fvOJOerEXsRen&5@v#ubevCpw=JDvT~ zeU8BBK|$b!tg?i*J~~|6aU{~&^dGtRDY00y#DsLpDtzx#)d6ISMM^bBaIf4NJ%s_f zBms`Zd7_!;opBP*_S~|YSFuoxy*LsyG1W88mp_H9^h~7>y`1@+ z#gkeeVaE`e|EY~jWONMEIyN>HpX&Pq9qg|l+!n$!WNd+!eNWwviHqrVr;AQ5OFb^FJ45p|R*U8bu3D)L8RrAINl3VyZ zT}l0cISL!2U3Dx{Fpy*?CguUq7YtFHmi ze*<3seh|*IODZluP5d(X%O_yfBAqTmSw>W4u+r&EbNb;N@|~(gJ7oWvaQ>X0Q|2o! 
z%Q4VIu9E~)YE-BA_EOYlQEx#nSexE~g|V#Dd*AOAE)BP!%`Zs|uaWO}Jysy!L>`OS zgp)PBGxMNHGsgh-dI-)!GYSgsCWryQX67m!?selQUsh5IEh)&57;&KE@iZN z_#R?LXRVAm<;?kpzDD@ry)gxArLIum?n`<&7C`z0vw*Dw2}i0Pb>1D_baXG~dmXo{ z8X}jLMbSG?U3dKzw?nvpWZ1FODsC1o7OaT+%er<#@H_j-uQD$pFD_j7ipPBg zC+pEkT(W8VmBSvsw^{BNn4&MRCw8#Wm-As__9%=`?+?yPP!e&1=Q+LX(aV<%ue!!W zzo+WM-$MBRhamEMOA=N842EhiDcG3Th3!D`^+4a>=m!+2okbU2`6~*c>J` zQNQs?xl}QOhx8W|lM2y+Kknmhi{R4C<=&_Nt*?M0oRy!iD(d61{{|a2nCmA%j*c>j zL6`mm_g_nYC4WhHKUr3~?E6Sk#WH$VYsQ8T!{3h@o4+6E?~b$2f6wbaz$V-yNV^q$I*1wlKGM+6VobHR_|F z(r{vZq00-#@(eW<#!Flvy!!H!ojXQVCF}c7eA7|)g+aoOBmP07-7V z0I%>HsyO!Xdjw7nn(bAJz=Tf+x=S%olq%d?whp!EhTVlm8Y#@_+RYkPcQB22)n4}c z!Iz&X;5RIlJUZEW@@*!oen8Vlb5IWbb6k06d`RQ(B~G#b1Ta9a$w01?{wr*vT5cSw zAkvK`gcd6`Tb$@Zl(vH_rl_@pWmIlpG%#WsOcB+nlgXz}q1V z?FA&fWpYRNak~C|aB#^$_j8W!C~5Zd9VWE8M4}Cme>UbvDvW*$&|vXJKL;lT_u#cz zuZ2o&V(4AaV>3iZe$6UDPv#0}6?5V+yRL&kpCE8FxcvPd&~0@~QkfjBD0n6QU}JZ2 z#~}NjcQ!MK-)$SIJ6=wZo;G5$J%b;<#Tg*ZA~N*v(CdQK(RZDGZIW)qRR_uS8;ta1 z`Cd*Si|gQzZ#44G9se!M6(UQ;G8&fkiOP%nuf4!qI9YCzOXwe|s~Z894Rq+_VAT7- zQSb4t);cG`j7wZ)>>m-Rq}V6L(OnpDI75N<5V&xUiwMLp4n#G*QWaxPeK%h6Eu_TV zk_522@F}W`OpF_oQjS7^D4oyvKkHRE*lX3!ZEayxuFRGgV(wcehJkELRWf;|tP7fo z?VN|@d%2jRhI^CQ3z`Ra%Gd4Kh7v5)l!2U?7Tq)LQ1$zTI?M~2e{chYQ$YBGAzZHH zk-G1mnOFF69q!9%y2TUF3SVoGb_d`D+@|#?bm54q$J0X-L?lKRU~eL#-E%2HR|Nd& ztfeit=NFf+x0|0&LZ2wlwg7e`j2`x_Fk5{v(&e~1%%y~Hda+6h+$;PO_Sarm_9CLy z5q6CDo#CHrjQLP-beeYwLIhM9Q2fd0KcY)1vU_ThiFSM~gwX?P%+cdCa;es_+;7Q0 zS*Q=Cs(jgB-+r&@UekR)C$9rv~z zKLT%ufq65(71B=puFvhYoo?ZljY}mhbSj5UCUx_%tMO(k58;}(O=uw*=1#KTx}dlb zJw4^oI}|lVu^ix|xRa{evUT4MA8~eYUta$m_nzYHk?7Xr{eX?g`;PA?H?=U;ecZZt zn!ohqw8)lvISdV6q#T1nxLH%I1Y7FqeSCGRofQ-++uE$Q^(Id?Ck>?OscRh7@v?t< z^8LF=AD`V}DwR0db#2>O{YgO3}5a@FV~IaSrI_~>=V4#LBxwq$2UQ@ zW%=@_zfIuJags<=YCvbSfi{lM29J%QeCj2$ya|P4`-RgNf6?)RaEv~iskotTLUtBa zchO?*FnmC@l>W9G8)dJ{Kt)q4C)s-Ct(P0sX;_u@hq*V ztM#9$VNZ={-bS9n3(?2W#bExxbI?>lY6c5f6PWmSmcB~QnVN>m5p5h90sEO&m;b;^ z%ucxus_n7$h$uZ8h{>`Igfk{{_&C%eWhY|yz=8IVd*Mf;)i;^V^A)kr6wJ7zlIJLg 
zdz6G8DB&O*TuHvEE#pA?Hb|E?ogmw!n9a=;beX_oyGTn5m~?i%=b+(>M+Xt!>Y5^9 z647T6d7_gG0Adr*h9ya#(9>!0E@*q~_Q7~|Lusx=Hb5^&6eN%*Tcc~n_e8Z}Enl(2 z3o(}$l)ip?@p>x@>7qerM2DkZBIlK&EIl(MyjU~ulB$D>yk<@!r)lq(sux!^?NuTp zuQB6)z??OSWBD!%&|xQ{P#3$Em^iI97=FoDsH!)TX>)U0ol`KZ>adHEMMBU|Zj?i1 z$bm6`j*)*|ubI>qq_`m(pHuc~c$F&?-uAci;3wm0<88G=a3t1(&tcFF^6C?NGVW+WR=(;PX*`l4wn8-m{V zq=_eRS}(RN4%FH=C6=!Z_Sfd!-0xCW_fX&XWxNw|feYb4;0MK(OjZJ)p}3*XQE7+x z(Z5|ero_chCDKyb8?x?q^jge8&>rI|fJ7Cmj2 zwxbR9Wi(bU<=>`x8j~5|nczyJ(RU%Hj;bVmi*gdXx_@hWKS!?`2?JasA{3U4e87<} zv<3_xAItz<({tWK)@(mz6Nl4xU(PwNaoiVZu0m5lLUdJ1kTNj|w9s5m+ z6ws92YT(GJ2XurnMwNa(?GMrUc}$L*?I5XUo}WImMfbGGZhf#YiczqLCqL_&*VL$D zOOisbvAdjZ-w=kOZsGer@~IP0W`ir~8{fX`$xGXtpfRivO6XR0F6sR*!c9+OCfqAO z86WW{Sc0hDfM_aIUl|}>>Z!^%_2z7!xGbW^M5Bd8gX@`~Tk!Adp3!V5tmt^}`bG`f z$mE>gF0#{-=x%y?Zwkt%s`zRID$u?7cwDv|e>Ai}Q4tEI%uiWrC^JimSVPP@fCP$5 zYdyNNJ9}U$FP@sUf8d4;ebBgbm|Q!OLyD`oxS8ESd%AO}3XYd?^Mg;!D@e9(1lCEQ z9H5V*zWpukMU2n4Q@B6^|A4RW)CZo@69z@MmeG5+-El9nfwoMjV)b*dxzLAg;7!Tn z%1<=HRR@&f#a(cLpJ;t6(DXm=_ISv?!2)8)yC>`RaU%8r+^VD_Y{B=&-o#MX6#k^5#4 zbxcPojLhuGUT!9;x~hTnJ8ROeeEW@uf2A__{9yWc3yG@JrdA-V^&p5utX5gZxQ0=h zb9|1JqCYzvAePZcad9!BTOi!L{wq@DlMP$gb)fp%bil^!^&KqLusSClLEs$?EvXxT zn}Q~uRPZ?RRno=!kYiEs!7*ID(JZUk#hs>Uh{TG2V)(0VVz|*Op_NiD)iGVUT{~;Yt-6$gi)Rmkx1A{|m`gQtOeQvFPDqOt@ zbcJg{wdRs>%jULXA84%vWFN%wj8_-D*ik5rjem*w?a>TxWYzvmrY6n+MQC4Mu z5ptalB*aRm$+(W^nW5Ztm2);qui)^xM38_8*#@jGED}`wc((|Y2$mB&$!n-YJu&~h zvn5~yg>1|;pWcd;Kx84<{?7KJ!%Fw_g&QzjHlA0 z@q9tX6;U9Ft=+ndXNWowtB%CY4L(pgmuKy><6;uk!J1Ci)gZVpk|?mFOK+a1f&|Va z>fdU_bu()y!L?G4_(Tj`XyG!aZJD81ClZ^=&*CM`-t|!(AjY1Wc0@Ai{acOSjPlgS zRpO4~C^pWz=``10;=p}z0D_dpz(*FN!ntjlCKvs!WPn&ty6mXl{~nZF1=`nWAM6;{n^A~HOgua6KM%f_?} zYq@i)(TvN!O>!;gqqdag2ywk|T`j}F;U0rZAi*)B$44FFwKN>3=}7p<#?fvui^QC% zH}1_7mLj$cXl0z}k*?Hz;_8a=m_ux`n5n0wGAew*Nd>rwmpFH2&+<|-$fz&#Y}h1a z-T_AKiLmb}j!#|>%|c`;*2IagBx!6`-uU?0WV`zmh1mpYmrQZItu%PE z^+pBsVpy~0e}O-&YXl8L(-?I&LC(%KfE{dUy^eE(Ip3FCAni7Gyy=h3>DV=JPSA6| 
z#isC}YKn4Rfntt$T1?eZ2!myy2&<}LgWUCe$+d4UY}7_6Y>ZS4gw}wsj$;T48YrJ$JJl?k1NFG+d*XnD{eLbRFc_Z{RTjmpu#y!%Aw^uYhJkjoRwpYmYHJp7P3u~ zctYm%lm8~bBtVd7&9yyFuoqFl*Djsjb;T1g{hS>$txX)ynr7h|qo|Z%EYhsgl-y(Q zAfwWrA9GVQ=AujdV$5b%T_ss>8uuSIF?odP64kqU>`y4EQL@w;1}b07b5hCSQM{e% z;5J+qF$c`mES)Nc<#E6oL2iCaEC*~Cu~It0atCY#2K@ZvcN*WS`uMf($>7UU2Mic| zIiJJ$IIB4)crSLqXL3=XI6o=d+pahjCHqS7!(MC@DHE8>qISuXMg{CVrly$OBu^`q zc4dU?)e+eISn{p1k-x?T@@ zW%C(lI&*usX0?R)@!jWUl~w`jXc0Hm7Aa6;3#3s`NQUR!$V`3%dnAShYL1gVk=)b! zH|prCbaN^dV<$AnVjrq~aP;9Xhkxfmqn|bgF(XVi8TGbT7Ie1duAIYA`vF3QKLDM}v|7_afugHuM4^PjR3dJ)YvG2OF4I)ZQKch5*5xvf{ z$mAZN-SV0^7}xcdR)3{3y`zAKr}+}ldF(G5o!2`B@lJppw_{ZKbDWdP%%)H^V5xi1 zSVFP{6nlsV#h{lWYhr1}A**E2+3XJB65-=(zc>aw{az~$ZL22YF}sT{>ALVyLwh^X zGr?szi%4N;=n=PBH9XrG%RfN_wg(97Xm4GZso#BKk+_6}`rVLN=!Mu>1h!}kLlK&Q zCCcgh16z(M3rDC3<@RfC3q7TiV6Hd*v@4fiqUkOsqoY18cyM|mpooCn64LC+ah{7yiBpiha6y;c3 zPTX6?15P^=gfQx@n7BEn6}(wY%Y|2ZBpxuL3z;|ezG3s_Jj!LR{0HFpIKAI`z;kw3 z4zkYG7rGG(zT|fYj*QC+S{E0I69^pcOlp6~)?O&uP!V*!H_FN8RR?T%nfTGfYyk>` z6SZ=yIdp-wBzyt;e@Qt1zhAt<-WK;OhvM+*K!G0_2}SW*QR84`IaoMcC`3d=C?$_a zS;%kx?*|q9!`{T((cO*7%*4#v;&ZBub+QOS_(lnvTvOYTX%_cqTkM5p&Cj1t;6I4> zk-z;|q3o%ANm_*8E#19n7ZaJbP9KSo9umAQa$2ON(xu)&itLlHErR(b{bHtTV}mtg z;F4yBQbrQskre9E@1hSyKJ%qlA7?&Z&S_N%UP45~H_QWFJcQ(+_>MEebiZ zP*+_4HU>Sa)rDhKH(I=n+vN7A3P=1W(SJHMjjqDEnB-}$&FFAo){=@N7krf~ zGSYSwka%kiQIULxd?q@nj3v8y;g3EZbFO>5{~Hvjbxf5|!lVAVO90Eq6$ZUBT^y}j zL1!m7-i6SxS`BtJfB@TS&N1g{$?ff8rNr^4x~~=S)-~1&pRxAU{-`>kFNJ#P$qsj7 zd8oirtT&8T@@rn|bgmR8P79Cnrn?Vsydl=8`@m2-74w(TvP*kj!bY-dN<*A?;CcQW z&AO5Lm{drle*QC_SC<0w1q@|+`F~yn0J5`t2l+1c%YY&$sn_GVK?|Z**Myf9Tk4aH!^msi~^M5#dtMIsbSW&cL zW|}ZFGcz+c%*@OT4Kp)y8s?;7noz?HGc-xV+~Ar1dq3UpoR|ADpCn7NtR>H~y=2?- z^J~iw4hbEi=0t(0%pdw~hO&Vd<(Z-h8>N-C`#;_2R*SJX#Lvb;d1}p-dPcI5x~;M1 z)qd>n@@zd>KVcu%Y5EbntA?D2*KvkmV@7Ad{~Fu4s^;m|xnImZlyHjF1|_W)VNvqE zwoKu0slv?5RGWJ7Zq3#%Mkw(~4}GQ09KqFMj%4z^5IyL<>9&qD?71`gGQAN?9PElrSD7WXI zaCB$#cCbJ1-KgJ?$)I@_G7>${W3sopLbxi)%Xj3-Kb3o0)hTGEr}x8ykxD|moN1=0 
zk+Csq?g@E`1haGw=4HN{L%CxiW-U}RVr$wLQ$v?@jmD%b_rlt3ttK%WjfwK;e_Iq5 zR-j%dRBYkN4)C+oKefWkhmr-%I`#>^E{?BcZm$XVKNo(`Mwjpu%??v&E1lZA;8L_e zy*7+(7g)@WHtZBIF0S2$o4Y*vOI2|A^}!ZUyYZ*yTPN5;k3;T5P_{M!zeMOomtpCb&AGByYS}(nPJ$z3Hq#1ke_Fd z_H?0<(az3}(UzW;tgo9x-1vdrhaIwWQerOiA{;Rm^oqNfN%Q%7CtLmUDy>0M`}N=| zn@r2mQl*XEQWDhK>hk$z#Y1#w&su2CZbn(%EHv?t?%uDffqr$3ZS&)(d?WPbf`MyjeYF^*E;fLJC=Ax*sG=G%zbm9A zXSM)FCQ+gKD?@1)mfq*Aa;eII0as_Q(rQ)_nW^%j;S^-_!_`BNgZbkC}R* z9`8&q*51!E`)?^L#8EfvA-m>Hr&vN&3fy)^EcbSRR3kG_33#%??L&VWxeWGp*s&f8 z@YCVE3f4CXs<*b^^hPv=FS59=ChGKwl*{`{G)WqHSEztHKssM7-wlL1NlG zU?BnSi@RW)()&y;I(0Xm^UOW)ez8WpDkdbL^d3&ACe^WAWh3CD|Jw8ni+UW#HbBh8 ze)GrUox&+~!@D>@`3e}fnZ&_Gcl<(_O})nbhkSIg;wnh_Q!LQXmKeyexJ}t&{^$1! z%;c;cJ#i!slhaAu1$AlKB1PG$y#-Lf5Hc=X;sycJmS?ThG3|R(BA4|MFER{-K4Q_^zUoQ`wE1!Xf4oju# zw_jejJDG8F4u1<@<0#VuJpQZs5Z^zza{dnEf=)AAgg@ewdk4LG;}a);n|B%i4x2BK|H#i9 zycO2^dbbVyozgffE9a=j)~_kt2vgL57o;E4A;dz;MPcJzA%v(laFiE~#$hnRvFKWS z&6^67k_x)a8X#Qv&pluAdjkh~f9PCx)7>6H>W3~?AM2Ky%eFfk$8^*oS*=vdd+53EmOwu2T@l`L>}-)QYB zLdYmjvyRs}YRi57Fl(XgdR{(d)VxBd_k%oZnCf2iW0T0};9xA%Bz`5*y^dMj2~l2}HQpE%=XXuM`$`i9iZ^-d5lBZ+rq|sLP4rLDNw+f@$?8en^vG z;MwmopVYC_$$}*&PCx;=84NXhYl>5**lwnu|S$-|Y~Y)hK8u4F2di=ZDSD ziw6DjE>nh8Ob_0G)QNHm9E7~QkPhEK^F{Lx3l6MuUj90VAtwchu|dIVSciXMX>~^& zZHk|=sP7>ZAYW8|{8}otKI9&O|7UmPep{5c z2qZ}7I@jER@=uZV8_NuRemAk&DqB5n*?84HEw@nir1oB|aJXoDl5ix<U05<( zQ0U;(kfFu0_!n9BTjSDPI-lNC0ml%>){Rs0R&IIiZw)=>lO-X#88uLkSuM(xlU+?W}3L zXGC3PJMbpOf4Vudx8KRmft2rGVM|MIs;E9o|0yfGI{m5b1S_Eeiv86SP-$r_A0CLj zBfVhM|6ku=mqF$777~#6DlLxs0C5DE`gCuJ3@`m}C)iyL=aP8|xr#U_cAuCBb(cC+ z=%!zsy!QW7*#ISnwcHCnCdky&4pHqYm>1)ej}sZn_nBujGY;23wrcmW8G(m?O|ir+ z%P)L+0<1WY@fPrxvP3-x`5h?-odY(O|5z9fQ}*E^mPh!G*HKiR4054GK#-nSrG-Cc z%e4Pi?Gg1+F6C( z%yl5a>Ac@9rJfLj=f;=oQi8|hLe6eKH?W+wxE6=8JTSf|;{-niP~YIAMe_GA!Sl|Y zAsMsABm*Z08CY2pGNo?RgPUaRAxCw86K%U&?4;)?ZW{l;y1Y*k$tp!#k;KOlm1SpH z?lCk6^)4a^+{AiZO87eYp8A_WfSk9CxY_2ePdRjxl3WqnJ;w4-_2tPy#1{k3<8aRd ziDhS7JU$0lG~PiFvo8;)jWM@&)lx$KoMT=UnbL(bBvhcSmv{ni@fMGw;wB3cw6C~E 
z5#GN#mTEc+6^3eZ_awjWa%)RA>d4mmky>sCP(QzB1psPLiAz>!E52PDr$4VheH#hx zzf$wZ5{b!?0g%AdcH%&p^`AyXu>!8247NQnWTvjIqiA0p&Z}=6B~?owd(ByPQ8dTe zk)8|ebRfNko(#s@UkLv25M1@>JB*5=9u2UFTt?Qhe-KclQoKK&)6?Tg{lfp;eeqTt>;b%yLXD=z~cH47mNofgb_5ty-}mSU%^ z{V=;%6ic>lePKd8l@(1AJrCYoHmWWYMMby%>-&7K-vy9U2;E|EmM>w1`0R%xjp(rPV<(@9^Yx&-I;Fv6|h*|SeD z(q!@{{zg&n#MJN(2^C)s($6`J&7xk}i?AJ%MGh$>D5_gl>Q+&|MM6C8s^*$tFQ~(>9G z{ny9fg!C*7X+xaa_$K3k`16QXh;fhV1RD_t#9z{@E`t5EfN|V!FM!3xMZouDXZ-8l zffhw#s1FrZ0OJ7nz4J&tH~d?%Da+yD$?G-ZvV14h^BC}kOn-OScqW|iOxrT-N;F|G zxZjE0idh8NV%oi=X?`T&2mtvU=7Hpiv>1;!kZA@AmQlTp;43H-p0N;NOec>S7FRr` zDuOVuAe!y71n@i{?V z=T3}m6ck30HM2j-gf3#>QiiHLxO7so1?_#xhbp#$7zzQUgVyxpBm+C@4P%Pe0pui> zVaoC#R|u!+?!fyCw+g0d%&QX*L(D@2*{iOTq)o8`^6tr@5JNHGtct}0|G(o^y~OM* zpO>m8L3I|Q6-XiytgHe)f)D5$hVlDvzXK`i&Bok}vd*MC`7IUNNYZ;vK=ZK~&__vB z;f<)O-n}m3zYinVn*|R~5x>7uCar;YUse;1E|{5pw@<9i^5tN35b36WJr@xeX{a%7glX|Y4WTxquP)Mh#H z8D^T-8UA~|^w*LkeE*=@nB*(mx4kX1=GrgNz+xU64_r5UKC%LbJ$U%bYM;r3YHZVg zA;Du^8`E1N1(Mzq7ppQ$B;SN1j!{+%60of)CmnBv+1XUDjD)|2NQ73T){g_Z9}}j0 ziBvQ!ihdkqT}+r%gu@w<*jIBXrC{FIl-ptZ%$OZClt5Tj570F7ZtypVux)3~27+;Gh zXr_Qc9QOS+he{F^5sVuh5fHIC1+qdAg6{is5g!KdmO_{2{9`3?1d8ByAz^JVgPsSd z2pV)bDMeou=&v?%KjK{D)wj3V7!bJIl@)I5rhHWOhpFoOO*ohYO@WfBH z1YA!V(At5`-Fe8_77qDKA@bI9htnx$a&DK*yyKhd9^2cfzs3Tn#u39C_bkBN;hVcE zC%Zf#M;A()q9I&q5}e}6XKkt=N&aj5fnK0FCt%=z?C~EqZI0T4>;wEW^?jw$TI2_` zCEY(hIc#f)J_qQGgzj$fWjjuLauz&z=vt87FwKng(~t4*A@)-dCaWeIPI_NKUA|Cd zT`YiuG?8cI`&&3VobHIK>DN9d!+swY!l3gU8b(*;Wn%8DbPPz*C1cU#|NbN9^MMK0 z0U~Ap+(eUbnd7TYPQx_@NK{#WFNi_8gzaOD6?#=Ds*g7asEhWJn+*m}5V zEOth~KafVA^ySeO9iTyU?+Ll>Om>i>8R~ip4bBxwaCfsWx^L^veBU@sZ3cq}MVz~- z?CwhinYhc`{A$!Dbh+BU2+mTnXIm=^2`jO~$PvVP+MqI2lJ&)yWuoTLWqICT(>XPv zqZ6#IB!IYKFn61X++Q^Bl3~&c9I1eDz+SI&*6Ar0vTbahWCfgGa@WGg8kFS;-`5+b z8ojd5J!%Brj5vx;?HgY>jj8l3C+eChpoKp_5|vAU+|7KRN9vH zLv22-j2M&jHSXw=DC@0!(>(8Z0XjK2;nf^Hw$Z*an%;hMkaJ*zZPM>3CPuJ4NRYSt zyl~9hiyIrcs6$IzQ=izajJMn735)C9K10nG1f?&ncOomU`BIe#V&w;}tLcLU4bKaf 
zl0LB$FMf65@dgWOsXW^?db7E;J=~bwxfQ%iTKSv+hORZ{=SFM_iS5l1c@A{Wl?+( z+kvM253G1H>YCMOGuPFtF40*hhd&W6@K;^pb2u%uVYqBho3)e5w7sBCBDMUsU;V0RH!HnuP~7S3#~U<9C(66BPEurj5Qd&plB{7u9!v zob1IJWy)IUxbCm9{}y!ben@ejtK{CAyVHmGB@qG^NIZ`?u0;FM;CwdVL2qR7CRSeP zupgus3d7`17?Osl5mlF4$BuQ`M${9g84NTfIp1--+t=p)YJwuZKlNdcJnS*GI_Ng_ zy2&Mz`XUv8uKTJzU4b!%&-0YzNK5ca;XqD(kpX}|58%O zfIWeE=xlcGs_LK8sLZD53;p_oB2}KnnT4?_7wG}>M1gy}mVhm~Wf{x~DZJC^q@C~= z4C*O#v3WS{g006ndH?XFXBiw8ELyBXSkf=A{?y;U&Q_N{PLgrGePvbGx!HQUirUE* zw}siZj1@{=+JpXPg(R)b;Je9~`xbNKEqna|jw6OCl;%>K)3enLPpER;5UB0o&=I@w zxu(s}K)9yiV@WxRha~w=!#QYhHvm-~HOx;%#V;d3gK_-j8J}j4(&;*UthVa@K7&YW zVxU>h7>)JGa_yM?Mq?>`g$?c&0T(~Z2b~MhS%x1uL1(SNy}3{8Sc+V4++0_0tiYXZ zb5g;U`vijje60YJqLM{WL#c;ab4ZkAX+@m5YW8*^U>Jc>;YxaHHC2D-b#eqSGC3^> zx5i{?G0$_2C-B`B(b7WPL*8Mzf)%j@cF|&i?TMl`yLXjtBa61j*NoBaN^NdXr4fhp zyORD;Q2@2hv9z36yK7?K-eREy1&S z{qWB-PpXN#q@$Ar*%C>5jjc}sB|m# zM6M`L*uB`MV=Pr)jj0h=eB=}^2| zWWF;;v&VEeKEEOV-q_w&0aQ!66<6b^qyN=G#O}(9Zv+$Ci;K9}=C~Z4lFBZsUr|z5 zB0l&Iiyg;(QUY{L-diVWBvOl1D!|YnyV`K2lk^s;bMN#C0}yr0HQB8yAYI$!_zia|CVOHdQ%_xs&WzE!DtcTjO>Lo7c?pZU(zTx#EVM~1|I z4+9_Z(Rs>%Z=H;!X4oHl4jlC|EUU{kFOoS=%vGum!9JBZm=}pgCFUkHhG29x@3~Y7 zw_1$mnj?d8j|mCMA$buMsAsE>Zr)+xs3=cynh}c1kgDgvA zC=@qYsw}@EQ@p=J$O<&d5GS)PIV8iRcr&a}K8lQn2Hcif|8!Ofq?us*kVIPn^{*5E6=be|sPL{6 zUw|rMTeB;YB$C<3J?+!_?S1L>uQq{kxcA~#)K^dypd}!pvraw`aa!cbL*g^SF;*aH z#H`!03Z>kZ7erRrD4!R3pBMVal0$a!V(_myI$yp1M`<(NjP!4inya|6K@~^v|3IG? zVq8qDW67zUI9NvPUtw_?-#VIAzSa8{=PmPyZn3Zo#yX10tz? 
zR`(U|w_*ySCl>TXlQ=0y14QfoSb?|^GZxKQ9&y^4OB0HMv4s|pf`1Ha$E*1E$&tz4 zt7N6~-ZTvvet_ulMyYnih~}b1Wbzx1is<_u;INE9guFjHU~-I|52UHH&juilRxl50 z#Y%@k08O-&J4y8T!#faHZXcIWX5VGlT0`u*0Oy}O&Ewtq(*BxpM2&>WC+HZFF!5SW zZS+u$$3Z7&=>Bdeg#g`E+bwF&f_TQV=K>>{&mCpHg6)y9H-D|61VVnSek|<`*`a`V z%I1Dmf=x{_RoY!@=3gEuxkcjYZMWpp{w+F&hv5u5 z`BHD`Sfa@KIpvYKH|zvv;}3?r8GNM3uIPOv9wgcDB}(Ezm^B(_om3L(JB;-1GfDS| zCnVpPwK@4Yex&rSC`Sw)WKM#?3eM3psyAwH335oWqVgF$u|!XmXp!BwEE&)x)7K}rJIAs`_9gT_@!Hi_$=TwPK_{1szpH~Ex|6e<1&uC3YvAi!o(mOJ>D1OzyQ z$vLinn|=NA)ai-{@Xtwx2$O9u~`kdS^vKIKEA$IPRCb<^d2<2x1<_qZ!sp>KJEZvX3r-Wg9z*|%wW>9qp3$ln| z5hl)R?b-G3<}YJKr#^wsYQ8Qj5p2C?&I$wvX#)As&4trn0QZcpY58W9eO#>a})Es$>kxpg^GoOV@%27D?+HwS_XXkT)NYDhv`S$ zZs&{uJD>6SO7ESVECZ-|UHPNLQC<5(-N!nNwPpTa8kfz#H9arFXz~A;ya;$Xgwx`) znpB*x!>PANjVi3QVO4xzh|NZ)rqOg&op$FzD{3N=g7CVSBuEa(;!D$BeG+s2UgmMu zWfM8y$RB!u?SP|}Y}a=6ZC6`z<6CF@A62ipZ@ZnM8{gvm$+g{-E`3OW+tPJcQHr5z zx-+NNDnmT{W)evK^bv(IfRDHpln&A2-;W^(pkEWN4mS z->&?|F`N69XX^Mf!eZ@@7hNUC+kU@F5bIW{oGkwRRBgu;`2_X+ z({-pm*s0rukluZaQxG6>MzM*O>$kpi=Lq*4AVGhN*K78mHxn77(nVFjAWi! z^j=zDf?lc!ewQYvg9ogafDx@Zh+{=^K}pmw$0-vhS-v%RYHcP-GRQ;@T_U+3qsCH; zKfiT{Ei2GB5rbIMJo_TU>Aqv;`y@-+7KByeRfayrsYEA@yT`}JI=wvd!Rf*iIK>`Sc~2DfH*jw-`h2YghPD--i1=7&^M zW-dfu%LGx|#+QVvyPu((&DKak=P|oWeNlq1&gZNVltLrv`PK$mKZNc8rKFOY6$#bT za=ybfFvuZh&`@n12SVKg>_riQ5nuDay5EPu1CKSTa)nfY21YQwEd)N!3L1brhOPLQ zSWhvol$wG0UtE{~8onu2O$!SBD90g@dA;W_0c0%N>$e6@+UNbJ2MXV5vKO=a+St{Y zJ(J9p!m7DOfniplw}H#{>n&d_rkNS8R+QL+Z8)X3n0H(F9@4^! 
zsy!XuwCH5|8(k3#1oO~d342Rb43}3Z>oIaRupRoM$T^EHwpz}FOb?27ELb~4mi z(26)ZOA=>B2lmOJ!dhM46jS*lpP#9MIiaz{xw(2s={rNE9$04gQTWmNjW}ba9&TpN z5l8y3wOQA0EVpW%1iQaNxcOT;-!{&~E*LA@ zp|=7v7VG>s9*<6tm*e*`7we+%=`4eyj_!wF=`%1AJc69nf*J5)w3W=`L%VH5UUhzM zmxa=}OKZ)0Id}wSV@NfhR`u zd|damFjp5fnI8pav~X7k6SlW`hKVy+8L5d%B5!?}I@LaItCt+4afQx==fn1^qYV~k zSRvH6{G)x_MrAQj@JS?b9)!Y~>po6l{iA>~&8kYDqSf?h9I4c1)aBisbD<&%# zwfJ~_4Tbslx&S->D8#uTdm9wNt2@<|VQ?G%Zsk$aRQY^va^0JH~FkEBjO%?xW}N%E;`=s<-;E9Qo2)5x1>XwCZP^u?fpIKsI305J2^s| zP+pQsM;~Yp4Q&>%Mm=8L>B>0{44xXD17caG3*S-mH~*rhrYmR(UnP{fnLvYi;}-~( z2FrS8QTP0sUc$=pDyb07fUPl4vr(cNUJ?YH6v0yOUg}V##@2zZGL=hQhnABO$G%uw z4}*%0?mCEws+U>eX#XW0N%$CPpAa=hYa^qmyfBA;w1t3+tC6 zSzIH@p@4h@SnB929?s*H^be%TPWYW5KcdHs9s} z+#ojqizST*yoCEcGgpG1$B9o56PWgc&llqzN#zyJVeO%G^Nj_l-WknsrVNOujNyA% z4%j@Kz#p3sXBVZFf~HnS9apD{@#4dXGD8j^Kz#P9e3wqRJlhL9Y%)cEda+8uY=#up(qSVr zBm>fLA+uc!Sj)xjVamtJT1x0E2-e7BZaj5?(?>D1y-f)Rd%*Mk7}{~LWf!mZT38v- z%0JB>Y_^{sE2Rf4o$-cP#qL){UM`xk2W!$62l|CFM0~nKvwuNFre4W|hMMiJ%X#EY zu!p(Y*j! 
zx|D1Ib*7U^`9x{wVbhwms2%Q8)HK-f3i7pC3lB(nB%uoMwOMMd!D9+*$u9r}^d^B# zI({``@ycXrXKGKJ2X%ou>z%OdPI4r+xOrfk3cAY5G#!@(pok2!u0C5cVh@Ih?s@Vo zPv={LaeKGrS6t|}F($cI3CpjVintMAj_C$AN!5C&%;DM1^hj(-`>JX9c-bQwtmVJ| zg2fb)xW&^|5WHd!)-}^VumY=Mx7z-bQ)oGqKOpr;){y9XSq;@ z&@JqwX|TdtRSp|oF3nk(!t$zaDC)m3AI|zAtNNm~q5&7CYLmNLAz{U*1U$EtbzI$W z_oQ-t%WRRH+6Qc}3e zNixw6G~TE2SqVoRHchFG7q_>}k>IkNtmpXI?!YHz zBV>TVhPc6cf$C-2Bd7hC^@8~yaKe?t#YS3cDrd?^gKgub=*mJ+2nP-*{&|^mXw0(| zD+TBY-{PDYbS6E+abv!ymqm`S!#hBWNL@8ZC0I>szu5g35&F3f)MHws_53U8A6$Aq|oz(6bs-%Z5}a5T@YGS~2X`q}~=^4TCE5tW!RMV6{#dCs0RdYQv& z5(oOaV2n87xe^CwW{$cc+EiYTi!lbAXVlP5-r_V1RXmjhld5Uzu~Yz%%Z!bYw$sz% z$NL+N!>au!!ofv`1A0f4*>fb^!-_VRx`%z=EHe+(!(`4*O*@8C8j){Me=H=R;SF5W zae?`S2SO5H+mlYb*}q4oDfJ1OVW57*PX*5dxH9oPbIWcT&XZrn1_DcMMjL7i2N?px zs4i@7RC5%Vb6Fc5j@};N;%ih|UiZ1H4knoUx0jvp<@eT->B`l+gyA{KUZ{t`8```>b9o()V<2uxEZd z*q^sxqZa+4a!2hqjm3whC@g8sFx6c-T$G(GMq3oARltgvmY;)H* z?|1Z=m>|E$r~daHUU)f-bfpzO#7PWaSwM}q_Q4Sc+?qeeeoNs5MPIg}$p*bp50^w> zu5et+1VvZnUiqviw4n~_=XVa?S8BaLHfA5w2YP8DSf$bf9T^iNrU!cUcNWT1MS%lq zop@%hD%alg6~6D}k0YBt%Af;5`1S;GO}H&MPU^M!uO>flKJ(7&0ac8w5t}Hl@@4u` zA%8RU$O=%}>3?R*^dM3Qvql_IU*gDiHNK4ivI^t+Bwj2^+D8ang?}(7yJl+#nj%jt zj~S6&uqD5Z5M&ev3%(;z7-&bBJ`StM33tqY%+#%a+VeixtXI8%jzo^O@^cPoj|1h>a`aS9bR*jCYLlJf`(G|G|zW6g}5Ux7dx{YM2M~8r`N3KGsfJ zUVppHbSXEx()yS*e$}*4e>Q!Z8rg1m`>%BUdh`+}CkQv}u7OT|1gFQy`ojY8XMc@7 zrqM}Yw)ckx@;?M%bRJmzHb6zv=bcvC;AFTkXQ_&8QJeFXiX=f4@oMEEN+7z>@n`VL=wt?N?{tc_3okYI`714DVey4*ciE(A z(tJPDY1|%9*&s!&FHIlj*qXl>5p(L1Df;Y5xd7lyKMLcEpIUjF+>48if{5Noyv{A?zX!Whh|ieKI@x0rF(BT~wd!F8-D%5{?zDkto3tD3@ao14}*yxaZffc`?!5M@65z|ThE0X!qm3Qx*55ZaqLavO!;5pm35US)$+EZrk_T|iECahztgKqTe-YokJ zPuRs5p!9wP{ODxdgAESAfw2L{L1SS_q~@662_LVuVrjDhd~n`xs9{)n2bVKE!PRpcI12y@}2aAOdz@MqRntM zU?7jfZQcVOZXn4_V!nx{RB+@{v-7+Yc!nSZ;VK6wd?c{Yy&~}|(7vio3XLR;7$~%A z<=A3;-)gFCY(hbp?3*>7B*Jw^IfI_=*$y_&7JDKb01Nv@=<4$FmkL`?h8Tbq!O4*q zcXK?k%b!sy?SPsGy=v#<9%lXlEM#@$?b)*uol3F5o3pZ={DXt|{VC}A4kgIEqXG@n zI^?YcI{rNH{p~W7Ezkd@Q1~sy37Man*$OUQ4S+lPaQ5>T^|NLZt 
z)9gz#!i>_)IU2HA0!P#7&SHPk&gVTfwEfsm;fW&IRfV4JKDbaj?+nHRsBb|AteZP? zqaOA?mzuFX&uCFvS&;HS|E%qpoh)ND+ez%&!oExxBZMr0KUq*0h!j?OEAjZpeQ^Po zA(ukM)>==DQ`i(`ElWq4(o4BPL6>^q9Xo*t;U{NJ*`k_~g@rYVipofBv$=A@h{%_Y zzd^K47^w(eR4XAwOd$cY-ZMjdNX*lxQyLJGI!vHB^U&NDTgtDpQn%#HWt%npp_nq8 z6Rbp}dU0{f?l71uDrRq08WdMeR$V3C|mj*3#pB<2~4jc$h}42DLA2 z$GpbN7&2{S@!LxZsx2R=d9#?hXvpiGtTyR5+)pL%>1c%){}+361mWV%vnk7Jw&8D6 zZ#H8?%Kh<+&^UP@bMjn zr{_=dz@Wc2etlvlC8K$w10O$he*#|fem1<_`}W;8P!DLX;i5P;;K;I>v{Durq>er; zSVcPd%$OHQdcKQT#YMG@M+BA|aXSpQ)Pe^N!1RBjrAF}{=DI79sc+trWJ}&4^{=ex zw%;>D%_}cRpR=~xCZ>#rSIhXB?{1~=8{36`fg?Q-k`=Z}>1T~o0_MrShAtqe%>vMpH3|`HO zdNyC2(t(8;zI(K?smV1P6utTaKTmDtdwOzzKe=>*PKurGaz%2y{=M^@ZpVW{@;+(P zwSGX2qQ7Wj`;#%=33STuG)p|1UsJ+9PiYIzXA2Mc$`pe|<-*EyVvg~rk#gn!rv6q8 zi~|thuMrSmSO+uvez=}Vi-ddn{>>k@mh`r9(E-woj+sTg0S*iR+MnO-w%==nsKbYD3O_G&Cc z8gH=Be^)R(?KjN(Q9vv7U$y7&qQ!(_oYr z^n4u_U?D&+n8H8(DBJ7eQp=FZZnI>{x6nQt@H|-aw(kG_?RJGLa5|*+15gJM@A#gj z^S3FI4+tU&fBjo$%AuuOv8W81v!@oFEiM}cj<=3V(cOO6jT28g_pKYL`6YK7Q)$Qb z)2@CG$DN|u2n@rkM6@ zA1UYNCvNX8@mgl+h7OyoBN68+Qy=KcH4*ZdFg=sSq;}G0LSJ2W=$Qm*cAhuN$0jVQ zrcR60o$KG{%|lONvxsT3(@f>^ZB?^Svb)5 zZn3r_=;b5wiktaC3lGXd<8{j@b7DWYu+s;^p0hufp^wxHrRz&z%QF@CXh3RT z#2o+?gWi*^wI7Sc9?F5mI$fZ>O;)el0Rq{{5Mo57%PDUg2rPDB253hU=Zv{u=d% z<%!Uw1k&mwjio(s%DB`R=xZJ{*KSmkezn7tOY!6Heeu_AaJ%^Vc1as$e zj+cesl%kk6>Qs3j60Q(0cXXMVF?jUHa$P+=%rQBd}~M z?T?y*bFK0qF^AiO^O;?`f>K!ilF|Aw6gUx$&rmnuLV^YaN8leUZeA4p6+<~f25i0Q z8@FP3BfDfO8ISt6&qNd`#mTl$6cEn5#tO%WL|l&(^q2c(3VM0YL8@a*Dvu~4b``i> zi;&a`3Y)=50s#auEPLy%EQrn3Q-R9Sx0dtK^|3XZEjVNL@)^!N_3K5ofq0HCMrfyh zBG{X#ws|JOyEo1C34E!71>h;T5ekR=^*&0HzA?LkeUtgqxB5aX`j6Ino(B#(gyvq726l=iKUM3 z^4OlL2$EjuJHhV;gnrT3D}>UA$*1(hva+#YVL(~n#gDRn;V`UVFeuC~QpBtDXPQOg zq5C8IX|>GOzbS8@&gINjDRchivT!VBxYYxJbKojz%&q;O7ySio+ENLrw{mQ9Yg%2t z;8jn9QJjU1iH&}W*hRe@tGH1X3-m}t@KZwW&jC)Ft3DP{*-QwpLRS=cJ>GTE{iO}WhdR2u!s=Vo(7*u$ z%Yl+Vs+X+s5(dFoalbQ*eWtJ{Cab3uE#ht`=Q6p84Kv6DIu+~oTH=)C!uEW#r1Rk9 zDRyX&#LMDZn(KH`7R`T#gEN+&dqWb$jEPh4tA5QOx$zd~2L8xkvTE@hNvz|3o_1V} 
zDG|U({t{mydr4?zucbe&`Np>Wy_$p@_ZlKDR$Y;eJayct+FjamD%!aT8-b=YiSf>m ze1<+-u>Onll(sRK@&K6&{ee-qx#%FSFDTz`kh)uDf&t?_jAxsNq75OGB6XM-Ok86YQDI0KSt8f>r8lF zs4E4nMndk*Tp{@*4rGa)|D==++wd0syKFyMV`BaL(SsyIa#tK7H|Gd@ zXRdkNUbOO!T#SciTqUhcK1wWai)F868j|;l7FT7%+qe#TaA0Fp{7?JS+*fOQrBb*) zLoE{$qo5f{oDi|ok|?UU8xanae3yd)bj8jSM7%FOe^Rr|yBdGKvoWfi95G100S1~n zGfMJ!@b;%$u12dfSg|6@1m5E3L)OUXqdAO>I&zh3gzXjK^hPR`)rap_g8+$2W>oQu z%7v|Wcgls3u!YUfS|20Mg$4u0x6jcH_luQQv+F$Nz3b#;b4XQ6Az!|kiyj8-YKgk= zQ+~yS>5lZ+S<_jv-J(ah(DEa`ux(xa4rU7N^-1tZhE`g#@*ft#x_*|k6vxc*i8ja=yPwrb zpZ+}zM>&)fg5t;4(n>-2&DWfQOB(C~+CDTVct~i*EdY@(`vNg35Ajv6JDTh~u0mZ( z3d$Hf=ayaq;ee(@z_Mk+GV z`*?%)sUG8D7$+!eTHV{>HC&fxGdh zP6n=IjFbcc<}A;|B#9V{dy;;=;Wk496EF9h{Y0qIE^bw3PScomM?Mf7VF(!>jo;f@ zuo4q8h8LeSPLlS>k4R-eobSl9Dt$KENp|G7(XO8+0icL?5_yQC%w6m__#!T!tRj{s z5zC6ZNGRmau`jCfrok0K0qtw1`aD*ppfDI3#^wqHj<*1tzOuJC)VDht@JHABNEb$T zEa#8VU1HX7@-NOVCgNt?;@iP+C(qa8>`alt^;xm}<;p`kinOVHmU!xx8T%Ad%5V7Q z)7GD0Y!p5HpJ27~9~q-*f768T8S+I@7J|-3m;p06XP960?q; ztts4@)U|21)D>tjohE>1m=XEGP%sF*)pOdbi{Rw5kDuAqB+vq%FKsxg3j>sB&?z8L z{&K@zWWKEKF0oJ>eTwe=;aZ$adb}eXS?fXSh|MDQ2-9CnpZ59V!4lWMC2*5y-wo4m znP~b<&@M#3P5BJ@W(p!xJ6R*n_u@={9-{=TvPg7jRgn*a;G0lzIP@8_3N);vogaU8 zaOY4Sn$sC77;835TZ*j2sEhuQY!K5_$T_nywu!Z+l_KCpi(<^1Q#K-hk3fgAx`N_0 z%)%Wkhq0dr7v+MU0qH)Zm(a}(H&a=iPq8yPC1mQsps3OglSi4>?-PDXU+Q7~TH7L% zvY4y%X=Y)?wpG66j{N7Z@63ptyo%jW4M+-g=EmAI2b-CJ-zhW)LuTEq)O(j#%>Ssi zm!|uyNxi=n3o*s?|By3IYaxTuvz!cvm~ic6Y568$0Bdj=+VqQZ5v2fbuU$qHjqZm& z3qT}1`ENoQtARib1|>E49zA_*6k|qz_cg)Fz-c-5kb(D%mf!NPug|agxmPI9c9>bN zui+$cI%vC%>IsBQtQ^WX_4#QM?1Y)Kpd*P9-HtR^1T_6na#=@;HP%Ko%Ph9$w%=si zm#~J`jx7`Zq}qRnP5>j#4oVng&7^0vgKqbrBV#DkCkAt}9{eh&<|6ty>1tT^ecy&n z+06L#Z}Q>Upbr+d+lDF{rh)Y3iUL6$ZQtPa^;MlT``>xHS;gIr&thDr)C6xtF2Z93|a)S0U^n7 zWeNz#xHG(P$3IO>D#vAM3=$af=$44co+K$ychn1DDo@-Bx~HPXX0HOfA}AKpwnud8 z&r%rV!8`XzJA6WvS(IgtIiSPomjY>V<9egl87&&NZ`_NHhx#>}KC3R!+96Gh^*y%W z^Z+WSPzOROeDY5Cvg)^4i)M7a^_v-$6Pvn^Oi^t^Z~gVjw8BjkkF|Qt#2Fn1)gI?< zcRFksnTOf?gH`bpM+chi!Nqvx-pJq;qq%7cm8K>px3ip~F1_T_^`7CJ7G2;HI~|dQ 
z7rYkhajv63^b}7W_cU7`u)l;TK%c;2Pi=0Fmn|jLUhSi^k50CgJ1MCSJZ3^M^H^_@Hq^{hnYDqQM z@67!2V0shZN7?YJ{1&A>QeJNwlfkN2+ksA-Ctni+ zk?0}D#Yk25-LHc-^jd79kGHg)<|-RefQ!%^CbIB>pTfnw>e9g23U%}lp_(&(V~%ID z=Klj+K%>6^ZSPMrFj2fC+2>X90JW-M)iTk{HXz0eLZ=@VDPw3{Mq#tllozp~HxQW@ z(@DtXA%ZxZ>;U|po2Dn)`(+BjI@%v4(hx#CfcSfar-RMs?&bIGh5^6~^h zh(#qC`B@R&C03USdx~-g#L6yPI9SDKj)PeUjW2?5Fi8FAImqU%-M^0FL_=e7#mCdJiXrk(rEt~gQf}*GZMck=PFXo=Eqya z-mTrKk}H_I%;^ISz~6#n5h%%9dw77FsSAue1UX{Gj^L)_8A#l*o(B#xe8i#fn}V_e zrO3d~jeMJQk<1S~*4jUX~DOn8kqRykIyb|p*rN9>;Du|?ydE5c7`zgYJ-ipkXA7&wBBkxkU>7ST?TKHwU#eFk3MlmPHp3x4`~90W)-ZrM;O6HHSy~-mBlGTFM99zqgNg`}v!G^Y%f(H94y;7-GLY@d@Q5=CJZ(+%<1sVq;$OaBye=^e0e@PF zNOO|J%lXYpY%_P<3687zB>0>&Q#9GBtu6fCvh*gG=z4_I_hsKE?I>>9Ud{j@d2wT@IDn{znu-C*^E6O1f>#pCRswZdjnFJx zYasqtMcJTWLKwRZkb^!@Hr4umBhbKjKa>m!sWcxh+RLK=01n|`axB{$1x?k+%?&$H zP@xm3@cI!$!Gm^a7g@s4wM2WEPM*i1XtUzjo?(t1!}AQ=am|ri)m8Crer0*rlb8ug zHPczT({#fbn*M}Z@*K#YVQPO}GSU;pMQ|=d?o#l!!u=*w#O}<$|LuRjGpsEH^SW|D z?H1@V3bh@UQhx=ACST8EhAB|9K_SzXU}O-jdR)kqDv66|x5+dCP7m5|eq`aNDMTzm zSQ4+(QA(375WeKQ!~+&K4)|3+@HU<-c2<5YEkS9pms0!B33su!)qF4E$z{|HE(%+A zc})dF;GD1gQ!Zht#tY)B0xw&EC_X5sRmVmN6on&WZRmZepFt?9@2{>zI73te6D{Jb zF`mtrrQAOuQmFjRio~du9LVkacU?NxwLfl`2}Vu(3Y1l3S(d@%d_ZO+3dc{@q6Y^= zbcoGc+@^H(gL!3?hT;_lNGWu^l1)nI>`jp_-u(K?tLq`}1qXozWlNZ;aaHH?E2~!W zqS{MCH4s2D1dH={(Y1xCd)xwwk3x0eY? zjK=@6o)q$I^Eza_fdD(17qU^0va&)3+HmEy6@Dpi;0Ul~+_c6%6keqA!im zxtQj*%x*P#Dgkq*Ubmww!MB_V{6!V!2!PAXlG)|f*I`5DNa0rCC3`gyeh4QbzcKZn zS>I#roTx%NO@*Skp1hIIKS^$uEA;W1w3~K6B5+Wv^o`gMD&jIrh2NU5@w$k_%fxYM5JokKHMSw69Lhxj zMTiPt{z_jgt)hmTEE+aX7=L8U>q`5xEFvaC`QZww-;`ld1a7y03sP3gQ{`1uWhP>B z*WdzxYIx-@)R*(*<2TBAicllfflpH0B^aA0k1HIoc|8!Tf4}CYSx+Y1wX7GUl{Y|O zLB8%<;rKSMtP0?ew!Yatqt*a8K6~*8w&Cn0l0gb~woK<~oYB029q&|k&i(=;igE+V z1WfwPd&YADR=8HF5CWII8f0^$S@! 
zuLK%rZ7RSS(A*K9t}*VEuHkh~*8oOz{k8UrXMGje;EIHwus?E(0tGu0?C)!A#W9!z z4qiHAOj~vr72G#m%?yFWkek_3j`{QnazaEP+Ulcp0f#Ck6UY;33Baf6^ixI`#;19x zoy*Ps^yzim*Z;lcIEfkYtIRi(_K;7X^4kaUm&Cxce+MvNS+BMKY}MOLFA{N^a%05X z>wXi}wo`Qr?CEq@6m>J&G?`O1*+=$jx1lyg-R$mF-GG@K_H~%|n<6cUY@=50fW&$ z6aa%}#7V$cuEB@8;(s`{f9lumO#6l{Vyl-w<*lnz>#f8Vb zSe|OFT)KE4%NA2*Q0DOjM0y2OT_}kFRo;&~-q#D~Y3+*gnB!4hIj7>@YS0|92!trl zDNS~dO*1icPm)!YFwYPXRr=Hpa$_g2X$Z#OFlnyo51O<}pjR|*MRiOeZn7_IC?htH z5mv?d)wJyks=18npXX81WlV^|M62zIRa}_R6;{qpo5SAF{jw>_1H#kZ2=bmSd3D!l z*}m2U6v{^g9q`1DAepx}6hSd7+CI8dWR~2vhab#W86rT_!h|7!wsp?!#_=?skQ9>* z%qCI|v0G0E@66))oGuXgYp|l^P7pY5e0=}Pwcq}*ZhS;7ddZk86c)#A93s%x(WQGU zi6Blgg%V47T~%5xs8Uq+$g7dggujH7cD6XsgmMay)rECc$88zgl=S*=ukE`1$6Lsb zr4asJ0<4X8-*njxi1>iMBeOKCTu;l=f%keiYu$ z!cnA*yPSK4G+-1*jRIrz#@D6r`I$c6Cuw-4y-~G_GGW6?L@}IIZxqQO%pqQgm9=ehb0Dt zw5_5$Z&hL9xLi!K{30NCa|SN5h_(#uF!?*@6)ra8VZ{ zE;eTX!H8o@G#Szs(0e6@a)n{YBt|G?t{OTsL24N&S2zF7r))|dNG{h)CYv+l#i=jk zKEgD&B$V=)LRfhvB9Ixz0C@R+FNDKra|);~NhaK(%{A31;>9~5riph}6Y?U5Llt}6{Ccrs<#bX9hfg+% zeV&s5vx!Blg2Su+6aa00KOnrOGe-^dr_24s#RcXC?!~nXTQw-#&1z^CcwU@*NJSV=-AMx)5tEx)lF2JP6Afb9Zc0ff-+|5x z8pPyq5t1_Hl_Le@c`rdh?4!bqr_vkQ&m^t5J4+so5>kJ8bHk$>fH3hKPVIY+aqfu~ z=a`RTQ${>(WePK~+Ys|RG^J!#aqp)j{j8dEl$k|=3Ly2OF1k?WcovKZwvc&iVx6TV zk^7?5raG5?61MBzw_ORXj(3=x8c7as?}+n1`q65E;a(brbKA!>jhImqpw%qnFI!yQ6cmd zMVfL)8wf7a+1C8gwXSC+$5<<$Kw%6-Ss>f=X|1t98r?0BHurhGO&C*$JcjSzzkaXc zZh7T|_azS(Fghh;sQUAUd+9$ecs~p3SC2L!Sk=I19@Thrw-m z7~BRQsU%*XVq%#BDB%-(-_G>yc6E-nB7AOBn7GJ;&codmsXs5@x@RkW7C=@&nFSG9 z<^WC}BqDn>X(e3-BL?G3|x$G8luqNTVsOvpM60bp29Fx6n-i029MMbQ8K!7xC5o)I8m&MJ1vy~c0FOLeh&HEo_kW(9iQy||)iRE*4oY;dTRc0J0c+bXB$Sm;V zqG*z^`$AP=sWhM{EFv-8QT%PVUC-ApE$tC;hm&Y^(Yy=Y0)%8$H=Jqv$$VR6Dz;c?Wsu@)azo zim$tld!bZOu#Qb6YBJnZ&LeHa^baF^+iil-* z*iGJ2@31B+6Otq9J>hn4xoExN2E;~S($;vx6Jg%i*Cf2tskFH4c_Afh4sNC{wx71w zrNxdzPIbwEfy5Rf;SkUztWbO15pLl^&QP|8wMMPe(o))D4;-`Feg?%a(#82X63Z$& zZ?AZUuua?qU{H2op^)7xj-x8G_y`|mgXpXzNG#jR1;FR@nwQRfj7Fm@tS 
z&x8nlu3sRwjYn`|fx9!@zb%n_W9cxr3*??ZrhY&~?sBddj%}0%7evF#6uj?J$KjwNeaDwu5`#9p!g>1PHkHr4-pygAHal2VQu`i1(5gBHTxGtQ9Lw+|@Qpa#V z6YMM+VjI@x0#=I3!%{i6Mq=_V?8SvVQnlPGpM7?AD9LmQ4axivbZ3ldK-27QQX z@i9$otda750ErKiU|CrGaWus3?8HZPYy`VM6bg!=cN1g1@svz>yfFkD1N4Z>%p+vm zQJ5Po1i>uu*NY;R7d0WGvhYaKE*TPrtKVHp$w_@)Sj zN0OrIMH20S(bmt;g`Y-L1Ui$?Zc`e4dv?26%(Lg$*PyLHnX4;^f16w}LGl`+KeFoy z;lcl)$Xpi%nts1zm-D)!3iTV_pM)-e!oxIj$XyIy3opwOhWB+zKvN&z#2b z8VHdmg~`wKQD!NEd`DK8fQ)%5#W~Aj#;R8`M-T9Efm|7C6SrQu=dD#hYy_Fl%J>qK z;J8^sRhbZx5zq3z9i>yQqBx4yrkc#ET_?e^fMhUXH;<=~0<(jfaf%nOM9T;<7`0Y} zia*K==KlR}|5wlKk{=;P!P7lQt&76e(fB!F(`-M$>^YYR%wU>+n z#t>x9E2e*tOVPdug$vH&-~aaCGwmIjEy)DWKLN*o|J(mSU%durlHO!cw)kobYQuD+ zwjhk zgVz&b$J75d_NSM8)McEJOWE%8(!A71m;Rwnw`ZQcC?dWDNE-B-{cbmr51~W4BO&X_ zR<}5fz4An~-?01p`zxlohiMXrR}5R%i$8xH-TvkKYra2xdo2BG9!6KUi|OQ0n*%Gg zu5>NO(s;K`c?Joyj0z#d8aNl<0dskCp}o2#b4cEC^CkhW#cgh1gNPpulx+|IP!o^T z#FrmL1FF}msmOcYROAnN>3*cnpo&u7BuOdAT7@<5gCcN=I%caV{&uN|L{@sHa%;u1 z4yF;pE->uQMDl>_KUHqM;KwtjK+#>bppuA=5orj zv`R_W8$=Dr)K(WvR#s4Zrfyi0#y2rE48dG4Nxo+fb1pA}%W;uit537Pkp*E=u3x_W_~z#yUcLX}9gy}t-l>-punGj)$3lOWn1(76`MFsY4#^8uTqsxb8QPfW zl*Vj7Cy!dGl?|h$yavK8oy<96V~7-nnqmcr?@87%=f%AFvd?2ch38jR$m=<%b zJJoZU;G9#j(M182aO}xCFWdw7Orx_~&{O#a;HtQ9cULru$3L{c`0m*YS+BcXuw9mB z5J$A|`&7`ddf!M>V=nI8`G9Z`f=yvs)@@gl3sy_3fhI=Nsp#Obwv?6n3RqpjFn0@P zciIQDyH~oddleDwR&zwV>zH0cM0=A(n`3%gC31Wdc9Zd!f5UCFj{Ew5!+jlcR9@$1 z2dLnq{Ys%6RDPNB=)bmy!rWR(cc9tr7^ERaV{f zTE>Vr7}TE5d8U2+{x^EidFDg5YmJP)4t(jxeciS&#@g|&jqPj4j%shg)9=UqyU`@n z@w?136Pc(gZTvz+jr9zo2|XNFIH95LFCoT zU{i3Il+DXaeI9WTz^;SOB`=sEe09lGSVI^d&aVIm<+@ zS$Ha!s0!&OCfZ3%Q*`h`A(2;KC6oA9gb__l$cQ>oC0E(C@padK+`uIiy94HC4X#K1n|_72+}$w$Z#k4tUw+5bkJ~Ww{jt2*u|RTu5>ef@EBX zn+;R&$}EVA65?8Oqb$=8vt&jviB_g?rBt&ZQOB)_ULf~itvn|pYogr#?n0fb2Q)=m zU?dN49poIG9q&w*C@0#(!!*6Q#RyI8EWaqxGU%oT+snR+1E`#nfLr<>a1)g~uw}d- zfkXvj%?>6iM&KJssF)Cin?QeiL~-BxSMA3iECL7+2eJODMYIG}Qy><;HN5+ie_T@?4nzjslu^YLA>~L4jOE1j zg=+Z$ZJzfhx_GWF(#KMjar(g2WcbG`N&6!euZFD9WSWqa3ptn&QF<>6O%{bg^Q<{U 
zIK8=B94!as4Q`gGN?Kry@D}=6%^34zyeD38GK_%3I(y-xZaoxFOj6muj7Aak24e(H zP&%?hSeKyEN*BjLQWUJi?3q0h9drSMz_)miF%=g0J5jbOiL`g`eu0YwmH|mvOSxQd z0(nBA4`taGV8%(JNK$;+L>Ru4NePr4UilH(%!u2377KOARf=bR7)7EbpCv;UCp*TV%_vL)3*wgY8_Z`P&eSGI^^_gp+)b4VL9 ze~!1$;|c>vxvg^NN~ZNKkP5q}_^xOZk=F)=ND0J2GBDBT!gs<>JS4oU4u?7QhSM#Z z+>1IR2qFb3PS-d%nCyU?;F>9>QG9b9Tb_H3Nr<&AE(s$N(Kvu?x;sd2XRl6?S*9fk z5$o6jbukQgnRLiKz)h28uCD9XSK1p{@SEQ$Uh$s}@fb0@dBh0IrW7Vtgoq0q?8UW*&J577>QPm34<1%?Bd zw@Z?J&v1@Q9c>3OMTHV1MsVMUj#Dwy@Ra%o4bMG;XoXsxAMUn887Q38OZhObut69HWXDNbT-M5i zCwY=5ooK&!Zi~sxkyWx>ea+d#dbOH7Pw)LJDJ88l=QTJ$V%=eG;qu6rk~dz>eaga8 z1F^?NhXN&!K%{b;Imbo!EaEq z@nq;4$u00X-UXkoj>D%V@c95~eIOm|6{R0@Ddy$-H#}WmSs~~QDwE{BMR=14-q{H1 zsS@rjm zkuE_HxO167`31|lcTcCS=Cs|}*?t$%|7qLdc#RwT0$QRWaBBc9h%FW^xsfNqvQfSO zK2fI(sl zwMC8g8oSpUq*4B!I}=JyYBJmO$>H47U?E(Tnt*}&jst2JmjKcq3#A;o`NWZH;`uGf zocrDsVpU&e1lZ2mi}SpypZ5C0YyMGgsKb~@0irOupYeKuvq!BYpc)JxuxZ(pX&>56nk?tsXqD0l56jzL zWdWh0jd>(^>E^J1`Lr;Mawn$R6TlqAdTxdXUTJ>-hb_R*Jz_lO5poYQk+o37=ZjOT z7J8HNj@7&tBm(S_ULs`(;$%QiqFP5j1_)Es0n6jkc_b+g_iLpfag)Q}anp%#!oP5h zHsluqH{-t|hFh43F8=jAX$3b6JFfg-p%UHr=EbO@|A4?q223-W$&V1?bx$6bX~M8y zcuv|Ll>Ct3W^6Au^J(zW*j9s-*w_Y@&)dEasxh*8Bh<|o`p5@hSib(KXoC#&FaEv9 zC{qySa-t;H)(|JkF@Z|AG@vLL;zaNXfI_Jls+{I>hclO)c}OYZlOE^Cs_IoC3+-<| z_ql5fC$Cf>^{Zqup`wiKIor->x$PxuyPpuQA!>f12(vAY4~}X=-zels;2pb}WUEd? z7^k9bw}`Q#sXyM?jLngGX`*+NE zhTgEfD9XQo875k~oN52L)qb*l$)m=UE0}>TdGCuz=0XMf-Cvi<;+xAqnA+tu%cv>- zed3FgeeW-66bZt6DP8)o!kV?!8NTwM*8qrUEf6*YK|u(`f*TV+vBTbbC@6@p%;)Hx z+RTF)vTN*?skS)0skkWhYc9RIempI9JVjl~OJOM5Jg2R{z4dfV=C0lE7w4XIGTh#? 
zk?NrcC;$4A{hv*@I$_5eTDD%J_zrBxSaujDkW_ zZqR{%UPnVOD)WjkRAI;Fwh|GT!Rxm3D=MSyWt)c}O)^nu>HapE6v-o3+8=n;2(@^~ zR`$UV$w?ZQltVg+Au5m*>VoQt3ugJ!2$gtSo*5C^&9}0BXg5cOP?waaC*Kubpk&VN zT}dRjyS%^L!3xR%A0n}nA64}k1brCew-K~oL{L7T#Zwa8ZZ*wcX>U{=wh53G39r0{>~zC56tWd2 zpr2nhg0lNVxhUD7N7(-}2#gJ4uD{oLMmS^-)5-HV{Pqm~VU8?McgL>f*t$Jt|JTh5 zi_6=b0l+(#g5T9pXUnjPX5gEz@F($tPU}8lHZvW@{r-fiI5%d9!v|~giAc&=E4!<4 za3Qz@G`1)ehr7_|BDlJch5jIWB%Von&9mWv#S}q0TO|ET?6kv{C2#=;(T^{|g-B*a zg9I6HSQP13sQ@p(d$S4XdOdIGrpk!*9f>DEm@YI?=slydB6ucxQj6xuK)eLGC(}fv za&f9vuF&4YLOG0VD?muRr<2L5Q8I`8JX^9G&EPFsoeZ8~*KVj>>buPh)EZw}~hB5-q z*WeQ29QFFT%Pu_NdqTq-UgRS$SF+JqiiFKqOD#u>3K>8rxDi*vaEPvB;GDXDhaBb z6-YNzE^ROcix=vaAiy)8*(5AT+l(L`m^QcwWOrM(xM$@Ekb15%5>_8Lolw0)B)ZS; z)WI)t5+2;J+HD%pz8Yf+-_@90!jm5J9mzxwm#SWh65$YAocAeJXoCoIA!u+Uk_GQU zpbBX6#sbA0iOf{LCscZFD2P+;0+qvGin!|*@jQm@$@2j0v)mQyT zvDa|yMc23}W;a927IfafDq*ZPXIzta8u*+kqv1ARjg=c+SBERuH`kI9h)S(h!tE6IVwF z8t<$&f5AmfFk=7gqQuuP6b*umf%Z28io*W; z)z#I1wtj_A&GQhVq5^iQy+kGj648D@ruLFFWh{PY2>%VyS*!{?)htaIn7C{q1Zk}> zOKaE4E?sUm!z&nrXH9%*z4-t;`S_q=yFS5=H)?_F9WW`1$gZn}jwzdv<{7Q4MQI9A zTFF#A{UujXq+}@H$rs8_PokE_!Kov3*2wamgsx6r!t9Xtm zr7wcMs^Ee+dpQ10BeRB`@z$M%RnH0NP zRoyHy6Y#rl-%#pm5P4gvDKeO3syV~d0`(Mh9a`jZCccd3no;9Z|*uVyf>&B z*qfGd`i*i&uCo%eyvTIV?!*O{vx{Xy9F;g)XbIIAriz4lpnkKAv-K4F`x?1%P}yXY z*% z`=jnR8p82a20Kn^^;Egh*zvQEvE$fh@WR^Y`VY+?E9~w*0NPin5ciUx*TgFMQ$qq% zj@>Ux_!qQ8^%)#zqs*1xd-l0m26?+C>y<=EZ5AD{W}|ieKB)*4?J6c+*8WuAc(maE zga}1_Z3{A}_^?RJ*rov*(k*b0xKQMXoGU2`Z%$C->R}1SS!t{3y~|WSB5uqu8hucU zub-^KChgaZFmF&vj|y7L{#2({dEp7{pAH+T1Y*KcAje(1Ly)1NOL}|}O&a%FiOhCE z*UhhwI_?qtY8j1IS{%am)u!=eMUXDP_V^QXx}*3v3|>xF^Ok4?_=}^_G|Dpgahk61 z-{zN&MhV~0&s);3{Zr$U!*u+x4>5=s#lIl_?~ZW`TslPQ%rreJw;(@Yl1Dz*98k8D zQSH0a^*L{o_zvZPn3fE|I1auvJ4ob-QERC+`$>c}5Phb$Q_ie%&E+$1heyL?#o|jwqw#pUX~}HP^P*{|9C#nmw<;x$TX z*Hgisvw3RPn^UJ0krqn~8)YIr-OSx?C3AOhn7P|~EOYl9G*8iu%f~l?x15&m;H>;L_}hQx)3T)FVjL9Xh!}_C(>xY`(Ea6V#j=aD|I>#;^T zeYku@)DqJ;R_eS$ALMWHZ3LefPIG!_vqLW3zOuA0bh~38u=gqe4CiAR&9ez=T*G*E 
z7t^oAuz)pT7c&_7H_?J%RiK_Sc4)}~KNZH2%0`>IvOQ(~)e;#1XeNyONfgEk?J-;g z1Ih*J0O;E51+3Ohit+OlOX2L$2V^~mQmIns?-&be6J482#>RszYF?X(yyq&xPdP!7 zc#7FUWU2`O1Ll-o gVp+kQC$G=nAUtB<{`HP@8{D&0uqZ!Bd*oy=ZZsO^}UOc^S z0v0FIq&=E`RqRAD22yg~!<^E_(uZBp7N{#-rVJ<-IGyfRHTti?YgDAaOFkrv7>>W2 z&a}ToM0iT`41B?1@%;Esje}5|_^%U1-*E-ol`bEi5DS+3&!{Jj>%#cp{(U04gw4e* z+GmmeO`d;1DmP~YAo2^Y*JCZ*#WDqD6uFwOuN0Q_30H5CcNeJcaxC@TO`2_ZWU;md z6}KF7ppN44Vz{c!0CWCff!-BNl7QQrt%unvS%GeW>$$@bs|!@2pjod{gh^5phV^gw zKl2h7=`y)akUUCn6n?FNq1c2>oislShwjfCR)17aPq;Oro)wN|1SaUWb7FrZ(O`Wh zz$D}An}#hU35-W#B$=VCv=S(6Kx2N=G1MN-IWc#Fcvq3I=or2xk(;nvPE~2bf2$3F zS*;6RZ*tAzw!|f#1fp1$l%O4Va?a3ZbzU>4+AN*@%+4ft(Vqj)QbF2h_%I=lP%!7zpDeKBYGO+31pr|9#HvpTB{{2|&+;{K@#9s(9>%~53d zc|FjG&4(B?wE{Ld%QJ!VNzBPVnvb7SVHG-b$lGt8DD+vO3j!bKG?5aROj|U4z+_C) z8Kw`#ByGK&nRr4~utuGm9P;@#!5mwV5cx62AM@8@L}st42NOo9mV=ELQcM1Pe*PGT zBMi%85bZ&mk5BRJFl03b0!0yNiEEGdhXl%C>m8pgsAhh-+zw+dH@nQ+NH6^)~& zv33EzPH1Y$FB|AFpHh7(IX<}H@PV+~3*2#vA*P1SX5Cik{FOGU|1t_V%Osre)daF}(0Q{F#%O`h^b?)ZN=NniV9##CC7gVM_$OpPTbE>3;CKD}^ z_ZBnWxX=?GKf2)NbiK=$bzA#|GxAY?Js!h_HB;AT~gUWZyCg&YJpw`3|_`R`x#TK$+(2x8iUg$+! 
zj7`*|I>cA(^01H(vV<`TUmM7|sC;K;>Im z3Jsk*hef~>nFB*=a(q>U3!hG{v*mFrz2vo4IIzBeq9kWMvb`oAAk=|@0)r*$S`9 zCWF&F-$fr=RWwj5vxj8dIc34#4|+#0#gljRT4{)!54-($zgbEit{6&r;RLUch=(mS z0x2Z`W(j4lf~2`8;A~w#ky=EH$aM z@CJXKpP@X3MJ#e25>6Qkpu* zfyd4gX+-0%voRMVD}4ZOqy9AAD(hBoVgPYC<#JsoI^U@askjaUsbiQoiH|Vq0LqwV zomydP^at%E7`wlQqV3YLEF*`gaFv{%k&@Ug7WaF@>$ok8qD0!Ykd2*^{cC?zW&7$> zh|mfsY$WiPH(MaI(yLK$B}(0etz$=D81JfDv+`~;rOf9Yz}RiSp6ScUf;b_(i1Eno zY2GHK*v!A;-}ym*n{BQoZw(SP+9YD8kR>w#Y+RM{;TH7+x?F_{&AYxnp!m=fV~=bN za&kJ{L`s6zbu&nZg|V@IofQg1D=vvRFAM2ixuxt1bj#_ex1n@g=&1LgbRL0gy15G_ z@sV-`Z~)pW7%K$A*h~%?yJm-ATP~Q0u<4Q=GM4m^aBQZB@W0&BL$DtgY)n{EE$&m= zqwEvgqk@WnMG3~%zBRnJjV`kzc*Uu?&NBsa5gkIm+xsn2%yh$b_pm6tRF?s6lE-)2 zY`mIp5=S`jY1*V0VB~?auZ?q2XanC4#mA$R7I$l$%2;l0t;e)BiqX7Qo{n*njn-Kv z#5A{r#wTh!iW4l9w+UX5F~L4Eax;)0%5n=C>Kx90%eQP-97WK-fBRPZE#1_wZpV=j zqq7~<0mtt`Mz4K-<U#}SARO&5_i(CGrxIF{21sud66{q3gCMP=Zx?Rn%uC`mQ$kh$6M9b zj~*;+*qba=;TEswqnl`cjmek0`5<`rI6x3)vh-au1f$*3~g3_@nc9RVa&H z7ek~UePck8F5^{jW`L!kZ(1+nUz7$u@mPT@88xFCJl+Z8;-`Z#}!$+RdZi>vnq0u-k992Ce>SzuWC@<&nnx;|$lr3@Nt`Ufy=$;qTXk@&U z>7DRSxE~0l;$$8Td@VgsB^ha{hq9sS9h+W0zb?0Y+AEfajSyVkUNF#Zm*~fu5FAtG zq9N*d7t-%!>rT7ve3`&jJFht(yjGali7&tRs1Suo^xGwi78YJ2#+_mpd-i*ZadxlO zATMZ;@3cIsy}o#t`6vw7;4)UEa_-;jp-yloQUd8Z5}#W2I1f$7|ITW6o?F#=+DkHo8m;etj@rDW^DF zqz238rWo`aSD!BfH{nizh4bO3=fIsV%G#Dk&Ae3hUGh3|&KEEL!9lmO zDTq``%+ysm5D_^)63Jke;?OfQK<uzZ!Woz1t zWD5={yANLH6SzV7siQ5-Xi3ByQ`t=lWWwbJ;Xay1oFe#!!)+_(R#wR)u-vn( zj7v`uB2zZbSnVGle*!uqJw!|a^1!GMWN<_54}>7MBEK%0><+P7+=IFH^md6^Bb_Lm z%FnIiW+Moj$Gr;3>sX1g-K+de(mx-xp!EG#(cL;3=CRyFtbn@HOmi-)>Uu(qe_BxJw^A;z9~A_g4Kuct%6 zd#)Fg=#xr@(l_y#o{QzK8i!cnsoIVZrtjl)^wN8QG10>b-srFs-R%fSzZ1#9rcZDm z5nbzhf$@3t>_w2fAEl=pUg~vToFKye{pys%dHP+J|5ujf9?;p}Wx1y;@AkG!YM31= z+b;t7%b#!QK)@dpBV-47LZ2jc35o1z%RwHdtFR*$u%uIdXQu#SonF%Y#cAJ=a#2@gEYbXcvG9^fBIpQYL3CtTPY;(_3AU1^A+}c22uwadd~k?MZ=TN zoBg0Q3_4*jX$HoR

0~S-aCve{}siqlS);T}ZL;Tl=-EjrHgh)+4LB9vy2vy1k&) z4w@V5ai1-wD@@>M@^Z8)bkwmrqJ{6ZcXrh7(wbN5X<%(}J7}E-y=JkeOm8vv)O6qB zzSY;LIIF>y<8(h;Uf+zX9H)Nuu?3T8Sv?%Bq_fT!0N;q6FQIf9tu7*#a zhlY6EJej;2Obrf`WP>Futbkp2uTM)yj6bnaLn!pFA*5)9s@4$dftK@nJ*UDc&&`Cn z9JB^kUB|2_z!{H_>I}5iyow4dRCoH9xH5zqFr~`b%Qqz}g8)z}N)xj~Jt|bS%^qM# zGu?P(R1`tDA64s!vV>@KZzzc+FGg*a3xn&BEY?%S8(ypxL+4ux)yFr9L^t@z!A;4` zT9rxy+5^>vE6u0^-Ap+_T(Ar)5;XrO95dnjx?;(v;wgFQ8x#Qed%6QRh>6tynhFY> zaduiP*+qQz#pxUhB{{q!`yEO>vaT@TgzT)zPwUvM;VH%dL3D} zO3}o6zDRbNt%p=IiD)lZ$pTrF#|cvN)ywE1uTCVC!wTQNDFfRW&oscBsV=#TtY}&%< zI!%U4tdNUj$p|ip@;#8`SSl(2x)FZ)4$C6wa`E=JIp$jU#QXu?Uj4EXmW*j9;hLXi zlu4Y$$OAN$-0z~{zYRL|(ejaSi3(;5V^@9LRjr`mI@2 zTGoxSXjNRl@8q6tZFduwFsb0M7B-cN8|h8UF`~7f2N|0=`D5^o2T3HKKjV;s+DlF; zJmZu?ZMr%Yh5EEM{a`Mg15W_&EcCXTEhtK19e`PSdoY#G0cX$d8%9{=^i^@el*(EK z`L;dcIOc|JoS?fvDpKV1xdoqiW@uHWm5vW3%5Fay zqOq=<$F{*q_l9L-UDdF_3RLLhVK+&%D!5*Rt(Eqw zl*X@(g%F@e%f8S~2TJ%1zQqayU@d`30h;=`E?D9avcYm^<7hdyGYc45Y?);a(H|RF zJW{2)z*3me1wBp_JZ#MxLeAVav#MWDYDU|4y++j;xd}y+W2` zP1T|;%3BoEF6X^qP?rx2ru=W{n>9$#wENg<3OB$*M)t|RGkMaJKWI!*bb{LN$ zB4VCnm4O-3J()&rP$L)IGD@Th?kHa5)uX;T`zkP9{qy*mi{)`WLSd|D+bctY&FNYl zc!$-pFCII^#0igyJD6q+GrMJW-17D!_{D6I;+3(b4&jwmot`{XNj;1-3Eu;>V#gy1 zMs&Cg4M4vsC}AQ7@5O|?&Px7mVWi%7wfdDvsKfSQMpNr>66&xNl+yG^nGJU2z3%D4 zA7$p(N!V&vfErbrNF|gR_ViIuX5G>qM;y*B&rtbl`M}J&Rky^jsNE;tbwz)buTa|x zB5$Bf9FK}GQj6DD+YE%+ssP&RI@37-jUs`tfKvX}f?C^r99?($*i0Y># zqz;(-$X_f~pU^0H;X_0`u%5S?)O2r&%a%-c_l#*8)yJG{g(t@2E3JYqUA~loT0oDl zox;F8Hw-st1Y6TkND#jrI{2Ba;CNIeG4<9d6v7=B68qSpGCt#5LuC!)Z@q5#gpo62 zByQ2LRZzLMDD0m0JC?DPw;z)8xr>8AoF;{+SjMiWm#)U4yc@-!VVvm%3fH zLH0Xtko^tBe)qUN7=-1B{V+T{ntl3P2R0=LGny^ppAN6&a_Xqpo>q!4(jXTH2RBb0 zE#psXZw`(+e}ug`Jn($#td13-cDV{*z#FN6+AFOH@EmTpZj`(Re{Z| z)d-B(+(&!U)CD#lwSND!(H{&N{U?9VRqy(lD|?%IiX?joo5Q<=nw&&?G~Ek>c5~`# z3Xm|%Qj_B&RLc1$LaV~3T3JwQ-oGyV$F604)MBoF`WA;zWB^&J0L4@kM@7#guE>Wt z;$RkKAE}lKC{8SeR5`|`5zmNS|9~0~1#fVeulMc^VPjuMJc&;4V&}7TOYBd`Gx|jo_XUz~u_^CEL{J>EzgT z*b^@DXvuTBs5v5Ca)dYYh;;n*E`|M-8bIgU8U 
z0&${t8m0iRvMyuJr|U=+t~BbFMMBuck2WFB6@65m;X&XW^ByC(Nn+HP6mduDfanof zG>I7-Vw`SDLE)?|Bjqd4E5Rdh9N2~8@^AC8!UtrSrc-`^xQcRW9V=Lpl_^z*5rVbS zfb}POBywgft%L2c;|l3wn=hPB4l8DoF$j$09Stg#adCzZB_vFuQQ=0PP?@lp%iq#5 zs95Ovv{fm@epsF7uzh%ly{wj63mHFSUm_PTR(a~80+EX!b$xSCh0UYNM5+)b9&s{cSFN@uo^i56I;s#S>p0r1<$GboVl1=P?@W5LuFyVyjE$`4 zZ%$3z123z<20E^=f&InD{?m&?^{jmqx#->5Kc#^5;<~~~dAtbST z^LPP}<=1aOVpIE5&$Vo{!5{PVc?6(qSTljZ=UkjO=d1{_#d?{f>&$>CKg#aVoa}Ki z8-!Qs3tCIRI#}jnDm*n#-lI=wfDBE~KK*A_`IH=fNF>@03nR9m_Ff(bZs~j7j@4!q zyvR*-)>fKs;ekR!!$LzZog4BNe~3QvcyOx9z+8>1p)~YY;GSLa1b+D0gAR*;xZW$J_T^M7u`0gVkgrK-)IIx&; zeGPa&7fVj$q4;8apR68)BqzxXS6pmFu1Y+nr{XbXmtmfE7Li{)#W=&b=)M;*GLMya z-rEn?t>lO69p;DY9PWqvty9I@xlJ>F1V}~OF=Q`vrhOMrrzAJ`OUxD}q(r0D)Jj7<4!#62j-pw@G+!^``Nh}Yy?VzL+e+IUr=v9n zk}P;0isTp>)Rr-oqZ*M+&gCzqwc@bhtIK3WVUzN%GJD|VEV{_CeP@12w6t;>D(WvO z=2n)afa8}&>m?N*HNT2+;$EOuiX+hFE#1ZH6^rcV@^zHdXbJXW6(g?2Ls$*a|K~<& zZrIKS6#^y(%{;F+c!*aU^-*^mw!PubvKmx0sV~im*e6f>6T;(W|7D=2ACZ6IJqH%g zMNfD|Is%-jFG%4$x>&}egkT~(LySH0hsxsXIH)&FL8y)`As*~Vz0yxCUQa?aIje@q zD|-zp)QSi=BX=#7hUi!2Z=(jgW$5}1qDrT-*c3n(_(`NaFZr?X8;I5{ z{RYzza2(w<&n}6Tp~6;sA1mEa)os?0K*o`I^J<;;_lt%;u{jV!1T5rrN~o}TfGhj2 z!3FyYVVfPG5?C=K$u{KjyLh}F5#_91{~P7SnuTpYrJsBS!lpfd&!YQ=lQ1N*PhG3lU!8%iDAuyDf<<}K?1K@mY|A(c)Wl0Jb!q=_8-7dsBhu_Py!l~FNm%qAtfD=oS-80#K{5* z^8Psw;9OrP=3tQ&%Y;YP_BVhPVTj=cYggPQnTJ}@YIE6iOD321=JTjh7paw}Str&j z!ZZdm0{~wQZ_v>vpIE@VSy4d_}uQ2GYaV?z@Iaf zC-MGF(;?Skui0}iBT1%p{Ca}9Tg!~vy1}<`f$Y-=Bu5Th8V_U4AJi+Fp{vfSolG1) zGAz`Xa3A?SUdFMpXf;_2Ts-4Db;GTV?;|-mSIG?CZnnJO5>3`q?~Jb!8taxSUz35H zVesZliS=N`)iZp8`KFuL+niG$&g==*A)rM%!vp=2ANaGpnH^4F-FiB_#c(GzJWm(6 zhk59$u1~0SO1We}COmuS5y0Iqzk$%cb;&v9LH3cZpfHzjP{xyF1$pLD;R2VnblKm~ zzN&mdWUE@q{k@5n<2y19&3dnzC@=?8mqaIMzV4~v?N-MN4JQkVz9o;4NvBY~ovqOF ziTIR=uRIg)PU;pTq;Wt^41X3E1uxi}_i$bpV70I)kP~NbF+Kd3xyp7{R|H&JG)~xQ zx>59vsqF^QL)>KW)?Qe&f|0Q@6(dAf&+?6Ev?^A9>bqBd>NW>f<)>D&(|5=t?H(TW zXqUzke<$hjoX9^5NskUhd9K&*G(As3(^CWf&epL{BSIld)iHuQX~I=e+1>7Afj+yB z1p4$|T~E+w)g?X{^!Z6o%;%lx7JZQpLUII$^CK+TKaE0hF#6JiGS9&(NA%G*rErrz 
zI;PB#+Po{`uS({l>00RQ(QU$U!c;kXr;$|BQ1M46&hK)s!8Rb|;4mO$@9<*K?cbWe zNLTnk#bJ7Q-5hGq=(c$Q3eE&5jwC92RDrP)S$6 zZgZ4-=GmPus>@6o_fY__LJ=-ZB?ugiG@zQq_;RVI%5Tue!5K!pDNbml@<~Nb#X2&> zI4FMP!#us)M9~<8DuQr9#uN%=|Li^-49n*TU)Q5mQ&5^ZjB$d%n(NhPFM^l9(2e+5 z-M_H=hwgPzYE0Q1>(mLh^xmGh>u;8m-Os!2!phUkOFd!6qSoO z%4t~(q`2b7dur8R8C(n(OjH_!R3wd-*I>}ehyz7oEQx1+HeTXs(*>iOXhGF~7mF!c zVv3D}5UIo*Y7&Q1qG85l2`Xt_lQ(+ve})Efa7)(bMY|pD%=SD-{!v7IO{2LsWcQ_d zhJ#Y}FkF!Dgl;m61)JPpkgyySM5#6ufbH_l+b@FkJXvJ|%f!|D5XTGJ?=@R*gs1e_ z#8axT>*-G*i0-fd_;*~4U;ptRK~1rfGWfWzcxoA9tf-@zCYln&H2aRqEf6XKWFQD8 zu*;FEOgb7x89Th047Em(*mSN5wyEl^BotmTqLZ#jMbtOq`;Nc z<{^&bX@a5;`b>Rt2Et}j%{BH&qohH~)I9rYQ0in(cb+cC@zOnYg_j3UqQc|zG6*v| zQ>zOLH8qVZWm2jeNPf&#OXsU)I-SDghlt#UGqqGWRxbiK)Y2L44>z*y@kOMV@u+9s zLhS`v)V!4%QvJNk%v1s^$%{ zNZc@sm9$51r}dyxIqdFX96{Yja9a0lGA}-;$PWKPk)Z!c$_cpPihA3cYpLeis=4rx z=0a}nr5fE!*U|MmrOI@7TZbM(9tQ0GKvH*VGHo@Pd~FKHuX^YeZ%jeJdc4X=FpD>I zYq7sQJ?aFT$jg=pk%MKlQQ3g3+xe{HWCo@sBqC(ZNz2 zOajGUfnRm>NA%aI5&3fa7oRD>jD?id%A!NZA56_}ExgV|M^fyC$-~_SrmiSkTh7oz z8S&(TK%x@Uw&wL?&XFZyM96of&xxw^<|mb4^fhET*4eAJfO9W4tQKyoHs&G{_H${3 zgR&=^2590%QsDJ zXX$!5qB2>!Adcs&94TLf<5%yoUA38o_$b|4YQd4faXaEr+1IVd{;@ z;8)$bV(RO?dMuGknMH} zZ*R0rBJoP!6d!MIv$b0ZYt?nh1K(q|4j#d5Eu$T5b^6}Wf_A&_S5&260b1bMNr)s@ z9Q=IZc~n6bP){EPS>Ry0_qX|y9F1@!qI? 
zFYgj*Wq)~euiZcYiEBObiR;iI>e8P*`m=AJxO4j97xd>V`t$YUojB^fMZLGE_mt@1!3Oai zs{35tMv-HaYqzZS6ZO+KH~E?O+YSgCvw(s)K!aAzDek(Tojx3ZlznaZ8HkKfPJ!lN zR<2ZGeXsR8N5f=QXSV0sn^%U7KCZ*pkaSvO4vk7P3|qP)r_N1qlK*3BkZ4jLbTDch zi^i=rt$O`YB)}Jdmk{35*WM`qO2Yu=c*PoXL$-LsCr%mGY6@9{ypKzc~X^`PsG;0s^?KF)vdlL)$-E*6R!1> zYSW}8$I}EJ*Jvel&C@i}7~6uboNG5$r9YRfaTaW8#GsRyKpix5Yk#+WWVx)`Qk2aXQc}=2>zZH%jzi z>}hEBR9fL`Kx`EZe6f^nq4O{Q_O_^36etpkyM8E|%H`YE7^jDwajPjiCWFswktlW%z`_BkTzhe|8qK~pqlH0d3w)R`3=3XR zSZZR0hr6Cp_FSgUKO!|)R;|*7oE6#MG1YnEW1)^$Ivtl)m7*1s?S0KJkX76N2|!l zFXi;F0xW0OC{DpHCM46JFWynhuP@%&Sg~O=|9}^(9U|;D9H3unqx41TL)ug6#RkBL zxQozZxr!dt$l;72oC%kuE+|9JuvCwOl`Ap2L11fUf)#l+5UUi_)LPEga2OZUhep7V zC!8n4pX%NP=UOEI;+A&%QuZ7AyQBgG+&El1RlI?=@g|w9;EF6$EUW^5QGUS_sppOo zU1O&4q2js6s@9o8UgE2McasvPSV_f8)H(9Cs!7{w5ftu=hQKbthB%Mwup6KWh$k8L z90x9lw`e0hvIeR?9*V4PW=c97Jo_+NSDF3MC8w%`D5!6Wg)=f9JUhDi1f}{2m^F6b z>b#mZM(fwrR+njL!Sy4ECLzOL@e~jDoOPcmcR*sX3T)Nlot^ zZwR*Xw)L}t08qG^OR57%`D~&k-X`%~P?P-k40>rD!5_VP;EVmLMzhs!9!9hE7|^e0 zpTn#-7WZqlILDs<4a2@uLSSBDUFU8)`CL=D#Bp#o8ev`8T+Mr$Zeee^Clqks&IucfDw&Lex}T3-IuF}UC%TW_ zcGYa(h<0L3v+{`^xOWe;Ry|+B(Q>}EhjrW2_js2o@7`^v>E12Ow%DT*(p4ARtNXCn z{VT;6QgOGf?)N)Yv9_S5sewJtzn?!jXF4I`lC5^nA%vZQ+jZRGk}Ywfp(n({=Z0VcD^(2dGQy#v6>0INndBSK zDXm4+6<3JJj*IO>Q1VEqd6`;;(zDTjz62v^rJ+!(_4q;YXBc@YCRGBOXGK>DBU{K} zf>sOKM?sExsTU}@j)RLSC|Kk{2DAJkxsRvxE00vnZeQx7qA{J>)~G59ss(Y01&^*e zVY;EoB`Z}n8s{0*($Ojg3!S`p9#MMU9d=7ti>=0`7Mz}fS$d0`RSY(<;cMiB7TyW= zQ52&rL3|I3>rH6Us%RdD2R$V1=HU+s37LTsG3G^honHh?#5xN2uyg7xscs3rNT&`!qiQlNGqV_n!n7ow-?6)ivpHkA) ztydl^%~Z$xte2cL1q}QyHwr1wtlvF;8ncx} zz_{UzRj>6qIZQLj?HtkJ7%MGCNjOFCBaJgAbTrHl}c&1+wP^b z+x?DJO1pX5aww(UJ-n1Q>lCL&QC&rODn2>xe$mrhl~MyZTf()S#j9lG_wM<18y34g z=lkYuCx@p#-L`XI0N<2Dm%H6`(r2XmPI1=n_-5gi_fj?5&Bs$K@RIF~`}RAQ`TGM0 z4S$pvy$#KQ9lTzq>jhDNNhX9c#84y}=AtH-GL;6&64}ypp3hNW!;TRZnD>GIu!{7- zOO*(2ZcDS<xwv^(|{O`hF`JGy-v-6u2Tbc0*^Hks4kUF1f)=@dm!-M4pi|1MoKVo@Sx zil=^;q-;0cO0@+1Ewp&hKQq_HTNB6I?RWgfK&PH~-)q6yVxEIeL7n^@x zbbA_t^82sf1d 
z9nK~1tko}%WNg!`To_eFuL{F~rB}7u?QXeV)p;x*RZGi|SNHG?AX5D3>vu=I9n~KF zcFW^Bw{jtN_UiHw4}xd6d3gOVXGdT2Ma2=Os;2trona`e0P%wX}XL^-k2=UQ(4ckS&D=P ze~-Z=W*L<()>Fm&iXTDYw$9?odP;8rRJQ z(du#B)@>8ib+BxZc-j|m8)jsGvLRS-r&$UElLT-b72~S~g(% z_LUqsW_^w^CM+yVH3Psdi!+>efO1czU`&my4<{q7*RP}s zb=*~5xwkv}SrH^cO~2!;fS3kc4l>OF-y2KTe+^JM(u8UCZy95e^5 zZoA*_ordl1DH;-+WBgL5>f(2fl2tt0Ga*VDEPZ#O%odnYs2%^1kgSP+49=4Uta1YtR`mOWNYrX;W4BJpF%44%MAOR>h}%_- zldY@lw(2l;rZQ2t>NAJgN^pVGG}J#V(ccKReMX`Yjo=kB=bQvz!*?ic1^&*G6kbWMdZHV-`jN%eKtjkL>H8#*@{_62;3CtQcIpFE|JGZF(E)q^v)`yU26; zUSGV+V02%?I0*NGqP_@IK7O>}6hc}G|KjnweVb&UMqmyK#&#%Jqy=U6T=cxDscAM$ zJ&?@H1w*hG%A+@-e(g?}}Bq z(r3%rp*p>1pQFlxYpY#H=8cC{FEKsg6xf+f`#;~8B8H5)Qk6EX22gP$sYL`;3evS$ z!U10wAHjK=X%I+>JZ`g#j-oU^MV)t|xm`dO46hb0-@|kL@x7ijt8em5-B^~U5EEx+Dkbnm|XWP&;R9o6+ zYv+HZM+*&@8gE-8n2?R5QX-G2rjBaCh~=c5{lLP-;KLNIFG4fS3)=_h5MNJpU_|rP z&e*;e2A0-d3cVUfkUTXQ#JU41fLsgADmHuwep<(Csf#8hFXh{Rxj0u`N+45kPqt#azdeTsWBi__;nQR)13B*&Sr^dEvvcOMxf$liZiB^{X{qp>=nm! z=YR_$bTb8GLA|5VXid!OpwcBAJSVQU7BoEC3lZ zM_{BOuWE~(9^<8dX7TA`M4HsL56E`H{4Y)M-%04OCy*9i`+>g0`g^}Rks7+Mk z%Ns1Z);BW!A4NN+6`j)>$_y+>L+X5hVZ^pdrUXy%#eMW~%fiUGO?A9D1$;3qb>Jyi za}E6}btAryM{AX>yG?T-?LbVlkP6-`@J`NG)?83}@>$C9lgT1RCKH|5igMh_#4*{h;WQ!2UI+Zkp;z;966NWF=%Ai@D>$X=3{OuUP@AM9u#i!gbGh%Jcicqt zkc9rda71XSmHJpUfqrMU9NO*zgF}B3P423pc6{%T9$n$^# ztK3{g#n7Cgn?;4+v{KN@yr8)AMP6vILzXa5(ByUUw!L14-^qY5ly_vYDq>6}fAZm~ zfKpxFgr!nBg;?N5?7uPP0rTzDAc`-df9&dF@$gt!lp%lLMj2u{ESQyTlS8DB93uVW zZarYDF^-n-Fl4d3Q#DMF#jo~{!@5Q`GEZa#hO%NV#}kX^(JjLy*XZvmEG(_4`rQspL@xI^8tFVL zc*7&Fsr%&YSaS~d&RK#A);xT@Fa2r&hf`_^4wA$eCqiM4ll&H!TLDA>ua@qf+!Rxej7>uH@yLLEA!h4A2Rb&PjaQZ-nOg;=I@S z@)dDi+a-1yidlu@4(sJ2&8Uf8IKKu*ERswoz^bvf4AxNo*5&0ZH6eo@TCAioM*fOz z{LuW-@Op3>T9PTx(%mv88!AgwN_K9fzP|WYvDWYKq3LGL9Nd!7J!l(eQWPuaI)NY1 zju7Yf4IZGmE~3PrRcIO|ZtqkmmR*~~HD2oH^C-%okgmI4h+KO9`j7v$UA#FU;5$W! 
zgo{G7pXat;R$;r{?y9LtERA?sDTGE5xw1Gf@!XV&R}n*()g*$IhM(e)thL zHPXHXGf1vucUFOFs*XaQvY>|bL+f}TR?BFfp(9Vg4(i%rVAFY(!P5Tx{vMiM`J|?| z(TDc#8^3NEzU`lRw%_vjJnS=K7NSBN)%_A+v=GCqg;Emb-STNlGNCf(hhi1L=8kka z2VYlBjeV$>roSH()h{aw-tHGUy^8Vn8hMjYc?{T5#oDyQxY@CZ#}r=`nau{gq=s)| zJ>b`lo=to}$oH(&CrRpk|1LagwN*w6y{C&P%P6KZi&qj4=VC{}Z8TjY(FQzDbcL}x zU6vQYsAb74fq_``A>>NKcVLL+9E=hrnc*q@vBcqJir|YPj0*SRvmm-nlQFuoa|gZb?hJk04b7Og;@wn> zH3MY}6)wTNu$FBnN91WmTJ4yxXTuoY_3K6so=8R?9(I9tqTqg-%s)J5RhR1c#n0qt zH=qZ-cy_Z|Ewbk)C)cCV@%4OtoG!0VQmU&qPDdI3_y`$hoA|erx6$YZ3n84RI7*yB zc6_s%P5))fFSf+*sfxrrULhC)haexhO+deh$4BNz-}BG#ljgU+#BVw5*q_hO^FF=IXWL*u)!JAOn=bn51p7T76iV9AG+)=hw7yn4;-9*SV%0%D3^qO z0M{U0PmvJ@^ss~;*Stf46wmn^aPy#-WdynhC%lZ3!txaR33FgQV zKjB)H(oaCmnog{-uV%PhztVM90WhxsHAOC^ms^%BavO(Ipx zh6VlE#T!2B^sO9tB?SQTN<^q+@>fu~rj=yaSF#e_0bL#w`gBpPcJpcbr(1Roqq}YQ zA1UV2rkAk-3-dNtyi)=qcZ;D1=y*4Eg)^7FU$^15Zoj~iAr-?-Z8tJ|>AtlEd$=IkKpog=++m4g#RJz@WHesqo z@C7m5Id-VJb>Hg_?DZDr@d;Mz(tSEEvK#xRblWL*OG#{Yzi!=j{X%OSux^*<8qB5F zh&-M?Sk*-oUr_PL z-G+}-|o5+nLA$VOR;YI$34ab`QBK@4{dk~kW1@wJ)_uwb~Bf^ zT~x5iM>J8rLS6`Q{ZL|* z#Bet9dNkKys^Q_Vvtvb0B)McL@=k(TlzlLbd_`-@x$cz4$`olrqFZPBR7v5|`K$tA zE!iuW8u|3>BT5I5Jo>vx3lpvuKnY`19f0nT6s*g`Ej76TdKRT&CK%9J7RX1(!Idj78t?E^>bIVMMz4@K_v8 zh04c|>{tmqm6R733feu&%Oezh>EL_Xqm$v!*tq#$EwtNi6!P4?Si{8usAul!yO#kR zTJxrI_iUCE;lTkUaC!G`{Q0{3t!DWy`j=Ams`4p<+6&Hh{|A_ZfwYG@>X%H#`-7#M8zwoU92wi~$W}8}V|*o8Z^| zDRsYv5v*$WPDYJ%A8trNv*M&t?$W(`C-;}K1+yw2#_1#SP!9%Dykg&2mGXhG z5&~o8Q4L9-SZE*)<)9Kdw8GO`{0WvBvZ5TA7}bc_05kNdz zw4kpS@thJAgI8a^4&H|*H-VzAqePbp8ojl%mv8FNJ$h(AGfN6Pf3JmzO+{~LWa7V3 zBk`y$(n7klLjG6D7Z7THh7hlDavz)>!w_PHeSE#1kMbbw<`Dg+=st_n0b*n2_*0~G zy(GknFc~9+Z!<8;+42Yyq%>j3A|m=sfnO>64m)qeE#ki$25!krR!PVT4Vu$c)6V6ksjR;c6SR zFkoozV{e|47$7%85V57Gz2oei%F&hXbL^V30wF=_q3~GM4j0eW45Bi<{8D)>xvwEw zc4h~s4rkUK4TnS2G9<7P9X&P^4q`1!m+%PIJvOUH@O*WA0{n3!fEQ_jj3w|wHW;{( zBekRq(p!v&=hA~b4R}nER+u#bY|3x=?$?g*(T!QL(Q~Du@wkEXYz<^7Y6PR%BK`>r z2OWDZ9zKG8U4v5NFE12@ovgm^c|P6mR|(6AfS>`yl@Y!i_MD!NDmnVchc`xRl_ 
zc_p#@)j_{?ruc*G+eY3yuDWN?*jQNb!oRclr#1SJ>k=|lk2hb;8qskTDx5|usYhy! zDw$94^{1Q0GJzd{JT6kr{Z4>=XE`?-n)O=ge0dn-MMxZCy1)dkWMZWJP|YhcCuqP4 zSGgHUn%~r?{Avqnq6~<)Z>7<8vr_1_>%wllr#jZ*dDCs&H-!Nf4pg{XS_ITKf7b{$ ze^&p@X_4~2a>mtdy;;B2{aHWt?oqx=GGo{DM4)W>Vf6`a@6i7B<`k=SnhVQaGo*;$ z#*eDcZd+I8FE?jarMvQZl>|roEW*uOU0eqL7M^I;4w%XTZFoY;14@>1KFom@Vi5AlQb+8Lo{MbN7KPKIJXA8*#$ zu9f%3-!c0$zOSkXXp4dL00{@vLp^7Y9x4f9e`4eHoQoD2fEikg_2qQ(9jHo}yS5}$ z@RmxLq8&!;f5+26WP1zri-4xElKw|g#z9WhA(H*)u^gFX;qRIQ}qQeHKm8Sui@j`NK6PgOGyCjw1e&mm22f6u`OQ zAURn>cdK{!$&*3oXkVNkoxu{k2SLzmhLiV~-ybQ@!jbkYaE^%k=YOlz5nR&Varg8% zYz8vgKt@~UFL6pH!S>l4xQQd=2c~0$zhp3uN-RoNM4UR&1_~3s+T=Dm>6=0EaF>_> zFt2SyM_8RteROj{g#M^`4Aab|l-F@3Zcsm~Zb0Aa6Vk%ZPCzrzs~?Ju#5#8+mva$f zNj^|%6?7eA#GJXtAJVl^iOy>&YUvdA)5Fn0F69L5%apcBn}@UL!73Z-H41xcy`>4V zY-Ju5y0A++IbOYZA2X0Qm4NuaRb=n`&ETbjF z{dwybBr4?aqXSK)AnRmf52>H>-?qFQpP5C@v^lbIQXaYYhJuY|fscC_O|L~X`p^JVga21Q!wO*P-d4z$ zLOHKh$ppXA1Y0;u3Oa#{Fe;&jEWLy@8^%=Syq07}k*8SAk^QwSLlpi!P&FiwKjGT^ ztuow5+C&(qhHndoHu&}b{J#RlK*!z&{0X_@y#k?acI}eB3d$Z^eEEUIn-NX#q6dSA zfW}kHJN-dS^)iLPcE2)4^8TEO4sO<`8bq$9a;+drg9DWi;l|TGtTc#Wnoh~l8PpZhaOoe;T+%FV#os>nyl{m`fs_s190VWvKD^YGc{e#Sd>G52{#y+Bs9 zjGS;HTn#>;A)Zbq#MQxSxd!Q@LX~x?&EzR%Jr9oFPMtbaOISX_##|v5g8n{#xVm1| zeKeXDadmsH1xb7#2aSCLbYg=x6-`;Adp z2CAV|R~a*Y63<0P%ZHsXRyljrNe(U4KGJHSLbPlD$NMpK_L$kYqS8p8+BetO*$~&t zTGT=wSc77CL&nG$eChn}hkhqBe!~d%-d+oD&!)ij@EYY)W#M|4_kmi}CMqId#$`CD zbB9HRoPC{ zwi@*;7%6Gb)LfceM~p-$WGCA#Zr{Bn|1!;(vC>N)TD2}3IrgVMU#An^OU-rn_SS0k zNG=0wZ)!Wz#n4Kxwz8!*$QOvBCl*S@lcCSAITWcwa=)r~^}x~qZo+D~oB@ogqZ@*2 zN!ul_ri&6>KpzgTk-7By`O`4r`|b+HBUy24@GoCBehsNXQfwl|)mB|9%m!tz!lM9r zy<-d0$6^;Jsd05jXZmfFYTB_K^t7X}Ex7#w%RC&gnn82X6f~NE6veB9uwNl*qu^zy zh!~}Sr*&V&yFpMa<{qP5!B#|j@$E6w(=~|l-Em}}S|wm>kme|Bfa|3$onR3?x3~Rk z6!#iyO98@$?{)^HoDU7Eo%ECh)tfQ-O7Qr7t~3pQwD*g8bMFktKvv(fX6@6=g zNqJYR>e3RRWOTcnpLKC%-^d?X^kkdC$dF5F&$u(BFVX;9=HunbWsZd7r;8Rzd~m|9 zDIND>9NI!ixLjOVKb$^+2G1^WuD$jZrpb<5UC^?&WB$PIh?>!ptd@6twpz458oG(i zpXubNp^g#`3r{zdly@Ap?KW0@Nsn!C+bhXiVH`ciHbtNK5FBgn>q;*HbdQ9~2Fcg9 
ziVgh+3Bv!@ufyq&%cy`zaDJx_i7=8{mZnT(LPBjz0=LwT7aU*~X;`-qCh z=#G$^u}rNU@>FEUI802B6yO;*Hb;=Ak9e*SPzuZ&!S9jZv)j7%LK3=UJGdvq{)~46 zILo`nxxgxRg_kuvNb`Mg z7Aveiz@YoW!EGqpN?i7Wmblc5bsroCOXqCm`q^01{H0GO%h69;p@p5wjEPb`-a})Z zLi{Ivibkd+UmhL%2+z+)5fcM@{dIS8S&sJAyw|Jgw~kczsZuJQvr8JBf)4Cwe3zZI zp$#O4kjeOar>hHsy9_zfp7Pp}j`mgNf>`e#ZiY>7iL;xa znR+$gHZFXD8h#j7*VOWrD)E8}!TJ#)Hcly#;E=Qe>8y&XNx@Uo8{!_IF#!xM__{(M z9^jYuA$u-0cBI|aYt+wXQQ#(U3#^GTi3U3O#$FhihkFY?jhTIlm*ENJ5FS~Eh7qyB zvD8EdHA$dG)~_Ip8LRd3E!cx4=xAT=08jh}RLRBYy!DUz z_+m1Lb_IvR6+sNX;7}Vp#?MK86KyERf){XfEbV6rgb3a|9)4q*s;68suA`^oXA4g% zQyztF%}=jZbODU}FpT}hlb_gv=^45kG{vX%UCA*L%!0N4nXn&KJ2~geAx*akdlYbi zl-xbO$WZ^~A))=3azy-W<}7gYbtb*-fM#V&4s%7I4Bc7NG~<=#&_R)peiGOeePk2BSh z&N=ErC&yD%`GIHaSwgE zj<}j`C|H)MkLsRNtA@f;1g6vvG53z&YR$NLu_cVXJ1!NY2P6%bkd@U-5z-*^{hdhW z{P~6*eT3x0KdZAfeX?4(rDEvOJnrp_@gsW94r-P54>x|z)|D#E&WvP9^vK7??`_Sj z7Eg(&J|rSpx`ue^h_`zRVqvn~pz)evJEdreiLj8e@mEA>#_(gA(ZZ)gKtADXEb1$= z;{6Xco90iW$fJP`LT8-bqP@X^a1?7QLsQ|_WZwl0p|oxa_T+y)tBnsAurNPz)Nd}s z(V)@!GLqbK1SR7c=w>PB%83x_erVbtPurU7xghn9lXD*_EwzndySvyS08d}ZyVUwPtm=BWkF z0T!Ld$|8zkA=T?=Fy3S=0pN=wL=k)?ufyobVW!ZL@f`z0VumPuOd(gwr5mTDL@D&MG(e*Fr?ZN7c@A+afhkD@hga0g9?#Jj38mz=Hs57*g@29QlIn5`tpK(_<7wGH zd{inyrl86T_As^{zlW+f7(59v!kYgfhR6fzlX}LJjgF)5i3J_hp$dtlr_@TQg~h}| zJ7%G&pOqWabTm$--4qPB54Dz+BSvWHcUQe@W>0b9Cthm20;X);X=sp${ll~-_JY0P z!fQUz56tRSj+E4;gF{v!7})W!gaXzFX?^}@jLmkSWV**A0=G%pSzs=Rp=10+pPkA* zw{Vqxe5zGbKG;-K{=mFK2fyb-qmnj7yMj7accyT0WKw;QFmw6&_orlvmR$}}%wu?} zUh3*<1aSz@a%of8nrh?R6Rs=Hd}LEM0a}fk>R0#j#p>$nLkqRgiV9(XDEPI}_gVWO zwLCG#uJ-bmGTAl2joJ{x}gu=TO63-29kYl*_ zLhr$CIM;_A2CBV#-2-=cT`D3>msRT1vhiev%~Ta#XUkB!lT$8IpfrdS zlM=bv%O8W|=|Z{8?_z>my1_5FGM%We+?wiL{<>Lc7{CaRnD$5P(S9EtQ}4r-=|t^P zLQtaMnP8I?07_@q_WkBK-=`5b*{Pk!2OscGq|z|1cIuaZ9g?$T#s_--0VC>Z)b(ZQw|uUk*T|RLvRzwnI*QChg1WTbFK^|?Ok^`SQ{IC@#$&i1 zO*^w7pLIqk%)m-?iBnDWxoDzFMHy5#%(5Qd3!Fs8<=(`I9hA@N^3m-^dmok>vUT*7 zZ5g!I6<>m9&+i8#DphomW2_IxWHJNMq#6=;(<&HEI#)+r@8|^7O>2iE3b9cJ)T5RF 
z5*JBgtd8H;cQmT1QJav?zINP<8|e2X=c@V#5me@eqm$e6ly_w>Rh(aNUbCA4+Pcz< zgJlwjet~m`0;!Po=H7iJ`x>3;!#2U!hyHEMt5*A$!`uZdM98W@Klp`KF(M?kRoOuY z3rZx2L^mRuH=`E64y2v2Rt|CtpsC*<=0c$cv_?bKHeIrdGMAoBSHd}sgH;o9=s0)$uYI^ok3_~tm~rF z&V*H_Kr#|GF-(+m`W!l6b_R(wtw-oeF(tOD$WQ_{=|9}<;(sLbQXz6$_h$bj?P5mz-K;Aj8 zq^ZJgHY1>3wmD)yw#WCA#R~710=IWGc-6gkf>GUO9D<9OI=PWP3CjgI?7IBEK1TEgQEgUahj;YB_=q|INM&1`21A5dSv6853GM6(Sj! z0u6r`6G)NIuy6*`XD|BFO+dP%Uyo>oRLcNlwyu05YOs)$k#_{~shwak&Ims0>er6c z0X%JJ37vV|Wp`Uh*saTbk6b7qmXw}Y%P4M)^kcDP@vIYJ>R@fpJ#4oA!YXT^m$2p6 ztH&Q%s7dZM36K!-OPIv)_Y&oscv?@uIxTAxKfD9r#=u*G<#9n+#f^u7@^hbX;I$$q zr@P*0<=4>Tpc7oZzelc3poc_CJ5mMHwZP;fNd(k!CdFUNpQ`DYv_mlPPxaGC0E2c4 zHWPBfT6{o_s<~p}8mYClr;eH=PXJftooZM6Y&jTywb#ynpKaO66^`&Eu*uFMC?{Kx zYx&rs4W9O2o_BI~xRr9hWr>VMVk_+qJp_K9j-E=U^D2a%4xUt3XV3%-@k`w#4e;AW z9j!0k1%mBXrNej{G&-b@PXP9bAK)A$@e=z%IvJu!$o&T4$FACqxrE4H3P;A&7+RZX z4rWYeI$)cToX9kB2ZnqkRT~hQRSGr`@=Bh^5g@)pmgs)dI_7F3m4JUFtoMyZla<}* z8VwZ5c3=*GA*#H`{pJ27LPf{x?fh&dB}K)@;lqNWGZ+!Y;FH632#5OXYT=J9D+ z$J^rx^`+tA-UL%M%vTGoQp?uO|A-mzCP5r74omd}I3}td5)bt?X!Tv*Nlzo5fEJZs zHXJcefBFtY^$2a2fI@27JMGlzP^hc=9>=k)-Q80nS|7D({%O`H#0Y$&xD$5)i&Wgd zMS-Udj{~vYx#BhyFhLC(au9OaG~YaU@422kH&cm_MgAtflZbfvC}e`QpcIWdN~vy8 z9Bzvo!XJFlhHJ)!x(79*f;HW-Z5yAiA<~XU6}?Efi9m}KT- zHb>*C>7*Ii1m`xgN^`#V?8vJwq%1zr-b9-3D1SmJ zcE}&dubiJ@Q>gF8HbYQ@?7w6#r~@v4kL8F@Oj zecKsPDAG#WF=S@$Yl(eZa3%H#6u~n>VB>X)SrXNUp*FL7G}rhdD$YCtNLK(Ei4C2b zT6&k~%{|l&)i)@JUD(`#Ee}I7qR}B$$5Erq7eX_`j&KN9VwxE|s-{t2!3gt#wy!N$ z*Izcq8G-J2ODJwSsddaI1>8!_tTVKQ^R?hpT1k~3RiU)k6Vo&r_{YNA!eTEqzV8up zk1#D17V^p3soX^s7GeA#Y7uLn8fCk7@zF>y2O4CiCoE8!@?MO7H`AO zCB;;LH8PM|0O2~hvR)-xLHet=09^=eMmL|gbLgF*O6y?oEF-OQJXkp&XqYo4A?G_P zu_W)bCS-$5V?I$23*u?~52d>!4p${whIFUF}xXWHT|NZ1h_F72#Ppp%*xDl;vH)`CAITB%H6j)F29HF&i|dDUPy zgb30$k8JYq=8<@x3Mb;y%-zQJTDJxkNw~wRoH>S%{&pi{V)=lbGcB!loXgemMpY-3 zCLkc!Ew|imNET<7UG$zTcJ5OrBVmNb7;RAB=@HBqJxiR`RAC(&E&BQ#<3nTC9iXUq zbM<*qD(vZ&#*6wKaPlf2N2)8oex-H@vrVHXZKb(aE3FrK_I!jGPY%hbNB=!DLv*AP ze_@~CnEM>9LTVp1v2``q23~oO 
zs)|X-OSfC>;=5oZ_+_6@8p|y2>jhM7amqI|uiz=%?VILINC!Z>q}%XzVJay))wrCp zs*cNrW;S0L=CN~SX2_MOnew8?l$$B>Janb3E%y84V#)$irW{&S#JowruJ^y!3w&NU z+ZzA0IEW;caQp*<5r@NH%M~Vzy7>OkN^*dVeZfvh;+J>vBfPr=5x00?2PaO}+fP*v zU=K`ODGpo`T%H#-9)cVKb4s$w8gW0Px(0JB%h~ zq$|(!nkqz~#O|n(Z>8l)1pScpy>bZ2r?-z!W>~N&le$6HUrFK*9G4HH&;FB&yXF6iaJ{L^VYgtno1_QNn7a zqVQ67kO!E=A#9Y@xKh4fRXHQT66tONTPB1QOG6?sc>N3zNdT|?edT5gs>}zYQvok6 zpRx*4d(xuXEQU*2WBr2TSD%q;h6JA`AoX0=BH38(@~?|?SKA*qPk><3q?+D)wUJfP z2M0bJfCkB$RcTiYm?{s6#5Q;OsIN>Pkq%UBd`Ajy9V0<^)2&2(EamfCbH5mK+5-g5 zoNz9JUn|sMQB3ZKUekvN5>=FUvdU-y>C1&#VYwP zwaZHo`d+@1@LajLxQtvzMx~h1a|g#|R(8J>``p;*WX`Oaotzz9>2tDmt4$W?+#bJw zZoNG#eNDn(;AOs=el&J9>im(mh{io{Qp_VOIsRGR{&voO>IP+Q^06Bk@p6s24fJ`+ zYH<3pg)o6~f3)D0+{$yQp~t3t^8!K)EsU<-N@6fuv28O;&K`D1f)V5m4Ee#HZcJjL z<9&1RR&f)dVskb6=KKKl%@Z_v;>Gp$^yBKImOM^+{xrmDxOOw^MW&V+HMnMX@s(Mv z*x`W2LxS+CE7N+aYF!zTgFi{>!1&1#ck_BB9s+YZW@!bYJwjLk4)!*pz>?FWzbkL( z@#9^`5nCa3`&TKp`W!wKwNY*#DmmrlDbM?bdBy|lcg1Yl*99o$C#Tm1Sxo3hj$2#PTUGWDH6jJOr%&mM z>4~N%xi2J23NxPxgRUXaGJGN%W!GD zlbI4FWhZvAT;*e*gWI2hn02%{>u_)SGMb!@5w-a+epI7klL-S<)9Oq`#om^>tyw;B zpb(msaDii^iOIm4;0t%YEfSdBJu(Odmq^>zm{KXq>h0TO6s|b8cCFCbEY0R7Y8NiNRI2#$0UDlqznG*bRXLjkuVVB zL(wL;d{*KEu#6ZtuxGm@-Tr5k%?&K=NkjH$a=ggQ{Q=A8s$_`)rinYJ#zZ9ZO_)U; z^(?WP+Xv_9dt!_$?mCQT=j-th-If<_@ooshF~bjHgnOSe#}cBpPR~0#L;2Nsv6(6c zE_)MQr~>0nJ`{Y2`M!GaHd|D+V<|>4$_E{IOWM`hT=IE+-@digHL{gLr7w7`kYU{; zO3HltEl^jYAz!Ct9yL0P1kcv3*mBmLKW}kdKgd0^b=8Qmbp@*pxjIjAl)WQPa(2OP zjs8#|r#ff19^UPATN=+;zglQ=U)(gcb~_~OIzz6iAGQc#TEcc^KC2~v$KZv4NqE+d zlyFJQil9fEjv=yeA%hGc5AW~Jm@D&CxaR~*X7!wS(7imm;5!uC2ylKh zyy#QTdoG3TZ$>?|!vS_sE9V5)ezumyq@FkB46k2=P)fI}h03{xA4PyO^H@z9PeJ~r z<~8Qx?Drim({`m9?W)z_LAim%%&GGRWI&lq`0&S6-}L&1OT}uw3e>yF?&44G`+4zc zg_qjHWB8M)-Ch@NnPOXepB(9jPMHAli}~aChcSd{?fe_Zj~BzK&4w{8*oE93AgA^R zzZ0i&9@&x?+|nxk?s4)9&)jBizicZG$`4e-fzv&dwWE*0~n1>H~oXj?Taln zG}kRWOYm*#`qccwxb{Ib!sn0*IUx91TzkRQUS2sbyH~iIKO8ZhTM=k^!f2D00VNjc@<{vh3Em(h=+8x+*6RC-K?&^!zZnJr-x=qWIS$$H*6T$(ucI2Mr{Vm!Eh 
z4f(t_8`QqF!4O9$T30*AEY9NC_tw`#U!6!rVH6nz980gLr553K=MAYcSxUna4@zz8s2XCzhSM-x{>%LVV&V#t zeucTISy?&1$2-cOC<*aGAz`TELHxp-XbdQAyr%`rljtcXUlgFMR0%2~iQwPR)>87V z>6%+r9etJ}qCCeQw;MK7=>6l-bp5@;fHDRGphNz*s<^a4!_5zF^3PJW6}$2eQ*Ry|@xn zv@)5uR`yD)bgr*9-spzSx8*()lUp)93hcb89hAi50JJCD_dcvbRd9|y{_>F&6V^~T zd?lm^oC9tmCh+MF4lVFYj#CpIBe+Z2=CP+D8G7w~>Oj9U{Imtm=4 zb|cYc02OW=gQ*jXm44Dk5Zx@jU~&ip1zyC_yr2DRh1`WUSdpxh`^+naM)A?5a*+mD zxBNQaG@(K;PH7>sD~~8dPNls=sg<6Dff`T*{D7486D=8IebVJzsj=D7$|aF`iW4=jKvo;U|l{|FMcbc&H*bq*`jP`M0l)GyKnO`8-VW^nt0~kFn=;Oa@Cg~v$n)_ZQHV-II zqxoQAYL3!tAr#^OC}9Ruk1n48T+p}Syky8nSv41fCEMK9xaL>MnqGkdI?0stnIe~m zKBsDKd~J4qw%Yd2RuIS@$|ShXFwD?`%%CMQQje)8wSsqKUB%lr3+e*PXw03b-9V;d z>?>^U@SOpffKgUKcr;D%c9!;{C~D!m5p;52!Gz zPj3$foU}6W7a2ahv1RTFrjLmegvgw2m__hdg(|16Lipy0WUmB{8(SQVCmYwRgd%h_ zAA5A`wgP5<;?j$7Zj#gMgjjZNr4EO-$23R;$IjI}PTI)olSHirSqLR?uCelt@aZV?FQ*N1hKew|}R9nkA)w< z{=Bf#867GY#V*mq7y2=;TJa%+aq2dMmOK(i^}2KEMqkgeOCUD<^+krYhEC4We7IbR zL8zl$-RdEcWjDA55~IQXMdSnB5MeikF0GfH^-v#+jVN?9rmo{Ku9_xaF6378#?4gY z&bto!@J77M^R4xR9dfi*^nBQQ4X1}yTzcNjy#k{0m^#ycQx#FON6TGGrd!xTe@SeI zh08!ylO{mj5`<<7;g*^9G%Nd5E!iaHG~1*J+lJdIWWRmBhXs4}w8QD*Zb8IhGo8w8%bW1IQ;u}TV9>^+?Eji39G660C3N+NXYwGV}cc0nAinc z%4Vgvyac}T5@R%8zcO+-Y&p{Gyi*3DP*a+`Zb`?ex#oRV@L9sq5eH$sUru#qrP&{- z4Ur7hV0)m?VK5cjWa6Qq(0-2+CR6Ljin?68tqUqjYi+FIVJJOTglsLj!SYSU=@s6K zggNU$*gdm8mObwsOI0wdchMY=Dg?=Osw0{h=saxx+-`MBn^zGZk%bC!3zsMQE(aq$ zr~!f8Vs!gddRZTLs!+3P*H52&J%DA_9ahx~Uh(idv5r1|h28OW5lO#8RL6mu=6BzE zzjB0h!g#g+XvKBKgCxDCjDEvYfN8R@3*e*M?_03y44^Z*P1 zAdM2>KdcYHKi3CVMvi*F*9K9ElhGNpNI?%?kb))_k#zxm;%H^%<kv{Ri0z<-t)^QJWY;jvJ%FWv`?d#%%LVvJHV$X3S_&Ne;w|n^8Ws z&Bk!fTBwe!GsS_VQVOh&CXHLD)z(r>vLGS(2weg|a=u#=_YfZ}iqq7DW>qx8T-?Dp zzw&8`Y+tjF=a7V)Y=EMNW{4-OV5H?8bG&SRzUyzr85o}8M=H*ZcLJ`3u9+uv zsoEiII8;QUSh%912MwfXV6dvh3E}7C2?A_Wg`?v`7vPD3YqhNm&$BOwJN2#bi}NAb z3?x?Z2M_29%?Iiua2!Y8y}rdp45LXQQP-05&8GL!zJ&Ey&tJfMX$R3Lq{2=#CH7p2 zGolk51?ON8YC#fD=)Mv?2*08gLy7qUVF<#?$TuCXUl45!4Qc>%{5<0E*ATURyevvm z`jS7k@6|V(%Pg-Z=lizW6`+yEUA0H6l=KZO(I 
z@8zbWo2Aje7Mo|9Rxzv02=CrK{kC=w`?*c~DOw&i8Gu$aqOfjeG-j|aI*{u5R?PLJ zq6NccFQ|rqZGo3%oGENW#pl&((baM&&xnM0tpRoT0h7&Cd5ndLa9`wUaIM zz&KEL?r7t%8Bt~!fmUhZ(r5W#>^jo!e9+m{Z93~^{P{lGq4Dfz+Ks>T*}25erkiR} zf8wtM?PG9+U&)*y#CW)Gmm`fH0{Wagx>llXOuI1xZ=O8yN`<#ZFO7_{!K+y@wt#QO z!uebgVHht!85Wedrz|28lQTf3I4VHt7Je@7*e#;N$56zW=QE9S7zofVx42-rY;M?D zKCI(4-~Sr3yaWy1@v+fXQ5|K`I^5}GQW9btW2aHDUwtDh5?0*c!zhJwVZ@RKy*7#P z@QC%wKn!+>;9?)e$s?l2D8Wpb^QFN85b?!jlVoz4LOsKFV;{8>b}$B3b9Rx<&5EO| zVOk~fzS&WnY#-nKF$TF438rffaJv<^b;k0Re#qJRSWdq4QMsL9el3B=VH@Z9g4f~VAp<0c3#{dH z_e`P{me57KwHhR}y)}@aLKqmC>-1`f)3;Duxo~Atl01Xzp1equ4^#W`C4=3M&=208 zq^<{2Dt8-G@vZ?D^u^1rpIe;*wf$t47--MI!>lqEl<0c!kNw0e+mAH3*rBIV&Lbv@Q!Zhg&w#BLx3VwW=iytr6vbmscbs9=W z8cGnfaQ&g6b;Y-R4>FKZBKm-v`7WP-+f(?!lXZBl*@FN&tk>VCPZBtk*|bE1W6(m>IxqI~K+n_HA1VZHhmh{rH7BtFH;) zR|5s6wX7GLMs+^ZGoRBpZf$!H?txH8V{a>{xACTxN_)9fd>w+)MB)c~EXyIWI3$!A z`gq)or__c2;1iG{{V^iWGtsbOeLj&!MuX63-6);E2AL&FSZeWQ#TV4*91qwxBG@1K zpfvHrsUy3b1G+WBP-RAeM<{TD4gR~+G|hU4_ASZ^C}<0@*ON(h>49Ire{b)&d9w9Q z%0=BnoEeZC7S!;cQwmd_2~?Pc;|u(54=`185fw}uKp}q#593qEewg`1Q&IMW`8qsd zx|(fZdymaOq~)yb$&0;@qXfuO(pt3$5+ecgB%QH@r7&mXz9Op9 zEtJ}N;FsqIUZ5Vy(u-x3LR}c2zkX;JAbx}`1jGs)p|?n(i4jJfJj|#TWF^AVR}J?y zB*wsiR9_j;Ljl7!pd1=gw36(BcBnaU4*dZu(lgqS#We%ci9^eX2ypCUyrz&(py)`oo1*4KD`0aeD-rS3aT$gVOs*+v!z zeUpZoVh(U&4jtmdm6WhgQ9kh!s@5S1B0WRN=kYz&o%xBZ*u5Fq{j04v%Za%!fvc)P zy*h(8%GZydT2eLmrK~1zV4QOOA-{BAm>Q#{`7K;!r??*qN+qaJ)OQL8g;=`K1ah7X zjZCoKfLj*ULF5DnUBToW^WNTXew2r|wF*G4PifoT_B%M|!_PTEes{bTllXR z5F*1(OO4R%)O1CcNnC7IA)N{$rQZ`4L5(7JKWFH&IsVAm^)=vV6CIVt;W6SOawgCf z-q}x}_AZ?h_2zMe21;{qR&Oi1o7PqY72w>}n`$k1KEaNnJu^x}=7Pb_qEFuw4P}=G zI^t?xFOP72ItVIjKj-Tc?nBfCV<}^eatYti{L8Uy8dmW59+ZHQ{g+ZrJPdAJa);G2 zM!<~(07e_u2m`rGu}6E@4QL%2u-+VpiPH%<1J(o4tx#K_*@IszmZ+>|LQac*0rx5K z^l|L%dsp6;bFhJG=utOS(o(2Z+v4Fj0u0?WsJ~7Ux(gGF60?|&+x!r75|&}S<7BPe zf6PkLl)O=o9h{bLYj#)_Doo8hpXpU?YRjB%ril3u)sa*@axO!TEm>Tqn!vm~a-RYm2jRM^*t5c2OZCMSw1(b z&YqkzyQ$>W2mdJJ&3DIsYg}E-5m89TI3xq5P^dLmTgDrR^#4Y5Uu!8gAu&31nvx+P 
zrqFd~-n}GnJY`spK~vNmdWI@3!Xtpl#c?s|p?(0*jU^(AV*+fKF4q(;8q>|8RT{it z5<0mfmqV4UAXxtLY&h>26R6}}qvUqlvVC)_a`!N9&MNt2!Q%QR44J>9xC+&I@BU>m z&!n^Ol*Bav-33gis}JId16fU3q>99^iu<)q%PM9tc>~Vgm z7pX2vq!?F}5AB1)OINLh3*CJM`aUg7*#jIBBoifX=$32RIjJf$NX2DGh5@tM3x`>j zFl^=7nULrmAZM6@+~K)l-P!0L50vKOd4!2|MS#561VNlpD18D@R5wa!pk*9^!Yd4+ zR`d!axLo@cg{j$8bB-uaQH6XFH8j2~O{;`L zJf0|g+z@V4i;jwY+tg%4dZ*S2o32NS08{j9;0ru4@rSVjlld5=L&AfFb-}xWZ{ z@2Eswwa)HgIO(A;qXKxsUY4GWNGfntxo%n5FB&Bw*3v2=w=c2UKwo`M>$L%)wryg%-w zS~R!=QR}&}OH_wV(w6zWWaVmps$%-LtVX)$H%g3a9%>knp|L?B&D?zXnM-4&pQDZF zH}-WkXzQHv3(r8R2EdnfuPix>_KwU6pHJW|nPyfi+I@r8=Yj;)HBC6q(pVr6dcFB^ zAhI;~0gTyLQPuPeLJ*$Uo3`nmMA?G!wOQ)+(tXr*lp@ql6eh*H=|H7bdBrp)3HdM6OtFp7y-{8{ z*km$lVW$Y9V)jP(PTSrJ{q2laR4UPRXAsDG0gw!VE0Bw$BIPPUjNyyFzB;+emZ3SN z8$DO9E|+&3itXlZ^l4}Hbd+-~Kx?A7C6g&KYuh+;HG0=nFkc*khi;7le6h02T`#=TTVCRle(tg zaLRNN^ewX5i9!!IMuS7(MDd&5?RjFbP3=G!tH?Q?=Ek}`;BiH)vzO)8L+UllcZD?L z#rg3@K}e1VItF01OG^wfB;Uxf_2r%IjOcdF2jp|=UQ$F)|JVqHF5cJP>Z(204A}Hc zGIc*UaCe#?y4oa6)Xk(nmpSR+aQirXr=I>m->GSI7PRD@DlmDDJ#4(jRT8QB?Y_nb z(>ahY0003%f4C3eGxzZCU7LSla4Y8(Gr)SDE9V za9ICA$VVk6 zEll%2^Xqd=niM6h2k_w550Ow-$7hU{@>tarPax)iBrkvP`S zGMfR$h&9aS36uwn;e_2ixO35Eby0kgsMAJ5?H*>Q40->dQ=ailbLGngMV_^vW=`n^ruLD9x9qTS?OErnOQo}IR0@+ZEJ1vhn7F4EBI?n0)zL z(e&Suf6ayc-BBhsU;ZbZ`tPW}{($_>ll>{pcAxhCCu{caxW6*z-`m;Z{b}tlKQ@0L z{}|9eEdT)go_T-Dz0a5bMNfa_*}o6*pEv-3-%R_bQ2YLGJ%8uj|I+lwVEsn^L7u`yWM2TlJ65C5lb{yhSF{z2)VEc}0W@;49vDR=*%<$tmA|E=urO``Yv-yZvurT>4m znEr#7KUw?#*7El#$o>Z{e`WFiUp-6zpy#iw{$G0jh=IRS&~IMkTN`Qd= T{u;vPU->6HnsHY%FX{oIUJK4D9W#EsPADE$nRRU2To& z>B+rj9hF?Y@O+xP28R3|1W2Z4`^{pY(PN&=@|c z|F=nJZ)+AVFDnlF8|(MKEwGXjB8va?2oMl35ESS?YbWT<5eNtrNdAwqC@u~T4mCF+ z7A8G0HX8*7DJc~xH9I8(4?8U-7Xu|53kMaeFg>p_GmiwDs4DLteLi_3QFdxkE*5z{ zhCjkw>SF8$vfL(0ydsjaVls+SN;-;?B3eok%8E+LD(V`lsyZ4kA1~yh!rdBSt z4o;Si-qs%8u9n80ju!6j?wZ#A=5ApYKJo58K_31dc0qqV|Fyr#K8aZ#(S<$<#X&Y& zAr3kLb_U@d#=%~0ao%RZz5!AGc7Xw|!GYdUfliS@-ibkG$$#B55l<{nO)YHS=o^v8&;GSo}ZFXn3-6Rm06yhRF<95m=m3!T~L!B 
zQCg5$Ul`L^lvH1o(Nvz;RG!&coYY>C(ovOJ`cEnws_Uvt>RXEIx~l4%DjPeiJGvVy zOX_PHn(K-i>#Mrzi#i)CyW6WfIy%y;`m!4aN^ASeTZXH<=UTf4>wD(A`?{<905t!cP0-Uf06$U}JV)ZNYGB#b8&h!F<=?;NZaU*brcNcx+;9cmOc+A2l&C(LXXbI59ss zwLU&IH#s*sIlr5e{qT9XFtEQgez-YuxHfsdK6th{xxcltyS;m{J9D(Re6YKDwZC|Nuz7j5d4DkV zc((R=zkTvgF7K~zua9n@j_%(t?;bB7-Y(z1?k|t;Zf>7%k00)?o?oxtACBH0FF#+e zA08g=pWmKdpPt@6-<}^{UtT}n9>2f8LkqsJfqB4a#Sh|zz-6s&aAGF7yEtP%ZPU}Vc`a<=Tx zw&=nog#r}&J1VM4j-?P{?uCNeO7e9K%x6(CRAO@4VMYFGSc73vSwb)fOim>%@@u(L zXtiA8p&x#yXQyx7d>^y-@-u&Iyu|sTPI##rR1K(y)c-a@9ifU+!Kh^T|4saO4Tjl} z9Zr5-=d6(!e;))tUOB}&3-z?xaL#no>*|`Co?jnRS8xhfzMTg}cK&!gIBM?DwqAM- z58GxFaeKMSBHR~i*?L@J4Wxa|lS6sjpL7Vh9q()qS1<6dlIT5qUHk0zcEh_kTYF(t zQNC2Kbvj(ExTjm&4}~4+(75J1QAIY=H<`rA?!_^{9ZiEeLub4 zd27tYd(XD>-hGtA?bst0#`lQ&Oy&RHKjD9$q!;(QfcAS2dAjL-K`#1&?w7-TzfHBv zCD!{~%H(_c9-y!Pxn8mRX^!3g{@$J8{p``(a9)r_hj=^F`-%>?`gP#R0w^|9z1~8K!ksE9d+wh&cG` z3yz{vX`ZN8Bo^p7AGlCgc5tG)$ib=D2#X_>-1Hl-gZ+{qkG$9NDO;4rmM^9s7p?or zR?87K98xn>;ghMaZryM<%GW%n%QalQj00C)Iu$2UtYSd3HRoEag{)|f4f0h9eLJmH z&*QAUTPC>eU#WrL#1Eb6Lo@zL0l2EoGRS=}f6k)x$lA>1(4;v#b*DSoTZAkbz%~y& zyxXt|wjnbaPswSJF(Ja)^tm+v5CfaYJKq6r)W8Zc!9Y+WpWTdfmwOxjL4-gu7zl$v zuKnSEzX%5Uarc5pTs=Un{S7h{FmjBF0iH4b2+%yX12&fQ|MMfEhTjJ25>KlDQBB9RENo_pmLv z6wUFx@Qf2oKpeTh8A6q9Vq0GC>Tv;(Sb+!GB(yhBF!{XXX;A+4f)}xM8mmdl!7&OU z7i7K&^9Nz@o?$S->Wkh-jCHb+n5m{TVL$l|AtMkw(|$Q}z~}CngWdswJcd6g(coj_ zSf#N8GG?uWg_HBrKE1S&pHN{mtJ-GI#?cY(M@HT2CzF0>eb%kxc{5$t$v}>n)X55Z zYQ^!k!a@g&jLvwk`jOKNof|y5UDx4&nDoc;J#ifZ=ffn8PubF4rT-mCYPg%3yt*VI zrX_@juU#n)yW7lYJWc9@WKxU}rcf|T8cbnfbrDtUeAI)~3kRv1_F+8LF$FDQAXZX5 zVPiumib6I~bsG$#IhA~$CfLM?LCTy+J@&LDnvj>40|kjP$+1I(Dx66i$CJBC%0n7n z6ovn?%e{ZtVovgaJKo*a{s6j%0LZX3fr}Q zJ}hxRcP}%Lq;hbui+<}+|93zB&F^OjFO9Z42UdV&RTKD#*ToDt#t~h)WTfD;(|O50 zlt}9{lj4u$^WGTj%%rU*J)=C=BPzHEuaQt121lh4vsU^_T`}(HTcf1{{TfLmC^)NH>m4F5 zzXy(0p%J;1QIXh8*Nu|Tg7efO_ULQE6!UM!431kb*; z>?}2qZ;{(=8@y9udJo=y@$alnUa4KWxJPD%7Q2PPxI$Alf#e!@W?nSA|DB9#~Gt%IEz-6XTX z1z%t(vDU7IADxT%i1qU0!!ZrC2_&%OU1=e4KQqk 
z_>|+ppn#0yhoS&P+@b=gVre|+v@Nr6f+S!bon*8BVsS2N$1RT zBYY%Rm*~83-pWbiUp~S0f>lbN=H>yOK@5`$*-|)+OAy&mlyhHZ1oWeY8(=8DE%0Mb zO^C+w$Z^bC>vGONXkD@a-AitLhFE4?Pg802z+HQJ2KFH3& zx`uXIP0qv3IzF9EyDmT$+%5k#tSlhmdp!*w z6SIDG7JGwVETS>k9K8Br*U=J3-r;as8f6vZ-raeA$!ug~8(xS?NY@R&xg zxmQ$Ri;UWAvDrH{q}k0=A2&%FTU;kwxeoH=3_>3}SSxy1rtAPkJvW$Y>0NJ6Kkh%^ zvL!pWV`)6sbV^%k=uc&7vjI;@i!47j=xeWY)iR--b=Ysu52B};mRP5Ty9k!FoTi5y zz?GxcB8^bOT{sd}0zyp-K**lqKuL7I`UxD?{O^<@4m5f*&8*e&N=9ev5j|8 z;C^)9xhlOD^2mU`NO29rCLxQQ>`H48dpjOC;Q0x|ID_aG1TzM@E}fC)TEh)!jdqM1 zO0%V4Y%2UQfS2R;dR;OGa&g^Z71Rebs)C3U5<7f;-hPt;L0_jRCSuL67(OrjqDW47pa^N;l0uA= zJW_hn)hz;YyV%4*L@w zI8GyVogd+82!Y)Vbu_xrks^~(D2XKQK$;`K(O&9NJ+zaI?i}L#acE^St=eF_FuK;> zI^VBEo2a>v%wt?~PldV^s@4FKKamfMG-H_b=OP5nnOV(C4qgF`b@+FGkx$PBKAz4c zYF5wi_9!j-Oo|+>2I7?T7uVJJ5%(*V0nYU!2m``hU4Spy8dLzy#hvx(Ey)I-Y}$*P z7!PQvCm|TT11>JFVCXnnO5r?E!zX-??uBPdce-xc9k5=W7S92EI06I*s4K{ZT08;8 z<6k0z^?J^;L?Egg0bdy3S5zV2*$rSWaD31u3TQ4oZiST{gu0 z3KBO~hBB!OA2L9wAu!d60N88Vtj#7P|1Wtm$Vt~)CwGAePx$MnlWzY=kYm0QW3hns#2(d~s~TrD4{* z8BJ3;ltt33KN;$Z%VIf`o?WN^o~f*Nvx_62fUH?H=colxBt+Abdc z1qsr`Xzx6thw4MZ!r^d%N9@IcjWj&rFdr zxdPGfNQajpP6M|@<6m}nphpkGZPm36lVq9ybq+U~K3dcJfQ0Kfw_1r>ja8DOgS`>M zDPFxmS}Ngm_Sul`5l%1fx1?L`aAsm}a>Rh6|5wqny@FImW-g9wR4(nbtYi!zWyoU) zH(lTW@`S@k%U4a@!)AMeddg}!TUnfuSV)O8^Bg! 
zgdk9TEP5msnKZB=Dw1YZ+z?*sY(~oL>+05&>7LzX|DAi+9*a?nK>F!JF0$J{nFkAh z!_L;sQ28mC=Bu3!@l;rT-*-~tq$tBUZ4QB>@kY)Cf)SP$&Y2zb7w$a?bU${F{UkHcFY+DzDJN&TcG)Txm3+ zsOaS0lhLawUe=;K$>O}cr~*|JQgP`^|eAP@tn8s0MnV58^(`=4?+`ckDoU2wR&C{rr#{;Ey_ZA>JcOWc_#;F!i&; z{1@}blQyiU(PM{arI`@S$JqbK#n1S!Ur$Y1?&K9Aut){vw!D=G1)B#@6SDi@uvJv~&=20zajHdR|S8dyFC`^t^G06HjlIsr%92bIS` z)au8%$dyZI`%#bC%uUIhQj;X~t;>c}^Jhv#@0prfFUc+1ia335@}&Y(2hZ|~r%Zl3 zNi&6B6PlYT8mIBIhzvDN$$EzlMLKGwhm1iKjA5|-;3Df5%Y0x}gau#m`M@Gg!9GP& z+T?w6RP+M))QAI=UKCLlq5GtjfQzNEMD~P{viyXH;0UD%wWZV_uC*SW)ge)^4ScT;ZGT&;EeGHc?Bg;#;^o^%X|aJ%ZNeMR(tDDzpEbx z6eA;1WI!pivdz`sy0<=LH!?{-HF&R`;{o8G!)wq6 zegF#GxVchX&>$!YRXs`CCMV*r4{1V7|1V+$kFNdlEqY#t`;Bsn4!a49qqCVyF3DU4 z225zak6R6t7+ZG{$t=%Hw!xceWDRb~+jtVpL5^ZeE}xO#4^t;h?+UaE z8D$Yzc&W4Ie38&^SX2l~%p;RR$>aURo)8DAtj6nh7$XH;og&1QSZdP?WX1+_-3s(Q zY!h@U|H4QN(n>s?j>e-g(uYFn5NT@tsQsmWb1MyQvjfJp2F8pbSH5sE>I@B*3C=*8 z$O>$ZUO$-fI%HR)z!ef<7Omo^>;R!93QZ%f7Mdxk<%G1M*c18kG+Nn)Rm`*UJ;nBr z%%n!?3S5>;v11nMH7U%0saLcyUp8hnro>45IYqdU=UH5U)KT&v z2WWolB$ttb-s;RSio$8MXNYe`_<`B$)IfdNH4p=Ne;sf1HZ^QOP#q&OPx<983e@{b(W}jeVYZrC+P*K)oJg!p%QSoSQ;wKXm2?caulDJR-6~O7MP7ehp9WBtq z!aNNd+lP*+*fne`qOU=eC|onMq8@ecaod-KxxrsyodW<(V;A%Lldumuisnvgm3xJr z3N~FWseV%BR_yzCOiTLW8zf`wcGih8jn7d^K27~7K$MWnMK_IY8zJ)KX(+*!d`6p% zyoN4OQqBIVv)~6A`Dnk!K0U6h3C*Luq!^lp?1}v&b3X(I#l)|%C#i>mJkf5_3J{V6 z2^Ny?nPZn0pR9{2B&IYZj|o%;HO!18K(t;VB9ui06p#+dv@2Rh`8$rJCtRrO&w@A@ z!NM}jiEVKXW|@5l4hX{I2npL<5i71*ng^gxVi)xnxNekC`i~hH;XY^=$S-PY859AY zq`c{IJPIK)OKsFB9pU41^m|UOC(5G6?BL!I}%Yg^fdV@!4jZ zo(sBPk2dST?4!ED^Y7uQZ4&m?h)1q{5Y@NVBv6l|uI|OGBywc*)iqEyrJ|6Rw)DmC6)8tpY_gK97HZ zwM4PG)BdEGx7r|EZ#3bz(9{~qtcxlZu%gliTJ#%Uaa@^aI!nh&uTpBNShHGZlv}P2 zdm^tyq&zxz>#LO3sD5@|^F(_#p20~s%{Yl&+;18+d70CbF6oj6ZPZq^t8}&5^d6h| z?RRRao!d{S)XIvw@d<-#Z7n!+o--r2$ z4{r%WE@verkU+fot#>p3Y$V7PB~VpBK$;Qy(Id1d-vgIK*7ns@D}-|QL1m(S;loDj zWVw#nz1aP9vdxHnk2}$Ha6`2Q@3G!v5uwBqFGfxL&J1I#?20%v#u5}PNtp8P$d-y7 zcQBrGB}SB;9b%NcIDBdKKA-m@8p zLX;GW@D_zm%XLgP6-5fjG7Yst5QorY?iqC=X;VJyD 
zyQdI^uL-%?rRkl5mv8dm%g?DDx~<1dP`#d?W2W|-5*c>iHjOWKE8p`H?iud{vdhm& zv)&)OpQ~BFoY#bVYO4?5hnM@@&q zEBF-tuQBeN&b^77H&7wA?RP)hBCvNreQx^K>Dk*6#g27w5Iw@26T$oPY6i z*zES-pYn=pqTV+*(@+08@zA-|r(cS6|D~Ur34k2%n21+`cC@xv|>}WyCSChRFz8M_&2HYpPo( zP}DIlL|5r5M#r!hZikdf=H@pBakrA^TN0B%ux!+1gawL3 z&*pr!0(TZoi#(%Uc~=%LJ?WV9oU#dviv$ikO@W$JJj_UjLY6pJykLnkK?!dJc`UZk za@lZvJS5yeq6L+_6IiOVb^1X`F{{m1787S9kIe=InWbzibFqL@3k;El*qA@sSmsAX z;c-@ZafWoU8bl!;G`xe{?~jeyCuG^^zS`ywPq>>{ybh;VenOB|zo4sQk`+$b#SaKL zoU^<195Z1E%;(3xH1U8QWonpXcc>sk?n~yCi9A0)^&xPgkI+3gR0@wFQyPDE0{@;n z20y0vfj%ElzeD`otJ{w2k&jPok>16_!g-DGWVW00!;P>0{M4b@MJR+4lrrbAspNhN z3k+0Q!k{5!2hnV%#4&v8u?lY(3lKf?5Aiwm-{BN^_w4%(`r{u<%=f`0EGNuzG>f5IYvD@-adj_*8cXyz^|nP!avbn}C2m z_Vul}7bIIqAih4aXrp$$NZ;El?bMt}pb5 zZy=b<#DKW?C=Ic=7Ab3(H?OfOpmZ)U1^~6gO;1o(0uSj@C9>*w*bcdf^@#&`DMSGWAo0c~PDv zfQRn>01t%VtkH+8S*N~QcWZ$~dL-?%^AXm59xMV!5cBJ+qq#gc~`ejL&gs|NV>y12pD|;Am$FR9ck_b)C*n zPF?13WxI^EK=E&s+1>ZgO550|IB9KbgbCYWH~i1Pq2Pe4C|E(#q<)5m20SB ztvNb2L(`A8C5D)d7qtLnpP(Q7X0ksdt=B8GmC$PE;8v#Nc;Hma5E^d-D~yn>xP+d& zAphpn#29(wn8hmoVx!M6W!kvpZ+4j!f~ESXstS+G!NQ!}J$AV+f4|4Q=c=D{ido-oicB>QU}nKTDp|q-|5B(Yt|j zgRsz~TKf}QVA`Q3gDxRO3rM6$7E((0VARc#w7^;tyg#svmq^ghYaD;U7Zpz+iD_&g zG1sg!gCTvokcoca>tCO!wY7_m+>qw^OZzbpFPz!N=_2}5;B+*htpJ#pHJ+0@tC|${ z7>-xBU93>P=UO#F4L8~ltv20&Yq73$aPLcJs=vZn-C8Ux4^*l_lMYdxbB-Y7&UFU0 zb>Uw4vz5VC)nR+#G8Lagr*>6f%gr1jUvVC_&Kmv9MygfGB~L$vvU(Ey>%Ek0E3Xy+ z%QRonWP=((LbEZfX-)?{?)p_6r82op25EsOlQ^!a)Af|iVG1#-6 zDg?7C6Cy+6U3O4E{@Z|{g}L}ZNcr}6;qTLw0(%-9q!6G)d5^G&>^&%AKKWR&JRtp@ zZX6&SoDfFP-vvZs`INShG;#Pi5Syn^6YYC~r%2Izx73E#u-eIV*H<#(?;$*lD1n4sEUs4y>a^x9vx|GoB%_YE-sZ%s|4<&7%e3|x zLRGbi*338-bDl?$8HyU9QjkbwF<}E4atQ*|wM8s~kBTzlhVlvoVI~MG7)wgzN**ZJ zGD)BS@&Xbjhb)^VdUhD*QwX1aNZ73cn;xp?x>>cSjDSDWmDbqlvxW2n6p+QfF>~)F zhM769pU67|#5nj@zak?8TE*JeZsqOu#Ty*5hjUFV)K`9EmAKyA9kdyoxpxTIM}wOx zG+HeJM$|{=YDUTH6<94pg{6iWc~r?fws>rftT8)emQpWZ4p>t^O}y28zRpP8m--w3 zgLvRZ9)X6?RBbTJ?TKr{@kE;2IU>6D76C*Kd=HuAAhT;O8!s<2lXlzlp+_W6=te7L zqE`sjmbrO&@sTAY*Hod5U~bC=(-cfp@gt)ZBt 
zlD|Y}hyX21l(*w6o7l(M_{FM$hm4IVDEdV~6Cq!qQ@C$cgCPblhc|w!oH8{YATWM#3=rDT}RjjiZr+W zLefuWoe zH6uLZ=s8UlqtL5OL+5ivAm1332t1=OCf48&PNU-ZQjA>lRzeG3#MIcnI-ltL)L27| z)w+W>9dr2sj13oyaDVyPlp^QzC)3V$6Rguj(6z=!}aoo zF#HYam=^(t4?c#+zM~a-^9Nx8&`=Z!|GfFI7V$NmicP3wDrh9OO%86H<_;V&0!=sz zJFGy|&!!xAFYk6f#El2x`!ZDZwj+m0Nf7;=aK3Nnu=7=%60@`zlGJ+rw+0;Mse}LA z3WQkFrgL}}JB?=Dsl!R~^UEb(qmm$`3VV`9lr6?TAa)Bxr zje`Vpfe-VUN`5^QOw>@03gCO9C@hl(MQW@(83EZ2B6LnM_A-*DOR`*ky%s6|6T>e@ zL_#S?s(6?!9qmWjRlj;~!=kF|*v2`uSRQIRP3u{(>0t8LTsnGenmt_bUe@6Z^LPT? zefX(;fu~eXm)|mt+dXE=MUa8S1_S+1v3N(dn{5j6GM=u)!dVu}e(@}gv<6Cic!^I>HtHqfcnd+k_6U&C`_L}=DtD+{u|NmxP| zDj0(qx@Yv-Gdbf?cLiacd2w;qIY@FB-SalLm+%@Ri9uCzNk=!E@h2u8YQb`4xHBW{ za$e8l_ddwKVSeeV{Ebs%3oD2!TIJK|%2_Ov({!zmv8hA~Q5~ykGkBoYwp1>|G%?6b z7>A|TnU-67%2;sDI_KQ*0d-yHvt4tkRBtLrJ@(Zep<7=`#5#G~ANWiiw)>)i-&-A) zmf4)B-ae;(Ygg%sSp=B5WX%f;*~yBJ)F*@$;`Q7irGdC82}c#2>}5sGs*yGLw_mr4%UY$>ul3DJGPS$m;N09t!j9R$t-#fnbjk-r6kLgr4aE%SU%& z1ahyxaD@Sv{LRy626}KdWvVZM&_Z5h4^<_zRs+y)wI2{8oM!ZVy{W}#rK39r|B*Vg zasJdJwxqcaE<6~&cjJ{9Drje$LydP|?G8N^ib+b2m*EHlpgv$xjf$wsw3V3a;uTf+lD*R`6nTB*tmy|5 z;?e6_a{f?{@k-&`yE|$NC(snWq7{+8{23wN{?*0k(VuCqb;Gq?rpP(ha#UYlf&J#u zuRGhwIX_P=uEQr-w$?I+*ST%G9B$LII*r(kSb z!_vXwD!*Q*2N*3cpqrjz8qm!Cf+vNjG!6?;iUk{YE=EYH_ z%pjkaB7DA_7v0w`=;iHxr`i9mM*TmCJ$G*Xrqtmfa9r1Wr24s@fj)xdEBCX_kSH}@ zeuMgvVg009Oyvd@5@siKI^XXDj)~gSuIEwC8k9g=>OOU{v>FACQA%q6M{P=BsgQbY zt!tW$)@ z*D#xk%bA1VG+HS+x%Qya8J>)`2Fi=!HkVl(OIrtr62^;kht;d@a2Jb)kE~XnzueFd zBH0_5Fn*fG;0gI>+};esH`-*6WWim9;S$P*`q~H(<_jHqrmrAI zI+(%jc60ofeL+Fp(bTQer1PI--5F37=R%3mk?SPy?nf~KZNjJpJ!Dh`Tk1eVCUFs! 
zqdcNf;cZUXe0L?m>j|4X%(R3vpZ6h$1X)U%WO47iQF#SpWHfm$4PLTL>ZpLG%ehCw?OnOp=)rX9rR+HXyk6Dz8Fv=pq>cx$q<4z2|-qo`Xnw z3X;45%eT!cvv=ET2DXBbWsyJ<0(n_@unkdkC)h*v>56F)M)Bb<1OvS@oWXr4&-nfJ z`VGU`Mu>SR1UeZ2iEYO7!-^D+fPqtbK^|Iw18oZ0>DTvm1xFt+88fJR^l~ldlp%EgI3S>?mFM&FImV!=#mLDiwJAJAFoHN-b2rqA2kJUTYmeR;{xLZ(onJNUaZsnR4n!m1J*P~|AuKhl&(>1H~8_p_} z?Q<^H8)U4d>dj+-u?%5&sbr5)+vN-6`aD?tjI#FJz}!MsWz7~DrR)MDPEkc(v-WS- zQp@(_?xZSqi?;%$@>Hu7pXuslS3S>k8oBk8QKAnk!R2+=D=N>pJr6F6E?5K2hq~wv{PJ_@^lsLdgGcYmanXI z^{?jilRKJsNneog=<2!h=g_{9z&&S^%-r}2y;W@<4|2i07e;-U z*sxBv;=$#~q$3VlKX_j+?qpqWN^M%ibT>V%@OohgwKDI^oSDJ#V{pVXNNxceZHAs zjA2*EPS;|3djTzYC-ZAq#p_@GumLPAs|SBGD&cOkXX@p3tW_2vlQm# zRi;VqDxSj(in9l9$H%ByxKFl5%%=ZAd*7~+h~`*tGet%+8x}NijRRD8H`M$#sZcx# zcxHv)Zflke^>b$2?R3Pr+wnAzx8K1G;gXZ+ZFG1T;ZNSW2ct_>lkK@$WQbhtLu;`9 z_!l0>bDIn_U=^tVu-6JAHcTqh>9l7(Ez68ko8RZsqh$Pwvp%MQ>Ex#ZQ$}N!ghsEp z8mdP&WTW4~t80CAKN`2%QI+*vRHh|2D)i3Z)HicoPw(muPH zCklacXuvf&m;%(OI`K2D+kE>E)H;4CmmQ~Xc; zR2h}BS7t(?Y>u2b`lA)i()6)gwpa|*A+Ig-jQT@i?=;BK=28^MW+BIlEQg8Stws0h zT2Gy}w@RDSD=T&vpW#J`_zL0S>_noJLS>qhvJah-ro9nNdGTZ+_89(ZU8!i-5)(x0 zYi_riWglD1p3{z1+KmQQOqg^4 z5mn^`+~F<(`Ol2VGPMfyW_gY0#tew+Vbc%Flite zhYib)oi(NPgl;QGt1Zp(`qOrzM;qLZl@3>>J+F*jdzq8IlC=6*OBOZ?l|-{FGy)h~ zL6lcCIUHChXQ$VwsqvJu&DYtuFhUcp!gWMTD!B~AFhXjCrqte?NGC}PK-T!pR9<2~ zzU`Y(glrl3j7wPsEmNLOmm%tCX?0Xx3_--Lpg%>Xk&}#$>Jm>yjH*J3`~U`F(NnnV zIkFVb@zV;sSLZG6=B~i0#%BGRI5-3KX`fhetl}}7|1$(5ke*NVE{x} zx972HFFV}(w5%vhENX?M#nym>O;33qy&olm>M~BFp{1idw{#%OxzX2b&8Rfd7LR|m z;6BcBiP@=-+^pVQonn)CuAnJivNvr)zuF?&?AB?HKIOPgH@V?t$I3cgi8SrO+S#;t zO6yf(t(L`J;;dJAdrMs^#tne+_;=>=u$_`Bmmz2x>Uo-MoV6w!MMu7F zvUBaEHi%S$Is9($}VRhvEHX;nF-v0-d|9_N?B2$0BV|{^XSi0x3 z|B=OjBt>OKYJ~Iy|2Gj03JU6fvD*H#^4|jU&#*S|uyb*yH8L97`{eO3rh%Sccm?3NCSJO(cW$B3hHVKT&Lufqu;6NYTB za&Xd(!CmIHBxWT$9xYmt&2zMO%jVNLLVgLL!XkNKd&#zO7&a(+uoq)z#Ce`3f3vh{ zT;EZwT`(y?7rrd#o8K~Y;L+zBI`)!u3h>{BWZBX)h{~#(odk2(x^t4BQqii;agxen zdZU@q02cU!cdOj-UZX`EeHHpEN#x_IemQjI_0|b!>{zL&5{^$Naqu+%VN;g;X+`V@H3CNbHycSML{pB0l3-3Fhyn7F 
z{jz|yQi{U2__$G+MyMf2R0qb^w6&*PR6fTUMU0dPEnFQtew}r%nyeGfR*G#DsHU^w z{-VPC@|H1*84DJz4p!7T#+Hmm0&+rCVNiC_gklZx*s1WUBkyV63gTC=?uu=>l>Jh3 z)DhHC(E^ypIT+rtEcw;tBS2=E+7a~pt=gkV=5cC^+O*#u*_usEGzoN4@srgkPSiaYx2l4m*jp)M-hY2UamGJDDR zjLa;_=&^n5K0=4+q}5%6rJntuH7i2WaeG_@yMBXdPR>m79;p(og~&1B@HFV5xt0o!0tspV)7wjjoCkovRAx9azwv%;`3=F zJ*e+_5*_IHN!&iM9oFk&v$57x$3GzDr6HmCveFJ{|1mR(|M5fr-_PtnWKJh%4{H-A zI(HlED{Y(D4K}22-`)XxhnJ(g=A%>{ui8uy8+r+N4-0w=csIS@ngurhVl;`x_37H) zy-JGkCX?EOyhs*-#3Puoz1#FXdyaLx-VK|CZQ^NYp@qM`?#!1j_E>XiMU{WQYw>j< z8o%A>zwc>v=p=)0jIffivl{aBKfus=PqWZo5?*KNA0BfG0#RNl*LU}GL|<42%deg7 zoOUA1jUzFttlavoUrgOcJ6w;untROWd`$n1W1O0<*3%yXWv{NK*P9<|Bmzlci8{s* zh+!#Re@XK5J+4HVyae^Tc6P7D*qQfW1wXub6O@VV0&Y!Aa-eEBuy(;7#v%mWk>OZw zAea|b_@=EQlTtFlr@1R37#G)<_8pcm5o4)iEelx3xs8MwR$AR~+;+AdtY5YWTAnY* ztZyO0_WkU1)zks(Iw$*GtSX|MV_ft~jvHT;#bV0af>>n;ZY(&m5cg)0US4rNnaH6| zk-S_0ynGTStWxaMd4GBwAaQ?u&OfYf)96>Y9$WwiF(*?n4Och$yzF?!TGkB`--mtW zsgCi3)v@3EQIPtUK##leyI1UA87Dklzz-u3Q0&fCk@^i^i~VqW_I(~vYkeuovaEWT zt-VDiza#w%g(wkjn6i*%g~Cr=^|1TJG=yd_6thv6p;#fM>Ir zzDT&V_jW!5iy;KA8BAb(tFU3AgI!3G`{aj!)b|~X0n!U<(u!KhRZUALdGP|skMo)$f)pkvYX4VzvfLkUK z{%o_9G+C%=bwl6mFqDqimn>lAxh$`F&9naq+?!G=`hb`9qf~g;oBzO<^{TDY>q0vj zKE!^VR#>8=&+)`%)Gc6ASW)0@NN`Rrw3aF8Iac9^#0&=FFaxWp^>zWDI1`6K4+|cM zKFYj1bd4bU!i)V&AR>Vz+>Sk!iiok1ux)2h=5P83=Z6yZb*4hPUW{=If`ArhzW+L( zzw6hahJ+%y&rS(*TL_rF#UL;{Y8w-TRYsy*5+qXscNWgR@1a#BP8Y(7D_-mH1(r%V zcfgVLFD=M>q8<@R143G1PNa<5Go3gNKYuAQp8?_Ap)@JgBx5V57AQ_MPBHtjkSQX> z1r*n0?)l#kRz?9B-Ey%sAVBmTn)QtmN`%}F-RyvSE&&ipi<-FU>Wz_L_AeX z5UG#+H;enI47TcuM`Z+sIl_Lu1}DjZbgF5TPbfp^0e_VOH$}5oeW@V_b8ZtPL|_+) z?UxNFB@cJ>--B7jdq7OYldXy+{7I)XpN!sQwVGb9DFZFhUDC0Bm0{Yn(jNq z0ga(!&~nTvX&X99)kBW2TF7(l)g7|k1%?V921%xZsqWRKhB2IC;Pp2MBGDFp zv!H0x(RE(E_E3VIwvZV}clw6Q*KZ1N9S z+}~d5Bo=thMpzFC3c^+BBn#+t@sSByBzF47vDW` znhTZSuRH}#q+3~;NMj7S9uDOYw>2a7uPaKN8a)N$U9oA+6Uw__Q%Fs%QRQZdxrP;H zTKOwLCvJr`5EOxbS;;C{56tFnYqypju>vEUmi_vWNxcDZEqcNirzB0GCxgw6roICE 
zXzqgc{#1_61+mM%uabP4UD4yI+zwh)=P1DVUv=z{A2xEU&j&0kvjk$T|n{R5So_YVO|JJQjYp=bu*I9etjdRu= zfkMBXSfy$#Dv6%Rh6P{OXO|Qa{H5SLFfD52Qu|nr{j}SBk_nKwHFv^N;_AqT zOB&WIk?N#VB1QKd0n6LI&1GoLlpxyThT^*ou4h97%MPS{+#0{SxBvFWKG<#vOCM|y z4H^x}ieNR7HSX+}pn_t7%Vz3BL4jPFFZttgko27|C25*%^JLC(8mp>+jhAWn)xNum z=U7rgO=4LsAq$)Bn%#|J2OVm@CUxRNPBVnWff-a^!wSKJI)kSG4i{>`@%r5EIUU zqC-Yc=UT(XNSKyrCF`7uT$l5#gOg>dOqNM#;^3=nXyUTPY1uZdcQ>ZOQoAhu8V0cP zWG7gLF>+G@efZJjzIH;bcJDvziA?hv)dv>bi5F5uW!DPk5li0Nj(3bq)AhF-?Zrkv zwtMXY_v}OviZPYTL+_5-W`oSdyn>gge%vqbunQNoR?D(TKZ>dDKdL7+dt>v+`Y8KU zR|NMa{&fymv?;)+lxghfvZzLv!*w6PZuj&14}05Q=c-V&PaScHbL6QLuDOW|C*4mL&eo)BK~4ja=$N+G58mw>(rWlFEv&~e zX)bVOk>x-Fcu@5=0BkJMm|9@swpuw{xx>+{sZ3on8ew~Gq*7IBldk3srvIkbUiE2? zTsRpbd|);5UTAFw`FGxS4J=G;9EaK3z#ymiF)N0!vs&%w;rW2%PjdEHx51FkZbVuI zd98H0I`NYNI615cE`|l8CQ~58#8~AKULi-=Rb6?5hyfC5q}Cu3#(#}eDrx|Cj`|@|gGzJqaj$Gfy9J%g~ zTu&CR@31(sxEOIa-5oto7p~8l9l5-WIXZ8To+c`H+boWr9#W5852mljDtDKe9X%fb ze2+)27Yo;K06y-YxFIsIAvEv7i&GbGZf1`I`J33y!8TQSyS7hbiCj12ZhI;>M`03G z36|rxpqAa2S(0^>$|pSt!1w-bOe9eLD2Qrr@*b~FB)?DB+DnF43$s9)T-cR%hh$su zWYn-c=UlgapNo0B2V0QH<3mV( zGRMXw{zAU)w_ujkjX%QF3VpmWR^Eox@3J-gZ7b^KI{0}Q&>6>8GY+sJE@+*15XUvT zzNts}DFe5u0Z`P>A)7KkE>}szKgqw7zWd(6)p3Ue2tYBHjo_Z=MW}mETgkh3TgVG4 ze@dw~QVS=k)f1y$X@ZE4ep_35T#ma*OlqO=OCU*?yI{YMv6>}^$WlTl zm{KVhQ#9nnal)tf_paq+*ZCKINoIxXOgPl4;8(~ zemxa4!iA2wVpyb)6`>Qtt|nM}AS#KMB!QiV-QweS8u1D8wJr+!*!i>dwY#A5pVmeH zB-Jr;)caZ$MJ4Lkq|>4FZ$45WoR-sr`*#y6r4^Z_saq*9n%SVzv-V-v#?cqPKQMkW z#2@Izti5vb3@)-Pzn_*mYx6Y=3|C5WZ94JUD?_VUt!KBvfSH3NR7y_nmwn!z$+kO-Sg!c-*^GT$2a0{0_Fxt= zdE5TUsIV$N{~u9!ecHW`b}g>Eh_ny>Z4d-)i-$VmZ#be8(5}`EFR*L6JK1k-vY924 z4NGqtcJ`(XvEUL7K5b5P;)4RdQna;jkya=kD?&5D=y?r&Rmm&!f&LX)x+DE!#K7OaRfGPg z$odLB2O~#EGi#H7*4*Rj8V)-gNZy+|aXU^w0|=myOT;Hue20VDqQqU7rqsxKX_;#;Fu-^bUhZKs+!-z#3xqa>XEp-N18 z2t9$~C-#67{ik-z64gzB*+|f9C2rH|-_Kh4SRYLhq`?su^z=bMc(jiBTp@BY}tM7?g2CVL3#2+4=|nejl^mh zvjOhoKWSOVeA7B6N8{9>a>B8PTHVc1U&bnnB z^EVlR0(PYu?jJ^9z4dkbi$< zed0yl-JircOtu6>Ee|>hBL;`ftTT3McE)Gw=y*4jBX#fKIu0+(eC0<6dus{X>ruaW 
zU26w4PSv8e`2)-Ci=Cu1WRxv-B7zODa)%Q7gusf|(EQXJbBODpao5O#(N?pw-7(U% zw@6g5E{KG)KAyPZ2e(~1c#5^+2qCT z)noHprm1@G&yPxE^6^_Wcwh|sA;_4^1adlXr@C@Mbnnap~wdMp+TT^>#zC+B2A0!Vx|$YfZts zfWP3Z{8Ye|p0dWHR69@(rc|ZihDbBLN&XBqav%iTJtM zg`sLKof)J`?sp*S%o`WnID8UJ#;I*HKM&$HVmXLjcX-7>Mg0_k`FOP5DR@pO=)1tm zfDPLSI=qemSPa`-iiSRfpVjSpzxOFh`N!pi+A>+#%4~HyDRedyCFd2{Zi8>(*PCdB zCZP8=?Ryj{5QJ!d9T!7o{vaH(DN}dH#fGf^!2?n%Es-)47ivL%(?qFYOsNKUcK9o{ zH*_MtVNjF9y^Z|{@*DIim5lFg$!JwQUO#2Z%}qqZk4j`xN1WucOtDOw%{iDLuai7^ zKP%^No;e`tqV>-ts)=z*Np%Jq!zN>;6of*r6kzIQwNz!usAC=XnXp@tyW?+!B1*Tp#&RymI5}uBh_LY&g^f z5`uj=Kj(TsXz_7#0wRHhm>3z!?hBzA(kpV$VZP1Tni_95TuB9OVjnrHf$xQLpH9zf z@}I~FtzZPH%LtxQ9By4(-B$D9Cf{s5wJ<{Ijlw8rr|5_>bz#9F?xAK z_fqCyoVkSKku^8I(XxbhK$fOTui}&SB}-upUEB;jF|+I+quI941D4plgzF>nEu8An zy4j4i1cnPgWZbAkrL?15R1rc28(cDHITyZGEe#|=8h2l1g+;j`8k z>gDbu{oYJuCY_#Z3yCp6{+xJpSf_?dazZ1a$7`5(H2rbdsgm6-)8;wgd0BZY3e&Cw z`{P56Ng@H=U~Zs#&anK`B%Q)D2q0(gL3f#R+LTT z@TkH3{gC5Q1xvg9u=DT~E|=?H0rU4B_yrEjuB8@egAS&^KcZ49_U#pW%e~ej^vy8SXPr6#mlHa_~>>7SMM=J8nx+#%Xs}6J5*Xs zNN$GFs5d`6+`@byCZ$E*>19(r)JK9B$4JOVb}1*q))CpIIc__+9coGout!}u)G~R+ zCYI%Cl*nC2{g}^TaJ`1{F=OsU|Gqat*$ZKF+^fo372RV;aic-xFjsfncS_3kLwPOP z#J7TZX`DWNz4KHp+2mJxmWfmFhUZCi+4%O>0?x`tmz8@p?LgTjjey4z%=s>@pY-*F zNHde(m>xMqS*(1b0rXDyCKoF2ds&O?(c{A7C)n3~AybrR`~l+Iw_57|HDCB&=^X)6 zg#Z5F0U$veYhyDLC;NXAJ327Xnpx?Y7&*`im^oVM**ehvpXnU`n-c(F{R^|LjjfX{ zAQhz3fARg>>DyOc$^Toao`Hdpr4hge8+%#){MV)RlBAF&R*h zjEI>w9{=QMqxn;~%CWN4^b;=)_u4$5Fq^W~#hQ{`Cpn0k6~=P1K9Afr?UqZnov?eJ z%}xwvpdlI(>PM7-+5Mg;7W>2wl<_Zl$J3lxw?};Lh;@$AoH}T|CWVfWCj2-*z9=ZE zkwC`)oK_jhe#YcmdG_wy^PNH6j@f`bF5Ro2(UVIXp)f`J+FKnp7n%W^o=K+e7Y6Q5 zv;EhbM2Wf?3>UH|ZJchOhaWVPpO`z<4bDQAd=vR5FR_OWH+agT)nB35@Mt;%2K4P) z0O0WJ|9$X<3UHsauo6EtAjc@}fBO*-&y`A&*1h-$>xU?4t0Pm!%DHT6N+(b=AW~OL z{Qlkk1mQM1oFmm`3d`osw_ca@AQdd4m*F4-h9Js2(w*VJ!+|Rb8e!GvCeJi1r~M-! 
z%izeT5%MrziB;Al_QbgO%LPzeq~jtXaHVjh9>fd0r}tsUC+kqE;rDqh*USrg?0177 zD(i-aBXF)dQ1_5~Xb-9yfuCV&DPr@Kf=w<>Txr0 z>HK7Xclmra=LKeN8ozGHYEaUJ;lNo#NE)X9;C}kFllFwAm?h^o{yEOwGuD`4z_2h5 zx|4gd&o!3xaz9@27wF5~4ywf#Q~-D9{>N3%cYwPMY^)vsYtT6BSgl(XNhW{d(eYc# zs#>zQvEPN0vu+beuB>w>{#{g1w?1np;3uhXlPKm77W=w$5?$hx5}L|}=%9U&i+7;M zqt8>=X=uL>1}cuwMMy(jsf9`)sSEta-sqrJ=VSj7zP{96bcRaiKu5>a;9h{>79VQ& zd3+Jw=i|zJPy6j7{qtBSo;)1nhD*wSK631LK7K0gu@CCY1DD5y&-;sh(nL0yHk!%g z;YOt4W;}J$QMk$+hE%8U(-b$W_NbBXwjfgR##tMUJgDEu!-ALH4Yov zk9&^;!QI$JD03L)_iUv}K}jWRSs0wFM`y+rE~4;YXn>k^raco|MPk_;pP!jKyUacb zt*xK&OWU3nMoofaxBux7S@QIVs(HE?)7ZFJMt*#uevf;Al(X40z#Ta~X7gx8HpDZmO|N!P)H;9_P;k=;6iQ zX*gHn-EGi1OZZ3hsS@FFw54~9;tXj;qg8kVgE+?aw~x28&igYz4WsxfN$LqwN2m&DWPEqYx9W^Kou>se)gETeW-4K zg17wbd?L~sdL2y7o)VNeZo*-XD2~zKt>Og^WG6zo3Xvc1>8Q%uleWjEO}ZS)WPP-5 zXIhXrc*$%QtE+Ajy(xCu{EU9E#?e>_ONVM!(P;Qh-j=c&E?#MVyNO5o6%ov1Jo zA6sx6MG)bNDQAq-4^Ld7}YZY~hSA(~k`gccFZsQEh!a(Vh&+E0f&ByD0 z(C5QX>T-wyt{tgGBKQn~OxjqRg30C1k{(*srEaSvk&htzkvQuFflkOVWF^ z<{OBYZ+9*aF^E2=+^G73w6L^$A^gx7)Fl}FGe%MjB1VO0Gy&45NMRWC_+Y+3o zxA1krQSASAnSpQ>m+wJypx)Z?O(*B@6~&)?`>)A$^_`2f78UKt0*-2yn#)RGN>9Pn zblmU$4NY(NS5dkU>EVEda9k;M9m}gz4&K+U|#F9YhmgRJ$)&92+lk4okpud zQDumvgE0Pd?|md*t5A;RA{DU!kxOBcFVVQq-s4G$|5b{rkklWegXFXm=cSX*Tg3mz zRx+C7kk*rVJ8^bnj6f%%?b+}MI((J`;%2zxtOj`^LY4FgL-Z!~m;ek@=y;9v z&oabvQtK^q$n1e@*4=^zl0IWl~S_!X(`&@30mWG=UCYhIQ8(y?u{80 z`6(54OwyL<>{&c^TnbS*^x(B!`DQY0-yTYt6oO6r~x5Laj2sAlASKY%m`UhtW+v62&D4R@t^MZ1E6B2SKf!iUx8b zxc_hk@Rr!li%fa+InbfZ;!tWgBHrnlmSBXN!J*cyd~+Xn2I&>u?z{|DM!D<6Q-cG4 z$#tz{o1JF~ByYf-tHcdoWbxHjbP^=P;Ge(&6#O@?i13{3gLfL$G2G;%5Tbb51Fl!V zr`nQHUw}w~y~wr8%jNf;m}`MOVSoX$s0n`{0KLn#H$mM!B{0T1x2KaVj%7@dm#ru{ zRrio@ma=zaiLG9s>S+>UL+XJD6aR>;0rwBZ0YnyfxL6+{3|`4HwK{k>C)XX$u{@o; zh19oRLQDM1om@yp9A#MZo})ZL%OGyYaKb6)FY=kp z>R$m5;x+>(l2VMFfTdISkMtX7+;BQk*2)|aQWwkha7qEcHsPhP(OYSR1EkI9yIer% z-n4UZ&yz(y9mqk)dhOhJqSwx@6P^j}yP@=G>YEEfRXqR#I0^x6rAaI9c%8aOMsvm{ 
zkZwmRBm6DK8u2Ui%hiz^NfG#?F+uev0JLQ2PJ0Y*y1!e+dg6idBl>~^tOoU0iv|SUW?Jg-ZQ?v?8vfjM8qY4@q;<~eWji!P!AHxF1 zgSM@#rudxYPP6iPhH7Jv&L1#^!L7xW#6w5jq=>;F1ZM?_fd{E#MW;yhB5tt;X@p4W zVGW1xy^L{d2?~4&>DzyjeW zkjTT+tOoW9X;k`*pm{zT>2?b0H#LoXGB%78N+6ecXhX+V1F7a)SymQK?3DIjGQUc= z$9hrVG=mwF#u=sHJW#leG*kuxQo9vQSHpm6X_nqu95NwLksm&c6Urdy#hvzy_}wQN zQb!4l^Id|QOlN6*SnLO$EW1+7S5q#894%ar ze`^7NgsYf3HFc;ehg#=`` z-1B{iaQ_`_c(Ww!O<@-+9$^Z(sDPpczh(*nf!P8BWunu=gCuQ(3PyTo~)Ci z%<-l>BE7~HG)n`L**a3EXgl^IiXe_o@Fm5VRXAtkUl1--^Ce-_V}nZP-@v)wLn}L6 za4Wb~aWgW!1C2n-gmk>{(gqcA(IAiB3oFw4e+HMIj~{eXPPZ?%!Hw0w|S$ot!iI}0j>bYPx3zGqXu z>V$>frc)3V-p~2fa}qmK4q234mVwGgJwEw+%Hs0I)bO+4_!wZ%rMGjv!RF=VYwyBv z_rjh?o8!HKAGaRNp`;8AW=w-~guwGe~Qw1{>v_iMnISSn`Y1`ZQMj@F%3Tv=wrCU+6=%50QzggXhOyZXEfH37p5}k)sF%C6yOBg z#JnDD-i2;Jw*}v?@g-W1t=o?m#isF|uFjmM1iB9iOWw|q7_+MK16)!$D^Z`1HVCj8S*Q4O9usAC2Jv|o^_Z1m9 z0kgm#jDT)&R)sEnHbmPhry(03*T?GWkEcoA*kfF^0l&pLgM+rJles1nXwro6E|3D? zS^1FOP5i9>l&Y(>s6M9=*9&OHbx4=u$WrivI!4Q^xQgZV_k$f1WW+{BE*GM|S(sVr z^&b;go)cEs@B(MB3(I6R+JkN1N?(GGEGLe%KQ+*aKUU|#C;76zkhUE`mA35qz-5w*mPjk z52>#RoS?H7hZTX z3vf0>p1~J1g7F;Pjkm!~+6t}fixVDzl+G~NO0<&(UX`OOJFW;P)wuOf-0)*7pY1d2 z({I7!Aqh}9AZ^P$$$7>0Qcy<)az>{o2&_JEYu;G2(XgG<`eK2Ois(+#j z4$G{lY5`I$4p9~8;0!R~-mC_Vrv(MsljATgkR_Fmf|dPp3krl4D9=YIOg}%EsaC*$ z6|Z0l1IqY?=3lX{y1fHy{~mSx&ffr=xmQ{gvYW~cm$4|rj!RPlW(NfBmS0L8F$nlj z=d&SHEI*%SWWNxvfZ6fT3+3R5_3MLlge0Aum3#rVGm zr1hbH36Wci%`(@iR6%1Jhu6eTkgI%Dif%pe566_F}A{SQ8k0 ziVLdy0jGjM2Msb`k;~<5c!C&i`=E_ri!LTQPlhF-bJ}sXNt2BND@Vr%SBU%_c##yMc~eS#lmHjPpu*u47n>CUI1zg^eP<($kT`vAj38?G|R8FZT% zv(b@O_rZm;iOxmI+fM4be{kb?+Gb>~bNOZ+O7?SQkEPIGk;2Ud5#6hm>fw{-pv7oG z6wWe{7mY427j6vjo$v2xN3JWaEERd1_IQ6AJ4z`YR_BZEG_dy=R})7x(I#uo)0`Pn zqByB5-<#X)()Oje+Uj{!+(r8XvstRVcbRN^W(OLNC>H4v&r5LbXOnkc z;7yZH%$ZBepHqs$d#7LFk8qVCKF?$mGzd16Kawv~BicV?MbO5Aao29w?oL zCGe_*WmozyDu!6ty6YZ}g#rJC^8uGRx;?X93zm%qn0)Kq{uDo!k6}L6p@Fuh;{{Op z5qc1-w*<^;!`-N%u!<7HUD;v1O$XY{&6Ne%U6hMI*Api76aknF9v520m`ir6@?}h9 
z+am8r;ZF)rO9<9xX|fwkit;<%WZOJvq8FoAzIzX};lN_?caQI|HRa00au5{hE%bY4u0x%aOdi{8gQ8|+%M_FawL&iG`kxE#lg-w+g?1n0 z>R#%z+zsFTIv5WY;>b4?jIoKkEct$6_-5rZ&2)f+Xv7IG2<=5Ta3R^iYMP1iX-&fu6Hrtrwugf1Lkr874UfE z4R}uX`PqRnjcr^}hlY+ng)pR*KcVcZn8zDoE2~O)@dDKD`LP^cm05!?@x3alwMv!3 z(waqk&WsEQnw4=X?Nk?!^SAye-E-jy;T1@?ONxW09odTu@Uh`{Hj`?%ky#hKzN@3h$CIYu`$x8i0H9VW%j%xaR|gd90g6(R>UX9a}dc=micRsn3a-YE{D`bFq%C-%KVUPxlR6MBPu*? zPG_@PkO_Wo+G!lhTpf0l{*~oyFWd}`+8u=cgTYR$zTFrHr@k)Bv99Wuv1oLuK0_sJ zC0Qm+Gec$y^Gg?y$3lX?VWl*D;p{_!geqjd2Vx ziV8A<)qx7CG*IiP5ar133d@p@!wF)Ae5_saR_XlQN8HVFq3V+Jc`D5iL#V_w#Rpykc`pDJ^OUiN-tmT;ng~lh9!fldI3)3=>Oo z@rc`SBFl=zDAbsZYYWWvt z*2+OCGK(#}q2THUY<5x5i-yG(YfeS5^gA%nnoCDjYvJ2SPkEC47cJVf3?-wzt_YbK?;xOa-)bzDewfBF z0~Z&w*SjI7G2Ig3AL2XH`MP;< zB-a=Kq;4D!;bhJ_VGQv#M$>sJND|7sE{^yPJs9K04{=qHhg(`pF@sB-6O+U&6OVz*7 z>kZ;ea=!}U0dQ6;{$FeKFU2UkIR6L@t}wdp=7{gx)tDg07?IWmNW9kig*ygi0f3;S z5{?AJoE`u|%FMtvZiXq8r$(c7taK-l_gBdcUG*0jUpddeUfJ>GJ<6N2MADDf} z!v=Yx`|{1A4Soe>JGTw(ig95nH*#5!7cK+viW)U_tHM7-ccx(!vs(hFZ`hFFwG8*9 zf^W>(za;e1OWTltZ*{;4D`pWa{GXJTlZyelmRu8d#6QL#oTRbCuh;+{vD|E!&n5nG z2yn>xYaDZ8-!8!WwY@_W8F^V>tsJDsNrEx@u#G0ZW{W#_4_F`OC;p*%GyMQ;H!SM0 z!Nc@Cge#utS-66T?rTg;rrCeoxsji+2 z`Dns4)hpnF1&|d*1a=~;9TH1ewBa7bMDkCczeduz>aU0a_HXP(xVb36Wu79>bFvmzH@3v zXRFeLY1o{&L0{5Vg_n_}9eMUK!wXK9RhXGjttjYRkNGX2b2}3qTvn27+HsR*t-cQ4 zdBduN1RT~&|J1}tZ33U3O(OXB3vq6b_$`2g~ zIA%7j2&)ZR`tYp?IIO0v2!o6Mm2`of=>EI(QJ0~|U_rza1Ze)dn$qYy?!le=3Kt#( zY~D+*1Z`eF-L)a>nB{hZghaOB-5X-heX!Ey@4IH^`2oyl9&T*L5=`)L@n-SfUe`g$ z@^D7XNFXZC54_XCVSZvrxZ!e5MFE+U zo1MF59|?X(Me?G2#WSWc49HmjQ&vR*4#%`A(p7uu|D=6&Oe%fEA1k``eMiJyXfuV> zap17K@^^Hd25saaYd)t*;Er?O55a3P=zQ)L@G^GM$z2A;C zhSNK5xKTa$ZL@bxD>s}NX;+_UE;WKexK#a)K_$J&CuQ_@k-nIQKXHj!LK5NWB6w6W z;v+89K7^Y3k0HBgl#L80(>o4N_9o!!CZ!X;-{L+;JkCnEU5+z?(a3t(ud5qk-4r;| z+A|B9&fYgil<+yv9$UMSP=ir(%Z;A|8K^9Dcs``gJyw z2Eqj^O=j+PeBuEraHhp`*4ABaKEEAt5n$oqfK~{Y%FIx{0j;Z@p^tTU78YYRWGo`W zS61};eX?#6Gx+9(td;SE=j9yNosVoe;)I9YCnI| 
ztkT?^BS4~6C7EqaYQVsZ9AL{HbkP?4$vZa_Vl3&((&|r*tZN1WEoea0NQDi3192>X7U+Je43X$q*aI&ePAm%iY^7?QLrx!bn z{OJ^%%KPob^hN-)GhDSBWW;W@U&f```wiut3XK^xOSDEg`}n7NgvpA(wu-;deG0WW zt5;q&R{*#{r549UUj^;;*o6K3q*9q?unwFIZ0IB<#n-2|tRir6(tc%Ld-X5QJYgBj zoBfKiuAmVdOl^j>tIx1~PuK~(EVO$9G+{4PnK{}+ea1$CKj75O~AGG4YvC1{X0_B zsotjf4afhYx`F+o@&_DdSlE{47!@vrGjL_;tpgg^Q%>RZ$`5ZP!s?JVUE6E+;%1@T5u+wD$6@NT zhrDiSY233&Gjk{DEHyf3J8MToQ_*P@X_-|;hf{L2lb0jPyFtt#UKOAYPq=LPS3BNkY? z4`M};>yH9@!S?fBiR&RoS(Ll(fA~AP3NN^4WMM*>{+!D=X=HCFtk7J#pvY+PE7wC> zyXzfoKKa3YpE+z7@{8N*+u-mIzkovQKSw_(4!8HEPW?D(kpe4Z)OXZwG)H?I*k|Gz zIVFoCVVL=Z--&)|BsY92GL!XH)>TGRalq*pS+3&F*$YS*(<>5MfG0~~F3E?cBZ16Zkl8Oa;uud=YK;b_1e?LN zC-+wTX|#Z%0BQ=^_}z=su3=G1o2pPB9WOv|KU$&CAJ-`2==olL;7VS~nsX9&nvEF@ z=fHv&n%ngt=G(R?I*3rmpe&B9H;+#`QWjmos_g0>8BW_&v>NB9jwd5lqQ1$MnvWu7 zXji&Y`q&&b@GS@#>cp?CaE-aS>^{@1oO0P6Lyf!}Sxu@m!O8+ql=-&7F_u`VTxbdm zdq%I=q@08kAcPqFfjNZ5PolWe$4XDOz_)NtkvcyKgE{MBIt1XP$#Gw8BWD2sjQi z2qXnjGcD?q%V#H`1lr(JNdlO}Lamfa0-~l_1mctCNA;?}V02W19JvjsbzQb_6qE|9 z7_VE@_y4ktAN`Xlc3)DWs36ThWLg2%@HnoH0z^_27tb&%rp`P)E}96ea@58(%SxkV zcQ*OOHO|Jog<@3!Psc$Jgd~bE!X#0uRF)|tBbdroo}my?1bHrTh!92ph=Xox6>sDH zw4C?j7T(ZZG}&(J{#b-HmV^`zvS@)QN#>p+&+#(5e%24;Bn+o{C5&Xg`$Xrt~et^i45Pg!xHjh*w z!uE54n_vy0t(Oy#qjEH3hAda(nJC5s!3hBy917WDzo2A5g-A1#rP))3*!r3Zc?dsY z0BTQ=)!cJ-5LT04grYD0{8JH!tj@NKmD=`;q0oe_q!SUGLA=UPLM~CRT&SozNeXjj zHXzKPk8ok4*%^tV1}|8$Q2c}*iK0EPMnE(c@L(kg!Js#fB+rQ|ito9% zAbW(q3}+|b3H3;=#eSs^np1%PHnY>ybP2EvRhS$%gZ8*hTbOe=+m75nOIQH;_FY#8H% zmg%v%sma#CEy$}XJq%=hWd8sHkEG0TP?#i&?yW)kamK5}5aTzc_`ZlIV@=|4X~Jtm z-Ag3@fUxp^OFWiYhWYEAtaftP!~v8Xg=XsibjoyZ;3Q1*CTV+L3a`CHtI`-NVU@sI zu@tKW9Hv!R7(7wv6$Ly_1}7zkGB;Nc_3?mD8zG^q7_?DBk6Woz#@K$#boyNOGM;I= zV8eQ=fxrXPEVJAth1E!?@meL2Bo)J^@su3JR(aoe^A%$$Q#fLST|lfQvPWN*lk=B- zF-SEG4OG@qQ#g>uVdX+P12`Q;AIaBqL0L)m$ltEy}KbYZu<}vdiheRr&0} zeW1E3Ud2rX5l2Lv<0&w_zxc~+)!IY?ODr5|+Amqf5B!}Gz4;FG9#uf{*V-Gr{s}j2 zZgwZ1DK+YZzQ$(%?4BZQM9$%9$E9R07~)l!wp5Yc;+RZcgVt0 z^vvhX;5%*L%F3N_l(NOnz*@;3;FU&&hbe6;R23P~X91(n1U>=fi#=}xqz}_Xs&;&B 
z9^hG(Pl`d3Oi$)?NqN5yr@jgy$2sBc<*4SISp~X<)%G^1RkCj-1Gi=wVU=raU^WdA zJH8Zuh!ksgKg>BuB!fO7Co!qcvipgysj(mAfAMzuWIPWuAsalM5|PV)-1qZ-eE1yx zq?S@*V_T&Y`+G8!k3H;9ELu-oa4XH7{c$F}oBj;r+^|=#A=uZv+)JJ+li-0A?b!-+ zH8OKc3!O0QGqX9f`c}?^Z z^7|I|GwuaNy{)iv+pwlln>tf)i6yu zMl3;-&VL=x-bZvWE&q;Gz-FI&=RW^tVf&=47xHX+4LZk_#J(y^YT>) zzFxjSPxCnrxUFV59WZIrG5sinpZF_54fq$5;9Sg<4O9@p6`{!D&``jaaV-IGon1`= zT#;8}0M~@IFL-wK-|@(6iS?nOblDA`WapnSk9*>;yd7F@Gas^+yjcyO0A2w4?%`k2 zpsf2;4L+yhuRb#$leV$|a)4R@dEdh>K+5nxQf&SmZL2sT@Y?+UQ>jwJr^N={{-0)b zQbrq0h&PBAit7Nt;mI@amQ!b)vWSQp61C{}%V59~YlG~_G`=dd0mE>w`X@s%8=N@Z z_{fhN+91286!(~X!^VsO2L>bv!L0Azgxa#j<(9+g{6J{xUS|P^vf{7RQ-7O2hFnTr z@Fs1R1}+9y&M#1({aka&QwhdtG?4eLsE)+5FTH)87M?{E_u3t&SgQ{u8I+H*&A29a z_|%B+RL5-+)0J_M+({Mpak-C5tva8*sW#eB6R4a`T#NUQC_eAg;hTm2h_{!<3ys{c zOrW?YehT;(3VgOoZRxmJ?#y%fMiqllIPY-UNYF|S=nz6f{0{(eK#srq?b~0jl=tty zzf$PsR$lz{ixTI{knd+#^aXFJcqvhWWHw9Y(?^9KUdGQoZ8$%vI%MkePC7@8D$72a~Op04DRwz<3(qQUe zTzg~l^7TT?882PZW!=Zi+X9XN*tcmvX&cMi^6M!;#6i8<3Pq_=6-(Nw!Gla9bGc92fp7o&w$dokywiSM)HY^h+uI z5Iquiv8TLLWRqg$jy{EreOMCueF{_dNTGX>H(=5B1_Y}V8u0)QC@l1l^V82ZalM7) zfl_T)L(R!>cTWb}u^c^egWxp4UW=m}B#TG(jB?lc4Lb`;maNi%Jd;yie?!4~_s5^V z{}a76wV*%)=5H#{NblEAZ+|Jm$)^A5{V)0E(DU+Qo=`|qkY?c4^u&A6@+n?; z3_ePL7y%>^B?!y0@&hFkcThZOXWbA_;IRjW-$F znMb*hFpnGkCMN;c=>om_XP+rJJu4! 
z-zzWZ$u5A*UZp2~PB?v@idC`mmQ!mnc$pGH@6W#_D|*uB3W?dK-NZ}f@^-mgWY4eJ zLBE-=#z}f}eII{{um2D)-^OX0q|eyb=nZSIdMkq4>t&KmDK(1c*WU7a z<Kh&}j5Nqf>7|^$E@!IB};x zQVBo2Vju9PR2&Z<`G=N_;fkfzh&1}eH!+9d-UA(nvQUt1Ja*EGt9{;Jxll-*cann+ zd7|H2VF2s1$etD&>B&n^iQ8?Gj+T!Lve^&s?&kS@94>FazOcr&##{>Q8^1Cg>H*|y zF)dT9{N#b@QdE(;CGpKIv(0I`=Fk_bXp}{X`v1(&$n`2q>8bP;_M^fDhA(F`es0FM z{6Q1K2OpJRV`uBZwLjYCC?z2!wj*}(ugx+2|8+bMrmK)#->>KCX@=e>c<6lw()$cc z{0!r9K|s9TIFa(->|!*b9Mw$d9DlVgzWJ7F9U)l|E}18s=H5{>gRn@sN;RdjsGJKg z6sZ#MhBUbnBoCwx(b#n%E~YAL4e|-QZ8}>i=Tmp8H1i7UJ9Sph?s_LGfiPb zRtD0Tt@T-^#tPV9ynEf`i!Uf~qZE!Z4k~}#7C)Zc%kMDRLTY3rY@+j6wRB(KrTcmV zy8po1i>f;7ISLOR;elp4x_lrwgFSY2b+XGEw0hQ{I)+`%8g$CnvI03^BbJ_j{mte3 z_urdi?OEZG*kQU)@{OxoX&apjvl-Q@_I>XC_eSa07TYfrzqa#Z4qW-iNEK*g@5l7g zk3CdDw)12DvC?m?M>92NHv2fEXEh1RK3)Fhw^YBZr6SMD#W)1XZmGv?T2Zf?sW)2+ z4$z=A?MbRvO+}<=la_3V!Eyr{Xh#mWV1NwzP*fvx%T7HN$~GVYeL)xsm_;g{$IBQf zlmZQm7nH{NkMv8y?L3(#H;)QzF*_Y_h(L>qgIjJn5f&);9hzuUrcRGkJvi(o`mqh{ zE^q1NH|#mR;PCz669r`&GvPQR)jOtAEG;*_M|sbmR#_I)@nJHZW=+VdUCB@FLx`@` znCNPbQ6Re3*wQE37SVNf>8)8uZ`HA0O>F71zKA3GwMhB+`t94-KYsYr>mNV<@bUGJ zFW>z6yYIi-B?Mn61Qb~1cYj?a%ddW9Mij6J{cgJa%Da2Wh4!1}SGmFOtDJqqsJI~7 z!zfU$=xrYJFP6**5=_*PaG>|dY_>-u*d*O9Y?4kNxRd%#GU_%-ADa%_?JA~dX|PiT zENe?0)O1LnD#9xziJ&AfwN zIskWf{wzM|rLDPoy7NZ!J8!u9hBWM|hE;#(_11)0H>ntMUoLgM`2KyhpjlL+t1$Pi zM@0$7{E0`|`YnZxfbr{*{wcnC#f+gD(}pOQ;9iYeGF{LiGY@=3RL7)OSO^ZF2Z94m zVM17m%rh>EnMww#&*R$ceoyhwtmq)7fH(i)_=8l~peOxS)f?wqM?s43G@`^(6;-_D(;14}_deURcZT7Pj)peQu zFC;+>jtFVb&n1uVK=R;Pta9&V04@WJ%BjqcM?X1cj#NT5%qjCqWmI*g{{XaRie^&n zaZ1I+9oWF1$e;)lrh#)~BAvSET@@&=7oEtqsLK3@VNADV1&3qY1YOmFAG6YLr~#Ew zU3?So9Ayq&Zf0H|dMbcCawOLapWNhI{34fyuW|v!6CMG+_20i|K5y>WrGjoW_3o0i z198%@7)qE4L--`-Wcm9&rjGo6WXX&@Pv~mtAo|4f6Mh(!Bn_EAi#zQa=mpG=wdo#Q z(3yY|Y`DsDoWEN88XA0`8@e~XFa0V7H3q!SAoVMk1)cvHtN@HIpa4+%Kwpus0v+NM zC+4SIenGh`kiTbRg<{uJ{A9UIW|^{DFhxcLFo1G>rTpXX|MjfFGIE?0SCSDzzsl4T zK)nx!uf>#pgx+g!3EE4IeMqC!{lzp(l+3$hZa;qHnA4I|zbX9=qp@_3j1n$Ni75@4 
zB^ftI{RQX`Wk7TY_z0P~4IGf-?sK!1a8ocRE%Un2m7|?1R<)f`-K%E*Or)=%a+N;smsl4l=6xIo7eAO6yivj9j@lHluimG<90AfoN?KSwdPE2h?t9uxrtxOn-v*)C&G$=aWSgt&AsX4uSB>ov(=QlW!DQRqL420Uh(Qz zGJo6v;Fhw$Y>|LP;#Nf2aLIyz19h}aM&L(Xrrtc`U5yyt7%=Qs0F|KUfCQgv4HzOK z$qU0+1WYYg4E=NoK%z*vc|7VrM~P|)|K+h0_xTN?tD!uAuoDbk+f2E9|K0zhynS)~ z{^g5jjHW_>V;ntFDgcLY1@w_HQo@B7N*ivyMJ9q^fSODxeVJ2a+yQ~%L2(6pw@(=T zp&#OeH@`RVF#ux*5key`4Q}HlVOO&WTP%>1UZWGn`i}XI8}ykS9|-I^B%^^H&fpFL z&VI7TY)NM9BV3>;Mt2k>Qi!LV-%x?I$T%6lDi~^;czTT*VZuGc6Kps41ea(yR;{VO zTEC$nUw_HO9gafb3W=taSwWl;nKOf+6la*Y4O#=G57XGoasi+De;T0B&Cd=0F#yBN zwGjXK(ct%lelJdyjH*Rm$^!E!?U*mKf*ADWb+h?|%0${%1%w_ABcIJyDVDG;sDTYBO?u&W?B zq!dNg2fBlG-*<5{Pq4 zGb#CIQmX|rBk{?|HK>8v%4fm7V(`fKn9mYU5$D5MOb6crch;gw~e1d_}C1^tS;2Tqb;=@Nm9=)dSy)Lg?@<`PFG4byE#eYKmwG;&6-u6$=Iq&28(S>++OYha!7hlMib6Ni<4o;r1lKqDe$4hd` zxNyO<%HX47$X!eoLlTaPLx1AWU}W*FH}F=?8g079dhkk)s|Qi4uf(l`Sfx}bSxYI_ zGa1ug-&1)-i=>L}6McE(Lf1pgzJCc<>x$njV1)D!c2_qj|7GLN(%5GGus@XOB=)K~ znB{xc0J(CXK!I=wq>!QsL5&4EqHohYAFN5LMi7S`LUE#IS0PX9aPnpVlX{Od(+0e zbfbQHUmbtNUfv`y(!r5}{WGP)fnYgInAt<3uDC9DMOo{L>IcubaJx>T;VQgXFc?6% zVfxNXW3qgdCrV5N!dF%l5gROjbKyCzfzt~ugt`6dulA;WZtWwqO}0PatVbLJUZzh2lpCb5$z7XN zr{@?oJOF|@e~!UDm#ZOLCbMn2!0n$*!vLS?no$(w+%+L9N^X&~ji<>Xje$w$hl~mQ zmt`b3w+5}lsjCS1meHay4c90h{?>Rjr7>=E{s73k*4q^AUO%zoB ziTVCB4CG@3dJJY{SGI%MtIw@1#LRau6`rbycsH-;Pfs44_~uxkoL(fWQoWXCPv=CH zxrn*79w&3OIkIr7Pq_}ySFeMs4z5V4H5>``k6KA>dN|pR1fEyk% zD;9VL^W1_g;Z5qzW_)2`^7{%=x5ORx&d{C1++dnv2shYP9DB`M6bIim zUoM@4P<2N7V%~0Mn>TQ)nQEQtP&6-T+PjU=59h>?X=C! 
zmNs1l2t2sYBZM0UKUS1-FTS}1PC=Q>bS({W=Llu=&v4iP1_A{wn9*XvdH~lOU@31bi~6ZA6TQnADJm6@j6DN^%(hAztcsL1OwK z3j=}G;7`dCl*2#I!A9}c%RX&N6R1@Mo2H3+wgG`*5O4jkOesTSA`P9Lq^!OUxq)!L zm`p<^K6!}4Ne{pum~DEhyj!IZ;iLRXAPph&1Bib>xH{;pe?r07T*-rPBeolECkU6Q z?+fo6_ZH&zu{mHuftqTFdY1+0UH0MEpi#)Oii1->dW}Jh(XO99e$@M{Zj>TWQ%B6&Nq>ohx zA=tv=ZxLi3<`gzI+$6K)j^Jk5kgI&MyeEW;?eG4Si9&RL%2>1u3JQL(crZ8akeDpa z3O8F$t#H#widH~?h=zYG-k2h1Uk7}N^9m8%SnB7h)$W?Qa%!L#{LDgT1b$P-RfL+& zjyDg!+qzQ)Uoi8TlLr!jdj`iMP?ERu@Brmsml%17<(A|-f}4(|Eiun}9yv(y5r^Dy z3d#!9JOf8J3LYsHp@5lsU;*+;1fgkR!pe67eTBcUAjtx=5jI!vE{VfTGF_IU0xHOy zB&%%uWe9RuAUTz72yf#I3^{P9C4Xmedypi-3Qfen(g!@tX~QjyT0cf06wv@s&TV<0 zC>(VxoI+M_+g`rR49Vj_Bg z@`~}_@0kA{qY%=Wvt<;bW^B%2;qw@FX-SvSI$UKdbhh5?v5wG9a6UoUv$fkAHR+a* z8?|8I8>q>0`ZP~ulm}P566F&iz!xAa$V(sd2m@AQFH-u3+&HRFhi$f<@KDaIV{xoUj8oBQs&4( zVE|#Ug@tcOhwBVHD5D@a3u<<{T-@}Rz)A!!1KGX^4?m;8)3!uE7Gkq4{^k3{N+!b) z@TV1sG$To@Zs5GYHgm@v=eUaR1)p+eiY7aql|4SJT$sL%grhj?p?I zY0v?z2tT-);m(PS<2CdP&FMxV#y7Jc*r9Dx+4(_GcU2)d9_TGsUvpfACfB)a_G-D6F zy)Io)vjsYoMD2&A&^ZF4(YN!UVG7i2kUzN*%nXuQhYOhzDX@9UZ8A$hPyppQKeO=D z4C1382#VDPDWuu15x!!(#Nr)xPWZSRcxz7 zZVFv?aZPKEz*t}Lr(8gjEhYF@IbQYxvHqZ#RvsHEkF5P5TSM;)of<;kC17(U+?OI- zvnUbcjq!Z3D&!7wiA3gqmLyhfyWWV2lQlKi55iCNDCn~yrR-Aq;<%`LWMdOvUQW9!%Z}D&&_x& zv+mY+O@3`V(^MWbs5O#EQJ5~cP(8&}?h;Vxrn}*uS3C(|qrp=aq|Cnq+dfUGkjZjP z;AiM$LpMd>bCs>@(G`p300dAkiz~c<+}wiA&@EAzBaZneHgQuF?&v2Wa|2MceKMtK zpG{bz{5-_K$RXe&7>dTnGUu0pA0JzLNnUD6q4jU>)a<2p{6Rhy)MK}@p zjj8_3_8u#jo-E!|SDXqe)6IA40Zfyd)dqchCXDV-t}qiE5R_>*5H#&YPC3WLOp|V9n`6FdsW94U2xQr(bz!j3eNxLME`)&Xiq`VfV z$}6|>k{D@nSK~ndFKO`$)g?sP_?;4>JW@&a78DeB1-9zx;|2$;Uk`-(-*0(&wo@1T zEhT1b99>9duXzuQLz~yMCd*a4vMIDfs{C$ekV>l|1^AEjkkFUB4AQ={Rk}#yjO6Y7 zSl_-&`WG0nE^&}d!HZD8&nyjLgVPm?FEDwnMxJ*IzOb!a++x~SUYVo8(aXyZ6~D&# zCcUB6t;^U4K+WAG1w4(ZaCqTPi_mq+24$6jY=GEs)r9Ghc}@A3s7)3lF5*e3UNXgvQzzcm~uFvEWp>2<;&^YLSTl|cX$7NCk8FJ^`zLmEj7!STX-(Sh$%^xix!HfIN%=Xmef>E|_&n}0H25*} z!jPT9_2e(E3Xpu;^jnS2q)~Y$^Ha!^%JeXLMxjyuSy}(<+eXzHVV;JkvXbCae)VIr 
zBvKWTMMXV75wT1Guysm>IDBgczb-HKEs!mVPOhr3p}@U2h&?wNiC;y|?mB2{vzSeO zJWA}NdzJ!0bhCe<+txK~swX(wA&f<9(hzO`ebZ{YVAkI^ci(-B2fA-Azi+b@ zRVzP|MXcHKz{kZ)7dO6iZo^AgQjB%?(#7J^xwk8J>AqCf|7v*Y+{TxVT`Cp}B2-z{ zNw_E?*H!^zB4=LKPqwEnT)YoO+pIKQv%m*W_#b2iD}ex6h>-dCw@dsf&B_NcXVCt_ zQ|o)HK)c9w#71#WVH|v{o5{KOW}0j&pLr?^uO}#XkeO9k^+hl_M0?^rIVE{?v zHk8y90zA9IhBTM6U}o8RKu+7d^cy4qUFPB3MNIG#N}COvWdIx3FgDIklfz!q5429( zg*dv`oV;f%R);oPHLsxopGU+uund(TSu{6EQL!r~&BIW63e0VD_~}lOta&6U`+oDU zLo^GsP{*@)>ZO>pWjc{;5#D$@cxN6j7G#M?w1X8vF;scP-@SWjn{U3~HglpDy`sz& zN}%I54iOOy=Z=|1lL!JjQz#vkSAZqOLNV)#C3)S{Irq#k(oPo#Do;)Uva0yA@=_g> za@F3fic6!fxoxdeJEiU+`IkU+e_=UIl>hpoB6LdV?+!It8Kv0Bpp!q*^;s7b z=DlsUD;hU4F6f$bkXEKP?$790_J&fXy6%ovHdb|gS5lnR35wR(;!Vb?h`R=Ct0)AP zgdXfX`LJ2^QP{_&dz5=)*N!Y{d}cf^Bmt#UaugWLcfKw})=%W|K1su|@>*7I%DBBO z;iZ8#y=7hoVHoj}$6Uyp7|(YwMSshkOg+GS&GDt@g3>56B+ z`Gii%;^f78z3J`@dGYNlv5zp#trrR@P9m(h5+0`wV*tE-zn9#hwL1k=&-A9u;m-8i zC}PD)A?Av8o8wY6)qU25qT)=HGli%fZ>uqG@!N%MV*%o#19`;De(jXi7j0AEqry%^ z1r}M%a`)@&6-%d>JUDE!Y3#F<3z&^BViO!z)MyRR?)L+a)vRWyfu4Q2AA5a)Spk23 zEyG4l+jjKGCNGIH6VTIq9Zn!i&x+Zpv^#}kRrFl*{&qsP<)xV6{n73W{FC{4;@`a5 zsbR|=Z(W>XOfD}YUv`W$`i){71l!(w@RLBW$Rp47lh^c>7f)H*pLj@km{HY90~!%~ zH}fPDS9s2w<~H4wf+l|eo#!-&O7A>mX2$Aha>(=kik#R-YcHeB-lRW!Y02GLv1pl) z_^X>67JUJPiKVD2-!Y6cPcc8oY!sa`Vp%yen2FwonBAcn@#ZD>e&(g0Wb=_Cvsj}7 zNIkFn&Q(B`4Xh-6B3g(>l3!D>7dkNM;N@Z+DAmql0k?$<%;0x$3)1ropHM=<}ASSTyzIUx(3%TqN-pF#;lCiNRq3f3Sk}N4u^o^$ z^SFM$T|O4<0M#nLd+`lmM#lJ;)S$|pdLXz6$6ozMw>rTU{AP_j32PG~%5K@DcWs5; zQfqIwG?`26O+uMEBwBp;?$tXP7t9Jnyel}sfYC`AL)O>V?2-StWc@zCAcA(6naB21 znA~nUr5Gr>%OD;N$@*q6d{y(1pNO9LYvYRE-kwtgTR+5w9Qv zAZB+BR(Zu+G`FD{+sUO&+RGgq%@egKp^z68H95P@3=NlT$*C9P77bQF?qr#cpy3wb z>&OdJ;J~4BF0VGsXp9Wb%_&W>2;923hqsZ_>V9Ee) z{N7>e^;x1|7E`Z;i{eUz)4~Cihk&0=C>@{Q^Tv}nbeE=yThaqel!^p=eg0Mlloz#CZ%aZ%e`a5Qj^BaOw zAm2uT2o_9YGQc*4| zt;=q4nX>ssr8_O#d!yizM!$?z7BGepstEsGAq`)f{w_cxbqWxwdW zZzF3HD4K|@yDF}Wx!n_15k?3y^w1F*m4oN3b(XEVI`OQ@IEo#Ba>0vBtozdD9Ripw 
zmGyq2*9{w0r?JRLym`uOXP(mQ%v1Wf5?$i~RbX-Hc_9UB4sND4wx6cgrNNHd?uOYAi7iY*Bt;juLiM^M%)*77p=b|l zj#{gkajeO$o0Qwnpg2dmyu3tWS;qABif0J+#7zK(VC8q-Z1I(%K16?h^Gx~rYel91 z{i^#@y$_?fHFEnU_DUeiSVDl%L1gL~7ojiJE5x?3A`@8P?lk+aE8HB+0f)e9 z<}ZNLQkiuQ;jqW{7(B7&C)QlanpaP}`LuZR5S#>v{9X1S2t6R8HJLobc~%Xn&h4(b zX-v{RIJE`CJSCbe;)u&M!Y~uulHgNTFg9FL6?ok%K{RkCGZww_j2Ty%fj^%_mNq~e z_%$d&gS11jwJMiuTds$l5M!ZXvpw5pJsf*$5C{vXW=TH09cH0=6EtR@%Tnh&t`Ev< zHRt8p#Wz2G_x-nj{zLP3pesCr89;YSFK-^NEaRC7LtNWtx6V=S;3;nV`l77Sb;)Xu zifn;3!kSyu^xnE5)o#MxGs!^L;5!qipOw2opp>tY{S-Ez-kdPr(*XoV<9780M2+M;7*2H&4(u3m-z;S~NJK-KT0wN zZvm7)ilLzuW!Uk|o3bcn2vG*;5e27RWG_~j8!bA)Eb!N>b=oj$LPWu2N%2G=8Lp{; ziSxPP8o0m#{V{>Mis|Eex9$kKo|xse({t|K;bn#{#IxI!L|_rA!a1Io_Z{00MwuBbqT3Itmx&uE~%~knl)`9OW+GI zjTmwl!`H%5ap+I{8H_Bx^#oqplI$$b>5Y`)zBG(IWt&JTK$&fb|L`{uHD3B(;rI#;n;{_t|nw@?U+|HrA z=Ob^W0KDC2MKLqtrJz=O3(t?I@iK;hmY35w_*BS)*}i`Z!d=gvl0D9oMV2>Mr0;yS zT_LX+GQgPtir66HbtcT*S)?$?xe$#4CM;5cv)e^RR$;MPL*S#-=~T_cO(mm6Gq zO4ZAGAhzT-O;$Izi&gXW0>lVz1(&O1PGh+>gvb?5%RZ4uk=qFJowvaRM5Ih1I$D%y zmJOsCdVr5}Q-IHT9DbSlrK>Uju}H_MGGEJ@hopgQaI%@_G8`eRS~(k z+ay?&#SEtO=CQ<6V0KW5PV(Z77#l7Iqt=SBCQR}&!2kIB|J5;jB=gZKy9SFlU21mC zei&up>{v`uw;Vx2|G)niiWtKtzJ4(>bmax5fH8z-vpVqa#Zr{-Kuv?Q_>aH;_e^=~ z%~#$O&p!dj|M>g=KwrH{wF^p})CqZvVXUbQ)s0rm&3>{dN+Y#yFA!`3MS0-VN689f zihxNm(R-7NZ}y0?&5to~HfF+Nu=0{MNqTerdwMWf?Gk!C{jXzxHe#bjapsLg57d!9 zQk{{1tkdm61KvDNjDw^=1Kw{B6ZsH2BsiUz<&^jT#cAxNC!+a=-QVAjspcN0NgR$T zwyu{ye{gR9^4&GtAGSS)I$ng)_;xv)9&2-8rN&rQax6^_+mvUJW=p9MLac#v@hvcy z*H_BRTWNE!9%^x>nk#HD*ZSnYOOu<_o6V%`#ukk%GKFGg^wJc^ z*VLDWNaz;}R*v97c`>Ho5?OQPRbHzC$jR<X%)=c=O@)Pv5_M_x)QS?LFS9 z7gMkb1j>iC{wy#JwI}j(vnd>U-^h|gxtdSWmX`qEc9utJOt*RZs1*A4Py)*eD%_Ik zg5f)cu;W?AiyKnX$M+c2_e>%4 zThLS41~jNm|IX0u!<-u$G=<~N4HIE$H|V|lzWFVsbBLPZ#R~MI{sTYJYv)o4$*Xpy zSali)IB?!098Oe00kgqc72eEX*sS^niW^C1yp74$QZ|Y>U{w!IKP+P2Y929fk5yH5 zOTy-j`mlN1(%qV{`8oqQ!}Pvt==etV(5w32aoeoozW(2EUx!4PSGkn}$^eB8i74aLDcGum8fv7e1x2JQS zDPO+(jUIHCUXdMIh@-0mU$}8!Hm$y)H!oM+kR4Utf~Vh4`}acOpNhN8(jTb^D_jCx 
zL}S^Uhy(;E7|g0mCeVIT!31$IAa1>B^h|mr-oO7|`Q`1~b!SD^tdV$ksK6~o*HA8Y zwhEwSxH;D4%P{3hEL4IA%#)-%JDQae2WpF(t+{o8uWDUvk(Ee;O~GMOv~VvpyTm~N zyACdzyebEG?IkmAJfV2F%vS!0RfzPasEXKD!DfpC3Ve+PF30-Eu{d5_ze32@K@6vK zeKtdWUa&FbuCp%FnTQ0-4FI9+-h{(3?~sgxAV)1Gc#*F-XPL+~bI0ZiRlHrr1V`~{ z)-B0UNMt2my=nX_!sf;&q(t45bzSMT@mcpj9EF;Z^oC$52cl{|o5jP*!Ur8jL5^@U z@e?eqO8w6VnhcXSq4!=qXQ3V&Ep_lV1+z*>c+Uz$r(vC-0I>axSienKddy_!kgVIh z$r`cuN&1PZ5TlrzQu87_;FK3bm|tGh{+0|N1|8jCd+Aqk02LD-a7+IqZlYpyxQsPD5U4<`*}w$RaC{>P z6&0dz69|zDIgjk9$vSpJ8VmFQhx)<`93Z_xj+PWj(*szsEmqK4#&7Ney$OQx z&3wiDo(Zo%odg*?9@%4Ktis=}MKkFUxJ-T`!ADiqqo3aYGWu53su|^eE9Q#-qWSov z1q=b=AT>bt$rhk062!GD4(}~xA6H}>1)ioiqf{|PNI8N6V>xkkHC&b@Q$O#IWcgfK zrjLbE=H!8@$?%Vtg7!x;ybW0fN(V0wOZ0>Q)O%6Qa$S8iPt8L_-!q^Optb4m zB5w27zy}m}J5&@OQj6RuU<1-~Iv@2`mGGuoe==-~_U)MIUOvFTsqH zM3JEQqAN0dCleAVJ-p&0qR$eu`7D?6kjp&M{4ny8mc^W<-nb;Nj?0W37SQsAKGw$X zxGEhgGqSu*!t6hU*YI4tTCtda2t5T7g78K0R~j%y~e>oWewZ} z*Hn>>;+yN(aP4bMmM*jrV>$v3KswzWWZctNXE=${C`go8M;NG=VR*=-L+$}?nl!Uj zRlOc7uSFGdcBfb!ZX%4Yn@3Uf9oh_}Nr5Wkt_C8fm|?#R2-c0s+o&`R?_{EAH)~Gj zHP51Ba3O}XSdx3(=MkKXEqlc>I#ROA3d{eF{KbPlFiXPvJ7uyY66TD-MF5=wzLBjc zZ(%A3?{~uP@fEa5VCL2xRmf0ELEXHaP;7w;Wg|79w zQ~%8JAmCFU;zb;5zJi#pAd~Vuyic{DLi(_b0;v%z8M9W^5=&CuAWb?Is4# zVMSsD_kC!C6;q8+s*BKt4Mnq+l^;{xYDvF=n_0PJhX|pp(3kj;!%Z|Ek1?JbTxmFt z!@R8o%H;zYyttV-y@a&Q3}eoKU~bkmIi=?x;~<7P-+y|10}U`={Pc^Z>3HT*Q_I{YtZ5TtyWwm}AgI~Cy_YuO!EedpF@<-O%a_j-T~&>du4<0* zDy5oqp6vH3Pw%)=UeAL@s8!kFZaS2L!brW45Ay;Z9_4`aI0*~QR?YGx&mkpKhY;^-F&{)xicc`f#3ZEb-EzG>`sZ=y3<7O6#2*`=W)*@-f)+QdX?LW{p zLLLH(l#!Ku*iqg+Sxld!vU~(O6GhYk(*bJf-B7|6bK&rh@e?Gnn+u8Bw_X~u8&7wi z?#BVsZ7yIV)V}0m@t(r02Hk4TT14b!mNAn?B2$j{?c$x@Q;<5F6^nIha(rsHR{%*Sb z%H!l?eEYkbCAuEysjY<-v^;Dnb=&6fmY_cqu2QZ$TW4z`4t|D$jVD7FZf<~2`yhNe zIDG0HpAV2V2+~2XDE*L2F)!Y|X4L?s6@tv6@JZhAgmtc9?Uta(tH->H%;?N=oRE~0 z{~UaM0U^qQ>`@|K=(A%ei2%M8+wSj^RX zI&BrF?as{lT}1b%?SSJoscoqQ&Ubpi8UtuR#W3C{9C;E93l&w&CTf*&HPLE>seX|2 z;dVRKwvcIilPFN{^bo5qg9eWUyE=`gsTc+&1A4nKOs%AdE{e{|zNI!O9A9DgdW|&7 
z-!o@I!Ab4ScYSg=_cT}tmxaz`puXdPI>aS_^v6Px_;x;V*>yL8&(? z@lx#!j0X%I8G{LmpyCc{fT@JdV+O)P?R%+iC_-HXDF$>#PzjLRn!95+Od}b(ai39{ zGVo3>H2B3epm2EO-+5krk5dhb57@M9Mzs&+CQVigX0%Gll!fu_ugcJz+B!Ci9537) z`Y)fX4WrD7Dfa|02eF~M;e=!5kKnKc_?btH#YjS~LdLTk*75xO)XIh4q^ylLD>aD$ zyGY6q#7T498b>|`hzTzw|R3`4Z1J4tf96)e}H27`h%nmA~9h7?;fL4L6pmhl2}_!Tq(l@ zDm_z!I%S9x!6pC-C1a>^n#&x{Omb!+rHD^jj33LQT5DOTzy0iU*C%Ez^EoPvqGK()<&F4w(>NDamss%^B5~4E77o2 zHfV)Hb$v4P32+*@&G{Kucxgh2l@r4HK@m%AE$_jt$KtxEWCRsv*BSa-Rqh%PTIT;_ z)^;>Q@saG9GGXAwB2Ex@jXC?t@ysLIkRU_K+A7VlhB|aA&x8=d51MP#SM(~0@-QdO zqC7_GSk-j95avN2xSCcU<^hUJ%P^0k8nDl3VdAVqI(fPm6d~qoRrKDs;qKFgT zk#On53M)Iie{4BM3CBL#u?Y58I(si-L z;Z4a!sa|v8)$QYH(BsL=UXHk-X!o2p{`TI}FT?5&b`% zZgawxHMf}WrJkGpi7bdGT1bj0Ol37+#?!TBq7c`$Z8iZW45pS%cNm5#>Zl6wnrkrm zIuBb%@@|%6lVwQ|GdoNn#vKO&gMd7cV725mJ)kf@Nz){)R0n(MF{eWjQ+=!9_aUFZHRy-#G$ zYYaifA$p=jS0x-+m_!Y}c;W@OdyV>I<+ZH%HU+XG;1$=9o^H5?Tq?t)0Gp*#i@ zR8Mpc;+}&HLTZDUY5r9{6$aVEZ2CM7zrKKfXtrirnoSim-Eu6;&}^f`R`Vt|Ai_H% z&M7OXv!R*m-sZcnuqW}7%&I=2*G6!wbi|)Awe55jdIaFZykR0QrA(RL)i}7~+yOdn z6w1(DDP$RpuS7{f$UBMWf?l&cI$$yD@SZ(7LMe8dVaw#WfP?7ABXB-?^L6J25vsWE z9w3tlUVQs{7trm>;n3!l66-rJo&sUIQh2HPjELIf8E>)8dp!g3;^f|&B|Hg?QLSR> z_zo7zV5D0C;^jS=PB+b&Ipmkwir#1n@46Q&bTR(&8DG;pk=#M_Fk&sq3M<{gji|}; z*xhPgN6MUlnCzvGSB#o$mVbHVe&tVd%Hx?&kIK=%^A<_=6(B#M1g8p?^@B=V(7OIh ze{GBP@n%hgcPW8*by!bIg_Wp**ay=6hsv1@ZQJR^&g8ZqpUHeYBRH=HA`OmXXL4&a<&H1m)Oo^ zX8+4F1={=U5Ht1WH!FI+lvmy?o<3IKr0OgG`1}9h){@Pj31K_gQeAl}JRH-4Tj@o{ z+t>&J1di-hBI1KsJOLipTVUCn+FHHXw*v=_pBIgQq5I_g$Yh4R0`oq_47M)1yXD_ z&IYt^#fcuN16}E!y+-||mV8UNUokuPuTTNo+SN%u6*Kq841H34v zH$%ymWYN9y0Ude@eI~c$of?LwJEG>q&WlWp;{79;8#}M@#-mMTQ>#_Bf>5BmG8_N* zy0rT^Pd$y7%B=?&k=da1-*kOR$_rFb%I?jjfr#l#Y_kfq>szXID3^Q-;SIUyXWXvM zQ-{Dr^(iaq`OIu$-eLh401R4iPkee0=#ckx2+y(Q#INZAXrwy0#xR=0mN^>OhFJ1?tql|iW_qri5E2VM2*$L- zZ`eVESP6PUgy04Qu*j)ms2?nSg3>Zci6O?buQSVrB$Op^H2xGM2uQOjuFerO654EL zgPSx@pkaE>T@r^9W6Z6=>?6&;2*p29eGDcBkJD#WXT&~`LLBYOb#+JHw!uSHJDS*t z9vR>B0F62+4ca$TRug_5Ow=od|4q}FVX5TCHiHonHm_-p@I&#mi8;sv)9E9nmKpaj 
zFk8y{5H`2=vIBwDW2+;*HB)VPk()!wOZzq-uR}oArpU``#`9c%+f?4f%jJ|P-wSIT zCC1BsMoC&8h*n6fV`+6YqJ>Luz*RvOxRNvoG8&rS3@Cs6xAA!VpN(H( zQ?opYXkF2}&@LkrLJV)GAX1OLG^Jv}!4UpCqElKKc(VB$H%c*igbUxw+W4(pOY3*B zc@C?{0A)a$zYP96{-y0^3iRaTgPQ66uD<^ozup0ryuAOvy?0w~+(_32Uj^e8yMyee zBoaKT9OVv`T-9BByX=aRU3Ry}p#VuxLKjKU1gS%NJEAWp<_YFzBG$T@i+wo{F^}RW zng5iDOz?nAP?BqT+3uDl63BD@=l}bq=xnZ+N)1+>achI->doD17nv%82`!y9@^QRyZONrZp%MSV-`X__ zWdwE3#_nX3I*cXwQ;`4l!l`T_cH>f&v$Y=zKu*R5+*nZrrzj*CyZ;b7PD1vR?l&sW z@nr@(PHF#CiPG5dv!~c`>@#?7ZFK$n<_{Hi*C9E%=;8P>6}(}h{~8Y_=Rib98)fEyAX8?w=PhPfldjh_$5DPL+9yo&to^CJ!G_>}0==SM+Y(V!WEIjf zwrK<#=@z(0oFkbd=ZY=@7YS-y-z-7@ENvcfuuNq@;+hVl(Fe6|nh6!ww?x)~T~_p< zf*i6xxvEuO8Uy>Mb3#mBlv48BxXWvpZeU1<9)ccXXS!E&MDM9wAiqB9xJU5SG8(Nk zfrIUDT_$ zcygGI@ApL-q8j~Ij2pb=cn2&Y0{-IADA1xcbK7FA7O)OjX0?X-Nw%EmChj&2gJkh;q@P=PF!6xi+bL_H#W<5Fc{ zJJc`*EE+{khYavA;X4k?T8wf~hnfwgFOn-|`>3fcZmP4qe@tlEz^$-GL)!v61+Xo3-HY{cWbU<_ELZ@8jbQ-gJ#J5u$MStyaH7^RkK&w6qpK^43p1*zF@^+I zR^$OPJw;fSgrFA8N;@o8Hl#WrhGVTT{BC~5;dVJY-+x#3gXM>qoN_i#&5C&Hlwx4T z5}ikxP)|1nwp&So9UP{>_8v-sJqOuV#9BFx-~T=MTku`*?GOK16;mv}i;5{(+(ab3 zy!?A3us`@)@q_P*AAI|SO6=ujtco(KL9BG;)|v@bl9vwOdZhw{TYe z7X1A`^J!VqaWM{xaYT&6@o^rDzv~l0XXD#S$Knuen7K@eO4BAt4K2HFQEeQA1j2GM z-=UlVLIKF*;+UsocGcptxCBhEKGz)7sC=A{SnPHr=IcVTQI2CSuZXl_>dZ>YSD^_x ztGJRt_v$vS&~EYox@|rU-3MnZH^h6ameI!-5hOfQ4!rE`kx~1yTZt;Ohmf)^&=G z@)T?G><|-VJ%>_aQ>OzN3u+^7n@h$zg)C}bB#O-BD$`KeMv{1n$ws8XNpuFxDZQFg zVG#-)`Q=~#PW6g$0jXFqM#SMCQi+ge9N%Ft;wiX}rwe=W^twsxIiWZ0Q2-t{u)+?? 
zE;>j`?no=go27&;I4?-M!8Hy5|4>P7mSJOzTJ zcTP&cP%^lZ54CV_mnq1#2sC|lse)4YE=b&hHFf*qLGN8Baw*g6HCGe~QSaAR>hSLdHc;1g?279TOj976Dicm;7ZebO#CE)VtQXp2-n-0 ziS<<8U z70(U_yGDh`1{|t{sOwsrlrc*ZQSqCGv2cX6nF&Czimt@0yX1~a+fmb4VFB3?s%P?L z1Fh^+s)8lQ2NxV32)n(&Jz5c=razm_x~gU)8L?d)gk8&G zvRXxBw!~usq#a`IG>HgS z>y9QoTwW{m|DtM9=+3du6MgY)BVvX}760mGqG=EEfo@N(DzRw_93)wMF;|YuQQ`5U z3vN!=yU1F1KY%%Yze|n&YCHxyU}njbNb=oJK7)H=S(Wj=7!g`%-A0W@1!zT7Poqo) zK4nl`!OV!@3>)fnApi`{D;&Y8_W%h5F)Gp_$jkQtg~yngxeWxv}| ze)0KO25y!F>2;_Tk`rPz#&I&H6(u}zoFx--EG1>*iO7JkLTNzQ+U-HV$Xkh}W%0(! z63Nq)fC#0oeZo1wml^*T$VA3mN7|5S$ZlYahI}EP;KVMoq}MTD(mPf6Kkp^+dRT$| z>*ZQ2+s5fpQK9UK2vo2sZjXG#3s7=cd!Kw3TaodN?JKsB6@z}{lX#&QaWOVSBGn_po{vpAlqScfHS|MEtlO_D5{4HJ+ASC*VZ=gwgf z@I>a}AvHO^%2|dSRf}@D%S$hLtrZTe7m$?XkWIFw#UnzsYlMiwl6mlF!rn!73Wsim z(M51T60-5gO;>kR$p+QN7grqM9M;O?HB46x?Fp&NF$p@wDOKJhAJY z2-}rni@V)}T(jN7U9)Wjp}86Sm=sre0R(Fp)bOiDfInS4QEdclyXjnSZS59XgRh`9 z`03tSm~Cwh7z%y`t-(*X)-dTL9B*yyM{I3Ot&Ppr_BpH2EA=cm3qHj6MjcS?7{Oy_ zku<77pwl}SBP+K6Z=-&iZk2T_IBS4lq;k5hGpBFWg;YR|ffQlPn?xpzGJw*tS*BLu zIr>36L&ol}p^(G0|I0`)DqJNeHd5jS#=1$bc^$Wt^CPK**dO+=us^(ywswi3x zg$+c2@@5N|T6#4K7Dg!=v32ZdBI8|EYgXQEriA&t0~ot)?lVn1Sr8`_dSWoyJ1IgQ9B^F__4bF$6a5MESUif&ks3LLKQ{ud^d)ijGNE`#7x~t{kOL z>*^jPtisq>kj@HatQ87HoENY3t}jz|*1P4P+1ny}TtKt;MD{!YB6f3EWW^I@4B!B? 
z_&-)r3u7}yWbB$Ff^E6PQG`vG9FegkiG*V_NreA$OA^6;T;gfM{VMWDwMW@Uwnqh7 z081x~t$k~FZyQ}^*MJw7<~lnqdJ-5TGKA*aB^s$jBY9|)9V#jT7tG_Gkd;@HC$ZxL zS<@!=07ok*{n|J#g*K3-D1aX&w76^IROWMY$33PsC|1&Wr7RueBpa=>OptDFD6pT< z^~im&P}U}3EMp24$;iw=b|}j%WQcP(|1EFXra1DTfA{8%wp+T1U)|0VA#4;eHr~pd zwdF83MY)7`bW0NWdu0jyN-od=A|vsTU8RKYG^c8ueZ$y$Jg`e0Z?aKMIj*08DQOrAE|YG81?}fq za-_BWPIv&B;(`i56R7Mrgq=LkrG-C$`1fgi z2mOzqpZ0$}(@y{4;G3{54x$?h>~jp98)Uh7j)0!Q*C|4jfo!;pGH)XXUakEBP^ING zaD19z{(b=RMNxwqFA(^4v`iK|p&cDg0OKzZsWJUEIw7j|ee(xWJ-=`LfXdkr>py+p zYd_N;hl6Lm&JT6dOomhlzZ|mHjy&V|H|Ja$*a1KaSj^g}Lg*m3opujAW2B6JEN3h$ zoUul$-TbxA84-Ftu02QVzwQd5$nr3-p1-k`LFkNw=n^&nWa(@?4)T;ekaHM6P+1Ra z5fCO$`86Nnj47;YoIg3FCz<|`N@sX_ZE^Z1Y8CWvdIiKLAbLL^2VX6NEK0_?ot2}o zK;E7O?$u3uKyKYjpwlX>R3VUP>ii;F`UC%zXX@`dnoiUzT1YZT#ucGfUR}Q-nYqLj z2j#)b-m9N=-Ur`+lM)%X)h&}ha2y`q`VK6jNxPI%pMm(6-78&F z!QtAPzx%2Zy2mO?rW;O!{d3s>*ejb0=^REH5mpb^1wqASq!Wpi1B?wGLqtJ+Z3Hk~ z#H--U2(j{-ZWh!qz~m+i5UA8^4!{By1nzG}HW89@3f$b|f(&49^Kc}uF(mWN+;?V! zT#JaQx8_~ma(1t^8_vIb+HZAQ&31p#8w|Q(x7{6VkvqQ7W=SV7UqqoaEGk2ZiKpDC zV7{IW<0XZfW`^UlCMnY6&4g?NKGqFOxKuUmaMe@R#ErW~5&48Vy_iNLgHxur0({|q zAYc$R2J%2|>3J$iNlQJ1jjjkArftx#ONyJfCkvA45^Ycu3cyquYZ!jKE4J@s>n^$N zTF-IUG+>0+>JY&3#djWMsSy5RJ5|sZ-HSZ9Bkp3)eryzH_eu@&Ee-OWrX{skZ{KA; z%pMzD#tLK3{k=Z&^IN&$i1mS|*lQjd%gL)#4YzSk2NY{sS?C^!EWsP8!+mS-FW&7$ za(c{DjN^SJL2nw##qXxR{Nh#cKFnP}Qxo-v$}zdG9-@f|s?-eK=5oM2q)MESPF&rE zdF^#k2#Uq0do#Hg!=f8#gpWDJZ5Rhr8x`pyE!gBi@ubY-Y>^tEo10|NZ(IYu4BUh} zdKS)y*~Ya`(R1w1Lrd!W5EyJ*7?OFZ?7QT3F{=s3lvdIDorDE#B9vA~TKN8Ad zR`bv^Gs4;ni4eIocBrt@_Yzi+?->G%F)E1wn#R$MhxF`K-I1sd_I?cqR8F?JZYX@n z;m~SfK;i6W4Fg!z5#>dCjS_C9*EpNU&Bq|`uDhj$l&y&_k~TP`G(X_ZCvb!EQ%75v z$w{0=7{LRVOc8}#>BVd3z9^Wfgw<^*x6o@Ts}y_K`{mB{9h3kiHZdC&I8`#| zpR6+#ABhphRO^$RkZ`$yxR0h0=L)`NSJ{f8mQ}h4EcYzi1mfEff??zHDcEF6%NeU} zZ>$uqn!shYM z4fb&(+DHH&DsU>T;PA}KM?`C6nC1edHgMA+H@$VG(B|Y_if+G)2)@=~e=6Qjsu@|; zm@_L0W@5@;a7l~EAA&i8*;cnU==?V6rV&uyd6v2WOO(u~)$UaZ4h)-z1qTie4h|fY z>D(0B6jNxLd$>Z+%bH7FsIa%k>V&F>OGMctPs9k<^Xuu5;6B%jN#!a_hSD+PH9Z&0 
zU1AO?=CIO^P|V-cbo9b|fxCUj@JEN82)9Q_`khGjAAN-Th|#yc7Z};2XD<@j`y@T( z;iq2b#VLfizaRJ2;XM6L%l|9Wa*r6=-)XrgE${ZXQ)-wU3fs>E`Q=YI495}$A31|S ztoTETT?L>?7hzohunX>58{vyJ`&jGss+0kS&{0Jr4*4y_aqn(}G}*;i{+!;El4YQq zn|&Z=d~PhjqtVKr)g~ANLiH6Fl>zNW+_Xxm*#H*gCMPLYZR47;V+L}2iw5g2UaGvT z2T*!?ZEKnBh@KqeOd~{+ofgOR{^GmB(^x=a|GhuON)C57m{q#4uJLj3VqF4_KvhbM zE)nHA+`xT9x96sdTyKSo3^(uU4PBoG+^Xsuva-dd7dOqT$JB&3@1t2AwdNGy~&f zIRe2=*6uXakFH;5)X?#<3zinXwO{D!7uTaxSdXmgdUUMy==OqEJ7{jK$6dCVt}s=j z$;;8I&{4+2 zd38Oma-0U$kJIC@JtRA?D9VeBWJ2#1ZVjP>H7paLS@DV$bbZ}99bMa?u z*!z43G#d3R2A0pxpM06=+}ABMDq30cZrDJi1p%c-x!8DF>Urp-v20S{Nj=rI?8pqE z$s@{8Ldne6i?5p9(w31F1wG>)s_jz=i3{?qNtX-FRdaFTub*Dhv{33Bl}M zQ@ToC$=a+;2G=23tfvZ#yjYfo&NnA)!Dte3dhn5ho06G@E)@l|2db-Hn$8BgnR0@- zHX0T`X#P()W`YNH$&^pIS@J47NC5DAx&zl3{iy$%O1w8m)M>J0*9+R`)pMxHItm~A#EK^&GND;854^EBSvPn9#VNGLd#qw z3q)BSCkWkFFF}=PH(RQR`u4CE=6DvEtE~rQIXbRQVNowgDe`nZPgdlU)R4jWB-g@K z@E`smpZm+zU?#(t>8{*rB|ue=>_SCRkWsoJ;!NUnsGuGQtAy%g=sb1uq?Il; zhEo5yrgIC_EvRKbE?0FWT14A67?=N?PDqmRF__+Kv~7WcohHL2me)m$Wei1#@;#6= zSSnNivJt*~i=`HHt$F)xcEuK+nBN2M)tr?g&lr{xuK8I;Im20uI6+g%4KFJG+XAhg zEb|DLxR4szISqTiR^pMM=gFwHocNNwBRWGnqO_iNO|y+PZFEk%AH~A%%?8}Num=6b zTdJcTkThyYE98^@)T+cjG^peg4-fK*6R606fH|#S8;qr8-6#u<#r1tBt95I;^|%Cp z1&6h;5m?-?a$1g#uKn=I*v!cvgLgbg0s;LAhZNLaaPr_8Cl6}X)v5l}$F=DPL+~7U z0(fVkx7BPx&I{`R%+i~K5qJ(bdv@P2!YZe)iV3Ea)~Wz;+aiu*ZrI8RnhT_mMGmi9 zu!(1eRAp-E$S9F^TM{ZM9C!(|bo9bbM=RR7=Qru}hQRGmXHNvQ9?2{SYpPQ0oJ*vF zoOokaDJSWl5sV29MJb6O24lfyT!3W?E2m{8!^@G(7wc6Xt8Ipxnbr}`haqDCr=KA@ z=`Bl#?v5(Vh^XNG6oCW*g1l1`_v7yNB*O~#GXWie!ud9eC+iZOY^XIH%uGrs`Ld`wL%*Yn@OTo!So_%uC!OB=zndjp8!2t`h_++P-f6XbLmBpgC6UdQJd7 zN0UjEt&V2ViV|C-;sv^ba0AV6(}i*!p~}Nm1f~R>%X%=JU0eNF3Fh_^ubk3V#xd42 zi07kwT6aP|$_uJei%~G4+9t%DOq*z_lGdhh%av5GVa@C+O~*2J?^C`XDzQ&t`yk&^ z$o_go`W2xirFWl_Whr&oGBO>R1B=SvDVElemC_YkNRzCovbRNgi)7m6ycZPe@?pW0 z|ADqy10+qGkFBQQ!FepkNGdeF#dwVOpC|)bF}1;X6cJGK9IFt_5ZK97YJ(b?;FeJ$ z1$RgBA}>1i<=K~kY3iTHS6qOPYa9xqJ?mZ>;&M*c>cBg!mVNQqC?-aDO!L8nVwl+t zqvw{l7r_^^MT%F(raBbHtjhc3c}STFGBm>fO+pD1F%T{$WOY{b 
zZwq4ezN^)*1X3Ng4^x_2hl5myrNL)?|KKx3m-UI+>Q@Oq^XnueLESe3snSF$1)pJ0 zPZE6AEh{0=!YT*5EJNk1N)E3-Te*)hjsd$cKny79X^q(Pf9zyB3BRSs6yoG$3&zKnES||ELES-D0uF1 z(N1m0M@`Ddx5Q*is=IrBHI3>iCtHEVc)T(w$kOFY8K?#HcnuD@$DnA#aC1hmH4POB z;GrrYT)-ZnSb;C#WoEal=^MJM`ZKcaxBYC(D-() zmVR&91%FM~YP?z&&r7d=`uJx9dMzA{-|(CrZ$Z0BU=S91rWN&PTQABF+~3l*6i_BK z8kKSVlSyNLHnZo+av4WsFSHu0F==e;*tRy|? zuHQdx^aq1R|IyjJ>RofcGM3d`MT54SqhC1yPmQcX1RfqY zavw|mRd&7UKxhnU|A2xJ`FHcvSOn@ZUH0)dSN%2*lw`(%poJj>88vS zPUbQ)!SZwyJOamoU0*JLn~xPTkYSomnE`R}<up}#!sthBH;YtG*tB6VDI9ggX z+oRqUvdA_$oX#05CYW(-7|}Z#R4U`*#2?C2m_(z(jXt3|VKGLF*Pc77<+opx*KIyO1tp)p80hWB9->`B2=14CGQ#b^fcbH(jCXp`z_M3 z>vH*ECH1TBiKqMQkd7+2&$_Pev+~=wL)-N`lfEu~-p8ld$a>CmYGO5bQ6-w7>l#h4 zKkMW_Jvmft?MdjOcW1vz0_hW9K$Cb1vZeRt$q1#<<0Decei_t$s~=h}Yxk-5+Wx?O zeTwyO1j9y^SOX90?>QHI@#K$o%7|7gBZQzJ3~#t0K@Ig23NUtW9-X0x3G6o@(W(7` z>g*j2{+MUgV_dq9cBa_yIalY+p)7)Iv0f(WIx`}dA7ppJ!tTJh7$m~0eg>_jUm+}W zF;$o~PTu3FLj$B}diLo*v&yIB@IxX_cvu+G1-13^IB?7A%S(Y#n^EvQSJ7EoX}Wn0 z3Jnbl4ZUz~$ea9OP?873sY(NLF|LNxuz~>h=(Lbb5pWn7J?05d@@~t;p+ut6wNUkf z1SI?`Y_rwDO3LHlc%6of@?oLlM}`YQLC~;cG2a1QlrS2Nn3(Ht*0-j3?Q=gJUt;9y$4T zYuB5t{WXrz%Z#pFg`}FUSGXld)A%NyKL4au!uC2C8xkJVzZPfzno5sdBHd)izkGSU zkBotw_ia3#616R%vlIptjaE}Dr|~%W0*F5nbaC!{wTS0$zxwv&J1!Pi+U7VNt@@hG9-5}qM?EO%9EddZP7v!>ZJtA0JyP7zXku{a4eE0g|ydI3>EV-Hoi|b^XX6fP@ zbeauH!6z{KXFgW1O;_7qgDbmhYW3|Gb6?9X)VI%_A3vp%yY9^+Rx!z< zJ{=I@PBccik^}ta)bQ$ZQ`Bt)5-y3tn%R~VVh+xx7y~x~v6CaV9Ar;R;-e@hYYP$a zO7G$ShFC2Wr$vMWxLTH=h=IX^0gEnw2M3KSz7`I90Ay%|HEvgP}(d6>pe3&YAky^Q+ zbwbvn0Lg$M%z`U8C36wl_n(lEWPGzZ%5I_{vw1oPro`(8f41`gM{_xexC5`UFAj_& zvqM}tZtXUe=(ilJn-JJQxcsPZzjZ zxx=ikkD_%Px&A;VpnYf&z-=zSYS6xQn>kTJHjM6?z@Ts7&l3p+!sSvf1J|!~eP7cC zs^mk2zFNsey^faSThal|nz9;oFb}~l@f^^6-Sf=b7}V+EvXccx+meSnqg5yzZx%q_ zHxZxe^p$7g-AUbomoyHj8Tp^ZMG6awY1*j){=jFuNTTC4OBA3n1>I%(ki)aZu zO*e|(nA&C#J;Y5oaBaIqbr<0;Q_(hbDK1|>NULHQslIy|scv&%m62*SJAH?w)b8QF zlHU=39(*Fd6~rIyzVckJ-!Xq41@ot3&~o*u8*vAiFXI>eQn}ZyQOu7D$5S~PyfWb1 zlda<%#Zuh7Mu0yb5nJweM)Er&DPbhPgU5XwjKnkMFYNfoG17!9$8z@`Dpa)hK%t`E 
ztLqsmT6KvJ4;B5GC+5Q^!lKd9fk=+va6ZA3{bmG;gJU)i#@};M)-jan>(VHc0UVgu z<^*gI7cpZc^U-uIkojm$VLxH2jJ?xHs&y#;qZ6)pxiMi|ROR5XsLI~q1*hA;HbxSS zK*$D3*!ghLI>KnixLlj zym_z`=-0I3qG@*1rHk&|-7Z{rF3~TzbN$AN)ku)ailv!~Lwa-$3o}lrY^-0mdB}X` z*`04xmzja?BY|THUtE|eR+2JK50f(b2TK{hmmv7vj`1?1yE%n5 z9)_awdrg+#3F4Cw#Q1lD_&Y&-3WB(LNP_s#Wq(P|{+dFQ77^k}&Jpi&5l1=3YXMR-o6m#wJXvKD>xpasA&wVxTGp)lQIMvGk)THvfIY1d z6yW>Izy2K;%1jVc1O!6}CU8a~RXcSw ziZV8rGcIe50&3H_#`>nZypr0(hft$t%Odjf)h+Do3u-Lk65fKyAdjn?hA`b%>GB>w zhq=PbGvNI=-Mpco#yIY`1g#)tiLq6DBfDNtCR2W6i60E7@q7$_){ld?+?t2&C~S6N z;R!r8lX%@|xs2vlz6f=++_FS|z2i#M*4Y}(H?YTW?B=*gSQ{GaStp3bE33^z9LLiH z2`BWK`eYo0&Aysz?309vgOsUx_SK-o%N%(*?y;bjD{^EWO86GY{z#{(tY_*x z78@(xNq9e9Esw2o(7*PEUoNu!d;ZuPZw$m|t1GxNP@THHPDa;c=;jTw@Z>NDmSj(V zlXoS@diOB6qV5Bjyn8mT7#~yws(-HV*#9Jz4cvm2`rDdospi_Mx$uzYLT>Jb8r=)m z(e=A!!u0m#@KDp>9Jn(`>P}6jttOMNP2u=eciiHQDG*o>u8df>ctf|ABiz%Y&f1B* z9JvJSGFq}D${i7b$%4oYC^rLLW-4w8+ocO9_UIP~Re|cx!hrRs(HQ}J+qU>6kVHymV^xp_)b|f>)x14gs$flr?VLUSO!&Z|t zZ1Ep2%{nY%sj!zgh$d*-xQ%n=%1o%uQcga$B#waMVb%+6FuK+!&SRjVUnK>Y7YyAN zM{Z+LLsi`v(O3;lTpgI7T8$uV1nox95m$Dr2_LnZA^vFNj}BG^VPq+O1-|O&NAzpd zh`ikX*=GtcW5I{DQuEMk2vhT03$HWLk_3Ih0rAj*u`9~;mNT?aN<5h$5az_xt$Fy%iVz`2)0RtvXP8gn5D`zuTlOiuX#4hTxpOiBzk z$v;V5UWu`*o&&6*dTi3Yu{@qz4c>nGN z!tMk!q(#~Ql*MF()mOZRrP3O~dd~kP4C4mdB4-+JEPUyCaolww=sQhqZB3btC~&Q0&xoNJm*J@&Fg`{G%F z0(XQsx(fL7>(JlWjp1rQ+i5q=%3Fr|Ybv5kia^wMu2lnpcbC9>qh%6_Rr~e$n&TYmq0x|NnC=1 z>fYIAOLA1gy`mDD-aEp5TwZ?X6_ilDpTGIXB=lViw|Ll4@|Xm4xL#fg`}8}R>p4RI z#U3I@&r1!WPDa;}M+O&laEIoBFnC@F;B{H+E$IL#1~>1T%|-w}Tl6!epP(3N?hbzr z=)+U`c}729G&ZE~ioLg}_ZIcuqTX9}?=9-RMZLGE_tpzPGhW^$ngV`)aI4*K|HQQ( z_{4SS5OwKikAC*;6L(G@eojAM($7~9cjBn`7WLku-dlF>E$Y2Ry|<|M)~k>C#QB{8 z-?*!%;#BoO>?aH?O0u$7X%3QEZaSGq<}LS2Eb@tZ`ddS8RK4MQpQ;^@5HsZd3oK|@ zB|@)XwFtm={ou4sGUC{VhwM`d1j=m;WPU?z!P|5>&JA&*lEaEhk#CqU!86V| zV90JDh1*m1KAS4mldwz-+F)*bdcwE^niV(8*CH`a=aP3o546GzaexL^o0H#lKaejW62CI+41{nfr$8e(D^sdq#@BkCqhYeDGgfr% z^-DuXAJ^eEgsj#Pi$>8K4qLh;qt113lK(L^AULTHIv6$fMdQ|*TD^W0QS~LjOHlOb 
zwb#miQ`h-h+kxn0#0uq(;s`(kW2-*Km$_t&hy6+gvAc(*jFZH0posvt{=rQIRU|$C z+y%c*3=_fQimVlL7qF*~(njFBT9qU(-%ctU3H&?yNUQ|DKqXHqID4*YL2?#WP(jr5 zLajMS)bsBNPtK!?%zPrY{;Jrc7OGo)5vqmL{s!0jaiwW~z33Gr;W?frz;mOOKtoT{ zNQd4QMCe?Y8r~}y=^H85ORfnQpyzOK`b2pdc2-7SU0H%UqERv>fB-e>y(PZNlC;`` z(}IWE^grJe`wVLD->`-N0-(73KuDQq$xYlSLl#2svDs5;1+D>A2;&$UOMaKS0_AUS zbD%|mLc`emIwAwv$XN=$JeaKKBLO%vBy5qnUl#zAN&)DBWneL~VT?h$1hOsNNPR91 zHy=@D4h-X-jj>mX<485;OVB`8gxH_ykFU+HaM8d2eC{y%N%4 zS0gMOkVob($1S^blMNr6At(SrVJZIp!UF~Gs@G;)UCmo5GG}-bRj29 zPQ{q&QSo6?hccax%c@_|I?JJW#SF@-O&Qb_VADweOslSUeIDO>w0(5J$+536m=bp2 z=q(}nf2G4=>^_k#Zd~|B-_I>OT38uk+;yMrg^i>7;mGi_^>#&cH2;9NyB%V@a5zAN+D0LeQnuYyS=B~l6hStjsdW|ItC7Qz!#FHlmb!#Ta%!mJ zP*#%1=o%w(Gc(GPR|A8rf|^>(*&5FFVtU^QI93bi$#GkC?>c*}l5De9Z;;n}B8zJx z^(f#1I zG+&^Eldn}x+E$C80@i2S60pf_5z+umE;1a4AZRB;+6{ zSV0+`_sQ8CAbTO4v`hsFi*fT@!qptixn$(`jyDKfdE1%`VdN@Y%_WsTq(nRsRd15` zHmFJ3e+Ip@j^IbH9{2{W^-rr9q}$EI(8V4iX6=*DVAdOp`;}UpW6yuXuvqSz`+S%4 zek(4vSGS?+7RbVC&D7~!3^i&bkI*eF_q*T%6+5p^wsqTe?zWT9HHAwY2WO)Zmeb9p z#>eRv_Lh4>0{6|Ff_hOTlTlFj^RY|kVcY3M_p#fqn(b?mPK;?*KG6gB?qSxd=LSB9!9~QfRsen!@?zYu^C2KHNTGXIDu=Sao z_Zk|7Pw=l8BssoU69Gw`~+ zI}Cj7(6D6`tQjS5!fku+@p$u-&+Mc*qyiWi7OhAvF!io2!v9`8tkP0X==%CPS90HO zAA+J}0&&bBGnA-~=Km#VRVy77wpx$x6_SXNp<{wZpuShMy>N^VSzu5NLWL{HqkrlJ zXtU$s?Gz+7vWtVB|2Dacr}ULaQfhY~b!JgJ&usNp)m+siyu>7crbNN~p~)pHRel@i z8Pw9zDh8FG$gxMf0`CsnIjqH2<5CMwOk|ec;ARzlWvodXnXv^9!$yyylPHMqU~#>n zJz5p@&F~;z({3KlYlsdF6csWr!mIouSX9`N2ZWtdXF52@b)mP>=mT~C4O!9r8Gv8E zn%le~qu3$rrt3uad)#l%ZU`Lipnjd^EB$tgJ8f#()wp}lNobY+wTg1V7C^Im&w2M+ z9{p-1;ew2khpcxmbY!#+qidIW6WjQN;YLvUl`}1Ex4WNwW)XRhS386JERL_Jn>C31>Cpg^*)56tnvvj)!2F@4@shytw-*;=rPa-+lEi zVBD<@VmOa)Svl%ew4mf^AN%9JBK&|m;dA~qdH+W=p9gjX75n8a8;aoECTK!%n zxyl7wR^+NM99VKy3v{S*xvKL}Hma5;7BBCB-eaKq(O2(|csr^w{dUXa-nMdTb@uY& z5DS85x4FOmuV+V~I!9TqIYs$dO2f?t%?;tEtqBWFCRS_cop0&(j&&oke>G zaci}YKB;b<>@=#|4FeU<*4%MNlTkZtKOmVXgL!YGDb^qqR}ll@qi&mc;K#M$LVl(k zA`%ARPYHT*Gy-LiWGEe-A5E`PV%V8?#ACl(NpR{OCOCx;MUwQ@{0r}Q 
zb!%ojbXR&>`f8rj4t*h;_;=bwFT&Ig-Gf?M`PIzN2#>&5#cPKi@>(}1M5~7}Ter<9 zv4d%YxDr2y+b|>jlNG^&In5Fn7%qV;${4Q}B%{PyRp{fM5?x5=8PcU3+gEZdsYgt( zSLq3b)sCh0k(MaM5cvM${GDZe%%Qm1@vl-dzD(yw|2?{i(BLR48;){8qf|M5Dyl!r zBY!dqFth3_DD2}Um#d$nj|mI(QuPE-z~T(&9U$FPi6v7bm&HjN>-8&%LLGNes1^2X zQK;1(l#4>`)_EjGupke{@^DE|BzI*lcxAPy4-2 zYp|s_#uqx<8Q(cdR`G1lgg|U84dCwSOq_GlDV_!2VCESj-J7i0QIK|f?3aK2M_I-x z2HBv{{Cz?sDE=WhPZqGs4U}Wi9RQS4OS6sLq9(gKOvw;6GK)@bmq1RouCm*z!!cs4 z7j@Y_b(pRA95@|B{b6y0MzHM}aZ)sbmk2F%5_|>kPX%&n5_0+_~=)yWd6&cm-5;v#&`nZ0k)n^@ZBa6BJgj<&MvCRxtPHCC&m*D|rQcI1mMUawHGuLQQ7uC7QjuPZB^>Z&GNmGP z%V-=QrISfXXE-=TUd%2e8ZEQoyR?-*FOqjt23|ltB&*G`YfL6$1S31beF1$q8kKDa zwkGL_c4j8bd6L}4NbH4BMb7sg@KSDIrmYP#WQ$lsbXRgF@w!~Ejp1maDpN{;zlyg z_IqOzKuOuQNL3t-uDLEP9N;10jldneQ<_T2?uEdgxF2)>3M0Y6QJW~xmp53ny>DO( zJdB)-D>|n&lo?pT4XN`1h7sGsnWAGw-uvL=mW7dVn}T_93dn#g!Qe4ha}E6}!6Uwl zM{AWIyiJB6IYE@QkZJ=h@J`NG)?APn^I6IRER#iyl#C~b7Qg8FZ1CV6v(k|Sdwr$(CHLYn|)wXThwr$(C&1rW}YZ}wW-{<>dpL_PX zJMQ;ZtcbO$BA-OPxeD=QW&rTn)qR0iM8LwSk5PWM%90HPS8j~~(V$0BCVIn`sdr_S zJ}#tFl4V;1MewG11+CyWKlDmTLjP|jiMJuZ8W>5V6X#4YT=x#+-saL7kTG6R4MfB@ z3C><%frjxiO;p)QC^U3ekw(n{Bo7Y!as|8!NN`(9w)PcC{(bQ?cn*n9U=^%VG4zK( zMOo)&{0--@htlc`U`{fTR^3*#ZtIA3Ah=f}dWn^mO`Qr3s8JyS;wzj19YpGbIcQ}V z7r`dcDR0qQBYg<&b155ODSOz|56N)g6I7Zkl>n#_N$)|Ggx?^uhF0O(MAVw@Dnk)i zM8Cln8XdaPhvgkZY$`B%%!})i)1~cCA|W^P>SzIZc^s!!4xZhxpvg!e3On(^Kf?Lb zwTb;IL5RKFQBsDE4h#Ho>OV)4xVK#OlPQ5BKk6&#Mh2SzfV4M9oRDzyb^WRiS@}f% zfQ=`XkFJw%tZcD)LwdNU?O+@Uy;I9lKItmqutwoDV7ELa8BByzdUzw;@zf7k2f*O(!a41&t z!vwqFYj?Hw$&w+zVTMr12}X1nEl_Yqr$e_=npWD?#!=x6mt~I?iBwPHU^u{$zCkYR z3RkkCY^ioz8s?$5&!$s^PoW4*L?sB(D61PuwDCS~TfJ>XRcrQ}#e6R`WNU?164kn%q0(~Ep)_#c*t^$W?WUoxz|ks zV&A2K7+mnwfNsnmhj^2vp&HJ9h*yaO8H%}z`W9GBC7<*C{o;%`;0!v=1x%fwVfG%| z4}En$PCdHuB;zxe^aS{1GU{R;HATJdcX9uR>$?0ilWE)?WC8*CU=}Kd6vN`aZ%rgY zK|awG#x;`)H;vR6;`7T|M~%jl6hw8Y8_&EGTCiYqZWuIU$0*D0!G=C0fp7W?fadFl>p@d9-}>k5;PiH~7fMeUpI~x4@y-z^CDInv(C=NQ79n&+Q`RQc2DLZq}kaPTBONcdVt@U z+ws0!l^tw-XPJG_!k-=FH?$K`{=MO$4H9=YpRQF_q_9JmTSAIb=5h>K3B>MNv?%n{ 
zVNrD@QqB7Q-mmnoyWUKA??Pi%@^ia3o(-ya$PZ41mOWA;8!ppLCcbjaihW<+V(je~ za3kA(?3m!#L)9Va_=oo=J+DQxiZ~3@j98NK;cfab%}tNvdf89%j2fge&@>bsK-FF9 z5u2ce2D7w2A^IC+`a&E6Z2efULcTG|p$NDq* zYSkLgdaa)gB0Y00ET<08LUey39>!;a9Y(r&8S%g52nxwxJSFds0WT1~h`tL!OS=VTLt7D1jb4Q=3HTf#rISa$W&+yZdqPHeV3lsFQ zNvKlk7^sf;4(n3 z5u=(0u~>03z$3NvSY%7BX@vBHf~2H!AS1HH@%V0aE*O-+<(2%3A-HS=P$ADd2E8c! z38X#()7IY(o3mIP()UTJEibFvB@;7*f4e4*DKmdyyh5XLKpX#|iBl(6W!mMXaV6tp z-m`M;GO_Ds`pPrE@96MRDgdzet7LmQM(vX?Z)}ZLXzVMLEW;sj9z8;bC&cf`BvlOd zP5a30_MTSr+nFfrEs__~ZGkfW;R9}Lqxn6CPg<2&F>l;Z_yhd--XwMW_Fj^y8)VcJER> zyDcNgAWU27CB&56wrh*44`h%1CR{~U=k5@!FW)yI7AzbU_=YfT1pJ$6~D_1DFfT5s_#3lzbJu3(a_S3s#Ev&759f zSp6=>Skk|mQx&P-z1?t*_}jH3$mayhsi!_zx_%F*Rvv=mOX+N_kj_%6rQ6%I zev%iX+t6{PUI3@Q?en|8tf5iuIB4!cylnB^I!Ghj#hJVj!ASuD+wM@G9fQi7o;tF| zATw`3`p(rvwk#n=2DFnqbfcWPjP=r>ZNN~Vfqe{Jdh4_nf;VHPPC-d%(b5J{4P>mR z`hD$UOTRM07FhyAPJs@6?Eb79@0WI~_j^)s%FH4ggf{*++k)qA<~W>f8__|ViaKSN zq{_MI)vU6?n@^+Y-stuLb96_e8i!!@r=R2>7T4KHMc*c{3o0BhKCI*9q~k|BVdnYh-!k;d??|h zy*P0+M6r_?m*)@a8V$j1NWYs(-pU1NBzZF?dx1wze7{M$o=~>{8PGC)Qu&U#7+#A! 
zpxK~?sS?P}_ZqeUOd9z??=%=x2`BG?t%8xqLyC6q;3_R^59=Nu+B)V*pfA0wR*Mw7C?!&3klL(K^M`gInCR{*HCq{ECC(!UQey=B1 zU%s(3wd?E)2N4M_5OJ#Obx3b*=Wd1XbwDf?S}iq#VI_GFTEv1qT2>#W^CRtm@5jUG z{v~~xY_F_c!1cm*rF2&z+D!kRfz53=9G*Z#w3LRn5ZlyYJu;b}TNfSh( zBWGr^{GPDWz+C#_F?&q5~#~Me@o1UY->gTOXX1H6_P+lrim~R?rIJp$UTMt%(jJgwkOr@VzdaTfZu< zY%C2Ut2ScBIHFM5O>+Ywnw~dy8_L^(!_G;v*knGmO|YVXX+N)S0zDo7Eg9M=@{X}J zS_5;>^2*TG-i;xhA|Q-S##+U+QWZb>jDNJ}I~`+^q&;%1%B3TkezZ3A;$&^|YQigZ zopC6-;oJm5wI?QCj6!03-$dMf+f!O(MzOShqI2vvwmeWTw*auu7NlP?9ilq8x@8~< z|1BazdG9?0-!a!aAqseo(h!S@25{yrPS0!iL8Oy)O2oZ(xf!nwA&@qxeH?bbSxcDj zP=wfMPGqRpT&*Rd=NnWA$av!NBi~%dDkW zGdcsa7*WYk&R9+t_*=bvkcAT-9n&D*l_-fIs)Rtpm5Osj2$$@M+8=#?GI6X&5)LeM zk-j@>6Rs1XOxKdQm)I_DmKTte|1Zm>jubKUVFN!Wq_ts67=JmED89C)h?2wK9CLWVJ>+Hq!E=>^ zA37}cjPPqh31HNh>BtPVo^ji!5M4|lqc!ZxWg{CY(UlRvk;w04ZjaQpe z9CNCYW}ZpaRGDBm3p?M~0)LR#QSr#t2+Dg&snc@7(mP`1w-Sat-u-zPFIByq-}sqM z5HJ-%Sr3e6!u;jT*zx1p24(0;D|WLJwC69tAAJ-@kO~ti&tkS=4JX@Vsy{fP-Tfa% z;Cmg+v5J5ts%*kupSM|@IeZx!Q6;k}d)2G6H7~-q7ZL3@;xsgZsoh27Rl&}wyJ$)hcVl|VY{*6W~!`b|#tYmC25@c31nMM=L< z){G;Hd~K}t2g&J?i<{mKff(_RdlwRP#HI%>av!qK`#d1PNfMlk8*yvZgHNhCmO8QZ z9gYBjGO#9)wIel)=82$Zo5g_kgCOAeRX7~BPQm~(DEG}q|zjjnZ zZG)MkDiaWV3ZYsA%TbG9F)~yG)X(f-0PMKGk_W950>s75*d$uOSdbI|&;Awu{&nzb z$E{Liqz)+g1h_jXb2~p%Kkq)FQ93Fb+*rYSY0Dlkmq@+8|7;B|q$V?_ z1G4l>Gz&W^u6ktNq6y}Si&bT_I$K(n*_xSt9r1T+*FaNDnx}6<%(`i?`4~l-JO)nq zRl1ZAe1TEbIseNv;wo6MT`wDxC2KzBA_8TFC6{bj1cX|>j!faR_6j@!_R}QWB{H@= zc{(KAPf9hCrohU!et#**Z-@3+G;juKB}&YtjI(#=3OaDT^zjMt(<68+70W|L`Qt%C zgC&nWiD1P3TsS`O&t;u+P=K1UB&7RIXz!#XRJ{BB880CZ@P<-;b6n zu1My8>LbsBj_8Z&rc6^i%8@JArP{dUYpU+CQeYv3(~!PwOA)X5FPpK#g-AQWsl~yj zx4$kh@GJUsBjG3A_Eg5P+m=N+pf9)ph-}{tyu=Brs9CS>uIg!PgO%t2W>1(9sr^Pg zLw!P${T)8sg@|>$y%*L1!il*SI&|>~$W7X8Gdgyb)XLn4N z4kC$-C$oLg4lhjQSa{ZyzPt0nt5pe-xac)Swjar|pgSlo?YMe)P?U`>+JbliSl=Hk z@~8_opqxKO+coL80Ki1qgD~c;h_FEBPt*H@9$h)$&AFO{U7uT!J{D|y9P*DoHIo&* zrM16Vp7-~7^Gb~oJ>0x|iTE}~(zVhoZd$a?>^t%zc^-e0G0Vs+i1gBLO)o8hZf|5c 
zF-9@SuBvjTGUYdkrxy@MZBm49sNEH+|C)6(?*etv3{a05_b>AwSxW<{*_7!VhID2lDs+mIw>}8yteVm^&MZKX3MyWezmc^) z$EIm9yEMXO)CLp`vnbA^gxe>DYKZW`>qe}^(a%GD%Tn%thPn(*AA>AEN5L+O05!r( zvVxIWDY~(&hi^#m6FFpOWo?re9mbb`YLi7^&|jcskZ*|o?TkR;wYL{kn{?PzmjfC< z@@Ie0+2y>j=8wLgFx_T!o)TfcfG52}SULZSq!0K0NGx)$dV}063xrQ0xBt z;d>Yixtu92%mcICw|1mMqiN;o&7`Ij^n%>-n6Cb?-xe{MtjeKKvTwZB>nMIs+ui_ z-Cv&rW`^2A^Bcf!g&S>82g^jfi@Y!y2C}XKS|)^wl_%D3Gk?ulbN&HUaD~ehko=%B z*5wz6*oVj_=Yy3~zGlMWuXA$>D+(3>gnTq%o5sb)D5&+uJ$i6zoE2chHibIZz~q0( z{KEEHZKamMz(JslobC}CE^|3(uhdaMGn1mWdY~wa#P6Jd>p{2utEp(YuFUny+L>Y6 zjK&?DLJ83~YUV})$JdFSoT8(yw5_?Y6R&{S_iA@|$x+<;xoJ?3SJaiK==;@Bdt~>S z4gBuixbO>_+NPl{RxR54^7a+qi1W=g@t;fuNTkZzH>6Ipl(E;9W?-?BAxL7zXC!FIk|<)OQYCyliPFZ|3_h@U>>1EQZ} zs8Mlk{XtJn6l+GaIBGKD(%MaIo4KCB#TzT@ zFdZx9(P1s&`c~5NxtN2kI4UK4StYq4fC5}AgpE|bTyqB!k@|IZ`PUJl|3&?`a!2%7 zlp_sA<214zd#pieDfVH5ibr{MX0KA5M+~}2W($=yAAG!r2WF3Dp6S;*+eCEzb=6a4wX_#JW=QP3pf{ z;w|7R4WgxuEQpfzDl=1Evj*ulp!g_q%LS>}2J8lZih(M6ux?$_Oz{MF&fE}f{2m`! zv4Z2cmLz06o1rTR(nqUil5Pf5;iukfdOcg}E#l7UeLqo>>fB?Z%@32M#y5lan?AWw zC*UEHP{$J`W(=|KABIXyuS&ho4pNK0dsSX<{d8%(h(&J^jC0G~y zUaN}F(PUN7oe$epzdEH(Sp44{@C_M6jF@c;1{^! 
zorV;nW5RUtZN3g-#DGb9Xi8cgCvDNYi16GzsXa{VZ%j2$57 z!*Fw74T^)O!`-^Xlhnrbqf?V+%rB0+Kp(8YQQ~LUG7L{F=75dMIeM6!bV-s7>bFCB zb^UHHmWbW)KY4;|geOoh&}C-gl(oQr)DC&5JNS--poR~_(3}n-*?veA{@KsTCEkod zmrv5a+0Nw1C~-Jr1I||9iADI%Niv*=cGVV8EH;>G%-;aj{7ku?*}^ZK9yF91=u6z= z3rBLhU;Hx>%@NBybU+O2kmV{$=;-`o)ysECWDORk07BllN;;0p=q=$UIL)4PD;g25 zxPg70hQE}=?p`$#P{)oDwfnY$I&Gp(e0^o&#G-2iGs)JA25qydz5ymtb*rchCxs_j z4tL32;9PlAQDq}nTNu_(6h{R!ozE_7v0cY=q*03LGCT}=qb}v^?k6!vdO`pm^~)kt zo678NedW0E(h#D6b--LhvR*`EmeBsys9 zYEt71+6D7KP&J7qIK{3#2jcLSIMr{j z_P|`I>JCV-UatE#vs^d0s8oo}?ls3}ovb!^<%AI0M+LfKmA?!#{C?y$nY?*3Q?BPh z;bQNeE`Pgk=qxN&!@yul_>1qBw*XDjT}0lQf#KbwVws+y_SjM*Y;sZ@C|W@<@qOMQ z7^>k2D;XK{Q{X?Ar($GL-hlFx`DeHR(N{m-TGqoefCDgXPAdqL?U77exe zIg&Ub_&r&QOgFi-v)$6zy3KDt$ZoPGzDQ$tUn63-H7hJ%#)?V@6u09BAdqDmg+4#t zexIWoXs?ccFD98A)Ye{;jpabpesY&TST3^!EEa-K)jnL*g&giL{v4`J6>#8I|2>Lw zEw-DB`pY5B(O*4O#<@$lWKIXTi#XHWKHr9E;$5M}B)M@VRQGH8HaFc|-}^NXh0;c+ z-j9CYPD}QWO`Y?_Pw$@MMG0M0gfM$Ujhxr&N$dMEONIs@#n4ei1$?(l{*mHJ>V4F4 zhys2Am@K=~fkos`sU3bg_A7dM$3==50=BT>&Od8(%L34z-S>T1B~x06nY^hV?V)3_ zd{2^H0PyG9qh$0Tr8-4*R?56oQRN^>+Uuqn&rhT6cspfZ<%uj6LseE17p$ipkCZ!_ zDQKIL%5;le#*OY8_4q%~Rr_C!XP@I2aSP2;8W5b4kM+G@_rnT3H5ADIIlL)FqXD?`6bFDDA(lR!AI^$bmvB z3WEkU0_;IS^74h5lCozxeWe0O4)pM`^Mwh?$93v-FIW}?N^ri~7JGEcP#juqC_cH> zK4MPhd(o5w&pSaCcD71W@1KZoPndCage-CnSgn;W;~Zsw-#`%#HQ18ZWJbxuLl-FU zx{_IzR4g_vY5z=!szF5=IDr!4Z8!CimohPNACh6R$@3D`lG>|qw~(Mq7iYQeWuFV( z?3+V-pmWyBZ!_8(kTK+$jNxtdh@$odOHm>CGi2W;t9eNlYIu zUf&AaIqd_nUr>N7l}+P*JTW$((@rD7>5RY*-u2QSM5c?kEJ7c*wMF$8T-$`M?$AcK zRUh9obN2UVw+SV`#!?#k#>mg3$<%in2~YxuDg&g!r~s_SKZrMxQPW>vY4r;s+QZRk z~ z3xpkHkHbMUiuY$;&oJyL5WE!JVbt#EX7!>7tT9e7YJ;d+W*zee0XNx+N!6fqY|%;? z=@`JC==+rXIQmFb-*>S{Z5J~a?1JFt1rdv<)s1c-qhR?Vy1nbvY0t)fQRGxL2G57n3=kd9kj{mtNr1@8fjL3s`p5nD4aB&@8FyTqY#m}%|nqpM!oxrTbgh3OE<_^)I%4B-Y zqD*b|Oz~~P-LMO*pn2!Fv5I%sdv(&>*fItZZ1OemvwzE`fAu^UXAB4UULxQdE^Q*F zc#B)X-8`+~QKxv+CgwpWt@EIh(0-2)Y@1m{ISgxOZhVITM3HS_OYJ4asVbUWG4W?! 
zBlXX$0Ud85khr%p=^7x<+F~|j)n6l~N7M{69vi8nCBcGxy z!RVnS!h{4iQlh)${p84N?2{)j()hvU5owO1WD?Pxf38d4NXXLw!Nq`rbyLk*Hcy-j zIoqhD!#2Wefk=Qm^4t8kMaN7{Y>!;*M~qJzamvccP1XWZLt&$_Ol*?qv_fM_5!`A& zfbbZHtl#7wVSK>Y`-r^SR$HADVL5KUm0*T#XgCFC^@ zQ_A0(L7S7?cmNjE;T&)ilpgcg>vea0dzY%`^XF=M9RnlP*Z29VajY;UYy0V~lA)*lsq+0I``rrElC3U3s2GO+C*rnf-l1YVc>$<&wnu}48^~#Zfc>W4 zJcE^V2ZV{aB<-Q7-7iOXV07-0w4~&bhTgHGj=KVFHBW>FWgVv7gu*qUGbf+>T>w@9 z2i5gAwr>FmV29;Ds`~PvbUIZ$M6YIPA%G2nEm;**R2)5OTrru1qV9>?O73d@}sI5V(d4!?G zUlj6`zS=DH6^z5N1UqQRrlj5rGPuQY!r(gQt7oQju=Rs_;$X2UVBw)|2K|6qGo5I* zqY?{fh%+5@qkTr^rUSbPb!(4GSqg*EAO*Hi*$M5@*)KF}g`_dK!v*+EFsF-6g2nn3 z{}^$c(`7mSUye&Rrcq1rq{Iy)>;(01h)MiyUrXM57TQ?WYQDYJ)2g^`rQ?Uec@JTv^48 zUeFMGxNTQ&P>oF5ic)~^kHeuC+I+8O=}|@>M9x`av~>fV|EHQ(==RH6A-qhCD4B7b zjV+SF){lv2l?!KZ6yF*4EVB%G+DQ*Cp$8GRXgEcI7>uA)a=_0C?NYLlxACsFM_f-- zYGS)O;}_tRQJ+AK?S+fsbB^510w5~0QzKv`==FRu1fnA2dHgi{fW*Uzbf!)BW_2yf z{jJDMH+KYW_G!VoIjS|@qR3RuuUBv)6N91u=>0sIDTTiiZ0A00BY;+Bw~ zN-=+GRg6O9;D7mPUD<}`ChAsWBxV-YL%eJs$4;>)oKEuz&8pT&+WdU5SEI}*b_Xv7 zSfws5o;g-v89U&QTy*x~OoTXqZ;Hvm<4;oxKSGI+?StP_SY0yT+3rrwsBE-${9!z9 z&w9&ZrQ`-e{-rnJ^_{B)PO;b`-{-x-M_PdhL3zSN6W-2<1bi5|iwr7?lbfRdpy8lLGsBk?eu6#UISv z=>Xp%krH{cUGZ6ZGw8;{Tv|EgR}il2(@GZ-&Wx&)yk5CjJJxjH4&%;GQEuRu)*d2K zI>#`{1&)Z&R9f?IMYts&TI8L%XE`Yg%x1WwM(f|;lJVb_xI5LiQ7;#tzQ!KtE_mSp zT8A_fY&{=7C{AxO2}p~${WzahiN)Cb#dG~c#kno)8wF>6wRJM8FbW*%$QLd zCOcA5$eW84?F{Cf&N0M~jJUdiAJYtudxh}pS$Vx;htru`CEROPsxjDU6+0*h-GPba zL*S@3CCiw4!({~e%tu;r5sjNiL44IFc`_P>mPbY3Y!zr1nw`+<_{%izkC8QvA9zhY zVN|g;?nDFvNt22@G|1epp8mP*4IxA4nK~7gUU#VJoE@6;uq5k<$1iG;h)H&UTe{7z z*u+JHgRPodkjg!ARx>ta`hCFpWPzNQNyj2ge%LnjOmCxYN}QWF)*_$5{iJB3s4sTrAwY1N^Bz`C#a!Bd;+ZTUL~bHFUs zG_vV5L%wACT3~;>hte#bprokdT%RZbCtau=|6Ls=c$>!UcE-z*m3&ygFjtLf%x;cU zjcCZ~_(SH1l_|Gc=%Bt(A7M6sad-*6KiZu0Ih7j$y-;DI#)<^Jv4#W8vIw#lX7Vg) z%mHr=`K$aR2&Y11)C_r%i5~F`P#>ZKAaoTQM&iL4r=fuR!Fdanj5rcEW=Vsn?;;oE z2pf^GjCq(q4v<3PU~;)Z{}H;h8e#P=fC=XMtMbj`fcMMn z4r?n_6ss)OOBs%=+~OGttS{<0OZdJ_A)4N+qNWMVvsk0Yh)X`0lj<6)0`99M&tMIZ 
zwAa`w{ZmlAG*vpMip_|UGOE=%A`G`Jxed zq?dSg@Wpw2X2TXw8x+Cn-+&7OB0|H*LQ6~|muQwib2M2>gTCmteaI{|L_YNk`O=0b z@rgg<7sE>ZYG8w4qxRwXp0HcRr!7_>p7&$J8JX!>r&gY|)^Kub`V`*NFvEN>{HXzr z{XA&FN~9UF62r65z|QVmJeP=l0d23KiLZi&D<(8x1wVzKw6c*;fwGwo@F5W>414Ms8-!lDC_Vx%7e^uYjg0g_$R|tbg7d zhQzwtLs;kc$eYjiUbAlUAEWCEJWZ$H(Y40}182Y5wR|!2cb8jU0V(CYtuOT$Rm=kK zYbJL_0Yjtg_ACL}+;_==xdAroz}mjgx0ye+)1PJ*&sa?!^)A1NQGbxfWDn;LK!zP3 zq}1A5{(ttBtbvjgrr+FqVT7<*=wS{B+afge`=BfuKmRYhz_y59zK5Z7%tZyrJSB$b zOMePDjJJBz$=rY?rEsXr?oQiHfa_{D-`dZ&{dI9=mzErvgieYk?p{ju?A@;Cny%~C zOBYAa+>U@sk{;R@0eU93c{spBnfsR2qX+wtRqdjqUm71Dt}3==f-Zf6YHTi*u#0m5 zPq0#fHZ@Hu3gvXm;N^6Pd!r8=d`l|_W5{^A`6;-At?MP@XV*;kr*PHeXm#czHrLB~ z3kRJTx~K|1`JI_N!BP>iPVXP%a`g+|Hg-|MnkoH7vvwGfXT;*)o1QH-5F<3}MH@!%>pc*Ie&JTrNy zlAEZRP-{|se%iAoPrl~EKLs+)yWHf3{%~Aq&1-hmnkn@TOlJRP9eS|i;dcOE83Jk8 z={BB6&5-QkiZ>r4z>P> z;%O3yj19Q_dbrIgl}l>Loqat9-5U8~{q*X9a@vDnn6^IvY`ICd&1Qc z0kj_SRKy{!KhQC5rqC|V<;e`!hz83?9=#!2f)5skDYY0H<|d&$aKkQL{6Hb8TCQv> zE|C5?&5A_YujtZRO{zq7&i@bD?m9TVh}}T*s{yPQerIb2pH$^RHZOwKAIxk+p56xb z9Xf{&o5&~JH`Bmb>*z%rb{eLp2LNofH={_Q|Fw}BIhW&+*}{H|G;M57T|fZIO9O+T0-*u{K>`5{WW7k~ zjxfVg009C0>-c)=zm~PRwXu`Co$-H5G2NlZDX{|q{p-N|n~dz&m9m+$wSf)5+{%%` z>913II~&u#P#_^8|3^1}R@wm_C=d|Imtxqzp{#u2S^?Z`ot@|m0fuJ(*8i7e@W+1p z&wq)i0s%q)O_Glr2*}aN-OAXJ-p$(TzcuPl3`>%N00Gs4{kOL&5bb`^ktL~LzL%uSsg08Zw%Hjc~; z=GFjHV@C#Ib0=$nog>3v#{Ux-_TRv875{HAJ6k(vJ8NSb=YQk;6}*3me#PK_*1z8N z|Fe|;0S91cXl!NtWuvVFgQ2s7gRza%f5zyajQjhJsQd>|6DwPQld;3UP5a+b_Wl9o zU)~u2{@*Bn4-|&d{~ahNTU#qTfQ_-$|I?bk?%M&vSP|Ybl>H~8U79N4=MPcHzSz8K@@)_v!Shx Z)4z8ZIK;nRNB(+LeCZQq`%(<_{{cAVlSKdk literal 0 HcmV?d00001 diff --git a/Intrinsic-porting-guide-draft-R4_hack.xml b/Intrinsic-porting-guide-draft-R4_hack.xml new file mode 100644 index 0000000..ef9b62e --- /dev/null +++ b/Intrinsic-porting-guide-draft-R4_hack.xml @@ -0,0 +1,55 @@ + + +

+ 1 Intel Intrinsic porting guide for Power64LE.The goal of this project is to provide functional equivalents of the Intel MMX, SSE, and AVX intrinsic functions, that are commonly used in Linux applications, and make them (or equivalents) available for the PowerPC64LE platform. These X86 intrinsics started with the Intel and Microsoft compilers but were then ported to the GCC compiler. The GCC implementation is a set of headers with inline functions. These inline functions provide an implementation mapping from the Intel/Microsoft dialect intrinsic names to the corresponding GCC Intel built-ins or directly via C language vector extension syntax.The current proposal is to start with the existing X86 GCC intrinsic headers and port them (copy and change the source)  to POWER using C language vector extensions, VMX and VSX built-ins. Another key assumption is that we will be able to use many of the existing Intel DejaGNU test cases in ./gcc/testsuite/gcc.target/i386. This document is intended as a guide to developers participating in this effort. However this document provides guidance and examples that should be useful to developers who may encounter X86 intrinsics in code that they are porting to another platform. + 1.1 Look at the source LukeSo if this is a code porting activity, where is the source? All the source code we need to look at is in the GCC source trees. You can either git (https://gcc.gnu.org/wiki/GitMirror) the gcc source  or download one of the recent AT source tars (for example: ftp://ftp.unicamp.br/pub/linuxpatch/toolchain/at/ubuntu/dists/xenial/at10.0/).  You will find the intrinsic headers in the ./gcc/config/i386/ sub-directory.If you have an Intel Linux workstation or laptop with GCC installed, you already have these headers, if you want to take a look:But depending on the vintage of the distro, these may not be the latest versions of the headers. 
Looking at the header source will tell you a few things: The include structure (what other headers are implicitly included). The types that are used at the API. And finally, how the API is implemented. + 1.1.1 The structure of the intrinsic includesThe GCC x86 intrinsic functions for vector were initially grouped by technology (MMX and SSE), which starts with MMX and continues with SSE through SSE4.1 stacked like a set of Russian dolls.Basically each higher layer include needs typedefs and helper macros defined by the lower level intrinsic includes. mm_malloc.h simply provides wrappers for posix_memalign and free. Then it gets a little weird, starting with the crypto extensions:For AVX, AVX2, and AVX512 they must have decided that the Russian Dolls thing was getting out of hand. AVX et al. is split across 14 filesbut they do not want the applications to include these individually.So immintrin.h  includes everything Intel vector, including all the AVX, AES, SSE and MMX flavors.So what is the net? The include structure provides some strong clues about the order that we should approach this effort.  For example if you need an intrinsic from SSE4 (smmintrin.h) we are likely to need the type definitions from SSE (emmintrin.h). So a bottom-up (MMX, SSE, SSE2, …) approach seems like the best plan of attack. Also saving the AVX parts for later makes sense, as most are just wider forms of operations that already exist in SSE.We should use the same include structure to implement our PowerISA equivalent API headers. This will make porting easier (drop-in replacement) and should get the application running quickly on POWER. Then we are in a position to profile and analyze the resulting application. This will show any hot spots where the simple one-to-one transformation results in bottlenecks and additional tuning is needed. 
For these cases we should improve our tools (SDK MA/SCA) to identify opportunities for, and perhaps propose, alternative sequences that are better tuned to PowerISA and our micro-architecture. + 1.1.2 The types used for intrinsicsThe type system for Intel intrinsics is a little strange. For example from xmmintrin.h:So there is one set of types that are used in the function prototypes of the API, and the internal types that are used in the implementation. Notice the special attribute __may_alias__. From the GCC documentation:So there are a couple of issues here: 1)  the API seems to force the compiler to assume aliasing of any parameter passed by reference. Normally the compiler assumes that parameters of different size do not overlap in storage, which allows more optimization. 2) the data type used at the interface may not be the correct type for the implied operation. So the type __m128i (which is defined as vector long long) is also used for parameters and return values of vector [char | short | int ]. This may not matter when using x86 built-ins but does matter when the implementation uses C vector extensions or, in our case, uses PowerPC generic vector built-ins (#2.1.3.2.PowerISA Vector Intrinsics|outline). For the latter cases the type must be correct for the compiler to generate the correct type (char, short, int, long) (#1.1.3.How the API is implemented.|outline) for the generic builtin operation. There is also concern that excessive use of __may_alias__ will limit compiler optimization. We are not sure how important this attribute is to the correct operation of the API.  So at a later stage we should experiment with removing it from our implementation for PowerPC.The good news is that PowerISA has good support for 128-bit vectors and (with the addition of VSX) all the required vector data (char, short, int, long, float, double) types. However Intel supports a wider variety of vector sizes  than PowerISA does. 
This started with the 64-bit MMX vector support that preceded SSE and extends to 256-bit and 512-bit vectors of AVX, AVX2, and AVX512 that followed SSE.Within the GCC Intel intrinsic implementation these are all implemented as vector attribute extensions of the appropriate  size (__vector_size__ ({8 | 16 | 32 | 64})). For the PowerPC target  GCC currently only supports the native __vector_size__ ( 16 ). These we can support directly in VMX/VSX registers and associated instructions. GCC will compile with other   __vector_size__ values, but the resulting types are treated as simple arrays of the element type. This does not allow the compiler to use the vector registers and vector instructions for these (nonnative) vectors.   So what is a programmer to do? + 1.1.2.1 Dealing with MMXMMX is actually the hard case. The __m64 type supports SIMD vector int types (char, short, int, long).  The  Intel API defines  __m64 as:Which is problematic for the PowerPC target (not really supported in GCC) and we would prefer to use a native PowerISA type that can be passed in a single register.  The PowerISA Rotate Under Mask instructions can easily extract and insert integer fields of a General Purpose Register (GPR). This implies that MMX integer types can be handled as an internal union of arrays for the supported element types. So a 64-bit unsigned long long is the best type for parameter passing and return values, especially for the 64-bit (_si64) operations as these normally generate a single PowerISA instruction.The SSE extensions include some convert operations for __m128 to / from __m64 and this includes some int to / from float conversions. However in these cases the float operands always reside in SSE (XMM) registers (which match the PowerISA vector registers) and the MMX registers only contain integer values. POWER8 (PowerISA-2.07) has direct move instructions between GPRs and VSRs. 
So these transfers are normally a single instruction and any conversions can be handled in the vector unit.When transferring a __m64 value to a vector register we should also execute an xxsplatd instruction to ensure there is valid data in all four element lanes before doing floating point operations. This avoids generating extraneous floating point exceptions that might be generated by uninitialized parts of the vector. The top two lanes will have the floating point results that are in position for direct transfer to a GPR or stored via Store Float Double (stfd). These operations are internal to the intrinsic implementation and there is no requirement to keep temporary vectors in correct Little Endian form.Also for the smaller element sizes and higher element counts (MMX _pi8 and _pi16 types) the number of  Rotate Under Mask instructions required to disassemble the 64-bit __m64 into elements, perform the element calculations, and reassemble the elements in a single __m64 value can get large. In this case we can generate shorter instruction sequences by transferring (via direct move instruction) the GPR __m64 value to a vector register, performing the SIMD operation there, then transferring the __m64 result back to a GPR. + 1.1.2.2 Dealing with AVX and AVX512AVX is a bit easier for PowerISA and the ELF V2 ABI. First we have lots (64) of vector registers and a super scalar vector pipe-line (can execute two or more independent 128-bit vector operations concurrently). Second the ELF V2 ABI was designed to pass and return larger aggregates in vector registers:Up to 12 qualified vector arguments can be passed in v2–v13.A qualified vector argument corresponds to:So the ABI allows for passing up to three structures each representing 512-bit vectors and returning such a (512-bit) structure all in VMX registers. 
This can be extended further by spilling parameters (beyond 12 X 128-bit vectors) to the parameter save area, but we should not need that, as most intrinsics only use 2 or 3 operands. Vector registers not needed for parameter passing, along with an additional 8 volatile vector registers, are available for scratch and local variables. All can be used by the application without requiring register spill to the save area. So most intrinsic operations on 256- or 512-bit vectors can be held within existing PowerISA vector registers. For larger functions that might use multiple AVX 256 or 512-bit intrinsics and, as a result, push beyond the 20 volatile vector registers, the compiler will just allocate non-volatile vector registers by allocating a stack frame and spilling non-volatile vector registers to the save area (as needed in the function prologue). This frees up to 64 vectors (32 x 256-bit or 16 x 512-bit structs) for code optimization. Based on the specifics of our ISA and ABI we will not use __vector_size__ (32) or (64) in the PowerPC implementation of __m256 and __m512 types. Instead we will typedef structs of 2 or 4 vector (__m128) fields. This allows efficient handling of these larger data types without requiring new GCC language extensions. In the end we should use the same type names and definitions as the GCC X86 intrinsic headers where possible. Where that is not possible we can define new typedefs that provide the best mapping to the underlying PowerISA hardware. + 1.1.3 How is this API implemented.One pleasant surprise is that many (at least for the older Intel) Intrinsics are implemented directly in C vector extension code and/or a simple mapping to GCC target specific builtins. + 1.1.3.1 Some simple examplesFor example, a vector double splat looks like this:Another example:Note in the example above the cast to __v2df for the operation. Both __m128d and __v2df are vector double, but __v2df does not have the __may_alias__ attribute. 
And one more example:Note this requires a cast for the compiler to generate the correct code for the intended operation. The parameters and result are the generic __m128i, which is a vector long long with the __may_alias__ attribute. But the operation is a vector multiply low unsigned short (__v8hu). So not only do we use the cast to drop the __may_alias__ attribute but we also need to cast to the correct (vector unsigned short) type for the specified operation.I have successfully copied these (and similar) source snippets over to the PPC64LE implementation unchanged. This of course assumes the associated types are defined with compatible attributes. + 1.1.3.2 Those extra attributesYou may have noticed there are some special attributes:So far I have been using these attributes unchanged.But most intrinsics map the Intel intrinsic to one or more target specific GCC builtins. For example:The first intrinsic (_mm_load_pd) is implemented as a C vector pointer reference, but, judging from the comment, assumes the compiler will use a movapd instruction that requires 16-byte alignment (will raise a general-protection exception if not aligned). This  implies that there is a performance advantage for at least some Intel processors to keep the vector aligned. The second intrinsic uses the explicit GCC builtin __builtin_ia32_loadupd to generate the movupd instruction which handles unaligned references.The opposite assumption applies to POWER and PPC64LE, where GCC generates the VSX  lxvd2x / xxswapd instruction sequence by default, which allows unaligned references. The PowerISA equivalent for aligned vector access is the VMX lvx instruction and the vec_ld builtin, which forces quadword aligned access (by ignoring the low order 4 bits of the effective address). The lvx instruction does not raise alignment exceptions, but perhaps should as part of our implementation of the Intel intrinsic. 
This requires that we use PowerISA VMX/VSX built-ins to ensure we get the expected results.The current prototype defines the following:The aligned  load intrinsic adds an assert which checks alignment (to match the Intel semantic) and uses  the GCC builtin vec_ld (generates an lvx).  The assert generates extra code but this can be eliminated by defining NDEBUG at compile time. The unaligned load intrinsic uses the GCC builtin vec_vsx_ld  (for PPC64LE generates lxvd2x / xxswapd for power8  and will simplify to lxv or lxvx for power9).  And similarly for _mm_store_pd / _mm_storeu_pd, using vec_st and vec_vsx_st. These concepts extend to the load/store intrinsics for vector float and vector int. + 1.1.3.3 How did I find this out?The next question is where I got the details above. The GCC documentation for __builtin_ia32_loadupd provides minimal information (the builtin name, parameters and return types). Not very informative. Looking up the Intel intrinsic description is more informative. You can Google the intrinsic name or use the Intel Intrinsic guide  for this. The Intrinsic Guide is interactive and includes  Intel (Chip) technology and text based search capabilities. Clicking on the intrinsic name opens a synopsis including: the underlying instruction name, text description, operation pseudo code, and in some cases performance information (latency and throughput).The key is to get a description of the intrinsic (operand fields and types, and which fields are updated for the result) and the underlying Intel instruction. If the Intrinsic guide is not clear you can look up the instruction details in the “Intel® 64 and IA-32 Architectures Software Developer’s Manual”.Information about the PowerISA vector facilities is found in the PowerISA Version 2.07B (for POWER8 and 3.0 for POWER9) manual, Book I, Chapter 6. Vector Facility and Chapter 7. Vector-Scalar Floating-Point Operations. 
Another good reference is the OpenPOWER ELF V2 application binary interface (ABI) document, Chapter 6. Vector Programming Interfaces and Appendix A. Predefined Functions for Vector Programming. Another useful document is the original AltiVec Technology Programming Interface Manual (PIM) with a  user friendly structure and many helpful diagrams. But alas the PIM does not cover the recent PowerISA (power7,  power8, and power9) enhancements. + 1.1.3.4 Examples implemented using other intrinsics Some intrinsic implementations are defined in terms of other intrinsics. For example: This notion of using part (one fourth or half) of the SSE XMM register and leaving the rest unchanged (or forced to zero) is specific to SSE scalar operations and can generate some complicated (sub-optimal) PowerISA code.  In this case _mm_load_sd passes the dereferenced double value  to _mm_set_sd which uses C vector initializer notation to combine (merge) that double scalar value with a scalar 0.0 constant into a vector double. While code like this should work as-is for PPC64LE, you should look at the generated code and assess if it is reasonable.  In this case the code is not awful (a load double splat, vector xor to generate 0.0s, then an xxmrghd to combine __F and 0.0).  Other examples may generate sub-optimal code and justify a rewrite to PowerISA scalar or vector code (GCC PowerPC AltiVec Built-in Functions or inline assembler). Net: try using the existing C code if you can, but check on what the compiler generates.  If the generated code is horrendous, it may be worth the effort to write a PowerISA specific equivalent. 
For codes making extensive use of MMX or SSE scalar intrinsics you will be better off rewriting to use standard C scalar types and letting the GCC compiler handle the details (see #2.1.Prefered methods|outline) + 2 How do we work this? The working assumption is to start with the existing GCC headers from ./gcc/config/i386/, then convert them to PowerISA and add them to ./gcc/config/rs6000/. I assume we will replicate the existing header structure and retain the existing header file and intrinsic names. This also allows us to reuse existing DejaGNU test cases from ./gcc/testsuite/gcc.target/i386, modify them as needed for the POWER target, and add them to the ./gcc/testsuite/gcc.target/powerpc. We can be flexible on the sequence that headers/intrinsics and test cases are ported.  This should be based on customer need and resolving internal dependencies.  This implies an oldest-to-newest / bottom-up (MMX, SSE, SSE2, …) strategy. The assumption is that existing community and user application codes are more likely to have optimized code for previous generation ubiquitous (SSE, SSE2, ...) processors than the latest (and rare) SkyLake AVX512. I would start with an existing header from the current GCC  ./gcc/config/i386/ and copy the header comment (including FSF copyright) down to any vector typedefs used in the API or implementation. Skip the Intel intrinsic implementation code for now, but add the ending #endif matching the header's conditional guard against multiple inclusion. You can add  #include <alternative> as needed. For example: Then you can start adding small groups of related intrinsic implementations to the header to be compiled and  examine the generated code. Once you have what looks like reasonable code you can grep through  ./gcc/testsuite/gcc.target/i386 for examples using the intrinsic names you just added. You should be able to find functional tests for most X86 intrinsics. 
The GCC testsuite uses the DejaGNU  test framework as documented in the GNU Compiler Collection (GCC) Internals manual. GCC adds its own DejaGNU directives and extensions, that are embedded in the testsuite source as comments.  Some are platform specific and will need to be adjusted for tests that are ported to our platform. For exampleshould become something likeRepeat this process until you have equivalent implementations for all the intrinsics in that header and associated test cases that execute without error. + 2.1 Prefered methodsAs we will see there are multiple ways to implement the logic of these intrinsics. Some implementation methods are preferred because they allow the compiler to select instructions and provided the most flexibility for optimization across the whole sequence. Other methods may be required to deliver a specific semantic or to deliver better optimization than the current compiler is capable of. Some methods are more portable across multiple compilers (GCC, LLVM, ...). All of this should be taken into consideration for each intrinsic implementation. In general we should use the following list as a guide to these decisions:Use C vector arithmetic, logical, dereference, etc., operators in preference to intrinsics.Use the bi-endian interfaces from Appendix A of the ABI in preference to other intrinsics when available, as these are designed for portability among compilers.Use other, less well documented intrinsics (such as __builtin_vsx_*) when no better facility is available, in preference to assembly.If necessary, use inline assembly, but know what you're doing. + 2.2 Prepare yourselfTo port Intel intrinsics to POWER you will need to prepare yourself with knowledge of PowerISA vector facilities and how to access the associated documentation.GCC vector extention syntax and usage. This is one of a set of GCC “Extentions to the C language Family” that the intrinsic header implementation depends on.  
As many of the GCC intrinsics for x86 are implemented via C vector extensions, reading and understanding of this code is an important part of the porting process. Intel (x86) intrinsic and type naming conventions and how to find more information. The intrinsic name encodes  some information about the vector size and type of the data, but the pattern is not always  obvious. Using the online Intel Intrinsic Guide to look up the intrinsic by name is a good first step.PowerISA Vector facilities. The Vector facilities of POWER8 are extensive and cover the usual types and usual operations. However it has a different history and organization from Intel.  Both (Intel and PowerISA) have their quirks and in some cases the mapping may not be obvious. So familiarizing yourself with the PowerISA Vector (VMX) and Vector Scalar Extensions (VSX) is important. + 2.2.1 GCC Vector ExtensionsThe GCC vector extensions are common syntax but implemented in a target specific way. Using the C vector extensions require the __gnu_inline__ attribute to avoid syntax errors in case the user specified  C standard compliance (-std=c90, -std=c11, etc) that would normally disallow such extensions. The GCC implementation for PowerPC64 Little Endian is (mostly) functionally compatible with x86_64 vector extension usage. We can use the same type definitions (at least for  vector_size (16)), operations, syntax <{...}> for vector initializers and constants, and array syntax <[]> for vector element access. So simple arithmetic / logical operations on whole vectors should work as is. The caveat is that the interface data type of the Intel Intrinsic may not match the data types of the operation, so it may be necessary to cast the operands to the specific type for the operation. This also applies to vector initializers and accessing vector elements. You need to use the appropriate type to get the expected results. Of course this applies to X86_64 as well. 
For example: Note the cast from the interface type (__m128) to the implementation type (__v4sf, defined in the intrinsic header) for the vector float add (+) operation. This is enough for the compiler to select the appropriate vector add instruction for the float type. Then the result (which is __v4sf) needs to be cast back to the expected interface type (__m128). Note also the use of array syntax (((__v4sf)__A)[0]) to extract the lowest (left most [Note: Here we are using logical left and logical right, which will not match the PowerISA register view in Little Endian. Logical left is the left most element for initializers {left, … , right}, storage order and array  order where the left most element is [0].]) element of a vector. The cast (__v4sf) ensures that the compiler knows we are extracting the left most 32-bit float. The compiler ensures the code generated matches the Intel behavior for PowerPC64 Little Endian. The code generation is complicated by the fact that PowerISA vector registers are Big Endian (element 0 is the left most word of the vector) and X86 scalar stores are from the left most (word/dword) for the vector register. Application code with extensive use of scalar (vs packed) intrinsic loads / stores should be flagged for rewrite to native PPC code using existing scalar types (float, double, int, long, etc.). Another example is the set reverse order: Note the use of initializer syntax used to collect a set of scalars into a vector. Code with constant initializer values will generate a vector constant of the appropriate endian. However code with variables in the initializer can get complicated as it often requires transfers between register sets and perhaps format conversions. We can assume that the compiler will generate the correct code, but if this class of intrinsics shows up as a hot spot, a rewrite to native PPC vector built-ins may be appropriate. 
For example initializer of a variable replicated to all the vector fields might not be recognized as a “load and splat” and making this explicit may help the compiler generate better code. + 2.2.2 Intel Intrinsic functionsSo what is an intrinsic function? From Wikipedia:In compiler theory, an intrinsic function is a function available for use in a given programming language whose implementation is handled specially by the compiler. Typically, it substitutes a sequence of automatically generated instructions for the original function call, similar to an inline function. Unlike an inline function though, the compiler has an intimate knowledge of the intrinsic function and can therefore better integrate it and optimize it for the situation. This is also called builtin function in many languages.The “Intel Intrinsics” API provides access to the many instruction set extensions (Intel Technologies) that Intel has added (and continues to add) over the years. The intrinsics provided access to new instruction capabilities before the compilers could exploit them directly. Initially these intrinsic functions where defined for the Intel and Microsoft compiler and where eventually implemented and contributed to GCC.The Intel Intrinsics have a specific type and naming structure. In this naming structure, functions starts with a common prefix (MMX and SSE use _mm_ prefix, while AVX added the _mm256 _mm512 prefixes), then a short functional name (set, load, store, add, mul, blend, shuffle, …) and a suffix (_pd, _sd, _pi32...) with type and packing information. See Appendix B for the list of common intrisic suffixes.Oddly many of the MMX/SSE operations are not vectors at all. There are a lot of scalar operations on a single float, double, or long long type. In effect these are scalars that can take advantage of the larger (xmm) register space. 
Also in the Intel 32-bit architecture they provided IEEE754 float and double types, and 64-bit integers that did not exist or where hard to implement in the base i386/387 instruction set. These scalar operation use a suffix starting with '_s' (_sd for scalar double float, _ss scalar float, and _si64 for scalar long long).True vector operations use the packed or extended packed suffixes, starting with '_p' or '_ep' (_pd for vector double, _ps for vector float, and _epi32 for vector int). The use of '_ep'  seems to be reserved to disambiguate intrinsics that existed in the (64-bit vector) MMX extension from the extended (128-bit vector) SSE equivalent. For example _mm_add_pi32 is a MMX operation on a pair of 32-bit integers, while _mm_add_epi32 is an SSE2 operation on vector of 4 32-bit integers. The GCC  builtins for the i386.target, (includes x86 and x86_64) are not the same as the Intel Intrinsics. While they have similar intent and cover most of the same functions, they use a different naming (prefixed with __builtin_ia32_, then function name with type suffix) and uses GCC vector type modes for operand types. For example:Note: A key difference between GCC builtins for i386 and Powerpc is that the x86 builtins have different names of each operation and type while the powerpc altivec builtins tend to have a single generatic builtin for  each operation, across a set of compatible operand types. In GCC the Intel Intrinsic header (*intrin.h) files are implemented as a set of inline functions using the Intel Intrinsic API names and types. These functions are implemented as either GCC C vector extension code or via one or more GCC builtins for the i386 target. So lets take a look at some examples from GCC's SSE2 intrinsic header emmintrin.h:Note that the  _mm_add_pd is implemented direct as C vector extension code., while _mm_add_sd is implemented via the GCC builtin __builtin_ia32_addsd. 
From the discussion above we know the _pd suffix indicates a packed vector double while the _sd suffix indicates a scalar double in a XMM register. + 2.2.2.1 Packed vs scalar intrinsics So what is actually going on here? The vector code is clear enough if you know that the '+' operator is applied to each vector element. The intent of the builtin is a little less clear, as the GCC documentation for __builtin_ia32_addsd is not very helpful (nonexistent). So perhaps the Intel Intrinsic Guide will be more enlightening. To paraphrase: From the _mm_add_pd description: each double float element ([0] and [1], or bits [63:0] and [127:64]) of operands a and b are added and the resulting vector is returned. From the _mm_add_sd description: Add element 0 of first operand (a[0]) to element 0 of the second operand (b[0]) and return the packed vector double {(a[0] + b[0]), a[1]}. Or said differently the sum of the logical left most half of the operands are returned in the logical left most half (element [0]) of the  result, along with the logical right half (element [1]) of the first operand (unchanged) in the logical right half of the result. So the packed double is easy enough but the scalar double details are more complicated. One source of complication is that while both Instruction Set Architectures (SSE vs VSX) support scalar floating point operations in vector registers the semantics are different. The vector bit and field numbering is different (reversed). The handling of the non-scalar part of the register for scalar operations is different. To minimize confusion and use consistent nomenclature, I will try to use the terms logical left and logical right elements based on the order they appear in C vector initializers and element index order. So in the vector (__v2df){1.0, 2.0}, the value 1.0 is in the logical left element [0] and the value 2.0 is in the logical right element [1]. So lets look at how to implement these intrinsics for the PowerISA. 
For example in this case we can use the GCC vector extension, like so: The packed double implementation operates on the vector as a whole. The scalar double implementation operates on and updates only the [0] element of the vector and leaves the __A[1] element unchanged.  From this source the GCC compiler generates the following code for the PPC64LE target: The packed vector double generated the corresponding VSX vector double add (xvadddp). But the scalar implementation is a bit more complicated.  First, in the PPC64LE vector format, element [0] is not in the correct position for  the scalar operations. So the compiler generates vector splat double (xxspltd) instructions to copy elements __A[0] and __B[0] into position for the VSX scalar add double (xsadddp) that follows. However the VSX scalar operation leaves the other half of the VSR undefined (which does not match the expected Intel semantics). So the compiler must generate a vector merge high double (xxmrghd) instruction to combine the original __A[1] element (from vs34) with the scalar add result from vs35 element [1]. This merge swings the scalar result from vs35[1] element into the vs34[0] position, while preserving the original vs34[1] (from __A[1]) element (copied to itself). Fun fact: The vector registers in PowerISA are decidedly Big Endian. But we decided to make the PPC64LE ABI behave like a Little Endian system to make application porting easier. This requires the compiler to manipulate the PowerISA vector intrinsics behind the scenes to get the correct Little Endian results. For example the element selector [0|1] for vec_splat and the generation of vec_mergeh vs vec_mergel are reversed for Little Endian. This technique applies to packed and scalar intrinsics for the usual arithmetic operators (add, subtract, multiply, divide). Using GCC vector extensions in these intrinsic implementations provides the compiler more opportunity to optimize the whole function. 
Now we can look at a slightly more interesting (complicated) case. Square root (sqrt) is not an arithmetic operator in C and is usually handled with a library call or a compiler builtin. We really want to avoid library calls and want to avoid any unexpected side effects. As you see below the implementation of _mm_sqrt_pd and _mm_sqrt_sd intrinsics are based on GCC x86 built ins. For the packed vector sqrt, the PowerISA VSX has an equivalent vector double square root instruction and GCC provides the vec_sqrt builtin. But the scalar implementation involves an additional parameter and an extra move.  This seems intended to mimic the propagation of the __A[1] input to the logical right half of the XMM result that we saw with _mm_add_sd above. The instinct is to extract the low scalar (__B[0]) from operand __B and pass this to  the GCC __builtin_sqrt () before recombining that scalar result with __A[1] for the vector result. Unfortunately C language standards force the compiler to call the libm sqrt function unless -ffast-math is specified. The -ffast-math option is not commonly used and we want to avoid the external library dependency for what should be only a few inline instructions. So this is not a good option. Thinking outside the box; we do have an inline intrinsic for a (packed) vector double sqrt, that we just implemented. However we need to ensure the other half of __B (__B[1]) does not cause any harmful side effects (like raising exceptions for NAN or  negative values). The simplest solution is to splat __B[0] to both halves of a temporary value before taking the vec_sqrt. Then this result can be combined with __A[1] to return the final result. For example: In this  example we use _mm_set1_pd to splat the scalar __B[0], before passing that vector to our _mm_sqrt_pd implementation, then pass the sqrt result (c[0])  with __A[1] to  _mm_setr_pd to combine the final result. 
You could also use the {c[0], __A[1]} initializer instead of _mm_setr_pd. Now we can look at vector and scalar compares that add their own complication: For example: The Intel Intrinsic Guide for _mm_cmpeq_pd describes comparing double elements [0|1] and returning either 0s for not equal or 1s (0xFFFFFFFFFFFFFFFF or long long -1) for equal. The comparison result is intended as a select mask (predicates) for selecting or ignoring specific elements in later operations. The scalar version _mm_cmpeq_sd is similar except for the quirk of only comparing element [0] and combining the result with __A[1] to return the final vector result. The packed vector implementation for PowerISA is simple as VSX provides the equivalent instruction and GCC provides the vec_cmpeq builtin supporting the vector double type. The technique of using scalar comparison operators on the __A[0] and __B[0] does not work as the C comparison operators return 0 or 1 results while we need the vector select mask (effectively 0 or -1). Also we need to watch for sequences that mix scalar floats and integers, generating if/then/else logic or requiring expensive transfers across register banks. In this case we are better off using explicit vector built-ins for _mm_add_sd as an example. We can use vec_splat from element [0] to temporaries where we can safely use vec_cmpeq to generate the expected selector mask. Note that the vec_cmpeq returns a bool long type so we need to cast the result back to __v2df. Then use the (__m128d){c[0], __A[1]} initializer to combine the comparison result with the original __A[1] input and cast to the required interface type.  So we have this example: Now lets look at a similar example that adds some surprising complexity. This is the compare not equal case so we should be able to find the equivalent vec_cmpne builtin: + 2.2.2.2 To vec_not or not Well not exactly. Looking at the OpenPOWER ABI document we see a reference to vec_cmpne for all numeric types. 
But when we look in the current GCC 6 documentation we find that vec_cmpne is not on the list. So it is planned in the ABI, but not implemented yet. Looking at the PowerISA 2.07B we find a VSX Vector Compare Equal to Double-Precision but no Not Equal. In fact we see only vector double compare instructions for greater than and greater than or equal in addition to the equal compare. Not only can't we find a not equal, there are no less than or less than or equal compares either. So what is going on here? Partially this is the Reduced Instruction Set Computer (RISC) design philosophy. In this case the compiler can generate all the required compares using the existing vector instructions and simple transforms based on Boolean algebra. So vec_cmpne(A,B) is simply vec_not (vec_cmpeq(A,B)). And vec_cmplt(A,B) is simply vec_cmpgt(B,A) based on the identity A < B iff B > A. Similarly vec_cmple(A,B) is implemented as vec_cmpge(B,A). Wait a minute, there is no vec_not() either. Can not find it in the PowerISA, the OpenPOWER ABI, or the GCC PowerPC Altivec Built-in documentation. There is no vec_move() either! How can this possibly work? This is RISC philosophy again. We can always use a logical instruction (like bit wise and or or) to effect a move given that we also have nondestructive 3 register instruction forms. In the PowerISA most instructions have two input registers and a separate result register. So if the result register number is  different from either input register then the inputs are not clobbered (nondestructive). Of course nothing prevents you from specifying the same register for both inputs or even all three registers (result and both inputs).  And sometimes it is useful. The statement B = vec_or (A,A) is effectively a vector move/copy from A to B. And A = vec_or (A,A) is obviously a nop (no operation). 
In fact the PowerISA defines the preferred nop and register move for vector registers in this way. It is also useful to have hardware implement the logical operators nor (not or) and nand (not and).  The PowerISA provides these instructions for fixed point and vector logical operations. So vec_not(A) can be implemented as vec_nor(A,A). So looking at the  implementation of _mm_cmpne we propose the following: The Intel Intrinsics also include the not forms of the relational compares: The PowerISA and OpenPOWER ABI, or GCC PowerPC Altivec Built-in documentation do not provide any direct equivalents to the  not greater than class of compares. Again you don't really need them if you know Boolean algebra. We can use identities like {not (A < B) iff A >= B} and {not (A <= B) iff A > B}. So the PPC64LE implementation follows: These patterns repeat for the scalar version of the not compares. And in general the larger pattern described in this chapter applies to the other float and integer types with similar interfaces. + 2.2.2.3 Crossing lanes We have seen that, most of the time, vector SIMD units prefer to keep computations in the same “lane” (element number) as the input elements. The only exception in the examples so far are the occasional splat (copy one element to all the other elements of the vector) operations. Splat is an example of the general category of “permute” operations (Intel would call this a “shuffle” or “blend”). Permutes select and rearrange the elements of (usually) a concatenated pair of vectors and deliver those selected elements, in a specific order, to a result vector. The selection and order of elements in the result is controlled by a third vector, either as a 3rd input vector or an immediate field of the instruction. For example, consider the Intel intrinsics for Horizontal Add / Subtract added with SSE3. These intrinsics add (subtract) adjacent element pairs, across a pair of input vectors, placing the sum of the adjacent elements in the result vector. 
For example _mm_hadd_ps  which implements the operation on float: Horizontal Add (hadd) provides an incremental vector “sum across” operation commonly needed in matrix and vector transform math. Horizontal Add is incremental as you need three hadd instructions to sum across 4 vectors of 4 elements ( 7 for 8 x 8, 15 for 16 x 16, …). The PowerISA does not have a sum-across operation for float or double. We can use the vector float add instruction after we rearrange the inputs so that element pairs line up for the horizontal add. For example we would need to permute the input vectors {1, 2, 3, 4} and {101, 102, 103, 104} into vectors {2, 4, 102, 104} and {1, 3, 101, 103} before the  vec_add. This requires two vector permutes to align the elements into the correct lanes for the vector add (to implement Horizontal Add).  The PowerISA provides a generalized byte-level vector permute (vperm) based on a vector register pair source as input and a control vector. The control vector provides 16 indexes (0-31) to select bytes from the concatenated input vector register pair (VRA, VRB). A more specific set of permute (pack, unpack, merge, splat) operations (across element sizes) are encoded as separate  instruction opcodes or instruction immediate fields. Unfortunately only the general vec_perm can provide the realignment we need for the _mm_hadd_ps operation or any of the int, short variants of hadd. For example: This requires two permute control vectors; one to select the even word elements across __X and __Y, and another to select the odd word elements across __X and __Y. The results of these permutes (vec_perm) are inputs to the vec_add and complete the hadd operation. Fortunately the permute required for the double (64-bit) case (IE _mm_hadd_pd) reduces to the equivalent of vec_mergeh / vec_mergel  doubleword (which are variants of  VSX Permute Doubleword Immediate). 
So the implementation of _mm_hadd_pd can be simplified to this: This eliminates the load of the control vectors required by the previous example. + 2.2.3 PowerISA Vector facilities. The PowerISA vector facilities (VMX and VSX) are extensive, but do not always provide a direct or obvious functional equivalent to the Intel Intrinsics. But being not obvious is not the same as impossible. It just requires some basic programming skills. It is a good idea to have an overall understanding of the vector capabilities of the PowerISA. You do not need to memorize every instruction but it helps to know where to look. Both the PowerISA and OpenPOWER ABI have a specific structure and organization that can help you find what you are looking for. It also helps to understand the relationship between the PowerISA's low level instructions and the higher abstraction of the vector intrinsics as defined by the OpenPOWER ABI's Vector Programming Interfaces and the de facto  standard of GCC's PowerPC AltiVec Built-in Functions. + 2.2.3.1 The PowerISA The PowerISA is, for historical reasons, organized at the top level by the distinction between the older Vector Facility (Altivec / VMX) and the newer Vector-Scalar Floating-Point Operations (VSX). + 2.2.3.1.1 The Vector Facility (VMX) The original VMX supported SIMD integer byte, halfword, and word, and single float data types within a separate (from GPR and FPR) bank of 32 x 128-bit vector registers. These operations like to stay within their (SIMD) lanes except where the operation changes the element data size (integer multiply, pack, and unpack). This is complemented by bit logical and shift / rotate / permute / merge instructions that operate on the vector as a whole.  Some operations (permute, pack, merge, shift double, select) will select 128 bits from a pair of vectors (256-bits) and deliver a 128-bit vector result. 
These instructions will cross lanes or multiple registers to grab fields and assmeble them into the single register result.The PowerISA 2.07B Chapter 6. Vector Facility is organised starting with an overview (chapters 6.1- 6.6) :Then a chapter on storage (load/store) access for vector and vector elements: + 2.2.3.1.1.1 Vector permute and formatting instructionsThe vector Permute and formatting chapter follows and is an important one to study. These operation operation on the byte, halfword, word (and with 2.07 doubleword) integer types . Plus special Pixel type. The shifts instructions in this chapter operate on the vector as a whole at either the bit or the byte (octet) level, This is an important chapter to study for moving PowerISA vector results into the vector elements that Intel Intrinsics expect:The Vector Integer instructions include the add / subtract / Multiply / Multiply Add/Sum / (no divide) operations for the standard integer types. There are instruction forms that  provide signed, unsigned, modulo, and saturate results for most operations. The PowerISA 2.07 extension add / subtract of 128-bit integers with carry and extend to 256, 512-bit and beyond , is included here. There are signed / unsigned compares across the standard integer types (byte, .. doubleword). The usual and bit-wise logical operations. And the SIMD shift / rotate instructions that operate on the vector elements for various types.The vector [single] float instructions are grouped into this chapter. This chapter does not include the double float instructions which are described in the VSX chapter. VSX also include additional float instructions that operate on the whole 64 register vector-scalar set.The vector XOR based instructions are new with PowerISA 2.07 (POWER8) and provide vector  crypto and check-sum operations:The vector gather and bit permute support bit level rearrangement of bits with in the vector. 
While the vector versions of the count leading zeros and population count are useful to accelerate specific algorithms. The Decimal Integer add / subtract instructions complement the Decimal Floating-Point instructions. They can also be used to accelerate some binary to/from decimal conversions. The VSCR instruction provides access to the Non-Java mode floating-point control and the saturation status. These instructions are not normally of interest in porting Intel intrinsics. With PowerISA 2.07B (Power8) several major extensions were added to the Vector Facility: Vector Crypto: Under “Vector Exclusive-OR-based Instructions”, AES [inverse] Cipher, SHA 256 / 512 Sigma, Polynomial Multiplication, and Permute and XOR instructions. 64-bit Integer; signed and unsigned add / subtract, signed and unsigned compare, Even / Odd 32 x 32 multiply with 64-bit product, signed / unsigned max / min, rotate and shift left/right. Direct Move between GPRs and the FPRs / Left half of Vector Registers. 128-bit integer add / subtract with carry / extend, direct support for vector __int128 and multiple precision arithmetic. Decimal Integer add / subtract for 31 digit BCD. Miscellaneous SIMD extensions: Count leading Zeros, Population count, bit gather / permute, and vector forms of eqv, nand, orc. The rationale for why these are included in the Vector Facilities (VMX) (vs Vector-Scalar Floating-Point Operations (VSX)) has more to do with how the instructions were encoded than with the type of operations or the ISA version of introduction. This is primarily a trade-off between the bits required for register selection vs bits for extended op-code space within a fixed 32-bit instruction. Basically accessing 32 vector registers requires 5-bits per register, while accessing all 64 vector-scalar registers requires 6-bits per register. 
When you consider the most vector instructions require  3  and some (select, fused multiply-add) require 4 register operand forms,  the impact on op-code space is significant. The larger register set of VSX was justified by queuing theory of larger HPC matrix codes using double float, while 32 registers are sufficient for most applications.So by definition the VMX instructions are restricted to the original 32 vector registers while VSX instructions are encoded to  access all 64 floating-point scalar and vector double registers. This distinction can be troublesome when programming at the assembler level, but the compiler and compiler built-ins can hide most of this detail from the programmer. + 2.2.3.1.2 Vector-Scalar Floating-Point Operations (VSX)With PowerISA 2.06 (POWER7) we extended the vector SIMD capabilities of the PowerISA:Extend the available vector and floating-point scalar register sets from 32 registers each to a combined 64 x 64-bit scalar floating-point and 64 x 128-bit vector registers.Enable scalar double float operations on all 64 scalar registers.Enable vector double and vector float operations for all 64 vector registers.Enable super-scalar execution of vector instructions and support 2 independent vector floating point  pipelines for parallel execution of 4 x 64-bit Floating point Fused Multiply Adds (FMAs) and 8 x 32-bit (FMAs) per cycle.With PowerISA 2.07 (POWER8) we added single-precision scalar floating-point instruction to VSX. This completes the floating-point computational set for VSX. This ISA release also clarified how these operate in the Little Endian storage model.While the focus was on enhanced floating-point computation (for High Performance Computing),  VSX also extended  the ISA with additional storage access, logical, and permute (merge, splat, shift) instructions. 
This was necessary to extend these operations cover 64 VSX registers, and improves unaligned storage access for vectors  (not available in VMX).The PowerISA 2.07B Chapter 7. Vector-Scalar Floating-Point Operations is organized starting with an introduction and overview (chapters 7.1- 7.5) . The early sections (7.1 and 7.2) describe the layout of the 64 VSX registers and how they relate (overlap and inter-operate) to the existing floating point scalar (FPRs) and (VMX VRs) vector registers.The definitions given in “7.1.1.1 Compatibility with Category Floating-Point and Category Decimal Floating-Point Operations”, and “7.1.1.2 Compatibility with Category Vector Operations” Note; the reference to scalar element 0 above is from the big endian register perspective of the ISA. In the PPC64LE ABI implementation, and for the purpose of porting Intel intrinsics, this is logical element 1.  Intel SSE scalar intrinsics operated on logical element [0],  which is in the wrong position for PowerISA FPU and VSX scalar floating-point  operations. Another important note is what happens to the other half of the VSR when you execute a scalar floating-point instruction (The contents of doubleword 1 of a VSR … are undefined.)The compiler will hide some of this detail when generating code for little endian vector element [] notation and most vector built-ins. For example vec_splat (A, 0) is transformed for PPC64LE to xxspltd VRT,VRA,1. What the compiler can not hide is the different placement of scalars within vector registers.Vector registers (VRs) 0-31 overlay and can be accessed from vector scalar registers (VSRs) 32-63. The ABI also specifies that VR2-13 are used to pass parameter and return values. In some cases the same (similar) operations exist in both VMX and VSX instruction forms, while in the other cases operations only exist for VMX (byte level permute and shift) or VSX (Vector double).  
So register selection that avoids unnecessary vector moves and follows the ABI, while maintaining the correct instruction-specific register numbering, can be tricky. The GCC register constraint annotations for inline assembler using vector instructions are challenging, even for experts. So only experts should be writing assembler and then only in extraordinary circumstances. You should leave these details to the compiler (using vector extensions and vector built-ins) whenever possible. The next sections get into the details of floating point representation, operations, and exceptions. Basically the implementation details for the IEEE754R and C/C++ language standards that most developers only access via higher level APIs. So most programmers will not need this level of detail, but it is there if needed. Finally an overview of the VSX storage access instructions for big and little endian and for aligned and unaligned data addresses. This includes diagrams that illuminate the differences. Section 7.6 starts with a VSX Instruction Set Summary which is the place to start to get a feel for the types and operations supported.  The emphasis on floating-point, both scalar and vector (especially vector double), is pronounced. Many of the scalar and single-precision vector instructions look like duplicates of what we have seen in the Chapter 4 Floating-Point and Chapter 6 Vector facilities. The difference here is new instruction encodings to access the full 64 VSX register space. In addition a small number of logical instructions are included to support predication (selecting / masking vector elements based on compare results), and a set of permute, merge, shift, and splat instructions that operate on VSX word (float) and doubleword (double) elements. 
As mentioned for VMX section 6.8, these instructions are good to study as they are useful for realigning elements from PowerISA vector results to that required for Intel Intrinsics. The VSX Instruction Descriptions section contains the detailed description for each VSX category instruction.  The table entries from the Instruction Set Summary are formatted in the document as hyperlinks to the corresponding instruction descriptions. + 2.2.3.2 PowerISA Vector IntrinsicsThe OpenPOWER ELF V2 application binary interface (ABI): Chapter 6. Vector Programming Interfaces and Appendix A. Predefined Functions for Vector Programming document the current and proposed vector built-ins we expect all C/C++ compilers to implement. Some of these operations are endian sensitive and the compiler needs to make corresponding adjustments as it generates code for endian sensitive built-ins. There is a good overview for this in the OpenPOWER ABI section 6.4. Vector Built-in Functions. Appendix A is organized (sorted) by built-in name, output type, then parameter types. Most built-ins are generic as the named operation (add, sub, mul, cmpeq, ...) applies to multiple types. So the vec_add built-in applies to all the signed and unsigned integer types (char, short, int, and long) plus float and double floating-point types. The compiler looks at the parameter type to select the vector instruction (or instruction sequence) that implements the (add) operation on that type. The compiler infers the output result type from the operation and input parameters and will complain if the target variable type is not compatible. For example: This is one key difference between PowerISA built-ins and Intel Intrinsics (Intel Intrinsics are not generic and include type information in the name). 
This is why it is so important to understand the vector element types and to add the appropriate type casts to get the correct results. The de facto standard implementation is GCC as defined in the include file <altivec.h> and documented in the GCC online documentation in 6.59.20 PowerPC AltiVec Built-in Functions. The header file name and section title reflect the origin of the Vector Facility, but recent versions of GCC altivec.h include built-ins for newer PowerISA 2.06 and 2.07 VMX plus VSX extensions. This is a work in progress where your (older) distro GCC compiler may not include built-ins for the latest PowerISA 3.0 or ABI edition. So before you use a built-in you find in the ABI Appendix A, check the specific GCC online documentation for the GCC version you are using. + 2.2.3.3 How vector elements change size and typeMost vector built-ins return the same vector type as the (first) input parameters, but there are exceptions. Examples include: conversions between types, compares, pack, unpack, merge, and integer multiply operations.  Converting floats to/from integer will change the type and sometimes change the element size as well (double ↔ int and float ↔ long). For the VMX the conversions are always the same size (float ↔ [unsigned] int). But VSX allows conversion of 64-bit (long or double) to/from 32-bit (float or int) with the inherent size changes. The PowerISA VSX defines a 4 element vector layout where little endian elements 0, 2 are used for input/output and elements 1, 3 are undefined. The OpenPOWER ABI Appendix A defines vec_double and vec_float with even/odd and high/low extensions as program aids. These are not included in GCC 7 or earlier but are planned for GCC 8. Compare operations produce either vector bool <input element type> (effectively bit masks) or predicates (the condition code for all and any are represented as an int truth variable). 
When a predicate compare (ie vec_all_eq, vec_any_gt), is used in a if statement,  the condition code is used directly in the conditional branch and the int truth value is not generated.Pack operations pack integer elements into the next smaller (half) integer sized elements. Pack operations include signed and unsigned saturate and unsigned modulo forms. As the packed result will be half the size (in bits), pack instructions require 2 vectors (256-bits) as input and generate a single 128-bit vector results.Unpack operations expand integer elements into the next larger size elements. The integers are always treated as signed values and sign-extended. The processor design avoids instructions that return multiple register values. So the PowerISA defines unpack-high and unpack low forms where instruction takes (the high or low) half of vector elements and extends them to fill the vector output. Element order is maintained and an unpack high / low sequence with same input vector has the effect of unpacking to a 256-bit result in two vector registers.Merge operations resemble shuffling two (vectors) card decks together, alternating (elements) cards in the result.   As we are merging from 2 vectors (256-bits) into 1 vector (128-bits) and the elements do not change size, we have merge high and merge low instruction forms for each (byte, halfword and word) integer type. The merge high operations alternate elements from the (vector register left) high half of the two input vectors. The merge low operation alternate elements from the (vector register right) low half of the two input vectors. For PowerISA 2.07 we added vector merge word even / odd instructions. Instead of high or low elements the shuffle is from the even or odd number elements of the two input vectors. Passing the same vector to both inputs to merge produces splat like results for each doubleword half, which is handy in some convert operations. 
Integer multiply has the potential to generate twice as many bits in the product as input. A multiply of 2 int (32-bit) values produces a long (64-bits). Normal C language * operations ignore this and discard the top 32-bits of the result. However in some computations it is useful to preserve the double product precision for intermediate computation before reducing the final result back to the original precision. The PowerISA VMX instruction set took the latter approach, i.e. keep all the product bits until the programmer explicitly asks for the truncated result. So the vector integer multiplies are split into even/odd forms across signed and unsigned; byte, halfword and word inputs. This requires two instructions (given the same inputs) to generate the full vector multiply across 2 vector registers and 256-bits. Again as POWER processors are super-scalar this pair of instructions should execute in parallel. The set of expanded product values can either be used directly in further (doubled precision) computation or merged/packed into the single vector at the smaller bit size. This is what the compiler will generate for C vector extension multiply of vector integer types. + 2.2.4 Some more Intrinsic examplesThe intrinsic _mm_cvtpd_ps converts a packed vector double into a packed vector single float. Since only 2 doubles fit into a 128-bit vector only 2 floats are returned and occupy only half (64-bits) of the XMM register. For this intrinsic the 64 bits are packed into the logical left half of the registers and the logical right half of the register is set to zero (as per the Intel cvtpd2ps instruction). The PowerISA provides the VSX Vector round and Convert Double-Precision to Single-Precision format (xvcvdpsp) instruction. In the ABI this is vec_floato (vector double).  This instruction converts each double element then transfers converted element 0 to float element 1, and converted element 1 to float element 3. 
Float elements 0 and 2 are undefined (the hardware can do whatever). This does not match the expected results for _mm_cvtpd_ps. So we need to re-position the results to word elements 0 and 2, which allows a pack operation to deliver the correct format. Here the merge odd splats element 1 to 0 and element 3 to 2. The Pack operation combines the low half of each doubleword from the vector result and vector of zeros to generate the required format. This technique is also used to implement _mm_cvttpd_epi32 which converts a packed vector double into a packed vector int. The PowerISA instruction xvcvdpsxws uses a similar layout for the result as xvcvdpsp and requires the same fix up. + 2.3 Profound differences We have already mentioned above a number of architectural differences that affect porting of codes containing Intel intrinsics to POWER. The fact that Intel supports multiple vector extensions with different vector widths (64, 128, 256, and 512-bits) while the PowerISA only supports vectors of 128-bits is one issue. Another is the difference in how the respective ISAs support scalars in vector registers.  In the text above we propose workable alternatives for the PowerPC port. There are also differences in the handling of floating point exceptions and rounding modes that may impact the application's performance or behavior. + 2.3.1 Floating Point ExceptionsNominally both ISAs support the IEEE754 specifications, but there are some subtle differences. Both architectures define a status and control register to record exceptions and enable / disable floating exceptions for program interrupt or default action. Intel has a MXCSR and PowerISA has a FPSCR which basically do the same thing but with different bit layout. Intel provides _mm_setcsr / _mm_getcsr intrinsics to allow direct access to the MXCSR. In the early days before the OS POSIX run-times were updated to manage the MXCSR, this might have been useful. 
Today this would be highly discouraged with a strong preference to use the POSIX APIs (feclearexcept, fegetexceptflag, fesetexceptflag, ...) instead. If we implement _mm_setcsr / _mm_getcsr at all, we should simply redirect the implementation to use the POSIX APIs from <fenv.h>. But it might be simpler just to replace these intrinsics with macros that generate #error. The Intel MXCSR does have some non-standard (POSIX/IEEE754) quirks: Flush-To-Zero and Denormals-Are-Zeros flags. This simplifies the hardware response to what should be a rare condition (underflows where the result can not be represented in the exponent range and precision of the format) by simply returning a signed 0.0 value. The intrinsic header implementation does provide constant masks for _MM_DENORMALS_ZERO_ON (<pmmintrin.h>) and _MM_FLUSH_ZERO_ON (<xmmintrin.h>), so technically it is available to users of the Intel Intrinsics API. The VMX Vector facility provides a separate Vector Status and Control register (VSCR) with a Non-Java Mode control bit. This control combines the flush-to-zero semantics for floating-point underflow and denormal values. But this control only applies to VMX vector float instructions and does not apply to VSX scalar floating-point or vector double instructions. The FPSCR does define a Floating-Point non-IEEE mode which is optional in the architecture. This would apply to Scalar and VSX floating-point operations if it was implemented. This was largely intended for embedded processors and is not implemented in the POWER processor line. As the flush-to-zero is primarily a performance enhancement and is clearly outside the IEEE754 standard, it may be best to simply ignore this option for the intrinsic port. + 2.3.2 Floating-point rounding modesThe Intel (x86 / x86_64) and PowerISA architectures both support the 4 IEEE754 rounding modes. 
Again while the Intel Intrinsic API allows the application to change rounding modes via updates to the MXCSR it is a bad idea and should be replaced with the POSIX APIs (fegetround and fesetround). + 2.3.3 PerformanceThe performance of a ported intrinsic depends on the specifics of the intrinsic and the context it is used in. Many of the SIMD operations have equivalent instructions in both architectures. For example the vector float and vector double match very closely. However the SSE and VSX scalars have subtle differences of how the scalar is positioned within the vector registers and what happens to the rest (non-scalar part) of the register (previously discussed in here). This requires additional PowerISA instructions to preserve the non-scalar portion of the vector registers. This may or may not be important to the logic of the program being ported, but we have to handle the case where it is. This is where the context of how the intrinsic is used starts to matter. If the scalar intrinsics are used within a larger program the compiler may be able to eliminate the redundant register moves as the results are never used. In the other cases common set up (like permute vectors or bit masks) can be common-ed up and hoisted out of the loop. So it is very important to let the compiler do its job with higher optimization levels (-O3, -funroll-loops). + 2.3.3.1 Using SSE float and double scalarsSSE scalar float / double intrinsics “hand” optimization is no longer necessary. This was important when SSE was initially introduced and compiler support was limited or nonexistent.  Also SSE scalar float / double provided additional (16) registers and IEEE754 compliance, not available from the 8087 floating point architecture that preceded it. So application developers were motivated to use SSE instructions versus what the compiler was generating at the time. Modern compilers can now generate and optimize these (SSE scalar) instructions for Intel from C standard scalar code. 
Of course PowerISA supported IEEE754 float and double and had 32 dedicated floating point registers from the start (and now 64 with VSX). So replacing an Intel-specific scalar intrinsic implementation with the equivalent C language scalar implementation is usually a win: it allows the compiler to apply the latest optimization and tuning for the latest generation processor, and is portable to other platforms where the compiler can also apply the latest optimization and tuning for that processor's latest generation. + 2.3.3.2 Using MMX intrinsicsMMX was the first and oldest SIMD extension and initially filled a need for wider (64-bit) integers and additional registers. This was back when processors were 32-bit and 8 x 32-bit registers were starting to cramp our programming style. Now 64-bit processors, larger register sets, and 128-bit (or larger) vector SIMD extensions are common. There is simply no good reason to write new code using the (now) very limited MMX capabilities. We recommend that existing MMX codes be rewritten to use the newer SSE and VMX/VSX intrinsics or using the more portable GCC builtin vector support or in the case of si64 operations use C scalar code. The MMX si64 scalars are just (64-bit) operations on long long int types and any modern C compiler can handle this type. The char, short, and int SIMD operations should all be promoted to 128-bit SIMD operations on GCC builtin vectors. Both will improve cross platform portability and performance. 
+ + Appendix A: Document References + + A.1 OpenPOWER and Power documentsOpenPOWERTM Technical SpecificationPower ISATM Version 2.07 BPower ISATM Version 3.0Power Architecture 64-bit ELF ABI Specification (AKA OpenPower ABI for Linux Supplement)AltiVec™ Technology Programming Environments Manual + A.2 Intel DocumentsIntel® 64 and IA-32 Architectures Software Developer’s ManualIntelTM Intrinsics Guide + A.3 GNU Compiler Collection (GCC) documentsGCC online documentationGCC Manual (GCC 6.3)GCC Internals Manual + + + </sect1> + <sect1> + <title>Appendix B: Intel Intrinsic suffixes + + B.1 MMX + B.2 SSE + B.3 SSE2 + B.4 AVX/AVX2 __m256_* + B.5 AVX512 __m512_* + 1 +
diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..68c771a --- /dev/null +++ b/LICENSE @@ -0,0 +1,176 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ diff --git a/README.md b/README.md new file mode 100644 index 0000000..e07e383 --- /dev/null +++ b/README.md @@ -0,0 +1,93 @@ +# Porting Guide for Linux on Power +TBD... + +To build this project, one must ensure that the Docs-Master project has +also been cloned at the same directory level as the SJM-Porting-Guide project. +This can be accomplished with the following steps: + +1. Clone the master documentation project (Docs-Master) using the following command: + + ``` + $ git clone https://github.com/OpenPOWERFoundation/Docs-Master.git + ``` + +2. Clone this project (SJM-Porting-Guide) using the following command: + + ``` + $ git clone https://ibm.github.com/scheel/SJM-Porting-Guide.git + ``` + +3. Build the project with these commands: + ``` + $ cd SJM-Porting-Guide + $ mvn clean generate-sources + ``` + +The online version of the document can be found in the OpenPOWER Foundation +Document library at [TBD](http://openpowerfoundation.org/?resource_lib=TBD). + +The project which controls the look and feel of the document is the +[Docs-Maven-Plugin project](https://github.com/OpenPOWERFoundation/Docs-Maven-Plugin), an +OpenPOWER Foundation private project on GitHub. To obtain access to the Maven Plugin project, +contact Jeff Scheel \([scheel@us.ibm.com](mailto:scheel@us.ibm.com)\) or +Jeff Brown \([jeffdb@us.ibm.com](mailto:jeffdb@us.ibm.com)\). + +## License +This project is licensed under the Apache V2 license. More information +can be found in the LICENSE file or online at + + http://www.apache.org/licenses/LICENSE-2.0 + +## Community +TBD... + +## Contributions +TBD... + +Contributions to this project should conform to the `Developer Certificate +of Origin` as defined at http://elinux.org/Developer_Certificate_Of_Origin. 
+Commits to this project need to contain the following line to indicate +the submitter accepts the DCO: +``` +Signed-off-by: Your Name +``` +By contributing in this way, you agree to the terms as follows: +``` +Developer Certificate of Origin +Version 1.1 + +Copyright (C) 2004, 2006 The Linux Foundation and its contributors. +660 York Street, Suite 102, +San Francisco, CA 94110 USA + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. + + +Developer's Certificate of Origin 1.1 + +By making a contribution to this project, I certify that: + +(a) The contribution was created in whole or in part by me and I + have the right to submit it under the open source license + indicated in the file; or + +(b) The contribution is based upon previous work that, to the best + of my knowledge, is covered under an appropriate open source + license and I have the right under that license to submit that + work with modifications, whether created in whole or in part + by me, under the same open source license (unless I am + permitted to submit under a different license), as indicated + in the file; or + +(c) The contribution was provided directly to me by some other + person who certified (a), (b) or (c) and I have not modified + it. + +(d) I understand and agree that this project and the contribution + are public and that a record of the contribution (including all + personal information I submit with it, including my sign-off) is + maintained indefinitely and may be redistributed consistent with + this project or the open source license(s) involved. +``` + diff --git a/Vector_Intrinsics/app_intel_suffixes.xml b/Vector_Intrinsics/app_intel_suffixes.xml new file mode 100644 index 0000000..e070cf9 --- /dev/null +++ b/Vector_Intrinsics/app_intel_suffixes.xml @@ -0,0 +1,318 @@ + + + + + Intel Intrinsic suffixes + +
+ MMX + + + _pi16 + 4 x packed short int + + + + _pi32 + 2 x packed int + + + + _pi8 + 8 x packed signed char + + + + _pu16 + 4 x packed unsigned short int + + + + _pu8 + 8 x packed unsigned char + + + + _si64 + single 64-bit binary (logical) + + +
+ +
+ SSE + + + _ps + 4 x packed float + + + + _ss + single scalar float + + + + _si32 + single 32-bit int + + + + _si64 + single 64-bit long int + + +
+ +
+ SSE2 + + + _epi16 + 8 x packed short int + + + + _epi32 + 4 x packed int + + + + _epi64 + 2 x packed long int + + + + _epi8 + 16 x packed signed char + + + + _epu16 + 8 x packed unsigned short int + + + + _epu32 + 4 x packed unsigned int + + + + _epu8 + 16 x packed unsigned char + + + + + + + + + _pd + 2 x packed double + + + + _sd + single scalar double + + + + _pi64 + single long int + + + + _si128 + single 128-bit binary (logical) + + + +
+ +
+ AVX/AVX2 __m256_* + + + _ps + 8 x packed float + + + + _pd + 4 x packed double + + + + _epi16 + 16 x packed short int + + + + _epi32 + 8 x packed int + + + + _epi64 + 4 x packed long int + + + + _epi8 + 32 x packed signed char + + + + _epu16 + 16 x packed unsigned short int + + + + _epu32 + 8 x packed unsigned int + + + + _epu8 + 32 x packed unsigned char + + + + _ss + single scalar float (broadcast/splat) + + + + _sd + single scalar double + + + + _si256 + single 256-bit binary (logical) + + + + _pd256 + cast / zero extend + + + + _ps256 + cast / zero extend + + + + _pd128 + cast + + + + _ps128 + cast + + +
+ +
+ AVX512 __m512_* + + + _ps + 16 x packed float + + + + _pd + 8 x packed double + + + + _epi16 + 32 x packed short int + + + + _epi32 + 16 x packed int + + + + _epi64 + 8 x packed long int + + + + _epi8 + 64 x packed signed char + + + + _epu16 + 32 x packed unsigned short int + + + + _epu32 + 16 x packed unsigned int + + + + _epu64 + 8 x packed unsigned long int + + + + _epu8 + 64 x packed unsigned char + + + + _ss + single scalar float + + + + _sd + single scalar double + + + + _si512 + single 512-bit binary (logical) + + + + _pd512 + cast / zero extend + + + + _ps512 + cast / zero extend + + +
+ +
+ diff --git a/Vector_Intrinsics/app_references.xml b/Vector_Intrinsics/app_references.xml new file mode 100644 index 0000000..52010af --- /dev/null +++ b/Vector_Intrinsics/app_references.xml @@ -0,0 +1,70 @@ + + + + + Document references + +
+ OpenPOWER and Power documents + + OpenPOWER™ Technical Specifications + + + Power ISA™ Version 2.07 B + + + Power ISA™ Version 3.0 + + + Power Architecture 64-bit ELF ABI Specification (AKA OpenPower ABI for Linux Supplement) + + + AltiVec™ Technology Programming Environments Manual + + +
+
+ A.2 Intel documents + + Intel® 64 and IA-32 Architectures Software Developer’s Manual + + + Intel™ Intrinsics Guide + + +
+
+ A.3 GNU Compiler Collection (GCC) documents + + GCC online documentation + + + GCC Manual (GCC 6.3) + + + GCC Internals Manual + + +
+ +
+ diff --git a/Vector_Intrinsics/bk_main.xml b/Vector_Intrinsics/bk_main.xml new file mode 100644 index 0000000..ea01818 --- /dev/null +++ b/Vector_Intrinsics/bk_main.xml @@ -0,0 +1,103 @@ + + + + + Linux on Power Porting Guide + Vector Intrinsic + + + + + System Software Work Group + + syssw-chair@openpowerfoundation.org + + OpenPOWER Foundation + + + + 2017 + OpenPOWER Foundation + + + Revision 0.1 + OpenPOWER + + + + + + Copyright details are filled in by the template. + + + + + + The goal of this project is to provide functional equivalents of the + Intel MMX, SSE, and AVX intrinsic functions, that are commonly used in Linux + applications, and make them (or equivalents) available for the PowerPC64LE + platform. + + This document is a Standard Track, Work Group Note work product owned by the + System Software Workgroup and handled in compliance with the requirements outlined in the + OpenPOWER Foundation Work Group (WG) Process document. It was + created using the Master Template Guide version 0.9.5. Comments, + questions, etc. can be submitted to the public mailing list for this document at + TBD. + + + + + + 2017-07-26 + + + + Revision 0.1 - initial draft from Steve Munroe + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Vector_Intrinsics/ch_howto_start.xml b/Vector_Intrinsics/ch_howto_start.xml new file mode 100644 index 0000000..27ff744 --- /dev/null +++ b/Vector_Intrinsics/ch_howto_start.xml @@ -0,0 +1,115 @@ + + + + How do we work this? + + The working assumption is to start with the existing GCC headers from + ./gcc/config/i386/, then convert them to PowerISA and add them to + ./gcc/config/rs6000/. I assume we will replicate the existing header structure + and retain the existing header file and intrinsic names. This also allows us to + reuse existing DejaGNU test cases from ./gcc/testsuite/gcc.target/i386, modify + them as needed for the POWER target, and them to the + ./gcc/testsuite/gcc.target/powerpc. 
+ + We can be flexible on the sequence that headers/intrinsics and test + cases are ported.  This should be based on customer need and resolving + internal dependencies.  This implies an oldest-to-newest / bottom-up (MMX, + SSE, SSE2, …) strategy. The assumption is that existing community and user + application codes are more likely to have optimized code for previous + generation ubiquitous (SSE, SSE2, ...) processors than the latest (and rare) + SkyLake AVX512. + + I would start with an existing header from the current GCC +  ./gcc/config/i386/ and copy the header comment (including FSF copyright) down + to any vector typedefs used in the API or implementation. Skip the Intel + intrinsic implementation code for now, but add the ending #endif matching the + header's conditional guard against multiple inclusion. You can add  #include + <alternative> as needed. For example: + +#include + +/* We need definitions from the SSE header files. */ +#include + +/* The Intel API is flexible enough that we must allow aliasing with other + vector types, and their scalar components. */ +typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); + +/* Internal data types for implementing the intrinsics. */ +typedef float __v4sf __attribute__ ((__vector_size__ (16))); +/* more typedefs. */ + +/* The intrinsic implementations go here. */ + +#endif /* EMMINTRIN_H_ */]]> + + Then you can start adding small groups of related intrinsic + implementations to the header to be compiled and  examine the generated code. + Once you have what looks like reasonable code you can grep through +  ./gcc/testsuite/gcc.target/i386 for examples using the intrinsic names you + just added. You should be able to find functional tests for most X86 + intrinsics. + + The + GCC + testsuite uses the DejaGNU  test framework as documented in the + GNU Compiler Collection (GCC) + Internals manual. 
GCC adds its own DejaGNU directives and extensions, + that are embedded in the testsuite source as comments.  Some are platform + specific and will need to be adjusted for tests that are ported to our + platform. For example + + + should become something like + + + Repeat this process until you have equivalent implementations for all + the intrinsics in that header and associated test cases that execute without + error. + + + + + + + + diff --git a/Vector_Intrinsics/ch_intel_intrinsic_porting.xml b/Vector_Intrinsics/ch_intel_intrinsic_porting.xml new file mode 100644 index 0000000..7eb1bba --- /dev/null +++ b/Vector_Intrinsics/ch_intel_intrinsic_porting.xml @@ -0,0 +1,46 @@ + + + + Intel Intrinsic porting guide for Power64LE + + The goal of this project is to provide functional equivalents of the + Intel MMX, SSE, and AVX intrinsic functions, that are commonly used in Linux + applications, and make them (or equivalents) available for the PowerPC64LE + platform. These X86 intrinsics started with the Intel and Microsoft compilers + but were then ported to the GCC compiler. The GCC implementation is a set of + headers with inline functions. These inline functions provide a implementation + mapping from the Intel/Microsoft dialect intrinsic names to the corresponding + GCC Intel built-in's or directly via C language vector extension syntax. + + The current proposal is to start with the existing X86 GCC intrinsic + headers and port them (copy and change the source)  to POWER using C language + vector extensions, VMX and VSX built-ins. Another key assumption is that we + will be able to use many of existing Intel DejaGNU test cases on + ./gcc/testsuite/gcc.target/i386. This document is intended as a guide to + developers participating in this effort. However this document provides + guidance and examples that should be useful to developers who may encounter X86 + intrinsics in code that they are porting to another platform. 
+ + + + + diff --git a/Vector_Intrinsics/pom.xml b/Vector_Intrinsics/pom.xml new file mode 100644 index 0000000..1ea84df --- /dev/null +++ b/Vector_Intrinsics/pom.xml @@ -0,0 +1,148 @@ + + + + + org.openpowerfoundation.docs + workgroup-pom + 1.0.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + + Porting-Guide-Vector-Intrinsics + + jar + + + Porting-Guide-Vector-Intrinsics + + + + + 0 + + + + + + + + + org.openpowerfoundation.docs + + openpowerdocs-maven-plugin + + + + generate-webhelp + + generate-webhelp + + generate-sources + + + ${comments.enabled} + LoPAR-Virtualization + 1 + UA-17511903-1 + + appendix toc,title + article/appendix nop + article toc,title + book toc,title,figure,table,example,equation + book/appendix nop + book/chapter nop + chapter toc,title + chapter/section nop + section toc + part toc,title + qandadiv toc + qandaset toc + reference toc,title + set toc,title + + + 1 + 3 + 1 + + + Vector-Intrinsics + + + Vector-Intrinsics + + + workgroupNotes + + + + + + + + public + + + draft + + + + + + + + + true + . + + + bk_main.xml + + + + + ${basedir}/../glossary/glossary-terms.xml + 1 + www.openpowerfoundation.org + + + + + + diff --git a/Vector_Intrinsics/sec_api_implemented.xml b/Vector_Intrinsics/sec_api_implemented.xml new file mode 100644 index 0000000..1b83bf4 --- /dev/null +++ b/Vector_Intrinsics/sec_api_implemented.xml @@ -0,0 +1,35 @@ + + +
+ How the API is implemented + + One pleasant surprise is that many (at least for the older Intel) + Intrinsics are implemented directly in C vector extension code and/or a simple + mapping to GCC target specific builtins. + + + + + + +
+ diff --git a/Vector_Intrinsics/sec_crossing_lanes.xml b/Vector_Intrinsics/sec_crossing_lanes.xml new file mode 100644 index 0000000..e73d63f --- /dev/null +++ b/Vector_Intrinsics/sec_crossing_lanes.xml @@ -0,0 +1,111 @@ + + +
+ Crossing lanes + + We have seen that, most of the time, vector SIMD units prefer to keep + computations in the same “lane” (element number) as the input elements. The + only exceptions in the examples so far are the occasional splat (copy one + element to all the other elements of the vector) operations. Splat is an + example of the general category of “permute” operations (Intel would call + this a “shuffle” or “blend”). Permutes select and rearrange the + elements of (usually) a concatenated pair of vectors and deliver those + selected elements, in a specific order, to a result vector. The selection and + order of elements in the result is controlled by a third vector, either as a 3rd + input vector or an immediate field of the instruction. + + For example, consider the Intel intrinsics for + Horizontal Add / Subtract + added with SSE3. These intrinsics add (subtract) adjacent element pairs, across a pair of + input vectors, placing the sum of the adjacent elements in the result vector. + For example + _mm_hadd_ps   + which implements the operation on float: + + + Horizontal Add (hadd) provides an incremental vector “sum across” + operation commonly needed in matrix and vector transform math. Horizontal Add + is incremental as you need three hadd instructions to sum across 4 vectors of 4 + elements ( 7 for 8 x 8, 15 for 16 x 16, …). + + The PowerISA does not have a sum-across operation for float or + double. We can use the vector float add instruction after we rearrange the + inputs so that element pairs line up for the horizontal add. For example we + would need to permute the input vectors {1, 2, 3, 4} and {101, 102, 103, 104} + into vectors {2, 4, 102, 104} and {1, 3, 101, 103} before + the  vec_add. This + requires two vector permutes to align the elements into the correct lanes for + the vector add (to implement Horizontal Add).   
+ + The PowerISA provides a generalized byte-level vector permute (vperm) + based on a vector register pair source as input and a control vector. The control + vector provides 16 indexes (0-31) to select bytes from the concatenated input + vector register pair (VRA, VRB). A more specific set of permute (pack, unpack, + merge, splat) operations (across element sizes) are encoded as separate +  instruction opcodes or instruction immediate fields. + + Unfortunately only the general vec_perm + can provide the realignment + we need for the _mm_hadd_ps operation or any of the int, short variants of hadd. + For example: + + + This requires two permute control vectors; one to select the even + word elements across __X and __Y, + and another to select the odd word elements + across __X and __Y. + The results of these permutes (vec_perm) are inputs to the + vec_add, which completes the add operation. + + Fortunately the permute required for the double (64-bit) case (i.e. + _mm_hadd_pd) reduces to the equivalent of vec_mergeh / + vec_mergel  doubleword + (which are variants of  VSX Permute Doubleword Immediate). So the + implementation of _mm_hadd_pd can be simplified to this: + + + This eliminates the load of the control vectors required by the + previous example. + +
+ diff --git a/Vector_Intrinsics/sec_differences.xml b/Vector_Intrinsics/sec_differences.xml new file mode 100644 index 0000000..fb9aebd --- /dev/null +++ b/Vector_Intrinsics/sec_differences.xml @@ -0,0 +1,40 @@ + + +
+ Profound differences + + We have already mentioned above a number of architectural differences + that affect porting of codes containing Intel intrinsics to POWER. The fact + that Intel supports multiple vector extensions with different vector widths + (64, 128, 256, and 512-bits) while the PowerISA only supports vectors of + 128-bits is one issue. The difference in how the respective ISAs + support scalars in vector registers is another.  In the text above we propose + workable alternatives for the PowerPC port. There are also differences in the + handling of floating point exceptions and rounding modes that may impact the + application's performance or behavior. + + + + + +
+ diff --git a/Vector_Intrinsics/sec_extra_attributes.xml b/Vector_Intrinsics/sec_extra_attributes.xml new file mode 100644 index 0000000..6a7eec4 --- /dev/null +++ b/Vector_Intrinsics/sec_extra_attributes.xml @@ -0,0 +1,137 @@ + + +
+ Those extra attributes + + You may have noticed there are some special attributes: + + __gnu_inline__ + +This attribute should be used with a function that is also declared with the +inline keyword. It directs GCC to treat the function as if it were defined in +gnu90 mode even when compiling in C99 or gnu99 mode. + +If the function is declared extern, then this definition of the function is used +only for inlining. In no case is the function compiled as a standalone function, +not even if you take its address explicitly. Such an address becomes an external +reference, as if you had only declared the function, and had not defined it. This +has almost the effect of a macro. The way to use this is to put a function +definition in a header file with this attribute, and put another copy of the +function, without extern, in a library file. The definition in the header file +causes most calls to the function to be inlined. + +__always_inline__ + +Generally, functions are not inlined unless optimization is specified. For func- +tions declared inline, this attribute inlines the function independent of any +restrictions that otherwise apply to inlining. Failure to inline such a function +is diagnosed as an error. + +__artificial__ + +This attribute is useful for small inline wrappers that if possible should appear +during debugging as a unit. Depending on the debug info format it either means +marking the function as artificial or using the caller location for all instructions +within the inlined body. + +__extension__ + +... -pedantic’ and other options cause warnings for many GNU C extensions. +You can prevent such warnings within one expression by writing __extension__ + + So far I have been using these attributes unchanged. + + But most intrinsics map the Intel intrinsic to one or more target + specific GCC builtins. 
For example: + + + The first intrinsic (_mm_load_pd) is implemented as a C vector pointer + reference, but, judging from the comment, assumes the compiler will use a + movapd + instruction that requires 16-byte alignment (will raise a general-protection + exception if not aligned). This  implies that there is a performance advantage + for at least some Intel processors to keep the vector aligned. The second + intrinsic uses the explicit GCC builtin + __builtin_ia32_loadupd to generate the + movupd instruction which handles unaligned references. + + The opposite assumption applies to POWER and PPC64LE, where GCC + generates the VSX lxvd2x / + xxswapd + instruction sequence by default, which + allows unaligned references. The PowerISA equivalent for aligned vector access + is the VMX lvx instruction and the + vec_ld builtin, which forces quadword + aligned access (by ignoring the low order 4 bits of the effective address). The + lvx instruction does not raise + alignment exceptions, but perhaps should as part + of our implementation of the Intel intrinsic. This requires that we use + PowerISA VMX/VSX built-ins to ensure we get the expected results. + + The current prototype defines the following: + + + The aligned  load intrinsic adds an assert which checks alignment + (to match the Intel semantic) and uses  the GCC builtin + vec_ld (generates an + lvx).  The assert + generates extra code but this can be eliminated by defining + NDEBUG at compile time. + The unaligned load intrinsic uses the GCC builtin + vec_vsx_ld  (for PPC64LE generates + lxvd2x / + xxswapd for POWER8  and will + simplify to lxv + or lxvx + for POWER9).  And similarly for _mm_store_pd / + _mm_storeu_pd, using + vec_st + and vec_vsx_st. These concepts extend to the + load/store intrinsics for vector float and vector int. + +
+ diff --git a/Vector_Intrinsics/sec_floatingpoint_exceptions.xml b/Vector_Intrinsics/sec_floatingpoint_exceptions.xml new file mode 100644 index 0000000..2e61aeb --- /dev/null +++ b/Vector_Intrinsics/sec_floatingpoint_exceptions.xml @@ -0,0 +1,73 @@ + + +
+ Floating Point Exceptions + + Nominally both ISAs support the IEEE754 specifications, but there are + some subtle differences. Both architectures define a status and control register + to record exceptions and enable / disable floating-point exceptions for program + interrupt or default action. Intel has a MXCSR and PowerISA has a FPSCR which + basically do the same thing but with different bit layouts. + + Intel provides _mm_setcsr / _mm_getcsr + intrinsics to allow direct + access to the MXCSR. In the early days before the OS POSIX run-times were + updated  to manage the MXCSR, this might have been useful. Today this would be + highly discouraged with a strong preference to use the POSIX APIs + (feclearexcept, + fegetexceptflag, + fesetexceptflag, ...) instead. + + If we implement _mm_setcsr / + _mm_getcsr at all, we should simply + redirect the implementation to use the POSIX APIs from + <fenv.h>. But it + might be simpler just to replace these intrinsics with macros that generate + #error. + + The Intel MXCSR does have some non-standard (outside POSIX/IEEE754) quirks: + Flush-To-Zero and Denormals-Are-Zeros flags. This simplifies the hardware + response to what should be a rare condition (underflows where the result can + not be represented in the exponent range and precision of the format) by simply + returning a signed 0.0 value. The intrinsic header implementation does provide + constant masks for _MM_DENORMALS_ZERO_ON + (<pmmintrin.h>) and + _MM_FLUSH_ZERO_ON (<xmmintrin.h>), + so technically it is available to users + of the Intel Intrinsics API. + + The VMX Vector facility provides a separate Vector Status and Control + register (VSCR) with a Non-Java Mode control bit. This control combines the + flush-to-zero semantics for floating Point underflow and denormal values. But + this control only applies to VMX vector float instructions and does not apply + to VSX scalar floating Point or vector double instructions. 
The FPSCR does + define a Floating-Point non-IEEE mode which is optional in the architecture. + This would apply to Scalar and VSX floating-point operations if it was + implemented. This was largely intended for embedded processors and is not + implemented in the POWER processor line. + + As the flush-to-zero is primarily a performance enhancement and is + clearly outside the IEEE754 standard, it may be best to simply ignore this + option for the intrinsic port. + +
+ diff --git a/Vector_Intrinsics/sec_floatingpoint_rounding.xml b/Vector_Intrinsics/sec_floatingpoint_rounding.xml new file mode 100644 index 0000000..19653a7 --- /dev/null +++ b/Vector_Intrinsics/sec_floatingpoint_rounding.xml @@ -0,0 +1,33 @@ + + +
+ Floating-point rounding modes + + The Intel (x86 / x86_64) and PowerISA architectures both support the + 4 IEEE754 rounding modes. Again while the Intel Intrinsic API allows the + application to change rounding modes via updates to the + MXCSR it is a bad idea + and should be replaced with the POSIX APIs (fegetround and + fesetround). + +
+ diff --git a/Vector_Intrinsics/sec_gcc_vector_extensions.xml b/Vector_Intrinsics/sec_gcc_vector_extensions.xml new file mode 100644 index 0000000..d78bca2 --- /dev/null +++ b/Vector_Intrinsics/sec_gcc_vector_extensions.xml @@ -0,0 +1,113 @@ + + +
+ GCC Vector Extensions + + The GCC vector extensions are common syntax but implemented in a + target specific way. Using the C vector extensions requires the + __gnu_inline__ + attribute to avoid syntax errors in case the user specified  C standard + compliance (-std=c90, -std=c11, + etc) that would normally disallow such + extensions. + + The GCC implementation for PowerPC64 Little Endian is (mostly) + functionally compatible with x86_64 vector extension usage. We can use the same + type definitions (at least for  vector_size (16)), operations, syntax + <{...}> + for vector initializers and constants, and array syntax + <[]> + for vector element access. So simple arithmetic / logical operations + on whole vectors should work as is. + + The caveat is that the interface data type of the Intel Intrinsic may + not match the data types of the operation, so it may be necessary to cast the + operands to the specific type for the operation. This also applies to vector + initializers and accessing vector elements. You need to use the appropriate + type to get the expected results. Of course this applies to X86_64 as well. For + example: + + + Note the cast from the interface type (__m128) to the implementation + type (__v4sf, defined in the intrinsic header) for the vector float add (+) + operation. This is enough for the compiler to select the appropriate vector add + instruction for the float type. Then the result (which is + __v4sf) needs to be + cast back to the expected interface type (__m128). + + Note also the use of array syntax ((__v4sf)__A)[0] + to extract the lowest + (left most [Note: Here we are using logical left and logical right + which will not match the PowerISA register view in Little endian. Logical left + is the left most element for initializers {left, … , right}, storage order + and array  order where the left most element is [0].]) + element of a vector. The cast (__v4sf) ensures that the compiler knows we are + extracting the left most 32-bit float. 
The compiler ensures the code generated + matches the Intel behavior for PowerPC64 Little Endian. + + The code generation is complicated by the fact that PowerISA vector + registers are Big Endian (element 0 is the left most word of the vector) and + X86 scalar stores are from the left most (word/dword) for the vector register. + Application code with extensive use of scalar (vs packed) intrinsic loads / + stores should be flagged for rewrite to native PPC code using existing scalar + types (float, double, int, long, etc.). + + Another example is the set reverse order: + + + Note the use of initializer syntax to collect a set of scalars + into a vector. Code with constant initializer values will generate a vector + constant of the appropriate endian. However code with variables in the + initializer can get complicated as it often requires transfers between register + sets and perhaps format conversions. We can assume that the compiler will + generate the correct code, but if this class of intrinsics shows up as a hot spot, + a rewrite to native PPC vector built-ins may be appropriate. For example, an + initializer of a variable replicated to all the vector fields might not be + recognized as a “load and splat” and making this explicit may help the + compiler generate better code. + +
+ diff --git a/Vector_Intrinsics/sec_handling_avx.xml b/Vector_Intrinsics/sec_handling_avx.xml new file mode 100644 index 0000000..518fe79 --- /dev/null +++ b/Vector_Intrinsics/sec_handling_avx.xml @@ -0,0 +1,91 @@ + + +
+ Dealing with AVX and AVX512 + + AVX is a bit easier for PowerISA and the ELF V2 ABI. First we have + lots (64) of vector registers and a super scalar vector pipe-line (can execute + two or more independent 128-bit vector operations concurrently). Second the ELF + V2 ABI was designed to pass and return larger aggregates in vector + registers: + + + + Up to 12 qualified vector arguments can be passed in + v2–v13. + + + A qualified vector argument corresponds to: + + + A vector data type + + + + A member of a homogeneous aggregate of multiple like data types + passed in up to eight vector registers. + + + + Homogeneous floating-point or vector aggregate return values + that consist of up to eight registers with up to eight elements will + be returned in floating-point or vector registers that correspond to + the parameter registers that would be used if the return value type + were the first input parameter to a function. + + + + + + + So the ABI allows for passing up to three structures each + representing 512-bit vectors and returning such (512-bit) structure all in VMX + registers. This can be extended further by spilling parameters (beyond 12 X + 128-bit vectors) to the parameter save area, but we should not need that, as + most intrinsics only use 2 or 3 operands.. Vector registers not needed for + parameter passing, along with an additional 8 volatile vector registers, are + available for scratch and local variables. All can be used by the application + without requiring register spill to the save area. So most intrinsic operations + on 256- or 512-bit vectors can be held within existing PowerISA vector + registers. + + For larger functions that might use multiple AVX 256 or 512-bit + intrinsics and, as a result, push beyond the 20 volatile vector registers, the + compiler will just allocate non-volatile vector registers by allocating a stack + frame and spilling non-volatile vector registers to the save area (as needed in + the function prologue). 
This frees up to 64 vectors (32 x 256-bit or 16 x + 512-bit structs) for code optimization. + + Based on the specifics of our ISA and ABI we will not use + __vector_size__ (32) or (64) in the PowerPC implementation of + __m256 and __m512 + types. Instead we will typedef structs of 2 or 4 vector (__m128) fields. This + allows efficient handling of these larger data types without requiring new GCC + language extensions. + + In the end we should use the same type names and definitions as the + GCC X86 intrinsic headers where possible. Where that is not possible we can + define new typedefs that provide the best mapping to the underlying PowerISA + hardware. + +
+ diff --git a/Vector_Intrinsics/sec_handling_mmx.xml b/Vector_Intrinsics/sec_handling_mmx.xml new file mode 100644 index 0000000..dc21a90 --- /dev/null +++ b/Vector_Intrinsics/sec_handling_mmx.xml @@ -0,0 +1,72 @@ + + +
+ Dealing with MMX + + MMX is actually the hard case. The __m64 + type supports SIMD vector + int types (char, short, int, long).  The  Intel API defines   + __m64 as: + + + This is problematic for the PowerPC target (not really supported in + GCC) and we would prefer to use a native PowerISA type that can be passed in a + single register.  The PowerISA Rotate Under Mask instructions can easily + extract and insert integer fields of a General Purpose Register (GPR). This + implies that MMX integer types can be handled as an internal union of arrays for + the supported element types. So a 64-bit unsigned long long is the best type + for parameter passing and return values, especially for the 64-bit (_si64) + operations as these normally generate a single PowerISA instruction. + + The SSE extensions include some convert operations for + __m128 to / + from __m64 and this includes some int to / from float conversions. However in + these cases the float operands always reside in SSE (XMM) registers (which + match the PowerISA vector registers) and the MMX registers only contain integer + values. POWER8 (PowerISA-2.07) has direct move instructions between GPRs and + VSRs. So these transfers are normally a single instruction and any conversions + can be handled in the vector unit. + + When transferring a __m64 value to a vector register we should also + execute a xxspltd instruction to ensure there is valid data in all four + element lanes before doing floating point operations. This avoids generating + extraneous floating point exceptions that might be generated by uninitialized + parts of the vector. The top two lanes will have the floating point results + that are in position for direct transfer to a GPR or stored via Store Float + Double (stfd). These operations are internal to the intrinsic implementation and + there is no requirement to keep temporary vectors in correct Little Endian + form. 
+ + Also for the smaller element sizes and higher element counts (MMX + _pi8 and _pi16 types) the number of  Rotate Under Mask instructions required to + disassemble the 64-bit __m64 + into elements, perform the element calculations, + and reassemble the elements in a single __m64 + value can get larger. In this + case we can generate shorter instruction sequences by transferring (via direct + move instruction) the GPR __m64 value to + a vector register, perform the + SIMD operation there, then transfer the __m64 + result back to a GPR. + +
+ diff --git a/Vector_Intrinsics/sec_how_findout.xml b/Vector_Intrinsics/sec_how_findout.xml new file mode 100644 index 0000000..4c41752 --- /dev/null +++ b/Vector_Intrinsics/sec_how_findout.xml @@ -0,0 +1,60 @@ + + +
+ How did I find this out? + + The next question is where did I get the details above. The GCC + documentation for __builtin_ia32_loadupd + provides minimal information (the + builtin name, parameters and return types). Not very informative. + + Looking up the Intel intrinsic description is more informative. You + can Google the intrinsic name or use the + Intel + Intrinsic guide for this. The Intrinsic Guide is interactive and + includes  Intel (Chip) technology and text based search capabilities. Clicking + on the intrinsic name opens to a synopsis including: the underlying instruction + name, text description, operation pseudo code, and in some cases performance + information (latency and throughput). + + The key is to get a description of the intrinsic (operand fields and + types, and which fields are updated for the result) and the underlying Intel + instruction. If the Intrinsic guide is not clear you can look up the + instruction details in the + “Intel® 64 and IA-32 + Architectures Software Developer’s Manual”. + + Information about the PowerISA vector facilities is found in the + PowerISA Version 2.07B (for POWER8 and + 3.0 for + POWER9) manual, Book I, Chapter 6. Vector Facility and Chapter 7. + Vector-Scalar Floating-Point Operations. Another good reference is the + OpenPOWER ELF V2 application binary interface (ABI) + document, Chapter 6. Vector Programming Interfaces and Appendix A. Predefined + Functions for Vector Programming. + + Another useful document is the original Altivec Technology Programming Interface Manual + with a  user friendly structure and many helpful diagrams. But alas the PIM does not + cover the recent PowerISA (power7,  power8, and power9) enhancements. + +
+ diff --git a/Vector_Intrinsics/sec_intel_intrinsic_functions.xml b/Vector_Intrinsics/sec_intel_intrinsic_functions.xml new file mode 100644 index 0000000..e83513b --- /dev/null +++ b/Vector_Intrinsics/sec_intel_intrinsic_functions.xml @@ -0,0 +1,122 @@ + + +
+ Intel Intrinsic functions + + So what is an intrinsic function? From Wikipedia: + +
In compiler theory, an + intrinsic function is a function available for use in a given + programming + language whose implementation is handled specially by the compiler. + Typically, it substitutes a sequence of automatically generated instructions + for the original function call, similar to an + inline function. + Unlike an inline function though, the compiler has an intimate knowledge of the + intrinsic function and can therefore better integrate it and optimize it for + the situation. This is also called builtin function in many languages.
+ + The “Intel Intrinsics” API provides access to the many + instruction set extensions (Intel Technologies) that Intel has added (and + continues to add) over the years. The intrinsics provided access to new + instruction capabilities before the compilers could exploit them directly. + Initially these intrinsic functions were defined for the Intel and Microsoft + compilers and were eventually implemented and contributed to GCC. + + The Intel Intrinsics have a specific type and naming structure. In + this naming structure, functions start with a common prefix (MMX and SSE use + '_mm' prefix, while AVX added the '_mm256' and '_mm512' prefixes), then a short + functional name ('set', 'load', 'store', 'add', 'mul', 'blend', 'shuffle', '…') and a suffix + ('_pd', '_sd', '_pi32'...) with type and packing information. See + for the list of common intrinsic suffixes. + + Oddly many of the MMX/SSE operations are not vectors at all. There + are a lot of scalar operations on a single float, double, or long long type. In + effect these are scalars that can take advantage of the larger (xmm) register + space. Also in the Intel 32-bit architecture they provided IEEE754 float and + double types, and 64-bit integers that did not exist or were hard to implement + in the base i386/387 instruction set. These scalar operations use a suffix + starting with '_s' (_sd for scalar double float, + _ss for scalar float, and _si64 + for scalar long long). + + True vector operations use the packed or extended packed suffixes, + starting with '_p' or '_ep' (_pd for vector double, + _ps for vector float, and + _epi32 for vector int). The use of '_ep'   + seems to be reserved to disambiguate + intrinsics that existed in the (64-bit vector) MMX extension from the extended + (128-bit vector) SSE equivalent. For example + _mm_add_pi32 is an MMX operation on + a pair of 32-bit integers, while + _mm_add_epi32 is an SSE2 operation on a vector + of 4 32-bit integers. 
+ + The GCC  builtins for the + i386.target, + (includes x86 and x86_64) are not + the same as the Intel Intrinsics. While they have similar intent and cover most + of the same functions, they use a different naming (prefixed with + __builtin_ia32_, then function name with type suffix) and use GCC vector type + modes for operand types. For example: + + + Note: A key difference between GCC builtins for i386 and Powerpc is + that the x86 builtins have different names for each operation and type while the + powerpc altivec builtins tend to have a single generic builtin for  each + operation, across a set of compatible operand types. + + In GCC the Intel Intrinsic header (*intrin.h) files are implemented + as a set of inline functions using the Intel Intrinsic API names and types. + These functions are implemented as either GCC C vector extension code or via + one or more GCC builtins for the i386 target. So let's take a look at some + examples from GCC's SSE2 intrinsic header emmintrin.h: + + + Note that the   + _mm_add_pd is implemented directly as C vector + extension code, while + _mm_add_sd is implemented via the GCC builtin + __builtin_ia32_addsd. From the + discussion above we know the _pd suffix + indicates a packed vector double while the _sd suffix indicates a scalar double + in an XMM register. + + + + + +
+ diff --git a/Vector_Intrinsics/sec_intel_intrinsic_includes.xml b/Vector_Intrinsics/sec_intel_intrinsic_includes.xml new file mode 100644 index 0000000..275cffb --- /dev/null +++ b/Vector_Intrinsics/sec_intel_intrinsic_includes.xml @@ -0,0 +1,82 @@ + + +
+ The structure of the intrinsic includes + + The GCC x86 intrinsic functions for vector were initially grouped by + technology (MMX and SSE), which starts with MMX and continues with SSE through + SSE4.1 stacked like a set of Russian dolls. + + Basically each higher layer include needs typedefs and helper macros + defined by the lower level intrinsic includes. mm_malloc.h simply provides + wrappers for posix_memalign and free. Then it gets a little weird, starting + with the crypto extensions: + + + + For AVX, AVX2, and AVX512 they must have decided + that the Russian Dolls thing was getting out of hand. AVX et al. is split + across 14 files + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include ]]> + + but they do not want applications to include these + individually. + + So immintrin.h includes everything Intel vector, including all the + AVX, AES, SSE and MMX flavors. + directly; include instead." +#endif]]> + + So what is the net? The include structure provides some strong clues + about the order that we should approach this effort.  For example if you need + an intrinsic from SSE4 (smmintrin.h) we are likely to need the type definitions + from SSE2 (emmintrin.h). So a bottom-up (MMX, SSE, SSE2, …) approach seems + like the best plan of attack. Also saving the AVX parts for later makes sense, + as most are just wider forms of operations that already exist in SSE. + + We should use the same include structure to implement our PowerISA + equivalent API headers. This will make porting easier (drop-in replacement) and + should get the application running quickly on POWER. Then we are in a position + to profile and analyze the resulting application. This will show any hot spots + where the simple one-to-one transformation results in bottlenecks and + additional tuning is needed. 
For these cases we should improve our tools (SDK + MA/SCA) to identify opportunities for, and perhaps propose, alternative + sequences that are better tuned to PowerISA and our micro-architecture. + +
+ diff --git a/Vector_Intrinsics/sec_intel_intrinsic_types.xml b/Vector_Intrinsics/sec_intel_intrinsic_types.xml new file mode 100644 index 0000000..23435d1 --- /dev/null +++ b/Vector_Intrinsics/sec_intel_intrinsic_types.xml @@ -0,0 +1,89 @@ + + +
+ The types used for intrinsics + + The type system for Intel intrinsics is a little strange. For example + from xmmintrin.h: + + + So there is one set of types that are used in the function prototypes + of the API, and the internal types that are used in the implementation. Notice + the special attribute __may_alias__. From the GCC documentation: + +
+ Accesses through pointers to types with this attribute are not subject + to type-based alias analysis, but are instead assumed to be able to alias any + other type of objects. ... This extension exists to support some vector APIs, + in which pointers to one vector type are permitted to alias pointers to a + different vector type.
+ + So there are a + couple of issues here: 1)  the API seems to force the compiler to assume + aliasing of any parameter passed by reference. Normally the compiler assumes + that parameters of different size do not overlap in storage, which allows more + optimization. 2) the data type used at the interface may not be the correct + type for the implied operation. So the type + __m128i (which is defined + as vector long long) is also used for parameters and return values of vector + [char | short | int ]. + + This may not matter when using x86 built-in's but does matter when + the implementation uses C vector extensions or in our case uses PowerPC generic + vector built-ins + (). + For the latter cases the type must be correct for + the compiler to generate the correct type (char, short, int, long) + () for the generic + builtin operation. There is also concern that excessive use of + __may_alias__ + will limit compiler optimization. We are not sure how important this attribute + is to the correct operation of the API.  So at a later stage we should + experiment with removing it from our implementation for PowerPC. + + The good news is that PowerISA has good support for 128-bit vectors + and (with the addition of VSX) all the required vector data (char, short, int, + long, float, double) types. However Intel supports a wider variety of the + vector sizes  than PowerISA does. This started with the 64-bit MMX vector + support that preceded SSE and extends to 256-bit and 512-bit vectors of AVX, + AVX2, and AVX512 that followed SSE. + + Within the GCC Intel intrinsic implementation these are all + implemented as vector attribute extensions of the appropriate  size (   + __vector_size__ ({8 | 16 | 32 | 64})). For the PowerPC target  GCC currently + only supports the native __vector_size__ ( 16 ). These we can support directly + in VMX/VSX registers and associated instructions. 
The GCC will compile with + other   __vector_size__ values, but the resulting types are treated as simple + arrays of the element type. This does not allow the compiler to use the vector + registers and vector instructions for these (nonnative) vectors.   So what is + a programmer to do? + + + + +
+ diff --git a/Vector_Intrinsics/sec_more_examples.xml b/Vector_Intrinsics/sec_more_examples.xml new file mode 100644 index 0000000..8e4232e --- /dev/null +++ b/Vector_Intrinsics/sec_more_examples.xml @@ -0,0 +1,76 @@ + + +
+ Some more intrinsic examples + + The intrinsic + _mm_cvtpd_ps + converts a packed vector double into + a packed vector single float. Since only 2 doubles fit into a 128-bit vector + only 2 floats are returned and occupy only half (64-bits) of the XMM register. + For this intrinsic the 64 bits are packed into the logical left half of the + register and the logical right half of the register is set to zero (as per the + Intel cvtpd2ps instruction). + + The PowerISA provides the VSX Vector round and Convert + Double-Precision to Single-Precision format (xvcvdpsp) instruction. In the ABI + this is vec_floato (vector double) .   + This instruction converts each double + element then transfers converted element 0 to float element 1, and converted + element 1 to float element 3. Float elements 0 and 2 are undefined (the + hardware can do whatever). This does not match the expected results for + _mm_cvtpd_ps. + , 1.0, , 2.0} +_mm_cvtpd_ps ({1.0, 2.0}) result = {1.0, 2.0, 0.0, 0.0}]]> + + So we need to re-position the results to word elements 0 and 2, which + allows a pack operation to deliver the correct format. Here the merge odd + splats element 1 to 0 and element 3 to 2. The Pack operation combines the low + half of each doubleword from the vector result and vector of zeros to generate + the required format. + + + This  technique is also used to implement   + _mm_cvttpd_epi32 + which converts a packed vector double into a packed vector int. The PowerISA instruction + xvcvdpsxws uses a similar layout for the result as + xvcvdpsp and requires the same fix up. + +
+ diff --git a/Vector_Intrinsics/sec_other_intrinsic_examples.xml b/Vector_Intrinsics/sec_other_intrinsic_examples.xml new file mode 100644 index 0000000..37b2fae --- /dev/null +++ b/Vector_Intrinsics/sec_other_intrinsic_examples.xml @@ -0,0 +1,68 @@ + + +
+ Examples implemented using other intrinsics + + Some intrinsic implementations are defined in terms of other + intrinsics. For example: + + + This notion of using part (one fourth or half) of the SSE XMM + register and leaving the rest unchanged (or forced to zero) is specific to SSE + scalar operations and can generate some complicated (sub-optimal) PowerISA + code.  In this case _mm_load_sd + passes the dereferenced double value  to + _mm_set_sd which + uses C vector initializer notation to combine (merge) that + double scalar value with a scalar 0.0 constant into a vector double. + + While code like this should work as-is for PPC64LE, you should look + at the generated code and assess if it is reasonable.  In this case the code + is not awful (a load double splat, vector xor to generate 0.0s, then a + xxmrghd + to combine __F and 0.0).  Other examples may generate sub-optimal code and + justify a rewrite to PowerISA scalar or vector code (GCC PowerPC + AltiVec Built-in Functions or inline assembler). + + Net: try using the existing C code if you can, but check on what the + compiler generates.  If the generated code is horrendous, it may be worth the + effort to write a PowerISA specific equivalent. For codes making extensive use + of MMX or SSE scalar intrinsics you will be better off rewriting to use + standard C scalar types and letting the GCC compiler handle the details + (see ). + +
+ diff --git a/Vector_Intrinsics/sec_packed_vs_scalar_intrinsics.xml b/Vector_Intrinsics/sec_packed_vs_scalar_intrinsics.xml new file mode 100644 index 0000000..e798816 --- /dev/null +++ b/Vector_Intrinsics/sec_packed_vs_scalar_intrinsics.xml @@ -0,0 +1,302 @@ + + +
+ Packed vs scalar intrinsics + + So what is actually going on here? The vector code is clear enough if + you know that the '+' operator is applied to each vector element. The intent of + the builtin is a little less clear, as the GCC documentation for + __builtin_ia32_addsd is not very + helpful (nonexistent). So perhaps the + Intel Intrinsic Guide + will be more enlightening. To paraphrase: + 
+ + From the + _mm_add_pd description ; + the double float + elements ([0] and [1] or bits [63:0] and [127:64]) of operands a and b are + added and the resulting vector is returned. + + From the + _mm_add_sd description ; + Add element 0 of the first operand + (a[0]) to element 0 of the second operand (b[0]) and return the packed vector + double {(a[0] + b[0]), a[1]}. Or said differently the sum of the logical left + most halves of the operands is returned in the logical left most half + (element [0]) of the  result, along with the logical right half (element [1]) + of the first operand (unchanged) in the logical right half of the result.
+ + So the packed double is easy enough but the scalar double details are + more complicated. One source of complication is that while both Instruction Set + Architectures (SSE vs VSX) support scalar floating point operations in vector + registers the semantics are different. + + + + The vector bit and field numbering is different (reversed). + + + For Intel the scalar is always placed in the low order (right most) + bits of the XMM register (and the low order address for load and store). + + + + For PowerISA and VSX, scalar floating point operations and Floating + Point Registers (FPRs) are on the low numbered bits which is the left hand + side of the vector / scalar register (VSR). + + + + For the PowerPC64 ELF V2 little endian ABI we also make a point of + making the GCC vector extensions and vector built ins, appear to be little + endian. So vector element 0 corresponds to the low order address and low + order (right hand) bits of the vector register (VSR). + + + + + The handling of the non-scalar part of the register for scalar + operations is different. + + + For Intel ISA the scalar operations either leave the high order part + of the XMM vector unchanged or in some cases force it to 0.0. + + + + For PowerISA scalar operations on the combined FPR/VSR register leave + the remainder (right half of the VSR) undefined. + + + + + + To minimize confusion and use consistent nomenclature, I will try to + use the terms logical left and logical right elements based on the order they + appear in C vector initializers and element index order. So in the vector + (__v2df){1.0, 2.0}, the value 1.0 is in the logical left element [0] and + the value 2.0 is logical right element [1]. + + So let's look at how to implement these intrinsics for the PowerISA. + For example in this case we can use the GCC vector extension, like so: + + + The packed double implementation operates on the vector as a whole. 
+ The scalar double implementation operates on and updates only element [0] of + the vector and leaves the __A[1] element unchanged.   + From this source the GCC + compiler generates the following code for PPC64LE target: + + The packed vector double generated the corresponding VSX vector + double add (xvadddp). But the scalar implementation is a bit more complicated. + : + 720: 07 1b 42 f0 xvadddp vs34,vs34,vs35 + ... + +0000000000000740 : + 740: 56 13 02 f0 xxspltd vs0,vs34,1 + 744: 57 1b 63 f0 xxspltd vs35,vs35,1 + 748: 03 19 60 f0 xsadddp vs35,vs0,vs35 + 74c: 57 18 42 f0 xxmrghd vs34,vs34,vs35 + ... +]]> + + First, in the PPC64LE vector format, element [0] is not in the correct + position for  the scalar operations. So the compiler generates vector splat + double (xxspltd) instructions to copy elements __A[0] and + __B[0] into position + for the VSX scalar add double (xsadddp) that follows. However the VSX scalar + operation leaves the other half of the VSR undefined (which does not match the + expected Intel semantics). So the compiler must generate a vector merge high + double (xxmrghd) instruction to combine the original + __A[1] element (from vs34) + with the scalar add result from vs35 + element [1]. This merge swings the scalar + result from vs35[1] element into the + vs34[0] position, while preserving the + original vs34[1] (from __A[1]) + element (copied to itself). Fun + fact: The vector registers in PowerISA are decidedly Big Endian. But we decided + to make the PPC64LE ABI behave like a Little Endian system to make application + porting easier. This requires the compiler to manipulate the PowerISA vector + intrinsics behind the scenes to get the correct Little Endian results. For + example the element selector [0|1] for vec_splat and the + generation of vec_mergeh vs vec_mergel + are reversed for Little Endian. + + This technique applies to packed and scalar intrinsics for the + usual arithmetic operators (add, subtract, multiply, divide). 
Using GCC vector + extensions in these intrinsic implementations provides the compiler more + opportunity to optimize the whole function. + + Now we can look at a slightly more interesting (complicated) case. + Square root (sqrt) is not an arithmetic operator in C and is usually handled + with a library call or a compiler builtin. We really want to avoid library + calls and want to avoid any unexpected side effects. As you see below the + implementation of + _mm_sqrt_pd and + _mm_sqrt_sd + intrinsics are based on GCC x86 built ins. + + + For the packed vector sqrt, the PowerISA VSX has an equivalent vector + double square root instruction and GCC provides the vec_sqrt builtin. But the + scalar implementation involves an additional parameter and an extra move. +  This seems intended to mimic the propagation of the __A[1] input to the + logical right half of the XMM result that we saw with _mm_add_sd above. + + The instinct is to extract the low scalar (__B[0]) + from operand __B + and pass this to  the GCC __builtin_sqrt () before recombining that scalar + result with __A[1] for the vector result. Unfortunately C language standards + force the compiler to call the libm sqrt function unless -ffast-math is + specified. The -ffast-math option is not commonly used and we want to avoid the + external library dependency for what should be only a few inline instructions. + So this is not a good option. + + Thinking outside the box; we do have an inline intrinsic for a + (packed) vector double sqrt, that we just implemented. However we need to + ensure the other half of __B (__B[1]) + does not cause any harmful side effects + (like raising exceptions for NAN or  negative values). The simplest solution + is to splat __B[0] to both halves of a temporary value before taking the + vec_sqrt. Then this result can be combined with __A[1] to return the final + result. 
For example: + + + In this  example we use + _mm_set1_pd + to splat the scalar __B[0], before passing that vector to our + _mm_sqrt_pd implementation, + then pass the sqrt result (c[0])  with __A[1] to   + _mm_setr_pd + to combine the final result. You could also use the {c[0], __A[1]} + initializer instead of _mm_setr_pd. + + Now we can look at vector and scalar compares that add their own + complication: For example, the Intel Intrinsic Guide for + _mm_cmpeq_pd + describes comparing double elements [0|1] and returning + either 0s for not equal and 1s (0xFFFFFFFFFFFFFFFF + or long long -1) for equal. The comparison result is intended as a select mask + (predicates) for selecting or ignoring specific elements in later operations. + The scalar version + _mm_cmpeq_sd + is similar except for the quirk + of only comparing element [0] and combining the result with __A[1] to return + the final vector result. + + The packed vector implementation for PowerISA is simple as VSX + provides the equivalent instruction and GCC provides the + vec_cmpeq builtin + supporting the vector double type. The technique of using scalar comparison + operators on the __A[0] and __B[0] + does not work as the C comparison operators + return 0 or 1 results while we need the vector select mask (effectively 0 or + -1). Also we need to watch for sequences that mix scalar floats and integers, + generating if/then/else logic or requiring expensive transfers across register + banks. + + In this case we are better off using explicit vector built-ins for + _mm_add_sd as an example. We can use vec_splat + from element [0] to temporaries + where we can safely use vec_cmpeq to generate the expected selector mask. Note + that the vec_cmpeq returns a bool long type so we need to cast the result back + to __v2df. Then use the + (__m128d){c[0], __A[1]} initializer to combine the + comparison result with the original __A[1] input and cast to the required + interface type.  
So we have this example: + + + Now let's look at a similar example that adds some surprising + complexity. This is the compare not equal case so we should be able to find the + equivalent vec_cmpne builtin: + + + 
+ diff --git a/Vector_Intrinsics/sec_performance.xml b/Vector_Intrinsics/sec_performance.xml new file mode 100644 index 0000000..5230ae2 --- /dev/null +++ b/Vector_Intrinsics/sec_performance.xml @@ -0,0 +1,49 @@ + + +
+ Performance + + The performance of a ported intrinsic depends on the specifics of the + intrinsic and the context it is used in. Many of the SIMD operations have + equivalent instructions in both architectures. For example the vector float and + vector double match very closely. However the SSE and VSX scalars have subtle + differences of how the scalar is positioned with the vector registers and what + happens to the rest (non-scalar part) of the register (previously discussed in + ). + This requires additional PowerISA instructions + to preserve the non-scalar portion of the vector registers. This may or may not + be important to the logic of the program being ported, but we have to handle the + case where it is. + + This is where the context of how the intrinsic is used starts to + matter. If the scalar intrinsics are used within a larger program the compiler + may be able to eliminate the redundant register moves as the results are never + used. In the other cases common set up (like permute vectors or bit masks) can + be common-ed up and hoisted out of the loop. So it is very important to let the + compiler do its job with higher optimization levels (-O3, + -funroll-loops). + + + + + 
+ diff --git a/Vector_Intrinsics/sec_performance_mmx.xml b/Vector_Intrinsics/sec_performance_mmx.xml new file mode 100644 index 0000000..a4e59ad --- /dev/null +++ b/Vector_Intrinsics/sec_performance_mmx.xml @@ -0,0 +1,41 @@ + + +
+ Using MMX intrinsics + + MMX was the first and oldest SIMD extension and initially filled a + need for wider (64-bit) integer and additional registers. This is back when + processors were 32-bit and 8 x 32-bit registers was starting to cramp our + programming style. Now 64-bit processors, larger register sets, and 128-bit (or + larger) vector SIMD extensions are common. There is simply no good reason to + write new code using the (now) very limited MMX capabilities. + + We recommend that existing MMX codes be rewritten to use the newer + SSE  and VMX/VSX intrinsics or using the more portable GCC  builtin vector + support or in the case of si64 operations use C scalar code. The MMX si64 + scalars are just (64-bit) operations on long long int types and any + modern C compiler can handle this type. The char, short, and int SIMD operations + should all be promoted to 128-bit SIMD operations on GCC builtin vectors. Both + will improve cross platform portability and performance. + + 
+ diff --git a/Vector_Intrinsics/sec_performance_sse.xml b/Vector_Intrinsics/sec_performance_sse.xml new file mode 100644 index 0000000..1b8379f --- /dev/null +++ b/Vector_Intrinsics/sec_performance_sse.xml @@ -0,0 +1,44 @@ + + +
+ Using SSE float and double scalars + + SSE scalar float / double intrinsics  “hand” optimization is no + longer necessary. This was important, when SSE was initially introduced, and + compiler support was limited or nonexistent.  Also SSE scalar float / double + provided additional (16) registers and IEEE754 compliance, not available from + the 8087 floating point architecture that preceded it. So application + developers were motivated to use SSE instruction versus what the compiler was + generating at the time. + + Modern compilers can now generate and  optimize these (SSE + scalar) instructions for Intel from C standard scalar code. Of course PowerISA + supported IEEE754 float and double and had 32 dedicated floating point + registers from the start (and now 64 with VSX). So replacing an Intel specific + scalar intrinsic implementation with the equivalent C language scalar + implementation is usually a win; allows the compiler to apply the latest + optimization and tuning for the latest generation processor, and is portable to + other platforms where the compiler can also apply the latest optimization and + tuning for that processor's latest generation. + + 
+ diff --git a/Vector_Intrinsics/sec_power_vector_permute_format.xml b/Vector_Intrinsics/sec_power_vector_permute_format.xml new file mode 100644 index 0000000..e626d63 --- /dev/null +++ b/Vector_Intrinsics/sec_power_vector_permute_format.xml @@ -0,0 +1,147 @@ + + +
+ Vector permute and formatting instructions + + The vector Permute and formatting chapter follows and is an important + one to study. These operations operate on the byte, halfword, word (and with + 2.07 doubleword) integer types, plus a special Pixel type. The shift + instructions in this chapter operate on the vector as a whole at either the bit + or the byte (octet) level. This is an important chapter to study for moving + PowerISA vector results into the vector elements that Intel Intrinsics + expect: + + 6.8 Vector Permute and Formatting Instructions . . . . . . . . . . . 249 +6.8.1 Vector Pack and Unpack Instructions . . . . . . . . . . . . . 249 +6.8.2 Vector Merge Instructions . . . . . . . . . . . . . . . . . . 256 +6.8.3 Vector Splat Instructions . . . . . . . . . . . . . . . . . . 259 +6.8.4 Vector Permute Instruction . . . . . . . . . . . . . . . . . . 260 +6.8.5 Vector Select Instruction . . . . . . . . . . . . . . . . . . 261 +6.8.6 Vector Shift Instructions . . . . . . . . . . . . . . . . . . 262 + + The Vector Integer instructions include the add / subtract / Multiply + / Multiply Add/Sum / (no divide) operations for the standard integer types. + There are instruction forms that  provide signed, unsigned, modulo, and + saturate results for most operations. The PowerISA 2.07 extension add / + subtract of 128-bit integers with carry and extend to 256, 512-bit and beyond, + is included here. There are signed / unsigned compares across the standard + integer types (byte, .. doubleword). The usual bit-wise logical operations. + And the SIMD shift / rotate instructions that operate on the vector elements + for various types. + + 6.9 Vector Integer Instructions . . . . . . . . . . . . . . . . . . 264 +6.9.1 Vector Integer Arithmetic Instructions . . . . . . . . . . . . 264 +6.9.2 Vector Integer Compare Instructions. . . . . . . . . . . . . . 294 +6.9.3 Vector Logical Instructions . . . . . . . . . . . . . . . . . 
300 +6.9.4 Vector Integer Rotate and Shift Instructions . . . . . . . . . 302 + + The vector [single] float instructions are grouped into this chapter. + This chapter does not include the double float instructions which are described + in the VSX chapter. VSX also include additional float instructions that operate + on the whole 64 register vector-scalar set. + + 6.10 Vector Floating-Point Instruction Set . . . . . . . . . . . . . 306 +6.10.1 Vector Floating-Point Arithmetic Instructions . . . . . . . . 306 +6.10.2 Vector Floating-Point Maximum and Minimum Instructions . . . 308 +6.10.3 Vector Floating-Point Rounding and Conversion Instructions. . 309 +6.10.4 Vector Floating-Point Compare Instructions . . . . . . . . . 313 +6.10.5 Vector Floating-Point Estimate Instructions . . . . . . . . . 316 + + The vector XOR based instructions are new with PowerISA 2.07 (POWER8) + and provide vector  crypto and check-sum operations: + + 6.11 Vector Exclusive-OR-based Instructions . . . . . . . . . . . . 318 +6.11.1 Vector AES Instructions . . . . . . . . . . . . . . . . . . . 318 +6.11.2 Vector SHA-256 and SHA-512 Sigma Instructions . . . . . . . . 320 +6.11.3 Vector Binary Polynomial Multiplication Instructions. . . . . 321 +6.11.4 Vector Permute and Exclusive-OR Instruction . . . . . . . . . 323 + + The vector gather and bit permute support bit level rearrangement of + bits with in the vector. While the vector versions of the count leading zeros + and population count are useful to accelerate specific algorithms. + + 6.12 Vector Gather Instruction . . . . . . . . . . . . . . . . . . . 324 +6.13 Vector Count Leading Zeros Instructions . . . . . . . . . . . . 325 +6.14 Vector Population Count Instructions. . . . . . . . . . . . . . 326 +6.15 Vector Bit Permute Instruction . . . . . . . . . . . . . . . . 327 + + The Decimal Integer add / subtract instructions complement the + Decimal Floating-Point instructions. 
They can also be used to accelerate some + binary to/from decimal conversions. The VSCR instructions provide access to + the Non-Java mode floating-point control and the saturation status. These + instructions are not normally of interest in porting Intel intrinsics. + + 6.16 Decimal Integer Arithmetic Instructions . . . . . . . . . . . . 328 +6.17 Vector Status and Control Register Instructions . . . . . . . . 331 + + With PowerISA 2.07B (POWER8) several major extensions were added to + the Vector Facility: + + + + Vector Crypto: Under “Vector Exclusive-OR-based Instructions”, + AES [inverse] Cipher, SHA 256 / 512 + Sigma, Polynomial Multiplication, and Permute and XOR instructions. + + + 64-bit Integer: signed and unsigned add / subtract, signed and + unsigned compare, Even / Odd 32 x 32 multiply with 64-bit product, signed / + unsigned max / min, rotate and shift left/right. + + + Direct Move between GPRs and the FPRs / Left half of Vector + Registers. + + + 128-bit integer add / subtract with carry / extend, direct + support for vector __int128 and multiple precision arithmetic. + + + Decimal Integer add / subtract for 31 digit BCD. + + + Miscellaneous SIMD extensions: Count leading Zeros, Population + count, bit gather / permute, and vector forms of eqv, nand, orc. + + + + The rationale for why these are included in the Vector Facilities + (VMX) (vs Vector-Scalar Floating-Point Operations (VSX)) has more to do with + how the instructions were encoded than with the type of operations or the ISA + version of introduction. This is primarily a trade-off between the bits + required for register selection vs bits for extended op-code space within a + fixed 32-bit instruction. Basically accessing 32 vector registers requires + 5-bits per register, while accessing all 64 vector-scalar registers requires + 6-bits per register. 
When you consider that most vector instructions require 3 + and some (select, fused multiply-add) require 4 register operands, the + impact on op-code space is significant. The larger register set of VSX was + justified by queuing theory of larger HPC matrix codes using double float, + while 32 registers are sufficient for most applications. + + So by definition the VMX instructions are restricted to the original + 32 vector registers while VSX instructions are encoded to access all 64 + floating-point scalar and vector double registers. This distinction can be + troublesome when programming at the assembler level, but the compiler and + compiler built-ins can hide most of this detail from the programmer. + +
+ diff --git a/Vector_Intrinsics/sec_power_vmx.xml b/Vector_Intrinsics/sec_power_vmx.xml new file mode 100644 index 0000000..638d6ab --- /dev/null +++ b/Vector_Intrinsics/sec_power_vmx.xml @@ -0,0 +1,67 @@ + + +
+ The Vector Facility (VMX) + + The original VMX supported SIMD integer byte, halfword, and word, and + single float data types within a separate (from GPR and FPR) bank of 32 x + 128-bit vector registers. These operations like to stay within their (SIMD) + lanes except where the operation changes the element data size (integer + multiply, pack, and unpack). + + This is complemented by bit logical and shift / rotate / permute / + merge instructions that operate on the vector as a whole.  Some operations + (permute, pack, merge, shift double, select) will select 128 bits from a pair + of vectors (256-bits) and deliver a 128-bit vector result. These instructions + will cross lanes or multiple registers to grab fields and assemble them into + the single register result. + + The PowerISA 2.07B Chapter 6. Vector Facility is organised starting + with an overview (chapters 6.1- 6.6): + + 6.1 Vector Facility Overview . . . . . . . . . . . . . . . . . . . . 227 +6.2 Chapter Conventions. . . . . . . . . . . . . . . . . . . . . . . 227 +6.2.1 Description of Instruction Operation . . . . . . . . . . . . . 227 +6.3 Vector Facility Registers . . . . . . . . . . . . . . . . . . . 234 +6.3.1 Vector Registers . . . . . . . . . . . . . . . . . . . . . . . 234 +6.3.2 Vector Status and Control Register . . . . . . . . . . . . . . 234 +6.3.3 VR Save Register . . . . . . . . . . . . . . . . . . . . . . . 235 +6.4 Vector Storage Access Operations . . . . . . . . . . . . . . . . 235 +6.4.1 Accessing Unaligned Storage Operands . . . . . . . . . . . . . 237 +6.5 Vector Integer Operations . . . . . . . . . . . . . . . . . . . 238 +6.5.1 Integer Saturation . . . . . . . . . . . . . . . . . . . . . . 238 +6.6 Vector Floating-Point Operations . . . . . . . . . . . . . . . . 240 +6.6.1 Floating-Point Overview . . . . . . . . . . . . . . . . . . . 240 +6.6.2 Floating-Point Exceptions . . . . . . . . . . . . . . . . . . 240 +6.7 Vector Storage Access Instructions . . . . . . . . . . . . . . . 
242 +6.7.1 Storage Access Exceptions . . . . . . . . . . . . . . . . . . 242 +6.7.2 Vector Load Instructions . . . . . . . . . . . . . . . . . . . 243 +6.7.3 Vector Store Instructions. . . . . . . . . . . . . . . . . . . 246 +6.7.4 Vector Alignment Support Instructions. . . . . . . . . . . . . 248 + + Then a chapter on storage (load/store) access for vector and vector + elements: + + + +
+ diff --git a/Vector_Intrinsics/sec_power_vsx.xml b/Vector_Intrinsics/sec_power_vsx.xml new file mode 100644 index 0000000..6add740 --- /dev/null +++ b/Vector_Intrinsics/sec_power_vsx.xml @@ -0,0 +1,186 @@ + + +
+ Vector-Scalar Floating-Point Operations (VSX) + + With PowerISA 2.06 (POWER7) we extended the vector SIMD capabilities + of the PowerISA: + + + + Extend the available vector and floating-point scalar register + sets from 32 registers each to a combined 64 x 64-bit scalar floating-point and + 64 x 128-bit vector registers. + + + Enable scalar double float operations on all 64 scalar + registers. + + + Enable vector double and vector float operations for all 64 + vector registers. + + + Enable super-scalar execution of vector instructions and support + 2 independent vector floating point  pipelines for parallel execution of 4 x + 64-bit Floating point Fused Multiply Adds (FMAs) and 8 x 32-bit (FMAs) per + cycle. + + + + With PowerISA 2.07 (POWER8) we added single-precision scalar + floating-point instruction to VSX. This completes the floating-point + computational set for VSX. This ISA release also clarified how these operate in + the Little Endian storage model. + + While the focus was on enhanced floating-point computation (for High + Performance Computing),  VSX also extended  the ISA with additional storage + access, logical, and permute (merge, splat, shift) instructions. This was + necessary to extend these operations cover 64 VSX registers, and improves + unaligned storage access for vectors  (not available in VMX). + + The PowerISA 2.07B Chapter 7. Vector-Scalar Floating-Point Operations + is organized starting with an introduction and overview (chapters 7.1- 7.5) . + The early sections (7.1 and 7.2) describe the layout of the 64 VSX registers + and how they relate (overlap and inter-operate) to the existing floating point + scalar (FPRs) and (VMX VRs) vector registers. + + 7.1 Introduction . . . . . . . . . . . . . . . . . . . . . . . . . . 317 +7.1.1 Overview of the Vector-Scalar Extension . . . . . . . . . . . 317 +7.2 VSX Registers . . . . . . . . . . . . . . . . . . . . . . . . . 318 +7.2.1 Vector-Scalar Registers . . . . . . . . . . . . . . . . . . . 
318 +7.2.2 Floating-Point Status and Control Register . . . . . . . . . . 321 + + The definitions given in “7.1.1.1 Compatibility with Category + Floating-Point and Category Decimal Floating-Point Operations”, and + “7.1.1.2 Compatibility with Category Vector Operations” +
+ The instruction sets defined in Chapter 4. + Floating-Point Facility and Chapter 5. Decimal + Floating-Point retain their definition with one primary + difference. The FPRs are mapped to doubleword + element 0 of VSRs 0-31. The contents of doubleword 1 + of the VSR corresponding to a source FPR specified + by an instruction are ignored. The contents of + doubleword 1 of a VSR corresponding to the target + FPR specified by an instruction are undefined. + + The instruction set defined in Chapter 6. Vector Facility + [Category: Vector], retains its definition with one + primary difference. The VRs are mapped to VSRs + 32-63.
+ + The reference to scalar element 0 above is from the big endian + register perspective of the ISA. In the PPC64LE ABI implementation, and for the + purpose of porting Intel intrinsics, this is logical element 1.  Intel SSE + scalar intrinsics operated on logical element [0],  which is in the wrong + position for PowerISA FPU and VSX scalar floating-point  operations. Another + important note is what happens to the other half of the VSR when you execute a + scalar floating-point instruction (The contents of doubleword 1 of a VSR … + are undefined.) + + The compiler will hide some of this detail when generating code for + little endian vector element [] notation and most vector built-ins. For example + vec_splat (A, 0) is transformed for + PPC64LE to xxspltd VRT,VRA,1. + What the compiler can not + hide is the different placement of scalars within vector registers. + + Vector registers (VRs) 0-31 overlay and can be accessed from vector + scalar registers (VSRs) 32-63. The ABI also specifies that VR2-13 are used to + pass parameter and return values. In some cases the same (similar) operations + exist in both VMX and VSX instruction forms, while in the other cases + operations only exist for VMX (byte level permute and shift) or VSX (Vector + double). + + So resister selection that; avoids unnecessary vector moves, follows + the ABI, while maintaining the correct instruction specific register numbering, + can be tricky. The + GCC register constraint + annotations for Inline + assembler using vector instructions  is challenging, even for experts. So only + experts should be writing assembler and then only in extraordinary + circumstances. You should leave these details to the compiler (using vector + extensions and vector built-ins) when ever possible. + + The next sections get is into the details of floating point + representation, operations, and exceptions. 
Basically the implementation + details for the IEEE754R and C/C++ language standards that most developers only + access via higher level APIs. So most programmers will not need this level of + detail, but it is there if needed. + + 7.3 VSX Operations . . . . . . . . . . . . . . . . . . . . . . . . . 326 +7.3.1 VSX Floating-Point Arithmetic Overview . . . . . . . . . . . . 326 +7.3.2 VSX Floating-Point Data . . . . . . . . . . . . . . . . . . . 327 +7.3.3 VSX Floating-Point Execution Models . . . . . . . . . . . . . 335 +7.4 VSX Floating-Point Exceptions . . . . . . . . . . . . . . . . . 338 +7.4.1 Floating-Point Invalid Operation Exception . . . . . . . . . . 341 +7.4.2 Floating-Point Zero Divide Exception . . . . . . . . . . . . . 347 +7.4.3 Floating-Point Overflow Exception. . . . . . . . . . . . . . . 349 +7.4.4 Floating-Point Underflow Exception . . . . . . . . . . . . . . 351 + + Finally an overview the VSX storage access instructions for big and + little endian and for aligned and unaligned data addresses. This included + diagrams that illuminate the differences + + 7.5 VSX Storage Access Operations . . . . . . . . . . . . . . . . . 356 +7.5.1 Accessing Aligned Storage Operands . . . . . . . . . . . . . . 356 +7.5.2 Accessing Unaligned Storage Operands . . . . . . . . . . . . . 357 +7.5.3 Storage Access Exceptions . . . . . . . . . . . . . . . . . . 358 + + Section 7.6 starts with a VSX instruction Set Summary which is the + place to start to get an feel for the types and operations supported.  The + emphasis on float-point, both scalar and vector (especially vector double), is + pronounced. Many of the scalar and single-precision vector instruction look + like duplicates of what we have seen in the Chapter 4 Floating-Point and + Chapter 6 Vector facilities. The difference here is, new instruction encodings + to access the full 64 VSX register space. 
+ + In addition a small number of logical instructions are + included to support predication (selecting / masking vector elements based on + compare results), along with a set of permute, merge, shift, and splat instructions that + operate on VSX word (float) and doubleword (double) elements. As mentioned + for VMX section 6.8 these instructions are good to study as they are useful + for realigning elements from PowerISA vector results to that required for Intel + Intrinsics. + + 7.6 VSX Instruction Set . . . . . . . . . . . . . . . . . . . . . . 359 +7.6.1 VSX Instruction Set Summary . . . . . . . . . . . . . . . . . 359 +7.6.1.1 VSX Storage Access Instructions . . . . . . . . . . . . . . 359 +7.6.1.2 VSX Move Instructions . . . . . . . . . . . . . . . . . . . 360 +7.6.1.3 VSX Floating-Point Arithmetic Instructions . . . . . . . . 360 +7.6.1.4 VSX Floating-Point Compare Instructions . . . . . . . . . . 363 +7.6.1.5 VSX DP-SP Conversion Instructions . . . . . . . . . . . . . 364 +7.6.1.6 VSX Integer Conversion Instructions . . . . . . . . . . . . 364 +7.6.1.7 VSX Round to Floating-Point Integer Instructions . . . . . 366 +7.6.1.8 VSX Logical Instructions. . . . . . . . . . . . . . . . . . 366 +7.6.1.9 VSX Permute Instructions. . . . . . . . . . . . . . . . . . 367 +7.6.2 VSX Instruction Description Conventions . . . . . . . . . . . 368 +7.6.3 VSX Instruction Descriptions . . . . . . . . . . . . . . . . 392 + + The VSX Instruction Descriptions section contains the detailed + description for each VSX category instruction.  The table entries from the + Instruction Set Summary are formatted in the document as hyperlinks to the + corresponding instruction description. + +
+ diff --git a/Vector_Intrinsics/sec_powerisa.xml b/Vector_Intrinsics/sec_powerisa.xml new file mode 100644 index 0000000..1fc45ae --- /dev/null +++ b/Vector_Intrinsics/sec_powerisa.xml @@ -0,0 +1,33 @@ + + +
+ The PowerISA + + The PowerISA is, for historical reasons, organized at the top level + by the distinction between the older Vector Facility (Altivec / VMX) and the newer + Vector-Scalar Floating-Point Operations (VSX). + + + + +
+ diff --git a/Vector_Intrinsics/sec_powerisa_vector_facilities.xml b/Vector_Intrinsics/sec_powerisa_vector_facilities.xml new file mode 100644 index 0000000..e0c0a75 --- /dev/null +++ b/Vector_Intrinsics/sec_powerisa_vector_facilities.xml @@ -0,0 +1,46 @@ + + +
+ PowerISA Vector facilities + + The PowerISA vector facilities (VMX and VSX) are extensive, but do + not always provide a direct or obvious functional equivalent to the Intel + Intrinsics. But being not obvious is not the same as impossible. It just + requires some basic programming skills. + + It is a good idea to have an overall understanding of the vector + capabilities of the PowerISA. You do not need to memorize every instruction but + it helps to know where to look. Both the PowerISA and OpenPOWER ABI have a + specific structure and organization that can help you find what you are looking + for. + + It also helps to understand the relationship between the PowerISA's + low level instructions and the higher abstraction of the vector intrinsics as + defined by the OpenPOWER ABI's Vector Programming Interfaces and the de facto + standard of GCC's PowerPC AltiVec Built-in Functions. + + + + + +
+ diff --git a/Vector_Intrinsics/sec_powerisa_vector_intrinsics.xml b/Vector_Intrinsics/sec_powerisa_vector_intrinsics.xml new file mode 100644 index 0000000..bed2226 --- /dev/null +++ b/Vector_Intrinsics/sec_powerisa_vector_intrinsics.xml @@ -0,0 +1,79 @@ + + +
+ PowerISA Vector Intrinsics + + The + OpenPOWER ELF V2 application binary interface (ABI): Chapter 6. + Vector Programming Interfaces and + Appendix A. Predefined Functions for Vector + Programming document the current and proposed vector built-ins we expect all + C/C++ compilers to implement. + + Some of these operations are endian sensitive and the compiler needs + to make corresponding adjustments as it generates code for endian sensitive + built-ins. There is a good overview for this in the + OpenPOWER ABI Section + 6.4. + Vector Built-in Functions. + + Appendix A is organized (sorted) by built-in name, output type, then + parameter types. Most built-ins are generic as the named operation (add, + sub, mul, cmpeq, ...) applies to multiple types. + + So the vec_add built-in applies to all the signed and unsigned + integer types (char, short, int, and long) plus float and double floating-point + types. The compiler looks at the parameter type to select the vector + instruction (or instruction sequence) that implements the (add) operation on + that type. The compiler infers the output result type from the operation and + input parameters and will complain if the target variable type is not + compatible. For example: + + + This is one key difference between PowerISA built-ins and Intel + Intrinsics (Intel Intrinsics are not generic and include type information in + the name). This is why it is so important to understand the vector element + types and to add the appropriate type casts to get the correct results. + + The de facto standard implementation is GCC as defined in the include + file <altivec.h> and documented in the GCC online documentation in + 6.59.20 PowerPC + AltiVec Built-in Functions. The header file name and section title + reflect the origin of the Vector Facility, but recent versions of GCC altivec.h + include built-ins for newer PowerISA 2.06 and 2.07 VMX plus VSX extensions. 
+ This is a work in progress where your  (older) distro GCC compiler may not + include built-ins for the latest PowerISA 3.0 or ABI edition. So before you use + a built-in you find in the ABI Appendix A, check the specific + GCC online documentation for the + GCC version you are using. + +
+ diff --git a/Vector_Intrinsics/sec_powerisa_vector_size_type.xml b/Vector_Intrinsics/sec_powerisa_vector_size_type.xml new file mode 100644 index 0000000..c18f2bb --- /dev/null +++ b/Vector_Intrinsics/sec_powerisa_vector_size_type.xml @@ -0,0 +1,119 @@ + + +
+ How vector elements change size and type + + Most vector built ins return the same vector type as the (first) + input parameters, but there are exceptions. Examples include; conversions + between types, compares , pack, unpack,  merge, and integer multiply + operations. + + Converting floats to from integer will change the type and something + change the element size as well (double ↔ int and float ↔ long). For the + VMX the conversions are always the same size (float ↔ [unsigned] int). But + VSX allows conversion of 64-bit (long or double) to from 32-bit (float or +  int)  with the inherent size changes. The PowerISA VSX defines a 4 element + vector layout where little endian elements 0, 2 are used for input/output and + elements 1,3 are undefined. The OpenPOWER ABI Appendix A define + vec_double and vec_float + with even/odd and high/low extensions as program aids. These are not + included in GCC 7 or earlier but are planned for GCC 8. + + Compare operations produce either + vector bool <input element type> + (effectively bit masks) or predicates (the condition code for all and + any are represented as an int truth variable). When a predicate compare (i.e. + vec_all_eq, vec_any_gt), + is used in a if statement,  the condition code is + used directly in the conditional branch and the int truth value is not + generated. + + Pack operations pack integer elements into the next smaller (half) + integer sized elements. Pack operations include signed and unsigned saturate + and unsigned modulo forms. As the packed result will be half the size (in + bits), pack instructions require 2 vectors (256-bits) as input and generate a + single 128-bit vector results. + + + Unpack operations expand integer elements into the next larger size + elements. The integers are always treated as signed values and sign-extended. + The processor design avoids instructions that return multiple register values. 
+ So the PowerISA defines unpack-high and unpack low forms where instruction + takes (the high or low) half of vector elements and extends them to fill the + vector output. Element order is maintained and an unpack high / low sequence + with same input vector has the effect of unpacking to a 256-bit result in two + vector registers. + + + Merge operations resemble shuffling two (vectors) card decks + together, alternating (elements) cards in the result.   As we are merging from + 2 vectors (256-bits) into 1 vector (128-bits) and the elements do not change + size, we have merge high and merge low instruction forms for each (byte, + halfword and word) integer type. The merge high operations alternate elements + from the (vector register left) high half of the two input vectors. The merge + low operation alternate elements from the (vector register right) low half of + the two input vectors. + + For PowerISA 2.07 we added vector merge word even / odd instructions. + Instead of high or low elements the shuffle is from the even or odd number + elements of the two input vectors. Passing the same vector to both inputs to + merge produces splat like results for each doubleword half, which is handy in + some convert operations. + + + Integer multiply has the potential to generate twice as many bits in + the product as input. A multiply of 2 int (32-bit) values produces a long + (64-bits). Normal C language * operations ignore this and discard the top + 32-bits of the result. However  in some computations it useful to preserve the + double product precision for intermediate computation before reducing the final + result back to the original precision. + + The PowerISA VMX instruction set took the later approach ie keep all + the product bits until the programmer explicitly asks for the truncated result. + So the vector integer multiple are split into even/odd forms across signed and + unsigned; byte, halfword and word inputs. 
This requires two instructions (given + the same inputs) to generate the full vector multiply across 2 vector + registers and 256-bits. Again as POWER processors are super-scalar this pair of + instructions should execute in parallel. + + The set of expanded product values can either be used directly in + further (doubled precision) computation or merged/packed into a single + vector at the smaller bit size. This is what the compiler will generate for C + vector extension multiply of vector integer types. + +
+ diff --git a/Vector_Intrinsics/sec_prefered_methods.xml b/Vector_Intrinsics/sec_prefered_methods.xml new file mode 100644 index 0000000..3a8f729 --- /dev/null +++ b/Vector_Intrinsics/sec_prefered_methods.xml @@ -0,0 +1,57 @@ + + +
+ Preferred methods + + As we will see there are multiple ways to implement the logic of + these intrinsics. Some implementation methods are preferred because they allow + the compiler to select instructions and provide the most flexibility for + optimization across the whole sequence. Other methods may be required to + deliver a specific semantic or to deliver better optimization than the current + compiler is capable of. Some methods are more portable across multiple + compilers (GCC, LLVM, ...). All of this should be taken into consideration for + each intrinsic implementation. In general we should use the following list as a + guide to these decisions: + + + + Use C vector arithmetic, logical, dereference, etc., operators in + preference to intrinsics. + + + Use the bi-endian interfaces from Appendix A of the ABI in + preference to other intrinsics when available, as these are designed for + portability among compilers. + + + Use other, less well documented intrinsics (such as + __builtin_vsx_*) when no better facility is available, in preference to + assembly. + + + If necessary, use inline assembly, but know what you're + doing. + + + +
+ diff --git a/Vector_Intrinsics/sec_prepare.xml b/Vector_Intrinsics/sec_prepare.xml new file mode 100644 index 0000000..fd1f444 --- /dev/null +++ b/Vector_Intrinsics/sec_prepare.xml @@ -0,0 +1,66 @@ + + +
+ Prepare yourself + + To port Intel intrinsics to POWER you will need to prepare yourself + with knowledge of PowerISA vector facilities and how to access the associated + documentation. + + + + + GCC vector extension + syntax and usage. This is one of a set of GCC + “Extensions to the C Language Family” + that the intrinsic header implementation depends + on.  As many of the GCC intrinsics for x86 are implemented via C vector + extensions, reading and understanding of this code is an important part of the + porting process. + + + Intel (x86) intrinsic and type naming conventions and how to find + more information. The intrinsic name encodes some information about the + vector size and type of the data, but the pattern is not always obvious. + Using the online + Intel + Intrinsic Guide to look up the intrinsic by name is a good first + step. + + + PowerISA Vector facilities. The Vector facilities of POWER8 are + extensive and cover the usual types and usual operations. However they have a + different history and organization from Intel.  Both (Intel and PowerISA) have + their quirks and in some cases the mapping may not be obvious. So familiarizing + yourself with the PowerISA Vector (VMX) and Vector Scalar Extensions (VSX) is + important. + + + + + + + + + +
+ diff --git a/Vector_Intrinsics/sec_review_source.xml b/Vector_Intrinsics/sec_review_source.xml new file mode 100644 index 0000000..d92a0e7 --- /dev/null +++ b/Vector_Intrinsics/sec_review_source.xml @@ -0,0 +1,64 @@ + + +
+ Look at the source, Luke + + So if this is a code porting activity, where is the source? All the + source code we need to look at is in the GCC source trees. You can either git + (https://gcc.gnu.org/wiki/GitMirror) + the gcc source  or download one of the + recent AT source tars (for example: + ftp://ftp.unicamp.br/pub/linuxpatch/toolchain/at/ubuntu/dists/xenial/at10.0/). +  You will find the intrinsic headers in the ./gcc/config/i386/ + sub-directory. + + If you have an Intel Linux workstation or laptop with GCC installed, + you already have these headers, if you want to take a look: + $ find /usr/lib -name '*mmintrin.h' +/usr/lib/gcc/x86_64-redhat-linux/4.4.4/include/wmmintrin.h +/usr/lib/gcc/x86_64-redhat-linux/4.4.4/include/mmintrin.h +/usr/lib/gcc/x86_64-redhat-linux/4.4.4/include/xmmintrin.h +/usr/lib/gcc/x86_64-redhat-linux/4.4.4/include/emmintrin.h +/usr/lib/gcc/x86_64-redhat-linux/4.4.4/include/tmmintrin.h +... +$ + + But depending on the vintage of the distro, these may not be the + latest versions of the headers. Looking at the header source will tell you a + few things: The include structure (what other headers are implicitly + included). The types that are used at the API. And finally, how the API is + implemented. + + smmintrin.h (SSE4.1) includes tmmintrin.h +tmmintrin.h (SSSE3) includes pmmintrin.h +pmmintrin.h (SSE3) includes emmintrin.h +emmintrin.h (SSE2) includes xmmintrin.h +xmmintrin.h (SSE) includes mmintrin.h and mm_malloc.h +mmintrin.h (MMX) + + + + + + +
+ diff --git a/Vector_Intrinsics/sec_simple_examples.xml b/Vector_Intrinsics/sec_simple_examples.xml new file mode 100644 index 0000000..c2a2482 --- /dev/null +++ b/Vector_Intrinsics/sec_simple_examples.xml @@ -0,0 +1,62 @@ + + +
+ Some simple examples + + For example, a vector double splat looks like this: + + + Another example: + + + Note in the example above the cast to __v2df for the operation. Both + __m128d and __v2df are vector double, but __v2df does not have the __may_alias__ + attribute. And one more example: + + + Note this requires a cast for the compiler to generate the correct + code for the intended operation. The parameters and result are the generic + __m128i, which is a vector long long with the + __may_alias__ attribute. But the + operation is a vector multiply low unsigned short (__v8hu). So not only do we + use the cast to drop the __may_alias__ attribute but we also need to cast to + the correct (vector unsigned short) type for the specified operation. + + I have successfully copied these (and similar) source snippets over + to the PPC64LE implementation unchanged. This of course assumes the associated + types are defined and with compatible attributes. + +
+ diff --git a/Vector_Intrinsics/sec_vec_or_not.xml b/Vector_Intrinsics/sec_vec_or_not.xml new file mode 100644 index 0000000..e66de7d --- /dev/null +++ b/Vector_Intrinsics/sec_vec_or_not.xml @@ -0,0 +1,134 @@ + + +
+ To vec_not or not + + Well not exactly. Looking at the OpenPOWER ABI document we see a + reference to + vec_cmpne for all numeric types. But when we look in the current + GCC 6 documentation we find that + vec_cmpne is not on the list. So it is planned + in the ABI, but not implemented yet. + + Looking at the PowerISA 2.07B we find a VSX Vector Compare Equal to + Double-Precision but no Not Equal. In fact we see only vector double compare + instructions for greater than and greater than or equal in addition to the + equal compare. Not only can't we find a not equal, there is no less than or + less than or equal compares either. + + So what is going on here? Partially this is the Reduced Instruction + Set Computer (RISC) design philosophy. In this case the compiler can generate + all the required compares using the existing vector instructions and simple + transforms based on Boolean algebra. So + vec_cmpne(A,B) is simply vec_not + (vec_cmpeq(A,B)). And vec_cmplt(A,B) is simply + vec_cmpgt(B,A) based on the + identity A < B iff B > A. + Similarly vec_cmple(A,B) is implemented as + vec_cmpge(B,A). + + What a minute, there is no vec_not() either. Can not find it in the + PowerISA, the OpenPOWER ABI, or the GCC PowerPC Altivec Built-in documentation. + There is no vec_move() either! How can this possibly work? + + This is RISC philosophy again. We can always use a logical + instruction (like bit wise and or + or) to effect a move given that we also have + nondestructive 3 register instruction forms. In the PowerISA most instruction + have two input registers and a separate result register. So if the result + register number is  different from either input register then the inputs are + not clobbered (nondestructive). Of course nothing prevents you from specifying + the same register for both inputs or even all three registers (result and both + inputs).  And some times it is useful. 
+ + The statement B = vec_or (A,A) is effectively a vector move/copy + from A to B. And A = vec_or (A,A) is obviously a + nop (no operation). In fact the + PowerISA defines the preferred nop and register move for vector registers in + this way. + + It is also useful to have hardware implement the logical operators + nor (not or) + and nand (not and).   + The PowerISA provides these instructions for + fixed point and vector logical operations. So vec_not(A) + can be implemented as vec_nor(A,A). + So looking at the implementation of _mm_cmpne we propose the + following: + + + The Intel Intrinsics also include the not forms of the relational + compares: + + + The PowerISA, the OpenPOWER ABI, and the GCC PowerPC Altivec Built-in + documentation do not provide any direct equivalents to the not greater than + class of compares. Again you don't really need them if you know Boolean + algebra. We can use identities like + {not (A < B) iff A >= B} and + {not (A + <= B) iff A > B}. So the PPC64LE implementation follows: + + + These patterns repeat for the scalar version of the + not compares. And + in general the larger pattern described in this chapter applies to the other + float and integer types with similar interfaces. + + +
+ diff --git a/intrinsic.xml b/intrinsic.xml new file mode 100644 index 0000000..f9200df --- /dev/null +++ b/intrinsic.xml @@ -0,0 +1,1518 @@ + + +
+
+ 1 Intel Intrinsic porting guide for Power64LE. + The goal of this project is to provide functional equivalents of the +Intel MMX, SSE, and AVX intrinsic functions that are commonly used in Linux +applications, and make them (or equivalents) available for the PowerPC64LE +platform. These X86 intrinsics started with the Intel and Microsoft compilers +but were then ported to the GCC compiler. The GCC implementation is a set of +headers with inline functions. These inline functions provide an implementation +mapping from the Intel/Microsoft dialect intrinsic names to the corresponding +GCC Intel built-ins or directly via C language vector extension syntax. + + The current proposal is to start with the existing X86 GCC intrinsic +headers and port them (copy and change the source)  to POWER using C language +vector extensions, VMX and VSX built-ins. Another key assumption is that we +will be able to use many of the existing Intel DejaGNU test cases in +./gcc/testsuite/gcc.target/i386. This document is intended as a guide to +developers participating in this effort. However this document also provides +guidance and examples that should be useful to developers who may encounter X86 +intrinsics in code that they are porting to another platform. + +
+
+ 1.1 Look at the source Luke + So if this is a code porting activity, where is the source? All the +source code we need to look at is in the GCC source trees. You can either use git +(https://gcc.gnu.org/wiki/GitMirror) to clone the gcc source  or download one of the +recent AT (Advance Toolchain) source tars (for example: +ftp://ftp.unicamp.br/pub/linuxpatch/toolchain/at/ubuntu/dists/xenial/at10.0/). + You will find the intrinsic headers in the ./gcc/config/i386/ +sub-directory. + + If you have an Intel Linux workstation or laptop with GCC installed, +you already have these headers, if you want to take a look: + + + + + + + + + But depending on the vintage of the distro, these may not be the +latest versions of the headers. Looking at the header source will tell you a +few things: the include structure (what other headers are implicitly +included), the types that are used at the API, and finally, how the API is +implemented. + +
+
+ 1.1.1 The structure of the intrinsic includes + The GCC x86 intrinsic functions for vector were initially grouped by +technology (MMX and SSE), which starts with MMX and continues with SSE through +SSE4.1 stacked like a set of Russian dolls. + + Basically each higher layer include needs typedefs and helper macros +defined by the lower level intrinsic includes. mm_malloc.h simply provides +wrappers for posix_memalign and free. Then it gets a little weird, starting +with the crypto extensions: For AVX, AVX2, and AVX512 they must have decided +that the Russian Dolls thing was getting out of hand. AVX et al. is split +across 14 files, but they do not want applications to include these +individually. So immintrin.h  includes everything Intel vector, including all the +AVX, AES, SSE and MMX flavors. + + So what is the net? The include structure provides some strong clues +about the order in which we should approach this effort.  For example if you need +an intrinsic from SSE4 (smmintrin.h) we are likely to need the type definitions +from SSE2 (emmintrin.h). So a bottom-up (MMX, SSE, SSE2, …) approach seems +like the best plan of attack. Also saving the AVX parts for later makes sense, +as most are just wider forms of operations that already exist in SSE. + + We should use the same include structure to implement our PowerISA +equivalent API headers. This will make porting easier (drop-in replacement) and +should get the application running quickly on POWER. Then we are in a position +to profile and analyze the resulting application. This will show any hot spots +where the simple one-to-one transformation results in bottlenecks and +additional tuning is needed. For these cases we should improve our tools (SDK +MA/SCA) to identify opportunities for, and perhaps propose, alternative +sequences that are better tuned to PowerISA and our micro-architecture. + +
+
+ 1.1.2 The types used for intrinsics + The type system for Intel intrinsics is a little strange. For example +from xmmintrin.h: + + So there is one set of types that are used in the function prototypes +of the API, and the internal types that are used in the implementation. Notice +the special attribute __may_alias__. From the GCC documentation:So there are a +couple of issues here: 1)  the API seem to force the compiler to assume +aliasing of any parameter passed by reference. Normally the compiler assumes +that parameters of different size do not overlap in storage, which allows more +optimization. 2) the data type used at the interface may not be the correct +type for the implied operation. So parameters of type __m128i (which is defined +as vector long long) is also used for parameters and return values of vector +[char | short | int ]. + + This may not matter when using x86 built-in's but does matter when +the implementation uses C vector extensions or in our case use PowerPC generic +vector built-ins (#2.1.3.2.PowerISA Vector +Intrinsics|outline). For the later cases the type must be correct for +the compiler to generate the correct type (char, short, int, long) (#1.1.3.How the API is implemented.|outline) for the generic +builtin operation. There is also concern that excessive use of __may_alias__ +will limit compiler optimization. We are not sure how important this attribute +is to the correct operation of the API.  So at a later stage we should +experiment with removing it from our implementation for PowerPC + + The good news is that PowerISA has good support for 128-bit vectors +and (with the addition of VSX) all the required vector data (char, short, int, +long, float, double) types. However Intel supports a wider variety of the +vector sizes  than PowerISA does. This started with the 64-bit MMX vector +support that preceded SSE and extends to 256-bit and 512-bit vectors of AVX, +AVX2, and AVX512 that followed SSE. 
+ + Within the GCC Intel intrinsic implementation these are all +implemented as vector attribute extensions of the appropriate  size (   +__vector_size__ ({8 | 16 | 32, and 64}). For the PowerPC target  GCC currently +only supports the native __vector_size__ ( 16 ). These we can support directly +in VMX/VSX registers and associated instructions. The GCC will compile with +other   __vector_size__ values, but the resulting types are treated as simple +arrays of the element type. This does not allow the compiler to use the vector +registers and vector instructions for these (nonnative) vectors.   So what is +a programmer to do? +
+
+ 1.1.2.1 Dealing with MMX + MMX is actually the hard case. The __m64 type supports SIMD vector +int types (char, short, int, long).  The  Intel API defines  __m64 as: + + This is problematic for the PowerPC target (not really supported in +GCC) and we would prefer to use a native PowerISA type that can be passed in a +single register.  The PowerISA Rotate Under Mask instructions can easily +extract and insert integer fields of a General Purpose Register (GPR). This +implies that MMX integer types can be handled as an internal union of arrays for +the supported element types. So a 64-bit unsigned long long is the best type +for parameter passing and return values, especially for the 64-bit (_si64) +operations as these normally generate a single PowerISA instruction. + + The SSE extensions include some convert operations for __m128 to / +from __m64 and this includes some int to / from float conversions. However in +these cases the float operands always reside in SSE (XMM) registers (which +match the PowerISA vector registers) and the MMX registers only contain integer +values. POWER8 (PowerISA-2.07) has direct move instructions between GPRs and +VSRs. So these transfers are normally a single instruction and any conversions +can be handled in the vector unit. + + When transferring a __m64 value to a vector register we should also +execute a xxspltd instruction to ensure there is valid data in all four +element lanes before doing floating point operations. This avoids generating +extraneous floating point exceptions that might be generated by uninitialized +parts of the vector. The top two lanes will have the floating point results +that are in position for direct transfer to a GPR or stored via Store Float +Double (stfd). These operations are internal to the intrinsic implementation and +there is no requirement to keep temporary vectors in correct Little Endian +form. 
+ + Also for the smaller element sizes and higher element counts (MMX +_pi8 and _pi16 types) the number of  Rotate Under Mask instructions required to +disassemble the 64-bit __m64 into elements, perform the element calculations, +and reassemble the elements in a single __m64 value can get large. In this +case we can generate shorter instruction sequences by transferring (via direct +move instruction) the GPR __m64 value to a vector register, performing the +SIMD operation there, then transferring the __m64 result back to a GPR. + +
+
+ 1.1.2.2 Dealing with AVX and AVX512 + AVX is a bit easier for PowerISA and the ELF V2 ABI. First we have +lots (64) of vector registers and a super scalar vector pipe-line (can execute +two or more independent 128-bit vector operations concurrently). Second the ELF +V2 ABI was designed to pass and return larger aggregates in vector +registers: + + + + Up to 12 qualified vector arguments can be passed in +v2–v13. + + + A qualified vector argument corresponds to: + + + So the ABI allows for passing up to three structures each +representing 512-bit vectors and returning such a (512-bit) structure all in VMX +registers. This can be extended further by spilling parameters (beyond 12 X +128-bit vectors) to the parameter save area, but we should not need that, as +most intrinsics only use 2 or 3 operands. Vector registers not needed for +parameter passing, along with an additional 8 volatile vector registers, are +available for scratch and local variables. All can be used by the application +without requiring register spill to the save area. So most intrinsic operations +on 256- or 512-bit vectors can be held within existing PowerISA vector +registers. + + For larger functions that might use multiple AVX 256 or 512-bit +intrinsics and, as a result, push beyond the 20 volatile vector registers, the +compiler will just allocate non-volatile vector registers by allocating a stack +frame and spilling non-volatile vector registers to the save area (as needed in +the function prologue). This frees up to 64 vectors (32 x 256-bit or 16 x +512-bit structs) for code optimization. + + Based on the specifics of our ISA and ABI we will not use +__vector_size__ (32) or (64) in the PowerPC implementation of __m256 and __m512 +types. Instead we will typedef structs of 2 or 4 vector (__m128) fields. This +allows efficient handling of these larger data types without requiring new GCC +language extensions. 
+ + In the end we should use the same type names and definitions as the +GCC X86 intrinsic headers where possible. Where that is not possible we can +define new typedefs that provide the best mapping to the underlying PowerISA +hardware. +
+
+ 1.1.3 How is this API implemented. + One pleasant surprise is that many (at least for the older Intel) +Intrinsics are implemented directly in C vector extension code and/or a simple +mapping to GCC target specific builtins. +
+
+ 1.1.3.1 Some simple examples + For example, a vector double splat looks like this: + Another example: + Note in the example above the cast to __v2df for the operation. Both +__m128d and __v2df are vector double, but __v2df does not have the __may_alias__ +attribute. And one more example: + Note this requires a cast for the compiler to generate the correct +code for the intended operation. The parameters and result are the generic +__m128i, which is a vector long long with the __may_alias__ attribute. But +the operation is a vector multiply low unsigned short (__v8hu). So not only do we +use the cast to drop the __may_alias__ attribute but we also need to cast to +the correct (vector unsigned short) type for the specified operation. + + I have successfully copied these (and similar) source snippets over +to the PPC64LE implementation unchanged. This of course assumes the associated +types are defined and with compatible attributes. +
+
+ 1.1.3.2 Those extra attributes + You may have noticed there are some special attributes: + So far I have been using these attributes unchanged. + + But most intrinsics map the Intel intrinsic to one or more target +specific GCC builtins. For example: + + The first intrinsic (_mm_load_pd) is implemented as a C vector pointer +reference but, from the comment, assumes the compiler will use a movapd +instruction that requires 16-byte alignment (will raise a general-protection +exception if not aligned). This  implies that there is a performance advantage +for at least some Intel processors to keep the vector aligned. The second +intrinsic uses the explicit GCC builtin __builtin_ia32_loadupd to generate the +movupd instruction which handles unaligned references. + + The opposite assumption applies to POWER and PPC64LE, where GCC +generates the VSX  lxvd2x / xxswapd instruction sequence by default, which +allows unaligned references. The PowerISA equivalent for aligned vector access +is the VMX lvx instruction and the vec_ld builtin, which forces quadword +aligned access (by ignoring the low order 4 bits of the effective address). The +lvx instruction does not raise alignment exceptions, but perhaps an alignment check should be part +of our implementation of the Intel intrinsic. This requires that we use +PowerISA VMX/VSX built-ins to ensure we get the expected results. + + The current prototype defines the following: + The aligned  load intrinsic adds an assert which checks alignment +(to match the Intel semantic) and uses  the GCC builtin vec_ld (generates an +lvx).  The assert generates extra code but this can be eliminated by defining +NDEBUG at compile time. The unaligned load intrinsic uses the GCC builtin +vec_vsx_ld  (for PPC64LE generates lxvd2x / xxswapd for power8  and will +simplify to lxv or lxvx for power9).  And similarly for _mm_store_pd / +_mm_storeu_pd, using vec_st and vec_vsx_st. These concepts extend to the +load/store intrinsics for vector float and vector int. +
+
+ 1.1.3.3 How did I find this out? + The next question is where did I get the details above. The GCC +documentation for __builtin_ia32_loadupd provides minimal information (the +builtin name, parameters and return types). Not very informative. + + Looking up the Intel intrinsic description is more informative. You +can Google the intrinsic name or use the Intel +Intrinsic guide  for this. The Intrinsic Guide is interactive and +includes  Intel (Chip) technology and text based search capabilities. Clicking +on the intrinsic name opens to a synopsis including: the underlying instruction +name, text description, operation pseudo code, and in some cases performance +information (latency and throughput). + + The key is to get a description of the intrinsic (operand fields and +types, and which fields are updated for the result) and the underlying Intel +instruction. If the Intrinsic guide is not clear you can look up the +instruction details in the “Intel® 64 and IA-32 +Architectures Software Developer’s Manual”. + + Information about the PowerISA vector facilities is found in the +PowerISA Version 2.07B (for POWER8 and 3.0 for +POWER9) manual, Book I, Chapter 6. Vector Facility and Chapter 7. +Vector-Scalar Floating-Point Operations. Another good reference is the OpenPOWER ELF V2 application binary interface (ABI) +document, Chapter 6. Vector Programming Interfaces and Appendix A. Predefined +Functions for Vector Programming. + + Another useful document is the original Altivec Technology Programming Interface Manual (PIM) with a  user +friendly structure and many helpful diagrams. But alas the PIM does not +cover the recent PowerISA (power7,  power8, and power9) enhancements. +
+
+ 1.1.3.4 Examples implemented using other intrinsics + Some intrinsic implementations are defined in terms of other +intrinsics. For example: + + + + + + + + + + + + + + + + This notion of using part (one fourth or half) of the SSE XMM +register and leaving the rest unchanged (or forced to zero) is specific to SSE +scalar operations and can generate some complicated (sub-optimal) PowerISA +code.  In this case _mm_load_sd passes the dereferenced double value  to +_mm_set_sd which uses C vector initializer notation to combine (merge) that +double scalar value with a scalar 0.0 constant into a vector double. + + While code like this should work as-is for PPC64LE, you should look +at the generated code and assess if it is reasonable.  In this case the code +is not awful (a load double splat, vector xor to generate 0.0s, then a xxmrghd +to combine __F and 0.0).  Other examples may generate sub-optimal code and +justify a rewrite to PowerISA scalar or vector code (GCC PowerPC +AltiVec Built-in Functions or inline assembler). + + Net: try using the existing C code if you can, but check on what the +compiler generates.  If the generated code is horrendous, it may be worth the +effort to write a PowerISA specific equivalent. For codes making extensive use +of MMX or SSE scalar intrinsics you will be better off rewriting to use +standard C scalar types and letting the GCC compiler handle the details +(see #2.1.Preferred methods|outline) +
+
+ 2 How do we work this? + The working assumption is to start with the existing GCC headers from +./gcc/config/i386/, then convert them to PowerISA and add them to +./gcc/config/rs6000/. I assume we will replicate the existing header structure +and retain the existing header file and intrinsic names. This also allows us to +reuse existing DejaGNU test cases from ./gcc/testsuite/gcc.target/i386, modify +them as needed for the POWER target, and add them to the +./gcc/testsuite/gcc.target/powerpc. + + We can be flexible on the sequence in which headers/intrinsics and test +cases are ported.  This should be based on customer need and resolving +internal dependencies.  This implies an oldest-to-newest / bottoms-up (MMX, +SSE, SSE2, …) strategy. The assumption is, existing community and user +application codes, are more likely to have optimized code for previous +generation ubiquitous (SSE, SSE2, ...) processors than the latest (and rare) +SkyLake AVX512. + + I would start with an existing header from the current GCC + ./gcc/config/i386/ and copy the header comment (including FSF copyright) down +to any vector typedefs used in the API or implementation. Skip the Intel +intrinsic implementation code for now, but add the ending #endif matching the +header's conditional guard against multiple inclusion. You can add  #include +<alternative> as needed. For example: + + Then you can start adding small groups of related intrinsic +implementations to the header to be compiled and  examine the generated code. +Once you have what looks like reasonable code you can grep through + ./gcc/testsuite/gcc.target/i386 for examples using the intrinsic names you +just added. You should be able to find functional tests for most X86 +intrinsics. + + The GCC +testsuite uses the DejaGNU  test framework as documented in the GNU Compiler Collection (GCC) +Internals manual. GCC adds its own DejaGNU directives and extensions, +that are embedded in the testsuite source as comments.  
Some are platform +specific and will need to be adjusted for tests that are ported to our +platform. For example + should become something like + + Repeat this process until you have equivalent implementations for all +the intrinsics in that header and associated test cases that execute without +error. +
+
+ 2.1 Preferred methods + As we will see there are multiple ways to implement the logic of +these intrinsics. Some implementation methods are preferred because they allow +the compiler to select instructions and provide the most flexibility for +optimization across the whole sequence. Other methods may be required to +deliver a specific semantic or to deliver better optimization than the current +compiler is capable of. Some methods are more portable across multiple +compilers (GCC, LLVM, ...). All of this should be taken into consideration for +each intrinsic implementation. In general we should use the following list as a +guide to these decisions: + + + + + + Use C vector arithmetic, logical, dereference, etc., operators in +preference to intrinsics. + + + Use the bi-endian interfaces from Appendix A of the ABI in +preference to other intrinsics when available, as these are designed for +portability among compilers. + + + Use other, less well documented intrinsics (such as +__builtin_vsx_*) when no better facility is available, in preference to +assembly. + + + If necessary, use inline assembly, but know what you're +doing. + + + +
+
+ 2.2 Prepare yourself + To port Intel intrinsics to POWER you will need to prepare yourself +with knowledge of PowerISA vector facilities and how to access the associated +documentation. + + + + GCC vector extension syntax and usage. This is one of a set +of GCC “Extensions to the C Language Family” that the intrinsic header implementation depends +on.  As many of the GCC intrinsics for x86 are implemented via C vector +extensions, reading and understanding of this code is an important part of the +porting process. + + + Intel (x86) intrinsic and type naming conventions and how to find +more information. The intrinsic name encodes  some information about the +vector size and type of the data, but the pattern is not always  obvious. +Using the online Intel +Intrinsic Guide to look up the intrinsic by name is a good first +step. + + + PowerISA Vector facilities. The Vector facilities of POWER8 are +extensive and cover the usual types and usual operations. However it has a +different history and organization from Intel.  Both (Intel and PowerISA) have +their quirks and in some cases the mapping may not be obvious. So familiarizing +yourself with the PowerISA Vector (VMX) and Vector Scalar Extensions (VSX) is +important. + + + +
+
+ 2.2.1 GCC Vector Extensions + The GCC vector extensions are common syntax but implemented in a +target specific way. Using the C vector extensions requires the __gnu_inline__ +attribute to avoid syntax errors in case the user specified  C standard +compliance (-std=c90, -std=c11, etc) that would normally disallow such +extensions. + + The GCC implementation for PowerPC64 Little Endian is (mostly) +functionally compatible with x86_64 vector extension usage. We can use the same +type definitions (at least for  vector_size (16)), operations, syntax +<{...}> for vector initializers and constants, and array syntax +<[]> for vector element access. So simple arithmetic / logical operations +on whole vectors should work as is. + + The caveat is that the interface data type of the Intel Intrinsic may +not match the data types of the operation, so it may be necessary to cast the +operands to the specific type for the operation. This also applies to vector +initializers and accessing vector elements. You need to use the appropriate +type to get the expected results. Of course this applies to X86_64 as well. For +example: + Note the cast from the interface type (__m128) to the implementation +type (__v4sf, defined in the intrinsic header) for the vector float add (+) +operation. This is enough for the compiler to select the appropriate vector add +instruction for the float type. Then the result (which is __v4sf) needs to be +cast back to the expected interface type (__m128). + + Note also the use of array syntax ((__v4sf)__A)[0] to extract the lowest +(left most) element of a vector. (Footnote: Here we are using logical left and logical right +which will not match the PowerISA register view in Little endian. Logical left +is the left most element for initializers {left, … , right}, storage order +and array  order where the left most element is [0].) +The cast (__v4sf) ensures that the compiler knows we are +extracting the left most 32-bit float. 
The compiler ensures the code generated +matches the Intel behavior for PowerPC64 Little Endian. + + The code generation is complicated by the fact that PowerISA vector +registers are Big Endian (element 0 is the left most word of the vector) and +X86 scalar stores are from the left most (word/dword) of the vector register. +Application code with extensive use of scalar (vs packed) intrinsic loads / +stores should be flagged for rewrite to native PPC code using existing scalar +types (float, double, int, long, etc.). + + Another example is the set reverse order: + Note the use of initializer syntax used to collect a set of scalars +into a vector. Code with constant initializer values will generate a vector +constant of the appropriate endian. However code with variables in the +initializer can get complicated as it often requires transfers between register +sets and perhaps format conversions. We can assume that the compiler will +generate the correct code, but if this class of intrinsics shows up as a hot spot, +a rewrite to native PPC vector built-ins may be appropriate. For example, an +initializer with a variable replicated to all the vector fields might not be +recognized as a “load and splat” and making this explicit may help the +compiler generate better code. +
+
+ 2.2.2 Intel Intrinsic functions + So what is an intrinsic function? From Wikipedia: + + In compiler theory, an +intrinsic function is a function available for use in a given programming +language whose implementation is handled specially by the compiler. +Typically, it substitutes a sequence of automatically generated instructions +for the original function call, similar to an inline function. +Unlike an inline function though, the compiler has an intimate knowledge of the +intrinsic function and can therefore better integrate it and optimize it for +the situation. This is also called builtin function in many languages. + + The “Intel Intrinsics” API provides access to the many +instruction set extensions (Intel Technologies) that Intel has added (and +continues to add) over the years. The intrinsics provided access to new +instruction capabilities before the compilers could exploit them directly. +Initially these intrinsic functions were defined for the Intel and Microsoft +compilers and were eventually implemented and contributed to GCC. + + The Intel Intrinsics have a specific type and naming structure. In +this naming structure, function names start with a common prefix (MMX and SSE use +the _mm_ prefix, while AVX added the _mm256_ and _mm512_ prefixes), then a short +functional name (set, load, store, add, mul, blend, shuffle, …) and a suffix +(_pd, _sd, _pi32...) with type and packing information. See Appendix B for the list of common intrinsic suffixes. + + Oddly many of the MMX/SSE operations are not vectors at all. There +are a lot of scalar operations on a single float, double, or long long type. In +effect these are scalars that can take advantage of the larger (xmm) register +space. Also in the Intel 32-bit architecture they provided IEEE754 float and +double types, and 64-bit integers that did not exist or were hard to implement +in the base i386/387 instruction set. 
These scalar operations use a suffix +starting with '_s' (_sd for scalar double float, _ss scalar float, and _si64 +for scalar long long). + + True vector operations use the packed or extended packed suffixes, +starting with '_p' or '_ep' (_pd for vector double, _ps for vector float, and +_epi32 for vector int). The use of '_ep'  seems to be reserved to disambiguate +intrinsics that existed in the (64-bit vector) MMX extension from the extended +(128-bit vector) SSE equivalent. For example _mm_add_pi32 is a MMX operation on +a pair of 32-bit integers, while _mm_add_epi32 is an SSE2 operation on vector +of 4 32-bit integers. + + The GCC  builtins for the i386 target (includes x86 and x86_64) are not +the same as the Intel Intrinsics. While they have similar intent and cover most +of the same functions, they use a different naming (prefixed with +__builtin_ia32_, then function name with type suffix) and use GCC vector type +modes for operand types. For example: + Note: A key difference between GCC builtins for i386 and PowerPC is +that the x86 builtins have different names for each operation and type while the +PowerPC altivec builtins tend to have a single generic builtin for  each +operation, across a set of compatible operand types. + + In GCC the Intel Intrinsic header (*intrin.h) files are implemented +as a set of inline functions using the Intel Intrinsic API names and types. +These functions are implemented as either GCC C vector extension code or via +one or more GCC builtins for the i386 target. So let's take a look at some +examples from GCC's SSE2 intrinsic header emmintrin.h: + + + Note that the  _mm_add_pd is implemented directly as C vector +extension code, while _mm_add_sd is implemented via the GCC builtin +__builtin_ia32_addsd. From the discussion above we know the _pd suffix +indicates a packed vector double while the _sd suffix indicates a scalar double +in a XMM register. + +
+
+ 2.2.2.1 Packed vs scalar intrinsics + + So what is actually going on here? The vector code is clear enough if +you know that '+' operator is applied to each vector element. The intent of +the builtin is a little less clear, as the GCC documentation for +__builtin_ia32_addsd is not very helpful (nonexistent). So perhaps the Intel Intrinsic Guide will be more enlightening. To +paraphrase: + + From the _mm_add_pd description ; for each double float +element ([0] and [1] or bits [63:0] and [127:64]) for operands a and b are +added and resulting vector is returned. + + From the _mm_add_sd description ; Add element 0 of first operand +(a[0]) to element 0 of the second operand (b[0]) and return the packed vector +double {(a[0] + b[0]), a[1]}. Or said differently the sum of the logical left +most half of the operands are returned in the logical left most half +(element [0]) of the  result, along with the logical right half (element [1]) +of the first operand (unchanged) in the logical right half of the result. + + So the packed double is easy enough but the scalar double details are +more complicated. One source of complication is that while both Instruction Set +Architectures (SSE vs VSX) support scalar floating point operations in vector +registers the semantics are different. + + + + The vector bit and field numbering is different (reversed). + + + + The handling of the non-scalar part of the register for scalar +operations is different. + + + + To minimize confusion and use consistent nomenclature, I will try to +use the terms logical left and logical right elements based on the order they +appear in a C vector initializers and element index order. So in the vector +(__v2df){1.0, 2.0}, the value 1.0 is in the logical left element [0] and +the value 2.0 is logical right element [1]. + + So let's look at how to implement these intrinsics for the PowerISA. 
+For example in this case we can use the GCC vector extension, like so: + + + The packed double implementation operates on the vector as a whole. +The scalar double implementation operates on and updates only [0] element of +the vector and leaves the __A[1] element unchanged.  From this source the GCC +compiler generates the following code for PPC64LE target: + + The packed vector double generated the corresponding VSX vector +double add (xvadddp). But the scalar implementation is a bit more complicated. +  + + First, in the PPC64LE vector format, element [0] is not in the correct +position for  the scalar operations. So the compiler generates vector splat +double (xxspltd) instructions to copy elements __A[0] and __B[0] into position +for the VSX scalar add double (xsadddp) that follows. However the VSX scalar +operation leaves the other half of the VSR undefined (which does not match the +expected Intel semantics). So the compiler must generate a vector merge high +double (xxmrghd) instruction to combine the original __A[1] element (from vs34) +with the scalar add result from vs35 element [1]. This merge swings the scalar +result from vs35[1] element into the vs34[0] position, while preserving the +original vs34[1] (from __A[1]) element (copied to itself). (Footnote: Fun +fact: The vector registers in PowerISA are decidedly Big Endian. But we decided +to make the PPC64LE ABI behave like a Little Endian system to make application +porting easier. This requires the compiler to manipulate the PowerISA vector +intrinsic behind the scenes to get the correct Little Endian results. For +example the element selector [0|1] for vec_splat and the generation of +vec_mergeh vs vec_mergel are reversed for Little +Endian.) + + This technique applies to packed and scalar intrinsics for the +usual arithmetic operators (add, subtract, multiply, divide). 
Using GCC vector +extensions in these intrinsic implementations provides the compiler more +opportunity to optimize the whole function. + + Now we can look at a slightly more interesting (complicated) case. +Square root (sqrt) is not an arithmetic operator in C and is usually handled +with a library call or a compiler builtin. We really want to avoid library +calls and want to avoid any unexpected side effects. As you see below the +implementation of _mm_sqrt_pd and _mm_sqrt_sd intrinsics are based on GCC x86 +built-ins. + + For the packed vector sqrt, the PowerISA VSX has an equivalent vector +double square root instruction and GCC provides the vec_sqrt builtin. But the +scalar implementation involves an additional parameter and an extra move. + This seems intended to mimic the propagation of the __A[1] input to the +logical right half of the XMM result that we saw with _mm_add_sd above. + + The instinct is to extract the low scalar (__B[0]) from operand __B +and pass this to  the GCC __builtin_sqrt () before recombining that scalar +result with __A[1] for the vector result. Unfortunately C language standards +force the compiler to call the libm sqrt function unless -ffast-math is +specified. The -ffast-math option is not commonly used and we want to avoid the +external library dependency for what should be only a few inline instructions. +So this is not a good option. + + Thinking outside the box; we do have an inline intrinsic for a +(packed) vector double sqrt, that we just implemented. However we need to +ensure the other half of __B (__B[1]) does not cause any harmful side effects +(like raising exceptions for NaN or  negative values). The simplest solution +is to splat __B[0] to both halves of a temporary value before taking the +vec_sqrt. Then this result can be combined with __A[1] to return the final +result. 
For example: + In this  example we use _mm_set1_pd to splat the +scalar __B[0], before passing that vector to our _mm_sqrt_pd implementation, +then pass the sqrt result (c[0])  with __A[1] to  _mm_setr_pd to combine the final result. You could also use +the {c[0], __A[1]} initializer instead of _mm_setr_pd. + + Now we can look at vector and scalar compares that add their own +complication: For example: + The Intel Intrinsic Guide for _mm_cmpeq_pd describes comparing double +elements [0|1] and returning either 0s for not equal and 1s (0xFFFFFFFFFFFFFFFF +or long long -1) for equal. The comparison result is intended as a select mask +(predicates) for selecting or ignoring specific elements in later operations. +The scalar version _mm_cmpeq_sd is similar except for the quirk +of only comparing element [0] and combining the result with __A[1] to return +the final vector result. + + The packed vector implementation for PowerISA is simple as VSX +provides the equivalent instruction and GCC provides the vec_cmpeq builtin +supporting the vector double type. The technique of using scalar comparison +operators on the __A[0] and __B[0] does not work as the C comparison operators +return 0 or 1 results while we need the vector select mask (effectively 0 or +-1). Also we need to watch for sequences that mix scalar floats and integers, +generating if/then/else logic or requiring expensive transfers across register +banks. + + In this case we are better off using explicit vector built-ins for +_mm_add_sd as an example. We can use vec_splat from element [0] to temporaries +where we can safely use vec_cmpeq to generate the expected selector mask. Note +that the vec_cmpeq returns a bool long type so we need to cast the result back +to __v2df. Then use the (__m128d){c[0], __A[1]} initializer to combine the +comparison result with the original __A[1] input and cast to the required +interface type.  
So we have this example: + + Now let's look at a similar example that adds some surprising +complexity. This is the compare not equal case so we should be able to find the +equivalent vec_cmpne builtin: + +
+
+ 2.2.2.2 To vec_not or not + Well not exactly. Looking at the OpenPOWER ABI document we see a +reference to vec_cmpne for all numeric types. But when we look in the current +GCC 6 documentation we find that vec_cmpne is not on the list. So it is planned +in the ABI, but not implemented yet. + + Looking at the PowerISA 2.07B we find a VSX Vector Compare Equal to +Double-Precision but no Not Equal. In fact we see only vector double compare +instructions for greater than and greater than or equal in addition to the +equal compare. Not only can't we find a not equal, there are no less than or +less than or equal compares either. + + So what is going on here? Partially this is the Reduced Instruction +Set Computer (RISC) design philosophy. In this case the compiler can generate +all the required compares using the existing vector instructions and simple +transforms based on Boolean algebra. So vec_cmpne(A,B) is simply vec_not +(vec_cmpeq(A,B)). And vec_cmplt(A,B) is simply vec_cmpgt(B,A) based on the +identity A < B iff B > A. Similarly vec_cmple(A,B) is implemented as +vec_cmpge(B,A). + + Wait a minute, there is no vec_not() either. Can not find it in the +PowerISA, the OpenPOWER ABI, or the GCC PowerPC Altivec Built-in documentation. +There is no vec_move() either! How can this possibly work? + + This is RISC philosophy again. We can always use a logical +instruction (like bit wise and or or) to effect a move given that we also have +nondestructive 3 register instruction forms. In the PowerISA most instructions +have two input registers and a separate result register. So if the result +register number is  different from either input register then the inputs are +not clobbered (nondestructive). Of course nothing prevents you from specifying +the same register for both inputs or even all three registers (result and both +inputs).  And sometimes it is useful. + + The statement B = vec_or (A,A) is effectively a vector move/copy +from A to B. 
And A = vec_or (A,A) is obviously a nop (no operation). In fact the +PowerISA defines the preferred nop and register move for vector registers in +this way. + + It is also useful to have hardware implement the logical operators +nor (not or) and nand (not and).  The PowerISA provides these instructions for +fixed point and vector logical operations. So vec_not(A) can be implemented as +vec_nor(A,A). So looking at the  implementation of _mm_cmpne we propose the +following: + + The Intel Intrinsics also include the not forms of the relational +compares: + The PowerISA and OpenPOWER ABI, or GCC PowerPC Altivec Built-in +documentation do not provide any direct equivalents to the  not greater than +class of compares. Again you don't really need them if you know Boolean +algebra. We can use identities like {not (A < B) iff A >= B} and {not (A +<= B) iff A > B}. So the PPC64LE implementation follows: + These patterns repeat for the scalar version of the not compares. And +in general the larger pattern described in this chapter applies to the other +float and integer types with similar interfaces. + +
+
+ 2.2.2.3 Crossing lanes + We have seen that, most of the time, vector SIMD units prefer to keep +computations in the same “lane” (element number) as the input elements. The +only exception in the examples so far are the occasional splat (copy one +element to all the other elements of the vector) operations. Splat is an +example of the general category of “permute” operations (Intel would call +this a “shuffle” or “blend”). Permutes select and rearrange the +elements of (usually) a concatenated pair of vectors and deliver those +selected elements, in a specific order, to a result vector. The selection and +order of elements in the result is controlled by a third vector, either as a 3rd +input vector or an immediate field of the instruction. + + For example the Intel intrinsics for Horizontal Add / Subtract added with +SSE3. These intrinsics add (subtract) adjacent element pairs, across a pair of +input vectors, placing the sum of the adjacent elements in the result vector. +For example _mm_hadd_ps  which implements +the operation on float: + Horizontal Add (hadd) provides an incremental vector “sum across” +operation commonly needed in matrix and vector transform math. Horizontal Add +is incremental as you need three hadd instructions to sum across 4 vectors of 4 +elements ( 7 for 8 x 8, 15 for 16 x 16, …). + + The PowerISA does not have a sum-across operation for float or +double. We can use the vector float add instruction after we rearrange the +inputs so that element pairs line up for the horizontal add. For example we +would need to permute the input vectors {1, 2, 3, 4} and {101, 102, 103, 104} +into vectors {2, 4, 102, 104} and {1, 3, 101, 103} before the  vec_add. This +requires two vector permutes to align the elements into the correct lanes for +the vector add (to implement Horizontal Add).   + + The PowerISA provides generalized byte-level vector permute (vperm) +based on a vector register pair source as input and a control vector. 
The control +vector provides 16 indexes (0-31) to select bytes from the concatenated input +vector register pair (VRA, VRB). A more specific set of permutes (pack, unpack, +merge, splat) operations (across element sizes) are encoded as separate + instruction opcodes or instruction immediate fields. + + Unfortunately only the general vec_perm can provide the realignment +we need for the _mm_hadd_ps operation or any of the int, short variants of hadd. +For example: + + This requires two permute control vectors; one to select the even +word elements across __X and __Y, and another to select the odd word elements +across __X and __Y. The results of these permutes (vec_perm) are inputs to the +vec_add, which completes the hadd operation. + + Fortunately the permute required for the double (64-bit) case (i.e. +_mm_hadd_pd) reduces to the equivalent of vec_mergeh / vec_mergel  doubleword +(which are variants of  VSX Permute Doubleword Immediate). So the +implementation of _mm_hadd_pd can be simplified to this: + This eliminates the load of the control vectors required by the +previous example. + +
+
+ 2.2.3 PowerISA Vector facilities. + The PowerISA vector facilities (VMX and VSX) are extensive, but do +not always provide a direct or obvious functional equivalent to the Intel +Intrinsics. But being not obvious is not the same as impossible. It just +requires some basic programming skills. + + It is a good idea to have an overall understanding of the vector +capabilities of the PowerISA. You do not need to memorize every instruction but +it helps to know where to look. Both the PowerISA and OpenPOWER ABI have a +specific structure and organization that can help you find what you are looking +for. + + It also helps to understand the relationship between the PowerISA's +low level instructions and the higher abstraction of the vector intrinsics as +defined by the OpenPOWER ABI's Vector Programming Interfaces and the de facto + standard of GCC's PowerPC AltiVec Built-in Functions. +
+
+ 2.2.3.1 The PowerISA + The PowerISA is, for historical reasons, organized at the top level +by the distinction between the older Vector Facility (Altivec / VMX) and the newer +Vector-Scalar Floating-Point Operations (VSX). +
+
+ 2.2.3.1.1 The Vector Facility (VMX) + The original VMX supported SIMD integer byte, halfword, and word, and +single float data types within a separate (from GPR and FPR) bank of 32 x +128-bit vector registers. These operations like to stay within their (SIMD) +lanes except where the operation changes the element data size (integer +multiply, pack, and unpack). + + This is complemented by bit logical and shift / rotate / permute / +merge instructions that operate on the vector as a whole.  Some operations +(permute, pack, merge, shift double, select) will select 128 bits from a pair +of vectors (256-bits) and deliver a 128-bit vector result. These instructions +will cross lanes or multiple registers to grab fields and assemble them into +the single register result. + + The PowerISA 2.07B Chapter 6. Vector Facility is organised starting +with an overview (chapters 6.1- 6.6) : + Then a chapter on storage (load/store) access for vector and vector +elements: +
+
+ 2.2.3.1.1.1 Vector permute and formatting instructions + The vector Permute and formatting chapter follows and is an important +one to study. These operations operate on the byte, halfword, word (and with +2.07 doubleword) integer types, plus a special Pixel type. The shift +instructions in this chapter operate on the vector as a whole at either the bit +or the byte (octet) level. This is an important chapter to study for moving +PowerISA vector results into the vector elements that Intel Intrinsics +expect: + + The Vector Integer instructions include the add / subtract / Multiply +/ Multiply Add/Sum / (no divide) operations for the standard integer types. +There are instruction forms that  provide signed, unsigned, modulo, and +saturate results for most operations. The PowerISA 2.07 extension for add / +subtract of 128-bit integers with carry and extend to 256, 512-bit and beyond, +is included here. There are signed / unsigned compares across the standard +integer types (byte, .. doubleword). The usual bit-wise logical operations. +And the SIMD shift / rotate instructions that operate on the vector elements +for various types. + + + The vector [single] float instructions are grouped into this chapter. +This chapter does not include the double float instructions which are described +in the VSX chapter. VSX also includes additional float instructions that operate +on the whole 64 register vector-scalar set. + + + The vector XOR based instructions are new with PowerISA 2.07 (POWER8) +and provide vector  crypto and check-sum operations: + + The vector gather and bit permute support bit level rearrangement of +bits within the vector. While the vector versions of the count leading zeros +and population count are useful to accelerate specific algorithms. + + The Decimal Integer add / subtract instructions complement the +Decimal Floating-Point instructions. They can also be used to accelerate some +binary to/from decimal conversions. 
The VSCR instruction provides access to +the Non-Java mode floating-point control and the saturation status. These +instructions are not normally of interest in porting Intel intrinsics. + + With PowerISA 2.07B (Power8) several major extensions were added to +the Vector Facility: + + + Vector Crypto: Under “Vector Exclusive-OR-based Instructions”, +AES [inverse] Cipher, SHA 256 / 512 +Sigma, Polynomial Multiplication, and Permute and XOR instructions. + + + 64-bit Integer; signed and unsigned add / subtract, signed and +unsigned compare, Even / Odd 32 x 32 multiply with 64-bit product, signed / +unsigned max / min, rotate and shift left/right. + + + Direct Move between GPRs and the FPRs / Left half of Vector +Registers. + + + 128-bit integer add / subtract with carry / extend, direct +support for vector __int128 and multiple precision arithmetic. + + + Decimal Integer add subtract for 31 digit BCD. + + + Miscellaneous SIMD extensions: Count leading Zeros, Population +count, bit gather / permute, and vector forms of eqv, nand, orc. + + + + The rationale for why these are included in the Vector Facilities +(VMX) (vs Vector-Scalar Floating-Point Operations (VSX)) has more to do with +how the instructions were encoded than with the type of operations or the ISA +version of introduction. This is primarily a trade-off between the bits +required for register selection vs bits for extended op-code space within a +fixed 32-bit instruction. Basically accessing 32 vector registers requires +5 bits per register, while accessing all 64 vector-scalar registers requires +6 bits per register. When you consider that most vector instructions require  3 + and some (select, fused multiply-add) require 4 register operand forms,  the +impact on op-code space is significant. The larger register set of VSX was +justified by queuing theory of larger HPC matrix codes using double float, +while 32 registers are sufficient for most applications. 
+ + So by definition the VMX instructions are restricted to the original +32 vector registers while VSX instructions are encoded to  access all 64 +floating-point scalar and vector double registers. This distinction can be +troublesome when programming at the assembler level, but the compiler and +compiler built-ins can hide most of this detail from the programmer. + +
+
+ 2.2.3.1.2 Vector-Scalar Floating-Point Operations (VSX) + With PowerISA 2.06 (POWER7) we extended the vector SIMD capabilities +of the PowerISA: + + + Extend the available vector and floating-point scalar register +sets from 32 registers each to a combined 64 x 64-bit scalar floating-point and +64 x 128-bit vector registers. + + + Enable scalar double float operations on all 64 scalar +registers. + + + Enable vector double and vector float operations for all 64 +vector registers. + + + Enable super-scalar execution of vector instructions and support +2 independent vector floating point  pipelines for parallel execution of 4 x +64-bit Floating point Fused Multiply Adds (FMAs) and 8 x 32-bit (FMAs) per +cycle. + + + + With PowerISA 2.07 (POWER8) we added single-precision scalar +floating-point instruction to VSX. This completes the floating-point +computational set for VSX. This ISA release also clarified how these operate in +the Little Endian storage model. + + While the focus was on enhanced floating-point computation (for High +Performance Computing),  VSX also extended  the ISA with additional storage +access, logical, and permute (merge, splat, shift) instructions. This was +necessary to extend these operations cover 64 VSX registers, and improves +unaligned storage access for vectors  (not available in VMX). + + The PowerISA 2.07B Chapter 7. Vector-Scalar Floating-Point Operations +is organized starting with an introduction and overview (chapters 7.1- 7.5) . +The early sections (7.1 and 7.2) describe the layout of the 64 VSX registers +and how they relate (overlap and inter-operate) to the existing floating point +scalar (FPRs) and (VMX VRs) vector registers. + + The definitions given in “7.1.1.1 Compatibility with Category +Floating-Point and Category Decimal Floating-Point Operations”, and +“7.1.1.2 Compatibility with Category Vector Operations” + Note; the reference to scalar element 0 above is from the big endian +register perspective of the ISA. 
In the PPC64LE ABI implementation, and for the +purpose of porting Intel intrinsics, this is logical element 1.  Intel SSE +scalar intrinsics operate on logical element [0],  which is in the wrong +position for PowerISA FPU and VSX scalar floating-point  operations. Another +important note is what happens to the other half of the VSR when you execute a +scalar floating-point instruction (The contents of doubleword 1 of a VSR … +are undefined.) + + The compiler will hide some of this detail when generating code for +little endian vector element [] notation and most vector built-ins. For example +vec_splat (A, 0) is transformed for PPC64LE to xxspltd VRT,VRA,1. What the +compiler can not hide is the different placement of scalars within vector +registers. + + Vector registers (VRs) 0-31 overlay and can be accessed from vector +scalar registers (VSRs) 32-63. The ABI also specifies that VR2-13 are used to +pass parameters and return values. In some cases the same (similar) operations +exist in both VMX and VSX instruction forms, while in the other cases +operations only exist for VMX (byte level permute and shift) or VSX (Vector +double).   + + So register selection that avoids unnecessary vector moves and follows +the ABI, while maintaining the correct instruction specific register numbering, +can be tricky. The GCC register constraint annotations for Inline +assembler using vector instructions  are challenging, even for experts. So only +experts should be writing assembler and then only in extraordinary +circumstances. You should leave these details to the compiler (using vector +extensions and vector built-ins) whenever possible. + + The next sections get into the details of floating point +representation, operations, and exceptions. Basically the implementation +details for the IEEE754R and C/C++ language standards that most developers only +access via higher level APIs. So most programmers will not need this level of +detail, but it is there if needed. 
+ + Finally an overview of the VSX storage access instructions for big and +little endian and for aligned and unaligned data addresses. This includes +diagrams that illuminate the differences. + + Section 7.6 starts with a VSX instruction Set Summary which is the +place to start to get a feel for the types and operations supported.  The +emphasis on floating-point, both scalar and vector (especially vector double), is +pronounced. Many of the scalar and single-precision vector instructions look +like duplicates of what we have seen in the Chapter 4 Floating-Point and +Chapter 6 Vector facilities. The difference here is the new instruction encodings +to access the full 64 VSX register space. + + In addition a small number of logical instructions are +included to support predication (selecting / masking vector elements based on +compare results). And a set of permute, merge, shift, and splat instructions that +operate on VSX word (float) and doubleword (double) elements. As mentioned +for VMX section 6.8 these instructions are good to study as they are useful +for realigning elements from PowerISA vector results to that required for Intel +Intrinsics. + + + The VSX Instruction Descriptions section contains the detailed +description for each VSX category instruction.  The table entries from the +Instruction Set Summary are formatted in the document as hyperlinks to +the corresponding instruction description. + +
+
+ 2.2.3.2 PowerISA Vector Intrinsics + The OpenPOWER ELF V2 application binary interface (ABI): Chapter 6. +Vector Programming Interfaces and Appendix A. Predefined Functions for Vector +Programming document the current and proposed vector built-ins we expect all +C/C++ compilers to implement. + + Some of these operations are endian sensitive and the compiler needs +to make corresponding adjustments as  it generates code for endian sensitive +built-ins. There is a good overview for this in the OpenPOWER ABI section 6.4. +Vector Built-in Functions. + + Appendix A is organized (sorted) by built-in name, output type, then +parameter types. Most built-ins are generic as the named operation (add, +sub, mul, cmpeq, ...) applies to multiple types. + + So the vec_add built-in applies to all the signed and unsigned +integer types (char, short, int, and long) plus float and double floating-point +types. The compiler looks at the parameter type to select the vector +instruction (or instruction sequence) that implements the (add) operation on +that type. The compiler infers the output result type from the operation and +input parameters and will complain if the target variable type is not +compatible. For example: + + This is one key difference between PowerISA built-ins and Intel +Intrinsics (Intel Intrinsics are not generic and include type information in +the name). This is why it is so important to understand the vector element +types and to add the appropriate type casts to get the correct results. + + The de facto standard implementation is GCC as defined in the include +file <altivec.h> and documented in the GCC online documentation in 6.59.20 PowerPC +AltiVec Built-in Functions. The header file name and section title +reflect the origin of the Vector Facility, but recent versions of GCC altivec.h +include built-ins for newer PowerISA 2.06 and 2.07 VMX plus VSX extensions. 
+This is a work in progress where your  (older) distro GCC compiler may not +include built-ins for the latest PowerISA 3.0 or ABI edition. So before you use +a built-in you find in the ABI Appendix A, check the specific GCC online documentation for the +GCC version you are using. + +
+
+ 2.2.3.3 How vector elements change size and type + Most vector built-ins return the same vector type as the (first) +input parameters, but there are exceptions. Examples include; conversions +between types, compares , pack, unpack,  merge, and integer multiply +operations.   + + Converting floats to/from integers will change the type and sometimes +change the element size as well (double ↔ int and float ↔ long). For the +VMX the conversions are always the same size (float ↔ [unsigned] int). But +VSX allows conversion of 64-bit (long or double) to/from 32-bit (float or + int)  with the inherent size changes. The PowerISA VSX defines a 4 element +vector layout where little endian elements 0, 2 are used for input/output and +elements 1,3 are undefined. The OpenPOWER ABI Appendix A defines vec_double and +vec_float with even/odd and high/low extensions as program aids. These are not +included in GCC 7 or earlier but are planned for GCC 8. + + Compare operations produce either vector bool <input element +type> (effectively bit masks) or predicates (the condition code for all and +any are represented as an int truth variable). When a predicate compare (i.e. +vec_all_eq, vec_any_gt), is used in an if statement,  the condition code is +used directly in the conditional branch and the int truth value is not +generated. + + Pack operations pack integer elements into the next smaller (half) +integer sized elements. Pack operations include signed and unsigned saturate +and unsigned modulo forms. As the packed result will be half the size (in +bits), pack instructions require 2 vectors (256-bits) as input and generate a +single 128-bit vector result. + + Unpack operations expand integer elements into the next larger size +elements. The integers are always treated as signed values and sign-extended. +The processor design avoids instructions that return multiple register values. 
+So the PowerISA defines unpack-high and unpack low forms where the instruction +takes (the high or low) half of vector elements and extends them to fill the +vector output. Element order is maintained and an unpack high / low sequence +with same input vector has the effect of unpacking to a 256-bit result in two +vector registers. + + Merge operations resemble shuffling two (vectors) card decks +together, alternating (elements) cards in the result.   As we are merging from +2 vectors (256-bits) into 1 vector (128-bits) and the elements do not change +size, we have merge high and merge low instruction forms for each (byte, +halfword and word) integer type. The merge high operations alternate elements +from the (vector register left) high half of the two input vectors. The merge +low operations alternate elements from the (vector register right) low half of +the two input vectors. + + For PowerISA 2.07 we added vector merge word even / odd instructions. +Instead of high or low elements the shuffle is from the even or odd number +elements of the two input vectors. Passing the same vector to both inputs to +merge produces splat like results for each doubleword half, which is handy in +some convert operations. + + Integer multiply has the potential to generate twice as many bits in +the product as input. A multiply of 2 int (32-bit) values produces a long +(64-bits). Normal C language * operations ignore this and discard the top +32-bits of the result. However  in some computations it is useful to preserve the +double product precision for intermediate computation before reducing the final +result back to the original precision. + + The PowerISA VMX instruction set took the latter approach, i.e. keep all +the product bits until the programmer explicitly asks for the truncated result. +So the vector integer multiplies are split into even/odd forms across signed and +unsigned; byte, halfword and word inputs. 
This requires two instructions (given +the same inputs) to generate the full vector  multiply across 2 vector +registers and 256-bits. Again as POWER processors are super-scalar this pair of +instructions should execute in parallel. + + The set of expanded product values can either be used directly in +further (doubled precision) computation or merged/packed into the single +vector at the smaller bit size. This is what the compiler will generate for C +vector extension multiply of vector integer types. + +
+
+ 2.2.4 Some more Intrinsic examples + The intrinsic _mm_cvtpd_ps converts a packed vector double into +a packed vector single float. Since only 2 doubles fit into a 128-bit vector +only 2 floats are returned and occupy only half (64-bits) of the XMM register. +For this intrinsic the 64 bits are packed into the logical left half of the +registers and the logical right half of the register is set to zero (as per the +Intel cvtpd2ps instruction). + + The PowerISA provides the VSX Vector round and Convert +Double-Precision to Single-Precision format (xvcvdpsp) instruction. In the ABI +this is vec_floato (vector double) .  This instruction converts each double +element then transfers converted element 0 to float element 1, and converted +element 1 to float element 3. Float elements 0 and 2 are undefined (the +hardware can do whatever). This does not match the expected results for +_mm_cvtpd_ps. + + So we need to re-position the results to word elements 0 and 2, which +allows a pack operation to deliver the correct format. Here the merge odd +splats element 1 to 0 and element 3 to 2. The Pack operation combines the low +half of each doubleword from the vector result and vector of zeros to generate +the required format. + + This  technique is also used to implement  _mm_cvttpd_epi32 which converts a packed +vector double into a packed vector int. The PowerISA instruction xvcvdpsxws +uses a similar layout for the result as  xvcvdpsp and requires the same fix +up. +
+
+ 2.3 Profound differences + We have already mentioned above a number of architectural differences +that affect porting of codes containing Intel intrinsics to POWER. The fact +that Intel supports multiple vector extensions with different vector widths +(64, 128, 256, and 512-bits) while the PowerISA only supports vectors of +128-bits is one issue. Another is the difference in how the respective ISAs +support scalars in vector registers.  In the text above we propose +workable alternatives for the PowerPC port. There are also differences in the +handling of floating point exceptions and rounding modes that may impact the +application's performance or behavior. + +
+
+ 2.3.1 Floating Point Exceptions + Nominally both ISAs support the IEEE754 specifications, but there are +some subtle differences. Both architectures define a status and control register +to record exceptions and enable / disable floating exceptions for program +interrupt or default action. Intel has a MXCSR and PowerISA has a FPSCR which +basically do the same thing but with different bit layout. + + Intel provides _mm_setcsr / _mm_getcsr intrinsics to allow direct +access to the MXCSR. In the early days before the OS POSIX run-times were +updated  to manage the MXCSR, this might have been useful. Today this would be +highly discouraged with a strong preference to use the POSIX APIs +(feclearexcept, fegetexceptflag, fesetexceptflag, ...) instead. + + If we implement _mm_setcsr / _mm_getcsr at all, we should simply +redirect the implementation to use the POSIX APIs from <fenv.h>. But it +might be simpler just to replace these intrinsics with macros that generate +#error. + + The Intel MXCSR does have some non-standard (POSIX/IEEE754) quirks; +Flush-To-Zero and Denormals-Are-Zeros flags. This simplifies the hardware +response to what should be a rare condition (underflows where the result can +not be represented in the exponent range and precision of the format) by simply +returning a signed 0.0 value. The intrinsic header implementation does provide +constant masks for _MM_DENORMALS_ZERO_ON (<pmmintrin.h>) and +_MM_FLUSH_ZERO_ON (<xmmintrin.h>), so technically it is available to users +of the Intel Intrinsics API. + + The VMX Vector facility provides a separate Vector Status and Control +register (VSCR) with a Non-Java Mode control bit. This control combines the +flush-to-zero semantics for floating Point underflow and denormal values. But +this control only applies to VMX vector float instructions and does not apply +to VSX scalar floating Point or vector double instructions. 
The FPSCR does +define a Floating-Point non-IEEE mode which is optional in the architecture. +This would apply to Scalar and VSX floating-point operations if it was +implemented. This was largely intended for embedded processors and is not +implemented in the POWER processor line. + + As the flush-to-zero is primarily a performance enhancement and is +clearly outside the IEEE754 standard, it may be best to simply ignore this +option for the intrinsic port. + +
+
+ 2.3.2 Floating-point rounding modes + The Intel (x86 / x86_64) and PowerISA architectures both support the +4 IEEE754 rounding modes. Again while the Intel Intrinsic API allows the +application to change rounding modes via updates to the MXCSR it is a bad idea +and should be replaced with the POSIX APIs (fegetround and fesetround). + +
+
+ 2.3.3 Performance + The performance of a ported intrinsic depends on the specifics of the +intrinsic and the context it is used in. Many of the SIMD operations have +equivalent instructions in both architectures. For example the vector float and +vector double match very closely. However the SSE and VSX scalars have subtle +differences in how the scalar is positioned within the vector registers and what +happens to the rest (non-scalar part) of the register (previously discussed in +here). This requires additional PowerISA instructions +to preserve the non-scalar portion of the vector registers. This may or may not +be important to the logic of the program being ported, but we have to handle the +case where it is. + + This is where the context of how the intrinsic is used starts to +matter. If the scalar intrinsics are used within a larger program the compiler +may be able to eliminate the redundant register moves as the results are never +used. In the other cases common set up (like permute vectors or bit masks) can +be common-ed up and hoisted out of the loop. So it is very important to let the +compiler do its job with higher optimization levels (-O3, +-funroll-loops). + +
+
+ 2.3.3.1 Using SSE float and double scalars + “Hand” optimization using SSE scalar float / double intrinsics is no +longer necessary. This was important when SSE was initially introduced and +compiler support was limited or nonexistent. Also, SSE scalar float / double +provided additional (16) registers and IEEE754 compliance, not available from +the 8087 floating point architecture that preceded it. So application +developers were motivated to use SSE instructions versus what the compiler was +generating at the time. + + Modern compilers can now generate and optimize these (SSE +scalar) instructions for Intel from C standard scalar code. Of course PowerISA +supported IEEE754 float and double and had 32 dedicated floating point +registers from the start (and now 64 with VSX). So replacing an Intel-specific +scalar intrinsic implementation with the equivalent C language scalar +implementation is usually a win: it allows the compiler to apply the latest +optimization and tuning for the latest generation processor, and is portable to +other platforms where the compiler can also apply the latest optimization and +tuning for that processor's latest generation. + +
+
+ 2.3.3.2 Using MMX intrinsics + MMX was the first and oldest SIMD extension and initially filled a +need for wider (64-bit) integers and additional registers. This is back when +processors were 32-bit and 8 x 32-bit registers was starting to cramp our +programming style. Now 64-bit processors, larger register sets, and 128-bit (or +larger) vector SIMD extensions are common. There is simply no good reason to +write new code using the (now) very limited MMX capabilities. + + We recommend that existing MMX codes be rewritten to use the newer +SSE and VMX/VSX intrinsics or using the more portable GCC builtin vector +support or in the case of si64 operations use C scalar code. The MMX si64 +scalars are just (64-bit) operations on long long int types, and any +modern C compiler can handle this type. The char, short, and int SIMD operations +should all be promoted to 128-bit SIMD operations on GCC builtin vectors. Both +will improve cross platform portability and performance. + +
+
+ Appendix A: Document References +
+
+ A.1 OpenPOWER and Power documents + + OpenPOWER + TM + Technical Specification + + + Power ISA + TM + Version 2.07 B + + + Power +ISA + TM + +Version 3.0 + + + Power Architecture 64-bit ELF ABI Specification (AKA OpenPower +ABI for Linux Supplement) + + + AltiVec™ Technology + Programming Environments Manual + + +
+
+ A.2 Intel Documents + + Intel® +64 and IA-32 Architectures Software Developer’s Manual + + + Intel + TM + Intrinsics +Guide + + +
+
+ A.3 GNU Compiler Collection (GCC) documents + + GCC online +documentation + + + GCC Manual +(GCC 6.3) + + + GCC Internals +Manual + + +
+
+ + </section> + <section> + <title>Appendix B: Intel Intrinsic suffixes +
+
+ B.1 MMX +
+
+ B.2 SSE +
+
+ B.3 SSE2 +
+
+ B.4 AVX/AVX2 __m256_* +
+
+ B.5 AVX512 __m512_* + +
+ 1 +
diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..cb479f5 --- /dev/null +++ b/pom.xml @@ -0,0 +1,22 @@ + + + + + org.openpowerfoundation.docs + master-pom + 1.0.0-SNAPSHOT + ../Docs-Master/pom.xml + + 4.0.0 + + workgroup-pom + pom + + + + Vector_Intrinsics + +