From 321ac9e713e4e5eb035f0ba1a04fed95e45d23be Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Wed, 23 Jun 2021 16:43:57 -0500 Subject: [PATCH] Incorporate changes following Paul Clarke's admirable review Signed-off-by: Bill Schmidt --- Intrinsics_Reference/ch_biendian.xml | 84 ++- Intrinsics_Reference/ch_intro.xml | 28 +- Intrinsics_Reference/ch_mma_reference.xml | 268 +++++++- Intrinsics_Reference/ch_techniques.xml | 32 +- Intrinsics_Reference/ch_vec_reference.xml | 780 +++++++++++++++++++--- 5 files changed, 1036 insertions(+), 156 deletions(-) diff --git a/Intrinsics_Reference/ch_biendian.xml b/Intrinsics_Reference/ch_biendian.xml index 5578c71..94b6ef4 100644 --- a/Intrinsics_Reference/ch_biendian.xml +++ b/Intrinsics_Reference/ch_biendian.xml @@ -804,7 +804,7 @@ a[3] = c; - @@ -817,10 +817,7 @@ a[3] = c; - - - + @@ -831,7 +828,7 @@ a[3] = c; - + @@ -845,7 +842,7 @@ a[3] = c; - + @@ -859,7 +856,7 @@ a[3] = c; - + @@ -870,7 +867,7 @@ a[3] = c; - + @@ -881,7 +878,7 @@ a[3] = c; - + @@ -892,7 +889,7 @@ a[3] = c; - + @@ -903,7 +900,7 @@ a[3] = c; - + @@ -914,7 +911,7 @@ a[3] = c; - + @@ -925,7 +922,10 @@ a[3] = c; - + + + @@ -937,7 +937,7 @@ a[3] = c; - @@ -964,7 +964,10 @@ a[3] = c; - + + + @@ -975,7 +978,7 @@ a[3] = c; - + @@ -986,7 +989,7 @@ a[3] = c; - + @@ -997,7 +1000,8 @@ a[3] = c; - + @@ -1008,7 +1012,7 @@ a[3] = c; - + @@ -1019,7 +1023,7 @@ a[3] = c; - + @@ -1030,7 +1034,7 @@ a[3] = c; - + @@ -1044,7 +1048,7 @@ a[3] = c; - (ISA 2.07 only) + @@ -1058,7 +1062,7 @@ a[3] = c; - + (ISA 2.07 only) @@ -1072,13 +1076,13 @@ a[3] = c; - (ISA 2.07 only) + - @@ -1086,12 +1090,15 @@ a[3] = c; - + (ISA 2.07 only) - + + + @@ -1099,6 +1106,20 @@ a[3] = c; xrefstyle="select:title nopage"/> + + + + + + + + + + + + + @@ -1255,13 +1276,14 @@ a[3] = c; introduced serious compiler complexity without much utility. Thus this support (previously controlled by switches -maltivec=be and/or -qaltivec=be) is - now deprecated. 
Current versions of the GCC and Clang - open-source compilers do not implement this support. + now deprecated. Current versions of the GCC, Clang, and Open XL + compilers do not implement this support. -
+
Language-Specific Vector Support for Other Languages
diff --git a/Intrinsics_Reference/ch_intro.xml b/Intrinsics_Reference/ch_intro.xml index 4fad449..78f02f8 100644 --- a/Intrinsics_Reference/ch_intro.xml +++ b/Intrinsics_Reference/ch_intro.xml @@ -201,11 +201,15 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_intro"> - The XL compilers. For - XL compilers provided with the Linux Community Edition, you - can provide feedback to the XL compiler team via email + The XL and Open XL + compilers. For XL and Open XL compilers provided + with the Linux Community Edition, you can provide feedback + to the XL compiler team via email (compinfo@cn.ibm.com); for other editions of - XL compilers, please open a + XL and Open XL + compilers, please open a Case. @@ -335,6 +339,22 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_intro"> + + + The GNU C Library Project. + + https://www.gnu.org/software/libc + + + + + + Matrix-Multiply Assist Best Practices Guide. + + https://www.redbooks.ibm.com/redpapers/pdfs/redp5612.pdf + + +
diff --git a/Intrinsics_Reference/ch_mma_reference.xml b/Intrinsics_Reference/ch_mma_reference.xml index fee36c7..822f606 100644 --- a/Intrinsics_Reference/ch_mma_reference.xml +++ b/Intrinsics_Reference/ch_mma_reference.xml @@ -19,7 +19,7 @@ revisionflag="added"> - Matrix Multiply Accelerate (MMA) Intrinsic Reference + Matrix-Multiply Assist (MMA) Intrinsic Reference
Introduction @@ -43,8 +43,14 @@ instruction directly writes to one of these VSRs. - Review status: This chapter is - not yet reviewed by anyone. + This reference is not intended to be a complete introduction to + MMA concepts. The reader is directed to the Matrix-Multiply + Assist Best Practices Guide (see ) and to the POWER ISA. + + + Review status: Chapter reviewed + by Paul Clarke; changes made.
@@ -76,6 +82,14 @@ Load and store vector pairs. + + lxvp + __builtin_vsx_lxvp + + + stxvp + __builtin_vsx_stxvp + @@ -95,7 +109,7 @@ - __vector pair __builtin_vsx_lxvp (long long int a, const __vector_pair* b) + __vector_pair __builtin_vsx_lxvp (long long a, const __vector_pair* b) @@ -107,7 +121,7 @@ - void __builtin_vsx_stxvp (__vector_pair s, long long int a, const __vector_pair* b) + void __builtin_vsx_stxvp (__vector_pair s, long long a, const __vector_pair* b) @@ -226,6 +240,18 @@ (a "priming" operation) or vice versa ( a "depriming" operation), or initialize an accumulator to zeros. + + xxmfacc + __builtin_mma_xxmfacc + + + xxmtacc + __builtin_mma_xxmtacc + + + xxsetaccz + __builtin_mma_xxsetaccz + @@ -289,6 +315,238 @@ Each of these intrinsics generates an instruction to perform an outer product operation. + + pmxvbf16ger2 + __builtin_mma_pmxvbf16ger2 + + + pmxvbf16ger2nn + __builtin_mma_pmxvbf16ger2nn + + + pmxvbf16ger2np + __builtin_mma_pmxvbf16ger2np + + + pmxvbf16ger2pn + __builtin_mma_pmxvbf16ger2pn + + + pmxvbf16ger2pp + __builtin_mma_pmxvbf16ger2pp + + + pmxvf16ger2 + __builtin_mma_pmxvf16ger2 + + + pmxvf16ger2nn + __builtin_mma_pmxvf16ger2nn + + + pmxvf16ger2np + __builtin_mma_pmxvf16ger2np + + + pmxvf16ger2pn + __builtin_mma_pmxvf16ger2pn + + + pmxvf16ger2pp + __builtin_mma_pmxvf16ger2pp + + + pmxvf32ger + __builtin_mma_pmxvf32ger + + + pmxvf32gernn + __builtin_mma_pmxvf32gernn + + + pmxvf32gernp + __builtin_mma_pmxvf32gernp + + + pmxvf32gerpn + __builtin_mma_pmxvf32gerpn + + + pmxvf32gerpp + __builtin_mma_pmxvf32gerpp + + + pmxvf64ger + __builtin_mma_pmxvf64ger + + + pmxvf64gernn + __builtin_mma_pmxvf64gernn + + + pmxvf64gernp + __builtin_mma_pmxvf64gernp + + + pmxvf64gerpn + __builtin_mma_pmxvf64gerpn + + + pmxvf64gerpp + __builtin_mma_pmxvf64gerpp + + + pmxvi16ger2 + __builtin_mma_pmxvi16ger2 + + + pmxvi16ger2pp + __builtin_mma_pmxvi16ger2pp + + + pmxvi16ger2s + __builtin_mma_pmxvi16ger2s + + + pmxvi16ger2spp + __builtin_mma_pmxvi16ger2spp + + + 
pmxvi4ger8 + __builtin_mma_pmxvi4ger8 + + + pmxvi4ger8pp + __builtin_mma_pmxvi4ger8pp + + + pmxvi8ger4 + __builtin_mma_pmxvi8ger4 + + + pmxvi8ger4pp + __builtin_mma_pmxvi8ger4pp + + + pmxvi8ger4spp + __builtin_mma_pmxvi8ger4spp + + + xvbf16ger2 + __builtin_mma_xvbf16ger2 + + + xvbf16ger2nn + __builtin_mma_xvbf16ger2nn + + + xvbf16ger2np + __builtin_mma_xvbf16ger2np + + + xvbf16ger2pn + __builtin_mma_xvbf16ger2pn + + + xvbf16ger2pp + __builtin_mma_xvbf16ger2pp + + + xvf16ger2 + __builtin_mma_xvf16ger2 + + + xvf16ger2nn + __builtin_mma_xvf16ger2nn + + + xvf16ger2np + __builtin_mma_xvf16ger2np + + + xvf16ger2pn + __builtin_mma_xvf16ger2pn + + + xvf16ger2pp + __builtin_mma_xvf16ger2pp + + + xvf32ger + __builtin_mma_xvf32ger + + + xvf32gernn + __builtin_mma_xvf32gernn + + + xvf32gernp + __builtin_mma_xvf32gernp + + + xvf32gerpn + __builtin_mma_xvf32gerpn + + + xvf32gerpp + __builtin_mma_xvf32gerpp + + + xvf64ger + __builtin_mma_xvf64ger + + + xvf64gernn + __builtin_mma_xvf64gernn + + + xvf64gernp + __builtin_mma_xvf64gernp + + + xvf64gerpn + __builtin_mma_xvf64gerpn + + + xvf64gerpp + __builtin_mma_xvf64gerpp + + + xvi16ger2 + __builtin_mma_xvi16ger2 + + + xvi16ger2pp + __builtin_mma_xvi16ger2pp + + + xvi16ger2s + __builtin_mma_xvi16ger2s + + + xvi16ger2spp + __builtin_mma_xvi16ger2spp + + + xvi4ger8 + __builtin_mma_xvi4ger8 + + + xvi4ger8pp + __builtin_mma_xvi4ger8pp + + + xvi8ger4 + __builtin_mma_xvi8ger4 + + + xvi8ger4pp + __builtin_mma_xvi8ger4pp + + + xvi8ger4spp + __builtin_mma_xvi8ger4spp + diff --git a/Intrinsics_Reference/ch_techniques.xml b/Intrinsics_Reference/ch_techniques.xml index 2ed5900..b391b4e 100644 --- a/Intrinsics_Reference/ch_techniques.xml +++ b/Intrinsics_Reference/ch_techniques.xml @@ -113,9 +113,10 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_techniques"> references. (restrict can be used only in C when compiling for the C99 standard or later. 
__restrict__ is a language extension, available - in GCC, Clang, and the XL compilers, that can be used - without restriction for both C and C++. See your compiler's - user manual for details.) + in GCC, Clang, and the XL and + Open XL compilers, that can be used without + restriction for both C and C++. See your compiler's user + manual for details.) Suppose you have a function that takes two pointer @@ -142,8 +143,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_techniques"> This reference provides intrinsics that are guaranteed to be - portable across compliant compilers. In particular, both the - GCC and Clang compilers for Power implement the intrinsics in + portable across compliant compilers. In particular, the GCC, Clang, and Open XL + compilers for Power implement the intrinsics in this manual. The compilers may each implement many more intrinsics, but the ones in this manual are the only ones guaranteed to be portable. So if you are using an interface not @@ -204,10 +206,15 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_techniques"> responsible for following the calling conventions established by the ABI (see ). Again, it is best to look at examples. One place to find well-written - .S files is in the GLIBC project. You can also + .S files is in the GNU C Library project (see ). You can also study the assembly output from your favorite compiler, which can be obtained with the -S or similar option, or by - using the objdump utility. + using the objdump utility: + + + objdump -dr <binary or object file>
@@ -219,7 +226,8 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_techniques">
x86 Vector Portability Headers - Recent versions of the GCC and Clang open-source compilers + Recent versions of the GCC, + Clang, and Open XL compilers for Power provide "drop-in" portability headers for portions of the Intel Architecture Instruction Set Extensions (see ). These headers mirror the APIs @@ -243,14 +251,18 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_techniques"> Access to the portability APIs occurs automatically when including one of the corresponding Intel header files, such as - <mmintrin.h>. + <mmintrin.h>. You must also compile with + -DNO_WARN_X86_INTRINSICS to opt into using the + headers.
The Power Vector Library (pveclib) The Power Vector Library, also known as pveclib, is a separate project available from - github (see ). The + GitHub (see ). The pveclib project builds on top of the intrinsics described in this manual to provide higher-level vector interfaces that are highly portable. The goals of the project diff --git a/Intrinsics_Reference/ch_vec_reference.xml b/Intrinsics_Reference/ch_vec_reference.xml index c7866b9..f5a50cf 100644 --- a/Intrinsics_Reference/ch_vec_reference.xml +++ b/Intrinsics_Reference/ch_vec_reference.xml @@ -51,7 +51,8 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> A description of any unusual characteristics of the - intrinsic when different target endiannesses are in force. + intrinsic when the target is + big-endian versus little-endian. If the semantics of the intrinsic in big-endian and little-endian modes are identical, the description will read "None."; @@ -134,7 +135,10 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> ISA 3.1 or later. This form is only available starting with PowerISA 3.1, corresponding to - POWER10 servers. + POWER10 servers. The Power Vector Library (see ) provides equivalent + POWER7/POWER8/POWER9 implementations for many ISA 3.1 vector + instructions, which may be preferred for portability. @@ -1591,7 +1595,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -1641,7 +1645,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> - + @@ -2094,7 +2098,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -2487,7 +2491,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -2966,7 +2970,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: - Not yet reviewed. 
+ Reviewed by Paul Clarke. @@ -3356,7 +3360,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -3843,7 +3847,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -4346,6 +4350,12 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Endian considerations: None. + + Notes: If an element of + either a or b contains a NaN value, the condition + for that element is considered to be satisfied. + xvcmpeqdp. @@ -4457,6 +4467,12 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Endian considerations: None. + + Notes: If an element of + either a or b contains a NaN value, the condition + for that element is considered to be satisfied. + xvcmpgtdp. @@ -4568,6 +4584,12 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Endian considerations: None. + + Notes: If an element of + either a or b contains a NaN value, the condition + for that element is considered to be satisfied. + xvcmpgedp. @@ -4679,6 +4701,12 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Endian considerations: None. + + Notes: If an element of + either a or b contains a NaN value, the condition + for that element is considered to be satisfied. + xvcmpgtdp. @@ -5532,8 +5560,8 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> None. - Restrictions: - Not yet reviewed. + Review status: + Reviewed by Paul Clarke. @@ -6075,7 +6103,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -6589,7 +6617,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -7007,7 +7035,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: - Not yet reviewed. + Reviewed by Paul Clarke. 
@@ -7472,7 +7500,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -7999,7 +8027,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -8540,6 +8568,12 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Endian considerations: None. + + Notes: If an element of + either a or b contains a NaN value, the condition + for that element is considered to be satisfied. + xvcmpgedp. @@ -8662,6 +8696,12 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Endian considerations: None. + + Notes: If an element of + either a or b contains a NaN value, the condition + for that element is considered to be satisfied. + xvcmpgtdp. @@ -8784,6 +8824,12 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Endian considerations: None. + + Notes: If an element of + either a or b contains a NaN value, the condition + for that element is considered to be satisfied. + xvcmpgedp. @@ -8906,6 +8952,12 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Endian considerations: None. + + Notes: If an element of + either a or b contains a NaN value, the condition + for that element is considered to be satisfied. + xvcmpgtdp. @@ -10306,10 +10358,10 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> a - 0x0123456789abcdef + 0x0123456789abcdef - 0x0123456789abcdef + 0x0123456789abcdef @@ -10317,10 +10369,10 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> b - 0x0f0f0f0f0f0f0f0f + 0x0f0f0f0f0f0f0f0f - 0xffff0000ffff0000 + 0xffff0000ffff0000 @@ -10328,10 +10380,10 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> r - 0x02468ace13579bdf + 0x02468ace13579bdf - 0x4567cdef012389ab + 0x4567cdef012389ab @@ -10343,7 +10395,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> None. 
Review status: - Not yet reviewed. + Reviewed by Paul Clarke; changes made. @@ -10613,7 +10665,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> big-endian targets, and right-to-left for little-endian targets. Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -10752,7 +10804,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> big-endian targets, and right-to-left for little-endian targets. Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -10994,7 +11046,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -11438,7 +11490,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -11815,7 +11867,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -12178,7 +12230,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -12555,7 +12607,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -12916,7 +12968,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -14294,10 +14346,10 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> a - 0x0123456789abcdef + 0x0123456789abcdef - 0x0123456789abcdef + 0x0123456789abcdef @@ -14305,10 +14357,10 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> b - 0xaaaaaaaaaaaaaaaa + 0xaaaaaaaaaaaaaaaa - 0xcccccccccccccccc + 0xcccccccccccccccc @@ -14326,12 +14378,22 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> + + Consider element 0 in the above table. The first sixteen bits + of a are 0000 0001 0010 0011. 
+ The first sixteen bits of b + are 1010 1010 1010 1010. Applying the mask of b to a + means that we only consider the even-numbered bits of + a, which are 0 0 0 0 0 1 0 + 1. The number of leading zeros in this result is 5. + Endian considerations: None. Review status: - Not yet reviewed. + Reviewed by Paul Clarke; changes made. @@ -14436,7 +14498,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> (lxvl) instruction. Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -15327,10 +15389,10 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> a - 0xfedcba9876543210 + 0xfedcba9876543210 - 0xfedcba9876543210 + 0xfedcba9876543210 @@ -15338,10 +15400,10 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> b - 0xaaaaaaaaaaaaaaaa + 0xaaaaaaaaaaaaaaaa - 0xcccccccccccccccc + 0xcccccccccccccccc @@ -15359,12 +15421,22 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> + + Consider element 0 in the above table. The last sixteen bits + of a are 0011 0010 0001 0000. + The last sixteen bits of b + are 1010 1010 1010 1010. Applying the mask of b to a + means that we only consider the even-numbered bits of + a, which are 0 1 0 1 0 0 0 + 0. The number of trailing zeros in this result is 4. + Endian considerations: None. Review status: - Not yet reviewed. + Reviewed by Paul Clarke; changes made. @@ -15712,7 +15784,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: - Not yet reviewed. + Reviewed by Paul Clarke. Notes: @@ -15830,7 +15902,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: - Not yet reviewed. + Reviewed by Paul Clarke. Notes: @@ -15962,7 +16034,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: Changes other than __int128 reviewed by Paul Clarke and Jinsong Ji. Added 0x8000_0000 example per Jinsong's comment. - Changes for __int128 have not yet been reviewed. 
+ Changes for __int128 have been reviewed by Paul Clarke. @@ -16312,8 +16384,8 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: Changes other than __int128 were reviewed by Paul Clarke and Jinsong Ji. Updated language about the shift amount per - Jinsong's comment. Changes for __int128 have not yet been - reviewed. + Jinsong's comment. Changes for __int128 were reviewed by Paul + Clarke. @@ -17822,7 +17894,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> None. Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -19208,7 +19280,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> None. Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -22177,13 +22249,49 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> role="bold">a are likewise numbered from left to right. + An example follows: + + + + + + + + + + + Example + + + + + + + a + + + 1010 1010 1010 1010 + + + + + r + + + 0xff00ff00 ff00ff00 ff00ff00 ff00ff00 + + + + + + Endian considerations: Because elements are numbered from left to right in the result vector regardless of endianness, the semantics of this built-in function differ on big-endian and little-endian targets. Review status: - Not yet reviewed. + Reviewed by Paul Clarke; example added. @@ -22272,13 +22380,49 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> role="bold">a are likewise numbered from left to right. + An example follows: + + + + + + + + + + + Example + + + + + + + a + + + 0 1 + + + + + r + + + 0x0000000000000000 ffffffffffffffff + + + + + + Endian considerations: Because elements are numbered from left to right in the result vector regardless of endianness, the semantics of this built-in function differ on big-endian and little-endian targets. Review status: - Not yet reviewed. + Reviewed by Paul Clarke; example added. 
@@ -22367,13 +22511,49 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> role="bold">a are likewise numbered from left to right. + An example follows: + + + + + + + + + + + Example + + + + + + + a + + + 10 10 01 01 + + + + + r + + + 0xffff0000 ffff0000 0000ffff 0000ffff + + + + + + Endian considerations: Because elements are numbered from left to right in the result vector regardless of endianness, the semantics of this built-in function differ on big-endian and little-endian targets. Review status: - Not yet reviewed. + Reviewed by Paul Clarke; example added. @@ -22448,11 +22628,11 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> source vector according to a specified mask. Result value: - b must have a value of 0, 1, 2, - or 3. a contains a bit mask - where the high-order bit in each element is set if the permute - mask should specify expansion or compression of that element, as - explained below. + In what follows, b must have a + value of 0, 1, 2, or 3. a + contains a bit mask where the high-order bit in each element is + set if the permute mask should specify expansion or compression + of that element, as explained below. If b = 0, must be reversed by the compiler for little-endian targets. Notes: - See the descriptions of xxgenpcvbm et al. in the - Power Instruction Set Architecture, Version 3.1B Specification - () for examples of how to use - this function in practice. + See the Programming Notes following the descriptions of + xxgenpcvbm et al. in the Power Instruction Set + Architecture, Version 3.1B Specification () for examples of how to use this + function in practice. Review status: - Not yet reviewed. + Reviewed by Paul Clarke; minor changes made. @@ -22691,15 +22872,15 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Result value: All bits of r are set to 0 if - the bit in a is equal to 0. - Otherwise all bits of r are set - to 1. + the low-order bit in a is equal + to 0. 
Otherwise all bits of r + are set to 1. Endian considerations: None. Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -22788,13 +22969,49 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> role="bold">a are likewise numbered from left to right. + An example follows: + + + + + + + + + + + Example + + + + + + + a + + + 10 01 + + + + + r + + + 0xffffffff00000000 00000000ffffffff + + + + + + Endian considerations: Because elements are numbered from left to right in the result vector regardless of endianness, the semantics of this built-in function differ on big-endian and little-endian targets. Review status: - Not yet reviewed. + Reviewed by Paul Clarke; example added. @@ -22891,7 +23108,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> None. Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -27107,7 +27324,8 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: Changes other than __int128 were reviewed by Paul Clarke and - Jinsong Ji. Changes for __int128 have not yet been reviewed. + Jinsong Ji. Changes for __int128 have been reviewed by Paul + Clarke. @@ -27515,7 +27733,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> operation using the source vectors. Result value: - There are two cases: + There are three cases: @@ -27542,6 +27760,12 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> role="bold">r. + + + All operations are performed using 32-bit modular + arithmetic. + + @@ -27570,19 +27794,52 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> role="bold">r. + + + All operations are performed using 32-bit modular + arithmetic. + + + + + + + + When a is of type + vector signed long long or vector unsigned long long, + r is + computed as follows: + + + + Each of the two doubleword elements of a is multiplied by the + corresponding doubleword element of b. 
+ + + + + The sum of these two doubleword products is added + to the contents of c and placed in r. + + - All operations are performed using 32-bit modular arithmetic. + All operations are performed + using 32-bit modular arithmetic. Endian considerations: None. - Endian considerations: - Changes not yet reviewed. + Review status: + Reviewed by Paul Clarke; changes made. @@ -27792,7 +28049,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> None. Review status: - Changes have not been reviewed. + Reviewed by Paul Clarke. @@ -28478,7 +28735,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: - Changes not yet reviewed. + Reviewed by Paul Clarke. @@ -28964,7 +29221,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: - Changes not yet reviewed. + Reviewed by Paul Clarke. @@ -32630,10 +32887,10 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> a - 0xfedcba9876543210 + 0xfedcba9876543210 - 0xfedcba9876543210 + 0xfedcba9876543210 @@ -32641,10 +32898,10 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> b - 0x8888888888888888 + 0x8888888888888888 - 0xcccccccccccccccc + 0xcccccccccccccccc @@ -32652,22 +32909,31 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> r - 0x0088 0080 0008 0000 + 0x0088 0080 0008 0000 - 0x4c 48 44 40 0c 08 04 00 + 0x4c 48 44 40 0c 08 04 00 - + + Consider element 0 in the above table. Since b contains 16 set bits, only the + rightmost 16 bits of a are + pertinent to the result. These bits are 0011 0010 0001 0000. + These bits are distributed into r at the bit positions that are set in + b, providing the result shown + in the table. + Endian considerations: None. Review status: - Not yet reviewed. + Reviewed by Paul Clarke; changes made. 
@@ -33948,10 +34214,10 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> a - 0x0088 0080 0008 0000 + 0x0088 0080 0008 0000 - 0x4c 48 44 40 0c 08 04 00 + 0x4c 48 44 40 0c 08 04 00 @@ -33959,10 +34225,10 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> b - 0x8888888888888888 + 0x8888888888888888 - 0xcccccccccccccccc + 0xcccccccccccccccc @@ -33970,22 +34236,31 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> r - 0x0000000000003210 + 0x0000000000003210 - 0x0000000076543210 + 0x0000000076543210 - + + Consider element 0 in the above table. Since b contains 16 set bits, all but the + rightmost 16 bits of r are + set to zero. The remaining 16 bits are set by extracting + the bits of a at the + positions of the bits set in b. Those bits are 0011 0010 0001 0000, + providing the result shown in the table. + Endian considerations: None. Review status: - Not yet reviewed. + Reviewed by Paul Clarke; changes made. @@ -35571,7 +35846,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -35916,7 +36191,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -36092,7 +36367,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -37897,7 +38172,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> achieve this. Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -37957,6 +38232,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> vextsb2w r,a + @@ -37979,6 +38255,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> vextsh2w r,a + @@ -38033,7 +38310,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> achieve this. Review status: - Not yet reviewed. + Reviewed by Paul Clarke. 
@@ -38097,6 +38374,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> vextsb2d r,a + @@ -38119,6 +38397,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> vextsh2d r,a + @@ -38141,6 +38420,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> vextsw2d r,a + @@ -38180,7 +38460,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> leftmost element for big endian. Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -38236,6 +38516,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> vextsd2q r,a + @@ -38276,7 +38557,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -41231,7 +41512,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -41526,7 +41807,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -44184,7 +44465,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> big-endian targets, and right-to-left for little-endian targets. Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -44549,7 +44830,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> big-endian targets, and right-to-left for little-endian targets. Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -46796,7 +47077,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> None. Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -47155,7 +47436,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> position in condition register field t. Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -47251,7 +47532,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> position in condition register field t. 
Review status: - Not yet reviewed. + Reviewed by Paul Clarke. @@ -51616,4 +51897,291 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref">
+ +
+ Built-In Vector Functions for Fortran + + + shows the + correspondence between the C/C++ types described in this + document and their Fortran equivalents. In Fortran, the + Boolean vector data types are represented by + VECTOR(UNSIGNED(n)). + + + Fortran Vector Data Types + + + + + + + + XL Fortran Vector Type + + + + + XL C/C++ Vector Type + + + + + + + + VECTOR(INTEGER(1)) + + + vector signed char + + + + + VECTOR(INTEGER(2)) + + + vector signed short + + + + + VECTOR(INTEGER(4)) + + + vector signed int + + + + + VECTOR(INTEGER(8)) + + + vector signed long long, vector signed long + The vector long types are deprecated due to their + ambiguity between 32-bit and 64-bit environments. The use + of the vector long long types is preferred. + + + + + + VECTOR(INTEGER(16)) + + + vector signed __int128 + + + + + VECTOR(UNSIGNED(1)) + + + vector unsigned char + + + + + VECTOR(UNSIGNED(2)) + + + vector unsigned short + + + + + VECTOR(UNSIGNED(4)) + + + vector unsigned int + + + + + VECTOR(UNSIGNED(8)) + + + vector unsigned long long, vector unsigned long + + + + + VECTOR(UNSIGNED(16)) + + + vector unsigned __int128 + + + + + VECTOR(REAL(4)) + + + vector float + + + + + VECTOR(REAL(8)) + + + vector double + + + + + VECTOR(PIXEL) + + + vector pixel + + + + +
+ + Because the Fortran language does not support pointers, vector + built-in functions that expect pointers to a base type take an + array element reference to indicate the address of a memory + location that is the subject of a memory access built-in + function. + + + Because the Fortran language does not support type casts, the + vec_convert and vec_concat built-in + functions described in the following pages are provided to + perform bit-exact type conversions between vector types. + + + +
+ vec_concat + Vector Concatenate + + r = vec_concat (a, b) + + + Purpose: + Concatenates two elements to form a vector. + + Result value: + The resulting vector consists of the two scalar elements, + a and b, assigned to elements 0 and 1 in + natural element order, respectively. + + Endian considerations: + None. + + + Supported type signatures for vec_concat + + + + + + + + + r + + + + + a + + + + + b + + + + + + + + + vector signed long long + + + + + signed long long + + + + + signed long long + + + + + + + vector unsigned long long + + + + + unsigned long long + + + + + unsigned long long + + + + + + + vector double + + + + + double + + + + + double + + + + + +
+
+ + +
+ vec_convert + Vector Convert + + r = vec_convert (a, b) + + + Purpose: + Converts a vector to a vector of a different type by + reinterpreting the bits of the vector. + + Result value: + In what follows, a and + b must be INTENT(IN) vectors. + If b is a variable, it need not + be defined. The result r has + the value of a interpreted as + the type of b. + + Endian considerations: + None. + +
+ +
+