diff --git a/Intrinsics_Reference/bk_main.xml b/Intrinsics_Reference/bk_main.xml index 29f658f..0949172 100644 --- a/Intrinsics_Reference/bk_main.xml +++ b/Intrinsics_Reference/bk_main.xml @@ -32,7 +32,7 @@ xml:id="bk_main"> - Intrinsic Function Programming Reference + Vector Intrinsic Programming Reference @@ -50,11 +50,11 @@ - 2017 + 2018 OpenPOWER Foundation - Revision 0.8.0 + Revision 0.9.0 OpenPOWER @@ -88,14 +88,26 @@ - 2017-09-25 - - - - Version 0.8: Initial publication to private GitHub project. - - - + 2018-12-30 + + + + Version 0.9: Completed initial transfer of appendix + information from ELFv2 ABI. + + + + + + 2017-09-25 + + + + Version 0.8: Initial publication to private GitHub + project. + + + @@ -109,13 +121,14 @@ - + + - + diff --git a/Intrinsics_Reference/ch_vec_reference.xml b/Intrinsics_Reference/ch_vec_reference.xml index 1091dc6..eba18da 100644 --- a/Intrinsics_Reference/ch_vec_reference.xml +++ b/Intrinsics_Reference/ch_vec_reference.xml @@ -13859,30 +13859,37 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_ld - Vector Load + Vector Load Indexed r = vec_ld (a, b) Purpose: - Performs a 16-byte load at a 16-byte-aligned address. - - Result value: The scalar value - a - is added to the pointer value b, and - the result is rounded down to the nearest multiple of 16. The 16-byte - value at this address is loaded into r. + Loads a 16-byte vector from the memory address specified by the + displacement and the pointer, ignoring the four low-order bits + of the calculated address. + Result value: The value of + r is obtained by adding a and b, + masking off the four low-order bits of the result, and + loading the 16-byte vector from the resultant memory address. Endian considerations: None. + Notes: No Power compilers yet + support the vector _Float16 type, so those interfaces are currently + deferred. 
+ - Supported type signatures for vec_loge - + Supported type signatures for vec_ld + + + @@ -13895,605 +13902,652 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> a + + + b + + - Example Implementation + Example ISA 3.0 + Implementation + + + Restrictions - vector float + vector bool char - vector float + any integral type + + + vector bool char * - vlogefp r,a + lvx r,b,a + + + - - -
- -
- - - - vec_loge - Vector Base-2 Logarithm Estimate - - r = vec_loge (a) - - - Purpose: - Returns a vector containing estimates of the base-2 logarithms of the - corresponding elements of the source vector. - - Result value: Each element of - r contains an estimated value of the - base-2 logarithm of the corresponding element of a. - Endian considerations: - None. - - - - Supported type signatures for vec_loge - - - - - - - r - + vector signed char - - a - + any integral type - - Example Implementation + + signed char * + + + + lvx r,b,a + + + + - - - vector float + vector signed char - vector float + any integral type + + + vector signed char * - vlogefp r,a + lvx r,b,a + + + - - -
- -
- - - - vec_madd - Vector Multiply-Add - - r = vec_madd (a, b, c) - - - Purpose: - Returns a vector containing the results of performing a fused - multiply-add operation for each corresponding set of elements of the - source vectors. - - Result value: The value of each - element of r is the product of the - values of the corresponding elements of a and b, added - to the value of the corresponding element of c. - Endian considerations: - None. - - - - Supported type signatures for vec_madd - - - - - - - - - r - + vector unsigned char - - a - + any integral type - - b - + unsigned char * - - - c - + + + lvx r,b,a + - - Example Implementation + + - - - vector signed short - - - vector signed short + vector unsigned char - vector signed short + any integral type - vector signed short + vector unsigned char * - vmladduhm r,a,b,c + lvx r,b,a + + + - vector signed short - - - vector signed short + vector bool int - vector unsigned short + any integral type - vector unsigned short + vector bool int * - vmladduhm r,a,b,c + lvx r,b,a + + + - vector signed short - - - vector unsigned short + vector signed int - vector signed short + any integral type - vector signed short + signed int * - vmladduhm r,a,b,c + lvx r,b,a + + + - vector unsigned short - - - vector unsigned short + vector signed int - vector unsigned short + any integral type - vector unsigned short + vector signed int * - vmladduhm r,a,b,c + lvx r,b,a + + + - vector double + vector unsigned int - vector double + any integral type - vector double + unsigned int * + + + + lvx r,b,a + + + + + + - vector double + vector unsigned int + + + any integral type + + + vector unsigned int * - xvmaddmdp r/a,b,c + lvx r,b,a + + + - vector float + vector signed __int128 - vector float + any integral type - vector float + signed __int128 * + + + + lvx r,b,a + + + + + + - vector float + vector signed __int128 + + + any integral type + + + vector signed __int128 * - xvmaddmsp r/a,b,c + lvx r,b,a + + + - - -
- -
- - - - vec_madds - Vector Multiply-Add Saturated - - r = vec_madds (a, b, c) - - - Purpose: - Returns a vector containing the results of performing a saturated - multiply-high-and-add operation for each corresponding set of elements - of the source vectors. - - Result value: The value of each - element of r is produced as follows: - The values of the corresponding elements of a and b are - multiplied. The value of the 17 most-significant bits of this product - is then added, using 16-bit-saturated addition, to the value of the - corresponding element of c. - Endian considerations: - None. - - - - Supported type signatures for vec_madds - - - - - - - - - r - + vector unsigned __int128 - - a - + any integral type - - b - + unsigned __int128 * - - - c - + + + lvx r,b,a + - - Example Implementation + + - - - vector signed short + vector unsigned __int128 - vector signed short + any integral type - vector signed short + vector unsigned __int128 * + + + + lvx r,b,a + + + + + + + + + vector bool long long - vector signed short + any integral type + + + vector bool long long * - vmhaddshs r,a,b,c + lvx r,b,a + + + - - -
- -
- - - - vec_max - Vector Maximum - - r = vec_max (a, b)) - - - Purpose: - Returns a vector containing the maximum value from each set of - corresponding elements of the source vectors. - - Result value: - The value of each element of r is the - maximum of the values of the corresponding elements of a and b. - - Endian considerations: - None. - - - - Supported type signatures for vec_max - - - - - - - - - r - + + vector signed long long - - - a - + + any integral type - - - b - + + signed long long * - - Example Implementation + + + lvx r,b,a + + + + - - - vector signed char + vector signed long long - vector signed char + any integral type - vector signed char + vector signed long long * - vmaxsb r,a,b + lvx r,b,a + + + - vector unsigned char + vector unsigned long long - vector unsigned char + any integral type - vector unsigned char + unsigned long long * - vmaxub r,a,b + lvx r,b,a + + + - vector signed int + vector unsigned long long - vector signed int + any integral type - vector signed int + vector unsigned long long * - vmaxsw r,a,b + lvx r,b,a + + + - vector unsigned int + vector pixel - vector unsigned int + any integral type - vector unsigned int + vector pixel * - vmaxuw r,a,b + lvx r,b,a + + + - vector signed long long + vector bool short - vector signed long long + any integral type - vector signed long long + vector bool short * - vmaxsd r,a,b + lvx r,b,a + + + - vector unsigned long long + vector signed short - vector unsigned long long + any integral type - vector unsigned long long + signed short * - vmaxud r,a,b + lvx r,b,a + + + vector signed short - vector signed short + any integral type - vector signed short + vector signed short * - vmaxsh r,a,b + lvx r,b,a + + + vector unsigned short + + any integral type + + + unsigned short * + + + + lvx r,b,a + + + + + + + vector unsigned short - vector unsigned short + any integral type + + + vector unsigned short * - vmaxuh r,a,b + lvx r,b,a + + + vector double + + any integral type + + + double * + + + 
+ lvx r,b,a + + + + + + + vector double - vector double + any integral type + + + vector double * - xvmaxdp r,a,b + lvx r,b,a + + + vector float - vector float + any integral type - vector float + float * - xvmaxsp r,a,b + lvx r,b,a + + + - - -
- -
- - - - vec_mergee - Vector Merge Even - - r = vec_mergee (a, b) + + + vector float + + + any integral type + + + vector float * + + + + lvx r,b,a + + + + + + + + + vector _Float16 + + + any integral type + + + _Float16 * + + + + lvx r,b,a + + + + Deferred + + + + + vector _Float16 + + + any integral type + + + vector _Float16 * + + + + lvx r,b,a + + + + Deferred + + + + + + + + + + + vec_lde + Vector Load Element Indexed + + r = vec_lde (a, b) Purpose: - Merges the even-numbered values from two vectors. + Loads a single element into the position in the vector register + corresponding to its address, leaving the remaining elements of + the register undefined. + + Result value: + The integer value a is added to the + pointer value b. The resulting + address is rounded down to the nearest address that is a multiple of + es, where es is 1 for + char pointers, 2 for short pointers, and 4 for float or int pointers. + The element at this address is loaded into an element of r, leaving all other elements of r undefined. The position of the loaded + element in r is determined by taking the + address modulo 16. - Result value: The even-numbered - elements of a are stored into the - even-numbered elements of r. The - even-numbered elements of b are stored - into the odd-numbered elements of r. Endian considerations: - The element numbering within a register is left-to-right for big-endian - targets, and right-to-left for little-endian targets. + None. 
- Supported type signatures for vec_mergee - + Supported type signatures for vec_lde + - @@ -14512,158 +14566,105 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - Example LE Implementation - - - Example BE Implementation + Example ISA 3.0 + Implementation - vector bool int - - - vector bool int - - - vector bool int - - - - vmrgow r,b,a - - - - - vmrgew r,a,b - - - - - - vector signed int + vector signed char - vector signed int + any integral type - vector signed int - - - - vmrgow r,b,a - + signed char * - vmrgew r,a,b + lvebx r,b,a - vector unsigned int + vector unsigned char - vector unsigned int + any integral type - vector unsigned int - - - - vmrgow r,b,a - + unsigned char * - vmrgew r,a,b + lvebx r,b,a - vector bool long long + vector signed int - vector bool long long + any integral type - vector bool long long - - - - xxpermdi r,b,a,3 - + signed int * - xxpermdi r,a,b,0 + lvewx r,b,a - vector signed long long + vector unsigned int - vector signed long long + any integral type - vector signed long long - - - - xxpermdi r,b,a,3 - + unsigned int * - xxpermdi r,a,b,0 + lvewx r,b,a - vector unsigned long long + vector signed short - vector unsigned long long + any integral type - vector unsigned long long - - - - xxpermdi r,b,a,3 - + signed short * - xxpermdi r,a,b,0 + lvehx r,b,a - vector double + vector unsigned short - vector double + any integral type - vector double - - - - xxpermdi r,b,a,3 - + unsigned short * - xxpermdi r,a,b,0 + lvehx r,b,a @@ -14672,19 +14673,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector float - vector float + any integral type - vector float - - - - vmrgow r,b,a - + float * - vmrgew r,a,b + lvewx r,b,a @@ -14695,42 +14691,40 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_mergeh - Vector Merge High + + vec_ldl + Vector Load Indexed Least Recently Used - r = vec_mergeh (a, b) + r = vec_ldl (a, b) Purpose: - Merges the 
first halves (in element order) of two vectors. + Loads a 16-byte vector from the memory address specified by the + displacement and the pointer, ignoring the four low-order bits + of the calculated address, and marks the cache line loaded from + as least recently used. - Result value: The - nth element of r, - if n is an even number, is given the value of the - (n/2)th element of a. The (n+1)th element - of r, if n is an - even number, is given the value of the (n/2)th - element of b. + Result value: The value of + r is obtained by adding a and b, + masking off the four low-order bits of the result, and + loading the 16-byte vector from the resultant memory address. Endian considerations: - The element numbering within a register is left-to-right for big-endian - targets, and right-to-left for little-endian targets. + None. - Notes: - No Power compilers yet support the vector _Float16 type, so that - interface is currently deferred. + Notes: No Power compilers yet + support the vector _Float16 type, so those interfaces are currently + deferred.
- Supported type signatures for vec_mergeh - + Supported type signatures for vec_ldl + - @@ -14749,10 +14743,8 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - Example LE Implementation - - - Example BE Implementation + Example ISA 3.0 + Implementation Restrictions @@ -14765,19 +14757,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector bool char - vector bool char + any integral type - vector bool char - - - - vmrglb r,b,a - + vector bool char * - vmrghb r,a,b + lvxl r,b,a @@ -14789,19 +14776,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed char - vector signed char + any integral type - vector signed char - - - - vmrglb r,b,a - + signed char * - vmrghb r,a,b + lvxl r,b,a @@ -14810,22 +14792,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector unsigned char + vector signed char - vector unsigned char + any integral type - vector unsigned char - - - - vmrglb r,b,a - + vector signed char * - vmrghb r,a,b + lvxl r,b,a @@ -14834,22 +14811,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector bool int + vector unsigned char - vector bool int + any integral type - vector bool int - - - - vmrglw r,b,a - + unsigned char * - vmrghw r,a,b + lvxl r,b,a @@ -14858,46 +14830,36 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector signed int + vector unsigned char - vector signed int + any integral type - vector signed int + vector unsigned char * - vmrglw r,b,a + lvxl r,b,a - - vmrghw r,a,b - - - - + - vector unsigned int + vector bool int - vector unsigned int + any integral type - vector unsigned int - - - - vmrglw r,b,a - + vector bool int * - vmrghw r,a,b + lvxl r,b,a @@ -14906,22 +14868,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector bool long long + vector signed int - vector bool long long + any integral 
type - vector bool long long - - - - xxpermdi r,b,a,3 - + signed int * - xxpermdi r,a,b,0 + lvxl r,b,a @@ -14930,22 +14887,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector signed long long + vector signed int - vector signed long long + any integral type - vector signed long long - - - - xxpermdi r,b,a,3 - + vector signed int * - xxpermdi r,a,b,0 + lvxl r,b,a @@ -14954,22 +14906,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector unsigned long long + vector unsigned int - vector unsigned long long + any integral type - vector unsigned long long - - - - xxpermdi r,b,a,3 - + unsigned int * - xxpermdi r,a,b,0 + lvxl r,b,a @@ -14978,22 +14925,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector pixel + vector unsigned int - vector pixel + any integral type - vector pixel - - - - vmrglh r,b,a - + vector unsigned int * - vmrghh r,a,b + lvxl r,b,a @@ -15002,22 +14944,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector bool short + vector bool long long - vector bool short + any integral type - vector bool short - - - - vmrglh r,b,a - + vector bool long long * - vmrghh r,a,b + lvxl r,b,a @@ -15026,22 +14963,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector signed short + vector signed long long - vector signed short + any integral type - vector signed short - - - - vmrglh r,b,a - + signed long long * - vmrghh r,a,b + lvxl r,b,a @@ -15050,22 +14982,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector unsigned short + vector signed long long - vector unsigned short + any integral type - vector unsigned short + vector signed long long * - vmrglh r,b,a - - - - - vmrghh r,a,b + lvxl r,b,a @@ -15074,22 +15001,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector double + vector unsigned long long - vector 
double + any integral type - vector double - - - - xxpermdi r,b,a,3 - + unsigned long long * - xxpermdi r,a,b,0 + lvxl r,b,a @@ -15098,22 +15020,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector float + vector unsigned long long - vector float + any integral type - vector float - - - - vmrglw r,b,a - + vector unsigned long long * - vmrghw r,a,b + lvxl r,b,a @@ -15122,121 +15039,55 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector _Float16 + vector pixel - vector _Float16 + any integral type - vector _Float16 + vector pixel * - vmrglh r,b,a + lvxl r,b,a - - vmrghh r,a,b - - - - Deferred + - - -
- -
- - - - vec_mergel - Vector Merge Low - - r = vec_mergel (a, b) - - - Purpose: - Merges the last halves (in element order) of two vectors. - - Result value: Let - m be the number of elements in r. The nth element of - r, if n is an even - number, is given the value of the m/2 + - (n/2)th element of a. The (n+1)th element - of r, if n is an - even number, is given the value of the m/2 + - (n/2)th element of b. - Endian considerations: - The element numbering within a register is left-to-right for big-endian - targets, and right-to-left for little-endian targets. - - Notes: - No Power compilers yet support the vector _Float16 type, so that - interface is currently deferred. - - - - Supported type signatures for vec_mergel - - - - - - - - - - r - + vector bool short - - a - + any integral type - - b - + vector bool short * - - Example LE Implementation - - - Example BE Implementation + + + lvxl r,b,a + - - Restrictions + + - - - vector bool char + vector signed short - vector bool char + any integral type - vector bool char - - - - vmrghb r,b,a - + signed short * - vmrglb r,a,b + lvxl r,b,a @@ -15245,22 +15096,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector signed char + vector signed short - vector signed char + any integral type - vector signed char - - - - vmrghb r,b,a - + vector signed short * - vmrglb r,a,b + lvxl r,b,a @@ -15269,22 +15115,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector unsigned char + vector unsigned short - vector unsigned char + any integral type - vector unsigned char - - - - vmrghb r,b,a - + unsigned short * - vmrglb r,a,b + lvxl r,b,a @@ -15293,22 +15134,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector bool int + vector unsigned short - vector bool int + any integral type - vector bool int - - - - vmrghw r,b,a - + vector unsigned short * - vmrglw r,a,b + lvxl r,b,a @@ -15317,22 +15153,17 @@ 
xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector signed int + vector double - vector signed int + any integral type - vector signed int - - - - vmrghw r,b,a - + double * - vmrglw r,a,b + lvxl r,b,a @@ -15341,22 +15172,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector unsigned int + vector double - vector unsigned int + any integral type - vector unsigned int - - - - vmrghw r,b,a - + vector double * - vmrglw r,a,b + lvxl r,b,a @@ -15365,22 +15191,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector bool long long + vector float - vector bool long long + any integral type - vector bool long long - - - - xxpermdi r,b,a,0 - + float * - xxpermdi r,a,b,3 + lvxl r,b,a @@ -15389,22 +15210,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector signed long long + vector float - vector signed long long + any integral type - vector signed long long - - - - xxpermdi r,b,a,0 - + vector float * - xxpermdi r,a,b,3 + lvxl r,b,a @@ -15413,76 +15229,170 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector unsigned long long + vector _Float16 - vector unsigned long long + any integral type - vector unsigned long long - - - - xxpermdi r,b,a,0 - + _Float16 * - xxpermdi r,a,b,3 + lvxl r,b,a - - + + Deferred - vector pixel + vector _Float16 - vector pixel + any integral type - vector pixel - - - - vmrghh r,b,a - + vector _Float16 * - vmrglh r,a,b + lvxl r,b,a - - + + Deferred + + +
+ +
+ + + + vec_loge + Vector Base-2 Logarithm Estimate + + r = vec_loge (a) + + + Purpose: + Returns a vector containing estimates of the base-2 logarithms of the + corresponding elements of the source vector. + + Result value: Each element of + r contains an estimated value of the + base-2 logarithm of the corresponding element of a. + Endian considerations: + None. + + + + Supported type signatures for vec_loge + + + + + - vector bool short + + r + - vector bool short + + a + + + Example Implementation + + + + + - vector bool short + vector float - - - vmrghh r,b,a - + + vector float - vmrglh r,a,b + vlogefp r,a - - + + + +
+ +
+ + + + vec_madd + Vector Multiply-Add + + r = vec_madd (a, b, c) + + + Purpose: + Returns a vector containing the results of performing a fused + multiply-add operation for each corresponding set of elements of the + source vectors. + + Result value: The value of each + element of r is the product of the + values of the corresponding elements of a and b, added + to the value of the corresponding element of c. + Endian considerations: + None. + + + + Supported type signatures for vec_madd + + + + + + + + + + + r + + + + + a + + + + + b + + + + + c + + + + Example Implementation + + vector signed short @@ -15493,146 +15403,142 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed short - - - vmrghh r,b,a - + + vector signed short - vmrglh r,a,b + vmladduhm r,a,b,c - - - - vector unsigned short + vector signed short - vector unsigned short + vector signed short vector unsigned short - - - vmrghh r,b,a - + + vector unsigned short - vmrglh r,a,b + vmladduhm r,a,b,c - - - - vector double + vector signed short - vector double + vector unsigned short - vector double + vector signed short - - - xxpermdi r,b,a,0 - + + vector signed short - xxpermdi r,a,b,3 + vmladduhm r,a,b,c - - - - vector float + vector unsigned short - vector float + vector unsigned short - vector float + vector unsigned short - - - vmrghw r,b,a - + + vector unsigned short - vmrglw r,a,b + vmladduhm r,a,b,c - - - - vector _Float16 + vector double - vector _Float16 + vector double - vector _Float16 + vector double - - - vmrghh r,b,a - + + vector double - vmrglh r,a,b + xvmaddmdp r/a,b,c + + - Deferred + vector float - - - -
- -
- + + vector float + + + vector float + + + vector float + + + + xvmaddmsp r/a,b,c + + + + + + - - vec_mergeo - Vector Merge Odd + + + + + vec_madds + Vector Multiply-Add Saturated - r = vec_mergeo (a, b) + r = vec_madds (a, b, c) Purpose: - Merges the odd-numbered values from two vectors. + Returns a vector containing the results of performing a saturated + multiply-high-and-add operation for each corresponding set of elements + of the source vectors. - Result value: The odd-numbered - elements of a are stored into the - even-numbered elements of r. The - odd-numbered elements of b are stored - into the odd-numbered elements of r. + Result value: The value of each + element of r is produced as follows: + The values of the corresponding elements of a and b are + multiplied. The value of the 17 most-significant bits of this product + is then added, using 16-bit-saturated addition, to the value of the + corresponding element of c. Endian considerations: - The element numbering within a register is left-to-right for big-endian - targets, and right-to-left for little-endian targets. + None. - Supported type signatures for vec_mergeo + Supported type signatures for vec_madds @@ -15656,96 +15562,154 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> b - - Example LE Implementation + + + c + - Example BE Implementation + Example Implementation - vector bool int + vector signed short - vector bool int + vector signed short - vector bool int + vector signed short - - - vmrgew r,b,a - + + vector signed short - vmrgow r,a,b + vmhaddshs r,a,b,c + + +
+ +
+ + + + vec_max + Vector Maximum + + r = vec_max (a, b) + + + Purpose: + Returns a vector containing the maximum value from each set of + corresponding elements of the source vectors. + + Result value: + The value of each element of r is the + maximum of the values of the corresponding elements of a and b. + + Endian considerations: + None. + + + + Supported type signatures for vec_max + + + + + + - - vector signed int + + + r + + + + + a + + + + + b + + + Example Implementation + + + + + - vector signed int + vector signed char - vector signed int + vector signed char - - - vmrgew r,b,a - + + vector signed char - vmrgow r,a,b + vmaxsb r,a,b - vector unsigned int + vector unsigned char - vector unsigned int + vector unsigned char - vector unsigned int - - - - vmrgew r,b,a - + vector unsigned char - vmrgow r,a,b + vmaxub r,a,b - vector bool long long + vector signed int - vector bool long long + vector signed int - vector bool long long + vector signed int - xxpermdi r,b,a,0 + vmaxsw r,a,b + + + + vector unsigned int + + + vector unsigned int + + + vector unsigned int + - xxpermdi r,a,b,3 + vmaxuw r,a,b @@ -15761,12 +15725,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - xxpermdi r,b,a,0 - - - - - xxpermdi r,a,b,3 + vmaxsd r,a,b @@ -15782,33 +15741,55 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - xxpermdi r,b,a,0 + vmaxud r,a,b + + + + vector signed short + + + vector signed short + + + vector signed short + - xxpermdi r,a,b,3 + vmaxsh r,a,b - vector double + vector unsigned short - vector double + vector unsigned short - vector double + vector unsigned short - xxpermdi r,b,a,0 + vmaxuh r,a,b + + + + vector double + + + vector double + + + vector double + - xxpermdi r,a,b,3 + xvmaxdp r,a,b @@ -15824,12 +15805,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vmrgew r,b,a - - - - - vmrgow r,a,b + xvmaxsp r,a,b @@ -15840,143 +15816,79 @@ 
xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_mfvscr - Vector Move From Vector Status and Control Register + + vec_mergee + Vector Merge Even - r = vec_mfvscr () + r = vec_mergee (a, b) Purpose: - Copies the contents of the Vector Status and Control Register into the - result vector. - - Result value: The high-order 16 - bits of the VSCR are copied into the seventh element of r, using big-endian (left-to-right) order. The - low-order 16 bits of the VSCR are copied into the eighth element of - r, using big-endian order. All other - elements of r are set to zero. + Merges the even-numbered values from two vectors. + Result value: The even-numbered + elements of a are stored into the + even-numbered elements of r. The + even-numbered elements of b are stored + into the odd-numbered elements of r. Endian considerations: - The contents of the VSCR are placed in the low-order 32 bits of the - result vector, regardless of endianness. + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets.
- Supported type signatures for vec_mfvscr - - - - - - - - r - - - - Example - Implementation - - - - - - - vector unsigned short - - - - mfvscr a - - - - - -
- -
- - - - vec_min - Vector Minimum - - r = vec_min (a, b) - - - Purpose: - Returns a vector containing the minimum value from each set of - corresponding elements of the source vectors. - - Result value: The value of each - element of r is the minimum of the - values of the corresponding elements of a and b. - Endian considerations: - None. - - - - Supported type signatures for vec_min - + Supported type signatures for vec_mergee + + - + r - + a - + b - Example Implementation + Example LE Implementation + + + Example BE Implementation - vector signed char + vector bool int - vector signed char + vector bool int - vector signed char + vector bool int - vminsb r,a,b + vmrgow r,b,a - - - - vector unsigned char - - - vector unsigned char - - - vector unsigned char - - vminub r,a,b + vmrgew r,a,b @@ -15992,7 +15904,12 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vminsw r,a,b + vmrgow r,b,a + + + + + vmrgew r,a,b @@ -16008,71 +15925,75 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vminuw r,a,b + vmrgow r,b,a - - - - vector signed long long - - - vector signed long long - - - vector signed long long - - vminsd r,a,b + vmrgew r,a,b - vector unsigned long long + vector bool long long - vector unsigned long long + vector bool long long - vector unsigned long long + vector bool long long - vminud r,a,b + xxpermdi r,b,a,3 + + + + + xxpermdi r,a,b,0 - vector signed short + vector signed long long - vector signed short + vector signed long long - vector signed short + vector signed long long - vminsh r,a,b + xxpermdi r,b,a,3 + + + + + xxpermdi r,a,b,0 - vector unsigned short + vector unsigned long long - vector unsigned short + vector unsigned long long - vector unsigned short + vector unsigned long long - vminuh r,a,b + xxpermdi r,b,a,3 + + + + + xxpermdi r,a,b,0 @@ -16088,7 +16009,12 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - xvmindp r,a,b + xxpermdi r,b,a,3 + + + + + 
xxpermdi r,a,b,0 @@ -16104,7 +16030,12 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - xvminsp r,a,b + vmrgow r,b,a + + + + + vmrgew r,a,b @@ -16115,38 +16046,42 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_mradds - Vector Multiply-High Round and Add Saturated + + vec_mergeh + Vector Merge High - r = vec_mradds (a, b, c) + r = vec_mergeh (a, b) Purpose: - Returns a vector containing the results of performing a saturated - multiply-high-round-and-add operation for each corresponding set of - elements of the source vectors. + Merges the first halves (in element order) of two vectors. - Result value: The value of each - element of r is produced as follows. - The values of the corresponding elements of a and b are - multiplied and rounded such that the 15 least-significant bits are 0. - The value of the 17 most-significant bits of this rounded product is - then added, using 16-bit-saturated addition, to the value of the - corresponding element of c. + Result value: The + nth element of r, + if n is an even number, is given the value of the + (n/2)th element of a. The (n+1)th element + of r, if n is an + even number, is given the value of the (n/2)th + element of b. Endian considerations: - None. + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets. + + Notes: + No Power compilers yet support the vector _Float16 type, so that + interface is currently deferred.
- Supported type signatures for vec_mradds - + Supported type signatures for vec_mergeh + + @@ -16164,356 +16099,285 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> b - - - c - + + Example LE Implementation - Example Implementation + Example BE Implementation + + + Restrictions - vector signed short + vector bool char - vector signed short + vector bool char - vector signed short + vector bool char + + + + vmrglb r,b,a + + + + + vmrghb r,a,b + + + + + + - vector signed short + vector signed char + + + vector signed char + + + vector signed char - vmhraddshs r,a,b,c + vmrglb r,b,a + + + vmrghb r,a,b + + + + + - - -
- -
- - - - vec_msub - Vector Multiply-Subtract - - r = vec_msub (a, b, c) - - - Purpose: - Returns a vector containing the results of performing a multiply-subtract - operation using the source vectors. - - Result value: Each element of - r is produced by multiplying the - corresponding element of a by the - corresponding element of b and then - subtracting the corresponding element of c. - Endian considerations: - None. - - - - Supported type signatures for vec_msub - - - - - - - - - r - + vector unsigned char - - a - + vector unsigned char - - b - + vector unsigned char - - - c - + + + vmrglb r,b,a + - - Example Implementation + + + vmrghb r,a,b + + + + - - - vector double + vector bool int - vector double + vector bool int - vector double + vector bool int - - vector double + + + vmrglw r,b,a + - xvmsubmdp r/a,b,c + vmrghw r,a,b + + + - vector float + vector signed int - vector float + vector signed int - vector float + vector signed int - - vector float + + + vmrglw r,b,a + - xvmsubmsp r/a,b,c + vmrghw r,a,b + + + - - -
- -
- - - - vec_msum - Vector Multiply-Sum - - r = vec_msum (a, b, c) - - - Purpose: - Returns a vector containing the results of performing a multiply-sum - operation using the source vectors. - - Result value: Assume that the - elements of each vector are numbered beginning with 0. If - a is a vector signed char or a vector - unsigned char vector, then let m be 4. Otherwise, - let m be 2. The value of each element - n of r is obtained - as follows. For p = mn to - mn + m – 1, multiply - element p of a - by element p of b. - Add the sum of these products to element n of - c. All additions are performed using - 32-bit modular arithmetic. - Endian considerations: - None. - - - - Supported type signatures for vec_msum - - - - - - - - - r - + vector unsigned int - - a - + vector unsigned int - - b - + vector unsigned int - - - c - + + + vmrglw r,b,a + - - Example Implementation + + + vmrghw r,a,b + + + + - - - vector signed int + vector bool long long - vector signed char + vector bool long long - vector unsigned char + vector bool long long - - vector signed int + + + xxpermdi r,b,a,3 + - vmsummbm r,a,b,c + xxpermdi r,a,b,0 + + + - vector signed int + vector signed long long - vector signed short + vector signed long long - vector signed short + vector signed long long - - vector signed int + + + xxpermdi r,b,a,3 + - vmsumshm r,a,b,c + xxpermdi r,a,b,0 + + + - vector unsigned int + vector unsigned long long - vector unsigned char + vector unsigned long long - vector unsigned char + vector unsigned long long - - vector unsigned int + + + xxpermdi r,b,a,3 + - vmsumubm r,a,b,c + xxpermdi r,a,b,0 + + + - vector unsigned int + vector pixel - vector unsigned short + vector pixel - vector unsigned short + vector pixel - - vector unsigned int + + + vmrglh r,b,a + - vmsumuhm r,a,b,c + vmrghh r,a,b + + + - - -
- -
- - - - vec_msums - Vector Multiply-Sum Saturated - - r = vec_msums (a, b, c) - - - Purpose: - Returns a vector containing the results of performing a saturated - multiply-sum operation using the source vectors. - - Result value: Assume that the - elements of each vector are numbered beginning with 0. The value of each - element n of r - is obtained as follows. For p = - 2n to 2n+1, multiply element - p of a by element - p of b. Add the - sum of these products to element n of - c. All additions are performed using - 32-bit saturated arithmetic. - Endian considerations: - None. - - - - Supported type signatures for vec_msums - - - - - - - - - r - + vector bool short - - a - + vector bool short - - b - + vector bool short - - - c - + + + vmrglh r,b,a + - - Example Implementation + + + vmrghh r,a,b + + + + - - - vector signed int + vector signed short vector signed short @@ -16521,18 +16385,23 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed short - - vector signed int + + + vmrglh r,b,a + - vmsumshs r,a,b,c + vmrghh r,a,b + + + - vector unsigned int + vector unsigned short vector unsigned short @@ -16540,14 +16409,91 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned short + + + vmrglh r,b,a + + + + + vmrghh r,a,b + + + + + + + - vector unsigned int + vector double + + + vector double + + + vector double - vmsumuhs r,a,b,c + xxpermdi r,b,a,3 + + + + + xxpermdi r,a,b,0 + + + + + + + + + vector float + + + vector float + + + vector float + + + + vmrglw r,b,a + + + + + vmrghw r,a,b + + + + + + + + + vector _Float16 + + + vector _Float16 + + + vector _Float16 + + + + vmrglh r,b,a + + + + + vmrghh r,a,b + + Deferred + @@ -16556,355 +16502,337 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_mtvscr - Vector Move to Vector Status and Control Register + + vec_mergel + Vector Merge Low - r = vec_mtvscr (a) + r = vec_mergel (a, b) Purpose: - Copies the 
given value into the Vector Status and Control Register. - The low-order 32 bits of a are copied - into the VSCR. + Merges the last halves (in element order) of two vectors. - - Result value: None. - + Result value: Let + m be the number of elements in r. The nth element of + r, if n is an even + number, is given the value of the m/2 + + (n/2)th element of a. The (n+1)th element + of r, if n is an + even number, is given the value of the m/2 + + (n/2)th element of b. Endian considerations: - None. + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets. + + Notes: + No Power compilers yet support the vector _Float16 type, so that + interface is currently deferred.
- Supported type signatures for vec_mtvscr - + Supported type signatures for vec_mergel + + + + - + r - + a + + + b + + - Example - Implementation + Example LE Implementation + + + Example BE Implementation + + + Restrictions - void + vector bool char vector bool char + + vector bool char + - mtvscr a + vmrghb r,b,a + + + + + vmrglb r,a,b + + + - void + vector signed char vector signed char + + vector signed char + - mtvscr a + vmrghb r,b,a + + + + + vmrglb r,a,b + + + - void + vector unsigned char vector unsigned char + + vector unsigned char + - mtvscr a + vmrghb r,b,a - - - - void - - - vector bool int - - mtvscr a + vmrglb r,a,b + + + - void + vector bool int - vector signed int + vector bool int + + + vector bool int - mtvscr a + vmrghw r,b,a - - - - void - - - vector unsigned int - - mtvscr a + vmrglw r,a,b + + + - void + vector signed int - vector pixel + vector signed int + + + vector signed int - mtvscr a + vmrghw r,b,a - - - - void - - - vector bool short - - mtvscr a + vmrglw r,a,b + + + - void + vector unsigned int - vector signed short + vector unsigned int + + + vector unsigned int - mtvscr a + vmrghw r,b,a - - - - void - - - vector unsigned short - - mtvscr a + vmrglw r,a,b - - - -
- -
- - - - vec_mul - Vector Multiply - - r = vec_mul (a, b) - - - Purpose: - Returns a vector containing the results of performing a multiply - operation using the source vectors. - - Result value: Each element of - r receives the product of - the corresponding elements of a and - b. - Endian considerations: - None. - - Notes: - - - - The example implementation for vector char assumes that the - address of the permute control vector for the vperm instruction - is in a register identified by pcv. Its value is - {1,17,3,19,5,21,7,23,9,25,11,27,13,29,15,31}. - - - - - There are currently no vector instructions to support vector long - long multiplication, so the compiler must perform two scalar - multiplies on the vector elements for this case. - - - - - - Supported type signatures for vec_mul - - - - - - - - - - r - - - - - a - - - - - b - - - - Example Implementation + + - - - vector signed char + vector bool long long - vector signed char + vector bool long long - vector signed char + vector bool long long - vmulesb t,a,b - vmulosb u,a,b - lxvw4x v,0,pcv - vperm r,t,u,v + xxpermdi r,b,a,0 - - - - vector unsigned char - - - vector unsigned char - - - vector unsigned char - - vmulesb t,a,b - vmulosb u,a,b - lxvw4x v,0,pcv - vperm r,t,u,v + xxpermdi r,a,b,3 + + + - vector signed int + vector signed long long - vector signed int + vector signed long long - vector signed int + vector signed long long - vmuluwm r,a,b + xxpermdi r,b,a,0 + + + + + xxpermdi r,a,b,3 + + + - vector unsigned int + vector unsigned long long - vector unsigned int + vector unsigned long long - vector unsigned int + vector unsigned long long - vmuluwm r,a,b + xxpermdi r,b,a,0 + + + + + xxpermdi r,a,b,3 + + + - vector signed long long + vector pixel - vector signed long long + vector pixel - vector signed long long + vector pixel - [scalarized] + vmrghh r,b,a + + + + + vmrglh r,a,b + + + - vector unsigned long long + vector bool short - vector unsigned long long + vector bool short - vector unsigned 
long long + vector bool short - [scalarized] + vmrghh r,b,a + + + + + vmrglh r,a,b + + + @@ -16918,10 +16846,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - xxspltib t,0 - vmladduhm r,a,b,t + vmrghh r,b,a + + + + + vmrglh r,a,b + + + @@ -16935,11 +16870,18 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - xxspltib t,0 - vmladduhm r,a,b,t + vmrghh r,b,a - + + + vmrglh r,a,b + + + + + + vector double @@ -16952,9 +16894,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - xvmuldp r,a,b + xxpermdi r,b,a,0 + + + + + xxpermdi r,a,b,3 + + + @@ -16968,9 +16918,41 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - xvmulsp r,a,b + vmrghw r,b,a + + + + + vmrglw r,a,b + + + + + + + + + vector _Float16 + + + vector _Float16 + + + vector _Float16 + + + + vmrghh r,b,a + + + + + vmrglh r,a,b + + Deferred + @@ -16979,29 +16961,29 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_mule - Vector Multiply Even + + vec_mergeo + Vector Merge Odd - r = vec_mule (a, b) + r = vec_mergeo (a, b) Purpose: - Multiplies the even-numbered elements of the source vectors to - produce the target vector. + Merges the odd-numbered values from two vectors. - Result value: Each element - n of r is the - product of element 2n of a and element 2n of - b. + Result value: The odd-numbered + elements of a are stored into the + even-numbered elements of r. The + odd-numbered elements of b are stored + into the odd-numbered elements of r. Endian considerations: The element numbering within a register is left-to-right for big-endian targets, and right-to-left for little-endian targets.
- Supported type signatures for vec_mule + Supported type signatures for vec_mergeo @@ -17034,24 +17016,45 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> + + + vector bool int + + + vector bool int + + + vector bool int + + + + vmrgew r,b,a + + + + + vmrgow r,a,b + + + vector signed int - vector signed short + vector signed int - vector signed short + vector signed int - vmulosh r,a,b + vmrgew r,b,a - vmulesh r,a,b + vmrgow r,a,b @@ -17060,19 +17063,40 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned int - vector unsigned short + vector unsigned int - vector unsigned short + vector unsigned int - vmulouh r,a,b + vmrgew r,b,a - vmuleuh r,a,b + vmrgow r,a,b + + + + + + vector bool long long + + + vector bool long long + + + vector bool long long + + + + xxpermdi r,b,a,0 + + + + + xxpermdi r,a,b,3 @@ -17081,19 +17105,19 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed long long - vector signed int + vector signed long long - vector signed int + vector signed long long - vmulosw r,a,b + xxpermdi r,b,a,0 - vmulesw r,a,b + xxpermdi r,a,b,3 @@ -17102,61 +17126,61 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned long long - vector unsigned int + vector unsigned long long - vector unsigned int + vector unsigned long long - vmulouw r,a,b + xxpermdi r,b,a,0 - vmuleuw r,a,b + xxpermdi r,a,b,3 - vector signed short + vector double - vector signed char + vector double - vector signed char + vector double - vmulosb r,a,b + xxpermdi r,b,a,0 - vmulesb r,a,b + xxpermdi r,a,b,3 - vector unsigned short + vector float - vector unsigned char + vector float - vector unsigned char + vector float - vmuloub r,a,b + vmrgew r,b,a - vmuleub r,a,b + vmrgow r,a,b @@ -17167,295 +17191,239 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_mulo - Vector Multiply Odd + + vec_mfvscr + Vector Move 
From Vector Status and Control Register - r = vec_mulo (a, b) + r = vec_mfvscr () Purpose: - Multiplies the odd-numbered elements of the source vectors to - produce the target vector. + Copies the contents of the Vector Status and Control Register into the + result vector. + + Result value: The high-order 16 + bits of the VSCR are copied into the seventh element of r, using big-endian (left-to-right) order. The + low-order 16 bits of the VSCR are copied into the eighth element of + r, using big-endian order. All other + elements of r are set to zero. - Result value: Each element - n of r is the - product of element 2n+1 of a and element 2n+1 of - b. Endian considerations: - The element numbering within a register is left-to-right for big-endian - targets, and right-to-left for little-endian targets. + The contents of the VSCR are placed in the low-order 32 bits of the + result vector, regardless of endianness.
- Supported type signatures for vec_mulo - + Supported type signatures for vec_mfvscr + - - - - + r - - - a - - - - - b - - - - Example LE Implementation - - Example BE Implementation + Example + Implementation - - vector signed int - - - vector signed short - - - vector signed short - - - - vmulesh r,a,b - - - - - vmulosh r,a,b - - - - - - vector unsigned int - vector unsigned short - - vector unsigned short - - vmuleuh r,a,b + mfvscr r - - - vmulouh r,a,b - + + + +
+ +
+ + + + vec_min + Vector Minimum + + r = vec_min (a, b) + + + Purpose: + Returns a vector containing the minimum value from each set of + corresponding elements of the source vectors. + + Result value: The value of each + element of r is the minimum of the + values of the corresponding elements of a and b. + Endian considerations: + None. + + + + Supported type signatures for vec_min + + + + + + + + + + r + + + + + a + + + + + b + + + + Example Implementation + + - vector signed long long + vector signed char - vector signed int + vector signed char - vector signed int - - - - vmulesw r,a,b - + vector signed char - vmulosw r,a,b + vminsb r,a,b - vector unsigned long long + vector unsigned char - vector unsigned int + vector unsigned char - vector unsigned int - - - - vmuleuw r,a,b - + vector unsigned char - vmulouw r,a,b + vminub r,a,b - vector signed short + vector signed int - vector signed char + vector signed int - vector signed char - - - - vmulesb r,a,b - + vector signed int - vmulosb r,a,b + vminsw r,a,b - vector unsigned short + vector unsigned int - vector unsigned char + vector unsigned int - vector unsigned char - - - - vmuleub r,a,b - + vector unsigned int - vmuloub r,a,b + vminuw r,a,b - - -
- -
- - - - vec_nabs - Vector Negated Absolute Value - - r = vec_nabs (a) - - - Purpose: - Returns a vector containing the negated absolute values of the contents - of the source vector. - - Result value: The value of each - element of r is the negated absolute - value of the corresponding element of a. For integer vectors, the arithmetic is - modular. - Endian considerations: - None. - - - - Supported type signatures for vec_nabs - - - - - - - - r - - - - - a - - - - Example Implementation + + vector signed long long - - - - - vector signed char + vector signed long long - vector signed char + vector signed long long - vspltisw t,0 - vsububm u,t,a - vminsb r,u,a + vminsd r,a,b - vector signed int + vector unsigned long long - vector signed int + vector unsigned long long + + + vector unsigned long long - vspltisw t,0 - vsubuwm u,t,a - vminsw r,u,a + vminud r,a,b - vector signed long long + vector signed short - vector signed long long + vector signed short + + + vector signed short - vspltisw t,0 - vsubudm u,t,a - vminsd r,u,a + vminsh r,a,b - vector signed short + vector unsigned short - vector signed short + vector unsigned short + + + vector unsigned short - vspltisw t,0 - vsubuhm u,t,a - vminsh r,u,a + vminuh r,a,b @@ -17466,9 +17434,12 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector double + + vector double + - xvnabsdp r,a + xvmindp r,a,b @@ -17479,9 +17450,12 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector float + + vector float + - xvnabssp r,a + xvminsp r,a,b @@ -17492,47 +17466,60 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_nand - Vector NAND + + vec_mradds + Vector Multiply-High Round and Add Saturated - r = vec_nand (a, b) + r = vec_mradds (a, b, c) Purpose: - Performs a bitwise NAND of the given vectors. 
+ Returns a vector containing the results of performing a saturated + multiply-high-round-and-add operation for each corresponding set of + elements of the source vectors. - Result value: r is the bitwise - NAND of a and b. + Result value: The value of each + element of r is produced as follows. + The values of the corresponding elements of a and b are + multiplied and rounded such that the 15 least-significant bits are 0. + The value of the 17 most-significant bits of this rounded product is + then added, using 16-bit-saturated addition, to the value of the + corresponding element of c. Endian considerations: None.
- Supported type signatures for vec_nand - + Supported type signatures for vec_mradds + + - + r - + a - + b + + + c + + Example Implementation @@ -17541,167 +17528,218 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector bool char + vector signed short - vector bool char + vector signed short - vector bool char - - - - xxlnand r,a,b - - - - - - vector signed char - - - vector signed char + vector signed short - vector signed char + vector signed short - xxlnand r,a,b + vmhraddshs r,a,b,c + + +
+ +
+ + + + vec_msub + Vector Multiply-Subtract + + r = vec_msub (a, b, c) + + + Purpose: + Returns a vector containing the results of performing a multiply-subtract + operation using the source vectors. + + Result value: Each element of + r is produced by multiplying the + corresponding element of a by the + corresponding element of b and then + subtracting the corresponding element of c. + Endian considerations: + None. + + + + Supported type signatures for vec_msub + + + + + + + - vector unsigned char - - - vector unsigned char - - - vector unsigned char - - - - xxlnand r,a,b - + + r + - - - vector bool int + + a + - vector bool int + + b + - vector bool int + + c + - - - xxlnand r,a,b - + + Example Implementation + + - vector signed int - - - vector signed int - - - vector signed int - - - - xxlnand r,a,b - + vector double - - - vector unsigned int + vector double - vector unsigned int + vector double - vector unsigned int + vector double - xxlnand r,a,b + xvmsubmdp r/a,b,c - vector bool long long - - - vector bool long long - - - vector bool long long - - - - xxlnand r,a,b - + vector float - - - vector signed long long + vector float - vector signed long long + vector float - vector signed long long + vector float - xxlnand r,a,b + xvmsubmsp r/a,b,c + + +
+ +
+ + + + vec_msum + Vector Multiply-Sum + + r = vec_msum (a, b, c) + + + Purpose: + Returns a vector containing the results of performing a multiply-sum + operation using the source vectors. + + Result value: Assume that the + elements of each vector are numbered beginning with 0. If + a is a vector signed char or a vector + unsigned char vector, then let m be 4. Otherwise, + let m be 2. The value of each element + n of r is obtained + as follows. For p = mn to + mn + m – 1, multiply + element p of a + by element p of b. + Add the sum of these products to element n of + c. All additions are performed using + 32-bit modular arithmetic. + Endian considerations: + None. + + + + Supported type signatures for vec_msum + + + + + + + - vector unsigned long long + + r + - vector unsigned long long + + a + - vector unsigned long long + + b + - - - xxlnand r,a,b - + + + c + + + + Example Implementation + + - vector bool short + vector signed int - vector bool short + vector signed char - vector bool short + vector unsigned char + + + vector signed int - xxlnand r,a,b + vmsummbm r,a,b,c - vector signed short + vector signed int vector signed short @@ -17709,120 +17747,115 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed short + + vector signed int + - xxlnand r,a,b + vmsumshm r,a,b,c - vector unsigned short + vector unsigned int - vector unsigned short + vector unsigned char - vector unsigned short + vector unsigned char + + + vector unsigned int - xxlnand r,a,b + vmsumubm r,a,b,c - vector double + vector unsigned int - vector double + vector unsigned short - vector double + vector unsigned short + + + vector unsigned int - xxlnand r,a,b - - - - - - vector float - - - vector float - - - vector float - - - - xxlnand r,a,b + vmsumuhm r,a,b,c
-
+ - - vec_ncipher_be - Vector AES Inverse Cipher Big-Endian + + + vec_msums + Vector Multiply-Sum Saturated - r = vec_ncipher_be (a, b) + r = vec_msums (a, b, c) Purpose: - Performs one round of the AES inverse cipher operation on an - intermediate state array a by using a - given round key b. - - - Result value: r contains the - resulting intermediate state, after one round of the AES inverse cipher - operation on intermediate state array a, - using the round key specified by b. + Returns a vector containing the results of performing a saturated + multiply-sum operation using the source vectors. - + Result value: Assume that the + elements of each vector are numbered beginning with 0. The value of each + element n of r + is obtained as follows. For p = + 2n to 2n+1, multiply element + p of a by element + p of b. Add the + sum of these products to element n of + c. All additions are performed using + 32-bit saturated arithmetic. Endian considerations: - All element and bit numberings of the AES inverse cipher operation use - big-endian (i.e., left-to-right) order, reflecting the underlying - hardware instruction. Unlike most of the vector intrinsics in this - chapter, vec_ncipher_be does not follow the bi-endian - programming model. + None. - - Notes: This intrinsic may - not yet be available in all implementations. - Supported type signatures for vec_ncipher_be - + Supported type signatures for vec_msums + + - + r - + a - + b + + + c + + Example Implementation @@ -17831,100 +17864,39 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector unsigned char + vector signed int - vector unsigned char + vector signed short - vector unsigned char + vector signed short + + + vector signed int - vncipher r,a,b + vmsumshs r,a,b,c - - -
- -
- - - - vec_ncipherlast_be - Vector AES Inverse Cipher Last Big-Endian - - r = vec_ncipherlast_be (a, b) - - - Purpose: - Performs the final round of the AES inverse cipher operation on an - intermediate state array a using the - specified round key b. - - - Result value: r contains the - resulting final state, after the final round of the AES inverse cipher - operation on intermediate state array a, - using the round key specified by b. - - - Endian considerations: - All element and bit numberings of the AES inverse cipher-last operation - use big-endian (i.e., left-to-right) order, reflecting the underlying - hardware instruction. Unlike most of the vector intrinsics in this - chapter, vec_ncipherlast_be does not follow the bi-endian - programming model. - - - Notes: This intrinsic may - not yet be available in all implementations. - - - Supported type signatures for vec_ncipherlast_be - - - - - - - - - r - - - - - a - - - - - b - - - - Example Implementation + + vector unsigned int - - - - - vector unsigned char + vector unsigned short - vector unsigned char + vector unsigned short - vector unsigned char + vector unsigned int - vncipherlast r,a,b + vmsumuhs r,a,b,c @@ -17935,29 +17907,27 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_nearbyint - Vector Nearby Integer + + vec_mtvscr + Vector Move to Vector Status and Control Register - r = vec_nearbyint (a) + r = vec_mtvscr (a) Purpose: - Returns a vector containing the floating-point integral values nearest to - the values of the corresponding elements of the source vector. + Copies the given value into the Vector Status and Control Register. + The low-order 32 bits of a are copied + into the VSCR. - Result value: Each element of - r contains the nearest representable - floating-point integral value to the value of the corresponding element - of a. 
When an input element value is - exactly between two integer values, the input value with the larger - absolute value is selected. + + Result value: None. + Endian considerations: None.
- Supported type signatures for vec_nearbyint + Supported type signatures for vec_mtvscr @@ -17975,166 +17945,139 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - Example Implementation + Example + Implementation - vector double + void - vector double + vector bool char - xvrdpi r,a + mtvscr a - vector float + void - vector float + vector signed char - xvrspi r,a + mtvscr a - - -
- -
- - - - vec_neg - Vector Negate - - r = vec_neg (a) - - - Purpose: - Returns a vector containing the negated values of the contents of the - source vector. - - Result value: The value of each - element of r is the negated value of - the corresponding element of a. For - integer vectors, the arithmetic is modular. - Endian considerations: - None. - - - - Supported type signatures for vec_neg - - - - - - - - r - + + void - - - a - + + vector unsigned char - - Example Implementation + + + mtvscr a + - - - vector signed char + void - vector signed char + vector bool int - vspltisw t,0 - vsububm r,t,a + mtvscr a - vector signed int + void vector signed int - vspltisw t,0 - vsubuwm r,t,a + mtvscr a - vector signed long long + void - vector signed long long + vector unsigned int - vspltisw t,0 - vsubudm r,t,a + mtvscr a - vector signed short + void - vector signed short + vector pixel - vspltisw t,0 - vsubuhm r,t,a + mtvscr a - vector double + void - vector double + vector bool short - xvnegdp r,a + mtvscr a - vector float + void - vector float + vector signed short - xvnegsp r,a + mtvscr a + + + + + + void + + + vector unsigned short + + + + mtvscr a @@ -18145,57 +18088,67 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_nmadd - Vector Negated Multiply-Add + + vec_mul + Vector Multiply - r = vec_nmadd (a, b, c) + r = vec_mul (a, b) Purpose: - Returns a vector containing the results of performing a negated - multiply-add operation on the source vectors. + Returns a vector containing the results of performing a multiply + operation using the source vectors. - Result value: The value of each - element of r is the product of the - corresponding elements of a and - b, added to the corresponding elements - of c, then multiplied by - –1.0. + Result value: Each element of + r receives the product of + the corresponding elements of a and + b. Endian considerations: None. 
+ Notes: + + + + The example implementation for vector char assumes that the + address of the permute control vector for the vperm instruction + is in a register identified by pcv. Its value is + {1,17,3,19,5,21,7,23,9,25,11,27,13,29,15,31}. + + + + + There are currently no vector instructions to support vector long + long multiplication, so the compiler must perform two scalar + multiplies on the vector elements for this case. + + +
- Supported type signatures for vec_nmadd - + Supported type signatures for vec_mul + - - + r - + a - + b - - - c - - Example Implementation @@ -18204,141 +18157,169 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector double - - - vector double + vector signed char - vector double + vector signed char - vector double + vector signed char - xvnmaddadp r/c,a,b + vmulesb t,a,b + vmulosb u,a,b + lxvw4x v,0,pcv + vperm r,t,u,v - vector float + vector unsigned char - vector float + vector unsigned char - vector float + vector unsigned char + + + + vmulesb t,a,b + vmulosb u,a,b + lxvw4x v,0,pcv + vperm r,t,u,v + + + + + + vector signed int - vector float + vector signed int + + + vector signed int - xvnmaddasp r/c,a,b + vmuluwm r,a,b - - -
- -
- - - - vec_nmsub - Vector Negated Multiply-Subtract - - r = vec_nmsub (a, b, c) - - - Purpose: - Returns a vector containing the results of performing a negated - multiply-subtract operation on the source vectors. - - Result value: The value of each - element of r is the value of the - corresponding element of c subtracted - from the product of the corresponding elements of a and b, and - then multiplied by –1.0. - Endian considerations: - None. - - - - Supported type signatures for vec_nmsub - - - - - - - - - r - + vector unsigned int - - a - + vector unsigned int - - b - + vector unsigned int + + + + vmuluwm r,a,b + + + - - c - + vector signed long long - - Example Implementation + + vector signed long long + + + vector signed long long + + + + [scalarized] + - - - vector double + vector unsigned long long - vector double + vector unsigned long long - vector double + vector unsigned long long + + + + [scalarized] + + + - vector double + vector signed short + + + vector signed short + + + vector signed short - xvnmsubmdp r/a,b,c + xxspltib t,0 + vmladduhm r,a,b,t - vector float + vector unsigned short - vector float + vector unsigned short - vector float + vector unsigned short + + + + xxspltib t,0 + vmladduhm r,a,b,t + + + - vector float + vector double + + + vector double + + + vector double - xvnmsubmsp r/a,b,c + xvmuldp r,a,b + + + + + + vector float + + + vector float + + + vector float + + + + xvmulsp r,a,b @@ -18349,120 +18330,106 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_nor - Vector NOR + + vec_mule + Vector Multiply Even - r = vec_nor (a, b) + r = vec_mule (a, b) Purpose: - Performs a bitwise NOR of the given vectors. + Multiplies the even-numbered elements of the source vectors to + produce the target vector. - Result value: r is the bitwise NOR - of a and b. + Result value: Each element + n of r is the + product of element 2n of a and element 2n of + b. Endian considerations: - None. 
+ The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets.
- Supported type signatures for vec_nor - + Supported type signatures for vec_mule + + - + r - + a - + b - Example Implementation + Example LE Implementation + + + Example BE Implementation - vector bool char + vector signed int - vector bool char + vector signed short - vector bool char + vector signed short - xxlnor r,a,b + vmulosh r,a,b - - - - vector signed char - - - vector signed char - - - vector signed char - - xxlnor r,a,b + vmulesh r,a,b - vector unsigned char + vector unsigned int - vector unsigned char + vector unsigned short - vector unsigned char + vector unsigned short - xxlnor r,a,b + vmulouh r,a,b - - - - vector bool int - - - vector bool int - - - vector bool int - - xxlnor r,a,b + vmuleuh r,a,b - vector signed int + vector signed long long vector signed int @@ -18472,55 +18439,12 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - xxlnor r,a,b - - - - - - vector unsigned int - - - vector unsigned int - - - vector unsigned int - - - - xxlnor r,a,b - - - - - - vector bool long long - - - vector bool long long - - - vector bool long long - - - - xxlnor r,a,b + vmulosw r,a,b - - - - vector signed long long - - - vector signed long long - - - vector signed long long - - xxlnor r,a,b + vmulesw r,a,b @@ -18529,30 +18453,19 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned long long - vector unsigned long long + vector unsigned int - vector unsigned long long + vector unsigned int - xxlnor r,a,b + vmulouw r,a,b - - - - vector bool short - - - vector bool short - - - vector bool short - - xxlnor r,a,b + vmuleuw r,a,b @@ -18561,62 +18474,40 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed short - vector signed short + vector signed char - vector signed short + vector signed char - xxlnor r,a,b + vmulosb r,a,b - - - - vector unsigned short - - - vector unsigned short - - - vector unsigned short - - xxlnor r,a,b + vmulesb r,a,b - vector 
double + vector unsigned short - vector double + vector unsigned char - vector double + vector unsigned char - xxlnor r,a,b + vmuloub r,a,b - - - - vector float - - - vector float - - - vector float - - xxlnor r,a,b + vmuleub r,a,b @@ -18627,136 +18518,127 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_or - Vector OR + + vec_mulo + Vector Multiply Odd - r = vec_or (a, b) + r = vec_mulo (a, b) Purpose: - Performs a bitwise OR of the given vectors. + Multiplies the odd-numbered elements of the source vectors to + produce the target vector. - Result value: r is the bitwise OR - of a and b. + Result value: Each element + n of r is the + product of element 2n+1 of a and element 2n+1 of + b. Endian considerations: - None. + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets.
- Supported type signatures for vec_or - + Supported type signatures for vec_mulo + + - + r - + a - + b - Example Implementation + Example LE Implementation + + + Example BE Implementation - vector bool char + vector signed int - vector bool char + vector signed short - vector bool char + vector signed short - xxlor r,a,b + vmulesh r,a,b + + + + + vmulosh r,a,b - vector signed char + vector unsigned int - vector signed char + vector unsigned short - vector signed char + vector unsigned short - xxlor r,a,b + vmuleuh r,a,b - - - - vector unsigned char - - - vector unsigned char - - - vector unsigned char - - xxlor r,a,b + vmulouh r,a,b - vector bool int + vector signed long long - vector bool int + vector signed int - vector bool int + vector signed int - xxlor r,a,b + vmulesw r,a,b - - - - vector signed int - - - vector signed int - - - vector signed int - - xxlor r,a,b + vmulosw r,a,b - vector unsigned int + vector unsigned long long vector unsigned int @@ -18766,103 +18648,165 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - xxlor r,a,b + vmuleuw r,a,b + + + + + vmulouw r,a,b - vector bool long long + vector signed short - vector bool long long + vector signed char - vector bool long long + vector signed char - xxlor r,a,b + vmulesb r,a,b + + + + + vmulosb r,a,b - vector signed long long + vector unsigned short - vector signed long long + vector unsigned char - vector signed long long + vector unsigned char - xxlor r,a,b + vmuleub r,a,b + + + + + vmuloub r,a,b + + +
+ +
+ + + + vec_nabs + Vector Negated Absolute Value + + r = vec_nabs (a) + + + Purpose: + Returns a vector containing the negated absolute values of the contents + of the source vector. + + Result value: The value of each + element of r is the negated absolute + value of the corresponding element of a. For integer vectors, the arithmetic is + modular. + Endian considerations: + None. + + + + Supported type signatures for vec_nabs + + + + + - - vector unsigned long long + + + r + + + + + a + + + + Example Implementation + + + + - vector unsigned long long + vector signed char - vector unsigned long long + vector signed char - xxlor r,a,b + vspltisw t,0 + vsububm u,t,a + vminsb r,u,a - vector bool short - - - vector bool short + vector signed int - vector bool short + vector signed int - xxlor r,a,b + vspltisw t,0 + vsubuwm u,t,a + vminsw r,u,a - vector signed short - - - vector signed short + vector signed long long - vector signed short + vector signed long long - xxlor r,a,b + vspltisw t,0 + vsubudm u,t,a + vminsd r,u,a - vector unsigned short - - - vector unsigned short + vector signed short - vector unsigned short + vector signed short - xxlor r,a,b + vspltisw t,0 + vsubuhm u,t,a + vminsh r,u,a @@ -18873,12 +18817,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector double - - vector double - - xxlor r,a,b + xvnabsdp r,a @@ -18889,12 +18830,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector float - - vector float - - xxlor r,a,b + xvnabssp r,a @@ -18905,26 +18843,25 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_orc - Vector OR with Complement + + vec_nand + Vector NAND - r = vec_orc (a, b) + r = vec_nand (a, b) Purpose: - Performs a bitwise OR of the first vector with the bitwise-complemented - second vector. + Performs a bitwise NAND of the given vectors. - Result value: r is the bitwise OR - of a and the bitwise complement of - b. 
+ Result value: r is the bitwise + NAND of a and b. Endian considerations: None.
- Supported type signatures for vec_orc + Supported type signatures for vec_nand @@ -18965,7 +18902,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - xxlorc r,a,b + xxlnand r,a,b @@ -18981,7 +18918,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - xxlorc r,a,b + xxlnand r,a,b @@ -18997,7 +18934,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - xxlorc r,a,b + xxlnand r,a,b @@ -19013,7 +18950,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - xxlorc r,a,b + xxlnand r,a,b @@ -19029,7 +18966,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - xxlorc r,a,b + xxlnand r,a,b @@ -19045,7 +18982,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - xxlorc r,a,b + xxlnand r,a,b @@ -19061,7 +18998,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - xxlorc r,a,b + xxlnand r,a,b @@ -19077,7 +19014,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - xxlorc r,a,b + xxlnand r,a,b @@ -19093,7 +19030,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - xxlorc r,a,b + xxlnand r,a,b @@ -19109,7 +19046,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - xxlorc r,a,b + xxlnand r,a,b @@ -19125,7 +19062,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - xxlorc r,a,b + xxlnand r,a,b @@ -19141,7 +19078,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - xxlorc r,a,b + xxlnand r,a,b @@ -19157,7 +19094,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - xxlorc r,a,b + xxlnand r,a,b @@ -19173,340 +19110,383 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - xxlorc r,a,b + xxlnand r,a,b
-
- - - vec_pack - Vector Pack + + + vec_ncipher_be + Vector AES Inverse Cipher Big-Endian - r = vec_pack (a, b) + r = vec_ncipher_be (a, b) Purpose: - Packs information from each element of two vectors into the result - vector. - - Result value: Let v represent the concatenation of vectors - a and b. For integer types, the value of each element - of r is taken from the low-order half - of the corresponding element of v. For - floating-point types, the value of each element of r is the corresponding element of v, rounded to the result type. - Endian considerations: - The element numbering within a register is left-to-right for big-endian - targets, and right-to-left for little-endian targets. + Performs one round of the AES inverse cipher operation on an + intermediate state array a by using a + given round key b. - Notes: - No Power compilers yet support the vector _Float16 type, so that - interface is currently deferred. Also, the - pack-double-to-float interface produces incorrect code. Issue 417. - + + Result value: r contains the + resulting intermediate state, after one round of the AES inverse cipher + operation on intermediate state array a, + using the round key specified by b. + + + Endian considerations: + All element and bit numberings of the AES inverse cipher operation use + big-endian (i.e., left-to-right) order, reflecting the underlying + hardware instruction. Unlike most of the vector intrinsics in this + chapter, vec_ncipher_be does not follow the bi-endian + programming model. + + Notes: This intrinsic may + not yet be available in all implementations. 
- Supported type signatures for vec_pack - + Supported type signatures for vec_ncipher_be + - - - + r - + a - + b - Example LE Implementation - - - Example BE Implementation - - - Restrictions + Example Implementation - vector bool char + vector unsigned char - vector bool short + vector unsigned char - vector bool short - - - - vpkuhum r,b,a - + vector unsigned char - vpkuhum r,a,b + vncipher r,a,b - - - + + +
+ +
+ + + + vec_ncipherlast_be + Vector AES Inverse Cipher Last Big-Endian + + r = vec_ncipherlast_be (a, b) + + + Purpose: + Performs the final round of the AES inverse cipher operation on an + intermediate state array a using the + specified round key b. + + + Result value: r contains the + resulting final state, after the final round of the AES inverse cipher + operation on intermediate state array a, + using the round key specified by b. + + + Endian considerations: + All element and bit numberings of the AES inverse cipher-last operation + use big-endian (i.e., left-to-right) order, reflecting the underlying + hardware instruction. Unlike most of the vector intrinsics in this + chapter, vec_ncipherlast_be does not follow the bi-endian + programming model. + + + Notes: This intrinsic may + not yet be available in all implementations. + + + Supported type signatures for vec_ncipherlast_be + + + + + + - - vector signed char - - - vector signed short - - - vector signed short + + + r + - - - vpkuhum r,b,a - + + + a + - - - vpkuhum r,a,b - + + + b + - - + + Example Implementation + + vector unsigned char - vector unsigned short + vector unsigned char - vector unsigned short - - - - vpkuhum r,b,a - + vector unsigned char - vpkuhum r,a,b + vncipherlast r,a,b - - - + + +
+ +
+ + + + vec_nearbyint + Vector Nearby Integer + + r = vec_nearbyint (a) + + + Purpose: + Returns a vector containing the floating-point integral values nearest to + the values of the corresponding elements of the source vector. + + Result value: Each element of + r contains the nearest representable + floating-point integral value to the value of the corresponding element + of a. When an input element value is + exactly between two integer values, the integer value with the larger + absolute value is selected. + Endian considerations: + None. + + + + Supported type signatures for vec_nearbyint + + + + + - - vector bool int - - - vector bool long long - - - vector bool long long - - - - vpkudum r,b,a - + + + r + - - - vpkudum r,a,b - + + + a + - - + + Example Implementation + + - vector signed int - - - vector signed long long + vector double - vector signed long long - - - - vpkudum r,b,a - + vector double - vpkudum r,a,b + xvrdpi r,a - - - - vector unsigned int - - - vector unsigned long long + vector float - vector unsigned long long - - - - vpkudum r,b,a - + vector float - vpkudum r,a,b + xvrspi r,a - - - - - - vector bool short - - - vector bool int - + + +
+ +
+ + + + vec_neg + Vector Negate + + r = vec_neg (a) + + + Purpose: + Returns a vector containing the negated values of the contents of the + source vector. + + Result value: The value of each + element of r is the negated value of + the corresponding element of a. For + integer vectors, the arithmetic is modular. + Endian considerations: + None. + + + + Supported type signatures for vec_neg + + + + + + + + + r + + + + + a + + + + Example Implementation + + + + + - vector bool int + vector signed char - - - vpkuwum r,b,a - + + vector signed char - vpkuwum r,a,b + vspltisw t,0 + vsububm r,t,a - - - - - vector signed short - vector signed int - vector signed int - - - - vpkuwum r,b,a - + vector signed int - vpkuwum r,a,b + vspltisw t,0 + vsubuwm r,t,a - - - - vector unsigned short - - - vector unsigned int + vector signed long long - vector unsigned int - - - - vpkuwum r,b,a - + vector signed long long - vpkuwum r,a,b + vspltisw t,0 + vsubudm r,t,a - - - - vector float + vector signed short - vector double + vector signed short - - vector double + + + vspltisw t,0 + vsubuhm r,t,a + + + - sample implementation TBD + vector double - sample implementation TBD + vector double - - Broken + + + xvnegdp r,a + - - vector _Float16 - vector float - vector float - - - sample implementation TBD - - - sample implementation TBD + vector float - - Deferred + + + xvnegsp r,a + @@ -19516,38 +19496,35 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_pack_to_short_fp32 - Vector Pack 32-bit Float to Short + + vec_nmadd + Vector Negated Multiply-Add - r = vec_pack_to_short_fp32 (a, b) + r = vec_nmadd (a, b, c) Purpose: - Packs eight single-precision 32-bit floating-point numbers from two - source vectors into a vector of eight 16-bit floating-point numbers. + Returns a vector containing the results of performing a negated + multiply-add operation on the source vectors. - Result value: Let v represent the 16-element concatenation of - a and b. 
Each value of r contains the result of converting the - corresponding single-precision element of v to half-precision. + Result value: The value of each + element of r is the product of the + corresponding elements of a and + b, added to the corresponding elements + of c, then multiplied by + –1.0. Endian considerations: - The element numbering within a register is left-to-right for big-endian - targets, and right-to-left for little-endian targets. + None.
- Supported type signatures for vec_pack_to_short_fp32 - + Supported type signatures for vec_nmadd + - @@ -19566,44 +19543,53 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - Example LE Implementation - - - Example BE Implementation + + c + - - Restrictions + + Example Implementation - vector unsigned short + vector double - vector float + vector double - vector float + vector double + + + vector double - vctuxs t,a,0 - vctuxs u,b,0 - vpkswss r,u,t + xvnmaddadp r/c,a,b + + + + vector float + + + vector float + + + vector float + + + vector float + - vctuxs t,a,0 - vctuxs u,b,0 - vpkswss r,t,u + xvnmaddasp r/c,a,b - - ISA 3.0 or later - @@ -19612,41 +19598,29 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_packpx - Vector Pack Pixel + + vec_nmsub + Vector Negated Multiply-Subtract - r = vec_packpx (a, b) + r = vec_nmsub (a, b, c) Purpose: - Packs information from each element of two vectors into the result - vector. + Returns a vector containing the results of performing a negated + multiply-subtract operation on the source vectors. - Result value: Let v be the concatenation of a and b. The - value of each element of r is taken - from the corresponding element of v as - follows: - - - The least-significant bit of the high-order byte is - stored into the first bit of the result element. - - - The least-significant 5 bits of each of the remaining - bytes are stored into the remaining portion of the result - element. - - + Result value: The value of each + element of r is the value of the + corresponding element of c subtracted + from the product of the corresponding elements of a and b, and + then multiplied by –1.0. Endian considerations: - The element numbering within a register is left-to-right for big-endian - targets, and right-to-left for little-endian targets. + None.
- Supported type signatures for vec_packpx + Supported type signatures for vec_nmsub @@ -19671,32 +19645,51 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - Example LE Implementation + + c + - - Example BE Implementation + + Example Implementation - vector pixel + vector double - vector unsigned int + vector double - vector unsigned int + vector double + + + vector double - vpkpx r,b,a + xvnmsubmdp r/a,b,c + + + + vector float + + + vector float + + + vector float + + + vector float + - vpkpx r,a,b + xvnmsubmsp r/a,b,c @@ -19707,149 +19700,120 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_packs - Vector Pack Saturated + + vec_nor + Vector NOR - r = vec_packs (a, b) + r = vec_nor (a, b) Purpose: - Packs information from each element of two vectors into the result - vector, using saturated values. + Performs a bitwise NOR of the given vectors. - Result value: Let v be the concatenation of a and b. The - value of each element of r is the - saturated value of the corresponding element of v. + Result value: r is the bitwise NOR + of a and b. Endian considerations: - The element numbering within a register is left-to-right for big-endian - targets, and right-to-left for little-endian targets. + None.
- Supported type signatures for vec_packs - + Supported type signatures for vec_nor + - - + r - + a - + b - - Example LE Implementation - - - Example BE Implementation + + Example Implementation - vector signed char + vector bool char - vector signed short + vector bool char - vector signed short - - - - vpkshss r,b,a - + vector bool char - vpkshss r,a,b + xxlnor r,a,b - vector unsigned char + vector signed char - vector unsigned short + vector signed char - vector unsigned short - - - - vpkuhus r,b,a - + vector signed char - vpkuhus r,a,b + xxlnor r,a,b - vector signed int + vector unsigned char - vector signed long long + vector unsigned char - vector signed long long - - - - vpksdss r,b,a - + vector unsigned char - vpksdss r,a,b + xxlnor r,a,b - vector unsigned int + vector bool int - vector unsigned long long + vector bool int - vector unsigned long long - - - - vpkudus r,b,a - + vector bool int - vpkudus r,a,b + xxlnor r,a,b - vector signed short + vector signed int vector signed int @@ -19859,18 +19823,13 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vpkswss r,b,a - - - - - vpkswss r,a,b + xxlnor r,a,b - vector unsigned short + vector unsigned int vector unsigned int @@ -19880,159 +19839,87 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vpkuwus r,b,a - - - - - vpkuwus r,a,b + xxlnor r,a,b - - -
- -
- - - - vec_packsu - Vector Pack Saturated Unsigned - - r = vec_packsu (a, b) - - - Purpose: - Packs information from each element of two vectors into the result - vector, using unsigned saturated values. - - Result value: Let v be the concatenation of a and b. The - value of each element of r is the - saturated value of the corresponding element of v. - Endian considerations: - The element numbering within a register is left-to-right for big-endian - targets, and right-to-left for little-endian targets. - - - - Supported type signatures for vec_packsu - - - - - - - - - r - + vector bool long long - - a - + vector bool long long - - b - - - - Example LE Implementation + vector bool long long - - Example BE Implementation + + + xxlnor r,a,b + - - - vector unsigned char + vector signed long long - vector signed short + vector signed long long - vector signed short - - - - vpkshus r,b,a - + vector signed long long - vpkshus r,a,b + xxlnor r,a,b - vector unsigned char + vector unsigned long long - vector unsigned short + vector unsigned long long - vector unsigned short - - - - vpkuhus r,b,a - + vector unsigned long long - vpkuhus r,a,b + xxlnor r,a,b - vector unsigned int + vector bool short - vector signed long long + vector bool short - vector signed long long - - - - vpksdus r,b,a - + vector bool short - vpksdus r,a,b + xxlnor r,a,b - vector unsigned int + vector signed short - vector unsigned long long + vector signed short - vector unsigned long long + vector signed short - vpkudus r,b,a - - - - - vpkudus r,a,b + xxlnor r,a,b @@ -20041,40 +19928,46 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned short - vector signed int + vector unsigned short - vector signed int - - - - vpkswus r,b,a - + vector unsigned short - vpkswus r,a,b + xxlnor r,a,b - vector unsigned short + vector double - vector unsigned int + vector double - vector unsigned int + vector double - vpkuwus r,b,a + xxlnor r,a,b + + + + vector float + + + 
vector float + + + vector float + - vpkuwus r,a,b + xxlnor r,a,b @@ -20085,26 +19978,25 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_parity_lsbb - Vector Parity over Least-Significant Bits of Bytes + + vec_or + Vector OR - r = vec_parity_lsbb (a) + r = vec_or (a, b) Purpose: - Compute parity on the least-significant bit of each byte. + Performs a bitwise OR of the given vectors. - Result value: Each element of - r contains the parity computed over the - low-order bit of each of the bytes in the corresponding element of - a. + Result value: r is the bitwise OR + of a and b. Endian considerations: None.
- Supported type signatures for vec_parity_lsbb + Supported type signatures for vec_or @@ -20123,335 +20015,373 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - Example Implementation + + b + - Restrictions + Example Implementation - vector unsigned int + vector bool char - vector signed int + vector bool char + + + vector bool char - vprtybw r,a + xxlor r,a,b - - ISA 3.0 or later - - vector unsigned int + vector signed char - vector unsigned int + vector signed char + + + vector signed char - vprtybw r,a + xxlor r,a,b - - ISA 3.0 or later - - vector unsigned __int128 + vector unsigned char - vector signed __int128 + vector unsigned char + + + vector unsigned char - vprtybq r,a + xxlor r,a,b - - ISA 3.0 or later - - vector unsigned __int128 + vector bool int - vector unsigned __int128 + vector bool int + + + vector bool int - vprtybq r,a + xxlor r,a,b - - ISA 3.0 or later - - vector unsigned long long + vector signed int - vector signed long long + vector signed int + + + vector signed int - vprtybd r,a + xxlor r,a,b - - ISA 3.0 or later - - vector unsigned long long + vector unsigned int - vector unsigned long long + vector unsigned int + + + vector unsigned int - vprtybd r,a + xxlor r,a,b - - ISA 3.0 or later - - - -
- -
- - - - vec_perm - Vector Permute - - r = vec_perm (a, b, c) - - - Purpose: - Returns a vector that contains elements selected from two input - vectors, in the order specified by a third input vector. - - Result value: Let v be the concatenation of a and b. Each - byte of r selected by using the - least-significant 5 bits of the corresponding byte of c as an index into v. - Endian considerations: - The element numbering within a register is left-to-right for big-endian - targets, and right-to-left for little-endian targets. - - Notes: - - - - The example little-endian code generation uses the vpermr instruction from ISA 3.0. For - earlier targets, the compiler must generate an extra instruction - to adjust the permute control vector c. - - - - - No Power compilers yet support the vector _Float16 type, so that - interface is currently deferred. - - - - - - Supported type signatures for vec_perm - - - - - - - - - - - r - - - - - a - + vector bool long long - - b - + vector bool long long - - c - - - - Example LE Implementation - - - Example BE Implementation + vector bool long long - - Restrictions + + + xxlor r,a,b + - - - vector bool char - - - vector bool char + vector signed long long - vector bool char + vector signed long long - vector unsigned char - - - - vpermr r,b,a,c - + vector signed long long - vperm r,a,b,c + xxlor r,a,b - - - - vector signed char - - - vector signed char + vector unsigned long long - vector signed char + vector unsigned long long - vector unsigned char + vector unsigned long long - vpermr r,b,a,c + xxlor r,a,b - - - vperm r,a,b,c - + + + + vector bool short + + + vector bool short + + + vector bool short - + + xxlor r,a,b + - vector unsigned char + vector signed short - vector unsigned char + vector signed short - vector unsigned char + vector signed short + + + + xxlor r,a,b + + + + + + vector unsigned short - vector unsigned char + vector unsigned short + + + vector unsigned short - vpermr r,b,a,c + xxlor r,a,b + + + + vector double 
+ + + vector double + + + vector double + - vperm r,a,b,c + xxlor r,a,b + + + + vector float + + + vector float + + + vector float + - + + xxlor r,a,b + + + +
+ +
+ + + + vec_orc + Vector OR with Complement + + r = vec_orc (a, b) + + + Purpose: + Performs a bitwise OR of the first vector with the bitwise-complemented + second vector. + + Result value: r is the bitwise OR + of a and the bitwise complement of + b. + Endian considerations: + None. + + + + Supported type signatures for vec_orc + + + + + + - - vector bool int + + + r + + + + + a + + + + + b + + + + Example Implementation + + + + - vector bool int + vector bool char - vector bool int + vector bool char - vector unsigned char + vector bool char - vpermr r,b,a,c + xxlorc r,a,b + + + + vector signed char + + + vector signed char + + + vector signed char + - vperm r,a,b,c + xxlorc r,a,b - - - - vector signed int - - - vector signed int + vector unsigned char - vector signed int + vector unsigned char vector unsigned char - vpermr r,b,a,c + xxlorc r,a,b + + + + vector bool int + + + vector bool int + + + vector bool int + - vperm r,a,b,c + xxlorc r,a,b + + + + vector signed int + + + vector signed int + + + vector signed int + - + + xxlorc r,a,b + @@ -20464,22 +20394,11 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned int - - vector unsigned char - - - - vpermr r,b,a,c - - - vperm r,a,b,c + xxlorc r,a,b - - - @@ -20491,22 +20410,11 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector bool long long - - vector unsigned char - - - - vpermr r,b,a,c - - - vperm r,a,b,c + xxlorc r,a,b - - - @@ -20518,22 +20426,11 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed long long - - vector unsigned char - - - - vpermr r,b,a,c - - - vperm r,a,b,c + xxlorc r,a,b - - - @@ -20545,80 +20442,31 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned long long - - vector unsigned char - - - - vpermr r,b,a,c - - - vperm r,a,b,c + xxlorc r,a,b - - - - vector pixel - - - vector pixel + vector bool short - vector pixel + vector bool 
short - vector unsigned char - - - - vpermr r,b,a,c - + vector bool short - vperm r,a,b,c + xxlorc r,a,b - - - - vector bool short - - - vector bool short - - - vector bool short - - - vector unsigned char - - - - vpermr r,b,a,c - - - - - vperm r,a,b,c - - - - - - - - - vector signed short + vector signed short vector signed short @@ -20626,22 +20474,11 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed short - - vector unsigned char - - - - vpermr r,b,a,c - - - vperm r,a,b,c + xxlorc r,a,b - - - @@ -20653,22 +20490,11 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned short - - vector unsigned char - - - - vpermr r,b,a,c - - - vperm r,a,b,c + xxlorc r,a,b - - - @@ -20680,22 +20506,11 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector double - - vector unsigned char - - vpermr r,b,a,c - - - - - vperm r,a,b,c + xxlorc r,a,b - - - @@ -20707,49 +20522,11 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector float - - vector unsigned char - - - - vpermr r,b,a,c - - - - - vperm r,a,b,c - - - - - - - - - vector _Float16 - - - vector _Float16 - - - vector _Float16 - - - vector unsigned char - - vpermr r,b,a,c - - - - - vperm r,a,b,c + xxlorc r,a,b - - Deferred - @@ -20758,33 +20535,39 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_permxor - Vector Permute and Exclusive-OR + + vec_pack + Vector Pack - r = vec_permxor (a, b, c) + r = vec_pack (a, b) Purpose: - Applies a permute and exclusive-OR operation on two input vectors of byte - elements, with the selected elements identified by a third input vector. + Packs information from each element of two vectors into the result + vector. - Result value: For each - i (0 ≤ i < 16), let - index1 be bits 0–3 and - index2 be bits 4–7 of byte element - i of c. 
Byte - element i of r - is set to the exclusive-OR of byte elements index1 - of a and index2 - of b. + Result value: Let v represent the concatenation of vectors + a and b. For integer types, the value of each element + of r is taken from the low-order half + of the corresponding element of v. For + floating-point types, the value of each element of r is the corresponding element of v, rounded to the result type. Endian considerations: The element numbering within a register is left-to-right for big-endian targets, and right-to-left for little-endian targets. + Notes: + No Power compilers yet support the vector _Float16 type, so that + interface is currently deferred. Also, the + pack-double-to-float interface produces incorrect code. Issue 417. + +
- Supported type signatures for vec_permxor + Supported type signatures for vec_pack @@ -20809,17 +20592,15 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> b - - - c - - - + Example LE Implementation - + Example BE Implementation + + Restrictions + @@ -20828,166 +20609,124 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector bool char - vector bool char - - - vector bool char + vector bool short - vector bool char + vector bool short - xxlnor t,c,c - vpermxor r,a,b,t + vpkuhum r,b,a - vpermxor r,a,b,c - + vpkuhum r,a,b + + + - - vector unsigned char - vector signed char - vector signed char + vector signed short - vector signed char + vector signed short - xxlnor t,c,c - vpermxor r,a,b,t + vpkuhum r,b,a - vpermxor r,a,b,c - + vpkuhum r,a,b + + + vector unsigned char - vector unsigned char - - - vector unsigned char + vector unsigned short - vector unsigned char + vector unsigned short - xxlnor t,c,c - vpermxor r,a,b,t + vpkuhum r,b,a - vpermxor r,a,b,c - + vpkuhum r,a,b + + + - - -
- -
- - - - vec_pmsum_be - Vector Polynomial Multiply-Sum Big-Endian - - r = vec_pmsum_be (a, b) - - - Purpose: - Performs the exclusive-OR operation (implementing polynomial addition) - on each even-odd pair of the polynomial-multiplication result of the - corresponding elements of a and - b. - - Result value: Each element - i of r is - computed by an exclusive-OR operation of the polynomial - multiplication of input elements 2 × i of - a and b and input elements 2 × - i + 1 of a and - b. - - - Endian considerations: - All element numberings in the above description denote big-endian - (i.e., left-to-right) order, reflecting the underlying hardware - insruction. Unlike most of the vector intrinsics in this chapter, - vec_pmsum_be does not follow the bi-endian - programming model. - - - - Supported type signatures for vec_pmsum_be - - - - - - - - r - + vector bool int - - a - + vector bool long long - - b - + vector bool long long - - Example Implementation + + + vpkudum r,b,a + + + + + vpkudum r,a,b + + + + - - - vector unsigned int + vector signed int - vector unsigned short + vector signed long long - vector unsigned short + vector signed long long - vpmsumh r,a,b + vpkudum r,b,a + + + + + vpkudum r,a,b + + + - vector unsigned __int128 + vector unsigned int vector unsigned long long @@ -20997,194 +20736,128 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vpmsumd r,a,b + vpkudum r,b,a - - - - vector unsigned long long - - - vector unsigned int - - - vector unsigned int - - vpmsumw r,a,b + vpkudum r,a,b + + + - vector unsigned short + vector bool short - vector unsigned char + vector bool int - vector unsigned char + vector bool int - vpmsumb r,a,b + vpkuwum r,b,a - - - -
- -
- - - - vec_popcnt - Vector Population Count - - r = vec_popcnt (a) - - - Purpose: - Returns a vector containing the number of bits set in each element of - the input vector. - - Result value: The value of each - element of r is the number of bits set - in the corresponding element of a. - Endian considerations: - None. - - - - Supported type signatures for vec_popcnt - - - - - - - - - r - - - - - a - + + + vpkuwum r,a,b + - - Example Implementation + + - - - vector unsigned char + vector signed short - vector signed char + vector signed int + + + vector signed int - vpopcntb r,a + vpkuwum r,b,a - - - - vector unsigned char - - - vector unsigned char - - vpopcntb r,a + vpkuwum r,a,b + + + + + vector unsigned short + vector unsigned int - vector signed int + vector unsigned int - vpopcntw r,a + vpkuwum r,b,a - - - - vector unsigned int - - - vector unsigned int - - vpopcntw r,a + vpkuwum r,a,b + + + - vector unsigned long long + vector float - vector signed long long + vector double - - - vpopcntd r,a - + + vector double - - - vector unsigned long long + sample implementation TBD - vector unsigned long long + sample implementation TBD - - - vpopcntd r,a - + + Broken - vector unsigned short + vector _Float16 - vector signed short + vector float - - - vpopcnth r,a - + + vector float - - - vector unsigned short + sample implementation TBD - vector unsigned short + sample implementation TBD - - - vpopcnth r,a - + + Deferred @@ -21194,74 +20867,94 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_re - Vector Reciprocal Estimate + + vec_pack_to_short_fp32 + Vector Pack 32-bit Float to Short - r = vec_re (a) + r = vec_pack_to_short_fp32 (a, b) Purpose: - Returns a vector containing estimates of the reciprocals of the - corresponding elements of the input vector. + Packs eight single-precision 32-bit floating-point numbers from two + source vectors into a vector of eight 16-bit floating-point numbers. 
- Result value: Each element of - r contains the estimated value of the - reciprocal of the corresponding element of a. + Result value: Let v represent the 16-element concatenation of + a and b. Each value of r contains the result of converting the + corresponding single-precision element of v to half-precision. Endian considerations: - None. + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets.
- Supported type signatures for vec_re - + Supported type signatures for vec_pack_to_short_fp32 + + + + - + r - + a - - Example Implementation - - - - - - vector double + + b + - vector double + Example LE Implementation - - - xvredp r,a - + + Example BE Implementation + + + Restrictions + + - vector float + vector unsigned short vector float + + vector float + - xvresp r,a + vctuxs t,a,0 + vctuxs u,b,0 + vpkswss r,u,t + + + + + vctuxs t,a,0 + vctuxs u,b,0 + vpkswss r,t,u + + ISA 3.0 or later + @@ -21270,106 +20963,91 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_recipdiv - Vector Reciprocal Divide + + vec_packpx + Vector Pack Pixel - r = vec_recipdiv (a, b) + r = vec_packpx (a, b) Purpose: - Returns a vector containing refined approximations of the division of - the corresponding elements of a by the - corresponding elements of b. This - built-in function provides an implementation-dependent precision, which - is commonly within 2 ulps (units in the last place) for most of the - numeric range expressible by - the input operands. This built-in function does not correspond to a - single IEEE operation and does not provide the overflow, underflow, and - NaN propagation characteristics specified for IEEE division. (Precision - may be a function of both the specified target processor model during - compilation and the actual processor on which a program is executed.) + Packs information from each element of two vectors into the result + vector. - Result value: Each element of - r contains a refined approximation of - the division of the corresponding element of a by the corresponding element of b. + Result value: Let v be the concatenation of a and b. The + value of each element of r is taken + from the corresponding element of v as + follows: + + + The least-significant bit of the high-order byte is + stored into the first bit of the result element. 
+ + + The least-significant 5 bits of each of the remaining + bytes are stored into the remaining portion of the result + element. + + Endian considerations: - None. + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets. - Notes: The example implementation - for vector double assumes that a register z - initially contains the double-precision floating-point value 1.0 - in each doubleword.
- Supported type signatures for vec_recipdiv - + Supported type signatures for vec_packpx + + - + r - + a - + b - - Example Implementation + + Example LE Implementation + + + Example BE Implementation - vector double + vector pixel - vector double + vector unsigned int - vector double + vector unsigned int - xvredp t,b - xvnmsubadp z,b,t - xvmaddadp u,z,t - xvmuldp v,a,u - xvnmsubadp r/a,b,v - xvmaddmdp r/a,u,v + vpkpx r,b,a - - - - vector float - - - vector float - - - vector float - - xvresp t,b - xvmulsp u,a,t - xvnmsubasp r/a,b,u - xvmaddmsp r/a,t,u + vpkpx r,a,b @@ -21380,46 +21058,36 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_revb - Vector Reverse Bytes + + vec_packs + Vector Pack Saturated - r = vec_revb (a) + r = vec_packs (a, b) Purpose: - Reverse the bytes of each vector element of a vector. + Packs information from each element of two vectors into the result + vector, using saturated values. - Result value: Each element of - r contains the byte-reversed value of - the corresponding element of a. + Result value: Let v be the concatenation of a and b. The + value of each element of r is the + saturated value of the corresponding element of v. Endian considerations: - None. - - Notes: - - - - No Power compilers yet support the vector _Float16 type, so that - interface is currently deferred. - - - - - The examples shown are for ISA 3.0. More complex sequences are - required for earlier ISA levels. - - - + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets.
- Supported type signatures for vec_revb - + Supported type signatures for vec_packs + + @@ -21433,285 +21101,4197 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - Example ISA 3.0 Implementation + + b + - Restrictions + Example LE Implementation + + + Example BE Implementation - vector bool char + vector signed char - vector bool char + vector signed short + + + vector signed short - [none] + vpkshss r,b,a - + + vpkshss r,a,b + - vector signed char + vector unsigned char - vector signed char + vector unsigned short + + + vector unsigned short - [none] + vpkuhus r,b,a - + + vpkuhus r,a,b + - vector unsigned char + vector signed int - vector unsigned char + vector signed long long + + + vector signed long long - [none] + vpksdss r,b,a - + + vpksdss r,a,b + - vector bool int + vector unsigned int - vector bool int + vector unsigned long long + + + vector unsigned long long - xxbrw r,a + vpkudus r,b,a - + + vpkudus r,a,b + - vector signed int + vector signed short vector signed int + + vector signed int + - xxbrw r,a + vpkswss r,b,a - + + vpkswss r,a,b + - vector unsigned int + vector unsigned short vector unsigned int + + vector unsigned int + - xxbrw r,a + vpkuwus r,b,a - + + vpkuwus r,a,b + - - - vector signed __int128 + + +
+ +
+ + + + vec_packsu + Vector Pack Saturated Unsigned + + r = vec_packsu (a, b) + + + Purpose: + Packs information from each element of two vectors into the result + vector, using unsigned saturated values. + + Result value: Let v be the concatenation of a and b. The + value of each element of r is the + saturated value of the corresponding element of v. + Endian considerations: + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets. + + + + Supported type signatures for vec_packsu + + + + + + + + + + + r + - vector signed __int128 + + a + + + + + b + + + + Example LE Implementation + + + Example BE Implementation + + + + + + + vector unsigned char + + + vector signed short + + + vector signed short - xxbrq r,a + vpkshus r,b,a - + + vpkshus r,a,b + - vector unsigned __int128 + vector unsigned char - vector unsigned __int128 + vector unsigned short + + + vector unsigned short - xxbrq r,a + vpkuhus r,b,a - + + vpkuhus r,a,b + - vector bool long long + vector unsigned int - vector bool long long + vector signed long long + + + vector signed long long - xxbrd r,a + vpksdus r,b,a - + + vpksdus r,a,b + - vector signed long long + vector unsigned int - vector signed long long + vector unsigned long long + + + vector unsigned long long - xxbrd r,a + vpkudus r,b,a - + + vpkudus r,a,b + - vector unsigned long long + vector unsigned short - vector unsigned long long + vector signed int + + + vector signed int - xxbrd r,a + vpkswus r,b,a - + + vpkswus r,a,b + - vector bool short + vector unsigned short - vector bool short + vector unsigned int + + + vector unsigned int - xxbrh r,a + vpkuwus r,b,a - + + vpkuwus r,a,b + + + + + +
+ +
+ + + + vec_parity_lsbb + Vector Parity over Least-Significant Bits of Bytes + + r = vec_parity_lsbb (a) + + + Purpose: + Compute parity on the least-significant bit of each byte. + + Result value: Each element of + r contains the parity computed over the + low-order bit of each of the bytes in the corresponding element of + a. + Endian considerations: + None. + + + + Supported type signatures for vec_parity_lsbb + + + + + + + + + + r + + + + + a + + + + Example Implementation + + + Restrictions + + - vector signed short + vector unsigned int - vector signed short + vector signed int - xxbrh r,a + vprtybw r,a + + ISA 3.0 or later + + + + + vector unsigned int + + + vector unsigned int + - + + vprtybw r,a + + + + ISA 3.0 or later - vector unsigned short + vector unsigned __int128 - vector unsigned short + vector signed __int128 - xxbrh r,a + vprtybq r,a + + ISA 3.0 or later + + + + + vector unsigned __int128 + + + vector unsigned __int128 + - + + vprtybq r,a + + + + ISA 3.0 or later - vector double + vector unsigned long long + + + vector signed long long + + + + vprtybd r,a + + + + ISA 3.0 or later + + + + + vector unsigned long long + + + vector unsigned long long + + + + vprtybd r,a + + + + ISA 3.0 or later + + + + +
+ +
+ + + + vec_perm + Vector Permute + + r = vec_perm (a, b, c) + + + Purpose: + Returns a vector that contains elements selected from two input + vectors, in the order specified by a third input vector. + + Result value: Let v be the concatenation of a and b. Each + byte of r selected by using the + least-significant 5 bits of the corresponding byte of c as an index into v. + Endian considerations: + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets. + + Notes: + + + + The example little-endian code generation uses the vpermr instruction from ISA 3.0. For + earlier targets, the compiler must generate an extra instruction + to adjust the permute control vector c. + + + + + No Power compilers yet support the vector _Float16 type, so that + interface is currently deferred. + + + + + + Supported type signatures for vec_perm + + + + + + + + + + + + + r + + + + + a + + + + + b + + + + + c + + + + Example LE Implementation + + + Example BE Implementation + + + Restrictions + + + + + + + vector bool char + + + vector bool char + + + vector bool char + + + vector unsigned char + + + + vpermr r,b,a,c + + + + + vperm r,a,b,c + + + + + + + + + vector signed char + + + vector signed char + + + vector signed char + + + vector unsigned char + + + + vpermr r,b,a,c + + + + + vperm r,a,b,c + + + + + + + + + vector unsigned char + + + vector unsigned char + + + vector unsigned char + + + vector unsigned char + + + + vpermr r,b,a,c + + + + + vperm r,a,b,c + + + + + + + + + vector bool int + + + vector bool int + + + vector bool int + + + vector unsigned char + + + + vpermr r,b,a,c + + + + + vperm r,a,b,c + + + + + + + + + vector signed int + + + vector signed int + + + vector signed int + + + vector unsigned char + + + + vpermr r,b,a,c + + + + + vperm r,a,b,c + + + + + + + + + vector unsigned int + + + vector unsigned int + + + vector unsigned int + + + vector unsigned char + + + + vpermr r,b,a,c + + + + + 
vperm r,a,b,c + + + + + + + + + vector bool long long + + + vector bool long long + + + vector bool long long + + + vector unsigned char + + + + vpermr r,b,a,c + + + + + vperm r,a,b,c + + + + + + + + + vector signed long long + + + vector signed long long + + + vector signed long long + + + vector unsigned char + + + + vpermr r,b,a,c + + + + + vperm r,a,b,c + + + + + + + + + vector unsigned long long + + + vector unsigned long long + + + vector unsigned long long + + + vector unsigned char + + + + vpermr r,b,a,c + + + + + vperm r,a,b,c + + + + + + + + + vector pixel + + + vector pixel + + + vector pixel + + + vector unsigned char + + + + vpermr r,b,a,c + + + + + vperm r,a,b,c + + + + + + + + + vector bool short + + + vector bool short + + + vector bool short + + + vector unsigned char + + + + vpermr r,b,a,c + + + + + vperm r,a,b,c + + + + + + + + + vector signed short + + + vector signed short + + + vector signed short + + + vector unsigned char + + + + vpermr r,b,a,c + + + + + vperm r,a,b,c + + + + + + + + + vector unsigned short + + + vector unsigned short + + + vector unsigned short + + + vector unsigned char + + + + vpermr r,b,a,c + + + + + vperm r,a,b,c + + + + + + + + + vector double + + + vector double + + + vector double + + + vector unsigned char + + + + vpermr r,b,a,c + + + + + vperm r,a,b,c + + + + + + + + + vector float + + + vector float + + + vector float + + + vector unsigned char + + + + vpermr r,b,a,c + + + + + vperm r,a,b,c + + + + + + + + + vector _Float16 + + + vector _Float16 + + + vector _Float16 + + + vector unsigned char + + + + vpermr r,b,a,c + + + + + vperm r,a,b,c + + + + Deferred + + + + +
+ +
+ + + + vec_permxor + Vector Permute and Exclusive-OR + + r = vec_permxor (a, b, c) + + + Purpose: + Applies a permute and exclusive-OR operation on two input vectors of byte + elements, with the selected elements identified by a third input vector. + + Result value: For each + i (0 ≤ i < 16), let + index1 be bits 0–3 and + index2 be bits 4–7 of byte element + i of c. Byte + element i of r + is set to the exclusive-OR of byte elements index1 + of a and index2 + of b. + Endian considerations: + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets. + + + + Supported type signatures for vec_permxor + + + + + + + + + + + + r + + + + + a + + + + + b + + + + + c + + + + Example LE Implementation + + + Example BE Implementation + + + + + + + vector bool char + + + vector bool char + + + vector bool char + + + vector bool char + + + + xxlnor t,c,c + vpermxor r,a,b,t + + + + + vpermxor r,a,b,c + + + + + + + vector unsigned char + + + vector signed char + + + vector signed char + + + vector signed char + + + + xxlnor t,c,c + vpermxor r,a,b,t + + + + + vpermxor r,a,b,c + + + + + + + vector unsigned char + + + vector unsigned char + + + vector unsigned char + + + vector unsigned char + + + + xxlnor t,c,c + vpermxor r,a,b,t + + + + + vpermxor r,a,b,c + + + + + + +
+ +
+ + + + vec_pmsum_be + Vector Polynomial Multiply-Sum Big-Endian + + r = vec_pmsum_be (a, b) + + + Purpose: + Performs the exclusive-OR operation (implementing polynomial addition) + on each even-odd pair of the polynomial-multiplication result of the + corresponding elements of a and + b. + + Result value: Each element + i of r is + computed by an exclusive-OR operation of the polynomial + multiplication of input elements 2 × i of + a and b and input elements 2 × + i + 1 of a and + b. + + + Endian considerations: + All element numberings in the above description denote big-endian + (i.e., left-to-right) order, reflecting the underlying hardware + instruction. Unlike most of the vector intrinsics in this chapter, + vec_pmsum_be does not follow the bi-endian + programming model. + + + + Supported type signatures for vec_pmsum_be + + + + + + + + + + r + + + + + a + + + + + b + + + + Example Implementation + + + + + + + vector unsigned int + + + vector unsigned short + + + vector unsigned short + + + + vpmsumh r,a,b + + + + + + vector unsigned __int128 + + + vector unsigned long long + + + vector unsigned long long + + + + vpmsumd r,a,b + + + + + + vector unsigned long long + + + vector unsigned int + + + vector unsigned int + + + + vpmsumw r,a,b + + + + + + vector unsigned short + + + vector unsigned char + + + vector unsigned char + + + + vpmsumb r,a,b + + + + + +
+ +
+ + + + vec_popcnt + Vector Population Count + + r = vec_popcnt (a) + + + Purpose: + Returns a vector containing the number of bits set in each element of + the input vector. + + Result value: The value of each + element of r is the number of bits set + in the corresponding element of a. + Endian considerations: + None. + + + + Supported type signatures for vec_popcnt + + + + + + + + + r + + + + + a + + + + Example Implementation + + + + + + + vector unsigned char + + + vector signed char + + + + vpopcntb r,a + + + + + + vector unsigned char + + + vector unsigned char + + + + vpopcntb r,a + + + + + + vector unsigned int + + + vector signed int + + + + vpopcntw r,a + + + + + + vector unsigned int + + + vector unsigned int + + + + vpopcntw r,a + + + + + + vector unsigned long long + + + vector signed long long + + + + vpopcntd r,a + + + + + + vector unsigned long long + + + vector unsigned long long + + + + vpopcntd r,a + + + + + + vector unsigned short + + + vector signed short + + + + vpopcnth r,a + + + + + + vector unsigned short + + + vector unsigned short + + + + vpopcnth r,a + + + + + +
+ +
+ + + + vec_re + Vector Reciprocal Estimate + + r = vec_re (a) + + + Purpose: + Returns a vector containing estimates of the reciprocals of the + corresponding elements of the input vector. + + Result value: Each element of + r contains the estimated value of the + reciprocal of the corresponding element of a. + Endian considerations: + None. + + + + Supported type signatures for vec_re + + + + + + + + + r + + + + + a + + + + Example Implementation + + + + + + + vector double + + + vector double + + + + xvredp r,a + + + + + + vector float + + + vector float + + + + xvresp r,a + + + + + +
+ +
+ + + + vec_recipdiv + Vector Reciprocal Divide + + r = vec_recipdiv (a, b) + + + Purpose: + Returns a vector containing refined approximations of the division of + the corresponding elements of a by the + corresponding elements of b. This + built-in function provides an implementation-dependent precision, which + is commonly within 2 ulps (units in the last place) for most of the + numeric range expressible by + the input operands. This built-in function does not correspond to a + single IEEE operation and does not provide the overflow, underflow, and + NaN propagation characteristics specified for IEEE division. (Precision + may be a function of both the specified target processor model during + compilation and the actual processor on which a program is executed.) + + Result value: Each element of + r contains a refined approximation of + the division of the corresponding element of a by the corresponding element of b. + Endian considerations: + None. + + Notes: The example implementation + for vector double assumes that a register z + initially contains the double-precision floating-point value 1.0 + in each doubleword. + + + Supported type signatures for vec_recipdiv + + + + + + + + + + r + + + + + a + + + + + b + + + + Example Implementation + + + + + + + vector double + + + vector double + + + vector double + + + + xvredp t,b + xvnmsubadp z,b,t + xvmaddadp u,z,t + xvmuldp v,a,u + xvnmsubadp r/a,b,v + xvmaddmdp r/a,u,v + + + + + + vector float + + + vector float + + + vector float + + + + xvresp t,b + xvmulsp u,a,t + xvnmsubasp r/a,b,u + xvmaddmsp r/a,t,u + + + + + +
+ +
+ + + + vec_revb + Vector Reverse Bytes + + r = vec_revb (a) + + + Purpose: + Reverse the bytes of each vector element of a vector. + + Result value: Each element of + r contains the byte-reversed value of + the corresponding element of a. + Endian considerations: + None. + + Notes: + + + + No Power compilers yet support the vector _Float16 type, so that + interface is currently deferred. + + + + + The examples shown are for ISA 3.0. More complex sequences are + required for earlier ISA levels. + + + + + Interfaces that make no change to the data are deprecated. + + + + + + + Supported type signatures for vec_revb + + + + + + + + + + r + + + + + a + + + + Example ISA 3.0 Implementation + + + Restrictions + + + + + + + vector bool char + + + vector bool char + + + + [none] + + + + Deprecated + + + + + vector signed char + + + vector signed char + + + + [none] + + + + Deprecated + + + + + vector unsigned char + + + vector unsigned char + + + + [none] + + + + Deprecated + + + + + vector bool int + + + vector bool int + + + + xxbrw r,a + + + + Deprecated + + + + + vector signed int + + + vector signed int + + + + xxbrw r,a + + + + + + + + + vector unsigned int + + + vector unsigned int + + + + xxbrw r,a + + + + + + + + + vector signed __int128 + + + vector signed __int128 + + + + xxbrq r,a + + + + + + + + + vector unsigned __int128 + + + vector unsigned __int128 + + + + xxbrq r,a + + + + + + + + + vector bool long long + + + vector bool long long + + + + xxbrd r,a + + + + Deprecated + + + + + vector signed long long + + + vector signed long long + + + + xxbrd r,a + + + + + + + + + vector unsigned long long + + + vector unsigned long long + + + + xxbrd r,a + + + + + + + + + vector bool short + + + vector bool short + + + + xxbrh r,a + + + + Deprecated + + + + + vector signed short + + + vector signed short + + + + xxbrh r,a + + + + + + + + + vector unsigned short + + + vector unsigned short + + + + xxbrh r,a + + + + + + + + + vector double + + + vector double + + + + 
xxbrd r,a + + + + + + + + + vector float + + + vector float + + + + xxbrw r,a + + + + + + + + + vector _Float16 + + + vector _Float16 + + + + xxbrh r,a + + + + Deferred + + + + +
+ +
+ + + + vec_reve + Vector Reverse Elements + + r = vec_reve (a) + + + Purpose: + Reverse the elements of a vector. + + Result value: Returns a vector + with the elements of the input vector in reversed order. + Endian considerations: + The vpermr instruction is most naturally used to implement this built-in + function for a little-endian target, and the vperm instruction for a + big-endian target. This is not technically necessary, however, provided + the correct permute control vector is used. Note that use of vpermr + requires ISA 3.0. + + Notes: + + + + The example implementations assume that the permute control + vector for the vperm or vpermr instruction is in a register + identified by pcv. The value of pcv differs based on the + element size. + + + + + No Power compilers yet support the vector _Float16 type, so that + interface is currently deferred. + + + + + + Supported type signatures for vec_reve + + + + + + + + + + r + + + + + a + + + + Example Implementation + + + Restrictions + + + + + + + vector bool char + + + vector bool char + + + + vperm[r] r,a,a,pcv + + + + + + + + + vector signed char + + + vector signed char + + + + vperm[r] r,a,a,pcv + + + + + + + + + vector unsigned char + + + vector unsigned char + + + + vperm[r] r,a,a,pcv + + + + + + + + + vector bool int + + + vector bool int + + + + vperm[r] r,a,a,pcv + + + + + + + + + vector signed int + + + vector signed int + + + + vperm[r] r,a,a,pcv + + + + + + + + + vector unsigned int + + + vector unsigned int + + + + vperm[r] r,a,a,pcv + + + + + + + + + vector bool long long + + + vector bool long long + + + + vperm[r] r,a,a,pcv + + + + + + + + + vector signed long long + + + vector signed long long + + + + vperm[r] r,a,a,pcv + + + + + + + + + vector unsigned long long + + + vector unsigned long long + + + + vperm[r] r,a,a,pcv + + + + + + + + + vector bool short + + + vector bool short + + + + vperm[r] r,a,a,pcv + + + + + + + + + vector signed short + + + vector signed short + + + + vperm[r] 
r,a,a,pcv + + + + + + + + + vector unsigned short + + + vector unsigned short + + + + vperm[r] r,a,a,pcv + + + + + + + + + vector double + + + vector double + + + + vperm[r] r,a,a,pcv + + + + + + + + + vector float + + + vector float + + + + vperm[r] r,a,a,pcv + + + + + + + + + vector _Float16 + + + vector _Float16 + + + + vperm[r] r,a,a,pcv + + + + Deferred + + + + +
+ +
+ + + + vec_rint + Vector Round to Nearest Integer + + r = vec_rint (a) + + + Purpose: + Returns a vector containing the floating-point integral values nearest + to the values of the corresponding elements of the given vector. + + Result value: Each element of + r contains the nearest representable + floating-point integral value to the value of the corresponding element + of a. When an input element value is + exactly between two integer values, the result value is selected based + on the rounding mode specified by the Floating-Point Rounding Control + field (RN) of the FPSCR register. + Endian considerations: + None. + + + + Supported type signatures for vec_rint + + + + + + + + + r + + + + + a + + + + Example Implementation + + + + + + + vector double + + + vector double + + + + xvrdpic r,a + + + + + + vector float + + + vector float + + + + xvrspic r,a + + + + + +
+ +
+ + + + vec_rl + Vector Rotate Left + + r = vec_rl (a, b) + + + Purpose: + Rotates each element of a vector left by a given number of bits. + + Result value: Each element of + r is obtained by rotating the + corresponding element of a left by the + number of bits specified by the corresponding element of + b. + Endian considerations: + None. + + + + Supported type signatures for vec_rl + + + + + + + + + + r + + + + + a + + + + + b + + + + Example Implementation + + + + + + + vector signed char + + + vector signed char + + + vector unsigned char + + + + vrlb r,a,b + + + + + + vector unsigned char + + + vector unsigned char + + + vector unsigned char + + + + vrlb r,a,b + + + + + + vector signed int + + + vector signed int + + + vector unsigned int + + + + vrlw r,a,b + + + + + + vector unsigned int + + + vector unsigned int + + + vector unsigned int + + + + vrlw r,a,b + + + + + + vector signed long long + + + vector signed long long + + + vector unsigned long long + + + + vrld r,a,b + + + + + + vector unsigned long long + + + vector unsigned long long + + + vector unsigned long long + + + + vrld r,a,b + + + + + + vector signed short + + + vector signed short + + + vector unsigned short + + + + vrlh r,a,b + + + + + + vector unsigned short + + + vector unsigned short + + + vector unsigned short + + + + vrlh r,a,b + + + + + +
+ +
+ + + + vec_rlmi + Vector Rotate Left then Mask Insert + + r = vec_rlmi (a, b, c) + + + Purpose: + Rotates each element of a vector left and inserts each element under + a mask. + + Result value: Each element of + r is obtained by rotating the + corresponding element of vector b left + and inserting it under mask into the corresponding element of + a. Bits 11:15 of the corresponding + element of c contain the mask + beginning, bits 19:23 contain the mask end, and bits 27:31 contain the + shift count. + Endian considerations: + The referenced bit numbers within the elements of c are in left-to-right order. + + + + Supported type signatures for vec_rlmi + + + + + + + + + + + + r + + + + + a + + + + + b + + + + + c + + + + Example Implementation + + + Restrictions + + + + + + + vector unsigned int + + + vector unsigned int + + + vector unsigned int + + + vector unsigned int + + + + vrlwmi r/a,b,c + + + + ISA 3.0 or later + + + + + vector unsigned long long + + + vector unsigned long long + + + vector unsigned long long + + + vector unsigned long long + + + + vrldmi r/a,b,c + + + + ISA 3.0 or later + + + + +
+ +
+ + + + vec_rlnm + Vector Rotate Left then AND with Mask + + r = vec_rlnm (a, b, c) + + + Purpose: + Rotates each element of a vector left, then logically ANDs it with a + mask. + + Result value: Each element of + a is rotated left, then logically ANDed + with a mask specified by b and + c. + b contains the shift count for + each element in the low-order byte, with other bytes zero. + c contains the mask begin and mask end + for each element, with the mask end in the low-order byte, the mask + begin in the next higher byte, and other bytes zero. + Endian considerations: + None. + + + + Supported type signatures for vec_rlnm + + + + + + + + + + + + r + + + + + a + + + + + b + + + + + c + + + + Example Implementation + + + Restrictions + + + + + + + vector unsigned int + + + vector unsigned int + + + vector unsigned int + + + vector unsigned int + + + + vspltisw t,8 + vslw u,b,t + xxlor v,u,c + vrlwnm r,a,v + + + + ISA 3.0 or later + + + + + vector unsigned long long + + + vector unsigned long long + + + vector unsigned long long + + + vector unsigned long long + + + + xxspltib t,8 + vextsb2d u,t + vsld v,b,u + xxlor w,v,c + vrldnm r,a,w + + + + ISA 3.0 or later + + + + +
+ +
+ + + + vec_round + Vector Round + + r = vec_round (a) + + + Purpose: + Returns a vector containing the rounded values of the corresponding + elements of the given vector. + + Result value: Each element of + r contains the value of the + corresponding element of a, rounded + to the nearest representable floating-point integer, using IEEE + round-to-nearest rounding. + Notes: This function might not + follow the strict operation definition of the resolution of a tie during + a round if the -qstrict=nooperationprecision compiler option is + specified to the XLC compiler. + Endian considerations: + None. + + + + Supported type signatures for vec_round + + + + + + + + + r + + + + + a + + + + Example Implementation + + + + + + + vector double + + + vector double + + + + xvrdpi r,a + + + + + + vector float + + + vector float + + + + vrfin r,a + + + + + +
+ +
+ + + + vec_rsqrt + Vector Reciprocal Square Root + + r = vec_rsqrt (a) + + + Purpose: + Returns a vector containing a refined approximation of the reciprocal + square roots of the corresponding elements of the given vector. This + function provides an implementation-dependent greater precision than + vec_rsqrte. + Result value: Each element of + r contains a refined approximation of + the reciprocal square root of the corresponding element of + a. + Endian considerations: + None. + + Notes: The example implementations + assume that a register h initially + contains the floating-point value 0.5 in each element (single- or + double-precision as appropriate). + + + Supported type signatures for vec_rsqrt + + + + + + + + + r + + + + + a + + + + Example Implementation + + + + + + + vector double + + + vector double + + + + xvrsqrtedp t,a + xvmuldp u,t,a + xvmuldp v,t,h + xxlor w,h,h + xvnmsubadp w,u,v + xvmaddadp v,v,w + xvmaddadp u,u,w + xvnmsubmdp u,v,h + xvmaddadp v,v,u + xvadddp r,v,v + + + + + + vector float + + + vector float + + + + xvrsqrtesp t,a + xvmulsp u,t,a + xvmulsp v,t,h + xvnmsubmsp v,u,h + xvmaddmsp r/v,t,t + + + + + +
+ +
+ + + + vec_rsqrte + Vector Reciprocal Square Root Estimate + + r = vec_rsqrte (a) + + + Purpose: + Returns a vector containing estimates of the reciprocal square roots of + the corresponding elements of the given vector. + + Result value: Each element of + r contains the estimated value of the + reciprocal square root of the corresponding element of a. + Endian considerations: + None. + + + + Supported type signatures for vec_rsqrte + + + + + + + + + r + + + + + a + + + + Example Implementation + + + + + + + vector double + + + vector double + + + + xvrsqrtedp r,a + + + + + + vector float + + + vector float + + + + xvrsqrtesp r,a + + + + + +
+ +
+ + + + vec_sbox_be + Vector AES SubBytes Big-Endian + + r = vec_sbox_be (a) + + + Purpose: + Performs the SubBytes operation, as defined in Federal Information + Processing Standards FIPS-197, on a state_array contained in + a. + + + Result value: r contains the + result of the SubBytes operation, as defined in Federal Information + Processing Standard FIPS-197, on the state array represented by + a. + + Endian considerations: + All element numberings of the SubBytes operation use + big-endian (i.e., left-to-right) order, reflecting the underlying + hardware instruction. Unlike most of the vector intrinsics in this + chapter, vec_sbox_be does not follow the bi-endian + programming model. + + + Notes: This intrinsic may + not yet be available in all implementations. + + + Supported type signatures for vec_sbox_be + + + + + + + + + r + + + + + a + + + + Example Implementation + + + + + + + vector unsigned char + + + vector unsigned char + + + + vsbox r,a + + + + + +
+ +
+ + + + vec_sel + Vector Select + + r = vec_sel (a, b, c) + + + Purpose: + Returns a vector containing the value of either a or b + depending on the value of c. + + Result value: Each bit of + r has the value of the corresponding + bit of a if the corresponding bit of + c is 0. Otherwise, the bit of + r has the value of the corresponding + bit of b. + Endian considerations: + None. + + Notes: + No Power compilers yet support the vector _Float16 type, so those + interfaces are currently deferred. + + + Supported type signatures for vec_sel + + + + + + + + + + + + r + + + + + a + + + + + b + + + + + c + + + + Example Implementation + + + Restrictions + + + + + + + vector bool char + + + vector bool char + + + vector bool char + + + vector bool char + + + + xxsel r,a,b,c + + + + + + + + + vector bool char + + + vector bool char + + + vector bool char + + + vector unsigned char + + + + xxsel r,a,b,c + + + + + + + + + vector signed char + + + vector signed char + + + vector signed char + + + vector bool char + + + + xxsel r,a,b,c + + + + + + + + + vector signed char + + + vector signed char + + + vector signed char + + + vector unsigned char + + + + xxsel r,a,b,c + + + + + + + + + vector unsigned char + + + vector unsigned char + + + vector unsigned char + + + vector bool char + + + + xxsel r,a,b,c + + + + + + + + + vector unsigned char + + + vector unsigned char + + + vector unsigned char + + + vector unsigned char + + + + xxsel r,a,b,c + + + + + + + + + vector bool int + + + vector bool int + + + vector bool int + + + vector bool int + + + + xxsel r,a,b,c + + + + + + + + + vector bool int + + + vector bool int + + + vector bool int + + + vector unsigned int + + + + xxsel r,a,b,c + + + + + + + + + vector signed int + + + vector signed int + + + vector signed int + + + vector bool int + + + + xxsel r,a,b,c + + + + + + + + + vector signed int + + + vector signed int + + + vector signed int + + + vector unsigned int + + + + xxsel r,a,b,c + + + + + + + + + vector unsigned 
int + + + vector unsigned int + + + vector unsigned int + + + vector bool int + + + + xxsel r,a,b,c + + + + + + + + + vector unsigned int + + + vector unsigned int + + + vector unsigned int + + + vector unsigned int + + + + xxsel r,a,b,c + + + + + + + + + vector bool long long + + + vector bool long long + + + vector bool long long + + + vector bool long long + + + + xxsel r,a,b,c + + + + + + + + + vector bool long long + + + vector bool long long + + + vector bool long long + + + vector unsigned long long + + + + xxsel r,a,b,c + + + + + + + + + vector signed long long + + + vector signed long long + + + vector signed long long + + + vector bool long long + + + + xxsel r,a,b,c + + + + + + + + + vector signed long long + + + vector signed long long + + + vector signed long long + + + vector unsigned long long + + + + xxsel r,a,b,c + + + + + + + + + vector unsigned long long + + + vector unsigned long long + + + vector unsigned long long + + + vector bool long long + + + + xxsel r,a,b,c + + + + + + + + + vector unsigned long long + + + vector unsigned long long + + + vector unsigned long long + + + vector unsigned long long + + + + xxsel r,a,b,c + + + + + + + + + vector bool short + + + vector bool short + + + vector bool short + + + vector bool short + + + + xxsel r,a,b,c + + + + + + + + + vector bool short + + + vector bool short + + + vector bool short + + + vector unsigned short + + + + xxsel r,a,b,c + + + + + + + + + vector signed short + + + vector signed short + + + vector signed short + + + vector bool short + + + + xxsel r,a,b,c + + + + + + + + + vector signed short + + + vector signed short + + + vector signed short + + + vector unsigned short + + + + xxsel r,a,b,c + + + + + + + + + vector unsigned short + + + vector unsigned short + + + vector unsigned short + + + vector bool short + + + + xxsel r,a,b,c + + + + + + + + + vector unsigned short + + + vector unsigned short + + + vector unsigned short + + + vector unsigned short + + + + xxsel r,a,b,c + + + + + 
+ + + + vector double + + + vector double + + + vector double + + + vector bool long long + + + + xxsel r,a,b,c + + + + + + + + + vector double + + + vector double + + + vector double + + + vector unsigned long long + + + + xxsel r,a,b,c + + + + + + + + + vector float + + + vector float + + + vector float + + + vector bool int + + + + xxsel r,a,b,c + + + + + + + + + vector float + + + vector float + + + vector float + + + vector unsigned int + + + + xxsel r,a,b,c + + + + + + + + + vector _Float16 + + + vector _Float16 + + + vector _Float16 + + + vector bool short + + + + xxsel r,a,b,c + + + + Deferred + + + + + vector _Float16 + + + vector _Float16 + + + vector _Float16 + + + vector unsigned short + + + + xxsel r,a,b,c + + + + Deferred + + + + +
+ +
+ + + + vec_shasigma_be + Vector SHA Sigma Big-Endian + + r = vec_shasigma_be (a, b, c) + + + Purpose: + Performs a Secure Hash computation in accordance with Federal + Information Processing Standards FIPS-180-3. + + + Result value: Each element of + r contains the SHA256 or SHA512 hash + as follows. + + + The result of the SHA-256 function (r[i] for + i = 0 to 3) is: + + + + σ0(a[i]), if b is 0 and bit i of + the 4-bit c is 0. + + + + + σ1(a[i]), if b is 0 and bit i of + the 4-bit c is 1. + + + + + Σ0(a[i]), if b is nonzero and bit i + of the 4-bit c is 0. + + + + + Σ1(a[i]), if b is nonzero and bit i + of the 4-bit c is 1. + + + + + The result of the SHA-512 function (r[i] for + i = 0 to 1) is: + + + + σ0(a[i]), if b is 0 and bit 2 × + i of the 4-bit c is 0. + + + + + σ1(a[i]), if b is 0 and bit 2 × + i of the 4-bit c is 1. + + + + + Σ0(a[i]), if b is nonzero and bit 2 × + i of the 4-bit c is 0. + + + + + Σ1(a[i]), if b is nonzero and bit 2 × + i of the 4-bit c is 1. + + + + + Endian considerations: + All element numberings in the above description denote big-endian + (i.e., left-to-right) order, reflecting the underlying hardware + instruction. Unlike most of the vector intrinsics in this chapter, + vec_shasigma_be does not follow the bi-endian + programming model. + + + + Supported type signatures for vec_shasigma_be + + + + + + + + + + + r + + + + + a + + + + + b + + + + + c + + + + Example Implementation + + + + + + + vector unsigned int + + + vector unsigned int + + + const int + + + const int + + + + vshasigmaw r,a,b,c + + + + + + vector unsigned long long + + + vector unsigned long long + + + const int + + + const int + + + + vshasigmad r,a,b,c + + + + + +
+ +
+ + + + vec_signed + Vector Convert Floating-Point to Signed Integer + + r = vec_signed (a) + + + Purpose: + Converts a vector of floating-point numbers to a vector of signed + integers. + + Result value: Each element of + r is obtained by truncating the + corresponding element of a to a signed + integer. + Endian considerations: + None. + + + + Supported type signatures for vec_signed + + + + + + + + + r + + + + + a + + + + Example Implementation + + + + + + + vector signed int + + + vector float + + + + xvcvspsxws r,a + + + + + + vector signed long long + + + vector double + + + + xvcvdpsxds r,a + + + + + +
+ +
+ + + + vec_signed2 + Vector Convert Double-Precision to Signed Word + + r = vec_signed2 (a, b) + + + Purpose: + Converts two vectors of double-precision floating-point numbers to a + vector of signed 32-bit integers. + + Result value: Let v be the concatenation of a and b. Each + element of r is obtained by truncating + the corresponding element of v to a + signed 32-bit integer. + Endian considerations: + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets. + + + + Supported type signatures for vec_signed2 + + + + + + + + + + + r + + + + + a + - vector double + + b + - - - xxbrd r,a - + + Example LE + Implementation - - + + Example BE + Implementation + + - vector float + vector signed int - vector float + vector double + + + vector double - xxbrw r,a + xxpermdi t,b,a,3 + xxpermdi u,b,a,0 + xvcvdpsxws v,t + xvcvdpsxws w,u + vmrgow r,w,v - - - - - - - vector _Float16 - - - vector _Float16 - - xxbrh r,a + xxpermdi t,a,b,0 + xxpermdi u,a,b,3 + xvcvdpsxws v,t + xvcvdpsxws w,u + vmrgew r,v,w - - Deferred - @@ -21720,45 +25300,30 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_reve - Vector Reverse Elements + + vec_signede + Vector Convert Double-Precision to Signed Word Even - r = vec_reve (a) + r = vec_signede (a) Purpose: - Reverse the elements of a vector. + Converts elements of an input vector to signed integers and stores + them in the even-numbered elements of the result vector. - Result value: Returns a vector - with the elements of the input vector in reversed order. + Result value: Element 0 of + r contains element 0 of a, truncated to a signed integer. Element 2 of + r contains element 1 of a, truncated to a signed integer. Elements 1 and + 3 of r are undefined. 
Endian considerations: - The vpermr instruction is most naturally used to implement this built-in - function for a little-endian target, and the vperm instruction for a - big-endian target. This is not technically necessary, however, provided - the correct permute control vector is used. Note that use of vpermr - requires ISA 3.0. + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets. - Notes: - - - - The example implementations assume that the permute control - vector for the vperm or vpermr instruction is in a register - identified by pcv. The value of pcv differs based on the - element size. - - - - - No Power compilers yet support the vector _Float16 type, so that - interface is currently deferred. - - -
- Supported type signatures for vec_reve + Supported type signatures for vec_signede @@ -21777,253 +25342,299 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - Example Implementation + Example LE + Implementation - Restrictions + Example BE + Implementation - vector bool char + vector signed int - vector bool char + vector double - vperm[r] r,a,a,pcv + xvcvdpsxws t,a + vsldoi r,t,t,12 - - - - - - - vector signed char - - - vector signed char - - vperm[r] r,a,a,pcv + xvcvdpsxws r,a + - - - + + +
+ +
+ + + + vec_signedo + Vector Convert Double-Precision to Signed Word Odd + + r = vec_signedo (a) + + + Purpose: + Converts elements of an input vector to signed integers and stores them + in the odd-numbered elements of the result vector. + + Result value: Element 1 of + r contains element 0 of a, truncated to a signed integer. Element 3 of + r contains element 1 of a, truncated to a signed integer. Elements 0 and + 2 of r are undefined. + Endian considerations: + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets. + + + + Supported type signatures for vec_signedo + + + + + + - - vector unsigned char + + + r + - - vector unsigned char + + + a + - - - vperm[r] r,a,a,pcv - + + Example LE + Implementation - - + + Example BE + Implementation + + - vector bool int + vector signed int - vector bool int + vector double - vperm[r] r,a,a,pcv + xvcvdpsxws r,a + - + + xvcvdpsxws t,a + vsldoi r,t,t,12 + + + +
+ +
+ + + + vec_sl + Vector Shift Left + + r = vec_sl (a, b) + + + Purpose: + Performs a left shift for each element of a vector. + + Result value: Each element of + r is the result of left-shifting the + corresponding element of a by the + number of bits specified by the corresponding element of b, modulo the number of bits in the element. + Zeros are shifted in from the right. + Endian considerations: + None. + + + + Supported type signatures for vec_sl + + + + + + - - vector signed int + + + r + - - vector signed int + + + a + - - - vperm[r] r,a,a,pcv - + + + b + - - + + Example Implementation + + - vector unsigned int - - - vector unsigned int - - - - vperm[r] r,a,a,pcv - - - - + vector signed char - - - vector bool long long + vector signed char - vector bool long long + vector unsigned char - vperm[r] r,a,a,pcv + vslb r,a,b - - - - vector signed long long - - - vector signed long long - - - - vperm[r] r,a,a,pcv - - - - + vector unsigned char - - - vector unsigned long long + vector unsigned char - vector unsigned long long + vector unsigned char - vperm[r] r,a,a,pcv + vslb r,a,b - - - - vector bool short + vector signed int - vector bool short + vector signed int + + + vector unsigned int - vperm[r] r,a,a,pcv + vslw r,a,b - - - - vector signed short + vector unsigned int - vector signed short + vector unsigned int + + + vector unsigned int - vperm[r] r,a,a,pcv + vslw r,a,b - - - - vector unsigned short + vector signed long long - vector unsigned short + vector signed long long + + + vector unsigned long long - vperm[r] r,a,a,pcv + vsld r,a,b - - - - vector double + vector unsigned long long - vector double + vector unsigned long long + + + vector unsigned long long - vperm[r] r,a,a,pcv + vsld r,a,b - - - - vector float + vector signed short - vector float + vector signed short + + + vector unsigned short - vperm[r] r,a,a,pcv + vslh r,a,b - - - - vector _Float16 + vector unsigned short - vector _Float16 + vector unsigned short + + + vector unsigned short - 
vperm[r] r,a,a,pcv + vslh r,a,b - - Deferred - @@ -22032,46 +25643,68 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_rint - Vector Round to Nearest Integer + + vec_sld + Vector Shift Left Double - r = vec_rint (a) + r = vec_sld (a, b, c) Purpose: - Returns a vector containing the floating-point integral values nearest - to the values of the corresponding elements of the given vector. + Left shifts a double vector (that is, two concatenated vectors) by a + given number of bytes. For vec_sld being performed on the vector bool + and floating-point types, the result is undefined when the specified + shift count is not a multiple of the element size. - Result value: Each element of - r contains the nearest representable - floating-point integral value to the value of the corresponding element - of a. When an input element value is - exactly between two integer values, the result value is selected based - on the rounding mode specified by the Floating-Point Rounding Control - field (RN) of the FPSCR register. + Result value: Vector r receives the most-significant 16 bytes obtained + by concatenating a and b and shifting left by the number of bytes + specified by c, which must be in the + range 0–15. Endian considerations: - None. + This intrinsic is not endian-neutral, so uses of + vec_sld in big-endian code must be rewritten for little-endian targets. + Historically, vec_sld could be used to shift by amounts not a multiple + of the element size for most types, in which case the purpose of the + shift is difficult to determine and difficult to automatically rewrite + efficiently for little endian. So the concatenation of a and b is + done in big-endian fashion (left to right), and the shift is always + to the left. This will generally produce surprising results for + little-endian targets.
- Supported type signatures for vec_rint - + Supported type signatures for vec_sld + + + - + r - + a + + + b + + + + + c + + Example Implementation @@ -22080,115 +25713,77 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector double + vector bool char - vector double - - - - xvrdpic r,a - + vector bool char - - - vector float + vector bool char - vector float + const int - xvrspic r,a + vsldoi r,a,b,c - - -
- -
- - - - vec_rl - Vector Rotate Left - - r = vec_rl (a, b) - - - Purpose: - Rotates each element of a vector left by a given number of bits. - - Result value: Each element of - r is obtained by rotating the - corresponding element of a left by the - number of bits specified by the corresponding element of - b. - Endian considerations: - None. - - - - Supported type signatures for vec_rl - - - - - - - - - r - + + vector signed char - - - a - + + vector signed char - - - b - + + vector signed char + + + const int - - Example Implementation + + + vsldoi r,a,b,c + - - - vector signed char + vector unsigned char - vector signed char + vector unsigned char vector unsigned char + + const int + - vrlb r,a,b + vsldoi r,a,b,c - vector unsigned char + vector bool int - vector unsigned char + vector bool int - vector unsigned char + vector bool int + + + const int - vrlb r,a,b + vsldoi r,a,b,c @@ -22200,11 +25795,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed int - vector unsigned int + vector signed int + + + const int - vrlw r,a,b + vsldoi r,a,b,c @@ -22218,9 +25816,31 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned int + + const int + - vrlw r,a,b + vsldoi r,a,b,c + + + + + + vector bool long long + + + vector bool long long + + + vector bool long long + + + const int + + + + vsldoi r,a,b,c @@ -22232,11 +25852,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed long long - vector unsigned long long + vector signed long long + + + const int - vrld r,a,b + vsldoi r,a,b,c @@ -22250,9 +25873,50 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned long long + + const int + - vrld r,a,b + vsldoi r,a,b,c + + + + + + vector pixel + + + vector pixel + + + vector pixel + + + const int + + + + vsldoi r,a,b,c + + + + + + vector bool short + + + vector bool short + + + vector bool short + + + const int + + + 
+ vsldoi r,a,b,c @@ -22264,11 +25928,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed short - vector unsigned short + vector signed short + + + const int - vrlh r,a,b + vsldoi r,a,b,c @@ -22282,9 +25949,50 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned short + + const int + - vrlh r,a,b + vsldoi r,a,b,c + + + + + + vector double + + + vector double + + + vector double + + + const int + + + + vsldoi r,a,b,c + + + + + + vector float + + + vector float + + + vector float + + + const int + + + + vsldoi r,a,b,c @@ -22295,39 +26003,41 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_rlmi - Vector Rotate Left then Mask Insert + + vec_sldw + Vector Shift Left Double by Words - r = vec_rlmi (a, b, c) + r = vec_sldw (a, b, c) Purpose: - Rotates each element of a vector left and inserts each element under - a mask. + Returns a vector obtained by shifting left the concatenated input + vectors by the number of specified words. - Result value: Each element of - r is obtained by rotating the - corresponding element of vector b left - and inserting it under mask into the corresponding element of - a. Bits 11:15 of the corresponding - element of c contain the mask - beginning, bits 19:23 contain the mask end, and bits 27:31 contain the - shift count. + Result value: Vector r receives the most-significant 16 bytes obtained + by concatenating a and b and shifting left by the number of words + specified by c, which must be in the + range 0–3. Endian considerations: - The referenced bit numbers within the elements of c are in left-to-right order. + This intrinsic is not endian-neutral, so uses of + vec_sldw in big-endian code must be rewritten for little-endian targets. + The concatenation of a and b is + done in big-endian fashion (left to right), and the shift is always + to the left. 
This will generally produce surprising results for + little-endian targets.
- Supported type signatures for vec_rlmi - + Supported type signatures for vec_sldw + - @@ -22350,15 +26060,69 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> c - + Example Implementation + + + + - Restrictions + vector signed char + + + vector signed char + + + vector signed char + + + const int + + + + xxsldwi r,a,b,c + + + + + + vector unsigned char + + + vector unsigned char + + + vector unsigned char + + + const int + + + + xxsldwi r,a,b,c + + + + + + vector signed int + + + vector signed int + + + vector signed int + + + const int + + + + xxsldwi r,a,b,c + - - vector unsigned int @@ -22370,160 +26134,89 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned int - vector unsigned int + const int - vrlwmi r/a,b,c + xxsldwi r,a,b,c - - ISA 3.0 or later - - vector unsigned long long + vector signed long long - vector unsigned long long + vector signed long long - vector unsigned long long + vector signed long long - vector unsigned long long + const int - vrldmi r/a,b,c + xxsldwi r,a,b,c - - ISA 3.0 or later - - - -
- -
- - - - vec_rlnm - Vector Rotate Left then AND with Mask - - r = vec_rlnm (a, b, c) - - - Purpose: - Rotates each element of a vector left, then logically ANDs it with a - mask. - - Result value: Each element of - a is rotated left, then logically ANDed - with a mask specified by b and - c. - b contains the shift count for - each element in the low-order byte, with other bytes zero. - c contains the mask begin and mask end - for each element, with the mask end in the low-order byte, the mask - begin in the next higher byte, and other bytes zero. - Endian considerations: - None. - - - - Supported type signatures for vec_rlnm - - - - - - - - - - r - + vector unsigned long long - - a - + vector unsigned long long - - b - + vector unsigned long long - - c - - - - Example Implementation + const int - - Restrictions + + + xxsldwi r,a,b,c + - - - vector unsigned int + vector signed short - vector unsigned int + vector signed short - vector unsigned int + vector signed short - vector unsigned int + const int - vspltisw t,8 - vslw u,b,t - xxlor v,u,c - vrlwnm r,a,v + xxsldwi r,a,b,c - - ISA 3.0 or later - - vector unsigned long long + vector unsigned short - vector unsigned long long + vector unsigned short - vector unsigned long long + vector unsigned short - vector unsigned long long + const int - xxspltib t,8 - vextsb2d u,t - vsld v,b,u - xxlor w,v,c - vrldnm r,a,w + xxsldwi r,a,b,c - - ISA 3.0 or later - @@ -22532,36 +26225,36 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_round - Vector Round + + vec_sll + Vector Shift Left Long - r = vec_round (a) + r = vec_sll (a, b) Purpose: - Returns a vector containing the rounded values of the corresponding - elements of the given vector. + Left shifts an entire vector by a given number of bits. - Result value: Each element of - r contains the value of the - corresponding element of a, rounded - to the nearest representable floating-point integer, using IEEE - round-to-nearest rounding. 
- Notes: This function might not - follow the strict operation definition of the resolution of a tie during - a round if the -qstrict=nooperationprecision compiler option is - specified to the XLC compiler. + Result value: Vector + r contains the contents of a, shifted left by the number of bits specified + by the three least-significant bits of b. Zeros are supplied on the right. The shift + count must have been replicated into all bytes of b; if not, the value of r is undefined. Endian considerations: - None. + This intrinsic is not endian-neutral, so uses of + vec_sll in big-endian code must be rewritten for little-endian targets.
- Supported type signatures for vec_round - + Supported type signatures for vec_sll + + @@ -22574,6 +26267,11 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> a + + + b + + Example Implementation @@ -22582,121 +26280,145 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector double + vector signed char - vector double + vector signed char + + + vector unsigned char - xvrdpi r,a + vsl r,a,b - vector float + vector unsigned char - vector float + vector unsigned char + + + vector unsigned char - vrfin r,a + vsl r,a,b + + + + + + vector signed int + + + vector signed int + + + vector unsigned char + + + + vsl r,a,b + + + + + + vector unsigned int + + + vector unsigned int + + + vector unsigned char + + + + vsl r,a,b + + + + + + vector signed long long + + + vector signed long long + + + vector unsigned char + + + + vsl r,a,b - - -
- -
- - - - vec_rsqrt - Vector Reciprocal Square Root - - r = vec_rsqrt (a) - - - Purpose: - Returns a vector containing a refined approximation of the reciprocal - square roots of the corresponding elements of the given vector. This - function provides an implementation-dependent greater precision than - vec_rsqrte. - Result value: Each element of - r contains a refined approximation of - the reciprocal square root of the corresponding element of - a. - Endian considerations: - None. - - Notes: The example implementations - assume that a register h initially - contains the floating-point value 0.5 in each element (single- or - double-precision as appropriate). - - - Supported type signatures for vec_rsqrt - - - - - - - - r - + + vector unsigned long long - - - a - + + vector unsigned long long - - Example Implementation + + vector unsigned char + + + + vsl r,a,b + - - - vector double + vector pixel - vector double + vector pixel + + + vector unsigned char - xvrsqrtedp t,a - xvmuldp u,t,a - xvmuldp v,t,h - xxlor w,h,h - xvnmsubadp w,u,v - xvmaddadp v,v,w - xvmaddadp u,u,w - xvnmsubmdp u,v,h - xvmaddadp v,v,u - xvadddp r,v,v + vsl r,a,b - vector float + vector signed short - vector float + vector signed short + + + vector unsigned char - xvrsqrtesp t,a - xvmulsp u,t,a - xvmulsp v,t,h - xvnmsubmsp v,u,h - xvmaddmsp r/v,t,t + vsl r,a,b + + + + + + vector unsigned short + + + vector unsigned short + + + vector unsigned char + + + + vsl r,a,b @@ -22707,31 +26429,36 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_rsqrte - Vector Reciprocal Square Root Estimate + + vec_slo + Vector Shift Left by Octets - r = vec_rsqrte (a) + r = vec_slo (a, b) Purpose: - Returns a vector containing estimates of the reciprocal square roots of - the corresponding elements of the given vector. + Left shifts a vector by a given number of bytes (octets). 
- Result value: Each element of - r contains the estimated value of the - reciprocal square root of the corresponding element of a. + Result value: Vector r receives the contents of a, shifted left by the number of bytes specified + by bits 1:4 of the least-significant byte of b. Endian considerations: - None. + This intrinsic is not endian-neutral, so uses of + vec_slo in big-endian code must be rewritten for little-endian targets. + The shift count is in element 15 of b + for big-endian, but in element 0 of b + for little-endian.
- Supported type signatures for vec_rsqrte - + Supported type signatures for vec_slo + + @@ -22744,6 +26471,11 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> a + + + b + + Example Implementation @@ -22752,90 +26484,52 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector double + vector signed char - vector double + vector signed char + + + vector signed char - xvrsqrtedp r,a + vslo r,a,b - vector float + vector signed char - vector float + vector signed char + + + vector unsigned char - xvrsqrtesp r,a + vslo r,a,b - - -
- -
- - - - vec_sbox_be - Vector AES SubBytes Big-Endian - - r = vec_sbox_be (a) - - - Purpose: - Performs the SubBytes operation, as defined in Federal Information - Processing Standards FIPS-197, on a state_array contained in - a. - - - Result value: r contains the - result of the SubBytes operation, as defined in Federal Information - Processing Standard FIPS-197, on the state array represented by - a. - - Endian considerations: - All element numberings of the SubBytes operation use - big-endian (i.e., left-to-right) order, reflecting the underlying - hardware insruction. Unlike most of the vector intrinsics in this - chapter, vec_sbox_be does not follow the bi-endian - programming model. - - - Notes: This intrinsic may - not yet be available in all implementations. - - - Supported type signatures for vec_sbox_be - - - - - - - - r - + + vector unsigned char - - - a - + + vector unsigned char - - Example Implementation + + vector signed char + + + + vslo r,a,b + - - vector unsigned char @@ -22843,386 +26537,447 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned char + + vector unsigned char + - vsbox r,a + vslo r,a,b - - -
- -
- - - - vec_sel - Vector Select - - r = vec_sel (a, b, c) - - - Purpose: - Returns a vector containing the value of either a or b - depending on the value of c. - - Result value: Each bit of - r has the value of the corresponding - bit of a if the corresponding bit of - c is 0. Otherwise, the bit of - r has the value of the corresponding - bit of b. - Endian considerations: - None. - - Notes: - No Power compilers yet support the vector _Float16 type, so those - interfaces are currently deferred. - - - Supported type signatures for vec_sel - - - - - - - - - - r - + vector signed int - - a - + vector signed int - - b - + vector signed char + + + vslo r,a,b + + + + - - c - + vector signed int - - Example Implementation + + vector signed int - Restrictions + vector unsigned char + + + + vslo r,a,b + - - - vector bool char - - - vector bool char + vector unsigned int - vector bool char + vector unsigned int - vector bool char + vector signed char - xxsel r,a,b,c + vslo r,a,b - - - - vector bool char - - - vector bool char + vector unsigned int - vector bool char + vector unsigned int vector unsigned char - xxsel r,a,b,c + vslo r,a,b - - - - vector signed char + vector signed long long - vector signed char + vector signed long long vector signed char - - vector bool char - - xxsel r,a,b,c + vslo r,a,b + + + + vector signed long long + + + vector signed long long + + + vector unsigned char + - + + vslo r,a,b + - vector signed char + vector unsigned long long - vector signed char + vector unsigned long long vector signed char + + + vslo r,a,b + + + + + + vector unsigned long long + + + vector unsigned long long + vector unsigned char - xxsel r,a,b,c + vslo r,a,b + + + + vector pixel + + + vector pixel + + + vector signed char + - + + vslo r,a,b + - vector unsigned char + vector pixel - vector unsigned char + vector pixel vector unsigned char - - vector bool char - - xxsel r,a,b,c + vslo r,a,b - - - - vector unsigned char + vector signed short - vector unsigned char + 
vector signed short - vector unsigned char + vector signed char + + + + vslo r,a,b + + + + + + vector signed short + + + vector signed short vector unsigned char - xxsel r,a,b,c + vslo r,a,b - - - - vector bool int - - - vector bool int + vector unsigned short - vector bool int + vector unsigned short - vector bool int + vector signed char - xxsel r,a,b,c + vslo r,a,b - - - - vector bool int - - - vector bool int + vector unsigned short - vector bool int + vector unsigned short - vector unsigned int + vector unsigned char - xxsel r,a,b,c + vslo r,a,b - - - - vector signed int - - - vector signed int + vector float - vector signed int + vector float - vector bool int + vector signed char - xxsel r,a,b,c + vslo r,a,b - - - - vector signed int + vector float - vector signed int + vector float + + + vector unsigned char + + + + vslo r,a,b + + + + + +
+ +
+ + + + vec_slv + Vector Shift Left Variable + + r = vec_slv (a, b) + + + Purpose: + Left-shifts a vector by a varying number of bits by element. + + Result value: Let v be a 17-byte vector formed from a in bytes [0:15] and a zero byte in element 16. + Then each byte element i of r is determined as follows. The start bit + sb is obtained from bits 5:7 of byte element + i of b. Then + the contents of bits sb:sb+7 + of the halfword in byte elements + i:i+1 of v are placed into byte element + i of r. + Endian considerations: + All bit and byte element numbers are specified in big-endian order. + This intrinsic is not endian-neutral. + + + + Supported type signatures for vec_slv + + + + + + + + + + + r + - vector signed int + + a + - vector unsigned int + + b + - - - xxsel r,a,b,c - + + Example Implementation - - + + Restrictions + + - vector unsigned int - - - vector unsigned int + vector unsigned char - vector unsigned int + vector unsigned char - vector bool int + vector unsigned char - xxsel r,a,b,c + vslv r,a,b - - + + ISA 3.0 or later + + +
+ +
+ + + + vec_splat + Vector Splat + + r = vec_splat (a, b) + + + Purpose: + Returns a vector that has all of its elements set to a given value. + + Result value: The value of each + element of r is the value of the + element of a specified by b, which must be an element number less than the + number of elements supported for a's + type. + Endian considerations: + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets. + + Notes: + No Power compilers yet support the vector _Float16 type, so those + interfaces are currently deferred. + + + Supported type signatures for vec_splat + + + + + + + + - vector unsigned int + + r + - vector unsigned int + + a + - vector unsigned int + + b + - - vector unsigned int + + Example LE + Implementation - - - xxsel r,a,b,c - + + Example BE + Implementation - - + + Restrictions + + - vector bool long long - - - vector bool long long + vector bool char - vector bool long long + vector bool char - vector bool long long + const int - xxsel r,a,b,c + vspltb r,a,15-b - - - - - - - vector bool long long - - - vector bool long long - - - vector bool long long - - - vector unsigned long long - - xxsel r,a,b,c + vspltb r,a,b @@ -23231,20 +26986,22 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector signed long long + vector signed char - vector signed long long + vector signed char - vector signed long long + const int - - vector bool long long + + + vspltb r,a,15-b + - xxsel r,a,b,c + vspltb r,a,b @@ -23253,20 +27010,22 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector signed long long + vector unsigned char - vector signed long long + vector unsigned char - vector signed long long + const int - - vector unsigned long long + + + vspltb r,a,15-b + - xxsel r,a,b,c + vspltb r,a,b @@ -23275,20 +27034,22 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector unsigned long 
long + vector bool int - vector unsigned long long + vector bool int - vector unsigned long long + const int - - vector bool long long + + + xxspltw r,a,3-b + - xxsel r,a,b,c + xxspltw r,a,b @@ -23297,20 +27058,22 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector unsigned long long + vector signed int - vector unsigned long long + vector signed int - vector unsigned long long + const int - - vector unsigned long long + + + xxspltw r,a,3-b + - xxsel r,a,b,c + xxspltw r,a,b @@ -23319,20 +27082,22 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector bool short + vector unsigned int - vector bool short + vector unsigned int - vector bool short + const int - - vector bool short + + + xxspltw r,a,3-b + - xxsel r,a,b,c + xxspltw r,a,b @@ -23341,20 +27106,22 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector bool short + vector bool long long - vector bool short + vector bool long long - vector bool short + const int - - vector unsigned short + + + xxpermdi r,a,a,(1-b)*3 + - xxsel r,a,b,c + xxpermdi r,a,a,b*3 @@ -23363,20 +27130,22 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector signed short + vector signed long long - vector signed short + vector signed long long - vector signed short + const int - - vector bool short + + + xxpermdi r,a,a,(1-b)*3 + - xxsel r,a,b,c + xxpermdi r,a,a,b*3 @@ -23385,20 +27154,22 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector signed short + vector unsigned long long - vector signed short + vector unsigned long long - vector signed short + const int - - vector unsigned short + + + xxpermdi r,a,a,(1-b)*3 + - xxsel r,a,b,c + xxpermdi r,a,a,b*3 @@ -23407,20 +27178,22 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector unsigned short + vector pixel - vector unsigned short + vector pixel - vector unsigned short + const int - - vector 
bool short + + + vsplth r,a,7-b + - xxsel r,a,b,c + vsplth r,a,b @@ -23429,20 +27202,22 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector unsigned short + vector bool short - vector unsigned short + vector bool short - vector unsigned short + const int - - vector unsigned short + + + vsplth r,a,7-b + - xxsel r,a,b,c + vsplth r,a,b @@ -23451,20 +27226,22 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector double + vector signed short - vector double + vector signed short - vector double + const int - - vector bool long long + + + vsplth r,a,7-b + - xxsel r,a,b,c + vsplth r,a,b @@ -23473,20 +27250,22 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector double + vector unsigned short - vector double + vector unsigned short - vector double + const int - - vector unsigned long long + + + vsplth r,a,7-b + - xxsel r,a,b,c + vsplth r,a,b @@ -23495,20 +27274,22 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector float + vector double - vector float + vector double - vector float + const int - - vector bool int + + + xxpermdi r,a,a,(1-b)*3 + - xxsel r,a,b,c + xxpermdi r,a,a,b @@ -23523,14 +27304,16 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector float - vector float + const int - - vector unsigned int + + + xxspltw r,a,3-b + - xxsel r,a,b,c + xxspltw r,a,b @@ -23545,41 +27328,83 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector _Float16 - vector _Float16 + const int - - vector bool short + + + vsplth r,a,7-b + - xxsel r,a,b,c + vsplth r,a,b Deferred + + +
+ +
+ + + + vec_splat_s8 + Vector Splat to Signed Byte + + r = vec_splat_s8 (a) + + + Purpose: + Returns a vector with all elements equal to the given value. + + Result value: + Each element of r is given the + sign-extended 5-bit value of a. + The range of this value is [-16:15]. + Endian considerations: + None. + + + + Supported type signatures for vec_splat_s8 + + + + + - - vector _Float16 + + + r + - - vector _Float16 + + + a + + + + Example Implementation + + + + - vector _Float16 + vector signed char - vector unsigned short + 5-bit signed literal - xxsel r,a,b,c + vspltisb r,a - - Deferred - @@ -23588,142 +27413,43 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_shasigma_be - Vector SHA Sigma Big-Endian + + vec_splat_s16 + Vector Splat to Signed Halfword - r = vec_shasigma_be (a, b, c) + r = vec_splat_s16 (a) Purpose: - Performs a Secure Hash computation in accordance with Federal - Information Processing Standards FIPS-180-3. - - - Result value: Each element of - r contains the SHA256 or SHA512 hash - as follows. + Returns a vector with all elements equal to the given value. - - The result of the SHA-256 function (r[i] for - i = 0 to 3) is: - - - - σ0(a[i]), if b is 0 and bit i of - the 4-bit c is 0. - - - - - σ1(a[i]), if b is 0 and bit i of - the 4-bit c is 1. - - - - - Σ0(a[i]), if b is nonzero and bit i - of the 4-bit c is 0. - - - - - Σ1(a[i]), if b is nonzero and bit i - of the 4-bit c is 1. - - - - - The result of the SHA-512 function (r[i] for - i = 0 to 1) is: - - - - σ0(a[i]), if b is 0 and bit 2 × - i of the 4-bit c is 0. - - - - - σ1(a[i]), if b is 0 and bit 2 × - i of the 4-bit c is 1. - - - - - Σ0(a[i]), if b is nonzero and bit 2 × - i of the 4-bit c is 0. - - - - - Σ1(a[i]), if b is nonzero and bit 2 × - i of the 4-bit c is 1. - - - - + Result value: + Each element of r is given the + sign-extended 5-bit value of a. + The range of this value is [-16:15]. 
Endian considerations: - All element numberings in the above description denote big-endian - (i.e., left-to-right) order, reflecting the underlying hardware - insruction. Unlike most of the vector intrinsics in this chapter, - vec_pmsum_be does not follow the bi-endian - programming model. + None.
- Supported type signatures for vec_shasigma_be - + Supported type signatures for vec_splat_s16 + - - - + - + r - + a - - - b - - - - - c - - - + Example Implementation @@ -23731,39 +27457,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector unsigned int - - - vector unsigned int - - - const int - - - const int - - - - vshasigmaw r,a,b,c - - - - - - vector unsigned long long - - - vector unsigned long long - - - const int + vector signed short - const int + 5-bit signed literal - vshasigmaw r,a,b,d + vspltish r,a @@ -23774,27 +27475,26 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_signed - Vector Convert Floating-Point to Signed Integer + + vec_splat_s32 + Vector Splat to Signed Word - r = vec_signed (a) + r = vec_splat_s32 (a) Purpose: - Converts a vector of floating-point numbers to a vector of signed - integers. + Returns a vector with all elements equal to the given value. - Result value: Each element of - r is obtained by truncating the - corresponding element of a to a signed - integer. + Result value: + Each element of r is given the + sign-extended 5-bit value of a. + The range of this value is [-16:15]. Endian considerations: None.
- Supported type signatures for vec_signed + Supported type signatures for vec_splat_s32 @@ -23822,24 +27522,11 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed int - vector float - - - - xvcvspsxws r,a - - - - - - vector signed long long - - - vector double + 5-bit signed literal - xvcvdpsxds r,a + vspltisw r,a @@ -23850,90 +27537,60 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_signed2 - Vector Convert Double-Precision to Signed Word + + vec_splat_u8 + Vector Splat to Unsigned Byte - r = vec_signed2 (a, b) + r = vec_splat_u8 (a) Purpose: - Converts two vectors of double-precision floating-point numbers to a - vector of signed 32-bit integers. + Returns a vector with all elements equal to the given value. - Result value: Let v be the concatenation of a and b. Each - element of r is obtained by truncating - the corresponding element of v to a - signed 32-bit integer. + Result value: + The 5-bit signed value of a is + sign-extended to a byte and the resulting value is cast to an + unsigned char. This value is placed in each element of + r. The range of the original value is + [-16:15]. Endian considerations: - The element numbering within a register is left-to-right for big-endian - targets, and right-to-left for little-endian targets. + None.
- Supported type signatures for vec_signed2 - + Supported type signatures for vec_splat_u8 + - - - + r - + a - - - b - - - - Example LE - Implementation - - - Example BE - Implementation + + Example Implementation - vector signed int - - - vector double + vector unsigned char - vector double - - - - xxpermdi t,b,a,3 - xxpermdi u,b,a,0 - xvcvdpsxws v,t - xvcvdpsxws w,u - vmrgow r,w,v - + 5-bit signed literal - xxpermdi t,a,b,0 - xxpermdi u,a,b,3 - xvcvdpsxws v,t - xvcvdpsxws w,u - vmrgew r,v,w + vspltisb r,a @@ -23944,35 +27601,32 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_signede - Vector Convert Double-Precision to Signed Word Even + + vec_splat_u16 + Vector Splat to Unsigned Halfword - r = vec_signede (a) + r = vec_splat_u16 (a) Purpose: - Converts elements of an input vector to signed integers and stores - them in the even-numbered elements of the result vector. + Returns a vector with all elements equal to the given value. - Result value: Element 0 of - r contains element 0 of a, truncated to a signed integer. Element 2 of - r contains element 1 of a, truncated to a signed integer. Elements 1 and - 3 of r are undefined. + Result value: + The 5-bit signed value of a is + sign-extended to a halfword and the resulting value is cast to an + unsigned short. This value is placed in each element of + r. The range of the original value is + [-16:15]. Endian considerations: - The element numbering within a register is left-to-right for big-endian - targets, and right-to-left for little-endian targets. + None.
- Supported type signatures for vec_signede - + Supported type signatures for vec_splat_u16 + - @@ -23986,33 +27640,21 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - Example LE - Implementation - - - Example BE - Implementation + Example Implementation - vector signed int + vector unsigned short - vector double - - - - xvcvdpsxws t,a - vsldoi r,t,t,12 - + 5-bit signed literal - xvcvdpsxws t,a - + vspltish r,a @@ -24023,35 +27665,32 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_signedo - Vector Convert Double-Precision to Signed Word Odd + + vec_splat_u32 + Vector Splat to Unsigned Word - r = vec_signedo (a) + r = vec_splat_u32 (a) Purpose: - Converts elements of an input vector to signed integers and stores them - in the odd-numbered elements of the result vector. + Returns a vector with all elements equal to the given value. - Result value: Element 1 of - r contains element 0 of a, truncated to a signed integer. Element 3 of - r contains element 1 of a, truncated to a signed integer. Elements 0 and - 2 of r are undefined. + Result value: + The 5-bit signed value of a is + sign-extended to a word and the resulting value is cast to an + unsigned int. This value is placed in each element of + r. The range of the original value is + [-16:15]. Endian considerations: - The element numbering within a register is left-to-right for big-endian - targets, and right-to-left for little-endian targets. + None.
- Supported type signatures for vec_signedo - + Supported type signatures for vec_splat_u32 + - @@ -24065,33 +27704,21 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - Example LE - Implementation - - - Example BE - Implementation + Example Implementation - vector signed int + vector unsigned int - vector double - - - - xvcvdpsxws r,a - - + 5-bit signed literal - xvcvdpsxws t,a - vsldoi r,t,t,12 + vspltisw r,a @@ -24102,28 +27729,29 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_sl - Vector Shift Left + + vec_splats + Vector Splat Scalar - r = vec_sl (a, b) + r = vec_splats (a) Purpose: - Performs a left shift for each element of a vector. + Returns a vector with the value of each element set to the value of + the scalar input parameter. Result value: Each element of - r is the result of left-shifting the - corresponding element of a by the - number of bits specified by the corresponding element of b, modulo the number of bits in the element. - Zeros are shifted in from the right. + r is set to the value of a. Endian considerations: None. + Notes: + No Power compilers yet support the vector _Float16 type, so that + interface is currently deferred.
- Supported type signatures for vec_sl + Supported type signatures for vec_splats @@ -24142,12 +27770,10 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - b - + Example Implementation - Example Implementation + Restrictions @@ -24157,126 +27783,294 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed char - vector signed char + signed char + + + + rlwinm t,a,0,0xff + mtvsrd u,t + vspltb r,u,7 + + + + + + - vector unsigned char + vector unsigned char + + + unsigned char - vslb r,a,b + rlwinm t,a,0,0xff + mtvsrd u,t + vspltb r,u,7 + + + - vector unsigned char + vector signed int - vector unsigned char + signed int + + + + mtvsrd t,a + vspltb r,t,7 + + + + + + - vector unsigned char + vector unsigned int + + + unsigned int - vslb r,a,b + mtvsrd t,a + vspltb r,t,7 + + + - vector signed int + vector signed __int128 - vector signed int + signed __int128 + + + + mtvsrwz t,a + xxspltw r,t,1 + + + + + + + + + vector unsigned __int128 - vector unsigned int + unsigned __int128 - vslw r,a,b + mtvsrwz t,a + xxspltw r,t,1 + + + - vector unsigned int + vector signed long long - vector unsigned int + signed long long + + + + mtvsrd t,a + xxpermdi r,t,t,0 + + + + + + - vector unsigned int + vector unsigned long long + + + unsigned long long - vslw r,a,b + mtvsrd t,a + xxpermdi r,t,t,0 + + + - vector signed long long + vector signed short - vector signed long long + signed short + + + + rlwinm t,a,0,0xffff + mtvsrd u,t + vsplth r,u,3 + + + + + + + + + vector unsigned short - vector unsigned long long + unsigned short - vsld r,a,b + rlwinm t,a,0,0xffff + mtvsrd u,t + vsplth r,u,3 + + + - vector unsigned long long + vector double - vector unsigned long long + double + + + + xxpermdi r,a,a,0 + + + + + + - vector unsigned long long + vector float + + + float - vsld r,a,b + xxscvdpspn t,a + xxspltw r,t,0 + + + - vector signed short + vector _Float16 - vector signed short + _Float16 + + + sample implementation TBD - 
vector unsigned short + Deferred + + + + +
+ +
+ + + + vec_sqrt + Vector Square Root + + r = vec_sqrt (a) + + + Purpose: + Returns a vector containing the square root of each element in the + source vector. + + Result value: Each element of + r is the square root of the + corresponding element of a. + Endian considerations: + None. + + + + Supported type signatures for vec_sqrt + + + + + + + + + r + + + + + a + + + + Example Implementation + + + + + + + vector double + + + vector double - vslh r,a,b + xvsqrtdp r,a - vector unsigned short - - - vector unsigned short + vector float - vector unsigned short + vector float - vslh r,a,b + xvsqrtsp r,a @@ -24287,93 +28081,57 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_sld - Vector Shift Left Double + + vec_sr + Vector Shift Right - r = vec_sld (a, b, c) + r = vec_sr (a, b) Purpose: - Left shifts a double vector (that is, two concatenated vectors) by a - given number of bytes. For vec_sld being performed on the vector bool - and floating-point types, the result is undefined when the specified - shift count is not a multiple of the element size. + Performs a logical right shift for each element of a vector. - Result value: Vector r receives the most-significant 16 bytes obtained - by concatenating a and b and shifting left by the number of bytes - specified by c, which must be in the - range 0–15. + Result value: Each element of + r is the result of logically + right-shifting the corresponding element of a by the number of bits specified by the + corresponding element of b, modulo the + number of bits in the element. Zeros are shifted in from the + left. Endian considerations: - This intrinsic is not endian-neutral, so uses of - vec_sld in big-endian code must be rewritten for little-endian targets. 
- Historically, vec_sld could be used to shift by amounts not a multiple - of the element size for most types, in which case the purpose of the - shift is difficult to determine and difficult to automatically rewrite - efficiently for little endian. So the concatenation of a and b is - done in big-endian fashion (left to right), and the shift is always - to the left. This will generally produce surprising results for - little-endian targets. + None.
- Supported type signatures for vec_sld - + Supported type signatures for vec_sr + - - + r - + a - + b - - - c - - Example Implementation - - - vector bool char - - - vector bool char - - - vector bool char - - - const int - - - - vsldoi r,a,b,c - - - vector signed char @@ -24382,14 +28140,11 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed char - vector signed char - - - const int + vector unsigned char - vsldoi r,a,b,c + vsrb r,a,b @@ -24403,31 +28158,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned char - - const int - - - - vsldoi r,a,b,c - - - - - - vector bool int - - - vector bool int - - - vector bool int - - - const int - - vsldoi r,a,b,c + vsrb r,a,b @@ -24439,14 +28172,11 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed int - vector signed int - - - const int + vector unsigned int - vsldoi r,a,b,c + vsrw r,a,b @@ -24460,31 +28190,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned int - - const int - - - - vsldoi r,a,b,c - - - - - - vector bool long long - - - vector bool long long - - - vector bool long long - - - const int - - vsldoi r,a,b,c + vsrw r,a,b @@ -24496,14 +28204,11 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed long long - vector signed long long - - - const int + vector unsigned long long - vsldoi r,a,b,c + vsrd r,a,b @@ -24517,50 +28222,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned long long - - const int - - - - vsldoi r,a,b,c - - - - - - vector pixel - - - vector pixel - - - vector pixel - - - const int - - - - vsldoi r,a,b,c - - - - - - vector bool short - - - vector bool short - - - vector bool short - - - const int - - vsldoi r,a,b,c + vsrd r,a,b @@ -24572,14 +28236,11 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed 
short - vector signed short - - - const int + vector unsigned short - vsldoi r,a,b,c + vsrh r,a,b @@ -24593,50 +28254,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned short - - const int - - - - vsldoi r,a,b,c - - - - - - vector double - - - vector double - - - vector double - - - const int - - vsldoi r,a,b,c - - - - - - vector float - - - vector float - - - vector float - - - const int - - - - vsldoi r,a,b,c + vsrh r,a,b @@ -24647,64 +28267,52 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_sldw - Vector Shift Left Double by Words + + vec_sra + Vector Shift Right Algebraic - r = vec_sldw (a, b, c) + r = vec_sra (a, b) Purpose: - Returns a vector obtained by shifting left the concatenated input - vectors by the number of specified words. + Performs an algebraic right shift for each element of a vector. - Result value: Vector r receives the most-significant 16 bytes obtained - by concatenating a and b and shifting left by the number of words - specified by c, which must be in the - range 0–3. + Result value: Each element of + r is the result of algebraically + right-shifting the corresponding element of a by the number of bits specified by the + corresponding element of b, modulo the + number of bits in the element. Copies of the sign bit are shifted in + from the left. Endian considerations: - This intrinsic is not endian-neutral, so uses of - vec_sldw in big-endian code must be rewritten for little-endian targets. - The concatenation of a and b is - done in big-endian fashion (left to right), and the shift is always - to the left. This will generally produce surprising results for - little-endian targets. + None.
- Supported type signatures for vec_sldw - + Supported type signatures for vec_sra + - - + r - + a - + b - - - c - - - + Example Implementation @@ -24718,14 +28326,11 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed char - vector signed char - - - const int + vector unsigned char - xxsldwi r,a,b,c + vsrab r,a,b @@ -24739,12 +28344,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned char - - const int - - xxsldwi r,a,b,c + vsrab r,a,b @@ -24756,14 +28358,11 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed int - vector signed int - - - const int + vector unsigned int - xxsldwi r,a,b,c + vsraw r,a,b @@ -24777,12 +28376,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned int - - const int - - xxsldwi r,a,b,c + vsraw r,a,b @@ -24794,14 +28390,11 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed long long - vector signed long long - - - const int + vector unsigned long long - xxsldwi r,a,b,c + vsrad r,a,b @@ -24815,12 +28408,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned long long - - const int - - xxsldwi r,a,b,c + vsrad r,a,b @@ -24832,14 +28422,11 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed short - vector signed short - - - const int + vector unsigned short - xxsldwi r,a,b,c + vsrah r,a,b @@ -24853,12 +28440,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned short - - const int - - xxsldwi r,a,b,c + vsrah r,a,b @@ -24869,31 +28453,30 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_sll - Vector Shift Left Long + + vec_srl + Vector Shift Right Long - r = vec_sll (a, b) + r = vec_srl (a, b) Purpose: - Left shifts an entire vector by a given number of bits. 
+ Right shifts a vector by a given number of bits. - Result value: Vector - r contains the contents of a, shifted left by the number of bits specified - by the three least-significant bits of b. Zeros are supplied on the right. The shift - count must have been replicated into all bytes of b; if not, the value of r is undefined. + Result value: Vector r contains the contents of a, shifted right by the number of bits specified + by the 3 least-significant bits of b. + Zeros are supplied on the left. The shift count must have been + replicated into all bytes of b; if not, + the value of r is undefined. Endian considerations: This intrinsic is not endian-neutral, so uses of - vec_sll in big-endian code must be rewritten for little-endian targets. + vec_srl in big-endian code must be rewritten for little-endian targets.
- Supported type signatures for vec_sll + Supported type signatures for vec_srl @@ -24934,7 +28517,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vsl r,a,b + vsr r,a,b @@ -24950,7 +28533,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vsl r,a,b + vsr r,a,b @@ -24966,7 +28549,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vsl r,a,b + vsr r,a,b @@ -24982,7 +28565,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vsl r,a,b + vsr r,a,b @@ -24998,7 +28581,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vsl r,a,b + vsr r,a,b @@ -25014,7 +28597,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vsl r,a,b + vsr r,a,b @@ -25030,7 +28613,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vsl r,a,b + vsr r,a,b @@ -25046,7 +28629,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vsl r,a,b + vsr r,a,b @@ -25062,7 +28645,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vsl r,a,b + vsr r,a,b @@ -25073,31 +28656,32 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_slo - Vector Shift Left by Octets + + vec_sro + Vector Shift Right by Octets - r = vec_slo (a, b) + r = vec_sro (a, b) Purpose: - Left shifts a vector by a given number of bytes (octets). + Right shifts a vector by a given number of bytes (octets). - Result value: Vector r receives the contents of a, shifted left by the number of bytes specified - by bits 1:4 of the least-significant byte of b. + Result value: Vector + r receives the contents of + a, shifted right by the number of bytes + specified by bits 1–4 of the least-significant byte of + b. + Zeros are supplied from the left. 
Endian considerations: This intrinsic is not endian-neutral, so uses of - vec_slo in big-endian code must be rewritten for little-endian targets. + vec_sro in big-endian code must be rewritten for little-endian targets. The shift count is in element 15 of b for big-endian, but in element 0 of b for little-endian.
- Supported type signatures for vec_slo + Supported type signatures for vec_sro @@ -25118,678 +28702,501 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> b - - - - Example Implementation - - - - - - - vector signed char - - - vector signed char - - - vector signed char - - - - vslo r,a,b - - - - - - vector signed char - - - vector signed char - - - vector unsigned char - - - - vslo r,a,b - - - - - - vector unsigned char - - - vector unsigned char - - - vector signed char - - - - vslo r,a,b - - - - - - vector unsigned char - - - vector unsigned char - - - vector unsigned char - - - - vslo r,a,b - - - - - - vector signed int - - - vector signed int - - - vector signed char - - - - vslo r,a,b - - - - - - vector signed int - - - vector signed int - - - vector unsigned char + - - - vslo r,a,b - + + Example Implementation + + - vector unsigned int + vector signed char - vector unsigned int + vector signed char vector signed char - vslo r,a,b + vsro r,a,b - vector unsigned int + vector signed char - vector unsigned int + vector signed char vector unsigned char - vslo r,a,b + vsro r,a,b - vector signed long long + vector unsigned char - vector signed long long + vector unsigned char vector signed char - vslo r,a,b + vsro r,a,b - vector signed long long + vector unsigned char - vector signed long long + vector unsigned char vector unsigned char - vslo r,a,b + vsro r,a,b - vector unsigned long long + vector signed int - vector unsigned long long + vector signed int vector signed char - vslo r,a,b + vsro r,a,b - vector unsigned long long + vector signed int - vector unsigned long long + vector signed int vector unsigned char - vslo r,a,b + vsro r,a,b - vector pixel + vector unsigned int - vector pixel + vector unsigned int vector signed char - vslo r,a,b + vsro r,a,b - vector pixel + vector unsigned int - vector pixel + vector unsigned int vector unsigned char - vslo r,a,b + vsro r,a,b - vector signed short + vector signed long long - vector 
signed short + vector signed long long vector signed char - vslo r,a,b + vsro r,a,b - vector signed short + vector signed long long - vector signed short + vector signed long long vector unsigned char - vslo r,a,b + vsro r,a,b - vector unsigned short + vector unsigned long long - vector unsigned short + vector unsigned long long vector signed char - vslo r,a,b + vsro r,a,b - vector unsigned short + vector unsigned long long - vector unsigned short + vector unsigned long long vector unsigned char - vslo r,a,b + vsro r,a,b - vector float + vector pixel - vector float + vector pixel vector signed char - vslo r,a,b + vsro r,a,b - vector float + vector pixel - vector float + vector pixel vector unsigned char - vslo r,a,b + vsro r,a,b - - -
- -
- - - - vec_slv - Vector Shift Left Variable - - r = vec_slv (a, b) - - - Purpose: - Left-shifts a vector by a varying number of bits by element. - - Result value: Let v be a 17-byte vector formed from a in bytes [0:15] and a zero byte in element 16. - Then each byte element i of r is determined as follows. The start bit - sb is obtained from bits 5:7 of byte element - i of a. Then - the contents of bits sb:sb+7 - of the halfword in byte elements - i:i+1 of v are placed into byte element - i of r. - Endian considerations: - All bit and byte element numbers are specified in big-endian order. - This intrinsic is not endian-neutral. - - - - Supported type signatures for vec_slv - - - - - - - - - r - + vector signed short - - a - + vector signed short - - b - - - - Example Implementation + vector signed char - - Restrictions + + + vsro r,a,b + - - - vector unsigned char + vector signed short - vector unsigned char + vector signed short vector unsigned char - vslv r,a,b + vsro r,a,b - - ISA 3.0 or later - - - - -
- -
- - - - vec_splat - Vector Splat - - r = vec_splat (a, b) - - - Purpose: - Returns a vector that has all of its elements set to a given value. - - Result value: The value of each - element of r is the value of the - element of a specified by b, which must be an element number less than the - number of elements supported for a's - type. - Endian considerations: - The element numbering within a register is left-to-right for big-endian - targets, and right-to-left for little-endian targets. - - Notes: - No Power compilers yet support the vector _Float16 type, so those - interfaces are currently deferred. - - - Supported type signatures for vec_splat - - - - - - - - - - - - r - - - - - a - - - - - b - - - - Example LE - Implementation - - - Example BE - Implementation - - - Restrictions - - - - vector bool char + vector unsigned short - vector bool char + vector unsigned short - const int - - - - vspltb r,a,15-b - + vector signed char - vspltb r,a,b - - - - + vsro r,a,b + - vector signed char + vector unsigned short - vector signed char + vector unsigned short - const int - - - - vspltb r,a,15-b - + vector unsigned char - vspltb r,a,b + vsro r,a,b - - - - vector unsigned char + vector float - vector unsigned char + vector float - const int - - - - vspltb r,a,15-b - + vector signed char - vspltb r,a,b + vsro r,a,b - - - - vector bool int + vector float - vector bool int + vector float - const int - - - - xxspltw r,a,3-b - + vector unsigned char - xxspltw r,a,b + vsro r,a,b - - - + + +
+ +
+ + + + vec_srv + Vector Shift Right Variable + + r = vec_srv (a, b) + + + Purpose: + Right-shifts a vector by a varying number of bits by element. + + Result value: Let v be a 17-byte vector formed from a zero byte + in element 0 and the elements of a + in bytes [1:16]. Then each byte element i of + r is determined as follows. The + start bit sb is obtained from bits 5:7 of + byte element i of b. Then the contents of bits + (8 – sb):(15 – sb) of the + halfword in byte elements i:i+1 + of v are placed into byte element + i of r. + Endian considerations: + All bit and byte element numbers are specified in big-endian order. + This intrinsic is not endian-neutral. + + + + Supported type signatures for vec_srv + + + + + + + - vector signed int + + r + - vector signed int + + a + - const int - - - - xxspltw r,a,3-b - + + b + - - - xxspltw r,a,b - + + Example Implementation - - + + Restrictions + + - vector unsigned int + vector unsigned char - vector unsigned int + vector unsigned char - const int - - - - xxspltw r,a,3-b - + vector unsigned char - xxspltw r,a,b + vsrv r,a,b - - + + ISA 3.0 or later + + +
+ +
+ + + + vec_st + Vector Store Indexed + + vec_st (a, b, c) + + + Purpose: + Stores a 16-byte vector into memory at the address specified by a + displacement and a pointer, ignoring the four low-order bits + of the calculated address. + + Operation: A memory address + is obtained by adding b and c, and masking off the four low-order bits of the + result. The 16-byte vector in a is + stored to the resultant memory address. + Endian considerations: + None. + + Notes: No Power compilers yet + support the vector _Float16 type, so those interfaces are currently + deferred. + + + + Supported type signatures for vec_st + + + + + + + - vector bool long long + + a + - vector bool long long + + b + - const int - - - - xxpermdi r,a,a,(1-b)*3 - + + c + - - - xxpermdi r,a,a,b - + + Example ISA 3.0 + Implementation - - + + Restrictions + + - vector signed long long + vector bool char - vector signed long long + any integral type - const int - - - - xxpermdi r,a,a,(1-b)*3 - + vector bool char * - xxpermdi r,a,a,b + stvx a,b,c @@ -25798,22 +29205,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector unsigned long long + vector bool char - vector unsigned long long + any integral type - const int - - - - xxpermdi r,a,a,(1-b)*3 - + signed char * - xxpermdi r,a,a,b + stvx a,b,c @@ -25822,22 +29224,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector pixel + vector bool char - vector pixel + any integral type - const int - - - - vsplth r,a,7-b - + unsigned char * - vsplth r,a,b + stvx a,b,c @@ -25846,22 +29243,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector bool short + vector signed char - vector bool short + any integral type - const int - - - - vsplth r,a,7-b - + signed char * - vsplth r,a,b + stvx a,b,c @@ -25870,22 +29262,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector signed short + vector signed char - vector signed
short + any integral type - const int - - - - vsplth r,a,7-b - + vector signed char * - vsplth r,a,b + stvx r,b,a @@ -25894,22 +29281,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector unsigned short - - - vector unsigned short + vector unsigned char - const int - - - - vsplth r,a,7-b - + any integral type + + + unsigned char * - vsplth r,a,b + stvx r,b,a @@ -25918,22 +29300,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector double + vector unsigned char - vector double + any integral type - const int + vector unsigned char * - xxpermdi r,a,a,(1-b)*3 - - - - - xxpermdi r,a,a,b + stvx r,b,a @@ -25942,22 +29319,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector float + vector bool int - vector float + any integral type - const int - - - - xxspltw r,a,3-b - + vector bool int * - xxspltw r,a,b + stvx r,b,a @@ -25966,474 +29338,207 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector _Float16 + vector bool int - vector _Float16 + any integral type - const int + signed int * - vsplth r,a,7-b + stvx r,b,a - - vsplth r,a,b - - - - Deferred + - - -
- -
- - - - vec_splat_s8 - Vector Splat to Signed Byte - - r = vec_splat_s8 (a) - - - Purpose: - Returns a vector with all elements equal to the given value. - - Result value: - Each element of r is given the - sign-extended 5-bit value of a. - The range of this value is [-16:15]. - Endian considerations: - None. - - - - Supported type signatures for vec_splat_s8 - - - - - - - - r - - - - - a - - - - Example Implementation + + vector bool int - - - - - vector signed char + any integral type - 5-bit signed literal + unsigned int * - vspltisb r,a + stvx r,b,a - - - -
- -
- - - - vec_splat_s16 - Vector Splat to Signed Halfword - - r = vec_splat_s16 (a) - - - Purpose: - Returns a vector with all elements equal to the given value. - - Result value: - Each element of r is given the - sign-extended 5-bit value of a. - The range of this value is [-16:15]. - Endian considerations: - None. - - - - Supported type signatures for vec_splat_s16 - - - - - - - - - r - - - - - a - - - - Example Implementation + + - - - vector signed short + vector signed int - 5-bit signed literal + any integral type + + + signed int * - vspltish r,a + stvx r,b,a - - - -
- -
- - - - vec_splat_s32 - Vector Splat to Signed Word - - r = vec_splat_s32 (a) - - - Purpose: - Returns a vector with all elements equal to the given value. - - Result value: - Each element of r is given the - sign-extended 5-bit value of a. - The range of this value is [-16:15]. - Endian considerations: - None. - - - - Supported type signatures for vec_splat_s32 - - - - - - - - - r - - - - - a - - - - Example Implementation + + - - vector signed int - 5-bit signed literal + any integral type + + + vector signed int * - vspltisw r,a + stvx r,b,a - - - -
- -
- - - - vec_splat_u8 - Vector Splat to Unsigned Byte - - r = vec_splat_u8 (a) - - - Purpose: - Returns a vector with all elements equal to the given value. - - Result value: - The 5-bit signed value of a is - sign-extended to a byte and the resulting value is cast to an - unsigned char. This value is placed in each element of - r. The range of the original value is - [-16:15]. - Endian considerations: - None. - - - - Supported type signatures for vec_splat_u8 - - - - - - - - - r - - - - - a - - - - Example Implementation + + - - - vector unsigned char + vector unsigned int + + + any integral type - 5-bit signed literal + unsigned int * - vspltisb r,a + stvx r,b,a + + + - - -
- -
- - - - vec_splat_u16 - Vector Splat to Unsigned Halfword - - r = vec_splat_u16 (a) - - - Purpose: - Returns a vector with all elements equal to the given value. - - Result value: - The 5-bit signed value of a is - sign-extended to a halfword and the resulting value is cast to an - unsigned short. This value is placed in each element of - r. The range of the original value is - [-16:15]. - Endian considerations: - None. - - - - Supported type signatures for vec_splat_u16 - - - - - - - - r - + + vector unsigned int - - - a - + + any integral type - - Example Implementation + + vector unsigned int * + + + + stvx r,b,a + + + + - - - vector unsigned short + vector bool long long - 5-bit signed literal + any integral type + + + vector bool long long * - vspltish r,a + stvx r,b,a + + + - - -
- -
- - - - vec_splat_u32 - Vector Splat to Unsigned Word - - r = vec_splat_u32 (a) - - - Purpose: - Returns a vector with all elements equal to the given value. - - Result value: - The 5-bit signed value of a is - sign-extended to a word and the resulting value is cast to an - unsigned int. This value is placed in each element of - r. The range of the original value is - [-16:15]. - Endian considerations: - None. - - - - Supported type signatures for vec_splat_u32 - - - - - - - - r - + + vector signed long long - - - a - + + any integral type - - Example Implementation + + signed long long * + + + + stvx r,b,a + + + + - - - vector unsigned int + vector signed long long - 5-bit signed literal + any integral type + + + vector signed long long * - vspltisw r,a + stvx r,b,a + + + - - -
- -
- - - - vec_splats - Vector Splat Scalar - - r = vec_splats (a) - - - Purpose: - Returns a vector with the value of each element set to the value of - the scalar input parameter. - - Result value: Each element of - r is set to the value of a. - Endian considerations: - None. - - Notes: - No Power compilers yet support the vector _Float16 type, so that - interface is currently deferred. - - - Supported type signatures for vec_splats - - - - - - - - - r - + + vector unsigned long long - - - a - + + any integral type - - Example Implementation + + unsigned long long * - - Restrictions + + + stvx r,b,a + + + + - - - vector signed char + vector unsigned long long - signed char + any integral type + + + vector unsigned long long * - rlwinm t,a,0,0xff - mtvsrd u,t - vspltb r,u,7 + stvx r,b,a @@ -26442,16 +29547,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector unsigned char + vector pixel - unsigned char + any integral type + + + vector pixel * - rlwinm t,a,0,0xff - mtvsrd u,t - vspltb r,u,7 + stvx r,b,a @@ -26460,15 +29566,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector signed int + vector bool short - signed int + any integral type + + + vector bool short * - mtvsrd t,a - vspltb r,t,7 + stvx r,b,a @@ -26477,15 +29585,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector unsigned int + vector bool short - unsigned int + any integral type + + + signed short * - mtvsrd t,a - vspltb r,t,7 + stvx r,b,a @@ -26494,15 +29604,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector signed __int128 + vector bool short - signed __int128 + any integral type + + + unsigned short * - mtvsrwz t,a - xxspltw r,t,1 + stvx r,b,a @@ -26511,15 +29623,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector unsigned __int128 + vector signed short - unsigned __int128 + any integral type + + + signed short * 
- mtvsrwz t,a - xxspltw r,t,1 + stvx r,b,a @@ -26528,15 +29642,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector signed long long + vector signed short - signed long long + any integral type + + + vector signed short * - mtvsrd t,a - xxpermdi r,t,t,0 + stvx r,b,a @@ -26545,15 +29661,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector unsigned long long + vector unsigned short - unsigned long long + any integral type + + + unsigned short * - mtvsrd t,a - xxpermdi r,t,t,0 + stvx r,b,a @@ -26562,16 +29680,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector signed short + vector unsigned short - signed short + any integral type + + + vector unsigned short * - rlwinm t,a,0,0xffff - mtvsrd u,t - vsplth r,u,3 + stvx r,b,a @@ -26580,16 +29699,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector unsigned short + vector double - unsigned short + any integral type + + + double * - rlwinm t,a,0,0xffff - mtvsrd u,t - vsplth r,u,3 + stvx r,b,a @@ -26601,11 +29721,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector double - double + any integral type + + + vector double * - xxpermdi r,a,a,0 + stvx r,b,a @@ -26617,12 +29740,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector float - float + any integral type + + + float * - xxscvdpspn t,a - xxspltw r,t,0 + stvx r,b,a @@ -26631,92 +29756,60 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vector _Float16 + vector float - _Float16 - - - sample implementation TBD + any integral type - Deferred - - - - -
- -
- - - - vec_sqrt - Vector Square Root - - r = vec_sqrt (a) - - - Purpose: - Returns a vector containing the square root of each element in the - source vector. - - Result value: Each element of - r is the square root of the - corresponding element of a. - Endian considerations: - None. - - - - Supported type signatures for vec_sqrt - - - - - - - - - r - + vector float * - - - a - + + + stvx r,b,a + - - Example Implementation + + - - - vector double + vector _Float16 - vector double + any integral type + + + _Float16 * - xvsqrtdp r,a + stvx r,b,a + + Deferred + - vector float + vector _Float16 - vector float + any integral type + + + vector _Float16 * - xvsqrtsp r,a + stvx r,b,a + + Deferred + @@ -26725,29 +29818,33 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_sr - Vector Shift Right + + vec_ste + Vector Store Element Indexed - r = vec_sr (a, b) + vec_ste (a, b, c) Purpose: - Performs a logical right shift for each element of a vector. - - Result value: Each element of - r is the result of logically - right-shifting the corresponding element of a by the number of bits specified by the - corresponding element of b, modulo the - number of bits in the element. Zeros are shifted in from the - left. + Stores a single element from a 16-byte vector into memory at the address + specified by a displacement and a pointer, aligned to the element size. + + Operation: The integer value + b is added to the pointer value + c. The resulting address is rounded + down to the nearest address that is a multiple of + es, where es is 1 for char + pointers, 2 for short pointers, and 4 for float or int pointers. + An element offset eo is calculated by taking the + resultant address modulo 16. The vector element of a at offset eo is stored + to the resultant address. Endian considerations: None.
- Supported type signatures for vec_sr + Supported type signatures for vec_ste @@ -26755,306 +29852,217 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - + - r + a - + - a + b - + - b + c - Example Implementation + Example ISA 3.0 + Implementation - vector signed char - - - vector signed char - - - vector unsigned char - - - - vsrb r,a,b - - - - - - vector unsigned char - - - vector unsigned char - - - vector unsigned char - - - - vsrb r,a,b - - - - - - vector signed int - - - vector signed int - - - vector unsigned int - - - - vsrw r,a,b - - - - - - vector unsigned int + vector bool char - vector unsigned int + any integral type - vector unsigned int + signed char * - vsrw r,a,b + stvebx r,b,a - vector signed long long + vector bool char - vector signed long long + any integral type - vector unsigned long long + unsigned char * - vsrd r,a,b + stvebx r,b,a - vector unsigned long long + vector signed char - vector unsigned long long + any integral type - vector unsigned long long + signed char * - vsrd r,a,b + stvebx r,b,a - vector signed short + vector unsigned char - vector signed short + any integral type - vector unsigned short + unsigned char * - vsrh r,a,b + stvebx r,b,a - vector unsigned short + vector bool int - vector unsigned short + any integral type - vector unsigned short + signed int * - vsrh r,a,b + stvewx r,b,a - - -
- -
- - - - vec_sra - Vector Shift Right Algebraic - - r = vec_sra (a, b) - - - Purpose: - Performs an algebraic right shift for each element of a vector. - - Result value: Each element of - r is the result of algebraically - right-shifting the corresponding element of a by the number of bits specified by the - corresponding element of b, modulo the - number of bits in the element. Copies of the sign bit are shifted in - from the left. - Endian considerations: - None. - - - - Supported type signatures for vec_sra - - - - - - - - - r - + + vector bool int - - - a - + + any integral type - - - b - + + unsigned int * - - Example Implementation + + + stvewx r,b,a + - - - vector signed char + vector signed int - vector signed char + any integral type - vector unsigned char + signed int * - vsrab r,a,b + stvewx r,b,a - vector unsigned char + vector unsigned int - vector unsigned char + any integral type - vector unsigned char + unsigned int * - vsrab r,a,b + stvewx r,b,a - vector signed int + vector pixel - vector signed int + any integral type - vector unsigned int + signed short * - vsraw r,a,b + stvehx r,b,a - vector unsigned int + vector pixel - vector unsigned int + any integral type - vector unsigned int + unsigned short * - vsraw r,a,b + stvehx r,b,a - vector signed long long + vector bool short - vector signed long long + any integral type - vector unsigned long long + signed short * - vsrad r,a,b + stvehx r,b,a - vector unsigned long long + vector bool short - vector unsigned long long + any integral type - vector unsigned long long + unsigned short * - vsrad r,a,b + stvehx r,b,a @@ -27063,14 +30071,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed short - vector signed short + any integral type - vector unsigned short + signed short * - vsrah r,a,b + stvehx r,b,a @@ -27079,14 +30087,30 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned short - vector unsigned short + any 
integral type - vector unsigned short + unsigned short * - vsrah r,a,b + stvehx r,b,a + + + + + + vector float + + + any integral type + + + float * + + + + stvewx r,b,a @@ -27097,668 +30121,693 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - - vec_srl - Vector Shift Right Long + + vec_stl + Vector Store Indexed Least Recently Used - r = vec_srl (a, b) + vec_stl (a, b, c) Purpose: - Right shifts a vector by a given number of bits. - - Result value: Vector r contains the contents of a, shifted right by the number of bits specified - by the 3 least-significant bits of b. - Zeros are supplied on the left. The shift count must have been - replicated into all bytes of b; if not, - the value of r is undefined. + Stores a 16-byte vector into memory at the address specified by a + displacement and a pointer, ignoring the four low-order bits + of the calculated address, and marking the cache line containing + the address as least recently used. + + Operation: A memory address + is obtained by adding b and c, and masking off the four low-order bits of the + result. The 16-byte vector in a is + stored to the resultant memory address, and the containing cache + line is marked as least recently used. Endian considerations: - This intrinsic is not endian-neutral, so uses of - vec_srl in big-endian code must be rewritten for little-endian targets. + None. + + Notes: No Power compilers yet + support the vector _Float16 type, so those interfaces are currently + deferred. 
- Supported type signatures for vec_srl - + Supported type signatures for vec_stl + + - + - r + a - + - a + b - + - b + c - Example Implementation + Example ISA 3.0 + Implementation + + + Restrictions - vector signed char + vector bool char - vector signed char + any integral type - vector unsigned char + vector bool char * - vsr r,a,b + stvxl r,b,a + + + - vector unsigned char + vector bool char - vector unsigned char + any integral type - vector unsigned char + signed char * - vsr r,a,b + stvxl r,b,a + + + - vector signed int + vector bool char - vector signed int + any integral type - vector unsigned char + unsigned char * - vsr r,a,b + stvxl r,b,a + + + - vector unsigned int + vector signed char - vector unsigned int + any integral type - vector unsigned char + signed char * - vsr r,a,b + stvxl r,b,a + + + - vector signed long long + vector signed char - vector signed long long + any integral type - vector unsigned char + vector signed char * - vsr r,a,b + stvxl r,b,a + + + - vector unsigned long long + vector unsigned char - vector unsigned long long + any integral type - vector unsigned char + unsigned char * - vsr r,a,b + stvxl r,b,a + + + - vector pixel + vector unsigned char - vector pixel + any integral type - vector unsigned char + vector unsigned char * - vsr r,a,b + stvxl r,b,a + + + - vector signed short + vector bool int - vector signed short + any integral type - vector unsigned char + vector bool int * - vsr r,a,b + stvxl r,b,a + + + - vector unsigned short + vector bool int - vector unsigned short + any integral type - vector unsigned char + signed int * - vsr r,a,b + stvxl r,b,a + + + - - -
- -
- - - - vec_sro - Vector Shift Right by Octets - - r = vec_sro (a, b) - - - Purpose: - Right shifts a vector by a given number of bytes (octets). - - Result value: Vector - r receives the contents of - a, shifted right by the number of bytes - specified by bits 1–4 of the least-significant byte of - b. - Zeros are supplied from the left. - Endian considerations: - This intrinsic is not endian-neutral, so uses of - vec_sro in big-endian code must be rewritten for little-endian targets. - The shift count is in element 15 of b - for big-endian, but in element 0 of b - for little-endian. - - - - Supported type signatures for vec_sro - - - - - - - - - r - + + vector bool int - - - a - + + any integral type - - - b - + + unsigned int * + + + + stvxl r,b,a + - - Example Implementation + + - - - vector signed char + vector signed int - vector signed char + any integral type - vector signed char + signed int * - vsro r,a,b + stvxl r,b,a + + + - vector signed char + vector signed int - vector signed char + any integral type - vector unsigned char + vector signed int * - vsro r,a,b + stvxl r,b,a + + + - vector unsigned char + vector unsigned int - vector unsigned char + any integral type - vector signed char + unsigned int * - vsro r,a,b + stvxl r,b,a + + + - vector unsigned char + vector unsigned int - vector unsigned char + any integral type - vector unsigned char + vector unsigned int * - vsro r,a,b + stvxl r,b,a + + + - vector signed int + vector bool long long - vector signed int + any integral type - vector signed char + vector bool long long * - vsro r,a,b + stvxl r,b,a + + + - vector signed int + vector signed long long - vector signed int + any integral type - vector unsigned char + signed long long * - vsro r,a,b + stvxl r,b,a + + + - vector unsigned int + vector signed long long - vector unsigned int + any integral type - vector signed char + vector signed long long * - vsro r,a,b + stvxl r,b,a + + + - vector unsigned int + vector unsigned long long - vector 
unsigned int + any integral type - vector unsigned char + unsigned long long * - vsro r,a,b + stvxl r,b,a + + + - vector signed long long + vector unsigned long long - vector signed long long + any integral type - vector signed char + vector unsigned long long * - vsro r,a,b + stvxl r,b,a + + + - vector signed long long + vector pixel - vector signed long long + any integral type - vector unsigned char + vector pixel * - vsro r,a,b + stvxl r,b,a + + + - vector unsigned long long + vector bool short - vector unsigned long long + any integral type - vector signed char + vector bool short * - vsro r,a,b + stvxl r,b,a + + + - vector unsigned long long + vector bool short - vector unsigned long long + any integral type - vector unsigned char + signed short * - vsro r,a,b + stvxl r,b,a + + + - vector pixel + vector bool short - vector pixel + any integral type - vector signed char + unsigned short * - vsro r,a,b + stvxl r,b,a + + + - vector pixel + vector signed short - vector pixel + any integral type - vector unsigned char + signed short * - vsro r,a,b + stvxl r,b,a + + + vector signed short - vector signed short + any integral type - vector signed char + vector signed short * - vsro r,a,b + stvxl r,b,a + + + - vector signed short + vector unsigned short - vector signed short + any integral type - vector unsigned char + unsigned short * - vsro r,a,b + stvxl r,b,a + + + vector unsigned short - vector unsigned short + any integral type - vector signed char + vector unsigned short * - vsro r,a,b + stvxl r,b,a + + + - vector unsigned short + vector double - vector unsigned short + any integral type - vector unsigned char + double * - vsro r,a,b + stvxl r,b,a + + + - vector float + vector double - vector float + any integral type - vector signed char + vector double * - vsro r,a,b + stvxl r,b,a + + + vector float + + any integral type + + + float * + + + + stvxl r,b,a + + + + + + + vector float - vector unsigned char + any integral type + + + vector float * - vsro r,a,b + 
stvxl r,b,a + + + - - -
- -
- - - - vec_srv - Vector Shift Right Variable - - r = vec_srv (a, b) - - - Purpose: - Right-shifts a vector by a varying number of bits by element. - - Result value: Let v be a 17-byte vector formed from a zero byte - in element 0 and the elements of a - in bytes [1:16]. Then each byte element i of - r is determined as follows. The - start bit sb is obtained from bits 5:7 of - byte element i of a. Then the contents of bits - (8 – sb):(15 – sb) of the - halfword in byte elements i:i+1 - of v are placed into byte element - i of r. - Endian considerations: - All bit and byte element numbers are specified in big-endian order. - This intrinsic is not endian-neutral. - - - - Supported type signatures for vec_srv - - - - - - - - - r - + vector _Float16 - - a - + any integral type - - b - + _Float16 * - - Example Implementation + + + stvxl r,b,a + - Restrictions + Deferred - - - vector unsigned char + vector _Float16 - vector unsigned char + any integral type - vector unsigned char + vector _Float16 * - vsrv r,a,b + stvxl r,b,a - ISA 3.0 or later + Deferred