diff --git a/Intrinsics_Reference/ch_vec_reference.xml b/Intrinsics_Reference/ch_vec_reference.xml index b514a1d..5d9cf17 100644 --- a/Intrinsics_Reference/ch_vec_reference.xml +++ b/Intrinsics_Reference/ch_vec_reference.xml @@ -6694,17 +6694,26 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_extract_fp32_from_shorth - Vector ... Spelled Out Name TBD + Vector Extract Floats from High Elements of Vector Short Int - r = vec_extract_fp32_from_shorth (ARG1) + r = vec_extract_fp32_from_shorth (a) Purpose: - Extracts four single-precision floating-point numbers from the high elements of a vector of eight 16-bit elements, interpreting each element as a 16-bit floating-point number in IEEE format. + Extracts four single-precision floating-point numbers from the high + elements of a vector of eight 16-bit elements, interpreting each + element as a 16-bit floating-point number in IEEE format. - Result value: The first four elements are interpreted as 16-bit floating-point numbers in IEEE format, and extended to single-precision format, returning a vector with four single-precision IEEE numbers. + Result value: + The first four elements of a are + interpreted as 16-bit floating-point numbers in IEEE format, and + extended to single-precision format, returning a vector with four + single-precision IEEE numbers. Endian considerations: - None. + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets. Thus the + permute control vector at address pcv + in the example implementation will differ for big- and little-endian. 
@@ -6723,7 +6732,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - ARG1 + a @@ -6743,9 +6752,13 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned short - sample implementation TBD + + lxv t,0(pcv) + vperm u,a,a,t + xvcvhpsp r,u + - + ISA 3.0 or later @@ -6758,17 +6771,26 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_extract_fp32_from_shortl - Vector ... Spelled Out Name TBD + Vector Extract Floats from Low Elements of Vector Short Int - r = vec_extract_fp32_from_shortl (ARG1) + r = vec_extract_fp32_from_shortl (a) Purpose: - + Extracts four single-precision floating-point numbers from the low + elements of a vector of eight 16-bit elements, interpreting each + element as a 16-bit floating-point number in IEEE format. - Result value: The last four elements are interpreted as 16-bit floating-point numbers in IEEE format, and extended to single-precision format, returning a vector with four single-precision IEEE numbers. + Result value: + The last four elements of a are + interpreted as 16-bit floating-point numbers in IEEE format, and + extended to single-precision format, returning a vector with four + single-precision IEEE numbers. Endian considerations: - None. + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets. Thus the + permute control vector at address pcv + in the example implementation will differ for big- and little-endian.
@@ -6787,7 +6809,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - ARG1 + a @@ -6807,9 +6829,13 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned short - sample implementation TBD + + lxv t,0(pcv) + vperm u,a,a,t + xvcvhpsp r,u + - + ISA 3.0 or later @@ -6822,20 +6848,23 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_extract_sig - Vector ... Spelled Out Name TBD + Vector Extract Significand - r = vec_extract_sig (ARG1) + r = vec_extract_sig (a) Purpose: - Extracts a significand (mantissa) from a floating-point number. - - Result value: Each element of the returned integer vector is extracted from the significand (mantissa) field of the corresponding floating-point vector element. -The significand is from the corresponding floating-point - number in accordance with the IEEE format. The returned result - includes the implicit leading digit. The value of that digit is - not encoded in the IEEE format, but is implied by the - exponent. + Extracts a vector of significands (mantissas) from a vector of + floating-point numbers. + + Result value: Each element of + r is extracted from the significand + (mantissa) field of the corresponding floating-point element of + a. + The significand is from the corresponding floating-point + number in accordance with the IEEE format. The returned result + includes the implicit leading digit. The value of that digit is + not encoded in the IEEE format, but is implied by the exponent. Endian considerations: None. 
@@ -6856,7 +6885,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - ARG1 + a @@ -6876,9 +6905,11 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector double - sample implementation TBD + + xvxsigdp r,a + - + ISA 3.0 or later @@ -6890,9 +6921,11 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector float - sample implementation TBD + + xvxsigsp r,a + - + ISA 3.0 or later @@ -6905,48 +6938,59 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_extract4b - Vector ... Spelled Out Name TBD + Vector Extract Four Bytes - r = vec_extract4b (ARG1, ARG2) + r = vec_extract4b (a, b) Purpose: - Extracts a word from a vector at a byte position. - - Result value: The first doubleword element of the result contains the zero-extended extracted word from ARG1. The second doubleword is set to 0. ARG2 specifies the least-significant byte number (0–12) of the word to be extracted. + Extracts a word from vector a at + constant byte position b. + + Result value: The first + doubleword element of r contains + the zero-extended extracted word from a. + The second doubleword is set to 0. b + specifies the least-significant byte number (0–12) of the word + to be extracted. Endian considerations: - None. + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets.
Supported type signatures for vec_extract4b - + - - - + + + + - + r - + - ARG1 + a - + - ARG2 + b - Example Implementation + Example LE Implementation + Example BE Implementation + + Restrictions @@ -6963,9 +7007,16 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> const int - sample implementation TBD + + xxextractuw r,a,12-b + + + xxextractuw r,a,b + + + ISA 3.0 or later @@ -6983,6 +7034,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> r = vec_first_match_index (ARG1, ARG2) + GCC 8.1 implementation is broken! Purpose: Performs a comparison of equality on each of the corresponding elements of ARG1 and ARG2, and returns the first position of equality. @@ -7038,7 +7090,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -7055,7 +7107,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -7072,7 +7124,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -7089,7 +7141,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -7106,7 +7158,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -7123,7 +7175,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -7141,6 +7193,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> r = vec_first_match_or_eos_index (ARG1, ARG2) + GCC 8.1 implementation is broken! Purpose: Performs a comparison of equality on each of the corresponding elements of ARG1 and ARG2. Returns the first position of equality, or the zero string terminator. 
@@ -7196,7 +7249,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -7213,7 +7266,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -7230,7 +7283,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -7247,7 +7300,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -7264,7 +7317,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -7281,7 +7334,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -7299,6 +7352,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> r = vec_first_mismatch_index (ARG1, ARG2) + GCC 8.1 implementation is broken! Purpose: Performs a comparison of inequality on each of the corresponding elements of ARG1 and ARG2, and returns the first position of inequality. 
@@ -7354,7 +7408,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -7371,7 +7425,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -7388,7 +7442,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -7405,7 +7459,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -7422,7 +7476,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -7439,7 +7493,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -7457,6 +7511,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> r = vec_first_mismatch_or_eos_index (ARG1, ARG2) + GCC 8.1 implementation is broken! Purpose: Performs a comparison of inequality on each of the corresponding elements of ARG1 and ARG2. Returns the first position of inequality, or the zero string terminator. 
@@ -7512,7 +7567,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -7529,7 +7584,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -7546,7 +7601,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -7563,7 +7618,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -7580,7 +7635,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -7597,7 +7652,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -7610,15 +7665,19 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_float - Vector ... Spelled Out Name TBD + Vector Convert Integer to Floating-Point - r = vec_float (ARG1) + r = vec_float (a) Purpose: - Converts a vector of integers to a vector of single-precision floating-point numbers. + Converts a vector of integers to a vector of single-precision + floating-point numbers. - Result value: Target elements are obtained by converting the respective source elements to single-precision floating-point numbers. + Result value: Elements of + r are obtained by converting the + respective elements of a to + single-precision floating-point numbers. Endian considerations: None. 
@@ -7638,7 +7697,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - ARG1 + a @@ -7655,7 +7714,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed int - sample implementation TBD + + xvcvsxwsp r,a + @@ -7666,7 +7727,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned int - sample implementation TBD + + xvcvuxwsp r,a + @@ -7678,34 +7741,37 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_float2 - Vector ... Spelled Out Name TBD + Vector Convert Two Vectors to Floating-Point - r = vec_float2 (ARG1, ARG2) + r = vec_float2 (a, b) Purpose: - Converts an input vector to a vector of single-precision numbers. - - Result value: Target elements are obtained by converting the source elements to single-precision numbers as follows: - - - Target elements 0 and 1 from source 0 - - - Target elements 2 and 3 from source 1 - - + Converts two input vectors of long long integers or double-precision + floating-point numbers to a vector of single-precision numbers. + + Result value: Elements of + r are obtained by converting the + elements of a and + b to single-precision numbers. + Elements 0 and 1 of r are converted + from elements 0 and 1 of a, + respectively, and elements 2 and 3 of r + are converted from elements 0 and 1 of b, respectively. Endian considerations: - None. + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets.
Supported type signatures for vec_float2 - + + @@ -7715,16 +7781,19 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - ARG1 + a - ARG2 + b - Example Implementation + Example LE Implementation + + + Example BE Implementation @@ -7740,7 +7809,22 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed long long - sample implementation TBD + + xxpermdi t,b,a,0 + xxpermdi u,b,a,3 + xvcvsxdsp v,t + xvcvsxdsp w,u + vmrgow r,v,w + + + + + xxpermdi t,b,a,0 + xxpermdi u,b,a,3 + xvcvsxdsp v,t + xvcvsxdsp w,u + vmrgew r,v,w + @@ -7754,7 +7838,22 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned long long - sample implementation TBD + + xxpermdi t,b,a,0 + xxpermdi u,b,a,3 + xvcvuxdsp v,t + xvcvuxdsp w,u + vmrgow r,v,w + + + + + xxpermdi t,b,a,0 + xxpermdi u,b,a,3 + xvcvuxdsp v,t + xvcvuxdsp w,u + vmrgew r,v,w + @@ -7768,7 +7867,22 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector double - sample implementation TBD + + xxpermdi t,b,a,0 + xxpermdi u,b,a,3 + xvcvdpsp v,t + xvcvdpsp w,u + vmrgow r,v,w + + + + + xxpermdi t,b,a,0 + xxpermdi u,b,a,3 + xvcvdpsp v,t + xvcvdpsp w,u + vmrgew r,v,w + @@ -7780,25 +7894,33 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_floate - Vector ... Spelled Out Name TBD + Vector Convert to Floating-Point in Even Elements - r = vec_floate (ARG2) + r = vec_floate (a) Purpose: - Converts an input vector to a vector of single-precision numbers. - - Result value: The even-numbered target elements are obtained by converting the source elements to single-precision numbers, using the current floating-point rounding mode. + Converts the elements of an input vector to single-precision + floating-point and stores the results in the even elements of + the target vector. 
+ + Result value: The even-numbered + elements of r are obtained by + converting the elements of a to + single-precision numbers, using the current floating-point rounding + mode. Endian considerations: - None. + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets.
Supported type signatures for vec_floate - + + @@ -7808,11 +7930,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - ARG2 + a - Example Implementation + Example LE Implementation + + + Example BE Implementation @@ -7825,7 +7950,16 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed long long - sample implementation TBD + + xvcvsxdsp r,a + + + + + + xvcvsxdsp t,a + vsldoi r,t,t,4 + @@ -7836,7 +7970,16 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned long long - sample implementation TBD + + xvcvuxdsp r,a + + + + + + xvcvuxdsp t,a + vsldoi r,t,t,4 + @@ -7847,7 +7990,16 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector double - sample implementation TBD + + xvcvdpsp r,a + + + + + + xvcvdpsp t,a + vsldoi r,t,t,4 + @@ -7859,17 +8011,27 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_floath - Vector ... Spelled Out Name TBD + Vector Convert High Elements to Float - r = vec_floath (ARG2) + r = vec_floath (a) Purpose: - Converts a vector to a vector of single-precision floating-point numbers. + Converts the first four elements of a vector of half-precision + floating-point numbers to a vector of single-precision floating-point + numbers. - Result value: Target elements 0 through 3 are set to the converted values of source elements 0 through 3, respectively. + Result value: Elements 0 through 3 + of r are set to the converted values + of elements 0 through 3, respectively, of a. Endian considerations: - None. + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets. + + Notes: + No Power compilers yet support the vector _Float16 type, so this + interface is currently deferred.
@@ -7888,7 +8050,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - ARG2 + a @@ -7907,11 +8069,12 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector _Float16 - + sample implementation TBD - + ISA 3.0 or later + Deferred @@ -7923,17 +8086,27 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_floatl - Vector ... Spelled Out Name TBD + Vector Convert Low Elements to Float - r = vec_floatl (ARG2) + r = vec_floatl (a) Purpose: - Converts a vector to a vector of single-precision floating-point numbers. + Converts the last four elements of a vector of half-precision + floating-point numbers to a vector of single-precision floating-point + numbers. - Result value: Target elements 0 through 3 are set to the converted values of source elements 4 through 7, respectively. + Result value: Elements 0 through 3 + of r are set to the converted values of + elements 4 through 7, respectively, of a. Endian considerations: - None. + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets. + + Notes: + No Power compilers yet support the vector _Float16 type, so this + interface is currently deferred.
@@ -7952,7 +8125,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - ARG2 + a @@ -7971,11 +8144,12 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector _Float16 - + sample implementation TBD - + ISA 3.0 or later + Deferred @@ -7987,25 +8161,33 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_floato - Vector ... Spelled Out Name TBD + Vector Convert to Floating-Point in Odd Elements - r = vec_floato (ARG2) + r = vec_floato (a) Purpose: - Converts an input vector to a vector of single-precision numbers. - - Result value: The odd-numbered target elements are obtained by converting the source elements to single-precision numbers, using the current floating-point rounding mode. + Converts the elements of an input vector to single-precision + floating-point and stores the results in the odd elements of the + target vector. + + Result value: The odd-numbered + elements of r are obtained by + converting the elements of a to + single-precision numbers, using the current floating-point rounding + mode. Endian considerations: - None. + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets.
Supported type signatures for vec_floato - + + @@ -8015,11 +8197,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - ARG2 + a - Example Implementation + Example LE Implementation + + + Example BE Implementation @@ -8032,7 +8217,16 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed long long - sample implementation TBD + + xvcvsxdsp t,a + vsldoi r,t,t,4 + + + + + xvcvsxdsp r,a + + @@ -8043,7 +8237,16 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned long long - sample implementation TBD + + xvcvuxdsp t,a + vsldoi r,t,t,4 + + + + + xvcvuxdsp r,a + + @@ -8054,7 +8257,16 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector double - sample implementation TBD + + xvcvdpsp t,a + vsldoi r,t,t,4 + + + + + xvcvdpsp r,a + + @@ -8066,15 +8278,20 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_floor - Vector ... Spelled Out Name TBD + Vector Floor - r = vec_floor (ARG1) + r = vec_floor (a) Purpose: - Returns a vector containing the largest representable floating-point integral values less than or equal to the values of the corresponding elements of the given vector. - - Result value: Each element of the result contains the largest representable floating-point integral value less than or equal to the value of the corresponding element of ARG1. + Returns a vector containing the largest representable floating-point + integral values less than or equal to the values of the corresponding + elements of the given vector. + + Result value: Each element of + r contains the largest representable + floating-point integral value less than or equal to the value of the + corresponding element of a. Endian considerations: None. 
@@ -8094,7 +8311,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - ARG1 + a @@ -8111,7 +8328,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector double - sample implementation TBD + + xvrdpim r,a + @@ -8122,7 +8341,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector float - sample implementation TBD + + xvrspim r,a + @@ -8134,17 +8355,30 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_gb - Vector ... Spelled Out Name TBD + Vector Gather Bits by Byte - r = vec_gb (ARG1) + r = vec_gb (a) Purpose: Performs a gather-bits operation on the input. - Result value: Within each doubleword, let x(i) (0 ≤ i < 8) denote the byte elements of the corresponding input doubleword element, with x(7) the most-significant byte. For each pair of i and j (0 ≤ i < 8, 0 ≤ j < 8), the j-th bit of the i-th byte element of the result is set to the value of the i-th bit of the j-th byte element of the input. + Result value: Within each + doubleword, let x(i) (0 ≤ i < 8) denote the byte elements, with + x(0) the most-significant byte. For each pair of i and j (0 ≤ i + < 8, 0 ≤ j < 8), the jth bit of the + ith byte element of + r is set to the value of the + ith bit of the jth byte + element of a. Endian considerations: - None. + The vec_gb intrinsic function assumes + big-endian (left-to-right) numbering for both bits and bytes, matching + the ISA 2.07 vgbbd instruction. + + Notes: + Try to get the diagram from the ISA manual to include + here.
@@ -8162,7 +8396,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - ARG1 + a @@ -8179,7 +8413,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned char - sample implementation TBD + + vgbbd r,a + @@ -8191,54 +8427,86 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_insert - Vector ... Spelled Out Name TBD + Vector Insert - r = vec_insert (ARG1, ARG2, ARG3) + r = vec_insert (a, b, c) Purpose: - Returns a copy of vector ARG2 with element ARG3 replaced by the value of ARG1. + Returns a copy of vector b with + element c replaced by the value of + a. - Result value: A copy of vector ARG2 with element ARG3 replaced by the value of ARG1. This function uses modular arithmetic on ARG3 to determine the element number. For example, if ARG3 is out of range, the compiler uses ARG3 modulo the number of elements in the vector to determine the element position. + Result value: + r contains a copy of vector + b with element c replaced by the value of a. This function uses modular arithmetic on + c to determine the element number. + For example, if c is out of range, the + compiler uses c modulo the number of + elements in the vector to determine the element position. Endian considerations: - None. + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets. + + Notes: + + + No Power compilers yet support the vector _Float16 type, so that + interface is currently deferred. + + + The sample implementations are given for ISA 3.0 when c is a constant. For earlier target + architectures, or when c is + variable, less efficient sequences are required. The sample + implementations also assume that c + is in range; that is, any required modulus operations have + already been performed on the constant index. + +
Supported type signatures for vec_insert - - - - - - - + + + + + + + + - + r - + - ARG1 + a - + - ARG2 + b - + - ARG3 + c - - Example Implementation + + Example ISA 3.0 LE Implementation - + + Example ISA 3.0 BE Implementation + + Restrictions @@ -8258,7 +8526,16 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> signed int - sample implementation TBD + + mtvsrwz t,b + vinsertb r,t,15-c + + + + + mtvsrwz t,b + vinsertb r,t,c + @@ -8278,7 +8555,16 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> signed int - sample implementation TBD + + mtvsrwz t,b + vinsertb r,t,15-c + + + + + mtvsrwz t,b + vinsertb r,t,c + @@ -8298,7 +8584,16 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> signed int - sample implementation TBD + + mtvsrwz t,b + xxinsertw r,t,(3-c)*4 + + + + + mtvsrwz t,b + xxinsertw r,t,c*4 + @@ -8318,7 +8613,16 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> signed int - sample implementation TBD + + mtvsrwz t,b + xxinsertw r,t,(3-c)*4 + + + + + mtvsrwz t,b + xxinsertw r,t,c*4 + @@ -8338,7 +8642,16 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> signed int - sample implementation TBD + + mtvsrd t,b + xxpermdi r,t,a,c + + + + + mtvsrd t,b + xxpermdi r,t,a,1-c + @@ -8358,7 +8671,16 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> signed int - sample implementation TBD + + mtvsrd t,b + xxpermdi r,t,a,c + + + + + mtvsrd t,b + xxpermdi r,t,a,1-c + @@ -8378,7 +8700,16 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> signed int - sample implementation TBD + + mtvsrwz t,b + vinserth r,t,a,(7-c)*2 + + + + + mtvsrd t,b + vinserth r,t,a,c*2 + @@ -8398,7 +8729,16 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> signed int - sample implementation TBD + + mtvsrwz t,b + vinserth r,t,a,(7-c)*2 + + + + + mtvsrd t,b + vinserth r,t,a,c*2 + @@ 
-8418,7 +8758,18 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> signed int - sample implementation TBD + + xxpermdi r,b,a,1 [c=0] + [or] + xxpermdi r,a,b,1 [c=1] + + + + + xxpermdi r,a,b,1 [c=0] + [or] + xxpermdi r,b,a,1 [c=1] + @@ -8438,9 +8789,20 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> signed int - sample implementation TBD + + xscvdpspn t,a + xxextractuw u,t,4 + xxinsertw r/b,u,(3-c)*4 + + + xscvdpspn t,a + xxextractuw u,t,4 + xxinsertw r/b,u,c*4 + + + @@ -8458,10 +8820,19 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> signed int - sample implementation TBD + + mtvsrwz t,b + vinserth r,t,a,(7-c)*2 + - ISA 3.0 or later + + mtvsrd t,b + vinserth r,t,a,c*2 + + + + Deferred @@ -8473,20 +8844,24 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_insert_exp - Vector ... Spelled Out Name TBD + Vector Insert Exponent - r = vec_insert_exp (ARG1, ARG2) + r = vec_insert_exp (a, b) Purpose: - Inserts an exponent into a floating-point number. - - Result value: Each element of the returned floating-point vector is generated by combining the exponent specified by the corresponding element of ARG2 with the sign and significand of the corresponding element of ARG1. -The inserted exponent of ARG2 is treated as a - right-justified unsigned integer containing a biased exponent, in - accordance with the exponent representation specified by IEEE - 754. It is combined with the sign and significand of ARG1 without - further processing. + Inserts exponents into a vector of floating-point numbers. + + Result value: Each element of + r is generated by combining the exponent + specified by the corresponding element of b with the sign and significand of the + corresponding element of a. 
+ The inserted exponent of b is + treated as a right-justified unsigned integer containing a biased + exponent, in accordance with the exponent representation specified by + IEEE 754. It is combined with the sign and significand of + a without further processing. Endian considerations: None. @@ -8501,25 +8876,25 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - + r - + - ARG1 + a - + - ARG2 + b Example Implementation - + Restrictions @@ -8536,9 +8911,11 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned long long - sample implementation TBD + + xviexpdp r,a,b + - + ISA 3.0 or later @@ -8553,9 +8930,11 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned long long - sample implementation TBD + + xviexpdp r,a,b + - + ISA 3.0 or later @@ -8570,9 +8949,11 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned int - sample implementation TBD + + xviexpsp r,a,b + - + ISA 3.0 or later @@ -8587,9 +8968,11 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned int - sample implementation TBD + + xviexpsp r,a,b + - + ISA 3.0 or later @@ -8602,54 +8985,64 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_insert4b - Vector ... Spelled Out Name TBD + Vector Insert Four Bytes - r = vec_insert4b (ARG1, ARG2, ARG3) + r = vec_insert4b (a, b, c) Purpose: Inserts a word into a vector at a byte position. - Result value: Let W be the first doubleword element of ARG1, truncated to 32 bits. The result vector is formed by inserting W into ARG2 at the byte position (0–12) specified by ARG3. + Result value: Let W be the first + doubleword element of a, truncated to + 32 bits. The result vector r is formed + by inserting W into b at the byte + position (0–12) specified by c. Endian considerations: - None. 
+ The element and byte numbering within a register is left-to-right for + big-endian targets, and right-to-left for little-endian targets.
Supported type signatures for vec_insert4b - - - - - - - + + + + + + + + - + r - + - ARG1 + a - + - ARG2 + b - + - ARG3 + c - Example Implementation + Example LE Implementation + Example BE Implementation + + Restrictions @@ -8669,9 +9062,18 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> const int - sample implementation TBD + + xxpermdi t,a,a,1 + xxinsertw b,t,12-c + + + xxinsertw b,t,c + + + + ISA 3.0 or later @@ -8689,9 +9091,18 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> const int - sample implementation TBD + + xxpermdi t,a,a,1 + xxinsertw b,t,12-c + + + xxinsertw b,t,c + + + + ISA 3.0 or later @@ -8704,15 +9115,19 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_loge - Vector ... Spelled Out Name TBD + Vector Base-2 Logarithm Estimate - r = vec_loge (ARG1) + r = vec_loge (a) Purpose: - Returns a vector containing estimates of the base-2 logarithms of the corresponding elements of the given vector. + Returns a vector containing estimates of the base-2 logarithms of the + corresponding elements of the source vector. - Result value: Each element of the result contains the estimated value of the base-2 logarithm of the corresponding element of ARG1. + Result value: Each element of + r contains an estimated value of the + base-2 logarithm of the corresponding element of a. Endian considerations: None. 
@@ -8732,7 +9147,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - ARG1 + a @@ -8749,7 +9164,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector float - sample implementation TBD + + vlogefp r,a + @@ -9667,7 +10084,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -9995,7 +10412,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -12916,7 +13333,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -12989,7 +13406,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -13402,7 +13819,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -13416,7 +13833,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -13430,7 +13847,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -13444,7 +13861,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -13458,7 +13875,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -13472,7 +13889,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -13854,7 +14271,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -14535,7 +14952,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample 
implementation TBD - + ISA 3.0 or later @@ -14795,7 +15212,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -15109,7 +15526,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -15129,7 +15546,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -15216,7 +15633,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -15236,7 +15653,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -16088,7 +16505,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -16108,7 +16525,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -17622,7 +18039,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -17950,7 +18367,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -18524,7 +18941,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -19503,7 +19920,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -20518,7 +20935,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -20535,7 +20952,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -20808,7 +21225,7 @@ 
xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -21013,7 +21430,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -21557,7 +21974,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -21837,7 +22254,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -21915,7 +22332,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -21932,7 +22349,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -21949,7 +22366,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -21966,7 +22383,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -21983,7 +22400,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -22000,7 +22417,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -22017,7 +22434,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -22034,7 +22451,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -22051,7 +22468,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -22068,7 +22485,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample 
implementation TBD - + ISA 3.0 or later @@ -22085,7 +22502,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -22102,7 +22519,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -22119,7 +22536,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -22197,7 +22614,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -22774,7 +23191,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -23099,7 +23516,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -23186,7 +23603,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -23206,7 +23623,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -23226,7 +23643,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -23246,7 +23663,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -23266,7 +23683,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -23286,7 +23703,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -23306,7 +23723,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -23326,7 +23743,7 @@ 
xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -23346,7 +23763,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -23366,7 +23783,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -23386,7 +23803,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -23406,7 +23823,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -23426,7 +23843,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later @@ -23513,7 +23930,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> sample implementation TBD - + ISA 3.0 or later