From 8cccf7f5392275d481ce78d76ae90d95beb3ef16 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Mon, 25 Jun 2018 17:16:23 -0500 Subject: [PATCH] Updates through vec_unpackl. Signed-off-by: Bill Schmidt --- Intrinsics_Reference/ch_vec_reference.xml | 746 ++++++++++++++++------ 1 file changed, 544 insertions(+), 202 deletions(-) diff --git a/Intrinsics_Reference/ch_vec_reference.xml b/Intrinsics_Reference/ch_vec_reference.xml index c1d553e..7cd809c 100644 --- a/Intrinsics_Reference/ch_vec_reference.xml +++ b/Intrinsics_Reference/ch_vec_reference.xml @@ -22093,7 +22093,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> start bit sb is obtained from bits 5:7 of byte element i of a. Then the contents of bits - (8-sb):(15-sb) of the + (8 – sb):(15 – sb) of the halfword in byte elements i:i+1 of v are placed into byte element i of r. @@ -22614,10 +22614,10 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vspltisw t,1 - vsubuwm u,a,b - xxland v,c,t - vsubuwm r,u,v + vspltisw t,1 + vsubuwm u,a,b + xxland v,c,t + vsubuwm r,u,v @@ -22636,10 +22636,10 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vspltisw t,1 - vsubuwm u,a,b - xxland v,c,t - vsubuwm r,u,v + vspltisw t,1 + vsubuwm u,a,b + xxland v,c,t + vsubuwm r,u,v @@ -22658,7 +22658,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vsubeuqm r,a,b,c + vsubeuqm r,a,b,c @@ -22677,7 +22677,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - vsubeuqm r,a,b,c + vsubeuqm r,a,b,c @@ -22690,15 +22690,24 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_subec - Vector ... 
Spelled Out Name TBD + Vector Subtract Extended Carryout - r = vec_subec (ARG1, ARG2, ARG3) + r = vec_subec (a, b, c) Purpose: - Returns a vector containing the carries produced by first elementwise subtracting vector ARG2 from vector ARG1, and then elementwise adding vector ARG3 to the difference. ARG3 is a carry vector, with each element having a value of 0 or 1. + Returns a vector containing the carries produced by subtracting one + vector from another, then adding a third vector to the difference. The + third vector is a carry vector, with each element having a value of 0 + or 1. - Result value: The value of each element of the result is the carry produced by subtracting the corresponding element of ARG2 from the corresponding element of ARG1, and then adding the carry specified in the corresponding element of ARG3 (1 if there is a carry, 0 otherwise). + Result value: The value of each + element of r is the carry produced by + subtracting the corresponding element of b from the corresponding element of a, and then adding the carry specified in the + corresponding element of c (1 if there + is a carry, 0 otherwise). Endian considerations: None. 
@@ -22713,24 +22722,24 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - + r - + - ARG1 + a - + - ARG2 + b - + - ARG3 + c @@ -22753,7 +22762,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed int - sample implementation TBD + + vspltisw t,1 + xxland u,c,t + vsubuwm v,a,b + vsubcuw w,a,b + vsubcuw x,v,u + xxlor r,w,x + @@ -22770,7 +22786,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned int - sample implementation TBD + + vspltisw t,1 + xxland u,c,t + vsubuwm v,a,b + vsubcuw w,a,b + vsubcuw x,v,u + xxlor r,w,x + @@ -22787,7 +22810,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed __int128 - sample implementation TBD + + vsubecuq r,a,b,c + @@ -22804,7 +22829,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned __int128 - sample implementation TBD + + vsubecuq r,a,b,c + @@ -22816,15 +22843,20 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_subs - Vector ... Spelled Out Name TBD + Vector Subtract Saturated - r = vec_subs (ARG1, ARG2) + r = vec_subs (a, b) Purpose: - Returns a vector containing the saturated differences of each set of corresponding elements of the given vectors. + Returns a vector containing the saturated differences of each set of + corresponding elements of the source vectors. - Result value: The value of each element of the result is the saturated result of subtracting the value of the corresponding element of ARG2 from the value of the corresponding element of ARG1. + Result value: The value of each + element of r is the saturated result of + subtracting the value of the corresponding element of b from the value of the corresponding element of + a. Endian considerations: None. 
@@ -22845,12 +22877,12 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - ARG1 + a - ARG2 + b @@ -22870,7 +22902,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed char - sample implementation TBD + + vsubsbs r,a,b + @@ -22884,7 +22918,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned char - sample implementation TBD + + vsububs r,a,b + @@ -22898,7 +22934,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed int - sample implementation TBD + + vsubsws r,a,b + @@ -22912,7 +22950,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned int - sample implementation TBD + + vsubuws r,a,b + @@ -22926,7 +22966,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed short - sample implementation TBD + + vsubshs r,a,b + @@ -22940,7 +22982,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned short - sample implementation TBD + + vsubuhs r,a,b + @@ -22952,45 +22996,61 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_sum2s - Vector ... Spelled Out Name TBD + Vector Sum Across Half - r = vec_sum2s (ARG1, ARG2) + r = vec_sum2s (a, b) Purpose: - Returns a vector containing the results of performing a sum-across-doublewords vector operation on the given vectors. - - Result value: The first and third element of the result are 0. The second element of the result contains the saturated sum of the first and second elements of ARG1 and the second element of ARG2. The fourth element of the result contains the saturated sum of the third and fourth elements of ARG1 and the fourth element of ARG2. 
+ Returns a vector containing the results of performing a sum-across + operation within each doubleword of the first source vector together with + accumulated results in the second source vector. + + Result value: Elements 0 and 2 + of r are 0. Element 1 of r contains the saturated sum of elements 0 and 1 + of a and element 1 of b. Element 3 of r contains the saturated sum of elements 2 and 3 + of a and element 3 of b. Endian considerations: - None. + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets. Supported type signatures for vec_sum2s - + + - + r - + - ARG1 + a - + - ARG2 + b - Example Implementation + Example LE + Implementation + + + Example BE + Implementation @@ -23006,7 +23066,18 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed int - sample implementation TBD + + vsldoi t,b,b,12 + vsum2sws u,a,t + vsldoi r,u,u,4 + + + + + + vsum2sws r,a,b + + @@ -23018,45 +23089,64 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_sum4s - Vector ... Spelled Out Name TBD + Vector Sum Across Quarter - r = vec_sum4s (ARG1, ARG2) + r = vec_sum4s (a, b) Purpose: - Returns a vector containing the results of performing a sum-across-words vector operation on the given vectors. - - Result value: Assume that the elements of each vector are numbered beginning with 0. If ARG1 is a vector signed char vector or a vector unsigned char vector, then let m be 4. Otherwise, let m be 2. For each element n of the result vector, the value is obtained by adding elements mn through mn + m – 1 of ARG1 and element n of ARG2 using saturated addition. + Returns a vector containing the results of performing a sum-across + operation within each word of the first source vector together with + accumulated results in the second source vector. + + Result value: If a is a vector of signed or unsigned char, then + let m be 4; otherwise, let m + be 2. 
 For each element n of the result vector, the + value is obtained by adding elements m×n through + m×n + m – 1 of a and element n of b using saturated addition. Endian considerations: - None. + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets. + Notes: + Issue #438 in the power-gcc GitHub tracker has been opened + for wrong little-endian behavior.
Supported type signatures for vec_sum4s - + + - + r - + ARG1 - + ARG2 - Example Implementation + Example LE + Implementation + + + Example BE + Implementation @@ -23069,10 +23159,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed char - vector signed int + vector signed int - sample implementation TBD + + TBD + + + + + vsum4sbs r,a,b + @@ -23083,10 +23180,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed short - vector signed int + vector signed int - sample implementation TBD + + TBD + + + + + vsum4shs r,a,b + @@ -23097,10 +23201,17 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector unsigned char - vector unsigned int + vector unsigned int - sample implementation TBD + + TBD + + + + + vsum4ubs r,a,b + @@ -23112,45 +23223,57 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_sums - Vector ... Spelled Out Name TBD + Vector Sum Across - r = vec_sums (ARG1, ARG2) + r = vec_sums (a, b) Purpose: - Returns a vector containing the results of performing a sum across vector operation on the given vectors. - - Result value: The first three elements of the result are 0. The fourth element is the saturated sum of all the elements of ARG1 and the fourth element of ARG2. + Returns a vector containing the results of performing a sum-across + operation on the first source vector together with accumulated results + in the second source vector. + + Result value: Elements 0, 1, and 2 + of r are 0. Element 3 is the saturated + sum of all the elements of a and + element 3 of b. Endian considerations: - None. + The element numbering within a register is left-to-right for big-endian + targets, and right-to-left for little-endian targets.
Supported type signatures for vec_sums - + + - + r - + - ARG1 + a - + - ARG2 + b - Example Implementation + Example LE + Implementation + + + Example BE + Implementation @@ -23163,10 +23286,21 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed int - vector signed int + vector signed int - sample implementation TBD + + vspltw t,b,0 + vsumsws u,a,t + vsldoi r,u,u,12 + + + + + + vsumsws r,a,b + + @@ -23178,15 +23312,31 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_test_data_class - Vector ... Spelled Out Name TBD + Vector Test Data Class - r = vec_test_data_class (ARG1, ARG2) + r = vec_test_data_class (a, b) Purpose: Determines the data class for each floating-point element. - Result value: Each element is set to all ones if the corresponding element of ARG1 matches one of the possible data types selected by ARG2. If not, each element is set to all zeros. ARG2 can select one of the data types defined in + Result value: Each element of + r is set to all ones if the + corresponding element of a matches one + of the possible data classes selected by b. If not, the element is set to all zeros. + b can select one of the following data + classes, or more than one of them by ORing the constants together. + + Not a number (NaN) 64 + Positive infinity 32 + Negative infinity 16 + Positive zero 8 + Negative zero 4 + Positive subnormal 2 + Negative subnormal 1 + + Endian considerations: None. 
@@ -23201,25 +23351,25 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - + r - + - ARG1 + a - + - ARG2 + b Example Implementation - + Restrictions @@ -23233,10 +23383,12 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector float - const int + const int - sample implementation TBD + + xvtstdcsp r,a,b + ISA 3.0 or later @@ -23253,7 +23405,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> const int - sample implementation TBD + + xvtstdcdp r,a,b + ISA 3.0 or later @@ -23268,15 +23422,19 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_trunc - Vector ... Spelled Out Name TBD + Vector Truncate - r = vec_trunc (ARG1) + r = vec_trunc (a) Purpose: - Returns a vector containing the truncated values of the corresponding elements of the given vector. + Returns a vector containing the truncated values of the corresponding + elements of the given vector. - Result value: Each element of the result contains the value of the corresponding element of ARG1, truncated to an integral value. + Result value: Each element of + r contains the value of the + corresponding element of a, truncated + to an integral value. Endian considerations: None. @@ -23296,7 +23454,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> - ARG1 + a @@ -23313,7 +23471,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector double - sample implementation TBD + + xvrdpiz r,a + @@ -23324,7 +23484,9 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector float - sample implementation TBD + + xvrspiz r,a + @@ -23336,71 +23498,106 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_unpackh - Vector ... 
Spelled Out Name TBD + Vector Unpack High - r = vec_unpackh (ARG1) + r = vec_unpackh (a) Purpose: - Unpacks the most-significant (“high”) half of a vector into a vector with larger elements. - - Result value: If ARG1 is an integer vector, the value of each element of the result is the value of the corresponding element of the most-significant half of ARG1. -If ARG1 is a floating-point vector, the value of each - element of the result is the value of the corresponding element - of the most-significant half of ARG1, widened to the result - precision. -If ARG1 is a pixel vector, the value of each element of the - result is taken from the corresponding element of the - most-significant half of ARG1 as follows: - - - All bits in the first byte of the element of the result - are set to the value of the first bit of the element of - ARG1. - - - The least-significant 5 bits of the second byte of the - element of the result are set to the value of the next 5 bits - in the element of ARG1. - - - The least-significant 5 bits of the third byte of the - element of the result are set to the value of the next 5 bits - in the element of ARG1. - - - The least-significant 5 bits of the fourth byte of the - element of the result are set to the value of the next 5 bits - in the element of ARG1. - - + Unpacks the most-significant (“high”) half of a vector into a vector + with larger elements. + + Result value: If a is an integer vector, the value of each element + of r is the value of the corresponding + element of the most-significant half of a. + If a is a floating-point vector, + the value of each element of r is the + value of the corresponding element of the most-significant half of + a, widened to the result + precision. + If a is a pixel vector, the value + of each element of r is taken from the + corresponding element of the most-significant half of a as follows: + + + All bits in the first byte of the element of r are set to the value of the first bit of + the element of a. 
+ + + The least-significant 5 bits of the second byte of the + element of r are set to the value + of the next 5 bits in the element of a. + + + The least-significant 5 bits of the third byte of the + element of r are set to the value + of the next 5 bits in the element of a. + + + The least-significant 5 bits of the fourth byte of the + element of r are set to the value + of the next 5 bits in the element of a. + + Endian considerations: - None. + The "high" half of a vector with n elements is the + first n/2 elements of the vector. For little + endian, these elements are in the rightmost half of the vector. For + big endian, these elements are in the leftmost half of the vector. + Notes: + + + + Issue #439 in the power-gcc GitHub tracker is open + against wrong code produced by GCC for unpacking floats to + doubles. + + + + + No Power compilers yet support the vector _Float16 type, so that + interface is currently deferred. + + +
Supported type signatures for vec_unpackh - + + - + r - + - ARG1 + a - Example Implementation + Example LE + Implementation + Example BE + Implementation + + Restrictions @@ -23414,7 +23611,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector bool short - sample implementation TBD + + vupklsh r,a + + + + + vupkhsh r,a + @@ -23428,7 +23632,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed short - sample implementation TBD + + vupklsh r,a + + + + + vupkhsh r,a + @@ -23442,7 +23653,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector pixel - sample implementation TBD + + vupklpx r,a + + + + + vupkhpx r,a + @@ -23456,7 +23674,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector bool int - sample implementation TBD + + vupklsw r,a + + + + + vupkhsw r,a + @@ -23470,7 +23695,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed int - sample implementation TBD + + vupklsw r,a + + + + + vupkhsw r,a + @@ -23484,7 +23716,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector bool char - sample implementation TBD + + vupklsb r,a + + + + + vupkhsb r,a + @@ -23498,7 +23737,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed char - sample implementation TBD + + vupklsb r,a + + + + + vupkhsb r,a + @@ -23512,7 +23758,10 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector float - sample implementation TBD + [TBD] + + + [TBD] @@ -23526,10 +23775,13 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector _Float16 - sample implementation TBD + [TBD] + + + [TBD] - ISA 3.0 or later + Deferred @@ -23541,71 +23793,106 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vec_unpackl - Vector ... 
Spelled Out Name TBD + Vector Unpack Low - r = vec_unpackl (ARG1) + r = vec_unpackl (a) Purpose: - Unpacks the least-significant (“low”) half of a vector into a vector with larger elements. - - Result value: If ARG1 is an integer vector, the value of each element of the result is the value of the corresponding element of the least-significant half of ARG1. -If ARG1 is a floating-point vector, the value of each - element of the result is the value of the corresponding element - of the least-significant half of ARG, widened to the result - precision. -If ARG1 is a pixel vector, the value of each element of the - result is taken from the corresponding element of the - least-significant half of ARG1 as follows: - - - All bits in the first byte of the element of the result - are set to the value of the first bit of the element of - ARG1. - - - The least-significant 5 bits of the second byte of the - element of the result are set to the value of the next 5 bits - in the element of ARG1. - - - The least-significant 5 bits of the third byte of the - element of the result are set to the value of the next 5 bits - in the element of ARG1. - - - The least-significant 5 bits of the fourth byte of the - element of the result are set to the value of the next 5 bits - in the element of ARG1. - - + Unpacks the least-significant (“low”) half of a vector into a vector + with larger elements. + + Result value: If a is an integer vector, the value of each element + of r is the value of the corresponding + element of the least-significant half of a. + If a is a floating-point vector, + the value of each element of r is the + value of the corresponding element of the least-significant half of + a, widened to the result + precision. 
+ If a is a pixel vector, the value + of each element of r is taken from the + corresponding element of the least-significant half of a as follows: + + + All bits in the first byte of the element of r are set to the value of the first bit of + the element of a. + + + The least-significant 5 bits of the second byte of the + element of r are set to the value + of the next 5 bits in the element of a. + + + The least-significant 5 bits of the third byte of the + element of r are set to the value + of the next 5 bits in the element of a. + + + The least-significant 5 bits of the fourth byte of the + element of r are set to the value + of the next 5 bits in the element of a. + + Endian considerations: - None. + The "low" half of a vector with n elements is the + last n/2 elements of the vector. For little + endian, these elements are in the leftmost half of the vector. For + big endian, these elements are in the rightmost half of the vector. + Notes: + + + + Issue #439 in the power-gcc GitHub tracker is open + against wrong code produced by GCC for unpacking floats to + doubles. + + + + + No Power compilers yet support the vector _Float16 type, so that + interface is currently deferred. + + +
Supported type signatures for vec_unpackl - + + - + r - + ARG1 - Example Implementation + Example LE + Implementation + Example BE + Implementation + + Restrictions @@ -23619,7 +23906,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector bool short - sample implementation TBD + + vupkhsh r,a + + + + + vupklsh r,a + @@ -23633,7 +23927,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed short - sample implementation TBD + + vupkhsh r,a + + + + + vupklsh r,a + @@ -23647,7 +23948,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector pixel - sample implementation TBD + + vupkhpx r,a + + + + + vupklpx r,a + @@ -23661,7 +23969,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector bool int - sample implementation TBD + + vupkhsw r,a + + + + + vupklsw r,a + @@ -23675,7 +23990,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed int - sample implementation TBD + + vupkhsw r,a + + + + + vupklsw r,a + @@ -23689,7 +24011,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector bool char - sample implementation TBD + + vupkhsb r,a + + + + + vupklsb r,a + @@ -23703,7 +24032,14 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector signed char - sample implementation TBD + + vupkhsb r,a + + + + + vupklsb r,a + @@ -23717,7 +24053,10 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector float - sample implementation TBD + [TBD] + + + [TBD] @@ -23731,10 +24070,13 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="section_vec_intrinsics"> vector _Float16 - sample implementation TBD + [TBD] + + + [TBD] - ISA 3.0 or later + Deferred