From 49e3ac00e5eb5553787c5ee6723fe9c49b09b43e Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Tue, 31 Aug 2021 08:39:39 -0500 Subject: [PATCH] Various updates --- Intrinsics_Reference/ch_biendian.xml | 218 ++++++++++++++-------- Intrinsics_Reference/ch_mma_reference.xml | 139 ++------------ Intrinsics_Reference/ch_vec_reference.xml | 17 +- 3 files changed, 174 insertions(+), 200 deletions(-) diff --git a/Intrinsics_Reference/ch_biendian.xml b/Intrinsics_Reference/ch_biendian.xml index f09965e..cca2bfb 100644 --- a/Intrinsics_Reference/ch_biendian.xml +++ b/Intrinsics_Reference/ch_biendian.xml @@ -800,11 +800,14 @@ a[3] = c; - + + + - @@ -814,10 +817,16 @@ a[3] = c; - + + + - + + + @@ -825,10 +834,10 @@ a[3] = c; - + - + @@ -839,10 +848,13 @@ a[3] = c; - + - + + + @@ -853,10 +865,10 @@ a[3] = c; - + - + @@ -864,10 +876,10 @@ a[3] = c; - + - + @@ -875,10 +887,10 @@ a[3] = c; - + - + @@ -886,10 +898,10 @@ a[3] = c; - + - + @@ -897,10 +909,10 @@ a[3] = c; - + - + @@ -908,10 +920,13 @@ a[3] = c; - + - + + + @@ -919,12 +934,12 @@ a[3] = c; - + - + @@ -933,13 +948,10 @@ a[3] = c; - + - - - + @@ -947,110 +959,153 @@ a[3] = c; - + + + + + + - - + + + + + + + + - + + + - - + - + + + - + - + + + - - - - + - + + + + + + + + - + - + - + - + - + - + - + - + + + + + + + + - + + + + + + - - + + + + + - - - - + + + + - + - + @@ -1059,10 +1114,10 @@ a[3] = c; - + - (ISA 2.07 only) + @@ -1073,11 +1128,11 @@ a[3] = c; - + + + + - - - @@ -1087,11 +1142,11 @@ a[3] = c; - + + + + (ISA 2.07 only) - - (ISA 2.07 only) - @@ -1101,26 +1156,35 @@ a[3] = c; - - - + - + + + + + + (ISA 2.07 only) + + + + + + - + diff --git a/Intrinsics_Reference/ch_mma_reference.xml b/Intrinsics_Reference/ch_mma_reference.xml index fb94d63..d8e8b50 100644 --- a/Intrinsics_Reference/ch_mma_reference.xml +++ b/Intrinsics_Reference/ch_mma_reference.xml @@ -38,9 +38,10 @@ 4i to 4i+3 as scratch space. 
That is, when ACC[i] contains defined data, the contents of VSRs 4i to 4i+3 are undefined until - either an xxmfacc instruction is used to copy the - contents of ACC[i] to the VSRs, or some other - instruction directly writes to one of these VSRs. + an xxmfacc instruction is used to copy the contents + of ACC[i] to the VSRs. While ACC[i] contains defined + data, writing to any of its associated VSRs will cause + ACC[i] to become undefined. This reference is not intended to be a complete introduction to @@ -50,7 +51,8 @@ Review status: Chapter reviewed - by Paul Clarke; changes made. + by Paul Clarke; changes made. Chapter reviewed by Peter + Bergner; changes made. @@ -154,19 +156,18 @@
Assembly and Disassembly of Large Types - The following builtins are used to construct + The following intrinsics are used to construct __vector_pair and __vector_quad objects from 128-bit vectors, and deconstruct them into such vectors. The disassembly interfaces place the results into - arrays of vectors. - - - FIXME: I've chosen not to include sample code generation here, - but I can be persuaded if folks think that's useful. - - - FIXME: Peter to provide some wording about semantics and - deprecation? I am not sure I have the latest information. + arrays of vectors using natural element order. The build + interfaces treat the vector input arguments as if they form an + array of vectors, with the first vector argument being array + element 0 in natural element order, the second vector argument + being array element 1, and so forth. The assemble interfaces + are deprecated because they do not give consistent results for + big- and little-endian targets, and users should use the build + interfaces instead. @@ -250,20 +251,10 @@
- Accumulator Move Operations + Accumulator Clear Operation - These instructions move data from vector quads to accumulators - (a "priming" operation) or vice versa ( a "depriming" - operation), or initialize an accumulator to zeros. + This intrinsic function initializes an accumulator to zeros. - - xxmfacc - __builtin_mma_xxmfacc - - - xxmtacc - __builtin_mma_xxmtacc - xxsetaccz __builtin_mma_xxsetaccz @@ -287,30 +278,6 @@ - void __builtin_mma_xxmfacc (__vector_quad* a) - - - - - xxmfacc a - - - - - - - void __builtin_mma_xxmtacc (__vector_quad* a) - - - - - xxmtacc a - - - - - - void __builtin_mma_xxsetaccz (__vector_quad* a) @@ -486,22 +453,6 @@ pmxvi16ger2spp __builtin_mma_pmxvi64ger2spp - - pmxvi64ger2 - __builtin_mma_pmxvi64ger2 - - - pmxvi64ger2pp - __builtin_mma_pmxvi64ger2pp - - - pmxvi64ger2s - __builtin_mma_pmxvi64ger2s - - - pmxvi64ger2spp - __builtin_mma_pmxvi64ger2spp - pmxvi4ger8 __builtin_mma_pmxvi4ger8 @@ -993,62 +944,6 @@ - void __builtin_mma_pmxvi64ger2 (__vector_quad* a, vuc b, vuc c, - const int d, const int e, const int f) - - - - - pmxvi64ger2 a,b,c,d,e,f - - - - - - - - void __builtin_mma_pmxvi64ger2pp (__vector_quad* a, vuc b, vuc c, - const int d, const int e, const int f) - - - - - pmxvi64ger2pp a,b,c,d,e,f - - - - - - - - void __builtin_mma_pmxvi64ger2s (__vector_quad* a, vuc b, vuc c, - const int d, const int e, const int f) - - - - - pmxvi64ger2s a,b,c,d,e,f - - - - - - - - void __builtin_mma_pmxvi64ger2spp (__vector_quad* a, vuc b, vuc c, - const int d, const int e, const int f) - - - - - pmxvi64ger2spp a,b,c,d,e,f - - - - - - - void __builtin_mma_pmxvi4ger8 (__vector_quad* a, vuc b, vuc c, const int d, const int e, const int f) diff --git a/Intrinsics_Reference/ch_vec_reference.xml b/Intrinsics_Reference/ch_vec_reference.xml index ebaffa1..e75ee44 100644 --- a/Intrinsics_Reference/ch_vec_reference.xml +++ b/Intrinsics_Reference/ch_vec_reference.xml @@ -151,6 +151,15 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> 
+ + Note that this document does not make distinctions about + availability of intrinsic functions prior to ISA 2.07, as + implemented on POWER8 processors. Many intrinsics not + restricted to ISA 3.0 or later are also available on earlier + processor generations. However, any interfaces that make use + of "vector long long" data types are only available beginning + with ISA 2.07. +
Terminology @@ -28589,6 +28598,12 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref"> Endian considerations: None. + Notes: + For floating-point types, if both source elements contain signed + zeros, or if either source element contains a NaN, it is + undefined which of the two source elements is copied into the + corresponding result element. + vminsb @@ -55773,7 +55788,7 @@ xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="VIPR.vec-ref">
-
+
Built-In Vector Functions for Fortran