Vc
0.7.5-dev
SIMD Vector Classes for C++
|
|
SIMD Vector of 16 bit signed integers.
#include <Vc/short_v>
Public Types | |
enum | { Size } |
typedef ushort_v | IndexType |
The type of the vector used for indexes in gather and scatter operations. | |
typedef short | EntryType |
The type of the entries in the vector. | |
typedef short_m | Mask |
The type of the mask used for masked operations and returned from comparisons. |
Public Member Functions | |||||||||||
short_v () | |||||||||||
Construct an uninitialized vector. | |||||||||||
short_v (Vc::Zero) | |||||||||||
Construct a vector with the entries initialized to zero. | |||||||||||
short_v (Vc::One) | |||||||||||
Construct a vector with the entries initialized to one. | |||||||||||
short_v (Vc::IndexesFromZero) | |||||||||||
Construct a vector with the entries initialized to 0, 1, 2, 3, 4, 5, ... | |||||||||||
short_v (short *alignedMemory) | |||||||||||
Construct a vector loading its entries from alignedMemory . | |||||||||||
template<typename OtherVector > | |||||||||||
short_v (const OtherVector &) | |||||||||||
Convert from another vector type. | |||||||||||
short_v (short x) | |||||||||||
Broadcast Constructor. | |||||||||||
void | load (const short *memory, LoadStoreFlags align=Aligned) | ||||||||||
Load the vector entries from memory, overwriting the previous values. | |||||||||||
void | setZero () | ||||||||||
Set all entries to zero. | |||||||||||
void | setZero (const short_m &mask) | ||||||||||
Set all entries to zero where the mask is set. | |||||||||||
void | store (EntryType *memory, LoadStoreFlags align=Aligned) const | ||||||||||
Store the vector data to memory . | |||||||||||
short & | operator[] (int index) | ||||||||||
This operator can be used to modify scalar entries of the vector. | |||||||||||
short | operator[] (int index) const | ||||||||||
This operator can be used to read scalar entries of the vector. | |||||||||||
MaskedVector | operator() (const short_m &mask) | ||||||||||
Writemask the vector before an assignment. | |||||||||||
short_v | sorted () const | ||||||||||
Return a sorted copy of the vector. | |||||||||||
Gather and Scatter Functions | |||||||||||
The gather and scatter functions allow you to easily use vectors with structured data and random accesses. There are several variants:
All gather and scatter functions optionally take a mask as last argument. In that case only the entries that are selected in the mask are read from memory and copied to the vector. This allows you to have invalid indexes in the index vector, as long as the corresponding entries are masked off.
The index type (IndexT) can either be a pointer to integers (array) or a vector of integers. Accessing values of a struct works like this: struct MyData {
float a;
int b;
};
void foo(MyData *data, uint_v indexes) {
const float_v v1(data, &MyData::a, indexes);
const int_v v2(data, &MyData::b, indexes);
v1.scatter(data, &MyData::a, indexes - float_v::Size);
v2.scatter(data, &MyData::b, indexes - 1);
}
| |||||||||||
template<typename IndexT > | |||||||||||
short_v (const short *array, const IndexT indexes) | |||||||||||
gather constructor | |||||||||||
template<typename IndexT > | |||||||||||
short_v (const short *array, const IndexT indexes, const short_m &mask) | |||||||||||
masked gather constructor, initialized to zero | |||||||||||
template<typename IndexT > | |||||||||||
void | gather (const short *array, const IndexT indexes) | ||||||||||
gather | |||||||||||
template<typename IndexT > | |||||||||||
void | gather (const short *array, const IndexT indexes, const short_m &mask) | ||||||||||
masked gather | |||||||||||
template<typename IndexT > | |||||||||||
void | scatter (short *array, const IndexT indexes) const | ||||||||||
scatter | |||||||||||
template<typename IndexT > | |||||||||||
void | scatter (short *array, const IndexT indexes, const short_m &mask) const | ||||||||||
masked scatter | |||||||||||
template<typename S1 , typename IndexT > | |||||||||||
short_v (const S1 *array, const short S1::*member1, const IndexT indexes) | |||||||||||
struct member gather constructor | |||||||||||
template<typename S1 , typename IndexT > | |||||||||||
short_v (const S1 *array, const short S1::*member1, const IndexT indexes, const short_m &mask) | |||||||||||
masked struct member gather constructor, initialized to zero | |||||||||||
template<typename S1 , typename IndexT > | |||||||||||
void | gather (const S1 *array, const short S1::*member1, const IndexT indexes) | ||||||||||
struct member gather | |||||||||||
template<typename S1 , typename IndexT > | |||||||||||
void | gather (const S1 *array, const short S1::*member1, const IndexT indexes, const short_m &mask) | ||||||||||
masked struct member gather | |||||||||||
template<typename S1 , typename IndexT > | |||||||||||
void | scatter (S1 *array, short S1::*member1, const IndexT indexes) const | ||||||||||
struct member scatter | |||||||||||
template<typename S1 , typename IndexT > | |||||||||||
void | scatter (S1 *array, short S1::*member1, const IndexT indexes, const short_m &mask) const | ||||||||||
masked struct member scatter | |||||||||||
template<typename S1 , typename S2 , typename IndexT > | |||||||||||
short_v (const S1 *array, const S2 S1::*member1, const short S2::*member2, const IndexT indexes) | |||||||||||
struct member of struct member gather constructor | |||||||||||
template<typename S1 , typename S2 , typename IndexT > | |||||||||||
short_v (const S1 *array, const S2 S1::*member1, const short S2::*member2, const IndexT indexes, const short_m &mask) | |||||||||||
masked struct member of struct member gather constructor, initialized to zero | |||||||||||
template<typename S1 , typename S2 , typename IndexT > | |||||||||||
void | gather (const S1 *array, const S2 S1::*member1, const short S2::*member2, const IndexT indexes) | ||||||||||
struct member of struct member gather | |||||||||||
template<typename S1 , typename S2 , typename IndexT > | |||||||||||
void | gather (const S1 *array, const S2 S1::*member1, const short S2::*member2, const IndexT indexes, const short_m &mask) | ||||||||||
masked struct member of struct member gather | |||||||||||
template<typename S1 , typename S2 , typename IndexT > | |||||||||||
void | scatter (S1 *array, S2 S1::*member1, short S2::*member2, const IndexT indexes) const | ||||||||||
struct member of struct member scatter | |||||||||||
template<typename S1 , typename S2 , typename IndexT > | |||||||||||
void | scatter (S1 *array, S2 S1::*member1, short S2::*member2, const IndexT indexes, const short_m &mask) const | ||||||||||
masked struct member of struct member scatter | |||||||||||
Comparisons | |||||||||||
All comparison operators return a mask object. void foo(const float_v &a, const float_v &b) {
const float_m mask = a < b;
...
}
| |||||||||||
short_m | operator== (const short_v &x) const | ||||||||||
Returns mask that is true where vector entries are equal and false otherwise. | |||||||||||
short_m | operator!= (const short_v &x) const | ||||||||||
Returns mask that is true where vector entries are not equal and false otherwise. | |||||||||||
short_m | operator> (const short_v &x) const | ||||||||||
Returns mask that is true where the left vector entries are greater than on the right and false otherwise. | |||||||||||
short_m | operator>= (const short_v &x) const | ||||||||||
Returns mask that is true where the left vector entries are greater than on the right or equal and false otherwise. | |||||||||||
short_m | operator< (const short_v &x) const | ||||||||||
Returns mask that is true where the left vector entries are less than on the right and false otherwise. | |||||||||||
short_m | operator<= (const short_v &x) const | ||||||||||
Returns mask that is true where the left vector entries are less than on the right or equal and false otherwise. | |||||||||||
Arithmetic Operations | |||||||||||
The vector classes implement all the arithmetic and (bitwise) logical operations as you know from builtin types. void foo(const float_v &a, const float_v &b) {
const float_v product = a * b;
const float_v difference = a - b;
}
| |||||||||||
short_v | operator+ (short_v x) const | ||||||||||
Returns a new vector with the sum of the respective entries of the left and right vector. | |||||||||||
short_v & | operator+= (short_v x) | ||||||||||
Adds the respective entries of x to this vector. | |||||||||||
short_v | operator- (short_v x) const | ||||||||||
Returns a new vector with the difference of the respective entries of the left and right vector. | |||||||||||
short_v & | operator-= (short_v x) | ||||||||||
Subtracts the respective entries of x from this vector. | |||||||||||
short_v | operator* (short_v x) const | ||||||||||
Returns a new vector with the product of the respective entries of the left and right vector. | |||||||||||
short_v & | operator*= (short_v x) | ||||||||||
Multiplies the respective entries of this vector by x . | |||||||||||
short_v | operator/ (short_v x) const | ||||||||||
Returns a new vector with the quotient of the respective entries of the left and right vector. | |||||||||||
short_v & | operator/= (short_v x) | ||||||||||
Divides the respective entries of this vector by x . | |||||||||||
short_v | operator- () const | ||||||||||
Returns a new vector with all entries negated. | |||||||||||
short_v | operator| (short_v x) const | ||||||||||
Returns a new vector with the binary or of the respective entries of the left and right vector. | |||||||||||
short_v | operator& (short_v x) const | ||||||||||
Returns a new vector with the binary and of the respective entries of the left and right vector. | |||||||||||
short_v | operator^ (short_v x) const | ||||||||||
Returns a new vector with the binary xor of the respective entries of the left and right vector. | |||||||||||
short_v | operator<< (int x) const | ||||||||||
Returns a new vector with each entry bitshifted to the left by x bits. | |||||||||||
short_v & | operator<<= (int x) | ||||||||||
Bitshift each entry to the left by x bits. | |||||||||||
short_v | operator>> (int x) const | ||||||||||
Returns a new vector with each entry bitshifted to the right by x bits. | |||||||||||
short_v & | operator>>= (int x) | ||||||||||
Bitshift each entry to the right by x bits. | |||||||||||
short_v | operator<< (short_v x) const | ||||||||||
Returns a new vector with each entry bitshifted to the left by x [i] bits. | |||||||||||
short_v & | operator<<= (short_v x) | ||||||||||
Bitshift each entry to the left by x [i] bits. | |||||||||||
short_v | operator>> (short_v x) const | ||||||||||
Returns a new vector with each entry bitshifted to the right by x [i] bits. | |||||||||||
short_v & | operator>>= (short_v x) | ||||||||||
Bitshift each entry to the right by x [i] bits. | |||||||||||
void | fusedMultiplyAdd (short_v factor, short_v summand) | ||||||||||
Multiplies this vector with factor and then adds summand , without rounding between the multiplication and the addition. | |||||||||||
Horizontal Reduction Operations | |||||||||||
There are four horizontal operations available to reduce the values of a vector to a scalar value. void foo(const float_v &v) {
float min = v.min(); // smallest value in v
float sum = v.sum(); // sum of all values in v
}
| |||||||||||
short | min () const | ||||||||||
Returns the smallest entry in the vector. | |||||||||||
short | max () const | ||||||||||
Returns the largest entry in the vector. | |||||||||||
short | product () const | ||||||||||
Returns the product of all entries in the vector. | |||||||||||
short | sum () const | ||||||||||
Returns the sum of all entries in the vector. | |||||||||||
Apply/Call/Fill Functions | |||||||||||
There are still many situations where the code needs to switch from SIMD operations to scalar execution. In this case you can, of course, rely on operator[]. But there are also a number of functions that can help with common patterns. The apply functions expect a function that returns a scalar value, i.e. a function of the form "T f(T)". The call functions do not return a value and thus the function passed does not need a return value. The fill functions are used to serially set the entries of the vector from the return values of a function. Example: void foo(float_v v) {
float_v logarithm = v.apply(std::log);
float_v exponential = v.apply(std::exp);
}
Of course, with C++11, you can also use lambdas here: float_v power = v.apply([](float f) { return std::pow(f, 0.6f); })
| |||||||||||
template<typename Functor > | |||||||||||
short_v | apply (Functor &f) const | ||||||||||
Return a new vector where each entry is the return value of f called on the current value. | |||||||||||
template<typename Functor > | |||||||||||
short_v | apply (const Functor &f) const | ||||||||||
Const overload of the above function. | |||||||||||
template<typename Functor > | |||||||||||
short_v | apply (Functor &f, short_m mask) const | ||||||||||
As above, but skip the entries where mask is not set. | |||||||||||
template<typename Functor > | |||||||||||
short_v | apply (const Functor &f, short_m mask) const | ||||||||||
Const overload of the above function. | |||||||||||
template<typename Functor > | |||||||||||
void | call (Functor &f) const | ||||||||||
Call f with the scalar entries of the vector. | |||||||||||
template<typename Functor > | |||||||||||
void | call (const Functor &f) const | ||||||||||
Const overload of the above function. | |||||||||||
template<typename Functor > | |||||||||||
void | call (Functor &f, short_m mask) const | ||||||||||
As above, but skip the entries where mask is not set. | |||||||||||
template<typename Functor > | |||||||||||
void | call (const Functor &f, short_m mask) const | ||||||||||
Const overload of the above function. | |||||||||||
void | fill (short(&f)()) | ||||||||||
Fill the vector with the values [f(), f(), f(), ...]. | |||||||||||
template<typename IndexT > | |||||||||||
void | fill (short(&f)(IndexT)) | ||||||||||
Fill the vector with the values [f(0), f(1), f(2), ...]. | |||||||||||
Swizzles | |||||||||||
Swizzles are a special form of shuffles that, depending on the target hardware and swizzle type, may be used without extra cost. The swizzles act on every successive four entries in the vector. Thus the swizzle [0, 1, 2, 3, 4, 5, 6, 7].dcba() results in [3, 2, 1, 0, 7, 6, 5, 4] . This implies a portability issue. The swizzles can only work on vectors where Size is a multiple of four. On Vc::Scalar all swizzles are implemented as no-ops. If a swizzle is used on a vector of Size == 2 compilation will fail. | |||||||||||
const short_v | abcd () const | ||||||||||
Identity. | |||||||||||
const short_v | badc () const | ||||||||||
Permute pairs. | |||||||||||
const short_v | cdab () const | ||||||||||
Permute pairs of two / Rotate twice. | |||||||||||
const short_v | aaaa () const | ||||||||||
Broadcast a. | |||||||||||
const short_v | bbbb () const | ||||||||||
Broadcast b. | |||||||||||
const short_v | cccc () const | ||||||||||
Broadcast c. | |||||||||||
const short_v | dddd () const | ||||||||||
Broadcast d. | |||||||||||
const short_v | bcad () const | ||||||||||
Rotate three: cross-product swizzle. | |||||||||||
const short_v | bcda () const | ||||||||||
Rotate left. | |||||||||||
const short_v | dabc () const | ||||||||||
Rotate right. | |||||||||||
const short_v | acbd () const | ||||||||||
Permute inner pair. | |||||||||||
const short_v | dbca () const | ||||||||||
Permute outer pair. | |||||||||||
const short_v | dcba () const | ||||||||||
Reverse. | |||||||||||
Shift and Rotate | |||||||||||
These functions allow you to shift or rotate the entries in a vector by the given amount. Both functions support positive and negative numbers for the shift/rotate value. Example: using namespace Vc;
int_v foo = int_v::IndexesFromZero() + 1; // e.g. [1, 2, 3, 4] with SSE
int_v x;
x = foo.shifted( 1); // [2, 3, 4, 0]
x = foo.shifted( 2); // [3, 4, 0, 0]
x = foo.shifted( 3); // [4, 0, 0, 0]
x = foo.shifted( 4); // [0, 0, 0, 0]
x = foo.shifted(-1); // [0, 1, 2, 3]
x = foo.shifted(-2); // [0, 0, 1, 2]
x = foo.shifted(-3); // [0, 0, 0, 1]
x = foo.shifted(-4); // [0, 0, 0, 0]
x = foo.rotated( 1); // [2, 3, 4, 1]
x = foo.rotated( 2); // [3, 4, 1, 2]
x = foo.rotated( 3); // [4, 1, 2, 3]
x = foo.rotated( 4); // [1, 2, 3, 4]
x = foo.rotated(-1); // [4, 1, 2, 3]
x = foo.rotated(-2); // [3, 4, 1, 2]
x = foo.rotated(-3); // [2, 3, 4, 1]
x = foo.rotated(-4); // [1, 2, 3, 4]
These functions are slightly related to the above swizzles. In any case, they are often useful for communication between SIMD lanes or binary decoding operations. | |||||||||||
const short_v | shifted (int amount) const | ||||||||||
Shift vector entries to the left by amount ; shifting in zeros. | |||||||||||
const short_v | rotated (int amount) const | ||||||||||
Rotate vector entries to the left by amount . |
Static Public Member Functions | |
static short_v | Zero () |
Returns a vector with the entries initialized to zero. | |
static short_v | One () |
Returns a vector with the entries initialized to one. | |
static short_v | IndexesFromZero () |
Returns a vector with the entries initialized to 0, 1, 2, 3, 4, 5, ... | |
static short_v | Random () |
Returns a vector with pseudo-random entries. |
anonymous enum |
Size |
The size of the vector. I.e. the number of scalar entries in the vector. Do not make any assumptions about the size of vectors. If you need a vector of float vs. integer of the same size make use of IndexType instead. Note that this still does not guarantee the same size (e.g. double_v on SSE has two entries but there exists no 64 bit integer vector type in Vc - which would have two entries; thus double_v::IndexType is uint_v). Also you can easily use if clauses that compare sizes. The compiler can statically evaluate and fully optimize dead code away (very much like #ifdef, but with syntax checking). |
Construct a vector with the entries initialized to 0, 1, 2, 3, 4, 5, ...
short_v | ( | short * | alignedMemory | ) |
Construct a vector loading its entries from alignedMemory.
alignedMemory | A pointer to data. The pointer must be aligned on a Vc::VectorAlignment boundary. |
short_v | ( | short | x | ) |
Broadcast Constructor.
Constructs a vector with all entries of the vector filled with the given value.
x | The scalar value to broadcast to all entries of the constructed vector. |
|
static |
Returns a vector with pseudo-random entries.
Currently the state of the random number generator cannot be modified and starts off with the same state. Thus you will get the same sequence of numbers for the same sequence of calls.
void load | ( | const short * | memory, |
LoadStoreFlags | align = Aligned |
||
) |
Construct a vector from an array of vectors with different Size.
E.g. convert from two double_v to one float_v.
E.g. convert from one float_v to two double_v.
This is the reverse of the above constructor. Load the vector entries from memory, overwriting the previous values.
memory | A pointer to data. |
align | Determines whether memory is an aligned pointer or not. |
void setZero | ( | const short_m & | mask | ) |
Set all entries to zero where the mask is set.
I.e. a 4-vector with a mask of 0111 would set the last three entries to 0.
mask | Selects the entries to be set to zero. |
void store | ( | EntryType * | memory, |
LoadStoreFlags | align = Aligned |
||
) | const |
Store the vector data to memory.
memory | A pointer to memory, where to store. |
align | Determines whether memory is an aligned pointer or not. |
short& operator[] | ( | int | index | ) |
This operator can be used to modify scalar entries of the vector.
index | A value in the range 0 to Size - 1. This value is not checked internally so you must make/be sure it is in range. |
Returns a reference to the vector entry at the given index.
short operator[] | ( | int | index | ) | const |
This operator can be used to read scalar entries of the vector.
index | A value in the range 0 to Size - 1. This value is not checked internally so you must make/be sure it is in range. |
Returns the vector entry at the given index.
MaskedVector operator() | ( | const short_m & | mask | ) |
Writemask the vector before an assignment.
mask | The writemask to be used. |
The returned object is only to be used for assignments and should not be assigned to a variable.
Examples:
Multiplies this vector with factor and then adds summand, without rounding between the multiplication and the addition.
factor | The multiplication factor. |
summand | The summand that will be added after multiplication. |
short_v sorted | ( | ) | const |
Return a sorted copy of the vector.
v[0] <= v[1] <= v[2] <= v[3] ...
Example:
With SSE the output would be:
[1513634383, -963914658, 1763536262, -1285037745] [-1285037745, -963914658, 1513634383, 1763536262]
With the Scalar implementation:
[1513634383] [1513634383]