Vc  1.1.0
SIMD Vector Classes for C++
simdarrayhelper.h
1 /* This file is part of the Vc library. {{{
2 Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7  * Redistributions of source code must retain the above copyright
8  notice, this list of conditions and the following disclaimer.
9  * Redistributions in binary form must reproduce the above copyright
10  notice, this list of conditions and the following disclaimer in the
11  documentation and/or other materials provided with the distribution.
12  * Neither the names of contributing organizations nor the
13  names of its contributors may be used to endorse or promote products
14  derived from this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
20 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 }}}*/
28 
29 #ifndef VC_COMMON_SIMDARRAYHELPER_H_
30 #define VC_COMMON_SIMDARRAYHELPER_H_
31 
32 #include "macros.h"
33 
34 namespace Vc_VERSIONED_NAMESPACE
35 {
36 namespace Common
37 {
38 
41 
// Function-object types that name an operation. Every type defined in this
// namespace derives from `tag`, which is what `is_operation` (last declaration of
// the namespace) tests for.
42 namespace Operations/*{{{*/
43 {
// Common base class of all operation types.
44 struct tag {};
// Vc_DEFINE_OPERATION(name_), first form: defines `struct name_` whose call operator
// takes an object `v` plus arbitrary further arguments and calls the member
// function `v.name_(...)`, perfectly forwarding those arguments. The call operator
// returns nothing.
45 #define Vc_DEFINE_OPERATION(name_) \
46  struct name_ : public tag { \
47  template <typename V, typename... Args> \
48  Vc_INTRINSIC void operator()(V &v, Args &&... args) \
49  { \
50  v.name_(std::forward<Args>(args)...); \
51  } \
52  }
53 Vc_DEFINE_OPERATION(gather);
54 Vc_DEFINE_OPERATION(scatter);
55 Vc_DEFINE_OPERATION(load);
56 Vc_DEFINE_OPERATION(store);
57 Vc_DEFINE_OPERATION(setZero);
58 Vc_DEFINE_OPERATION(setZeroInverted);
59 Vc_DEFINE_OPERATION(assign);
60 #undef Vc_DEFINE_OPERATION
// Vc_DEFINE_OPERATION(name_, code_), second form: defines `struct name_` whose call
// operator takes only `v` and executes `code_`; `code_` may refer to `v` and to its
// type `V`.
61 #define Vc_DEFINE_OPERATION(name_, code_) \
62  struct name_ : public tag { \
63  template <typename V> Vc_INTRINSIC void operator()(V &v) { code_; } \
64  }
65 Vc_DEFINE_OPERATION(increment, ++(v));
66 Vc_DEFINE_OPERATION(decrement, --(v));
67 Vc_DEFINE_OPERATION(random, v = V::Random());
68 #undef Vc_DEFINE_OPERATION
// Vc_DEFINE_OPERATION_FORWARD(name_): defines `struct Forward_<name_>` wrapping an
// unqualified call to the function `name_`. Both call operators take part in
// overload resolution only if `name_(args...)` is a valid expression (defaulted
// template argument using decltype):
//  - first overload: `v` is an lvalue reference to the type of `name_(args...)`;
//    the result of the call is assigned to `v`.
//  - second overload: the first argument is a std::nullptr_t; `name_(args...)` is
//    called and its result is discarded.
69 #define Vc_DEFINE_OPERATION_FORWARD(name_) \
70  struct Forward_##name_ : public tag \
71  { \
72  template <typename... Args, typename = decltype(name_(std::declval<Args>()...))> \
73  Vc_INTRINSIC void operator()(decltype(name_(std::declval<Args>()...)) &v, \
74  Args &&... args) \
75  { \
76  v = name_(std::forward<Args>(args)...); \
77  } \
78  template <typename... Args, typename = decltype(name_(std::declval<Args>()...))> \
79  Vc_INTRINSIC void operator()(std::nullptr_t, Args && ... args) \
80  { \
81  name_(std::forward<Args>(args)...); \
82  } \
83  }
84 Vc_DEFINE_OPERATION_FORWARD(abs);
85 Vc_DEFINE_OPERATION_FORWARD(asin);
86 Vc_DEFINE_OPERATION_FORWARD(atan);
87 Vc_DEFINE_OPERATION_FORWARD(atan2);
88 Vc_DEFINE_OPERATION_FORWARD(cos);
89 Vc_DEFINE_OPERATION_FORWARD(ceil);
90 Vc_DEFINE_OPERATION_FORWARD(copysign);
91 Vc_DEFINE_OPERATION_FORWARD(exp);
92 Vc_DEFINE_OPERATION_FORWARD(exponent);
93 Vc_DEFINE_OPERATION_FORWARD(fma);
94 Vc_DEFINE_OPERATION_FORWARD(floor);
95 Vc_DEFINE_OPERATION_FORWARD(frexp);
96 Vc_DEFINE_OPERATION_FORWARD(isfinite);
97 Vc_DEFINE_OPERATION_FORWARD(isinf);
98 Vc_DEFINE_OPERATION_FORWARD(isnan);
99 Vc_DEFINE_OPERATION_FORWARD(isnegative);
100 Vc_DEFINE_OPERATION_FORWARD(ldexp);
101 Vc_DEFINE_OPERATION_FORWARD(log);
102 Vc_DEFINE_OPERATION_FORWARD(log10);
103 Vc_DEFINE_OPERATION_FORWARD(log2);
104 Vc_DEFINE_OPERATION_FORWARD(reciprocal);
105 Vc_DEFINE_OPERATION_FORWARD(round);
106 Vc_DEFINE_OPERATION_FORWARD(rsqrt);
107 Vc_DEFINE_OPERATION_FORWARD(sin);
108 Vc_DEFINE_OPERATION_FORWARD(sincos);
109 Vc_DEFINE_OPERATION_FORWARD(sqrt);
110 Vc_DEFINE_OPERATION_FORWARD(trunc);
111 Vc_DEFINE_OPERATION_FORWARD(min);
112 Vc_DEFINE_OPERATION_FORWARD(max);
113 #undef Vc_DEFINE_OPERATION_FORWARD
// std::true_type-like for `tag` itself and for every type derived from it.
114 template<typename T> using is_operation = std::is_base_of<tag, T>;
115 } // namespace Operations }}}
116 
// Segment<T_, Pieces_, Index_>: stores a value `data` of type T_ and denotes part
// number Index_ (zero-based) of Pieces_ parts of it. Split::loImpl/hiImpl create
// these for objects that cannot be split into two stored halves.
122 template <typename T_, std::size_t Pieces_, std::size_t Index_> struct Segment/*{{{*/
123 {
124  static_assert(Index_ < Pieces_, "You found a bug in Vc. Please report.");
125 
126  using type = T_;
    // T_ without references and cv-qualifiers; used to query Size/size()/EntryType.
127  using type_decayed = typename std::decay<type>::type;
128  static constexpr std::size_t Pieces = Pieces_;
129  static constexpr std::size_t Index = Index_;
    // SimdArray type able to hold one segment: size() / Pieces entries. The entry
    // type is type_decayed::EntryType if Traits::is_simd_vector holds, float
    // otherwise.
    // NOTE(review): the float fallback is presumably meant for mask types (Split
    // builds Segments of mask_type) - confirm.
130  using simd_array_type = SimdArray<
131  typename std::conditional<Traits::is_simd_vector<type_decayed>::value,
132  typename type_decayed::EntryType, float>::type,
133  type_decayed::size() / Pieces>;
134 
    // The wrapped object (all of it, not just this segment).
135  type data;
136 
    // Index into `data` of the first entry belonging to this segment.
137  static constexpr std::size_t EntryOffset = Index * type_decayed::Size / Pieces;
138 
    // Subscripting is relative to the segment: entry i maps to data[i + EntryOffset].
    // Both overloads have the same return type, namely that of subscripting a
    // `type` value.
139  decltype(std::declval<type>()[0]) operator[](size_t i) { return data[i + EntryOffset]; }
140  decltype(std::declval<type>()[0]) operator[](size_t i) const { return data[i + EntryOffset]; }
141 
    // Converts the segment into a simd_array_type by calling simd_cast with Index
    // as the second template argument.
142  simd_array_type asSimdArray() const
143  {
144  return simd_cast<simd_array_type, Index>(data);
145  }
146 };/*}}}*/
147 
148 //Segment<T *, ...> specialization {{{
// Variant of Segment for a pointer to the object to be segmented: `data` is a
// T_ * and asSimdArray() yields a pointer instead of a value.
149 template <typename T_, std::size_t Pieces_, std::size_t Index_>
150 struct Segment<T_ *, Pieces_, Index_> {
151  static_assert(Index_ < Pieces_, "You found a bug in Vc. Please report.");
152 
153  using type = T_ *;
    // The pointee type without references and cv-qualifiers.
154  using type_decayed = typename std::decay<T_>::type;
155  static constexpr size_t Pieces = Pieces_;
156  static constexpr size_t Index = Index_;
    // Pointer to a SimdArray with size() / Pieces entries. The entry type is
    // type_decayed::VectorEntryType if Traits::is_simd_vector holds, float
    // otherwise. (The primary template uses EntryType at this place.)
157  using simd_array_type = SimdArray<
158  typename std::conditional<Traits::is_simd_vector<type_decayed>::value,
159  typename type_decayed::VectorEntryType, float>::type,
160  type_decayed::size() / Pieces> *;
161 
    // Pointer to the complete object.
162  type data;
163 
    // Entry index at which this segment starts. Within the visible code it is only
    // referenced by the commented-out operator[] at the end of the struct.
164  static constexpr std::size_t EntryOffset = Index * type_decayed::size() / Pieces;
165 
    // Reinterprets `data` as a pointer to SimdArray objects of segment size and
    // advances it by Index such objects.
166  simd_array_type asSimdArray() const
167  {
168  return reinterpret_cast<
169 #ifdef Vc_GCC
170  // GCC might ICE if this type is declared with may_alias. If it doesn't
171  // ICE it warns about ignoring the attribute.
172  typename std::remove_pointer<simd_array_type>::type
173 #else
174  MayAlias<typename std::remove_pointer<simd_array_type>::type>
175 #endif
176  *>(data) +
177  Index;
178  }
179 
180  //decltype(std::declval<type>()[0]) operator[](size_t i) { return data[i + EntryOffset]; }
181  //decltype(std::declval<type>()[0]) operator[](size_t i) const { return data[i + EntryOffset]; }
182 };/*}}}*/
183 
// Empty tag type that carries a type T and a compile-time Offset. In this file
// Split::hiImpl returns it with T = VectorSpecialInitializerIndexesFromZero to
// express "indexes from zero, shifted by Offset".
193 template <typename T, std::size_t Offset> struct AddOffset
194 {
195  constexpr AddOffset() = default;
196 };
197 
198 // class Split {{{1
// Split<secondOffset>: static helpers that hand out the first (`lo`) and the second
// (`hi`) half of an argument. `secondOffset` is what gets added for the second half
// where an offset is needed: pointers passed with Operations::gather /
// Operations::scatter and the IndexesFromZero initializer. Arguments for which no
// loImpl/hiImpl overload exists are passed through unchanged.
207 template <std::size_t secondOffset> class Split
208 {
    // IndexesFromZero: the second half is expressed as an AddOffset tag.
209  static Vc_INTRINSIC AddOffset<VectorSpecialInitializerIndexesFromZero, secondOffset>
210  hiImpl(VectorSpecialInitializerIndexesFromZero)
211  {
212  return {};
213  }
    // Splitting an already offset tag again accumulates the offsets.
214  template <std::size_t Offset>
215  static Vc_INTRINSIC
216  AddOffset<VectorSpecialInitializerIndexesFromZero, Offset + secondOffset>
217  hiImpl(AddOffset<VectorSpecialInitializerIndexesFromZero, Offset>)
218  {
219  return {};
220  }
221 
222  // split composite SimdArray
    // Only enabled for N != M; the halves are whatever internal_data0 /
    // internal_data1 return for the array.
223  template <typename U, std::size_t N, typename V, std::size_t M,
224  typename = enable_if<N != M>>
225  static Vc_INTRINSIC auto loImpl(const SimdArray<U, N, V, M> &x)
226  -> decltype(internal_data0(x))
227  {
228  return internal_data0(x);
229  }
230  template <typename U, std::size_t N, typename V, std::size_t M,
231  typename = enable_if<N != M>>
232  static Vc_INTRINSIC auto hiImpl(const SimdArray<U, N, V, M> &x)
233  -> decltype(internal_data1(x))
234  {
235  return internal_data1(x);
236  }
    // Pointer variants: return the addresses of the two halves.
237  template <typename U, std::size_t N, typename V, std::size_t M,
238  typename = enable_if<N != M>>
239  static Vc_INTRINSIC auto loImpl(SimdArray<U, N, V, M> *x)
240  -> decltype(&internal_data0(*x))
241  {
242  return &internal_data0(*x);
243  }
244  template <typename U, std::size_t N, typename V, std::size_t M,
245  typename = enable_if<N != M>>
246  static Vc_INTRINSIC auto hiImpl(SimdArray<U, N, V, M> *x)
247  -> decltype(&internal_data1(*x))
248  {
249  return &internal_data1(*x);
250  }
251 
252  // split atomic SimdArray
    // SimdArray<U, N, V, N>: internal_data(x) is wrapped in a Segment that marks
    // piece 0 or piece 1 of 2.
253  template <typename U, std::size_t N, typename V>
254  static Vc_INTRINSIC Segment<V, 2, 0> loImpl(const SimdArray<U, N, V, N> &x)
255  {
256  return {internal_data(x)};
257  }
258  template <typename U, std::size_t N, typename V>
259  static Vc_INTRINSIC Segment<V, 2, 1> hiImpl(const SimdArray<U, N, V, N> &x)
260  {
261  return {internal_data(x)};
262  }
    // Pointer variants: the Segment stores the address of internal_data(*x).
263  template <typename U, std::size_t N, typename V>
264  static Vc_INTRINSIC Segment<V *, 2, 0> loImpl(SimdArray<U, N, V, N> *x)
265  {
266  return {&internal_data(*x)};
267  }
268  template <typename U, std::size_t N, typename V>
269  static Vc_INTRINSIC Segment<V *, 2, 1> hiImpl(SimdArray<U, N, V, N> *x)
270  {
271  return {&internal_data(*x)};
272  }
273 
274  // split composite SimdMaskArray
    // NOTE(review): unlike the SimdArray overloads above these carry no N != M
    // constraint; presumably the <U, N, V, N> overloads below are selected for
    // N == M because they are more specialized - confirm.
275  template <typename U, std::size_t N, typename V, std::size_t M>
276  static Vc_INTRINSIC auto loImpl(const SimdMaskArray<U, N, V, M> &x) -> decltype(internal_data0(x))
277  {
278  return internal_data0(x);
279  }
280  template <typename U, std::size_t N, typename V, std::size_t M>
281  static Vc_INTRINSIC auto hiImpl(const SimdMaskArray<U, N, V, M> &x) -> decltype(internal_data1(x))
282  {
283  return internal_data1(x);
284  }
285 
    // SimdMaskArray<U, N, V, N>: internal_data(x) is wrapped in a Segment of the
    // array's mask_type.
286  template <typename U, std::size_t N, typename V>
287  static Vc_INTRINSIC Segment<typename SimdMaskArray<U, N, V, N>::mask_type, 2, 0> loImpl(
288  const SimdMaskArray<U, N, V, N> &x)
289  {
290  return {internal_data(x)};
291  }
292  template <typename U, std::size_t N, typename V>
293  static Vc_INTRINSIC Segment<typename SimdMaskArray<U, N, V, N>::mask_type, 2, 1> hiImpl(
294  const SimdMaskArray<U, N, V, N> &x)
295  {
296  return {internal_data(x)};
297  }
298 
299  // split Vector<T> and Mask<T>
    // True for SIMD vectors that are not SimdArrays and for SIMD masks that are
    // not SimdMaskArrays (according to the Traits predicates).
300  template <typename T>
301  static constexpr bool is_vector_or_mask(){
302  return (Traits::is_simd_vector<T>::value && !Traits::isSimdArray<T>::value) ||
303  (Traits::is_simd_mask<T>::value && !Traits::isSimdMaskArray<T>::value);
304  }
    // The Segment is instantiated with the deduced V, i.e. it holds a reference
    // when an lvalue is passed. The second parameter exists only to disable the
    // overload for other types.
305  template <typename V>
306  static Vc_INTRINSIC Segment<V, 2, 0> loImpl(V &&x, enable_if<is_vector_or_mask<V>()> = nullarg)
307  {
308  return {std::forward<V>(x)};
309  }
310  template <typename V>
311  static Vc_INTRINSIC Segment<V, 2, 1> hiImpl(V &&x, enable_if<is_vector_or_mask<V>()> = nullarg)
312  {
313  return {std::forward<V>(x)};
314  }
315 
316  // generically split Segments
    // Halving piece Index of Pieces gives pieces 2 * Index (lo) and 2 * Index + 1
    // (hi) of 2 * Pieces; the stored data is passed on as is.
317  template <typename V, std::size_t Pieces, std::size_t Index>
318  static Vc_INTRINSIC Segment<V, 2 * Pieces, 2 * Index> loImpl(
319  const Segment<V, Pieces, Index> &x)
320  {
321  return {x.data};
322  }
323  template <typename V, std::size_t Pieces, std::size_t Index>
324  static Vc_INTRINSIC Segment<V, 2 * Pieces, 2 * Index + 1> hiImpl(
325  const Segment<V, Pieces, Index> &x)
326  {
327  return {x.data};
328  }
329 
    // Detection of a usable loImpl overload for T: the (int) overload exists only
    // if loImpl(std::declval<T>()) is well-formed and is the better match for the
    // argument 1; otherwise the (float) overload is chosen.
334  template <typename T, typename = decltype(loImpl(std::declval<T>()))>
335  static std::true_type have_lo_impl(int);
336  template <typename T> static std::false_type have_lo_impl(float);
337  template <typename T> static constexpr bool have_lo_impl()
338  {
339  return decltype(have_lo_impl<T>(1))::value;
340  }
341 
    // Same detection for hiImpl.
342  template <typename T, typename = decltype(hiImpl(std::declval<T>()))>
343  static std::true_type have_hi_impl(int);
344  template <typename T> static std::false_type have_hi_impl(float);
345  template <typename T> static constexpr bool have_hi_impl()
346  {
347  return decltype(have_hi_impl<T>(1))::value;
348  }
350 
351 public:
    // gather with a pointer argument: the first half uses the pointer unchanged,
    // the second half uses the pointer advanced by secondOffset elements.
359  template <typename U>
360  static Vc_INTRINSIC const U *lo(Operations::gather, const U *ptr)
361  {
362  return ptr;
363  }
364  template <typename U>
365  static Vc_INTRINSIC const U *hi(Operations::gather, const U *ptr)
366  {
367  return ptr + secondOffset;
368  }
    // gather with a non-pointer argument: split it via loImpl / hiImpl.
369  template <typename U, typename = enable_if<!std::is_pointer<U>::value>>
370  static Vc_ALWAYS_INLINE decltype(loImpl(std::declval<U>()))
371  lo(Operations::gather, U &&x)
372  {
373  return loImpl(std::forward<U>(x));
374  }
375  template <typename U, typename = enable_if<!std::is_pointer<U>::value>>
376  static Vc_ALWAYS_INLINE decltype(hiImpl(std::declval<U>()))
377  hi(Operations::gather, U &&x)
378  {
379  return hiImpl(std::forward<U>(x));
380  }
    // scatter with a pointer argument: same pointer arithmetic as for gather.
381  template <typename U>
382  static Vc_INTRINSIC const U *lo(Operations::scatter, const U *ptr)
383  {
384  return ptr;
385  }
386  template <typename U>
387  static Vc_INTRINSIC const U *hi(Operations::scatter, const U *ptr)
388  {
389  return ptr + secondOffset;
390  }
392 
    // General case: available only if a matching loImpl / hiImpl exists (the
    // return type is the SFINAE condition).
404  template <typename U>
405  static Vc_ALWAYS_INLINE decltype(loImpl(std::declval<U>())) lo(U &&x)
406  {
407  return loImpl(std::forward<U>(x));
408  }
409  template <typename U>
410  static Vc_ALWAYS_INLINE decltype(hiImpl(std::declval<U>())) hi(U &&x)
411  {
412  return hiImpl(std::forward<U>(x));
413  }
414 
    // Fallback: without a matching loImpl / hiImpl the argument is forwarded to
    // the caller unchanged, so both halves receive the same value.
415  template <typename U>
416  static Vc_ALWAYS_INLINE enable_if<!have_lo_impl<U>(), U> lo(U &&x)
417  {
418  return std::forward<U>(x);
419  }
420  template <typename U>
421  static Vc_ALWAYS_INLINE enable_if<!have_hi_impl<U>(), U> hi(U &&x)
422  {
423  return std::forward<U>(x);
424  }
426 };
427 
428 // actual_value {{{1
// actual_value(op, x): removes the wrapper from x. The operation tag is taken by
// value and is not used by any overload.
//  - SimdArray<U, M, V, M> (by const reference): its internal_data.
//  - SimdArray<U, M, V, M> * : the address of its internal_data.
//  - Segment rvalue: the segment converted via asSimdArray().
//  - SimdMaskArray<U, M, V, M> (reference / pointer): as for SimdArray, with
//    V::Mask as the resulting type.
429 template <typename Op, typename U, std::size_t M, typename V>
430 static Vc_INTRINSIC auto actual_value(Op, const SimdArray<U, M, V, M> &array) -> const V &
431 {
432  return internal_data(array);
433 }
434 template <typename Op, typename U, std::size_t M, typename V>
435 static Vc_INTRINSIC auto actual_value(Op, SimdArray<U, M, V, M> *array) -> V *
436 {
437  return &internal_data(*array);
438 }
439 template <typename Op, typename T, size_t Pieces, size_t Index>
440 static Vc_INTRINSIC auto actual_value(Op, Segment<T, Pieces, Index> &&segment) ->
441  typename Segment<T, Pieces, Index>::simd_array_type
442 {
443  return segment.asSimdArray();
444 }
445 
446 template <typename Op, typename U, std::size_t M, typename V>
447 static Vc_INTRINSIC auto actual_value(Op, const SimdMaskArray<U, M, V, M> &mask) -> const typename V::Mask &
448 {
449  return internal_data(mask);
450 }
451 template <typename Op, typename U, std::size_t M, typename V>
452 static Vc_INTRINSIC auto actual_value(Op, SimdMaskArray<U, M, V, M> *mask) -> typename V::Mask *
453 {
454  return &internal_data(*mask);
455 }
456 
457 // unpackArgumentsAuto {{{1
473 
// Selected with std::true_type: the argument is replaced by
// actual_value(operation, argument). The overload is viable only if that call is
// well-formed, because the call expression determines the return type.
476 template <typename Op, typename Arg>
477 auto conditionalUnpack(std::true_type, Op operation, Arg &&argument)
478  -> decltype(actual_value(std::declval<Op &>(), std::declval<Arg>()))
479 {
480  return actual_value(operation, std::forward<Arg>(argument));
481 }
// Selected with std::false_type: the argument is handed back unchanged (perfectly
// forwarded); the operation object is ignored.
483 template <typename Op, typename Arg> auto conditionalUnpack(std::false_type, Op, Arg &&argument) -> Arg
484 {
485  return std::forward<Arg>(argument);
486 }
487 
// std::integral_constant<bool, ...> that is true exactly if bit number B of A is
// set. The third parameter N does not enter the computation.
489 template <size_t A, size_t B, size_t N>
490 using selectorType = std::integral_constant<bool, ((A & (1 << B)) != 0)>;
491 
// Returns the number of types in the parameter pack, i.e. sizeof...(Ts), through a
// constexpr function call.
// NOTE(review): judging by the name this works around an ICC issue with
// sizeof... in template arguments - confirm.
493 template <typename... Ts> static constexpr auto icc_sizeof_workaround() -> size_t
494 {
495  return sizeof...(Ts);
496 }
497 
// Attempt number I: calls op(r, a_0, a_1, ...) where a_k is
// actual_value(op, args_k) if bit k of I is set (see selectorType) and args_k
// itself otherwise. The return type is the type of exactly that call expression,
// so this overload drops out of overload resolution if the call is ill-formed for
// this I. The leading int parameter makes it the preferred overload when the
// caller passes int().
// NOTE(review): the body contains no return statement, so the call expression is
// presumably expected to have type void - confirm.
499 template <size_t I, typename Op, typename R, typename... Args, size_t... Indexes>
500 Vc_INTRINSIC decltype(std::declval<Op &>()(
501  std::declval<R &>(),
502  conditionalUnpack(selectorType<I, Indexes, icc_sizeof_workaround<Args...>()>(),
503  std::declval<Op &>(), std::declval<Args>())...))
504 unpackArgumentsAutoImpl(int, index_sequence<Indexes...>, Op op, R &&r, Args &&... args)
505 {
506  op(std::forward<R>(r),
507  conditionalUnpack(selectorType<I, Indexes, icc_sizeof_workaround<Args...>()>(), op,
508  std::forward<Args>(args))...);
509 }
510 
// Fallback for attempt number I: chosen when the (int, ...) overload is not viable
// for this I (int() then converts to the float parameter). It retries with I + 1.
// The static_assert stops the recursion with a diagnostic once I reaches
// 2^sizeof...(Args), i.e. when every combination of unpacked arguments has been
// tried.
512 template <size_t I, typename Op, typename R, typename... Args, size_t... Indexes>
513 Vc_INTRINSIC void unpackArgumentsAutoImpl(float, index_sequence<Indexes...> is, Op op,
514  R &&r, Args &&... args)
515 {
516  static_assert(I < (1 << sizeof...(Args)),
517  "Vc or compiler bug. Please report. Failed to find a combination of "
518  "actual_value(arg) transformations that allows calling Op.");
519  unpackArgumentsAutoImpl<I + 1, Op, R, Args...>(int(), is, op, std::forward<R>(r),
520  std::forward<Args>(args)...);
521 }
522 
// Entry point of the argument-unpacking machinery: calls `op` with `r` and with
// `args`, where each argument is passed either as is or after conversion through
// actual_value(op, arg). The combination is searched by unpackArgumentsAutoImpl,
// starting at index 0 (no conversions), or at index 1 if R is std::nullptr_t.
//
// op   - operation functor to invoke.
// r    - first argument of op (result object, or nullptr); forwarded unchanged.
// args - remaining arguments; candidates for actual_value conversion.
524 template <typename Op, typename R, typename... Args>
525 Vc_INTRINSIC void unpackArgumentsAuto(Op op, R &&r, Args &&... args)
526 {
    // The start index is the first template argument of unpackArgumentsAutoImpl;
    // passing int() prefers the overload that actually performs the call.
527  unpackArgumentsAutoImpl<
528  // if R is nullptr_t then the return type cannot enforce that actually any
529  // unwrapping of the SimdArray types happens. Thus, you could get an endless loop
530  // of the SimdArray function overload calling itself, if the index I starts at 0
531  // (0 means no argument transformations via actual_value). Therefore, start at 1
532  // if R is nullptr_t:
533  std::is_same<R, std::nullptr_t>::value ? 1 : 0>(
534  int(), make_index_sequence<sizeof...(Args)>(), op, std::forward<R>(r),
535  std::forward<Args>(args)...);
536 }
538 
539 //}}}1
541 } // namespace Common
542 } // namespace Vc
543 
544 #endif // VC_COMMON_SIMDARRAYHELPER_H_
545 
546 // vim: foldmethod=marker
Vc::Vector< T > frexp(const Vc::Vector< T > &x, Vc::SimdArray< int, size()> *e)
Convert floating-point number to fractional and integral components.
Vc::Vector< T > log2(const Vc::Vector< T > &v)
Vc::Vector< T > exp(const Vc::Vector< T > &v)
Vc::Vector< T > sin(const Vc::Vector< T > &v)
Vc::Vector< T > cos(const Vc::Vector< T > &v)
Vc::Vector< T > min(const Vc::Vector< T > &x, const Vc::Vector< T > &y)
Vc::Vector< T > reciprocal(const Vc::Vector< T > &v)
Returns the reciprocal of v.
Vc::Vector< T > ldexp(Vc::Vector< T > x, Vc::SimdArray< int, size()> e)
Multiply floating-point number by integral power of 2.
Vc::Vector< T > abs(const Vc::Vector< T > &v)
Returns the absolute value of v.
Arg conditionalUnpack(std::false_type, Op, Arg &&arg)
forwards arg to its return value
Vc::Vector< T > max(const Vc::Vector< T > &x, const Vc::Vector< T > &y)
Definition: vector.h:258
Vc::Vector< T > log(const Vc::Vector< T > &v)
void unpackArgumentsAuto(Op op, R &&r, Args &&...args)
The interface to start the machinery.
Vc::Vector< T > fma(Vc::Vector< T > a, Vc::Vector< T > b, Vc::Vector< T > c)
Multiplies a by b and then adds c, without rounding between the multiplication and the addition.
enable_if< std::is_same< To, Traits::decay< From > >::value, To > simd_cast(From &&x)
Casts the argument x from type From to type To.
Definition: simd_cast.h:49
Vc::Vector< T > round(const Vc::Vector< T > &v)
Returns the closest integer to v; 0.5 is rounded to even.
Vc::Vector< T > rsqrt(const Vc::Vector< T > &v)
Returns the reciprocal square root of v.
Vc::Vector< T > log10(const Vc::Vector< T > &v)
std::integral_constant< bool,((A &(1<< B))!=0)> selectorType
A std::true_type or std::false_type that selects whether the argument with index B should be unpacked.
Vc::Vector< T > atan2(const Vc::Vector< T > &y, const Vc::Vector< T > &x)
Calculates the angle given the lengths of the opposite and adjacent legs in a right triangle.
Vc::Vector< T > atan(const Vc::Vector< T > &v)
Vc::Vector< T > asin(const Vc::Vector< T > &v)
void assign(Adapter< S, T, N > &a, size_t i, const S &x)
Assigns one scalar object x to a SIMD slot at offset i in the simdized object a.
Definition: simdize.h:938
Vc::Mask< T > isfinite(const Vc::Vector< T > &x)
void unpackArgumentsAutoImpl(float, index_sequence< Indexes... > is, Op op, R &&r, Args &&...args)
the current actual_value calls don't work: recurse to I + 1
Vc::Mask< T > isnan(const Vc::Vector< T > &x)
#define Vc_GCC
This macro is defined to a number identifying the GCC version if the current translation unit is compiled with GCC.
Definition: global.h:66
Vc::Vector< T > sqrt(const Vc::Vector< T > &v)
Returns the square root of v.