// Vc 1.3.80-dev
// SIMD Vector Classes for C++
// simdarray.h
1 /* This file is part of the Vc library. {{{
2 Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>
3 
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions are met:
6  * Redistributions of source code must retain the above copyright
7  notice, this list of conditions and the following disclaimer.
8  * Redistributions in binary form must reproduce the above copyright
9  notice, this list of conditions and the following disclaimer in the
10  documentation and/or other materials provided with the distribution.
11  * Neither the names of contributing organizations nor the
12  names of its contributors may be used to endorse or promote products
13  derived from this software without specific prior written permission.
14 
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 
26 }}}*/
27 
28 #ifndef VC_COMMON_SIMDARRAY_H_
29 #define VC_COMMON_SIMDARRAY_H_
30 
31 //#define Vc_DEBUG_SIMD_CAST 1
32 //#define Vc_DEBUG_SORTED 1
33 //#include "../IO"
34 
35 #include <array>
36 
37 #include "writemaskedvector.h"
38 #include "simdarrayhelper.h"
39 #include "simdmaskarray.h"
40 #include "utility.h"
41 #include "interleave.h"
42 #include "indexsequence.h"
43 #include "transpose.h"
44 #include "macros.h"
45 
46 namespace Vc_VERSIONED_NAMESPACE
47 {
48 // select_best_vector_type {{{
49 namespace Common
50 {
53 
57 template <std::size_t N, class... Candidates> struct select_best_vector_type_impl;
58 // last candidate; this one must work; assume it does:
59 template <std::size_t N, class T> struct select_best_vector_type_impl<N, T> {
60  using type = T;
61 };
62 // check the next candidate; use it if N >= T::size(); recurse otherwise:
63 template <std::size_t N, class T, class... Candidates>
64 struct select_best_vector_type_impl<N, T, Candidates...> {
65  using type = typename std::conditional<
66  (N < T::Size), typename select_best_vector_type_impl<N, Candidates...>::type,
67  T>::type;
68 };
// Front end for the metafunction above: the candidate list is assembled from
// the implementations enabled at compile time, widest first. Scalar::Vector
// is always appended as the fallback that is guaranteed to exist.
template <class T, std::size_t N>
struct select_best_vector_type : select_best_vector_type_impl<N,
#ifdef Vc_IMPL_AVX2
                                     Vc::AVX2::Vector<T>,
#elif defined Vc_IMPL_AVX
                                     Vc::AVX::Vector<T>,
#endif
#ifdef Vc_IMPL_SSE
                                     Vc::SSE::Vector<T>,
#endif
                                     Vc::Scalar::Vector<T>> {
};
82 } // namespace Common
83 // }}}
// internal namespace (product & sum helper) {{{1
namespace internal
{
// Binary combiners used by the SimdArray reduction implementations: they fold
// the reduction results of two storage halves into one value.
template <typename T> T Vc_INTRINSIC Vc_PURE product_helper_(const T &l, const T &r) { return l * r; }
template <typename T> T Vc_INTRINSIC Vc_PURE sum_helper_(const T &l, const T &r) { return l + r; }
}  // namespace internal
90 
// min & max declarations {{{1
// NOTE(review): this listing is missing the lines that carry the declared
// function names and first parameters here (presumably
// `SimdArray<T, N, V, M> min(const SimdArray<T, N, V, M> &x,` and the
// matching `max` — TODO confirm against the upstream file). Only the visible
// fragments are reproduced.
template <typename T, std::size_t N, typename V, std::size_t M>
                           const SimdArray<T, N, V, M> &y);
template <typename T, std::size_t N, typename V, std::size_t M>
                           const SimdArray<T, N, V, M> &y);

// SimdArray class {{{1

// atomic SimdArray {{{1
// Name used by the shared gather/scatter interface headers included below.
#define Vc_CURRENT_CLASS_NAME SimdArray
105 
/**
 * "Atomic" specialization of SimdArray: selected when the requested width N
 * equals the width of a single native vector (the fourth template argument
 * equals N). The whole array lives in one VectorType_ member, and every
 * operation forwards 1:1 to it.
 */
template <typename T, std::size_t N, typename VectorType_>
class SimdArray<T, N, VectorType_, N>
{
    // Only the six arithmetic types supported by every Vc backend are allowed.
    static_assert(std::is_same<T, double>::value || std::is_same<T, float>::value ||
                      std::is_same<T, int32_t>::value ||
                      std::is_same<T, uint32_t>::value ||
                      std::is_same<T, int16_t>::value ||
                      std::is_same<T, uint16_t>::value,
                  "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, "
                  "int16_t, uint16_t }");

public:
    using VectorType = VectorType_;
    using vector_type = VectorType;
    using storage_type = vector_type;
    using vectorentry_type = typename vector_type::VectorEntryType;
    using value_type = T;
    // NOTE(review): the listing this chunk was extracted from is missing the
    // alias lines that should appear here (presumably `using mask_type = ...`,
    // which `Mask` below refers to) — confirm against the upstream file.
    static constexpr std::size_t size() { return N; }
    using Mask = mask_type;
    using MaskType = Mask;
    using MaskArgument = const MaskType &;
    using VectorEntryType = vectorentry_type;
    using EntryType = value_type;
    // NOTE(review): one further alias line is missing from the listing here.
    using AsArg = const SimdArray &;
    // Proxy type returned by the non-const operator[].
    using reference = Detail::ElementReference<SimdArray>;
    static constexpr std::size_t Size = size();
    static constexpr std::size_t MemoryAlignment = storage_type::MemoryAlignment;

    // zero init
#ifndef Vc_MSVC // bogus error C2580
    Vc_INTRINSIC SimdArray() = default;
#endif

    // default copy ctor/operator
    Vc_INTRINSIC SimdArray(const SimdArray &) = default;
    Vc_INTRINSIC SimdArray(SimdArray &&) = default;
    Vc_INTRINSIC SimdArray &operator=(const SimdArray &) = default;

    // broadcast
    Vc_INTRINSIC SimdArray(const value_type &a) : data(a) {}
    Vc_INTRINSIC SimdArray(value_type &a) : data(a) {}
    Vc_INTRINSIC SimdArray(value_type &&a) : data(a) {}
    // Extra overload so a plain `int` literal broadcasts into a non-int array
    // (e.g. SimdArray<float, N> x(0);) without ambiguity.
    template <
        typename U,
        typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
    Vc_INTRINSIC SimdArray(U a)
        : SimdArray(static_cast<value_type>(a))
    {
    }

    // implicit casts
    // Source array is backed by a single vector of equal width: one cast.
    template <class U, class V, class..., class = enable_if<N == V::Size>>
    Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x)
        : data(simd_cast<vector_type>(internal_data(x)))
    {
    }
    // Source array is stored in two halves: cast from both.
    template <class U, class V, class..., class...,
              class = enable_if<(N > V::Size && N <= 2 * V::Size)>>
    Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x)
        : data(simd_cast<vector_type>(internal_data(internal_data0(x)),
                                      internal_data(internal_data1(x))))
    {
    }
    // Source array is stored in four quarters: cast from all four.
    template <class U, class V, class..., class..., class...,
              class = enable_if<(N > 2 * V::Size && N <= 4 * V::Size)>>
    Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x)
        : data(simd_cast<vector_type>(internal_data(internal_data0(internal_data0(x))),
                                      internal_data(internal_data1(internal_data0(x))),
                                      internal_data(internal_data0(internal_data1(x))),
                                      internal_data(internal_data1(internal_data1(x)))))
    {
    }

    // Construct from a slice (Segment) of a larger vector; Index selects which
    // part of x.data to cast from.
    template <typename V, std::size_t Pieces, std::size_t Index>
    Vc_INTRINSIC SimdArray(Common::Segment<V, Pieces, Index> &&x)
        : data(simd_cast<vector_type, Index>(x.data))
    {
    }

    Vc_INTRINSIC SimdArray(const std::initializer_list<value_type> &init)
        : data(init.begin(), Vc::Unaligned)
    {
#if defined Vc_CXX14 && 0 // doesn't compile yet
        static_assert(init.size() == size(), "The initializer_list argument to "
                                             "SimdArray<T, N> must contain exactly N "
                                             "values.");
#else
        Vc_ASSERT(init.size() == size());
#endif
    }

    // implicit conversion from underlying vector_type
    template <
        typename V,
        typename = enable_if<Traits::is_simd_vector<V>::value && !Traits::isSimdArray<V>::value>>
    Vc_INTRINSIC SimdArray(const V &x)
        : data(simd_cast<vector_type>(x))
    {
    }

    // implicit conversion to Vector<U, AnyAbi> for if Vector<U, AnyAbi>::size() == N and
    // T implicitly convertible to U
    template <typename U, typename A,
              typename =
                  enable_if<std::is_convertible<T, U>::value && Vector<U, A>::Size == N &&
                            !std::is_same<A, simd_abi::fixed_size<N>>::value>>
    Vc_INTRINSIC operator Vector<U, A>() const
    {
        return simd_cast<Vector<U, A>>(data);
    }
    operator fixed_size_simd<T, N>() const
    {
        return static_cast<fixed_size_simd<T, N>>(data);
    }

#include "gatherinterface.h"
#include "scatterinterface.h"

    // forward all remaining ctors
    template <typename... Args,
              typename = enable_if<!Traits::is_cast_arguments<Args...>::value &&
                                   !Traits::is_gather_signature<Args...>::value &&
                                   !Traits::is_initializer_list<Args...>::value>>
    explicit Vc_INTRINSIC SimdArray(Args &&... args)
        : data(std::forward<Args>(args)...)
    {
    }

    // IndexesFromZero with a compile-time offset: build 0,1,2,... and then
    // add Offset to every element.
    template <std::size_t Offset>
    explicit Vc_INTRINSIC SimdArray(
        Common::AddOffset<VectorSpecialInitializerIndexesFromZero, Offset>)
        : data(Vc::IndexesFromZero)
    {
        data += value_type(Offset);
    }

    Vc_INTRINSIC void setZero() { data.setZero(); }
    Vc_INTRINSIC void setZero(mask_type k) { data.setZero(internal_data(k)); }
    Vc_INTRINSIC void setZeroInverted() { data.setZeroInverted(); }
    Vc_INTRINSIC void setZeroInverted(mask_type k) { data.setZeroInverted(internal_data(k)); }

    Vc_INTRINSIC void setQnan() { data.setQnan(); }
    Vc_INTRINSIC void setQnan(mask_type m) { data.setQnan(internal_data(m)); }

    // internal: execute the specified Operation, writing the result into a
    // fresh SimdArray.
    template <typename Op, typename... Args>
    static Vc_INTRINSIC SimdArray fromOperation(Op op, Args &&... args)
    {
        SimdArray r;
        Common::unpackArgumentsAuto(op, r.data, std::forward<Args>(args)...);
        return r;
    }

    // internal: execute the specified Operation for its side effects only.
    template <typename Op, typename... Args>
    static Vc_INTRINSIC void callOperation(Op op, Args &&... args)
    {
        Common::unpackArgumentsAuto(op, nullptr, std::forward<Args>(args)...);
    }

    static Vc_INTRINSIC SimdArray Zero()
    {
        return SimdArray(Vc::Zero);
    }
    static Vc_INTRINSIC SimdArray One()
    {
        return SimdArray(Vc::One);
    }
    static Vc_INTRINSIC SimdArray IndexesFromZero()
    {
        return SimdArray(Vc::IndexesFromZero);
    }
    static Vc_INTRINSIC SimdArray Random()
    {
        return fromOperation(Common::Operations::random());
    }

    template <typename... Args> Vc_INTRINSIC void load(Args &&... args)
    {
        data.load(std::forward<Args>(args)...);
    }

    template <typename... Args> Vc_INTRINSIC void store(Args &&... args) const
    {
        data.store(std::forward<Args>(args)...);
    }

    Vc_INTRINSIC mask_type operator!() const
    {
        return {private_init, !data};
    }

    Vc_INTRINSIC SimdArray operator-() const
    {
        return {private_init, -data};
    }

    Vc_INTRINSIC SimdArray operator+() const { return *this; }

    Vc_INTRINSIC SimdArray operator~() const
    {
        return {private_init, ~data};
    }

    // Shifts are only defined for integral T; SFINAE on both operand types.
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC Vc_CONST SimdArray operator<<(U x) const
    {
        return {private_init, data << x};
    }
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC SimdArray &operator<<=(U x)
    {
        data <<= x;
        return *this;
    }
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC Vc_CONST SimdArray operator>>(U x) const
    {
        return {private_init, data >> x};
    }
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC SimdArray &operator>>=(U x)
    {
        data >>= x;
        return *this;
    }

    // Arithmetic, bitwise and shift operators all forward to the wrapped
    // vector; the compound form pastes `op` and `=` via token concatenation.
#define Vc_BINARY_OPERATOR_(op)                                                          \
    Vc_INTRINSIC Vc_CONST SimdArray operator op(const SimdArray &rhs) const              \
    {                                                                                    \
        return {private_init, data op rhs.data};                                         \
    }                                                                                    \
    Vc_INTRINSIC SimdArray &operator op##=(const SimdArray &rhs)                         \
    {                                                                                    \
        data op## = rhs.data;                                                            \
        return *this;                                                                    \
    }
    Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_);
    Vc_ALL_BINARY(Vc_BINARY_OPERATOR_);
    Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_);
#undef Vc_BINARY_OPERATOR_

    // Comparisons return the corresponding mask type.
#define Vc_COMPARES(op)                                                                  \
    Vc_INTRINSIC mask_type operator op(const SimdArray &rhs) const                       \
    {                                                                                    \
        return {private_init, data op rhs.data};                                         \
    }
    Vc_ALL_COMPARES(Vc_COMPARES);
#undef Vc_COMPARES

    Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC MaskType isNegative() const
    {
        return {private_init, isnegative(data)};
    }

private:
    friend reference;
    // Element accessors used by the `reference` proxy returned by operator[].
    Vc_INTRINSIC static value_type get(const SimdArray &o, int i) noexcept
    {
        return o.data[i];
    }
    template <typename U>
    Vc_INTRINSIC static void set(SimdArray &o, int i, U &&v) noexcept(
        noexcept(std::declval<value_type &>() = v))
    {
        o.data[i] = v;
    }

public:
    // Non-const subscript returns a writable proxy object.
    Vc_INTRINSIC reference operator[](size_t i) noexcept
    {
        static_assert(noexcept(reference{std::declval<SimdArray &>(), int()}), "");
        return {*this, int(i)};
    }
    Vc_INTRINSIC value_type operator[](size_t i) const noexcept
    {
        return get(*this, int(i));
    }

    // Write-masked view: x(mask) = expr; assigns only where mask is true.
    Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(const mask_type &k)
    {
        return {*this, k};
    }

    Vc_INTRINSIC void assign(const SimdArray &v, const mask_type &k)
    {
        data.assign(v.data, internal_data(k));
    }

    // reductions ////////////////////////////////////////////////////////
#define Vc_REDUCTION_FUNCTION_(name_)                                                    \
    Vc_INTRINSIC Vc_PURE value_type name_() const { return data.name_(); }               \
    Vc_INTRINSIC Vc_PURE value_type name_(mask_type mask) const                          \
    {                                                                                    \
        return data.name_(internal_data(mask));                                          \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
    Vc_REDUCTION_FUNCTION_(min);
    Vc_REDUCTION_FUNCTION_(max);
    Vc_REDUCTION_FUNCTION_(product);
    Vc_REDUCTION_FUNCTION_(sum);
#undef Vc_REDUCTION_FUNCTION_
    Vc_INTRINSIC Vc_PURE SimdArray partialSum() const
    {
        return {private_init, data.partialSum()};
    }

    template <typename F> Vc_INTRINSIC SimdArray apply(F &&f) const
    {
        return {private_init, data.apply(std::forward<F>(f))};
    }
    template <typename F> Vc_INTRINSIC SimdArray apply(F &&f, const mask_type &k) const
    {
        return {private_init, data.apply(std::forward<F>(f), k)};
    }

    Vc_INTRINSIC SimdArray shifted(int amount) const
    {
        return {private_init, data.shifted(amount)};
    }

    // Shift with carry-in from a second array of possibly different width NN.
    template <std::size_t NN>
    Vc_INTRINSIC SimdArray shifted(int amount, const SimdArray<value_type, NN> &shiftIn)
        const
    {
        return {private_init, data.shifted(amount, simd_cast<VectorType>(shiftIn))};
    }

    Vc_INTRINSIC SimdArray rotated(int amount) const
    {
        return {private_init, data.rotated(amount)};
    }

    Vc_DEPRECATED("use exponent(x) instead") Vc_INTRINSIC SimdArray exponent() const
    {
        return {private_init, exponent(data)};
    }

    Vc_INTRINSIC SimdArray interleaveLow(SimdArray x) const
    {
        return {private_init, data.interleaveLow(x.data)};
    }
    Vc_INTRINSIC SimdArray interleaveHigh(SimdArray x) const
    {
        return {private_init, data.interleaveHigh(x.data)};
    }

    Vc_INTRINSIC SimdArray reversed() const
    {
        return {private_init, data.reversed()};
    }

    Vc_INTRINSIC SimdArray sorted() const
    {
        return {private_init, data.sorted()};
    }

    // Fill from a generator functor: gen(i) yields element i.
    template <typename G> static Vc_INTRINSIC SimdArray generate(const G &gen)
    {
        return {private_init, VectorType::generate(gen)};
    }

    Vc_DEPRECATED("use copysign(x, y) instead") Vc_INTRINSIC SimdArray
        copySign(const SimdArray &x) const
    {
        return {private_init, Vc::copysign(data, x.data)};
    }

    friend VectorType &internal_data<>(SimdArray &x);
    friend const VectorType &internal_data<>(const SimdArray &x);

    // Private-tag constructor used internally to wrap an existing vector
    // without conversion.
    Vc_INTRINSIC SimdArray(private_init_t, VectorType &&x) : data(std::move(x)) {}

    Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(storage_type));

private:
    // The alignas attribute attached to the class declaration above is ignored by ICC
    // 17.0.0 (at least). So just move the alignas attribute down here where it works for
    // all compilers.
    alignas(static_cast<std::size_t>(
        Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value * sizeof(VectorType_) /
                                 VectorType_::size()>::value)) storage_type data;
};
// Out-of-class definition required (pre-C++17) for the static constexpr member.
template <typename T, std::size_t N, typename VectorType> constexpr std::size_t SimdArray<T, N, VectorType, N>::Size;
// NOTE(review): the next template head's definition line is missing from this
// listing (presumably the out-of-class definition of MemoryAlignment —
// confirm against the upstream file).
template <typename T, std::size_t N, typename VectorType>
// Friend accessors exposing the wrapped native vector of the atomic
// specialization. Vc_INTRINSIC is omitted for MSVC, which rejects it here.
template <typename T, std::size_t N, typename VectorType>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
VectorType &internal_data(SimdArray<T, N, VectorType, N> &x)
{
    return x.data;
}
template <typename T, std::size_t N, typename VectorType>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
const VectorType &internal_data(const SimdArray<T, N, VectorType, N> &x)
{
    return x.data;
}
532 
533 // unpackIfSegment {{{2
// Pass-through overload: any argument that is not a Common::Segment is
// forwarded unchanged (the Segment overload below unpacks to a SimdArray).
template <typename T> T unpackIfSegment(T &&arg)
{
    return std::forward<T>(arg);
}
// Segment overload: a Common::Segment (a 1/Pieces slice, at position Index,
// of a larger vector) is converted to an equivalent SimdArray before use.
template <typename T, size_t Pieces, size_t Index>
auto unpackIfSegment(Common::Segment<T, Pieces, Index> &&x) -> decltype(x.asSimdArray())
{
    return x.asSimdArray();
}
540 
// gatherImplementation {{{2
// NOTE(review): in this listing the lines carrying the four member-function
// signatures (the gatherImplementation / scatterImplementation names and
// their first parameters) are missing — confirm against the upstream file.
// The visible bodies forward to the wrapped vector's gather/scatter, after
// converting any Segment index argument to a SimdArray via unpackIfSegment.
template <typename T, std::size_t N, typename VectorType>
template <typename MT, typename IT>
                                                                       const IT &indexes)
{
    data.gather(mem, unpackIfSegment(indexes));
}
// Masked gather: only elements whose mask bit is set are loaded.
template <typename T, std::size_t N, typename VectorType>
template <typename MT, typename IT>
                                                                       const IT &indexes,
                                                                       MaskArgument mask)
{
    data.gather(mem, unpackIfSegment(indexes), mask);
}

// scatterImplementation {{{2
template <typename T, std::size_t N, typename VectorType>
template <typename MT, typename IT>
                                                                        IT &&indexes) const
{
    data.scatter(mem, unpackIfSegment(std::forward<IT>(indexes)));
}
// Masked scatter: only elements whose mask bit is set are stored.
template <typename T, std::size_t N, typename VectorType>
template <typename MT, typename IT>
                                                                        IT &&indexes,
                                                                        MaskArgument mask) const
{
    data.scatter(mem, unpackIfSegment(std::forward<IT>(indexes)), mask);
}
574 
575 // generic SimdArray {{{1
608 template <typename T, size_t N, typename V, size_t Wt> class SimdArray
609 {
610  static_assert(std::is_same<T, double>::value ||
611  std::is_same<T, float>::value ||
612  std::is_same<T, int32_t>::value ||
613  std::is_same<T, uint32_t>::value ||
614  std::is_same<T, int16_t>::value ||
615  std::is_same<T, uint16_t>::value, "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, int16_t, uint16_t }");
616  static_assert(
617  // either the EntryType and VectorEntryType of the main V are equal
618  std::is_same<typename V::EntryType, typename V::VectorEntryType>::value ||
619  // or N is a multiple of V::size()
620  (N % V::size() == 0),
621  "SimdArray<(un)signed short, N> on MIC only works correctly for N = k * "
622  "MIC::(u)short_v::size(), i.e. k * 16.");
623 
624  using my_traits = SimdArrayTraits<T, N>;
625  static constexpr std::size_t N0 = my_traits::N0;
626  static constexpr std::size_t N1 = my_traits::N1;
627  using Split = Common::Split<N0>;
628  template <typename U, std::size_t K> using CArray = U[K];
629 
630 public:
631  using storage_type0 = typename my_traits::storage_type0;
632  using storage_type1 = typename my_traits::storage_type1;
633  static_assert(storage_type0::size() == N0, "");
634 
638  using vector_type = V;
639  using vectorentry_type = typename storage_type0::vectorentry_type;
640  typedef vectorentry_type alias_type Vc_MAY_ALIAS;
641 
643  using value_type = T;
644 
647 
650 
661  static constexpr std::size_t size() { return N; }
662 
664  using Mask = mask_type;
666  using MaskType = Mask;
667  using MaskArgument = const MaskType &;
668  using VectorEntryType = vectorentry_type;
673  using AsArg = const SimdArray &;
674 
675  using reference = Detail::ElementReference<SimdArray>;
676 
678  static constexpr std::size_t MemoryAlignment =
682 
685 
687  static Vc_INTRINSIC SimdArray Zero()
688  {
689  return SimdArray(Vc::Zero);
690  }
691 
693  static Vc_INTRINSIC SimdArray One()
694  {
695  return SimdArray(Vc::One);
696  }
697 
699  static Vc_INTRINSIC SimdArray IndexesFromZero()
700  {
702  }
703 
705  static Vc_INTRINSIC SimdArray Random()
706  {
707  return fromOperation(Common::Operations::random());
708  }
709 
711  template <typename G> static Vc_INTRINSIC SimdArray generate(const G &gen) // {{{2
712  {
713  auto tmp = storage_type0::generate(gen); // GCC bug: the order of evaluation in
714  // an initializer list is well-defined
715  // (front to back), but GCC 4.8 doesn't
716  // implement this correctly. Therefore
717  // we enforce correct order.
718  return {std::move(tmp),
719  storage_type1::generate([&](std::size_t i) { return gen(i + N0); })};
720  }
722 
725 
727 #ifndef Vc_MSVC // bogus error C2580
728  SimdArray() = default;
729 #endif
730 
734 
736  Vc_INTRINSIC SimdArray(value_type a) : data0(a), data1(a) {}
737  template <
738  typename U,
739  typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
740  SimdArray(U a)
741  : SimdArray(static_cast<value_type>(a))
742  {
743  }
745 
746  // default copy ctor/operator
747  SimdArray(const SimdArray &) = default;
748  SimdArray(SimdArray &&) = default;
749  SimdArray &operator=(const SimdArray &) = default;
750 
751  // load ctor
752  template <typename U, typename Flags = DefaultLoadTag,
753  typename = enable_if<std::is_arithmetic<U>::value &&
754  Traits::is_load_store_flag<Flags>::value>>
755  explicit Vc_INTRINSIC SimdArray(const U *mem, Flags f = Flags())
756  : data0(mem, f), data1(mem + storage_type0::size(), f)
757  {
758  }
759 
760 // MSVC does overload resolution differently and takes the const U *mem overload (I hope)
761 #ifndef Vc_MSVC
762 
768  template <typename U, std::size_t Extent, typename Flags = DefaultLoadTag,
769  typename = enable_if<std::is_arithmetic<U>::value &&
770  Traits::is_load_store_flag<Flags>::value>>
771  explicit Vc_INTRINSIC SimdArray(CArray<U, Extent> &mem, Flags f = Flags())
772  : data0(&mem[0], f), data1(&mem[storage_type0::size()], f)
773  {
774  }
778  template <typename U, std::size_t Extent, typename Flags = DefaultLoadTag,
779  typename = enable_if<std::is_arithmetic<U>::value &&
780  Traits::is_load_store_flag<Flags>::value>>
781  explicit Vc_INTRINSIC SimdArray(const CArray<U, Extent> &mem, Flags f = Flags())
782  : data0(&mem[0], f), data1(&mem[storage_type0::size()], f)
783  {
784  }
785 #endif
786 
787  // initializer list
788  Vc_INTRINSIC SimdArray(const std::initializer_list<value_type> &init)
789  : data0(init.begin(), Vc::Unaligned)
790  , data1(init.begin() + storage_type0::size(), Vc::Unaligned)
791  {
792 #if defined Vc_CXX14 && 0 // doesn't compile yet
793  static_assert(init.size() == size(), "The initializer_list argument to "
794  "SimdArray<T, N> must contain exactly N "
795  "values.");
796 #else
797  Vc_ASSERT(init.size() == size());
798 #endif
799  }
800 
801 #include "gatherinterface.h"
802 #include "scatterinterface.h"
803 
804  // forward all remaining ctors
805  template <typename... Args,
806  typename = enable_if<!Traits::is_cast_arguments<Args...>::value &&
807  !Traits::is_initializer_list<Args...>::value &&
808  !Traits::is_gather_signature<Args...>::value &&
809  !Traits::is_load_arguments<Args...>::value>>
810  explicit Vc_INTRINSIC SimdArray(Args &&... args)
811  : data0(Split::lo(args)...) // no forward here - it could move and thus
812  // break the next line
813  , data1(Split::hi(std::forward<Args>(args))...)
814  {
815  }
816 
817  // explicit casts
818  template <
819  class W, class...,
820  class = enable_if<(Traits::is_simd_vector<W>::value &&
822  !(std::is_convertible<Traits::entry_type_of<W>, T>::value &&
824  Vc_INTRINSIC explicit SimdArray(W &&x) : data0(Split::lo(x)), data1(Split::hi(x))
825  {
826  }
827 
828  // implicit casts
829  template <
830  class W, class..., class...,
831  class = enable_if<(Traits::isSimdArray<W>::value &&
832  Traits::simd_vector_size<W>::value == N &&
833  std::is_convertible<Traits::entry_type_of<W>, T>::value)>>
834  Vc_INTRINSIC SimdArray(W &&x) : data0(Split::lo(x)), data1(Split::hi(x))
835  {
836  }
837 
838  // implicit conversion to Vector<U, AnyAbi> for if Vector<U, AnyAbi>::size() == N and
839  // T implicitly convertible to U
840  template <typename U, typename A,
841  typename =
842  enable_if<std::is_convertible<T, U>::value && Vector<U, A>::Size == N &&
843  !std::is_same<A, simd_abi::fixed_size<N>>::value>>
844  operator Vector<U, A>() const
845  {
846  auto r = simd_cast<Vector<U, A>>(data0, data1);
847  return r;
848  }
849  Vc_INTRINSIC operator const fixed_size_simd<T, N> &() const
850  {
851  return static_cast<const fixed_size_simd<T, N> &>(*this);
852  }
853 
855 
856  Vc_INTRINSIC void setZero()
857  {
858  data0.setZero();
859  data1.setZero();
860  }
861  Vc_INTRINSIC void setZero(const mask_type &k)
862  {
863  data0.setZero(Split::lo(k));
864  data1.setZero(Split::hi(k));
865  }
866  Vc_INTRINSIC void setZeroInverted()
867  {
868  data0.setZeroInverted();
869  data1.setZeroInverted();
870  }
871  Vc_INTRINSIC void setZeroInverted(const mask_type &k)
872  {
873  data0.setZeroInverted(Split::lo(k));
874  data1.setZeroInverted(Split::hi(k));
875  }
876 
877 
878  Vc_INTRINSIC void setQnan() {
879  data0.setQnan();
880  data1.setQnan();
881  }
882  Vc_INTRINSIC void setQnan(const mask_type &m) {
883  data0.setQnan(Split::lo(m));
884  data1.setQnan(Split::hi(m));
885  }
886 
888  template <typename Op, typename... Args>
889  static Vc_INTRINSIC SimdArray fromOperation(Op op, Args &&... args)
890  {
891  SimdArray r = {
892  storage_type0::fromOperation(op, Split::lo(args)...), // no forward here - it
893  // could move and thus
894  // break the next line
895  storage_type1::fromOperation(op, Split::hi(std::forward<Args>(args))...)};
896  return r;
897  }
898 
900  template <typename Op, typename... Args>
901  static Vc_INTRINSIC void callOperation(Op op, Args &&... args)
902  {
903  storage_type0::callOperation(op, Split::lo(args)...);
904  storage_type1::callOperation(op, Split::hi(std::forward<Args>(args))...);
905  }
906 
907 
908  template <typename U, typename... Args> Vc_INTRINSIC void load(const U *mem, Args &&... args)
909  {
910  data0.load(mem, Split::lo(args)...); // no forward here - it could move and thus
911  // break the next line
912  data1.load(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
913  }
914 
915  template <typename U, typename... Args> Vc_INTRINSIC void store(U *mem, Args &&... args) const
916  {
917  data0.store(mem, Split::lo(args)...); // no forward here - it could move and thus
918  // break the next line
919  data1.store(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
920  }
921 
922  Vc_INTRINSIC mask_type operator!() const
923  {
924  return {!data0, !data1};
925  }
926 
927  Vc_INTRINSIC SimdArray operator-() const
928  {
929  return {-data0, -data1};
930  }
931 
933  Vc_INTRINSIC SimdArray operator+() const { return *this; }
934 
935  Vc_INTRINSIC SimdArray operator~() const
936  {
937  return {~data0, ~data1};
938  }
939 
940  // left/right shift operators {{{2
941  template <typename U,
942  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
943  Vc_INTRINSIC Vc_CONST SimdArray operator<<(U x) const
944  {
945  return {data0 << x, data1 << x};
946  }
947  template <typename U,
948  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
949  Vc_INTRINSIC SimdArray &operator<<=(U x)
950  {
951  data0 <<= x;
952  data1 <<= x;
953  return *this;
954  }
955  template <typename U,
956  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
957  Vc_INTRINSIC Vc_CONST SimdArray operator>>(U x) const
958  {
959  return {data0 >> x, data1 >> x};
960  }
961  template <typename U,
962  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
963  Vc_INTRINSIC SimdArray &operator>>=(U x)
964  {
965  data0 >>= x;
966  data1 >>= x;
967  return *this;
968  }
969 
970  // binary operators {{{2
971 #define Vc_BINARY_OPERATOR_(op) \
972  Vc_INTRINSIC Vc_CONST SimdArray operator op(const SimdArray &rhs) const \
973  { \
974  return {data0 op rhs.data0, data1 op rhs.data1}; \
975  } \
976  Vc_INTRINSIC SimdArray &operator op##=(const SimdArray &rhs) \
977  { \
978  data0 op## = rhs.data0; \
979  data1 op## = rhs.data1; \
980  return *this; \
981  }
982  Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_);
983  Vc_ALL_BINARY(Vc_BINARY_OPERATOR_);
984  Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_);
985 #undef Vc_BINARY_OPERATOR_
986 
987 #define Vc_COMPARES(op) \
988  Vc_INTRINSIC mask_type operator op(const SimdArray &rhs) const \
989  { \
990  return {data0 op rhs.data0, data1 op rhs.data1}; \
991  }
992  Vc_ALL_COMPARES(Vc_COMPARES);
993 #undef Vc_COMPARES
994 
995  // operator[] {{{2
998 
999 private:
1000  friend reference;
1001  Vc_INTRINSIC static value_type get(const SimdArray &o, int i) noexcept
1002  {
1003  return reinterpret_cast<const alias_type *>(&o)[i];
1004  }
1005  template <typename U>
1006  Vc_INTRINSIC static void set(SimdArray &o, int i, U &&v) noexcept(
1007  noexcept(std::declval<value_type &>() = v))
1008  {
1009  reinterpret_cast<alias_type *>(&o)[i] = v;
1010  }
1011 
1012 public:
1014 
1020  Vc_INTRINSIC reference operator[](size_t i) noexcept
1021  {
1022  static_assert(noexcept(reference{std::declval<SimdArray &>(), int()}), "");
1023  return {*this, int(i)};
1024  }
1025 
1027  Vc_INTRINSIC value_type operator[](size_t index) const noexcept
1028  {
1029  return get(*this, int(index));
1030  }
1032 
1033  // operator(){{{2
1035  Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(
1036  const mask_type &mask)
1037  {
1038  return {*this, mask};
1039  }
1040 
1042  Vc_INTRINSIC void assign(const SimdArray &v, const mask_type &k) //{{{2
1043  {
1044  data0.assign(v.data0, internal_data0(k));
1045  data1.assign(v.data1, internal_data1(k));
1046  }
1047 
1048  // reductions {{{2
1049 #define Vc_REDUCTION_FUNCTION_(name_, binary_fun_, scalar_fun_) \
1050 private: \
1051  template <typename ForSfinae = void> \
1052  Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value && \
1053  storage_type0::Size == storage_type1::Size, \
1054  value_type> name_##_impl() const \
1055  { \
1056  return binary_fun_(data0, data1).name_(); \
1057  } \
1058  \
1059  template <typename ForSfinae = void> \
1060  Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value && \
1061  storage_type0::Size != storage_type1::Size, \
1062  value_type> name_##_impl() const \
1063  { \
1064  return scalar_fun_(data0.name_(), data1.name_()); \
1065  } \
1066  \
1067 public: \
1068  \
1069  Vc_INTRINSIC value_type name_() const { return name_##_impl(); } \
1070  \
1071  Vc_INTRINSIC value_type name_(const mask_type &mask) const \
1072  { \
1073  if (Vc_IS_UNLIKELY(Split::lo(mask).isEmpty())) { \
1074  return data1.name_(Split::hi(mask)); \
1075  } else if (Vc_IS_UNLIKELY(Split::hi(mask).isEmpty())) { \
1076  return data0.name_(Split::lo(mask)); \
1077  } else { \
1078  return scalar_fun_(data0.name_(Split::lo(mask)), \
1079  data1.name_(Split::hi(mask))); \
1080  } \
1081  } \
1082  Vc_NOTHING_EXPECTING_SEMICOLON
1083  Vc_REDUCTION_FUNCTION_(min, Vc::min, std::min);
1084  Vc_REDUCTION_FUNCTION_(max, Vc::max, std::max);
1085  Vc_REDUCTION_FUNCTION_(product, internal::product_helper_, internal::product_helper_);
1086  Vc_REDUCTION_FUNCTION_(sum, internal::sum_helper_, internal::sum_helper_);
1087 #undef Vc_REDUCTION_FUNCTION_
1088  Vc_INTRINSIC Vc_PURE SimdArray partialSum() const //{{{2
1090  {
1091  auto ps0 = data0.partialSum();
1092  auto tmp = data1;
1093  tmp[0] += ps0[data0.size() - 1];
1094  return {std::move(ps0), tmp.partialSum()};
1095  }
1096 
    // apply {{{2
    /// Returns a new SimdArray where \p f was applied to each storage half.
    template <typename F> inline SimdArray apply(F &&f) const
    {
        return {data0.apply(f), data1.apply(f)};
    }
    /// Masked variant: \p f is only applied where \p k is true.
    template <typename F> inline SimdArray apply(F &&f, const mask_type &k) const
    {
        return {data0.apply(f, Split::lo(k)), data1.apply(f, Split::hi(k))};
    }
1108 
1109  // shifted {{{2
1111  inline SimdArray shifted(int amount) const
1112  {
1113  constexpr int SSize = Size;
1114  constexpr int SSize0 = storage_type0::Size;
1115  constexpr int SSize1 = storage_type1::Size;
1116  if (amount == 0) {
1117  return *this;
1118  }
1119  if (amount < 0) {
1120  if (amount > -SSize0) {
1121  return {data0.shifted(amount), data1.shifted(amount, data0)};
1122  }
1123  if (amount == -SSize0) {
1124  return {storage_type0::Zero(), simd_cast<storage_type1>(data0)};
1125  }
1126  if (amount < -SSize0) {
1127  return {storage_type0::Zero(), simd_cast<storage_type1>(data0.shifted(
1128  amount + SSize0))};
1129  }
1130  return Zero();
1131  } else {
1132  if (amount >= SSize) {
1133  return Zero();
1134  } else if (amount >= SSize0) {
1135  return {
1136  simd_cast<storage_type0>(data1).shifted(amount - SSize0),
1138  } else if (amount >= SSize1) {
1139  return {data0.shifted(amount, data1), storage_type1::Zero()};
1140  } else {
1141  return {data0.shifted(amount, data1), data1.shifted(amount)};
1142  }
1143  }
1144  }
1145 
    /// Generic (non-bisectable) shift with shift-in: element-wise fallback used when
    /// the two storage halves differ in type or NN != N.
    template <std::size_t NN>
    inline enable_if<
        !(std::is_same<storage_type0, storage_type1>::value &&  // not bisectable
          N == NN),
        SimdArray>
        shifted(int amount, const SimdArray<value_type, NN> &shiftIn) const
    {
        constexpr int SSize = Size;
        if (amount < 0) {
            // shift towards higher indexes; entries shifted out at the bottom are
            // replaced from the top of shiftIn
            return SimdArray::generate([&](int i) -> value_type {
                i += amount;
                if (i >= 0) {
                    return operator[](i);
                } else if (i >= -SSize) {
                    return shiftIn[i + SSize];
                }
                return 0;
            });
        }
        return SimdArray::generate([&](int i) -> value_type {
            i += amount;
            if (i < SSize) {
                return operator[](i);
            } else if (i < 2 * SSize) {
                return shiftIn[i - SSize];
            }
            return 0;
        });
    }

private:
    // workaround for MSVC not understanding the simpler and shorter expression of the boolean
    // expression directly in the enable_if below
    template <std::size_t NN> struct bisectable_shift
        : public std::integral_constant<bool,
                                        std::is_same<storage_type0, storage_type1>::value &&  // bisectable
                                        N == NN>
    {
    };
1185 
public:
    /// Bisectable shift with shift-in (both halves have the same type and NN == N).
    /// Handles every alignment of the shift window across {shiftIn.data1, shiftIn.data0,
    /// data0, data1} explicitly to stay on the vectorized two-argument shifted().
    template <std::size_t NN>
    inline SimdArray shifted(enable_if<bisectable_shift<NN>::value, int> amount,
                             const SimdArray<value_type, NN> &shiftIn) const
    {
        constexpr int SSize = Size;
        if (amount < 0) {
            if (amount > -static_cast<int>(storage_type0::Size)) {
                return {data0.shifted(amount, internal_data1(shiftIn)),
                        data1.shifted(amount, data0)};
            }
            if (amount == -static_cast<int>(storage_type0::Size)) {
                // exactly one half: the halves move up by one slot
                return {storage_type0(internal_data1(shiftIn)), storage_type1(data0)};
            }
            if (amount > -SSize) {
                return {
                    internal_data1(shiftIn)
                        .shifted(amount + static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
                    data0.shifted(amount + static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
            }
            if (amount == -SSize) {
                // the whole vector is replaced by shiftIn
                return shiftIn;
            }
            if (amount > -2 * SSize) {
                return shiftIn.shifted(amount + SSize);
            }
        }
        if (amount == 0) {
            return *this;
        }
        if (amount < static_cast<int>(storage_type0::Size)) {
            return {data0.shifted(amount, data1),
                    data1.shifted(amount, internal_data0(shiftIn))};
        }
        if (amount == static_cast<int>(storage_type0::Size)) {
            return {storage_type0(data1), storage_type1(internal_data0(shiftIn))};
        }
        if (amount < SSize) {
            return {data1.shifted(amount - static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
                    internal_data0(shiftIn)
                        .shifted(amount - static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
        }
        if (amount == SSize) {
            return shiftIn;
        }
        if (amount < 2 * SSize) {
            return shiftIn.shifted(amount - SSize);
        }
        // shifted past everything: all entries vacated
        return Zero();
    }
1236 
    // rotated {{{2
    /// Returns a copy with all entries rotated by \p amount positions (no entries are
    /// lost; they wrap around to the other end).
    Vc_INTRINSIC SimdArray rotated(int amount) const
    {
        // normalize amount into [0, size())
        amount %= int(size());
        if (amount == 0) {
            return *this;
        } else if (amount < 0) {
            amount += size();
        }

#ifdef Vc_MSVC
        // MSVC fails to find a SimdArray::shifted function with 2 arguments. So use store
        // ->
        // load to implement the function instead.
        alignas(MemoryAlignment) T tmp[N + data0.size()];
        data0.store(&tmp[0], Vc::Aligned);
        data1.store(&tmp[data0.size()], Vc::Aligned);
        data0.store(&tmp[N], Vc::Unaligned);  // replicate data0 after the end for wrap-around loads
        SimdArray r;
        r.data0.load(&tmp[amount], Vc::Unaligned);
        r.data1.load(&tmp[(amount + data0.size()) % size()], Vc::Unaligned);
        return r;
#else
        // cross-casts of the two halves, needed when a rotation moves entries between them
        auto &&d0cvtd = simd_cast<storage_type1>(data0);
        auto &&d1cvtd = simd_cast<storage_type0>(data1);
        constexpr int size0 = storage_type0::size();
        constexpr int size1 = storage_type1::size();

        if (amount == size0 && std::is_same<storage_type0, storage_type1>::value) {
            // bisectable and rotating by exactly one half: just swap the halves
            return {std::move(d1cvtd), std::move(d0cvtd)};
        } else if (amount < size1) {
            return {data0.shifted(amount, d1cvtd), data1.shifted(amount, d0cvtd)};
        } else if (amount == size1) {
            return {data0.shifted(amount, d1cvtd), std::move(d0cvtd)};
        } else if (int(size()) - amount < size1) {
            return {data0.shifted(amount - int(size()), d1cvtd.shifted(size1 - size0)),
                    data1.shifted(amount - int(size()), data0.shifted(size0 - size1))};
        } else if (int(size()) - amount == size1) {
            return {data0.shifted(-size1, d1cvtd.shifted(size1 - size0)),
                    simd_cast<storage_type1>(data0.shifted(size0 - size1))};
        } else if (amount <= size0) {
            return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
                    simd_cast<storage_type1>(data0.shifted(amount - size1))};
        } else {
            return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
                    simd_cast<storage_type1>(data0.shifted(amount - size1, d1cvtd))};
        }
        return *this;  // unreachable; silences "missing return" warnings
#endif
    }
1288 
    // interleaveLow/-High {{{2
    /// Returns {data[0], x[0], data[1], x[1], ...} — the interleave of the low halves.
    Vc_INTRINSIC SimdArray interleaveLow(const SimdArray &x) const
    {
        // return data0[0], x.data0[0], data0[1], x.data0[1], ...
        return {data0.interleaveLow(x.data0),
                simd_cast<storage_type1>(data0.interleaveHigh(x.data0))};
    }
    /// Interleave of the high halves; dispatches on whether the two storage halves
    /// have equal Size (bisectable) or not.
    Vc_INTRINSIC SimdArray interleaveHigh(const SimdArray &x) const
    {
        return interleaveHighImpl(
            x,
            std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
    }

private:
    /// \internal bisectable case: the upper half of *this is exactly data1
    Vc_INTRINSIC SimdArray interleaveHighImpl(const SimdArray &x, std::true_type) const
    {
        return {data1.interleaveLow(x.data1), data1.interleaveHigh(x.data1)};
    }
    /// \internal unequal halves: stitch the result together via shifted()
    inline SimdArray interleaveHighImpl(const SimdArray &x, std::false_type) const
    {
        return {data0.interleaveHigh(x.data0)
                    .shifted(storage_type1::Size,
                             simd_cast<storage_type0>(data1.interleaveLow(x.data1))),
                data1.interleaveHigh(x.data1)};
    }

public:
    /// Returns a copy with the order of all entries reversed.
    inline SimdArray reversed() const //{{{2
    {
        if (std::is_same<storage_type0, storage_type1>::value) {
            // bisectable: reverse each half and swap them
            return {simd_cast<storage_type0>(data1).reversed(),
                    simd_cast<storage_type1>(data0).reversed()};
        } else {
#ifdef Vc_MSVC
            // MSVC fails to find a SimdArray::shifted function with 2 arguments. So use
            // store
            // -> load to implement the function instead.
            alignas(MemoryAlignment) T tmp[N];
            data1.reversed().store(&tmp[0], Vc::Aligned);
            data0.reversed().store(&tmp[data1.size()], Vc::Unaligned);
            return SimdArray{&tmp[0], Vc::Aligned};
#else
            return {data0.shifted(storage_type1::Size, data1).reversed(),
                    simd_cast<storage_type1>(data0.reversed().shifted(
                        storage_type0::Size - storage_type1::Size))};
#endif
        }
    }
    /// Returns a sorted copy; dispatches on whether the halves are equally sized.
    inline SimdArray sorted() const //{{{2
    {
        return sortedImpl(
            std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
    }

    /// \internal bisectable merge sort: sort both halves, reverse one, then min/max
    /// forms a bitonic split which is sorted recursively per half.
    Vc_INTRINSIC SimdArray sortedImpl(std::true_type) const
    {
#ifdef Vc_DEBUG_SORTED
        std::cerr << "-- " << data0 << data1 << '\n';
#endif
        const auto a = data0.sorted();
        const auto b = data1.sorted().reversed();
        const auto lo = Vc::min(a, b);
        const auto hi = Vc::max(a, b);
        return {lo.sorted(), hi.sorted()};
    }
1362 
1364  Vc_INTRINSIC SimdArray sortedImpl(std::false_type) const
1365  {
1366  using SortableArray =
1368  auto sortable = simd_cast<SortableArray>(*this);
1369  for (std::size_t i = Size; i < SortableArray::Size; ++i) {
1370  using limits = std::numeric_limits<value_type>;
1371  if (limits::has_infinity) {
1372  sortable[i] = limits::infinity();
1373  } else {
1374  sortable[i] = std::numeric_limits<value_type>::max();
1375  }
1376  }
1377  return simd_cast<SimdArray>(sortable.sorted());
1378 
1379  /* The following implementation appears to be less efficient. But this may need further
1380  * work.
1381  const auto a = data0.sorted();
1382  const auto b = data1.sorted();
1383 #ifdef Vc_DEBUG_SORTED
1384  std::cerr << "== " << a << b << '\n';
1385 #endif
1386  auto aIt = Vc::begin(a);
1387  auto bIt = Vc::begin(b);
1388  const auto aEnd = Vc::end(a);
1389  const auto bEnd = Vc::end(b);
1390  return SimdArray::generate([&](std::size_t) {
1391  if (aIt == aEnd) {
1392  return *(bIt++);
1393  }
1394  if (bIt == bEnd) {
1395  return *(aIt++);
1396  }
1397  if (*aIt < *bIt) {
1398  return *(aIt++);
1399  } else {
1400  return *(bIt++);
1401  }
1402  });
1403  */
1404  }
1405 

    /// The number of entries in this SimdArray.
    static constexpr std::size_t Size = size();

    /// \deprecated use the free function exponent(x) instead
    Vc_DEPRECATED("use exponent(x) instead") Vc_INTRINSIC SimdArray exponent() const
    {
        return {exponent(data0), exponent(data1)};
    }

    /// \deprecated use the free function isnegative(x) instead
    Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC MaskType isNegative() const
    {
        return {isnegative(data0), isnegative(data1)};
    }

    /// \deprecated use the free function copysign(x, y) instead
    Vc_DEPRECATED("use copysign(x, y) instead") Vc_INTRINSIC SimdArray
        copySign(const SimdArray &x) const
    {
        return {Vc::copysign(data0, x.data0),
                Vc::copysign(data1, x.data1)};
    }

    // internal_data0/1 {{{2
    // grant the free accessor functions direct access to the storage members
    friend storage_type0 &internal_data0<>(SimdArray &x);
    friend storage_type1 &internal_data1<>(SimdArray &x);
    friend const storage_type0 &internal_data0<>(const SimdArray &x);
    friend const storage_type1 &internal_data1<>(const SimdArray &x);

    /// \internal construct directly from the two storage halves
    Vc_INTRINSIC SimdArray(storage_type0 &&x, storage_type1 &&y) //{{{2
        : data0(std::move(x)), data1(std::move(y))
    {
    }

    Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(storage_type0));

private: //{{{2
    // The alignas attribute attached to the class declaration above is ignored by ICC
    // 17.0.0 (at least). So just move the alignas attribute down here where it works for
    // all compilers.
    alignas(static_cast<std::size_t>(
        Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value * sizeof(V) /
                                 V::size()>::value)) storage_type0 data0;
    storage_type1 data1;
};
#undef Vc_CURRENT_CLASS_NAME
// out-of-class definitions of the static constexpr members (required pre-C++17)
template <typename T, std::size_t N, typename V, std::size_t M>
constexpr std::size_t SimdArray<T, N, V, M>::Size;
template <typename T, std::size_t N, typename V, std::size_t M>
constexpr std::size_t SimdArray<T, N, V, M>::MemoryAlignment;
1462 
1463 // gatherImplementation {{{2
1464 template <typename T, std::size_t N, typename VectorType, std::size_t M>
1465 template <typename MT, typename IT>
1467  const IT &indexes)
1468 {
1469  data0.gather(mem, Split::lo(Common::Operations::gather(), indexes));
1470  data1.gather(mem, Split::hi(Common::Operations::gather(), indexes));
1471 }
1472 template <typename T, std::size_t N, typename VectorType, std::size_t M>
1473 template <typename MT, typename IT>
1475  const IT &indexes,
1476  MaskArgument mask)
1477 {
1478  data0.gather(mem, Split::lo(Common::Operations::gather(), indexes), Split::lo(mask));
1479  data1.gather(mem, Split::hi(Common::Operations::gather(), indexes), Split::hi(mask));
1480 }
1481 
1482 // scatterImplementation {{{2
1483 template <typename T, std::size_t N, typename VectorType, std::size_t M>
1484 template <typename MT, typename IT>
1486  IT &&indexes) const
1487 {
1488  data0.scatter(mem, Split::lo(Common::Operations::gather(),
1489  indexes)); // don't forward indexes - it could move and
1490  // thus break the next line
1491  data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)));
1492 }
1493 template <typename T, std::size_t N, typename VectorType, std::size_t M>
1494 template <typename MT, typename IT>
1496  IT &&indexes, MaskArgument mask) const
1497 {
1498  data0.scatter(mem, Split::lo(Common::Operations::gather(), indexes),
1499  Split::lo(mask)); // don't forward indexes - it could move and
1500  // thus break the next line
1501  data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)),
1502  Split::hi(mask));
1503 }
1504 
1505 // internal_data0/1 (SimdArray) {{{1
1507 template <typename T, std::size_t N, typename V, std::size_t M>
1508 #ifndef Vc_MSVC
1509 Vc_INTRINSIC
1510 #endif
1511 typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
1513 {
1514  return x.data0;
1515 }
1517 template <typename T, std::size_t N, typename V, std::size_t M>
1518 #ifndef Vc_MSVC
1519 Vc_INTRINSIC
1520 #endif
1521 typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
1523 {
1524  return x.data1;
1525 }
/// \internal Returns a const reference to the low storage half of \p x.
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
const typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
    const SimdArray<T, N, V, M> &x)
{
    return x.data0;
}
/// \internal Returns a const reference to the high storage half of \p x.
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
const typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
    const SimdArray<T, N, V, M> &x)
{
    return x.data1;
}
1546 
// MSVC workaround for SimdArray(storage_type0, storage_type1) ctor{{{1
// MSVC sometimes stores x to data1. By first broadcasting 0 and then assigning y
// in the body the bug is supressed.
#if defined Vc_MSVC && defined Vc_IMPL_SSE
template <>
// NOTE(review): the signature of this explicit specialization was missing from the
// source listing; reconstructed from the member-initializer list and the workaround
// comment above — verify against upstream before relying on it.
Vc_INTRINSIC SimdArray<double, 8, SSE::Vector<double>, 2>::SimdArray(
    SimdArray<double, 4> &&x, SimdArray<double, 4> &&y)
    : data0(x), data1(0)
{
    data1 = y;
}
#endif
1559 
1560 // binary operators {{{1
1561 namespace result_vector_type_internal
1562 {
1563 template <typename T>
1564 using type = typename std::remove_cv<typename std::remove_reference<T>::type>::type;
1565 
1566 template <typename T>
1567 using is_integer_larger_than_int = std::integral_constant<
1568  bool, std::is_integral<T>::value &&(sizeof(T) > sizeof(int) ||
1569  std::is_same<T, long>::value ||
1570  std::is_same<T, unsigned long>::value)>;
1571 
1572 template <
1573  typename L, typename R,
1576  bool =
1578  Traits::isSimdArray<R>::value) // one of the operands must be a SimdArray
1579  && !std::is_same<type<L>, type<R>>::value // if the operands are of the same type
1580  // use the member function
1581  &&
1582  ((std::is_arithmetic<type<L>>::value &&
1583  !is_integer_larger_than_int<type<L>>::value) ||
1584  (std::is_arithmetic<type<R>>::value &&
1585  !is_integer_larger_than_int<type<R>>::value) // one of the operands is a scalar
1586  // type
1587  ||
1588  ( // or one of the operands is Vector<T> with Vector<T>::size() ==
1589  // SimdArray::size()
1593 struct evaluate;
1594 
// Computes the result type of a binary operator between L and R (enabled case).
template <typename L, typename R, std::size_t N> struct evaluate<L, R, N, true>
{
private:
    using LScalar = Traits::entry_type_of<L>;
    using RScalar = Traits::entry_type_of<R>;

    template <bool B, typename T, typename F>
    using conditional = typename std::conditional<B, T, F>::type;

public:
    // In principle we want the exact same rules for SimdArray<T> ⨉ SimdArray<U> as the standard
    // defines for T ⨉ U. BUT: short ⨉ short returns int (because all integral types smaller than
    // int are promoted to int before any operation). This would imply that SIMD types with integral
    // types smaller than int are more or less useless - and you could use SimdArray<int> from the
    // start. Therefore we special-case those operations where the scalar type of both operands is
    // integral and smaller than int.
    // In addition, there is no generic support for 64-bit int SIMD types. Therefore
    // promotion to a 64-bit integral type (including `long` because it can potentially have 64
    // bits) also is not done. But if one of the operands is a scalar type that is larger than int
    // then the operator is disabled altogether. We do not want an implicit demotion.
    using type = SimdArray<
        conditional<(std::is_integral<LScalar>::value &&std::is_integral<RScalar>::value &&
                     sizeof(LScalar) < sizeof(int) &&
                     sizeof(RScalar) < sizeof(int)),
                    conditional<(sizeof(LScalar) == sizeof(RScalar)),
                                conditional<std::is_unsigned<LScalar>::value, LScalar, RScalar>,
                                conditional<(sizeof(LScalar) > sizeof(RScalar)), LScalar, RScalar>>,
                    decltype(std::declval<LScalar>() + std::declval<RScalar>())>,
        N>;
};

} // namespace result_vector_type_internal

/// The SimdArray type resulting from a (mixed-type) binary operator on L and R.
template <typename L, typename R>
using result_vector_type = typename result_vector_type_internal::evaluate<L, R>::type;
1630 
1631 static_assert(
1632  std::is_same<result_vector_type<short int, Vc::SimdArray<short unsigned int, 32ul>>,
1634  "result_vector_type does not work");
1635 
// Mixed-type arithmetic/bitwise operators: convert both operands to the common
// result_vector_type and forward to its member operator.
#define Vc_BINARY_OPERATORS_(op_)                                                        \
                                                                                         \
    template <typename L, typename R>                                                    \
    Vc_INTRINSIC result_vector_type<L, R> operator op_(L &&lhs, R &&rhs)                 \
    {                                                                                    \
        using Return = result_vector_type<L, R>;                                         \
        return Return(std::forward<L>(lhs)).operator op_(std::forward<R>(rhs));          \
    }

Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATORS_);
Vc_ALL_BINARY(Vc_BINARY_OPERATORS_);
#undef Vc_BINARY_OPERATORS_
// Mixed-type comparisons: same promotion, but the result is the mask type.
#define Vc_BINARY_OPERATORS_(op_)                                                        \
                                                                                         \
    template <typename L, typename R>                                                    \
    Vc_INTRINSIC typename result_vector_type<L, R>::mask_type operator op_(L &&lhs,     \
                                                                           R &&rhs)     \
    {                                                                                    \
        using Promote = result_vector_type<L, R>;                                        \
        return Promote(std::forward<L>(lhs)) op_ Promote(std::forward<R>(rhs));          \
    }

Vc_ALL_COMPARES(Vc_BINARY_OPERATORS_);
#undef Vc_BINARY_OPERATORS_
1696 
// math functions {{{1
// Forwards a unary math function to SimdArray::fromOperation, which applies the
// corresponding Vc operation to each storage half.
#define Vc_FORWARD_UNARY_OPERATOR(name_)                                                 \
                                                                                         \
    template <typename T, std::size_t N, typename V, std::size_t M>                      \
    inline SimdArray<T, N, V, M> name_(const SimdArray<T, N, V, M> &x)                   \
    {                                                                                    \
        return SimdArray<T, N, V, M>::fromOperation(                                     \
            Common::Operations::Forward_##name_(), x);                                   \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

// Same, for predicates returning a mask (isnan, isfinite, ...).
#define Vc_FORWARD_UNARY_BOOL_OPERATOR(name_)                                            \
                                                                                         \
    template <typename T, std::size_t N, typename V, std::size_t M>                      \
    inline SimdMaskArray<T, N, V, M> name_(const SimdArray<T, N, V, M> &x)               \
    {                                                                                    \
        return SimdMaskArray<T, N, V, M>::fromOperation(                                 \
            Common::Operations::Forward_##name_(), x);                                   \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

// Same, for binary math functions (atan2, copysign, min, max, ...).
#define Vc_FORWARD_BINARY_OPERATOR(name_)                                                \
                                                                                         \
    template <typename T, std::size_t N, typename V, std::size_t M>                      \
    inline SimdArray<T, N, V, M> name_(const SimdArray<T, N, V, M> &x,                   \
                                       const SimdArray<T, N, V, M> &y)                   \
    {                                                                                    \
        return SimdArray<T, N, V, M>::fromOperation(                                     \
            Common::Operations::Forward_##name_(), x, y);                                \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

Vc_FORWARD_UNARY_OPERATOR(abs);
Vc_FORWARD_UNARY_OPERATOR(asin);
Vc_FORWARD_UNARY_OPERATOR(atan);
Vc_FORWARD_BINARY_OPERATOR(atan2);
Vc_FORWARD_UNARY_OPERATOR(ceil);
Vc_FORWARD_BINARY_OPERATOR(copysign);
Vc_FORWARD_UNARY_OPERATOR(cos);
Vc_FORWARD_UNARY_OPERATOR(exp);
Vc_FORWARD_UNARY_OPERATOR(exponent);
Vc_FORWARD_UNARY_OPERATOR(floor);
1745 template <typename T, std::size_t N>
1747  const SimdArray<T, N> &c)
1748 {
1749  return SimdArray<T, N>::fromOperation(Common::Operations::Forward_fma(), a, b, c);
1750 }
// classification predicates return a SimdMaskArray
Vc_FORWARD_UNARY_BOOL_OPERATOR(isfinite);
Vc_FORWARD_UNARY_BOOL_OPERATOR(isinf);
Vc_FORWARD_UNARY_BOOL_OPERATOR(isnan);
#if defined Vc_MSVC && defined Vc_IMPL_SSE
// MSVC workaround: evaluate isnan on the four SSE quarters explicitly instead of
// going through the generic forwarding macro.
// NOTE(review): the function signature and the declarations of r0/r1 were missing from
// the source listing; reconstructed from the body — verify against upstream.
Vc_INTRINSIC Vc_CONST SimdMaskArray<double, 8, SSE::Vector<double>, 2> isnan(
    const SimdArray<double, 8, SSE::Vector<double>, 2> &x)
{
    using V = SSE::Vector<double>;
    const SimdArray<double, 4, V, 2> &x0 = internal_data0(x);
    const SimdArray<double, 4, V, 2> &x1 = internal_data1(x);
    SimdMaskArray<double, 4, V, 2> r0;
    SimdMaskArray<double, 4, V, 2> r1;
    internal_data(internal_data0(r0)) = isnan(internal_data(internal_data0(x0)));
    internal_data(internal_data1(r0)) = isnan(internal_data(internal_data1(x0)));
    internal_data(internal_data0(r1)) = isnan(internal_data(internal_data0(x1)));
    internal_data(internal_data1(r1)) = isnan(internal_data(internal_data1(x1)));
    return {std::move(r0), std::move(r1)};
}
#endif
1770 Vc_FORWARD_UNARY_BOOL_OPERATOR(isnegative);
1772 template <typename T, std::size_t N>
1774 {
1775  return SimdArray<T, N>::fromOperation(Common::Operations::Forward_frexp(), x, e);
1776 }
1778 template <typename T, std::size_t N>
1780 {
1781  return SimdArray<T, N>::fromOperation(Common::Operations::Forward_ldexp(), x, e);
1782 }
Vc_FORWARD_UNARY_OPERATOR(log);
Vc_FORWARD_UNARY_OPERATOR(log10);
Vc_FORWARD_UNARY_OPERATOR(log2);
Vc_FORWARD_UNARY_OPERATOR(reciprocal);
Vc_FORWARD_UNARY_OPERATOR(round);
Vc_FORWARD_UNARY_OPERATOR(rsqrt);
Vc_FORWARD_UNARY_OPERATOR(sin);
1791 template <typename T, std::size_t N>
1793 {
1794  SimdArray<T, N>::callOperation(Common::Operations::Forward_sincos(), x, sin, cos);
1795 }
Vc_FORWARD_UNARY_OPERATOR(sqrt);
Vc_FORWARD_UNARY_OPERATOR(trunc);
Vc_FORWARD_BINARY_OPERATOR(min);
Vc_FORWARD_BINARY_OPERATOR(max);
// the forwarding macros are file-local helpers; clean up
#undef Vc_FORWARD_UNARY_OPERATOR
#undef Vc_FORWARD_UNARY_BOOL_OPERATOR
#undef Vc_FORWARD_BINARY_OPERATOR
1804 
// simd_cast {{{1
// MSVC needs distinct dummy parameters to tell otherwise-identical simd_cast overloads
// apart; on other compilers they expand to nothing.
#ifdef Vc_MSVC
#define Vc_DUMMY_ARG0 , int = 0
#define Vc_DUMMY_ARG1 , long = 0
#define Vc_DUMMY_ARG2 , short = 0
#define Vc_DUMMY_ARG3 , char = '0'
#define Vc_DUMMY_ARG4 , unsigned = 0u
#define Vc_DUMMY_ARG5 , unsigned short = 0u
#else
#define Vc_DUMMY_ARG0
#define Vc_DUMMY_ARG1
#define Vc_DUMMY_ARG2
#define Vc_DUMMY_ARG3
#define Vc_DUMMY_ARG4
#define Vc_DUMMY_ARG5
#endif  // Vc_MSVC
1821 
// simd_cast_impl_smaller_input {{{2
// The following function can be implemented without the sizeof...(From) overload.
// However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
// function in two works around the issue.
/// \internal Casts xs... into the front of Return and appends the N entries of \p last.
template <typename Return, std::size_t N, typename T, typename... From>
Vc_INTRINSIC Vc_CONST enable_if<sizeof...(From) != 0, Return>
simd_cast_impl_smaller_input(const From &... xs, const T &last)
{
    Return r = simd_cast<Return>(xs...);
    for (size_t i = 0; i < N; ++i) {
        r[i + N * sizeof...(From)] = static_cast<typename Return::EntryType>(last[i]);
    }
    return r;
}
1836 template <typename Return, std::size_t N, typename T>
1837 Vc_INTRINSIC Vc_CONST Return simd_cast_impl_smaller_input(const T &last)
1838 {
1839  Return r = Return();
1840  for (size_t i = 0; i < N; ++i) {
1841  r[i] = static_cast<typename Return::EntryType>(last[i]);
1842  }
1843  return r;
1844 }
/// \internal Casts xs... into the front of Return and fills the remaining entries
/// from the (larger) \p last.
template <typename Return, std::size_t N, typename T, typename... From>
Vc_INTRINSIC Vc_CONST enable_if<sizeof...(From) != 0, Return> simd_cast_impl_larger_input(
    const From &... xs, const T &last)
{
    Return r = simd_cast<Return>(xs...);
    for (size_t i = N * sizeof...(From); i < Return::Size; ++i) {
        r[i] = static_cast<typename Return::EntryType>(last[i - N * sizeof...(From)]);
    }
    return r;
}
1855 template <typename Return, std::size_t N, typename T>
1856 Vc_INTRINSIC Vc_CONST Return simd_cast_impl_larger_input(const T &last)
1857 {
1858  Return r = Return();
1859  for (size_t i = 0; i < Return::size(); ++i) {
1860  r[i] = static_cast<typename Return::EntryType>(last[i]);
1861  }
1862  return r;
1863 }
1864 
// simd_cast_without_last (declaration) {{{2
/// \internal Casts all arguments except the trailing one (defined further below).
template <typename Return, typename T, typename... From>
Vc_INTRINSIC_L Vc_CONST_L Return
    simd_cast_without_last(const From &... xs, const T &) Vc_INTRINSIC_R Vc_CONST_R;
1869 
// are_all_types_equal {{{2
// Compile-time check that every type in the pack is the same type. A single type is
// trivially "all equal"; longer packs compare pairwise and recurse.
template <typename... Ts> struct are_all_types_equal;
template <typename T>
struct are_all_types_equal<T> : public std::true_type
{
};
template <typename T0, typename T1, typename... Ts>
struct are_all_types_equal<T0, T1, Ts...>
    : public std::integral_constant<bool,
                                    std::is_same<T0, T1>::value &&
                                        are_all_types_equal<T1, Ts...>::value>
{
};
1882 
// simd_cast_interleaved_argument_order (declarations) {{{2
/// \internal Casts a... and b... with their arguments interleaved (a0, b0, a1, b1, ...).
template <typename Return, typename... Ts>
Vc_INTRINSIC Vc_CONST Return
    simd_cast_interleaved_argument_order(const Ts &... a, const Ts &... b);

// simd_cast_with_offset (declarations and one impl) {{{2
// offset == 0 {{{3
template <typename Return, std::size_t offset, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
    simd_cast_with_offset(const From &x, const Froms &... xs);
// offset > 0 && offset divisible by Return::Size {{{3
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0), Return>
    simd_cast_with_offset(const From &x);
1918 // offset > 0 && offset NOT divisible && Return is non-atomic simd(mask)array {{{3
1919 template <typename Return, std::size_t offset, typename From>
1920 Vc_INTRINSIC Vc_CONST
1921  enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
1923  !Traits::isAtomicSimdArray<Return>::value) ||
1925  !Traits::isAtomicSimdMaskArray<Return>::value))),
1926  Return>
1927  simd_cast_with_offset(const From &x);
1928 // offset > 0 && offset NOT divisible && Return is atomic simd(mask)array {{{3
1929 template <typename Return, std::size_t offset, typename From>
1930 Vc_INTRINSIC Vc_CONST
1931  enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
1933  Traits::isAtomicSimdArray<Return>::value) ||
1935  Traits::isAtomicSimdMaskArray<Return>::value))),
1936  Return>
1937  simd_cast_with_offset(const From &x);
// offset > first argument (drops first arg) {{{3
/// \internal The offset skips the entire first argument: drop it and recurse with a
/// correspondingly reduced offset.
template <typename Return, std::size_t offset, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST enable_if<
    (are_all_types_equal<From, Froms...>::value && From::Size <= offset), Return>
    simd_cast_with_offset(const From &, const Froms &... xs)
{
    return simd_cast_with_offset<Return, offset - From::Size>(xs...);
}

// offset > first and only argument (returns Zero) {{{3
/// \internal The offset skips past all input entries: nothing left to convert.
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST enable_if<(From::Size <= offset), Return> simd_cast_with_offset(
    const From &)
{
    return Return::Zero();
}
1954 
// first_type_of {{{2
// Extracts the first type of a non-empty parameter pack.
template <typename Head, typename... Tail> struct first_type_of_impl
{
    using type = Head;
};
template <typename... Pack>
using first_type_of = typename first_type_of_impl<Pack...>::type;
1961 
// simd_cast_drop_arguments (declarations) {{{2
/// \internal Casts only as many leading arguments as fit into Return, ignoring the rest.
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x);
template <typename Return, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<Froms...>::value &&
               sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
              Return>
    simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x);
// The following function can be implemented without the sizeof...(From) overload.
// However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
// function in two works around the issue.
template <typename Return, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST enable_if<
    (are_all_types_equal<From, Froms...>::value &&
     (1 + sizeof...(Froms)) * From::Size >= Return::Size && sizeof...(Froms) != 0),
    Return>
simd_cast_drop_arguments(Froms... xs, From x, From);
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
    simd_cast_drop_arguments(From x, From);
1984 
namespace
{
#ifdef Vc_DEBUG_SIMD_CAST
// Sink used to expand a parameter pack for its streaming side effects only.
void debugDoNothing(const std::initializer_list<void *> &) {}
// Prints all arguments between \p prefix and \p suffix to std::cerr.
template <typename T0, typename... Ts>
inline void vc_debug_(const char *prefix, const char *suffix, const T0 &arg0,
                      const Ts &... args)
{
    std::cerr << prefix << arg0;
    debugDoNothing({&(std::cerr << ", " << args)...});
    std::cerr << suffix;
}
#else
// Release build: debug tracing compiles away to nothing.
template <typename T0, typename... Ts>
Vc_INTRINSIC void vc_debug_(const char *, const char *, const T0 &, const Ts &...)
{
}
#endif
}  // unnamed namespace
2004 
// is_less trait{{{2
// Compile-time A < B comparison as an integral_constant, usable inside enable_if.
template <size_t A, size_t B>
struct is_less
    : public std::conditional<(A < B), std::true_type, std::false_type>::type {
};
2009 
// is_power_of_2 trait{{{2
// True iff N is a power of two. The previous expression (((N - 1) & N) == 0) also
// classified N == 0 as a power of two; exclude that corner case explicitly.
template <size_t N>
struct is_power_of_2
    : public std::integral_constant<bool, N != 0 && ((N - 1) & N) == 0> {
};
2014 
// simd_cast<T>(xs...) to SimdArray/-mask {{{2
// Generates the simd_cast overloads that convert one or more native Vector/Mask
// arguments into a SimdArray/SimdMaskArray. Four cases:
//   {1} atomic target, inputs smaller than the target  -> cast into the single storage
//   {2} atomic target, inputs cover the target         -> drop the superfluous last input
//   {3} non-atomic target, inputs reach into the upper half -> split into storage_type0/1
//   {4} non-atomic target, inputs fit in the lower half -> cast low, zero high
#define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_)                                  \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (Traits::isAtomic##SimdArrayType_<Return>::value &&                              \
         is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value &&     \
         are_all_types_equal<NativeType_<T, A>, Froms...>::value &&                      \
         !detail::is_fixed_size_abi<A>::value),                                          \
        Return>                                                                          \
    simd_cast(NativeType_<T, A> x, Froms... xs)                                          \
    {                                                                                    \
        vc_debug_("simd_cast{1}(", ")\n", x, xs...);                                     \
        return {private_init, simd_cast<typename Return::storage_type>(x, xs...)};       \
    }                                                                                    \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (Traits::isAtomic##SimdArrayType_<Return>::value &&                              \
         !is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value &&    \
         are_all_types_equal<NativeType_<T, A>, Froms...>::value &&                      \
         !detail::is_fixed_size_abi<A>::value),                                          \
        Return>                                                                          \
    simd_cast(NativeType_<T, A> x, Froms... xs)                                          \
    {                                                                                    \
        vc_debug_("simd_cast{2}(", ")\n", x, xs...);                                     \
        return {simd_cast_without_last<Return, NativeType_<T, A>, Froms...>(x, xs...)};  \
    }                                                                                    \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   is_less<Common::left_size<Return::Size>(),                            \
                           NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value &&    \
                   are_all_types_equal<NativeType_<T, A>, Froms...>::value &&            \
                   !detail::is_fixed_size_abi<A>::value),                                \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x, Froms... xs)                                      \
    {                                                                                    \
        vc_debug_("simd_cast{3}(", ")\n", x, xs...);                                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        return {simd_cast_drop_arguments<R0, Froms...>(x, xs...),                        \
                simd_cast_with_offset<R1, R0::Size>(x, xs...)};                          \
    }                                                                                    \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   !is_less<Common::left_size<Return::Size>(),                           \
                            NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value &&   \
                   are_all_types_equal<NativeType_<T, A>, Froms...>::value &&            \
                   !detail::is_fixed_size_abi<A>::value),                                \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x, Froms... xs)                                      \
    {                                                                                    \
        vc_debug_("simd_cast{4}(", ")\n", x, xs...);                                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        return {simd_cast<R0>(x, xs...), R1::Zero()};                                    \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
#undef Vc_SIMDARRAY_CASTS
2079 
2080 // simd_cast<SimdArray/-mask, offset>(V) {{{2
// Generates the offset variants simd_cast<Return, offset>(x) that take a
// native vector/mask x. Three overloads, selected via enable_if:
//  - atomic Return: forward the offset cast into the single storage member
//  - non-atomic Return, both halves lie inside x: fill storage_type0 from
//    entries_offset and storage_type1 from the entries right behind it
//  - non-atomic Return, only the left half overlaps x: fill storage_type0 and
//    zero storage_type1
// The Vc_DUMMY_ARGn parameters disambiguate otherwise identical signatures on
// compilers that need it.
#define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_)                                  \
    /* SIMD Vector/Mask to atomic SimdArray/simdmaskarray */                             \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<Traits::isAtomic##SimdArrayType_<Return>::value, Return>               \
        simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG0)                                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, atomic}(", ")\n", offset, x);                       \
        return {private_init, simd_cast<typename Return::storage_type, offset>(x)};      \
    }                                                                                    \
    /* both halves of Return array are extracted from argument */                        \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   Return::Size * offset + Common::left_size<Return::Size>() <           \
                       NativeType_<T, A>::Size),                                         \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG1)                                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, split Return}(", ")\n", offset, x);                 \
        using R0 = typename Return::storage_type0;                                       \
        constexpr int entries_offset = offset * Return::Size;                            \
        constexpr int entries_offset_right = entries_offset + R0::Size;                  \
        return {                                                                         \
            simd_cast_with_offset<typename Return::storage_type0, entries_offset>(x),    \
            simd_cast_with_offset<typename Return::storage_type1, entries_offset_right>( \
                x)};                                                                     \
    }                                                                                    \
    /* SIMD Vector/Mask to non-atomic SimdArray/simdmaskarray */                         \
    /* right half of Return array is zero */                                             \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   Return::Size * offset + Common::left_size<Return::Size>() >=          \
                       NativeType_<T, A>::Size),                                         \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG2)                                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, R1::Zero}(", ")\n", offset, x);                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        constexpr int entries_offset = offset * Return::Size;                            \
        return {simd_cast_with_offset<R0, entries_offset>(x), R1::Zero()};               \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
#undef Vc_SIMDARRAY_CASTS
2132 
2133 // simd_cast<T>(xs...) from SimdArray/-mask {{{2
// Generates the simd_cast overloads whose arguments are SimdArray /
// SimdMaskArray objects. The enable_if dispatch distinguishes (vc_debug_ tags
// in braces):
//  {indivisible}  N == M, the array wraps a single native vector: unwrap all
//                 arguments and recurse on the native overloads
//  {indivisible2} same, but more input entries than Return holds: drop the
//                 last argument first
//  {bisectable}   N != M and N is a power of two: recurse on the two storage
//                 halves, passed in interleaved argument order
//  {bisectable2}  same, but the input is so large that the last argument can
//                 be dropped
//  {remaining}/{remaining2} N is not a power of two: element-wise helpers for
//                 input totals not larger / larger than Return::Size
//  {single bisectable}/{single bisectable2} a lone argument wider than Return:
//                 use only internal_data0, or both halves, respectively
#define Vc_SIMDARRAY_CASTS(SimdArrayType_)                                               \
    /* indivisible SimdArrayType_ */                                                     \
    template <typename Return, typename T, std::size_t N, typename V, typename... From>  \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value &&    \
                   (sizeof...(From) == 0 || N * sizeof...(From) < Return::Size) &&       \
                   !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value),            \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs)              \
    {                                                                                    \
        vc_debug_("simd_cast{indivisible}(", ")\n", x0, xs...);                          \
        return simd_cast<Return>(internal_data(x0), internal_data(xs)...);               \
    }                                                                                    \
    /* indivisible SimdArrayType_ && can drop arguments from the end */                  \
    template <typename Return, typename T, std::size_t N, typename V, typename... From>  \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value &&    \
                   (sizeof...(From) > 0 && (N * sizeof...(From) >= Return::Size)) &&     \
                   !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value),            \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs)              \
    {                                                                                    \
        vc_debug_("simd_cast{indivisible2}(", ")\n", x0, xs...);                         \
        return simd_cast_without_last<Return,                                            \
                                      typename SimdArrayType_<T, N, V, N>::storage_type, \
                                      typename From::storage_type...>(                   \
            internal_data(x0), internal_data(xs)...);                                    \
    }                                                                                    \
    /* bisectable SimdArrayType_ (N = 2^n) && never too large */                         \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
         !std::is_same<Return, SimdArrayType_<T, N, V, M>>::value &&                     \
         is_less<N * sizeof...(From), Return::Size>::value && is_power_of_2<N>::value),  \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)                  \
    {                                                                                    \
        vc_debug_("simd_cast{bisectable}(", ")\n", x0, xs...);                           \
        return simd_cast_interleaved_argument_order<                                     \
            Return, typename SimdArrayType_<T, N, V, M>::storage_type0,                  \
            typename From::storage_type0...>(internal_data0(x0), internal_data0(xs)...,  \
                                             internal_data1(x0), internal_data1(xs)...); \
    }                                                                                    \
    /* bisectable SimdArrayType_ (N = 2^n) && input so large that at least the last      \
     * input can be dropped */                                                           \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
         !is_less<N * sizeof...(From), Return::Size>::value && is_power_of_2<N>::value), \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)                  \
    {                                                                                    \
        vc_debug_("simd_cast{bisectable2}(", ")\n", x0, xs...);                          \
        return simd_cast_without_last<Return, SimdArrayType_<T, N, V, M>, From...>(      \
            x0, xs...);                                                                  \
    }                                                                                    \
    /* remaining SimdArrayType_ input never larger (N != 2^n) */                         \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
         N * (1 + sizeof...(From)) <= Return::Size && !is_power_of_2<N>::value),         \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)                  \
    {                                                                                    \
        vc_debug_("simd_cast{remaining}(", ")\n", x0, xs...);                            \
        return simd_cast_impl_smaller_input<Return, N, SimdArrayType_<T, N, V, M>,       \
                                            From...>(x0, xs...);                         \
    }                                                                                    \
    /* remaining SimdArrayType_ input larger (N != 2^n) */                               \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
         N * (1 + sizeof...(From)) > Return::Size && !is_power_of_2<N>::value),          \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)                  \
    {                                                                                    \
        vc_debug_("simd_cast{remaining2}(", ")\n", x0, xs...);                           \
        return simd_cast_impl_larger_input<Return, N, SimdArrayType_<T, N, V, M>,        \
                                           From...>(x0, xs...);                          \
    }                                                                                    \
    /* a single bisectable SimdArrayType_ (N = 2^n) too large */                         \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M>     \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && N >= 2 * Return::Size && is_power_of_2<N>::value), Return>  \
        simd_cast(const SimdArrayType_<T, N, V, M> &x)                                   \
    {                                                                                    \
        vc_debug_("simd_cast{single bisectable}(", ")\n", x);                            \
        return simd_cast<Return>(internal_data0(x));                                     \
    }                                                                                    \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M>     \
    Vc_INTRINSIC Vc_CONST enable_if<(N != M && N > Return::Size &&                       \
                                     N < 2 * Return::Size && is_power_of_2<N>::value),   \
                                    Return>                                              \
    simd_cast(const SimdArrayType_<T, N, V, M> &x)                                       \
    {                                                                                    \
        vc_debug_("simd_cast{single bisectable2}(", ")\n", x);                           \
        return simd_cast<Return>(internal_data0(x), internal_data1(x));                  \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

Vc_SIMDARRAY_CASTS(SimdArray);
Vc_SIMDARRAY_CASTS(SimdMaskArray);
#undef Vc_SIMDARRAY_CASTS
2241 
2242 // simd_cast<T, offset>(SimdArray/-mask) {{{2
// Generates simd_cast<Return, offset>(x) overloads for SimdArray /
// SimdMaskArray arguments:
//  - offset == 0 degenerates to the plain simd_cast
//  - atomic x (N == M): forward the offset into the wrapped native vector
//  - offset window entirely inside the right member: recurse into
//    internal_data1 with the offset rebased (two variants, depending on
//    whether left_size<N>() is a multiple of Return::Size)
//  - offset window entirely inside the left member: recurse into
//    internal_data0
//  - window straddles both members: fall back to an element-wise copy loop
#define Vc_SIMDARRAY_CASTS(SimdArrayType_)                                               \
    /* offset == 0 is like without offset */                                             \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST enable_if<(offset == 0), Return> simd_cast(                    \
        const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG0)                               \
    {                                                                                    \
        vc_debug_("simd_cast{offset == 0}(", ")\n", offset, x);                          \
        return simd_cast<Return>(x);                                                     \
    }                                                                                    \
    /* forward to V */                                                                   \
    template <typename Return, int offset, typename T, std::size_t N, typename V>        \
    Vc_INTRINSIC Vc_CONST enable_if<(offset != 0), Return> simd_cast(                    \
        const SimdArrayType_<T, N, V, N> &x Vc_DUMMY_ARG1)                               \
    {                                                                                    \
        vc_debug_("simd_cast{offset, forward}(", ")\n", offset, x);                      \
        return simd_cast<Return, offset>(internal_data(x));                              \
    }                                                                                    \
    /* convert from right member of SimdArray */                                         \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() &&          \
                   offset != 0 && Common::left_size<N>() % Return::Size == 0),           \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG2)                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, right}(", ")\n", offset, x);                        \
        return simd_cast<Return, offset - Common::left_size<N>() / Return::Size>(        \
            internal_data1(x));                                                          \
    }                                                                                    \
    /* same as above except for odd cases where offset * Return::Size doesn't fit the    \
     * left side of the SimdArray */                                                     \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() &&          \
                   offset != 0 && Common::left_size<N>() % Return::Size != 0),           \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG3)                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, right, nofit}(", ")\n", offset, x);                 \
        return simd_cast_with_offset<Return,                                             \
                                     offset * Return::Size - Common::left_size<N>()>(    \
            internal_data1(x));                                                          \
    }                                                                                    \
    /* convert from left member of SimdArray */                                          \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && /*offset * Return::Size < Common::left_size<N>() &&*/                 \
         offset != 0 && (offset + 1) * Return::Size <= Common::left_size<N>()),          \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG4)                         \
    {                                                                                    \
        vc_debug_("simd_cast{offset, left}(", ")\n", offset, x);                         \
        return simd_cast<Return, offset>(internal_data0(x));                             \
    }                                                                                    \
    /* fallback to copying scalars */                                                    \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && (offset * Return::Size < Common::left_size<N>()) &&         \
                   offset != 0 && (offset + 1) * Return::Size > Common::left_size<N>()), \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG5)                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, copy scalars}(", ")\n", offset, x);                 \
        using R = typename Return::EntryType;                                            \
        Return r = Return::Zero();                                                       \
        for (std::size_t i = offset * Return::Size;                                      \
             i < std::min(N, (offset + 1) * Return::Size); ++i) {                        \
            r[i - offset * Return::Size] = static_cast<R>(x[i]);                         \
        }                                                                                \
        return r;                                                                        \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_SIMDARRAY_CASTS(SimdArray);
Vc_SIMDARRAY_CASTS(SimdMaskArray);
#undef Vc_SIMDARRAY_CASTS
2323 // simd_cast_drop_arguments (definitions) {{{2
2324 template <typename Return, typename From>
2325 Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x)
2326 {
2327  return simd_cast<Return>(x);
2328 }
// Keeps the trailing argument: enabled while the leading Froms... alone supply
// fewer entries than Return holds, so x is still needed for the cast.
// (Froms... is a leading, non-deduced pack; callers pass it explicitly.)
template <typename Return, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<Froms...>::value &&
               sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
              Return>
    simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x)
{
    return simd_cast<Return>(xs..., x);
}
// The following function can be implemented without the sizeof...(From) overload.
// However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
// function in two works around the issue.
// Drops the trailing (unnamed) argument whenever the remaining arguments still
// provide at least Return::Size entries, then recurses; sizeof...(Froms) != 0
// defers the single-argument case to the overload below.
template <typename Return, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST enable_if<
    (are_all_types_equal<From, Froms...>::value &&
     (1 + sizeof...(Froms)) * From::Size >= Return::Size && sizeof...(Froms) != 0),
    Return>
simd_cast_drop_arguments(Froms... xs, From x, From)
{
    return simd_cast_drop_arguments<Return, Froms...>(xs..., x);
}
2350 template <typename Return, typename From>
2351 Vc_INTRINSIC Vc_CONST
2352  enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
2353  simd_cast_drop_arguments(From x, From)
2354 {
2355  return simd_cast_drop_arguments<Return>(x);
2356 }
2357 
2358 // simd_cast_with_offset (definitions) {{{2
2359  template <typename Return, std::size_t offset, typename From>
2360  Vc_INTRINSIC Vc_CONST
2361  enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0),
2362  Return> simd_cast_with_offset(const From &x)
2363 {
2364  return simd_cast<Return, offset / Return::Size>(x);
2365 }
2366 template <typename Return, std::size_t offset, typename From>
2367 Vc_INTRINSIC Vc_CONST
2368  enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
2370  !Traits::isAtomicSimdArray<Return>::value) ||
2372  !Traits::isAtomicSimdMaskArray<Return>::value))),
2373  Return>
2374  simd_cast_with_offset(const From &x)
2375 {
2376  using R0 = typename Return::storage_type0;
2377  using R1 = typename Return::storage_type1;
2378  return {simd_cast_with_offset<R0, offset>(x),
2379  simd_cast_with_offset<R1, offset + R0::Size>(x)};
2380 }
2381 template <typename Return, std::size_t offset, typename From>
2382 Vc_INTRINSIC Vc_CONST
2383  enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
2385  Traits::isAtomicSimdArray<Return>::value) ||
2387  Traits::isAtomicSimdMaskArray<Return>::value))),
2388  Return>
2389  simd_cast_with_offset(const From &x)
2390 {
2391  return simd_cast<Return, offset / Return::Size>(x.shifted(offset % Return::Size));
2392 }
2393 template <typename Return, std::size_t offset, typename From, typename... Froms>
2394 Vc_INTRINSIC Vc_CONST
2395  enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
2396  simd_cast_with_offset(const From &x, const Froms &... xs)
2397 {
2398  return simd_cast<Return>(x, xs...);
2399 }
2400 
2401 // simd_cast_without_last (definition) {{{2
2402 template <typename Return, typename T, typename... From>
2403 Vc_INTRINSIC Vc_CONST Return simd_cast_without_last(const From &... xs, const T &)
2404 {
2405  return simd_cast<Return>(xs...);
2406 }
2407 
2408 // simd_cast_interleaved_argument_order (definitions) {{{2
2409 
2410 #ifdef Vc_MSVC
2411 // MSVC doesn't see that the Ts pack below can be empty and thus complains when extract_interleaved
2412 // is called with only 2 arguments. These overloads here are *INCORRECT standard C++*, but they make
2413 // MSVC do the right thing.
// I == 0: the interleaved sequence (a0, b0) starts with a0.
template <std::size_t I, typename T0>
Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(const T0 &a0, const T0 &)
{
    return a0;
}
// I == 1: the second element of the interleaved sequence (a0, b0) is b0.
template <std::size_t I, typename T0>
Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(const T0 &, const T0 &b0)
{
    return b0;
}
2424 #endif // Vc_MSVC
2425 
/// \internal
/// Selects element I == 0 of the interleaved sequence (a0, b0, a1, b1, ...):
/// that is the first argument a0. The two Ts packs (non-deduced; supplied
/// explicitly by the caller) cover the remaining a's and b's, ignored here.
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(const T0 &a0,
                                                                  const Ts &...,
                                                                  const T0 &,
                                                                  const Ts &...)
{
    return a0;
}
/// \internal
/// Selects element I == 1 of the interleaved sequence (a0, b0, a1, b1, ...):
/// that is the first argument of the second group, b0.
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(const T0 &,
                                                                  const Ts &...,
                                                                  const T0 &b0,
                                                                  const Ts &...)
{
    return b0;
}
/// \internal
/// Recursion for I > 1: drop a0 and b0 (one interleaved pair) and look up
/// index I - 2 in the remaining arguments.
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I > 1), T0> extract_interleaved(const T0 &,
                                                                 const Ts &... a,
                                                                 const T0 &,
                                                                 const Ts &... b)
{
    return extract_interleaved<I - 2, Ts...>(a..., b...);
}
/// \internal
/// Expands extract_interleaved for every index in Indexes, thereby reordering
/// the two argument groups into (a0, b0, a1, b1, ...) before calling
/// simd_cast<Return>.
template <typename Return, typename... Ts, std::size_t... Indexes>
Vc_INTRINSIC Vc_CONST Return
    simd_cast_interleaved_argument_order_1(index_sequence<Indexes...>, const Ts &... a,
                                           const Ts &... b)
{
    return simd_cast<Return>(extract_interleaved<Indexes, Ts...>(a..., b...)...);
}
2463 template <typename Return, typename... Ts>
2464 Vc_INTRINSIC Vc_CONST Return
2465  simd_cast_interleaved_argument_order(const Ts &... a, const Ts &... b)
2466 {
2467  using seq = make_index_sequence<sizeof...(Ts)*2>;
2468  return simd_cast_interleaved_argument_order_1<Return, Ts...>(seq(), a..., b...);
2469 }
2470 
2471 // conditional_assign {{{1
// Generates conditional_assign overloads for the compound-assignment
// Operators: conditional_assign<Op>(lhs, mask, rhs) applies `lhs(mask) op rhs`
// through the write-masked view returned by SimdArray::operator()(mask).
#define Vc_CONDITIONAL_ASSIGN(name_, op_)                                                \
    template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M,  \
              typename U>                                                                \
    Vc_INTRINSIC enable_if<O == Operator::name_, void> conditional_assign(               \
        SimdArray<T, N, V, VN> &lhs, M &&mask, U &&rhs)                                  \
    {                                                                                    \
        lhs(mask) op_ rhs;                                                               \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_CONDITIONAL_ASSIGN(          Assign,  =);
Vc_CONDITIONAL_ASSIGN(      PlusAssign, +=);
Vc_CONDITIONAL_ASSIGN(     MinusAssign, -=);
Vc_CONDITIONAL_ASSIGN(  MultiplyAssign, *=);
Vc_CONDITIONAL_ASSIGN(    DivideAssign, /=);
Vc_CONDITIONAL_ASSIGN( RemainderAssign, %=);
Vc_CONDITIONAL_ASSIGN(       XorAssign, ^=);
Vc_CONDITIONAL_ASSIGN(       AndAssign, &=);
Vc_CONDITIONAL_ASSIGN(        OrAssign, |=);
Vc_CONDITIONAL_ASSIGN( LeftShiftAssign,<<=);
Vc_CONDITIONAL_ASSIGN(RightShiftAssign,>>=);
#undef Vc_CONDITIONAL_ASSIGN
2493 
// Generates the unary conditional_assign overloads (masked increment /
// decrement): each returns the SimdArray value produced by the masked
// expression, matching the corresponding unmasked operator's result.
#define Vc_CONDITIONAL_ASSIGN(name_, expr_)                                              \
    template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M>  \
    Vc_INTRINSIC enable_if<O == Operator::name_, SimdArray<T, N, V, VN>>                 \
    conditional_assign(SimdArray<T, N, V, VN> &lhs, M &&mask)                            \
    {                                                                                    \
        return expr_;                                                                    \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_CONDITIONAL_ASSIGN(PostIncrement, lhs(mask)++);
Vc_CONDITIONAL_ASSIGN( PreIncrement, ++lhs(mask));
Vc_CONDITIONAL_ASSIGN(PostDecrement, lhs(mask)--);
Vc_CONDITIONAL_ASSIGN( PreDecrement, --lhs(mask));
#undef Vc_CONDITIONAL_ASSIGN
2507 // transpose_impl {{{1
2508 namespace Common
2509 {
2510 template <typename T, size_t N, typename V>
2511 inline void transpose_impl(
2512  TransposeTag<4, 4>, SimdArray<T, N, V, N> *Vc_RESTRICT r[],
2513  const TransposeProxy<SimdArray<T, N, V, N>, SimdArray<T, N, V, N>,
2515 {
2516  V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
2517  &internal_data(*r[2]), &internal_data(*r[3])};
2518  transpose_impl(TransposeTag<4, 4>(), &r2[0],
2519  TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
2520  internal_data(std::get<1>(proxy.in)),
2521  internal_data(std::get<2>(proxy.in)),
2522  internal_data(std::get<3>(proxy.in))});
2523 }
2524 
2525 template <typename T, typename V>
2526 inline void transpose_impl(
2527  TransposeTag<2, 4>, SimdArray<T, 4, V, 1> *Vc_RESTRICT r[],
2528  const TransposeProxy<SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>,
2530 {
2531  auto &lo = *r[0];
2532  auto &hi = *r[1];
2533  internal_data0(internal_data0(lo)) = internal_data0(std::get<0>(proxy.in));
2534  internal_data1(internal_data0(lo)) = internal_data0(std::get<1>(proxy.in));
2535  internal_data0(internal_data1(lo)) = internal_data0(std::get<2>(proxy.in));
2536  internal_data1(internal_data1(lo)) = internal_data0(std::get<3>(proxy.in));
2537  internal_data0(internal_data0(hi)) = internal_data1(std::get<0>(proxy.in));
2538  internal_data1(internal_data0(hi)) = internal_data1(std::get<1>(proxy.in));
2539  internal_data0(internal_data1(hi)) = internal_data1(std::get<2>(proxy.in));
2540  internal_data1(internal_data1(hi)) = internal_data1(std::get<3>(proxy.in));
2541 }
2542 
2543 template <typename T, typename V>
2544 inline void transpose_impl(
2545  TransposeTag<4, 4>, SimdArray<T, 1, V, 1> *Vc_RESTRICT r[],
2546  const TransposeProxy<SimdArray<T, 1, V, 1>, SimdArray<T, 1, V, 1>,
2548 {
2549  V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
2550  &internal_data(*r[2]), &internal_data(*r[3])};
2551  transpose_impl(TransposeTag<4, 4>(), &r2[0],
2552  TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
2553  internal_data(std::get<1>(proxy.in)),
2554  internal_data(std::get<2>(proxy.in)),
2555  internal_data(std::get<3>(proxy.in))});
2556 }
2557 
2558 template <typename T, size_t N, typename V>
2559 inline void transpose_impl(
2560  TransposeTag<4, 4>, SimdArray<T, N, V, 1> *Vc_RESTRICT r[],
2561  const TransposeProxy<SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>,
2563 {
2564  SimdArray<T, N, V, 1> *Vc_RESTRICT r0[4 / 2] = {r[0], r[1]};
2565  SimdArray<T, N, V, 1> *Vc_RESTRICT r1[4 / 2] = {r[2], r[3]};
2566  using H = SimdArray<T, 2>;
2567  transpose_impl(TransposeTag<2, 4>(), &r0[0],
2568  TransposeProxy<H, H, H, H>{internal_data0(std::get<0>(proxy.in)),
2569  internal_data0(std::get<1>(proxy.in)),
2570  internal_data0(std::get<2>(proxy.in)),
2571  internal_data0(std::get<3>(proxy.in))});
2572  transpose_impl(TransposeTag<2, 4>(), &r1[0],
2573  TransposeProxy<H, H, H, H>{internal_data1(std::get<0>(proxy.in)),
2574  internal_data1(std::get<1>(proxy.in)),
2575  internal_data1(std::get<2>(proxy.in)),
2576  internal_data1(std::get<3>(proxy.in))});
2577 }
2578 
2579 /* TODO:
2580 template <typename T, std::size_t N, typename V, std::size_t VSize>
2581 inline enable_if<(N > VSize), void> transpose_impl(
2582  std::array<SimdArray<T, N, V, VSize> * Vc_RESTRICT, 4> & r,
2583  const TransposeProxy<SimdArray<T, N, V, VSize>, SimdArray<T, N, V, VSize>,
2584  SimdArray<T, N, V, VSize>, SimdArray<T, N, V, VSize>> &proxy)
2585 {
2586  typedef SimdArray<T, N, V, VSize> SA;
2587  std::array<typename SA::storage_type0 * Vc_RESTRICT, 4> r0 = {
2588  {&internal_data0(*r[0]), &internal_data0(*r[1]), &internal_data0(*r[2]),
2589  &internal_data0(*r[3])}};
2590  transpose_impl(
2591  r0, TransposeProxy<typename SA::storage_type0, typename SA::storage_type0,
2592  typename SA::storage_type0, typename SA::storage_type0>{
2593  internal_data0(std::get<0>(proxy.in)),
2594  internal_data0(std::get<1>(proxy.in)),
2595  internal_data0(std::get<2>(proxy.in)),
2596  internal_data0(std::get<3>(proxy.in))});
2597 
2598  std::array<typename SA::storage_type1 * Vc_RESTRICT, 4> r1 = {
2599  {&internal_data1(*r[0]), &internal_data1(*r[1]), &internal_data1(*r[2]),
2600  &internal_data1(*r[3])}};
2601  transpose_impl(
2602  r1, TransposeProxy<typename SA::storage_type1, typename SA::storage_type1,
2603  typename SA::storage_type1, typename SA::storage_type1>{
2604  internal_data1(std::get<0>(proxy.in)),
2605  internal_data1(std::get<1>(proxy.in)),
2606  internal_data1(std::get<2>(proxy.in)),
2607  internal_data1(std::get<3>(proxy.in))});
2608 }
2609 */
2610 } // namespace Common
2611 
2612 // Traits static assertions {{{1
// Compile-time sanity checks: Traits::has_no_allocated_data must hold for
// SimdArray regardless of cv-qualification or reference-ness of the type.
static_assert(Traits::has_no_allocated_data<const volatile Vc::SimdArray<int, 4> &>::value, "");
static_assert(Traits::has_no_allocated_data<const volatile Vc::SimdArray<int, 4>>::value, "");
static_assert(Traits::has_no_allocated_data<volatile Vc::SimdArray<int, 4> &>::value, "");
static_assert(Traits::has_no_allocated_data<volatile Vc::SimdArray<int, 4>>::value, "");
static_assert(Traits::has_no_allocated_data<const Vc::SimdArray<int, 4> &>::value, "");
static_assert(Traits::has_no_allocated_data<const Vc::SimdArray<int, 4>>::value, "");
static_assert(Traits::has_no_allocated_data<Vc::SimdArray<int, 4>>::value, "");
static_assert(Traits::has_no_allocated_data<Vc::SimdArray<int, 4> &&>::value, "");
2621 // }}}1
2622 // InterleaveImpl for SimdArrays {{{
2623 namespace Detail
2624 {
2625 // atomic {{{1
// InterleaveImpl specialization for atomic SimdArrays (VectorSize == N): the
// array wraps exactly one native vector V, so both directions simply unwrap
// the arguments and forward to the native InterleaveImpl<V, ...>.
template <class T, size_t N, class V, size_t VSizeof>
struct InterleaveImpl<SimdArray<T, N, V, N>, N, VSizeof> {
    // Forward interleave of vv... into data at indexes i, on the wrapped vectors.
    template <class I, class... VV>
    static Vc_INTRINSIC void interleave(T *const data, const I &i, const VV &... vv)
    {
        InterleaveImpl<V, N, VSizeof>::interleave(data, i, internal_data(vv)...);
    }
    // Reverse direction: fill the wrapped vectors of vv... from data at indexes i.
    template <class I, class... VV>
    static Vc_INTRINSIC void deinterleave(T const *const data, const I &i, VV &... vv)
    {
        InterleaveImpl<V, N, VSizeof>::deinterleave(data, i, internal_data(vv)...);
    }
};
2639 
2640 // generic (TODO) {{{1
2641 /*
2642 template <class T, size_t N, class V, size_t Wt, size_t VSizeof>
2643 struct InterleaveImpl<SimdArray<T, N, V, Wt>, N, VSizeof> {
2644  using SA = SimdArray<T, N, V, Wt>;
2645  using SA0 = typename SA::storage_type0;
2646  using SA1 = typename SA::storage_type1;
2647 
2648  template <class I, class... VV>
2649  static Vc_INTRINSIC void interleave(T *const data, const I &i, const VV &... vv)
2650  {
2651  InterleaveImpl<SA0, SA0::size(), sizeof(SA0)>::interleave(
2652  data, i, // i needs to be split
2653  internal_data0(vv)...);
2654  InterleaveImpl<SA1, SA1::size(), sizeof(SA1)>::interleave(
2655  data, // how far to advance data?
2656  i, // i needs to be split
2657  internal_data1(vv)...);
2658  }
2659  template <class I, class... VV>
2660  static Vc_INTRINSIC void deinterleave(T const *const data, const I &i, VV &... vv)
2661  {
2662  InterleaveImpl<V, N, VSizeof>::deinterleave(data, i, internal_data(vv)...);
2663  }
2664 };
2665 */
2666 } // namespace Detail
2667 // }}}
2669 
2670 } // namespace Vc_VERSIONED_NAMESPACE
2671 
2672 // numeric_limits {{{1
2673 namespace std
2674 {
// std::numeric_limits specialization for SimdArray: inherits every scalar
// property from numeric_limits<T> and overrides the value-returning members so
// each yields a SimdArray (via implicit conversion from T, i.e. a broadcast of
// the scalar limit).
template <typename T, size_t N, typename V, size_t VN>
struct numeric_limits<Vc::SimdArray<T, N, V, VN>> : public numeric_limits<T> {
private:
    // Shorthand for the vector return type of the overridden members.
    using R = Vc::SimdArray<T, N, V, VN>;

public:
    static Vc_ALWAYS_INLINE Vc_CONST R max() noexcept { return numeric_limits<T>::max(); }
    static Vc_ALWAYS_INLINE Vc_CONST R min() noexcept { return numeric_limits<T>::min(); }
    static Vc_ALWAYS_INLINE Vc_CONST R lowest() noexcept
    {
        return numeric_limits<T>::lowest();
    }
    static Vc_ALWAYS_INLINE Vc_CONST R epsilon() noexcept
    {
        return numeric_limits<T>::epsilon();
    }
    static Vc_ALWAYS_INLINE Vc_CONST R round_error() noexcept
    {
        return numeric_limits<T>::round_error();
    }
    static Vc_ALWAYS_INLINE Vc_CONST R infinity() noexcept
    {
        return numeric_limits<T>::infinity();
    }
    static Vc_ALWAYS_INLINE Vc_CONST R quiet_NaN() noexcept
    {
        return numeric_limits<T>::quiet_NaN();
    }
    static Vc_ALWAYS_INLINE Vc_CONST R signaling_NaN() noexcept
    {
        return numeric_limits<T>::signaling_NaN();
    }
    static Vc_ALWAYS_INLINE Vc_CONST R denorm_min() noexcept
    {
        return numeric_limits<T>::denorm_min();
    }
};
2712 } // namespace std
2713 //}}}1
2714 
2715 #endif // VC_COMMON_SIMDARRAY_H_
2716 
2717 // vim: foldmethod=marker
SimdArray< T, N, V, M > ceil(const SimdArray< T, N, V, M > &x)
Applies the std:: ceil function component-wise and concurrently.
Definition: simdarray.h:1738
static SimdArray generate(const G &gen)
Generate a vector object from return values of gen (static variant of fill).
Definition: simdarray.h:711
value_type operator[](size_t index) const noexcept
This operator can be used to read scalar entries of the vector.
Definition: simdarray.h:1027
SimdArray< T, N, V, M > floor(const SimdArray< T, N, V, M > &x)
Applies the std:: floor function component-wise and concurrently.
Definition: simdarray.h:1743
The main vector class for expressing data parallelism.
Definition: fwddecl.h:53
constexpr VectorSpecialInitializerIndexesFromZero IndexesFromZero
The special object Vc::IndexesFromZero can be used to construct Vector objects initialized to values ...
Definition: types.h:90
Vc::Vector< T > min(const Vc::Vector< T > &x, const Vc::Vector< T > &y)
SimdArray rotated(int amount) const
Rotate vector entries to the left by amount.
Definition: simdarray.h:1239
std::ostream & operator<<(std::ostream &out, const Vc::Vector< T, Abi > &v)
Prints the contents of a vector into a stream object.
Definition: IO:117
SimdArray< T, N, V, M > asin(const SimdArray< T, N, V, M > &x)
Applies the std:: asin function component-wise and concurrently.
Definition: simdarray.h:1735
UnalignedTag DefaultLoadTag
The default load tag type uses unaligned (non-streaming) loads.
SimdArray< T, N, V, M > rsqrt(const SimdArray< T, N, V, M > &x)
Applies the std:: rsqrt function component-wise and concurrently.
Definition: simdarray.h:1788
SimdArray apply(F &&f, const mask_type &k) const
As above, but skip the entries where mask is not set.
Definition: simdarray.h:1104
result_vector_type< L, R > operator-(L &&lhs, R &&rhs)
Applies - component-wise and concurrently.
Definition: simdarray.h:1662
Vc::Vector< T > max(const Vc::Vector< T > &x, const Vc::Vector< T > &y)
Definition: vector.h:249
Identifies any possible SimdArray<T, N> type (independent of const/volatile or reference) ...
Definition: type_traits.h:152
SimdArray< T, N, V, M > log10(const SimdArray< T, N, V, M > &x)
Applies the std:: log10 function component-wise and concurrently.
Definition: simdarray.h:1784
SimdArray< T, N, V, M > min(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std:: min function component-wise and concurrently.
Definition: simdarray.h:1798
static SimdArray IndexesFromZero()
Returns a vector with the entries initialized to 0, 1, 2, 3, 4, 5, ...
Definition: simdarray.h:699
SimdArray< T, N, V, M > max(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std::max function component-wise and concurrently.
Definition: simdarray.h:1799
Identifies any possible SimdMaskArray<T, N> type (independent of const/volatile or reference) ...
Definition: type_traits.h:158
SimdArray shifted(int amount) const
Shift vector entries to the left by amount; shifting in zeros.
Definition: simdarray.h:1111
Data-parallel arithmetic type with user-defined number of elements.
Definition: fwddecl.h:82
The value member will either be the number of SIMD vector entries or 0 if T is not a SIMD type...
Definition: type_traits.h:181
SimdArray< T, N, V, M > sin(const SimdArray< T, N, V, M > &x)
Applies the std::sin function component-wise and concurrently.
Definition: simdarray.h:1789
Data-parallel mask type with user-defined number of boolean elements.
Definition: fwddecl.h:86
SimdArray< T, N, V, M > exp(const SimdArray< T, N, V, M > &x)
Applies the std::exp function component-wise and concurrently.
Definition: simdarray.h:1741
SimdArray< T, N, V, M > sqrt(const SimdArray< T, N, V, M > &x)
Applies the std::sqrt function component-wise and concurrently.
Definition: simdarray.h:1796
SimdArray apply(F &&f) const
Call f on every entry of the vector and return the results as a new vector.
Definition: simdarray.h:1099
SimdArray< T, N, V, M > reciprocal(const SimdArray< T, N, V, M > &x)
Applies the std::reciprocal function component-wise and concurrently.
Definition: simdarray.h:1786
void assign(SimdizeDetail::Adapter< S, T, N > &a, size_t i, const S &x)
Assigns one scalar object x to a SIMD slot at offset i in the simdized object a.
Definition: simdize.h:1216
static SimdArray Zero()
Returns a vector with the entries initialized to zero.
Definition: simdarray.h:687
Identifies any SIMD vector type (independent of implementation or whether it's SimdArray<T, N>).
Definition: type_traits.h:143
SimdMaskArray< T, N, V, M > isnegative(const SimdArray< T, N, V, M > &x)
Applies the std::isnegative function component-wise and concurrently.
Definition: simdarray.h:1770
Common::WriteMaskedVector< SimdArray, mask_type > operator()(const mask_type &mask)
Writemask the vector before an assignment.
Definition: simdarray.h:1035
SimdArray< T, N, V, M > abs(const SimdArray< T, N, V, M > &x)
Applies the std::abs function component-wise and concurrently.
Definition: simdarray.h:1734
result_vector_type< L, R > operator+(L &&lhs, R &&rhs)
Applies + component-wise and concurrently.
Definition: simdarray.h:1662
SimdArray< T, N, V, M > trunc(const SimdArray< T, N, V, M > &x)
Applies the std::trunc function component-wise and concurrently.
Definition: simdarray.h:1797
Type trait that tells whether a container stores its data inside the object or inside allocated memory...
SimdArray< T, N, V, M > exponent(const SimdArray< T, N, V, M > &x)
Applies the std::exponent function component-wise and concurrently.
Definition: simdarray.h:1742
static constexpr std::size_t size()
Returns N, the number of scalar components in an object of this type.
Definition: simdarray.h:661
value_type EntryType
The type of the elements (i.e. T)
Definition: simdarray.h:670
void deinterleave(V *a, V *b, const M *memory, A align)
Definition: deinterleave.h:76
SimdArray< T, N, V, M > copysign(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std::copysign function component-wise and concurrently.
Definition: simdarray.h:1739
constexpr AlignedTag Aligned
Use this object for a flags parameter to request aligned loads and stores.
SimdArray operator+() const
Returns a copy of itself.
Definition: simdarray.h:933
SimdArray< T, N, V, M > atan(const SimdArray< T, N, V, M > &x)
Applies the std::atan function component-wise and concurrently.
Definition: simdarray.h:1736
void gather(const MT *mem, const IT &indexes)
Gather function.
Definition: simdarray.h:214
SimdArray(value_type a)
Broadcast Constructor.
Definition: simdarray.h:736
SimdArray< T, N, V, M > log2(const SimdArray< T, N, V, M > &x)
Applies the std::log2 function component-wise and concurrently.
Definition: simdarray.h:1785
SimdMaskArray< T, N, V, M > isfinite(const SimdArray< T, N, V, M > &x)
Applies the std::isfinite function component-wise and concurrently.
Definition: simdarray.h:1751
static SimdArray Random()
Returns a vector with pseudo-random entries.
Definition: simdarray.h:705
SimdMaskArray< T, N, V, M > isnan(const SimdArray< T, N, V, M > &x)
Applies the std::isnan function component-wise and concurrently.
Definition: simdarray.h:1753
constexpr VectorSpecialInitializerZero Zero
The special object Vc::Zero can be used to construct Vector and Mask objects initialized to zero/fals...
Definition: types.h:80
Adapter< S, T, N > shifted(const Adapter< S, T, N > &a, int shift)
Returns a new vectorized object where each entry is shifted by shift.
Definition: simdize.h:1064
void scatter(MT *mem, IT &&indexes) const
Scatter function.
Definition: simdarray.h:99
SimdArray< T, N, V, M > cos(const SimdArray< T, N, V, M > &x)
Applies the std::cos function component-wise and concurrently.
Definition: simdarray.h:1740
T value_type
The type of the elements (i.e. T)
Definition: simdarray.h:643
SimdArray< T, N > frexp(const SimdArray< T, N > &x, SimdArray< int, N > *e)
Applies the std::frexp function component-wise and concurrently.
Definition: simdarray.h:1773
SimdArray< T, N, V, M > log(const SimdArray< T, N, V, M > &x)
Applies the std::log function component-wise and concurrently.
Definition: simdarray.h:1783
Vector Classes Namespace.
Definition: dox.h:584
constexpr VectorSpecialInitializerOne One
The special object Vc::One can be used to construct Vector and Mask objects initialized to one/true...
Definition: types.h:85
std::pair< V, V > interleave(const V &a, const V &b)
Interleaves the entries from a and b into two vectors of the same type.
Definition: interleave.h:55
constexpr std::size_t MemoryAlignment
Specifies the most conservative memory alignment necessary for aligned loads and stores of Vector typ...
Definition: vector.h:215
reference operator[](size_t i) noexcept
This operator can be used to modify scalar entries of the vector.
Definition: simdarray.h:1020
SimdArray< T, N, V, M > round(const SimdArray< T, N, V, M > &x)
Applies the std::round function component-wise and concurrently.
Definition: simdarray.h:1787
void sincos(const SimdArray< T, N > &x, SimdArray< T, N > *sin, SimdArray< T, N > *cos)
Determines sine and cosine concurrently and component-wise on x.
Definition: simdarray.h:1792
SimdArray< T, N > fma(const SimdArray< T, N > &a, const SimdArray< T, N > &b, const SimdArray< T, N > &c)
Applies the std::fma function component-wise and concurrently.
Definition: simdarray.h:1746
SimdArray reversed() const
Returns a vector with all components reversed.
Definition: simdarray.h:1322
SimdArray< T, N > ldexp(const SimdArray< T, N > &x, const SimdArray< int, N > &e)
Applies the std::ldexp function component-wise and concurrently.
Definition: simdarray.h:1779
SimdArray copySign(const SimdArray &x) const
Copies the signs of the components of reference to the components of the current vector, returning the result.
Definition: simdarray.h:1427
static SimdArray One()
Returns a vector with the entries initialized to one.
Definition: simdarray.h:693
SimdArray sorted() const
Return a sorted copy of the vector.
Definition: simdarray.h:1344
constexpr UnalignedTag Unaligned
Use this object for a flags parameter to request unaligned loads and stores.
SimdArray< T, N, V, M > atan2(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std::atan2 function component-wise and concurrently.
Definition: simdarray.h:1737
SimdMaskArray< T, N, V, M > isinf(const SimdArray< T, N, V, M > &x)
Applies the std::isinf function component-wise and concurrently.
Definition: simdarray.h:1752