Vc  1.3.2-dev
SIMD Vector Classes for C++
simdarray.h
1 /* This file is part of the Vc library. {{{
2 Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>
3 
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions are met:
6  * Redistributions of source code must retain the above copyright
7  notice, this list of conditions and the following disclaimer.
8  * Redistributions in binary form must reproduce the above copyright
9  notice, this list of conditions and the following disclaimer in the
10  documentation and/or other materials provided with the distribution.
11  * Neither the names of contributing organizations nor the
12  names of its contributors may be used to endorse or promote products
13  derived from this software without specific prior written permission.
14 
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 
26 }}}*/
27 
28 #ifndef VC_COMMON_SIMDARRAY_H_
29 #define VC_COMMON_SIMDARRAY_H_
30 
31 //#define Vc_DEBUG_SIMD_CAST 1
32 //#define Vc_DEBUG_SORTED 1
33 #if defined Vc_DEBUG_SIMD_CAST || defined Vc_DEBUG_SORTED
34 #include <Vc/IO>
35 #endif
36 
37 #include <array>
38 
39 #include "writemaskedvector.h"
40 #include "simdarrayhelper.h"
41 #include "simdmaskarray.h"
42 #include "utility.h"
43 #include "interleave.h"
44 #include "indexsequence.h"
45 #include "transpose.h"
46 #include "macros.h"
47 
48 namespace Vc_VERSIONED_NAMESPACE
49 {
50 // internal namespace (product & sum helper) {{{1
51 namespace internal
{
// Binary fold helpers used by the SimdArray reduction machinery (the
// Vc_REDUCTION_FUNCTION_ macro in the generic SimdArray below): combine two
// partial reduction results element-wise with * and + respectively.
52 {
53 template <typename T> T Vc_INTRINSIC Vc_PURE product_helper_(const T &l, const T &r) { return l * r; }
54 template <typename T> T Vc_INTRINSIC Vc_PURE sum_helper_(const T &l, const T &r) { return l + r; }
55 } // namespace internal
56 
57 // min & max declarations {{{1
// Forward declarations of the element-wise min/max free functions so that the
// SimdArray class definitions below can refer to them before they are defined.
58 template <typename T, std::size_t N, typename V, std::size_t M>
59 inline SimdArray<T, N, V, M> min(const SimdArray<T, N, V, M> &x,
60  const SimdArray<T, N, V, M> &y);
61 template <typename T, std::size_t N, typename V, std::size_t M>
62 inline SimdArray<T, N, V, M> max(const SimdArray<T, N, V, M> &x,
63  const SimdArray<T, N, V, M> &y);
64 
65 // SimdArray class {{{1
68 
69 // atomic SimdArray {{{1
70 #define Vc_CURRENT_CLASS_NAME SimdArray
71 
// "Atomic" specialization of SimdArray: chosen when the requested element
// count N equals the width of the selected native vector type, so the whole
// array fits in a single VectorType member ("data") and every operation simply
// forwards to it (no recursive splitting into data0/data1 as in the generic
// SimdArray below).
// NOTE(review): this listing was produced by an extractor that dropped a
// number of original lines (Doxygen comments, e.g. 66-79, 181, 273, 331,
// 351-356, 418, 437, 457); the code lines themselves appear intact.
80 template <typename T, std::size_t N, typename VectorType_>
81 class SimdArray<T, N, VectorType_, N>
82 {
83  static_assert(std::is_same<T, double>::value || std::is_same<T, float>::value ||
84  std::is_same<T, int32_t>::value ||
85  std::is_same<T, uint32_t>::value ||
86  std::is_same<T, int16_t>::value ||
87  std::is_same<T, uint16_t>::value,
88  "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, "
89  "int16_t, uint16_t }");
90 
91 public:
92  using VectorType = VectorType_;
93  using vector_type = VectorType;
94  using storage_type = vector_type;
95  using vectorentry_type = typename vector_type::VectorEntryType;
96  using value_type = T;
97  using mask_type = SimdMaskArray<T, N, vector_type>;
98  using index_type = SimdArray<int, N>;
// number of elements (== N, the template parameter)
99  static constexpr std::size_t size() { return N; }
100  using Mask = mask_type;
101  using MaskType = Mask;
102  using MaskArgument = const MaskType &;
103  using VectorEntryType = vectorentry_type;
104  using EntryType = value_type;
105  using IndexType = index_type;
106  using AsArg = const SimdArray &;
// proxy type returned by the non-const operator[] (lvalue element access)
107  using reference = Detail::ElementReference<SimdArray>;
108  static constexpr std::size_t Size = size();
109  static constexpr std::size_t MemoryAlignment = storage_type::MemoryAlignment;
110 
111  // zero init
112 #ifndef Vc_MSVC // bogus error C2580
113  Vc_INTRINSIC SimdArray() = default;
114 #endif
115 
116  // default copy ctor/operator
117  Vc_INTRINSIC SimdArray(const SimdArray &) = default;
118  Vc_INTRINSIC SimdArray(SimdArray &&) = default;
119  Vc_INTRINSIC SimdArray &operator=(const SimdArray &) = default;
120 
121  // broadcast
// Fill all N elements with the scalar a. The const&/&/&& triple keeps every
// value category viable; the extra U overload below additionally accepts a
// plain `int` argument when value_type is not int (so e.g. `float_v x = 1;`
// compiles without a narrowing conversion at the call site).
122  Vc_INTRINSIC SimdArray(const value_type &a) : data(a) {}
123  Vc_INTRINSIC SimdArray(value_type &a) : data(a) {}
124  Vc_INTRINSIC SimdArray(value_type &&a) : data(a) {}
125  template <
126  typename U,
127  typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
128  Vc_INTRINSIC SimdArray(U a)
129  : SimdArray(static_cast<value_type>(a))
130  {
131  }
132 
133  // implicit casts
// The three overloads cover source SimdArrays whose storage consists of one,
// two, or four native vectors respectively; each feeds the pieces obtained
// via internal_data0/internal_data1 into a single simd_cast.
134  template <typename U, typename V>
135  Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x, enable_if<N == V::Size> = nullarg)
136  : data(simd_cast<vector_type>(internal_data(x)))
137  {
138  }
139  template <typename U, typename V>
140  Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x,
141  enable_if<(N > V::Size && N <= 2 * V::Size)> = nullarg)
142  : data(simd_cast<vector_type>(internal_data(internal_data0(x)), internal_data(internal_data1(x))))
143  {
144  }
145  template <typename U, typename V>
146  Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x,
147  enable_if<(N > 2 * V::Size && N <= 4 * V::Size)> = nullarg)
148  : data(simd_cast<vector_type>(internal_data(internal_data0(internal_data0(x))),
149  internal_data(internal_data1(internal_data0(x))),
150  internal_data(internal_data0(internal_data1(x))),
151  internal_data(internal_data1(internal_data1(x)))))
152  {
153  }
154 
// Construction from a Segment (a view on one of `Pieces` slices of a larger
// vector); simd_cast<vector_type, Index> selects the Index'th slice.
155  template <typename V, std::size_t Pieces, std::size_t Index>
156  Vc_INTRINSIC SimdArray(Common::Segment<V, Pieces, Index> &&x)
157  : data(simd_cast<vector_type, Index>(x.data))
158  {
159  }
160 
// initializer_list construction: performs an unaligned load from the list's
// backing array; the size check is only a runtime assert until the constexpr
// variant below can be enabled.
161  Vc_INTRINSIC SimdArray(const std::initializer_list<value_type> &init)
162  : data(init.begin(), Vc::Unaligned)
163  {
164 #if defined Vc_CXX14 && 0 // doesn't compile yet
165  static_assert(init.size() == size(), "The initializer_list argument to "
166  "SimdArray<T, N> must contain exactly N "
167  "values.");
168 #else
169  Vc_ASSERT(init.size() == size());
170 #endif
171  }
172 
173  // implicit conversion from underlying vector_type
174  template <
175  typename V,
176  typename = enable_if<Traits::is_simd_vector<V>::value && !Traits::isSimdArray<V>::value>>
177  explicit Vc_INTRINSIC SimdArray(const V &x)
178  : data(simd_cast<vector_type>(x))
179  {
180  }
182  // implicit conversion to Vector<U, AnyAbi> for if Vector<U, AnyAbi>::size() == N and
183  // T implicitly convertible to U
184  template <
185  typename U, typename A,
186  typename = enable_if<std::is_convertible<T, U>::value && Vector<U, A>::Size == N>>
187  Vc_INTRINSIC operator Vector<U, A>() const
188  {
189  return simd_cast<Vector<U, A>>(data);
190  }
191 
// The gather/scatter member-function interface is textually shared between
// SimdArray implementations via these mid-class includes; the out-of-line
// definitions follow after the class.
192 #include "gatherinterface.h"
193 #include "scatterinterface.h"
194 
195  // forward all remaining ctors
// Catch-all: any argument pack not claimed by the cast/gather/initializer_list
// constructors above is forwarded verbatim to the underlying vector's ctor.
196  template <typename... Args,
197  typename = enable_if<!Traits::is_cast_arguments<Args...>::value &&
198  !Traits::is_gather_signature<Args...>::value &&
199  !Traits::is_initializer_list<Args...>::value>>
200  explicit Vc_INTRINSIC SimdArray(Args &&... args)
201  : data(std::forward<Args>(args)...)
202  {
203  }
204 
// IndexesFromZero with a compile-time offset: initialize to {0,1,...,N-1} and
// then add Offset element-wise (used when a larger SimdArray is split).
205  template <std::size_t Offset>
206  explicit Vc_INTRINSIC SimdArray(
207  Common::AddOffset<VectorSpecialInitializerIndexesFromZero, Offset>)
208  : data(Vc::IndexesFromZero)
209  {
210  data += value_type(Offset);
211  }
// zero / inverted-zero / QNaN assignment, optionally write-masked — all
// forwarded to the underlying vector (masks unwrapped via internal_data).
213  Vc_INTRINSIC void setZero() { data.setZero(); }
214  Vc_INTRINSIC void setZero(mask_type k) { data.setZero(internal_data(k)); }
215  Vc_INTRINSIC void setZeroInverted() { data.setZeroInverted(); }
216  Vc_INTRINSIC void setZeroInverted(mask_type k) { data.setZeroInverted(internal_data(k)); }
217 
218  Vc_INTRINSIC void setQnan() { data.setQnan(); }
219  Vc_INTRINSIC void setQnan(mask_type m) { data.setQnan(internal_data(m)); }
220 
221  // internal: execute specified Operation
// fromOperation builds a result in-place: r.data is passed as the output slot
// to unpackArgumentsAuto; callOperation is the result-less variant (nullptr
// output).
222  template <typename Op, typename... Args>
223  static Vc_INTRINSIC SimdArray fromOperation(Op op, Args &&... args)
224  {
225  SimdArray r;
226  Common::unpackArgumentsAuto(op, r.data, std::forward<Args>(args)...);
227  return r;
228  }
229 
230  template <typename Op, typename... Args>
231  static Vc_INTRINSIC void callOperation(Op op, Args &&... args)
232  {
233  Common::unpackArgumentsAuto(op, nullptr, std::forward<Args>(args)...);
234  }
235 
// named constants / generators
236  static Vc_INTRINSIC SimdArray Zero()
237  {
238  return SimdArray(Vc::Zero);
239  }
240  static Vc_INTRINSIC SimdArray One()
241  {
242  return SimdArray(Vc::One);
243  }
244  static Vc_INTRINSIC SimdArray IndexesFromZero()
245  {
246  return SimdArray(Vc::IndexesFromZero);
247  }
248  static Vc_INTRINSIC SimdArray Random()
249  {
250  return fromOperation(Common::Operations::random());
251  }
252 
// loads/stores: arguments (pointer, flags, mask, ...) forwarded unchanged
253  template <typename... Args> Vc_INTRINSIC void load(Args &&... args)
254  {
255  data.load(std::forward<Args>(args)...);
256  }
257 
258  template <typename... Args> Vc_INTRINSIC void store(Args &&... args) const
259  {
260  data.store(std::forward<Args>(args)...);
261  }
262 
// unary operators: ! yields a mask, - and ~ yield element-wise results
263  Vc_INTRINSIC mask_type operator!() const
264  {
265  return {!data};
266  }
267 
268  Vc_INTRINSIC SimdArray operator-() const
269  {
270  return {-data};
271  }
272 
274  Vc_INTRINSIC SimdArray operator+() const { return *this; }
275 
276  Vc_INTRINSIC SimdArray operator~() const
277  {
278  return {~data};
279  }
280 
// shift by scalar — SFINAE-restricted to integral element and shift types
281  template <typename U,
282  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
283  Vc_INTRINSIC Vc_CONST SimdArray operator<<(U x) const
284  {
285  return {data << x};
286  }
287  template <typename U,
288  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
289  Vc_INTRINSIC SimdArray &operator<<=(U x)
290  {
291  data <<= x;
292  return *this;
293  }
294  template <typename U,
295  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
296  Vc_INTRINSIC Vc_CONST SimdArray operator>>(U x) const
297  {
298  return {data >> x};
299  }
300  template <typename U,
301  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
302  Vc_INTRINSIC SimdArray &operator>>=(U x)
303  {
304  data >>= x;
305  return *this;
306  }
307 
// Generates `operator op` and `operator op=` pairs (for all arithmetic,
// bitwise and vector-shift operators) that forward to the underlying vector.
308 #define Vc_BINARY_OPERATOR_(op) \
309  Vc_INTRINSIC Vc_CONST SimdArray operator op(const SimdArray &rhs) const \
310  { \
311  return {data op rhs.data}; \
312  } \
313  Vc_INTRINSIC SimdArray &operator op##=(const SimdArray &rhs) \
314  { \
315  data op## = rhs.data; \
316  return *this; \
317  }
318  Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_);
319  Vc_ALL_BINARY(Vc_BINARY_OPERATOR_);
320  Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_);
321 #undef Vc_BINARY_OPERATOR_
322 
// Generates the comparison operators; each returns a mask_type.
323 #define Vc_COMPARES(op) \
324  Vc_INTRINSIC mask_type operator op(const SimdArray &rhs) const \
325  { \
326  return {data op rhs.data}; \
327  }
328  Vc_ALL_COMPARES(Vc_COMPARES);
329 #undef Vc_COMPARES
330 
332  Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC MaskType isNegative() const
333  {
334  return {isnegative(data)};
335  }
336 
337 private:
// get/set are the element accessors used by Detail::ElementReference (the
// proxy returned from the non-const operator[] below).
338  friend reference;
339  Vc_INTRINSIC static value_type get(const SimdArray &o, int i) noexcept
340  {
341  return o.data[i];
342  }
343  template <typename U>
344  Vc_INTRINSIC static void set(SimdArray &o, int i, U &&v) noexcept(
345  noexcept(std::declval<value_type &>() = v))
346  {
347  o.data[i] = v;
348  }
349 
350 public:
// element access: lvalue access through the reference proxy, rvalue access
// returns the value directly
357  Vc_INTRINSIC reference operator[](size_t i) noexcept
358  {
359  static_assert(noexcept(reference{std::declval<SimdArray &>(), int()}), "");
360  return {*this, int(i)};
361  }
362  Vc_INTRINSIC value_type operator[](size_t i) const noexcept
363  {
364  return get(*this, int(i));
365  }
366 
// write-masked view: `x(mask) = expr` assigns only where mask is true
367  Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(const mask_type &k)
368  {
369  return {*this, k};
370  }
371 
372  Vc_INTRINSIC void assign(const SimdArray &v, const mask_type &k)
373  {
374  data.assign(v.data, internal_data(k));
375  }
376 
377  // reductions ////////////////////////////////////////////////////////
// Generates min()/max()/product()/sum(), each with an unmasked and a masked
// overload forwarding to the underlying vector's reduction of the same name.
378 #define Vc_REDUCTION_FUNCTION_(name_) \
379  Vc_INTRINSIC Vc_PURE value_type name_() const { return data.name_(); } \
380  Vc_INTRINSIC Vc_PURE value_type name_(mask_type mask) const \
381  { \
382  return data.name_(internal_data(mask)); \
383  } \
384  Vc_NOTHING_EXPECTING_SEMICOLON
385  Vc_REDUCTION_FUNCTION_(min);
386  Vc_REDUCTION_FUNCTION_(max);
387  Vc_REDUCTION_FUNCTION_(product);
388  Vc_REDUCTION_FUNCTION_(sum);
389 #undef Vc_REDUCTION_FUNCTION_
390  Vc_INTRINSIC Vc_PURE SimdArray partialSum() const { return data.partialSum(); }
391 
// apply f to every element (optionally only where k is set)
392  template <typename F> Vc_INTRINSIC SimdArray apply(F &&f) const
393  {
394  return {data.apply(std::forward<F>(f))};
395  }
396  template <typename F> Vc_INTRINSIC SimdArray apply(F &&f, const mask_type &k) const
397  {
398  return {data.apply(std::forward<F>(f), k)};
399  }
400 
// element shifts/rotations, forwarded to the underlying vector; the shiftIn
// overload shifts values from a second array into the vacated positions
401  Vc_INTRINSIC SimdArray shifted(int amount) const
402  {
403  return {data.shifted(amount)};
404  }
405 
406  template <std::size_t NN>
407  Vc_INTRINSIC SimdArray shifted(int amount, const SimdArray<value_type, NN> &shiftIn)
408  const
409  {
410  return {data.shifted(amount, simd_cast<VectorType>(shiftIn))};
411  }
412 
413  Vc_INTRINSIC SimdArray rotated(int amount) const
414  {
415  return {data.rotated(amount)};
416  }
417 
419  Vc_DEPRECATED("use exponent(x) instead") Vc_INTRINSIC SimdArray exponent() const
420  {
421  return {exponent(data)};
422  }
423 
424  Vc_INTRINSIC SimdArray interleaveLow(SimdArray x) const
425  {
426  return {data.interleaveLow(x.data)};
427  }
428  Vc_INTRINSIC SimdArray interleaveHigh(SimdArray x) const
429  {
430  return {data.interleaveHigh(x.data)};
431  }
432 
433  Vc_INTRINSIC SimdArray reversed() const
434  {
435  return {data.reversed()};
436  }
438  Vc_INTRINSIC SimdArray sorted() const
439  {
440  return {data.sorted()};
441  }
442 
// generate: element i is initialized with gen(i)
443  template <typename G> static Vc_INTRINSIC SimdArray generate(const G &gen)
444  {
445  return {VectorType::generate(gen)};
446  }
447 
448  Vc_DEPRECATED("use copysign(x, y) instead") Vc_INTRINSIC SimdArray
449  copySign(const SimdArray &reference) const
450  {
451  return {Vc::copysign(data, reference.data)};
452  }
453 
// the internal_data free functions (defined after the class) expose `data`
454  friend VectorType &internal_data<>(SimdArray &x);
455  friend const VectorType &internal_data<>(const SimdArray &x);
456 
458  Vc_INTRINSIC SimdArray(VectorType &&x) : data(std::move(x)) {}
459 
460  Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(storage_type));
461 
462 private:
463  // The alignas attribute attached to the class declaration above is ignored by ICC
464  // 17.0.0 (at least). So just move the alignas attribute down here where it works for
465  // all compilers.
466  alignas(static_cast<std::size_t>(
467  Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value * sizeof(VectorType_) /
468  VectorType_::size()>::value)) storage_type data;
469 };
// Out-of-class definition of the static data member `Size` (required pre-C++17
// for odr-use of the in-class constexpr declaration).
470 template <typename T, std::size_t N, typename VectorType> constexpr std::size_t SimdArray<T, N, VectorType, N>::Size;
471 template <typename T, std::size_t N, typename VectorType>
// Grants free-function access to the single native vector stored inside the
// atomic SimdArray (befriended by the class above). Vc_INTRINSIC is omitted
// for MSVC — presumably to work around a compiler issue; TODO confirm against
// upstream history.
473 template <typename T, std::size_t N, typename VectorType>
474 #ifndef Vc_MSVC
475 Vc_INTRINSIC
476 #endif
477 VectorType &internal_data(SimdArray<T, N, VectorType, N> &x)
478 {
479  return x.data;
480 }
// const overload of internal_data: read-only access to the wrapped native
// vector of the atomic SimdArray.
481 template <typename T, std::size_t N, typename VectorType>
482 #ifndef Vc_MSVC
483 Vc_INTRINSIC
484 #endif
485 const VectorType &internal_data(const SimdArray<T, N, VectorType, N> &x)
486 {
487  return x.data;
488 }
489 
490 // unpackIfSegment {{{2
// Identity pass-through for ordinary index arguments; the Segment overload
// materializes the slice as a SimdArray so gather/scatter below can consume it
// uniformly.
491 template <typename T> T unpackIfSegment(T &&x) { return std::forward<T>(x); }
492 template <typename T, size_t Pieces, size_t Index>
493 auto unpackIfSegment(Common::Segment<T, Pieces, Index> &&x) -> decltype(x.asSimdArray())
494 {
495  return x.asSimdArray();
496 }
497 
498 // gatherImplementation {{{2
// Unmasked gather for the atomic SimdArray: load elements mem[indexes[i]] by
// forwarding to the underlying vector's gather (declared in gatherinterface.h,
// included inside the class).
499 template <typename T, std::size_t N, typename VectorType>
500 template <typename MT, typename IT>
501 inline void SimdArray<T, N, VectorType, N>::gatherImplementation(const MT *mem,
502  const IT &indexes)
503 {
504  data.gather(mem, unpackIfSegment(indexes));
505 }
// Masked gather: only elements whose mask bit is set are loaded.
// NOTE(review): the extraction dropped original line 508, which carried the
// function header (`inline void SimdArray<T, N, VectorType,
// N>::gatherImplementation(const MT *mem,`) — restore it from the upstream
// file before compiling.
506 template <typename T, std::size_t N, typename VectorType>
507 template <typename MT, typename IT>
509  const IT &indexes,
510  MaskArgument mask)
511 {
512  data.gather(mem, unpackIfSegment(indexes), mask);
513 }
514 
515 // scatterImplementation {{{2
// Unmasked scatter: store element i to mem[indexes[i]].
// NOTE(review): the extraction dropped original line 518, which carried the
// function header (`inline void SimdArray<T, N, VectorType,
// N>::scatterImplementation(MT *mem,`) — restore it from the upstream file
// before compiling.
516 template <typename T, std::size_t N, typename VectorType>
517 template <typename MT, typename IT>
519  IT &&indexes) const
520 {
521  data.scatter(mem, unpackIfSegment(std::forward<IT>(indexes)));
522 }
// Masked scatter: only elements whose mask bit is set are written.
523 template <typename T, std::size_t N, typename VectorType>
524 template <typename MT, typename IT>
525 inline void SimdArray<T, N, VectorType, N>::scatterImplementation(MT *mem,
526  IT &&indexes,
527  MaskArgument mask) const
528 {
529  data.scatter(mem, unpackIfSegment(std::forward<IT>(indexes)), mask);
530 }
531 
532 // generic SimdArray {{{1
565 template <typename T, size_t N, typename V, size_t Wt> class SimdArray
566 {
567  static_assert(std::is_same<T, double>::value ||
568  std::is_same<T, float>::value ||
569  std::is_same<T, int32_t>::value ||
570  std::is_same<T, uint32_t>::value ||
571  std::is_same<T, int16_t>::value ||
572  std::is_same<T, uint16_t>::value, "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, int16_t, uint16_t }");
573  static_assert(
574  // either the EntryType and VectorEntryType of the main V are equal
575  std::is_same<typename V::EntryType, typename V::VectorEntryType>::value ||
576  // or N is a multiple of V::size()
577  (N % V::size() == 0),
578  "SimdArray<(un)signed short, N> on MIC only works correctly for N = k * "
579  "MIC::(u)short_v::size(), i.e. k * 16.");
580 
581  using my_traits = SimdArrayTraits<T, N>;
582  static constexpr std::size_t N0 = my_traits::N0;
583  static constexpr std::size_t N1 = my_traits::N1;
584  using Split = Common::Split<N0>;
585  template <typename U, std::size_t K> using CArray = U[K];
586 
587 public:
588  using storage_type0 = typename my_traits::storage_type0;
589  using storage_type1 = typename my_traits::storage_type1;
590  static_assert(storage_type0::size() == N0, "");
591 
595  using vector_type = V;
596  using vectorentry_type = typename storage_type0::vectorentry_type;
597  typedef vectorentry_type alias_type Vc_MAY_ALIAS;
598 
600  using value_type = T;
601 
604 
607 
618  static constexpr std::size_t size() { return N; }
619 
621  using Mask = mask_type;
623  using MaskType = Mask;
624  using MaskArgument = const MaskType &;
625  using VectorEntryType = vectorentry_type;
630  using AsArg = const SimdArray &;
631 
632  using reference = Detail::ElementReference<SimdArray>;
633 
635  static constexpr std::size_t MemoryAlignment =
639 
642 
644  static Vc_INTRINSIC SimdArray Zero()
645  {
646  return SimdArray(Vc::Zero);
647  }
648 
650  static Vc_INTRINSIC SimdArray One()
651  {
652  return SimdArray(Vc::One);
653  }
654 
656  static Vc_INTRINSIC SimdArray IndexesFromZero()
657  {
658  return SimdArray(Vc::IndexesFromZero);
659  }
660 
662  static Vc_INTRINSIC SimdArray Random()
663  {
664  return fromOperation(Common::Operations::random());
665  }
666 
668  template <typename G> static Vc_INTRINSIC SimdArray generate(const G &gen) // {{{2
669  {
670  auto tmp = storage_type0::generate(gen); // GCC bug: the order of evaluation in
671  // an initializer list is well-defined
672  // (front to back), but GCC 4.8 doesn't
673  // implement this correctly. Therefore
674  // we enforce correct order.
675  return {std::move(tmp),
676  storage_type1::generate([&](std::size_t i) { return gen(i + N0); })};
677  }
679 
682 
684 #ifndef Vc_MSVC // bogus error C2580
685  SimdArray() = default;
686 #endif
687 
691 
693  Vc_INTRINSIC SimdArray(value_type a) : data0(a), data1(a) {}
694  template <
695  typename U,
696  typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
697  SimdArray(U a)
698  : SimdArray(static_cast<value_type>(a))
699  {
700  }
702 
703  // default copy ctor/operator
704  SimdArray(const SimdArray &) = default;
705  SimdArray(SimdArray &&) = default;
706  SimdArray &operator=(const SimdArray &) = default;
707 
708  // load ctor
709  template <typename U,
710  typename Flags = DefaultLoadTag,
711  typename = enable_if<Traits::is_load_store_flag<Flags>::value>>
712  explicit Vc_INTRINSIC SimdArray(const U *mem, Flags f = Flags())
713  : data0(mem, f), data1(mem + storage_type0::size(), f)
714  {
715  }
716 
717 // MSVC does overload resolution differently and takes the const U *mem overload (I hope)
718 #ifndef Vc_MSVC
719 
725  template <typename U, std::size_t Extent, typename Flags = DefaultLoadTag,
726  typename = enable_if<Traits::is_load_store_flag<Flags>::value>>
727  explicit Vc_INTRINSIC SimdArray(CArray<U, Extent> &mem, Flags f = Flags())
728  : data0(&mem[0], f), data1(&mem[storage_type0::size()], f)
729  {
730  }
734  template <typename U, std::size_t Extent, typename Flags = DefaultLoadTag,
735  typename = enable_if<Traits::is_load_store_flag<Flags>::value>>
736  explicit Vc_INTRINSIC SimdArray(const CArray<U, Extent> &mem, Flags f = Flags())
737  : data0(&mem[0], f), data1(&mem[storage_type0::size()], f)
738  {
739  }
740 #endif
741 
742  // initializer list
743  Vc_INTRINSIC SimdArray(const std::initializer_list<value_type> &init)
744  : data0(init.begin(), Vc::Unaligned)
745  , data1(init.begin() + storage_type0::size(), Vc::Unaligned)
746  {
747 #if defined Vc_CXX14 && 0 // doesn't compile yet
748  static_assert(init.size() == size(), "The initializer_list argument to "
749  "SimdArray<T, N> must contain exactly N "
750  "values.");
751 #else
752  Vc_ASSERT(init.size() == size());
753 #endif
754  }
755 
756 #include "gatherinterface.h"
757 #include "scatterinterface.h"
758 
759  // forward all remaining ctors
760  template <typename... Args,
761  typename = enable_if<!Traits::is_cast_arguments<Args...>::value &&
762  !Traits::is_initializer_list<Args...>::value &&
763  !Traits::is_gather_signature<Args...>::value &&
764  !Traits::is_load_arguments<Args...>::value>>
765  explicit Vc_INTRINSIC SimdArray(Args &&... args)
766  : data0(Split::lo(args)...) // no forward here - it could move and thus
767  // break the next line
768  , data1(Split::hi(std::forward<Args>(args))...)
769  {
770  }
771 
772  // explicit casts
773  template <typename W>
774  Vc_INTRINSIC explicit SimdArray(
775  W &&x,
776  enable_if<(Traits::is_simd_vector<W>::value && Traits::simd_vector_size<W>::value == N &&
777  !(std::is_convertible<Traits::entry_type_of<W>, T>::value &&
778  Traits::isSimdArray<W>::value))> = nullarg)
779  : data0(Split::lo(x)), data1(Split::hi(x))
780  {
781  }
782 
783  // implicit casts
784  template <typename W>
785  Vc_INTRINSIC SimdArray(
786  W &&x,
787  enable_if<(Traits::isSimdArray<W>::value && Traits::simd_vector_size<W>::value == N &&
788  std::is_convertible<Traits::entry_type_of<W>, T>::value)> = nullarg)
789  : data0(Split::lo(x)), data1(Split::hi(x))
790  {
791  }
792 
793  // implicit conversion to Vector<U, AnyAbi> for if Vector<U, AnyAbi>::size() == N and
794  // T implicitly convertible to U
795  template <
796  typename U, typename A,
797  typename = enable_if<std::is_convertible<T, U>::value && Vector<U, A>::Size == N>>
798  operator Vector<U, A>() const
799  {
800  return simd_cast<Vector<U, A>>(data0, data1);
801  }
802 
804 
805  Vc_INTRINSIC void setZero()
806  {
807  data0.setZero();
808  data1.setZero();
809  }
810  Vc_INTRINSIC void setZero(const mask_type &k)
811  {
812  data0.setZero(Split::lo(k));
813  data1.setZero(Split::hi(k));
814  }
815  Vc_INTRINSIC void setZeroInverted()
816  {
817  data0.setZeroInverted();
818  data1.setZeroInverted();
819  }
820  Vc_INTRINSIC void setZeroInverted(const mask_type &k)
821  {
822  data0.setZeroInverted(Split::lo(k));
823  data1.setZeroInverted(Split::hi(k));
824  }
825 
826 
827  Vc_INTRINSIC void setQnan() {
828  data0.setQnan();
829  data1.setQnan();
830  }
831  Vc_INTRINSIC void setQnan(const mask_type &m) {
832  data0.setQnan(Split::lo(m));
833  data1.setQnan(Split::hi(m));
834  }
835 
837  template <typename Op, typename... Args>
838  static Vc_INTRINSIC SimdArray fromOperation(Op op, Args &&... args)
839  {
840  SimdArray r = {
841  storage_type0::fromOperation(op, Split::lo(args)...), // no forward here - it
842  // could move and thus
843  // break the next line
844  storage_type1::fromOperation(op, Split::hi(std::forward<Args>(args))...)};
845  return r;
846  }
847 
849  template <typename Op, typename... Args>
850  static Vc_INTRINSIC void callOperation(Op op, Args &&... args)
851  {
852  storage_type0::callOperation(op, Split::lo(args)...);
853  storage_type1::callOperation(op, Split::hi(std::forward<Args>(args))...);
854  }
855 
856 
857  template <typename U, typename... Args> Vc_INTRINSIC void load(const U *mem, Args &&... args)
858  {
859  data0.load(mem, Split::lo(args)...); // no forward here - it could move and thus
860  // break the next line
861  data1.load(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
862  }
863 
864  template <typename U, typename... Args> Vc_INTRINSIC void store(U *mem, Args &&... args) const
865  {
866  data0.store(mem, Split::lo(args)...); // no forward here - it could move and thus
867  // break the next line
868  data1.store(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
869  }
870 
871  Vc_INTRINSIC mask_type operator!() const
872  {
873  return {!data0, !data1};
874  }
875 
876  Vc_INTRINSIC SimdArray operator-() const
877  {
878  return {-data0, -data1};
879  }
880 
882  Vc_INTRINSIC SimdArray operator+() const { return *this; }
883 
884  Vc_INTRINSIC SimdArray operator~() const
885  {
886  return {~data0, ~data1};
887  }
888 
889  // left/right shift operators {{{2
890  template <typename U,
891  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
892  Vc_INTRINSIC Vc_CONST SimdArray operator<<(U x) const
893  {
894  return {data0 << x, data1 << x};
895  }
896  template <typename U,
897  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
898  Vc_INTRINSIC SimdArray &operator<<=(U x)
899  {
900  data0 <<= x;
901  data1 <<= x;
902  return *this;
903  }
904  template <typename U,
905  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
906  Vc_INTRINSIC Vc_CONST SimdArray operator>>(U x) const
907  {
908  return {data0 >> x, data1 >> x};
909  }
910  template <typename U,
911  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
912  Vc_INTRINSIC SimdArray &operator>>=(U x)
913  {
914  data0 >>= x;
915  data1 >>= x;
916  return *this;
917  }
918 
919  // binary operators {{{2
920 #define Vc_BINARY_OPERATOR_(op) \
921  Vc_INTRINSIC Vc_CONST SimdArray operator op(const SimdArray &rhs) const \
922  { \
923  return {data0 op rhs.data0, data1 op rhs.data1}; \
924  } \
925  Vc_INTRINSIC SimdArray &operator op##=(const SimdArray &rhs) \
926  { \
927  data0 op## = rhs.data0; \
928  data1 op## = rhs.data1; \
929  return *this; \
930  }
931  Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_);
932  Vc_ALL_BINARY(Vc_BINARY_OPERATOR_);
933  Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_);
934 #undef Vc_BINARY_OPERATOR_
935 
936 #define Vc_COMPARES(op) \
937  Vc_INTRINSIC mask_type operator op(const SimdArray &rhs) const \
938  { \
939  return {data0 op rhs.data0, data1 op rhs.data1}; \
940  }
941  Vc_ALL_COMPARES(Vc_COMPARES);
942 #undef Vc_COMPARES
943 
944  // operator[] {{{2
947 
948 private:
949  friend reference;
950  Vc_INTRINSIC static value_type get(const SimdArray &o, int i) noexcept
951  {
952  return reinterpret_cast<const alias_type *>(&o)[i];
953  }
954  template <typename U>
955  Vc_INTRINSIC static void set(SimdArray &o, int i, U &&v) noexcept(
956  noexcept(std::declval<value_type &>() = v))
957  {
958  reinterpret_cast<alias_type *>(&o)[i] = v;
959  }
960 
961 public:
963 
969  Vc_INTRINSIC reference operator[](size_t i) noexcept
970  {
971  static_assert(noexcept(reference{std::declval<SimdArray &>(), int()}), "");
972  return {*this, int(i)};
973  }
974 
976  Vc_INTRINSIC value_type operator[](size_t index) const noexcept
977  {
978  return get(*this, int(index));
979  }
981 
982  // operator(){{{2
984  Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(
985  const mask_type &mask)
986  {
987  return {*this, mask};
988  }
989 
991  Vc_INTRINSIC void assign(const SimdArray &v, const mask_type &k) //{{{2
992  {
993  data0.assign(v.data0, internal_data0(k));
994  data1.assign(v.data1, internal_data1(k));
995  }
996 
997  // reductions {{{2
998 #define Vc_REDUCTION_FUNCTION_(name_, binary_fun_, scalar_fun_) \
999 private: \
1000  template <typename ForSfinae = void> \
1001  Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value && \
1002  storage_type0::Size == storage_type1::Size, \
1003  value_type> name_##_impl() const \
1004  { \
1005  return binary_fun_(data0, data1).name_(); \
1006  } \
1007  \
1008  template <typename ForSfinae = void> \
1009  Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value && \
1010  storage_type0::Size != storage_type1::Size, \
1011  value_type> name_##_impl() const \
1012  { \
1013  return scalar_fun_(data0.name_(), data1.name_()); \
1014  } \
1015  \
1016 public: \
1017  \
1018  Vc_INTRINSIC value_type name_() const { return name_##_impl(); } \
1019  \
1020  Vc_INTRINSIC value_type name_(const mask_type &mask) const \
1021  { \
1022  if (Vc_IS_UNLIKELY(Split::lo(mask).isEmpty())) { \
1023  return data1.name_(Split::hi(mask)); \
1024  } else if (Vc_IS_UNLIKELY(Split::hi(mask).isEmpty())) { \
1025  return data0.name_(Split::lo(mask)); \
1026  } else { \
1027  return scalar_fun_(data0.name_(Split::lo(mask)), \
1028  data1.name_(Split::hi(mask))); \
1029  } \
1030  } \
1031  Vc_NOTHING_EXPECTING_SEMICOLON
1032  Vc_REDUCTION_FUNCTION_(min, Vc::min, std::min);
1033  Vc_REDUCTION_FUNCTION_(max, Vc::max, std::max);
1034  Vc_REDUCTION_FUNCTION_(product, internal::product_helper_, internal::product_helper_);
1035  Vc_REDUCTION_FUNCTION_(sum, internal::sum_helper_, internal::sum_helper_);
1036 #undef Vc_REDUCTION_FUNCTION_
1037  Vc_INTRINSIC Vc_PURE SimdArray partialSum() const //{{{2
1039  {
1040  auto ps0 = data0.partialSum();
1041  auto tmp = data1;
1042  tmp[0] += ps0[data0.size() - 1];
1043  return {std::move(ps0), tmp.partialSum()};
1044  }
1045 
1046  // apply {{{2
1048  template <typename F> inline SimdArray apply(F &&f) const
1049  {
1050  return {data0.apply(f), data1.apply(f)};
1051  }
1053  template <typename F> inline SimdArray apply(F &&f, const mask_type &k) const
1054  {
1055  return {data0.apply(f, Split::lo(k)), data1.apply(f, Split::hi(k))};
1056  }
1057 
1058  // shifted {{{2
1060  inline SimdArray shifted(int amount) const
1061  {
1062  constexpr int SSize = Size;
1063  constexpr int SSize0 = storage_type0::Size;
1064  constexpr int SSize1 = storage_type1::Size;
1065  if (amount == 0) {
1066  return *this;
1067  }
1068  if (amount < 0) {
1069  if (amount > -SSize0) {
1070  return {data0.shifted(amount), data1.shifted(amount, data0)};
1071  }
1072  if (amount == -SSize0) {
1073  return {storage_type0::Zero(), simd_cast<storage_type1>(data0)};
1074  }
1075  if (amount < -SSize0) {
1076  return {storage_type0::Zero(), simd_cast<storage_type1>(data0.shifted(
1077  amount + SSize0))};
1078  }
1079  return Zero();
1080  } else {
1081  if (amount >= SSize) {
1082  return Zero();
1083  } else if (amount >= SSize0) {
1084  return {
1085  simd_cast<storage_type0>(data1).shifted(amount - SSize0),
1087  } else if (amount >= SSize1) {
1088  return {data0.shifted(amount, data1), storage_type1::Zero()};
1089  } else {
1090  return {data0.shifted(amount, data1), data1.shifted(amount)};
1091  }
1092  }
1093  }
1094 
    /// shifted overload with a second operand supplying the entries that are
    /// shifted in at the boundary. This generic fallback builds the result
    /// entry-by-entry via generate; it is selected when the array is not
    /// bisectable into two equal halves (see the bisectable overload below).
    template <std::size_t NN>
    inline enable_if<
        !(std::is_same<storage_type0, storage_type1>::value &&  // not bisectable
          N == NN),
        SimdArray>
        shifted(int amount, const SimdArray<value_type, NN> &shiftIn) const
    {
        constexpr int SSize = Size;
        if (amount < 0) {
            // shift towards higher indexes: missing entries come from the tail of shiftIn
            return SimdArray::generate([&](int i) -> value_type {
                i += amount;
                if (i >= 0) {
                    return operator[](i);
                } else if (i >= -SSize) {
                    return shiftIn[i + SSize];
                }
                return 0;
            });
        }
        // shift towards index 0: missing entries come from the head of shiftIn
        return SimdArray::generate([&](int i) -> value_type {
            i += amount;
            if (i < SSize) {
                return operator[](i);
            } else if (i < 2 * SSize) {
                return shiftIn[i - SSize];
            }
            return 0;
        });
    }
1124 
1125 private:
1126  // workaround for MSVC not understanding the simpler and shorter expression of the boolean
1127  // expression directly in the enable_if below
    // True iff both internal halves have the same vector type and the shiftIn
    // argument has the same size N — only then can the two-argument shifted below
    // treat *this and shiftIn as four equal internal vectors.
    template <std::size_t NN> struct bisectable_shift
        : public std::integral_constant<bool,
                                        std::is_same<storage_type0, storage_type1>::value &&  // bisectable
                                            N == NN>
    {
    };
1134 
1135 public:
    /// shifted overload for the bisectable case (see bisectable_shift): the result
    /// is assembled pairwise from the four internal vectors of *this and shiftIn,
    /// with one branch per alignment of \p amount relative to the half size.
    template <std::size_t NN>
    inline SimdArray shifted(enable_if<bisectable_shift<NN>::value, int> amount,
                             const SimdArray<value_type, NN> &shiftIn) const
    {
        constexpr int SSize = Size;
        if (amount < 0) {
            if (amount > -static_cast<int>(storage_type0::Size)) {
                // shift by less than one internal vector
                return {data0.shifted(amount, internal_data1(shiftIn)),
                        data1.shifted(amount, data0)};
            }
            if (amount == -static_cast<int>(storage_type0::Size)) {
                // shift by exactly one internal vector
                return {storage_type0(internal_data1(shiftIn)), storage_type1(data0)};
            }
            if (amount > -SSize) {
                return {
                    internal_data1(shiftIn)
                        .shifted(amount + static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
                    data0.shifted(amount + static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
            }
            if (amount == -SSize) {
                // shifted by the full array size: the result is exactly shiftIn
                return shiftIn;
            }
            if (amount > -2 * SSize) {
                return shiftIn.shifted(amount + SSize);
            }
        }
        if (amount == 0) {
            return *this;
        }
        if (amount < static_cast<int>(storage_type0::Size)) {
            return {data0.shifted(amount, data1),
                    data1.shifted(amount, internal_data0(shiftIn))};
        }
        if (amount == static_cast<int>(storage_type0::Size)) {
            return {storage_type0(data1), storage_type1(internal_data0(shiftIn))};
        }
        if (amount < SSize) {
            return {data1.shifted(amount - static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
                    internal_data0(shiftIn)
                        .shifted(amount - static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
        }
        if (amount == SSize) {
            return shiftIn;
        }
        if (amount < 2 * SSize) {
            return shiftIn.shifted(amount - SSize);
        }
        // amount >= 2 * SSize: every entry is shifted out
        return Zero();
    }
1185 
1186  // rotated {{{2
    /// Returns a copy with all entries rotated by \p amount towards index 0;
    /// entries rotated out at the front reappear at the end. \p amount may be
    /// negative or exceed size().
    Vc_INTRINSIC SimdArray rotated(int amount) const
    {
        amount %= int(size());
        if (amount == 0) {
            return *this;
        } else if (amount < 0) {
            // normalize to an equivalent non-negative rotation
            amount += size();
        }

#ifdef Vc_MSVC
        // MSVC fails to find a SimdArray::shifted function with 2 arguments. So use store
        // ->
        // load to implement the function instead.
        alignas(MemoryAlignment) T tmp[N + data0.size()];
        data0.store(&tmp[0], Vc::Aligned);
        data1.store(&tmp[data0.size()], Vc::Aligned);
        data0.store(&tmp[N], Vc::Unaligned);
        SimdArray r;
        r.data0.load(&tmp[amount], Vc::Unaligned);
        r.data1.load(&tmp[(amount + data0.size()) % size()], Vc::Unaligned);
        return r;
#else
        // cross-converted copies of the halves, used wherever a half of the other
        // storage type is needed as the shift-in operand
        auto &&d0cvtd = simd_cast<storage_type1>(data0);
        auto &&d1cvtd = simd_cast<storage_type0>(data1);
        constexpr int size0 = storage_type0::size();
        constexpr int size1 = storage_type1::size();

        if (amount == size0 && std::is_same<storage_type0, storage_type1>::value) {
            // equal halves rotated by exactly one half: simply swap them
            return {std::move(d1cvtd), std::move(d0cvtd)};
        } else if (amount < size1) {
            return {data0.shifted(amount, d1cvtd), data1.shifted(amount, d0cvtd)};
        } else if (amount == size1) {
            return {data0.shifted(amount, d1cvtd), std::move(d0cvtd)};
        } else if (int(size()) - amount < size1) {
            return {data0.shifted(amount - int(size()), d1cvtd.shifted(size1 - size0)),
                    data1.shifted(amount - int(size()), data0.shifted(size0 - size1))};
        } else if (int(size()) - amount == size1) {
            return {data0.shifted(-size1, d1cvtd.shifted(size1 - size0)),
                    simd_cast<storage_type1>(data0.shifted(size0 - size1))};
        } else if (amount <= size0) {
            return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
                    simd_cast<storage_type1>(data0.shifted(amount - size1))};
        } else {
            return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
                    simd_cast<storage_type1>(data0.shifted(amount - size1, d1cvtd))};
        }
        // not reached — every branch above returns; presumably kept to silence
        // missing-return warnings (TODO confirm)
        return *this;
#endif
    }
1237 
1238  // interleaveLow/-High {{{2
    /// Interleaves the low halves of *this and \p x:
    /// result = [ (*this)[0], x[0], (*this)[1], x[1], ... ]
    Vc_INTRINSIC SimdArray interleaveLow(const SimdArray &x) const
    {
        // return data0[0], x.data0[0], data0[1], x.data0[1], ...
        return {data0.interleaveLow(x.data0),
                simd_cast<storage_type1>(data0.interleaveHigh(x.data0))};
    }
    /// Interleaves the high halves of *this and \p x; dispatches to an
    /// implementation depending on whether the two internal vectors are equally
    /// sized.
    Vc_INTRINSIC SimdArray interleaveHigh(const SimdArray &x) const
    {
        return interleaveHighImpl(
            x,
            std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
    }
1253 
1254 private:
    /// interleaveHigh for equally sized halves: the high half of the array is
    /// exactly data1, so interleave data1 with x.data1.
    Vc_INTRINSIC SimdArray interleaveHighImpl(const SimdArray &x, std::true_type) const
    {
        return {data1.interleaveLow(x.data1), data1.interleaveHigh(x.data1)};
    }
    /// interleaveHigh for differently sized halves: the upper part of data0 also
    /// belongs to the high half, so its interleave result is combined (via
    /// shifted) with the interleave of the data1 parts.
    inline SimdArray interleaveHighImpl(const SimdArray &x, std::false_type) const
    {
        return {data0.interleaveHigh(x.data0)
                    .shifted(storage_type1::Size,
                             simd_cast<storage_type0>(data1.interleaveLow(x.data1))),
                data1.interleaveHigh(x.data1)};
    }
1268 
1269 public:
    /// Returns a copy with the entries in reversed order.
    inline SimdArray reversed() const //{{{2
    {
        if (std::is_same<storage_type0, storage_type1>::value) {
            // equal halves: reverse each half and swap their positions
            return {simd_cast<storage_type0>(data1).reversed(),
                    simd_cast<storage_type1>(data0).reversed()};
        } else {
#ifdef Vc_MSVC
            // MSVC fails to find a SimdArray::shifted function with 2 arguments. So use
            // store
            // -> load to implement the function instead.
            alignas(MemoryAlignment) T tmp[N];
            data1.reversed().store(&tmp[0], Vc::Aligned);
            data0.reversed().store(&tmp[data1.size()], Vc::Unaligned);
            return SimdArray{&tmp[0], Vc::Aligned};
#else
            // unequal halves: shift data1 into data0 before reversing so the new
            // low half holds the (reversed) last Size0 entries
            return {data0.shifted(storage_type1::Size, data1).reversed(),
                    simd_cast<storage_type1>(data0.reversed().shifted(
                        storage_type0::Size - storage_type1::Size))};
#endif
        }
    }
    /// Returns a copy with all entries sorted in ascending order; dispatches on
    /// whether the two internal halves are equally sized.
    inline SimdArray sorted() const //{{{2
    {
        return sortedImpl(
            std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
    }
1298 
1300  Vc_INTRINSIC SimdArray sortedImpl(std::true_type) const
1301  {
1302 #ifdef Vc_DEBUG_SORTED
1303  std::cerr << "-- " << data0 << data1 << '\n';
1304 #endif
1305  const auto a = data0.sorted();
1306  const auto b = data1.sorted().reversed();
1307  const auto lo = Vc::min(a, b);
1308  const auto hi = Vc::max(a, b);
1309  return {lo.sorted(), hi.sorted()};
1310  }
1311 
1313  Vc_INTRINSIC SimdArray sortedImpl(std::false_type) const
1314  {
1315  using SortableArray =
1316  SimdArray<value_type, Common::NextPowerOfTwo<size()>::value>;
1317  auto sortable = simd_cast<SortableArray>(*this);
1318  for (std::size_t i = Size; i < SortableArray::Size; ++i) {
1319  using limits = std::numeric_limits<value_type>;
1320  if (limits::has_infinity) {
1321  sortable[i] = limits::infinity();
1322  } else {
1323  sortable[i] = std::numeric_limits<value_type>::max();
1324  }
1325  }
1326  return simd_cast<SimdArray>(sortable.sorted());
1327 
1328  /* The following implementation appears to be less efficient. But this may need further
1329  * work.
1330  const auto a = data0.sorted();
1331  const auto b = data1.sorted();
1332 #ifdef Vc_DEBUG_SORTED
1333  std::cerr << "== " << a << b << '\n';
1334 #endif
1335  auto aIt = Vc::begin(a);
1336  auto bIt = Vc::begin(b);
1337  const auto aEnd = Vc::end(a);
1338  const auto bEnd = Vc::end(b);
1339  return SimdArray::generate([&](std::size_t) {
1340  if (aIt == aEnd) {
1341  return *(bIt++);
1342  }
1343  if (bIt == bEnd) {
1344  return *(aIt++);
1345  }
1346  if (*aIt < *bIt) {
1347  return *(aIt++);
1348  } else {
1349  return *(bIt++);
1350  }
1351  });
1352  */
1353  }
1354 
1357 
1360  static constexpr std::size_t Size = size();
1361 
    /// \deprecated Use the free function exponent(x) instead.
    /// Returns the exponent of each entry, computed per internal half.
    Vc_DEPRECATED("use exponent(x) instead") Vc_INTRINSIC SimdArray exponent() const
    {
        return {exponent(data0), exponent(data1)};
    }
1367 
    /// \deprecated Use the free function isnegative(x) instead.
    /// Returns a mask that is true where the entry is negative.
    Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC MaskType isNegative() const
    {
        return {isnegative(data0), isnegative(data1)};
    }
1373 
    /// \deprecated Use the free function copysign(x, y) instead.
    /// Returns entries with this object's magnitudes and \p reference's signs.
    Vc_DEPRECATED("use copysign(x, y) instead") Vc_INTRINSIC SimdArray
        copySign(const SimdArray &reference) const
    {
        return {Vc::copysign(data0, reference.data0),
                Vc::copysign(data1, reference.data1)};
    }
1382 
1383  // internal_data0/1 {{{2
1384  friend storage_type0 &internal_data0<>(SimdArray &x);
1385  friend storage_type1 &internal_data1<>(SimdArray &x);
1386  friend const storage_type0 &internal_data0<>(const SimdArray &x);
1387  friend const storage_type1 &internal_data1<>(const SimdArray &x);
1388 
    /// \internal Constructs a SimdArray directly from its two internal vector
    /// halves (moved in).
    Vc_INTRINSIC SimdArray(storage_type0 &&x, storage_type1 &&y) //{{{2
        : data0(std::move(x)), data1(std::move(y))
    {
    }
1394 
1395  Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(storage_type0));
1396 
1397 private: //{{{2
1398  // The alignas attribute attached to the class declaration above is ignored by ICC
1399  // 17.0.0 (at least). So just move the alignas attribute down here where it works for
1400  // all compilers.
1401  alignas(static_cast<std::size_t>(
1402  Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value * sizeof(V) /
1403  V::size()>::value)) storage_type0 data0;
1404  storage_type1 data1;
1405 };
1406 #undef Vc_CURRENT_CLASS_NAME
// Out-of-class definitions of the static constexpr data members (required when
// they are ODR-used, since this code predates C++17 inline variables).
template <typename T, std::size_t N, typename V, std::size_t M>
constexpr std::size_t SimdArray<T, N, V, M>::Size;
template <typename T, std::size_t N, typename V, std::size_t M>
constexpr std::size_t SimdArray<T, N, V, M>::MemoryAlignment;
1411 
1412 // gatherImplementation {{{2
1413 template <typename T, std::size_t N, typename VectorType, std::size_t M>
1414 template <typename MT, typename IT>
1416  const IT &indexes)
1417 {
1418  data0.gather(mem, Split::lo(Common::Operations::gather(), indexes));
1419  data1.gather(mem, Split::hi(Common::Operations::gather(), indexes));
1420 }
// Masked gather: loads mem[indexes[i]] into entry i wherever mask[i] is set.
// Index vector and mask are split into the low/high parts matching data0/data1.
template <typename T, std::size_t N, typename VectorType, std::size_t M>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, M>::gatherImplementation(const MT *mem,
                                                                 const IT &indexes,
                                                                 MaskArgument mask)
{
    data0.gather(mem, Split::lo(Common::Operations::gather(), indexes), Split::lo(mask));
    data1.gather(mem, Split::hi(Common::Operations::gather(), indexes), Split::hi(mask));
}
1430 
1431 // scatterImplementation {{{2
// Scatter: stores entry i to mem[indexes[i]]. The gather operation tag is passed
// to Split::lo/hi here as well — presumably it only selects how the index vector
// is split (TODO confirm against simdarrayhelper.h).
template <typename T, std::size_t N, typename VectorType, std::size_t M>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, M>::scatterImplementation(MT *mem,
                                                                  IT &&indexes) const
{
    data0.scatter(mem, Split::lo(Common::Operations::gather(),
                                 indexes));  // don't forward indexes - it could move and
                                             // thus break the next line
    data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)));
}
// Masked scatter: stores entry i to mem[indexes[i]] only where mask[i] is set;
// indexes and mask are split into the parts matching data0/data1.
template <typename T, std::size_t N, typename VectorType, std::size_t M>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, M>::scatterImplementation(MT *mem,
                                                                  IT &&indexes, MaskArgument mask) const
{
    data0.scatter(mem, Split::lo(Common::Operations::gather(), indexes),
                  Split::lo(mask));  // don't forward indexes - it could move and
                                     // thus break the next line
    data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)),
                  Split::hi(mask));
}
1453 
1454 // internal_data0/1 (SimdArray) {{{1
/// \internal Grants access to the first (low) internal vector half of \p x.
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
    SimdArray<T, N, V, M> &x)
{
    return x.data0;
}
/// \internal Grants access to the second (high) internal vector half of \p x.
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
    SimdArray<T, N, V, M> &x)
{
    return x.data1;
}
/// \internal Read-only access to the first (low) internal vector half of \p x.
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
const typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
    const SimdArray<T, N, V, M> &x)
{
    return x.data0;
}
/// \internal Read-only access to the second (high) internal vector half of \p x.
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
const typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
    const SimdArray<T, N, V, M> &x)
{
    return x.data1;
}
1495 
1496 // MSVC workaround for SimdArray(storage_type0, storage_type1) ctor{{{1
1497 // MSVC sometimes stores x to data1. By first broadcasting 0 and then assigning y
1498 // in the body the bug is supressed.
1499 #if defined Vc_MSVC && defined Vc_IMPL_SSE
template <>
Vc_INTRINSIC SimdArray<double, 8, SSE::Vector<double>, 2>::SimdArray(
    SimdArray<double, 4> &&x, SimdArray<double, 4> &&y)
    : data0(x), data1(0)  // broadcast 0 into data1 first; y is assigned in the
                          // body to suppress the MSVC codegen bug described above
{
    data1 = y;
}
1507 #endif
1508 
1509 // binary operators {{{1
// Deduces the SimdArray result type for mixed-type binary operators with at
// least one SimdArray operand; the bool parameter of `evaluate` additionally
// decides whether such an operator is enabled at all.
namespace result_vector_type_internal
{
/// Strips references and cv-qualifiers.
template <typename T>
using type = typename std::remove_cv<typename std::remove_reference<T>::type>::type;

/// True for integral types that may be 64 bit wide: anything larger than int,
/// plus long/unsigned long (whose width is platform dependent).
template <typename T>
using is_integer_larger_than_int = std::integral_constant<
    bool, std::is_integral<T>::value &&(sizeof(T) > sizeof(int) ||
                                        std::is_same<T, long>::value ||
                                        std::is_same<T, unsigned long>::value)>;

// N: the SimdArray operand's entry count; the unnamed bool: whether the
// non-member operator is enabled for this L/R combination.
template <
    typename L, typename R,
    std::size_t N = Traits::isSimdArray<L>::value ? Traits::simd_vector_size<L>::value
                                                  : Traits::simd_vector_size<R>::value,
    bool =
        (Traits::isSimdArray<L>::value ||
         Traits::isSimdArray<R>::value)  // one of the operands must be a SimdArray
        && !std::is_same<type<L>, type<R>>::value  // if the operands are of the same type
                                                   // use the member function
        &&
        ((std::is_arithmetic<type<L>>::value &&
          !is_integer_larger_than_int<type<L>>::value) ||
         (std::is_arithmetic<type<R>>::value &&
          !is_integer_larger_than_int<type<R>>::value)  // one of the operands is a scalar
                                                        // type
         ||
         (  // or one of the operands is Vector<T> with Vector<T>::size() ==
            // SimdArray::size()
             Traits::simd_vector_size<L>::value == Traits::simd_vector_size<R>::value &&
             ((Traits::is_simd_vector<L>::value && !Traits::isSimdArray<L>::value) ||
              (Traits::is_simd_vector<R>::value && !Traits::isSimdArray<R>::value))))>
struct evaluate;

template <typename L, typename R, std::size_t N> struct evaluate<L, R, N, true>
{
private:
    using LScalar = Traits::entry_type_of<L>;
    using RScalar = Traits::entry_type_of<R>;

    template <bool B, typename True, typename False>
    using conditional = typename std::conditional<B, True, False>::type;

public:
    // In principle we want the exact same rules for SimdArray<T> ⨉ SimdArray<U> as the standard
    // defines for T ⨉ U. BUT: short ⨉ short returns int (because all integral types smaller than
    // int are promoted to int before any operation). This would imply that SIMD types with integral
    // types smaller than int are more or less useless - and you could use SimdArray<int> from the
    // start. Therefore we special-case those operations where the scalar type of both operands is
    // integral and smaller than int.
    // In addition to that there is no generic support for 64-bit int SIMD types. Therefore
    // promotion to a 64-bit integral type (including `long` because it can potentially have 64
    // bits) also is not done. But if one of the operands is a scalar type that is larger than int
    // then the operator is disabled altogether. We do not want an implicit demotion.
    using type = SimdArray<
        conditional<(std::is_integral<LScalar>::value &&std::is_integral<RScalar>::value &&
                     sizeof(LScalar) < sizeof(int) &&
                     sizeof(RScalar) < sizeof(int)),
                    conditional<(sizeof(LScalar) == sizeof(RScalar)),
                                conditional<std::is_unsigned<LScalar>::value, LScalar, RScalar>,
                                conditional<(sizeof(LScalar) > sizeof(RScalar)), LScalar, RScalar>>,
                    decltype(std::declval<LScalar>() + std::declval<RScalar>())>,
        N>;
};

}  // namespace result_vector_type_internal
1576 
1577 template <typename L, typename R>
1578 using result_vector_type = typename result_vector_type_internal::evaluate<L, R>::type;
1579 
1580 static_assert(
1581  std::is_same<result_vector_type<short int, Vc::SimdArray<short unsigned int, 32ul>>,
1583  "result_vector_type does not work");
1584 
// Generates a non-member arithmetic/bitwise operator op_ for mixed operand
// types: both sides are converted to the common result_vector_type and the
// member operator of that type is applied. (Comments must stay outside the
// macro: a `//` comment on a continued macro line would swallow the next line.)
#define Vc_BINARY_OPERATORS_(op_)                                                        \
    \
    template <typename L, typename R>                                                    \
    Vc_INTRINSIC result_vector_type<L, R> operator op_(L &&lhs, R &&rhs)                 \
    {                                                                                    \
        using Return = result_vector_type<L, R>;                                         \
        return Return(std::forward<L>(lhs)) op_ Return(std::forward<R>(rhs));            \
    }
1593 
1610 Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATORS_);
1612 Vc_ALL_BINARY(Vc_BINARY_OPERATORS_);
1614 #undef Vc_BINARY_OPERATORS_
// Generates a non-member comparison operator op_ for mixed operand types: both
// sides are promoted to the common result_vector_type and compared, yielding
// that type's mask_type.
#define Vc_BINARY_OPERATORS_(op_)                                                        \
    \
    template <typename L, typename R>                                                    \
    Vc_INTRINSIC typename result_vector_type<L, R>::mask_type operator op_(L &&lhs,     \
                                                                           R &&rhs)     \
    {                                                                                    \
        using Promote = result_vector_type<L, R>;                                        \
        return Promote(std::forward<L>(lhs)) op_ Promote(std::forward<R>(rhs));          \
    }
1624 
1641 Vc_ALL_COMPARES(Vc_BINARY_OPERATORS_);
1644 #undef Vc_BINARY_OPERATORS_
1645 
1646 // math functions {{{1
// Generates a free math function name_(SimdArray) -> SimdArray that forwards to
// the corresponding Operations::Forward_* functor via fromOperation.
#define Vc_FORWARD_UNARY_OPERATOR(name_)                                                 \
    \
    template <typename T, std::size_t N, typename V, std::size_t M>                      \
    inline SimdArray<T, N, V, M> name_(const SimdArray<T, N, V, M> &x)                   \
    {                                                                                    \
        return SimdArray<T, N, V, M>::fromOperation(                                     \
            Common::Operations::Forward_##name_(), x);                                   \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
1656 
// Same as Vc_FORWARD_UNARY_OPERATOR but for predicates: the generated function
// returns a SimdMaskArray instead of a SimdArray.
#define Vc_FORWARD_UNARY_BOOL_OPERATOR(name_)                                            \
    \
    template <typename T, std::size_t N, typename V, std::size_t M>                      \
    inline SimdMaskArray<T, N, V, M> name_(const SimdArray<T, N, V, M> &x)               \
    {                                                                                    \
        return SimdMaskArray<T, N, V, M>::fromOperation(                                 \
            Common::Operations::Forward_##name_(), x);                                   \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
1666 
// Generates a free two-argument math function name_(SimdArray, SimdArray) that
// forwards both operands to the corresponding Operations::Forward_* functor.
#define Vc_FORWARD_BINARY_OPERATOR(name_)                                                \
    \
    template <typename T, std::size_t N, typename V, std::size_t M>                      \
    inline SimdArray<T, N, V, M> name_(const SimdArray<T, N, V, M> &x,                   \
                                       const SimdArray<T, N, V, M> &y)                   \
    {                                                                                    \
        return SimdArray<T, N, V, M>::fromOperation(                                     \
            Common::Operations::Forward_##name_(), x, y);                                \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
1677 
1682 Vc_FORWARD_UNARY_OPERATOR(abs);
1684 Vc_FORWARD_UNARY_OPERATOR(asin);
1685 Vc_FORWARD_UNARY_OPERATOR(atan);
1686 Vc_FORWARD_BINARY_OPERATOR(atan2);
1687 Vc_FORWARD_UNARY_OPERATOR(ceil);
1688 Vc_FORWARD_BINARY_OPERATOR(copysign);
1689 Vc_FORWARD_UNARY_OPERATOR(cos);
1690 Vc_FORWARD_UNARY_OPERATOR(exp);
1691 Vc_FORWARD_UNARY_OPERATOR(exponent);
1692 Vc_FORWARD_UNARY_OPERATOR(floor);
1694 template <typename T, std::size_t N>
1696  const SimdArray<T, N> &c)
1697 {
1698  return SimdArray<T, N>::fromOperation(Common::Operations::Forward_fma(), a, b, c);
1699 }
1700 Vc_FORWARD_UNARY_BOOL_OPERATOR(isfinite);
1701 Vc_FORWARD_UNARY_BOOL_OPERATOR(isinf);
1702 Vc_FORWARD_UNARY_BOOL_OPERATOR(isnan);
1703 #if defined Vc_MSVC && defined Vc_IMPL_SSE
1705  const SimdArray<double, 8, SSE::Vector<double>, 2> &x)
1706 {
1707  using V = SSE::Vector<double>;
1708  const SimdArray<double, 4, V, 2> &x0 = internal_data0(x);
1709  const SimdArray<double, 4, V, 2> &x1 = internal_data1(x);
1712  internal_data(internal_data0(r0)) = isnan(internal_data(internal_data0(x0)));
1713  internal_data(internal_data1(r0)) = isnan(internal_data(internal_data1(x0)));
1714  internal_data(internal_data0(r1)) = isnan(internal_data(internal_data0(x1)));
1715  internal_data(internal_data1(r1)) = isnan(internal_data(internal_data1(x1)));
1716  return {std::move(r0), std::move(r1)};
1717 }
1718 #endif
1719 Vc_FORWARD_UNARY_BOOL_OPERATOR(isnegative);
1721 template <typename T, std::size_t N>
1723 {
1724  return SimdArray<T, N>::fromOperation(Common::Operations::Forward_frexp(), x, e);
1725 }
1727 template <typename T, std::size_t N>
1729 {
1730  return SimdArray<T, N>::fromOperation(Common::Operations::Forward_ldexp(), x, e);
1731 }
1732 Vc_FORWARD_UNARY_OPERATOR(log);
1733 Vc_FORWARD_UNARY_OPERATOR(log10);
1734 Vc_FORWARD_UNARY_OPERATOR(log2);
1735 Vc_FORWARD_UNARY_OPERATOR(reciprocal);
1736 Vc_FORWARD_UNARY_OPERATOR(round);
1737 Vc_FORWARD_UNARY_OPERATOR(rsqrt);
1738 Vc_FORWARD_UNARY_OPERATOR(sin);
1740 template <typename T, std::size_t N>
1742 {
1743  SimdArray<T, N>::callOperation(Common::Operations::Forward_sincos(), x, sin, cos);
1744 }
1745 Vc_FORWARD_UNARY_OPERATOR(sqrt);
1746 Vc_FORWARD_UNARY_OPERATOR(trunc);
1747 Vc_FORWARD_BINARY_OPERATOR(min);
1748 Vc_FORWARD_BINARY_OPERATOR(max);
1750 #undef Vc_FORWARD_UNARY_OPERATOR
1751 #undef Vc_FORWARD_UNARY_BOOL_OPERATOR
1752 #undef Vc_FORWARD_BINARY_OPERATOR
1753 
1754 // simd_cast {{{1
1755 #ifdef Vc_MSVC
1756 #define Vc_DUMMY_ARG0 , int = 0
1757 #define Vc_DUMMY_ARG1 , long = 0
1758 #define Vc_DUMMY_ARG2 , short = 0
1759 #define Vc_DUMMY_ARG3 , char = '0'
1760 #define Vc_DUMMY_ARG4 , unsigned = 0u
1761 #define Vc_DUMMY_ARG5 , unsigned short = 0u
1762 #else
1763 #define Vc_DUMMY_ARG0
1764 #define Vc_DUMMY_ARG1
1765 #define Vc_DUMMY_ARG2
1766 #define Vc_DUMMY_ARG3
1767 #define Vc_DUMMY_ARG4
1768 #define Vc_DUMMY_ARG5
1769 #endif // Vc_MSVC
1770 
1771 // simd_cast_impl_smaller_input {{{2
1772 // The following function can be implemented without the sizeof...(From) overload.
1773 // However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
1774 // function in two works around the issue.
1775 template <typename Return, std::size_t N, typename T, typename... From>
1776 Vc_INTRINSIC Vc_CONST enable_if<sizeof...(From) != 0, Return>
1777 simd_cast_impl_smaller_input(const From &... xs, const T &last)
1778 {
1779  Return r = simd_cast<Return>(xs...);
1780  for (size_t i = 0; i < N; ++i) {
1781  r[i + N * sizeof...(From)] = static_cast<typename Return::EntryType>(last[i]);
1782  }
1783  return r;
1784 }
1785 template <typename Return, std::size_t N, typename T>
1786 Vc_INTRINSIC Vc_CONST Return simd_cast_impl_smaller_input(const T &last)
1787 {
1788  Return r = Return();
1789  for (size_t i = 0; i < N; ++i) {
1790  r[i] = static_cast<typename Return::EntryType>(last[i]);
1791  }
1792  return r;
1793 }
1794 template <typename Return, std::size_t N, typename T, typename... From>
1795 Vc_INTRINSIC Vc_CONST enable_if<sizeof...(From) != 0, Return> simd_cast_impl_larger_input(
1796  const From &... xs, const T &last)
1797 {
1798  Return r = simd_cast<Return>(xs...);
1799  for (size_t i = N * sizeof...(From); i < Return::Size; ++i) {
1800  r[i] = static_cast<typename Return::EntryType>(last[i - N * sizeof...(From)]);
1801  }
1802  return r;
1803 }
1804 template <typename Return, std::size_t N, typename T>
1805 Vc_INTRINSIC Vc_CONST Return simd_cast_impl_larger_input(const T &last)
1806 {
1807  Return r = Return();
1808  for (size_t i = 0; i < Return::size(); ++i) {
1809  r[i] = static_cast<typename Return::EntryType>(last[i]);
1810  }
1811  return r;
1812 }
1813 
1814 // simd_cast_without_last (declaration) {{{2
1815 template <typename Return, typename T, typename... From>
1816 Vc_INTRINSIC_L Vc_CONST_L Return
1817  simd_cast_without_last(const From &... xs, const T &) Vc_INTRINSIC_R Vc_CONST_R;
1818 
1819 // are_all_types_equal {{{2
// Trait: true iff every type in the pack is the same type.
template <typename... Ts> struct are_all_types_equal;
// A single type is trivially "all equal".
template <typename T>
struct are_all_types_equal<T> : public std::integral_constant<bool, true>
{
};
// Recursive case: the first two types must match and the remainder must be equal.
template <typename T0, typename T1, typename... Ts>
struct are_all_types_equal<T0, T1, Ts...>
    : public std::integral_constant<
          bool, std::is_same<T0, T1>::value && are_all_types_equal<T1, Ts...>::value>
{
};
1831 
1832 // simd_cast_interleaved_argument_order (declarations) {{{2
1852 template <typename Return, typename... Ts>
1853 Vc_INTRINSIC Vc_CONST Return
1854  simd_cast_interleaved_argument_order(const Ts &... a, const Ts &... b);
1855 
1856 // simd_cast_with_offset (declarations and one impl) {{{2
1857 // offset == 0 {{{3
1858 template <typename Return, std::size_t offset, typename From, typename... Froms>
1859 Vc_INTRINSIC Vc_CONST
1860  enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
1861  simd_cast_with_offset(const From &x, const Froms &... xs);
1862 // offset > 0 && offset divisible by Return::Size {{{3
1863 template <typename Return, std::size_t offset, typename From>
1864 Vc_INTRINSIC Vc_CONST
1865  enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0), Return>
1866  simd_cast_with_offset(const From &x);
1867 // offset > 0 && offset NOT divisible && Return is non-atomic simd(mask)array {{{3
1868 template <typename Return, std::size_t offset, typename From>
1869 Vc_INTRINSIC Vc_CONST
1870  enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
1871  ((Traits::isSimdArray<Return>::value &&
1872  !Traits::isAtomicSimdArray<Return>::value) ||
1873  (Traits::isSimdMaskArray<Return>::value &&
1874  !Traits::isAtomicSimdMaskArray<Return>::value))),
1875  Return>
1876  simd_cast_with_offset(const From &x);
1877 // offset > 0 && offset NOT divisible && Return is atomic simd(mask)array {{{3
1878 template <typename Return, std::size_t offset, typename From>
1879 Vc_INTRINSIC Vc_CONST
1880  enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
1881  ((Traits::isSimdArray<Return>::value &&
1882  Traits::isAtomicSimdArray<Return>::value) ||
1883  (Traits::isSimdMaskArray<Return>::value &&
1884  Traits::isAtomicSimdMaskArray<Return>::value))),
1885  Return>
1886  simd_cast_with_offset(const From &x);
1887 // offset > first argument (drops first arg) {{{3
// The offset skips the first argument entirely: drop it and recurse with the
// offset reduced by its size.
template <typename Return, std::size_t offset, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST enable_if<
    (are_all_types_equal<From, Froms...>::value && From::Size <= offset), Return>
    simd_cast_with_offset(const From &, const Froms &... xs)
{
    return simd_cast_with_offset<Return, offset - From::Size>(xs...);
}
1895 
1896 // offset > first and only argument (returns Zero) {{{3
// The offset skips the only argument: nothing remains to convert, return Zero.
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST enable_if<(From::Size <= offset), Return> simd_cast_with_offset(
    const From &)
{
    return Return::Zero();
}
1903 
1904 // first_type_of {{{2
// Extracts the first type of a template parameter pack.
template <typename T, typename... Ts> struct first_type_of_impl
{
    using type = T;
};
template <typename... Ts> using first_type_of = typename first_type_of_impl<Ts...>::type;
1910 
1911 // simd_cast_drop_arguments (declarations) {{{2
1912 template <typename Return, typename From>
1913 Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x);
1914 template <typename Return, typename... Froms>
1915 Vc_INTRINSIC Vc_CONST
1916  enable_if<(are_all_types_equal<Froms...>::value &&
1917  sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
1918  Return>
1919  simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x);
1920 // The following function can be implemented without the sizeof...(From) overload.
1921 // However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
1922 // function in two works around the issue.
1923 template <typename Return, typename From, typename... Froms>
1924 Vc_INTRINSIC Vc_CONST enable_if<
1925  (are_all_types_equal<From, Froms...>::value &&
1926  (1 + sizeof...(Froms)) * From::Size >= Return::Size && sizeof...(Froms) != 0),
1927  Return>
1928 simd_cast_drop_arguments(Froms... xs, From x, From);
1929 template <typename Return, typename From>
1930 Vc_INTRINSIC Vc_CONST
1931  enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
1932  simd_cast_drop_arguments(From x, From);
1933 
// Debug tracing helper: with Vc_DEBUG_SIMD_CAST defined, vc_debug_ prints the
// prefix, all arguments (comma separated) and the suffix to std::cerr;
// otherwise it compiles to an empty function.
namespace
{
#ifdef Vc_DEBUG_SIMD_CAST
void debugDoNothing(const std::initializer_list<void *> &) {}
template <typename T0, typename... Ts>
inline void vc_debug_(const char *prefix, const char *suffix, const T0 &arg0,
                      const Ts &... args)
{
    std::cerr << prefix << arg0;
    // the braced-init-list guarantees left-to-right evaluation of the inserts
    debugDoNothing({&(std::cerr << ", " << args)...});
    std::cerr << suffix;
}
#else
template <typename T0, typename... Ts>
Vc_INTRINSIC void vc_debug_(const char *, const char *, const T0 &, const Ts &...)
{
}
#endif
}  // unnamed namespace
1953 
1954 // is_less trait{{{2
// Compile-time A < B as a trait type; wrapping the comparison keeps the raw `<`
// token out of the macro/template argument lists below.
template <size_t A, size_t B>
struct is_less : public std::integral_constant<bool, (A < B)> {
};
1958 
1959 // is_power_of_2 trait{{{2
// Compile-time test whether N is a (positive) power of two.
// Fix: the previous expression ((N - 1) & N) == 0 wraps around for N == 0 and
// incorrectly classified zero as a power of two; N != 0 is now required.
template <size_t N>
struct is_power_of_2 : public std::integral_constant<bool, N != 0 && ((N - 1) & N) == 0> {
};
1963 
1964 // simd_cast<T>(xs...) to SimdArray/-mask {{{2
// Generates the simd_cast overloads converting one or more native SIMD
// vectors/masks (NativeType_<T, A>) into a SimdArray/SimdMaskArray Return type.
// Four overloads, selected via enable_if (labels match the vc_debug_ output):
//  {1} atomic Return, inputs provide fewer entries than Return::Size
//      -> cast everything into the single storage vector
//  {2} atomic Return, inputs provide at least Return::Size entries
//      -> drop the surplus last input and recurse
//  {3} non-atomic Return, inputs reach into the high storage half
//      -> split into storage_type0 and (offset) storage_type1 parts
//  {4} non-atomic Return, inputs fit entirely into the low storage half
//      -> cast into storage_type0, zero the high half
#define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_)                                  \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (Traits::isAtomic##SimdArrayType_<Return>::value &&                              \
         is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value &&     \
         are_all_types_equal<NativeType_<T, A>, Froms...>::value),                       \
        Return>                                                                          \
    simd_cast(NativeType_<T, A> x, Froms... xs)                                          \
    {                                                                                    \
        vc_debug_("simd_cast{1}(", ")\n", x, xs...);                                     \
        return {simd_cast<typename Return::storage_type>(x, xs...)};                     \
    }                                                                                    \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (Traits::isAtomic##SimdArrayType_<Return>::value &&                              \
         !is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value &&    \
         are_all_types_equal<NativeType_<T, A>, Froms...>::value),                       \
        Return>                                                                          \
    simd_cast(NativeType_<T, A> x, Froms... xs)                                          \
    {                                                                                    \
        vc_debug_("simd_cast{2}(", ")\n", x, xs...);                                     \
        return {simd_cast_without_last<Return, NativeType_<T, A>, Froms...>(x, xs...)};  \
    }                                                                                    \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   is_less<Common::left_size<Return::Size>(),                            \
                           NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value &&    \
                   are_all_types_equal<NativeType_<T, A>, Froms...>::value),             \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x, Froms... xs)                                      \
    {                                                                                    \
        vc_debug_("simd_cast{3}(", ")\n", x, xs...);                                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        return {simd_cast_drop_arguments<R0, Froms...>(x, xs...),                        \
                simd_cast_with_offset<R1, R0::Size>(x, xs...)};                          \
    }                                                                                    \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   !is_less<Common::left_size<Return::Size>(),                           \
                            NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value &&   \
                   are_all_types_equal<NativeType_<T, A>, Froms...>::value),             \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x, Froms... xs)                                      \
    {                                                                                    \
        vc_debug_("simd_cast{4}(", ")\n", x, xs...);                                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        return {simd_cast<R0>(x, xs...), R1::Zero()};                                    \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
2020 
2021 Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
2022 Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
2023 #undef Vc_SIMDARRAY_CASTS
2024 
// simd_cast<SimdArray/-mask, offset>(V) {{{2
// Generates the offset-taking simd_cast overloads from a single native
// Vector/Mask to a SimdArray/-mask. `offset` counts in units of Return::Size
// entries of the argument. Three cases: atomic Return (forward the offset to
// the storage member's cast), non-atomic Return whose two halves both come
// from the argument, and non-atomic Return whose right half lies beyond the
// argument and is therefore zero.
#define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_)                                  \
    /* SIMD Vector/Mask to atomic SimdArray/simdmaskarray */                             \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<Traits::isAtomic##SimdArrayType_<Return>::value, Return>               \
        simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG0)                                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, atomic}(", ")\n", offset, x);                       \
        return {simd_cast<typename Return::storage_type, offset>(x)};                    \
    }                                                                                    \
    /* both halves of Return array are extracted from argument */                        \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   Return::Size * offset + Common::left_size<Return::Size>() <           \
                       NativeType_<T, A>::Size),                                         \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG1)                                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, split Return}(", ")\n", offset, x);                 \
        using R0 = typename Return::storage_type0;                                       \
        /* offsets below are in entries, not in multiples of Return::Size */             \
        constexpr int entries_offset = offset * Return::Size;                            \
        constexpr int entries_offset_right = entries_offset + R0::Size;                  \
        return {                                                                         \
            simd_cast_with_offset<typename Return::storage_type0, entries_offset>(x),    \
            simd_cast_with_offset<typename Return::storage_type1, entries_offset_right>( \
                x)};                                                                     \
    }                                                                                    \
    /* SIMD Vector/Mask to non-atomic SimdArray/simdmaskarray */                         \
    /* right half of Return array is zero */                                             \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   Return::Size * offset + Common::left_size<Return::Size>() >=          \
                       NativeType_<T, A>::Size),                                         \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG2)                                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, R1::Zero}(", ")\n", offset, x);                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        constexpr int entries_offset = offset * Return::Size;                            \
        return {simd_cast_with_offset<R0, entries_offset>(x), R1::Zero()};               \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
#undef Vc_SIMDARRAY_CASTS
2077 
// simd_cast<T>(xs...) from SimdArray/-mask {{{2
// Generates simd_cast overloads whose *inputs* are SimdArray/SimdMaskArray.
// Dispatch is over three properties of the input type: whether it is atomic
// (N == M, a single native vector), whether its element count N is a power of
// two (bisectable into storage_type0/storage_type1), and whether the
// concatenated inputs exceed Return::Size (in which case trailing arguments
// are dropped). Comments inside the macro must be /* */ so the trailing
// line-continuation backslash survives.
#define Vc_SIMDARRAY_CASTS(SimdArrayType_)                                               \
    /* indivisible SimdArrayType_ */                                                     \
    template <typename Return, typename T, std::size_t N, typename V, typename... From>  \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value &&    \
                   (sizeof...(From) == 0 || N * sizeof...(From) < Return::Size) &&       \
                   !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value),            \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs)              \
    {                                                                                    \
        vc_debug_("simd_cast{indivisible}(", ")\n", x0, xs...);                          \
        return simd_cast<Return>(internal_data(x0), internal_data(xs)...);               \
    }                                                                                    \
    /* indivisible SimdArrayType_ && can drop arguments from the end */                  \
    template <typename Return, typename T, std::size_t N, typename V, typename... From>  \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value &&    \
                   (sizeof...(From) > 0 && (N * sizeof...(From) >= Return::Size)) &&     \
                   !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value),            \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs)              \
    {                                                                                    \
        vc_debug_("simd_cast{indivisible2}(", ")\n", x0, xs...);                         \
        return simd_cast_without_last<Return,                                            \
                                      typename SimdArrayType_<T, N, V, N>::storage_type, \
                                      typename From::storage_type...>(                   \
            internal_data(x0), internal_data(xs)...);                                    \
    }                                                                                    \
    /* bisectable SimdArrayType_ (N = 2^n) && never too large */                         \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
         !std::is_same<Return, SimdArrayType_<T, N, V, M>>::value &&                     \
         is_less<N * sizeof...(From), Return::Size>::value && is_power_of_2<N>::value),  \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)                  \
    {                                                                                    \
        vc_debug_("simd_cast{bisectable}(", ")\n", x0, xs...);                           \
        /* all left halves first, then all right halves; the callee restores */          \
        /* the original element order via the interleaving helper */                     \
        return simd_cast_interleaved_argument_order<                                     \
            Return, typename SimdArrayType_<T, N, V, M>::storage_type0,                  \
            typename From::storage_type0...>(internal_data0(x0), internal_data0(xs)...,  \
                                             internal_data1(x0), internal_data1(xs)...); \
    }                                                                                    \
    /* bisectable SimdArrayType_ (N = 2^n) && input so large that at least the last      \
     * input can be dropped */                                                           \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
         !is_less<N * sizeof...(From), Return::Size>::value && is_power_of_2<N>::value), \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)                  \
    {                                                                                    \
        vc_debug_("simd_cast{bisectable2}(", ")\n", x0, xs...);                          \
        return simd_cast_without_last<Return, SimdArrayType_<T, N, V, M>, From...>(      \
            x0, xs...);                                                                  \
    }                                                                                    \
    /* remaining SimdArrayType_ input never larger (N != 2^n) */                         \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
         N * (1 + sizeof...(From)) <= Return::Size && !is_power_of_2<N>::value),         \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)                  \
    {                                                                                    \
        vc_debug_("simd_cast{remaining}(", ")\n", x0, xs...);                            \
        return simd_cast_impl_smaller_input<Return, N, SimdArrayType_<T, N, V, M>,       \
                                            From...>(x0, xs...);                         \
    }                                                                                    \
    /* remaining SimdArrayType_ input larger (N != 2^n) */                               \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
         N * (1 + sizeof...(From)) > Return::Size && !is_power_of_2<N>::value),          \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)                  \
    {                                                                                    \
        vc_debug_("simd_cast{remaining2}(", ")\n", x0, xs...);                           \
        return simd_cast_impl_larger_input<Return, N, SimdArrayType_<T, N, V, M>,        \
                                           From...>(x0, xs...);                          \
    }                                                                                    \
    /* a single bisectable SimdArrayType_ (N = 2^n) too large */                         \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M>     \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && N >= 2 * Return::Size && is_power_of_2<N>::value), Return>  \
        simd_cast(const SimdArrayType_<T, N, V, M> &x)                                   \
    {                                                                                    \
        vc_debug_("simd_cast{single bisectable}(", ")\n", x);                            \
        /* the whole Return fits into the left half of x */                              \
        return simd_cast<Return>(internal_data0(x));                                     \
    }                                                                                    \
    /* a single bisectable SimdArrayType_ whose halves straddle Return::Size */          \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M>     \
    Vc_INTRINSIC Vc_CONST enable_if<(N != M && N > Return::Size &&                       \
                                     N < 2 * Return::Size && is_power_of_2<N>::value),   \
                                    Return>                                              \
    simd_cast(const SimdArrayType_<T, N, V, M> &x)                                       \
    {                                                                                    \
        vc_debug_("simd_cast{single bisectable2}(", ")\n", x);                           \
        return simd_cast<Return>(internal_data0(x), internal_data1(x));                  \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

Vc_SIMDARRAY_CASTS(SimdArray);
Vc_SIMDARRAY_CASTS(SimdMaskArray);
#undef Vc_SIMDARRAY_CASTS
2186 
// simd_cast<T, offset>(SimdArray/-mask) {{{2
// Generates the offset-taking simd_cast overloads with a SimdArray/-mask
// input. `offset` counts in units of Return::Size entries of x. Dispatch:
// offset == 0 degenerates to the plain cast; atomic inputs forward to the
// native vector; otherwise the requested window lies entirely in the right
// half, entirely in the left half, or straddles the boundary (scalar copy
// fallback).
#define Vc_SIMDARRAY_CASTS(SimdArrayType_)                                               \
    /* offset == 0 is like without offset */                                             \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST enable_if<(offset == 0), Return> simd_cast(                    \
        const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG0)                               \
    {                                                                                    \
        vc_debug_("simd_cast{offset == 0}(", ")\n", offset, x);                          \
        return simd_cast<Return>(x);                                                     \
    }                                                                                    \
    /* forward to V */                                                                   \
    template <typename Return, int offset, typename T, std::size_t N, typename V>        \
    Vc_INTRINSIC Vc_CONST enable_if<(offset != 0), Return> simd_cast(                    \
        const SimdArrayType_<T, N, V, N> &x Vc_DUMMY_ARG1)                               \
    {                                                                                    \
        vc_debug_("simd_cast{offset, forward}(", ")\n", offset, x);                      \
        return simd_cast<Return, offset>(internal_data(x));                              \
    }                                                                                    \
    /* convert from right member of SimdArray */                                         \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() &&          \
                   offset != 0 && Common::left_size<N>() % Return::Size == 0),           \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG2)                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, right}(", ")\n", offset, x);                        \
        /* the left half is a whole multiple of Return::Size, so the offset */           \
        /* can simply be rebased onto internal_data1 */                                  \
        return simd_cast<Return, offset - Common::left_size<N>() / Return::Size>(        \
            internal_data1(x));                                                          \
    }                                                                                    \
    /* same as above except for odd cases where offset * Return::Size doesn't fit the    \
     * left side of the SimdArray */                                                     \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() &&          \
                   offset != 0 && Common::left_size<N>() % Return::Size != 0),           \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG3)                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, right, nofit}(", ")\n", offset, x);                 \
        return simd_cast_with_offset<Return,                                             \
                                     offset * Return::Size - Common::left_size<N>()>(    \
            internal_data1(x));                                                          \
    }                                                                                    \
    /* convert from left member of SimdArray */                                          \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && /*offset * Return::Size < Common::left_size<N>() &&*/                 \
         offset != 0 && (offset + 1) * Return::Size <= Common::left_size<N>()),          \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG4)                         \
    {                                                                                    \
        vc_debug_("simd_cast{offset, left}(", ")\n", offset, x);                         \
        return simd_cast<Return, offset>(internal_data0(x));                             \
    }                                                                                    \
    /* fallback to copying scalars */                                                    \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && (offset * Return::Size < Common::left_size<N>()) &&         \
                   offset != 0 && (offset + 1) * Return::Size > Common::left_size<N>()), \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG5)                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, copy scalars}(", ")\n", offset, x);                 \
        /* the window straddles the left/right boundary of x; no structural */           \
        /* shortcut exists, so copy element by element (entries past N stay 0) */        \
        using R = typename Return::EntryType;                                            \
        Return r = Return::Zero();                                                       \
        for (std::size_t i = offset * Return::Size;                                     \
             i < std::min(N, (offset + 1) * Return::Size); ++i) {                        \
            r[i - offset * Return::Size] = static_cast<R>(x[i]);                         \
        }                                                                                \
        return r;                                                                        \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_SIMDARRAY_CASTS(SimdArray);
Vc_SIMDARRAY_CASTS(SimdMaskArray);
#undef Vc_SIMDARRAY_CASTS
2268 // simd_cast_drop_arguments (definitions) {{{2
2269 template <typename Return, typename From>
2270 Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x)
2271 {
2272  return simd_cast<Return>(x);
2273 }
/// \internal
/// The arguments before the trailing one occupy fewer than Return::Size
/// entries, so nothing more needs to be dropped: forward everything.
/// Note the *leading* parameter pack Froms — it is only deducible because
/// every caller spells the template arguments out explicitly.
template <typename Return, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<Froms...>::value &&
               sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
              Return>
    simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x)
{
    return simd_cast<Return>(xs..., x);
}
// The following function can be implemented without the sizeof...(From) overload.
// However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
// function in two works around the issue.
/// \internal
/// Still too much input: discard the unnamed trailing argument and recurse
/// until the remaining arguments fit into Return.
template <typename Return, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST enable_if<
    (are_all_types_equal<From, Froms...>::value &&
     (1 + sizeof...(Froms)) * From::Size >= Return::Size && sizeof...(Froms) != 0),
    Return>
simd_cast_drop_arguments(Froms... xs, From x, From)
{
    return simd_cast_drop_arguments<Return, Froms...>(xs..., x);
}
/// \internal
/// Two arguments and the first alone already covers Return: drop the second.
/// (are_all_types_equal<From> with a single type is trivially true; it is kept
/// to mirror the shape of the variadic overload above.)
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
    simd_cast_drop_arguments(From x, From)
{
    return simd_cast_drop_arguments<Return>(x);
}
2302 
// simd_cast_with_offset (definitions) {{{2
/// \internal
/// The entry offset is a whole multiple of Return::Size, so it can be
/// forwarded as a chunk index to the offset-taking simd_cast overloads.
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0),
              Return> simd_cast_with_offset(const From &x)
{
    return simd_cast<Return, offset / Return::Size>(x);
}
2311 template <typename Return, std::size_t offset, typename From>
2312 Vc_INTRINSIC Vc_CONST
2313  enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
2314  ((Traits::isSimdArray<Return>::value &&
2315  !Traits::isAtomicSimdArray<Return>::value) ||
2316  (Traits::isSimdMaskArray<Return>::value &&
2317  !Traits::isAtomicSimdMaskArray<Return>::value))),
2318  Return>
2319  simd_cast_with_offset(const From &x)
2320 {
2321  using R0 = typename Return::storage_type0;
2322  using R1 = typename Return::storage_type1;
2323  return {simd_cast_with_offset<R0, offset>(x),
2324  simd_cast_with_offset<R1, offset + R0::Size>(x)};
2325 }
/// \internal
/// Misaligned offset into an *atomic* Return, which cannot be split further:
/// shift the misaligned remainder (offset % Return::Size) out of x first,
/// then cast from the now-aligned chunk.
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
               ((Traits::isSimdArray<Return>::value &&
                 Traits::isAtomicSimdArray<Return>::value) ||
                (Traits::isSimdMaskArray<Return>::value &&
                 Traits::isAtomicSimdMaskArray<Return>::value))),
              Return>
    simd_cast_with_offset(const From &x)
{
    return simd_cast<Return, offset / Return::Size>(x.shifted(offset % Return::Size));
}
/// \internal
/// A zero offset degenerates to a plain simd_cast of all inputs.
template <typename Return, std::size_t offset, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
    simd_cast_with_offset(const From &x, const Froms &... xs)
{
    return simd_cast<Return>(x, xs...);
}
2345 
// simd_cast_without_last (definition) {{{2
/// \internal
/// Casts all arguments except the trailing (unnamed) one. The leading
/// parameter pack From is only deducible because callers spell the template
/// arguments out explicitly.
template <typename Return, typename T, typename... From>
Vc_INTRINSIC Vc_CONST Return simd_cast_without_last(const From &... xs, const T &)
{
    return simd_cast<Return>(xs...);
}
2352 
2353 // simd_cast_interleaved_argument_order (definitions) {{{2
2354 
#ifdef Vc_MSVC
// MSVC doesn't see that the Ts pack below can be empty and thus complains when extract_interleaved
// is called with only 2 arguments. These overloads here are *INCORRECT standard C++*, but they make
// MSVC do the right thing.
template <std::size_t I, typename T0>
Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(const T0 &a0, const T0 &)
{
    // two-argument case, even index: the element from the first pack
    return a0;
}
template <std::size_t I, typename T0>
Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(const T0 &, const T0 &b0)
{
    // two-argument case, odd index: the element from the second pack
    return b0;
}
#endif // Vc_MSVC
2370 
/// \internal
/// Returns the element at interleaved index \p I of the argument sequence
/// (a0, b0, a1, b1, ...); this overload terminates the recursion at I == 0.
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(const T0 &a0,
                                                                  const Ts &...,
                                                                  const T0 &,
                                                                  const Ts &...)
{
    return a0;
}
/// \internal
/// Returns the element at interleaved index \p I of the argument sequence
/// (a0, b0, a1, b1, ...); this overload terminates the recursion at I == 1.
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(const T0 &,
                                                                  const Ts &...,
                                                                  const T0 &b0,
                                                                  const Ts &...)
{
    return b0;
}
/// \internal
/// Recursion step: drop the first element of each pack and look for index
/// I - 2 in the remaining interleaved sequence.
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I > 1), T0> extract_interleaved(const T0 &,
                                                                 const Ts &... a,
                                                                 const T0 &,
                                                                 const Ts &... b)
{
    return extract_interleaved<I - 2, Ts...>(a..., b...);
}
/// \internal
/// Expands extract_interleaved for every index in \p Indexes, thereby
/// reordering the two packs (a..., b...) into (a0, b0, a1, b1, ...) before
/// handing them to simd_cast.
template <typename Return, typename... Ts, std::size_t... Indexes>
Vc_INTRINSIC Vc_CONST Return
    simd_cast_interleaved_argument_order_1(index_sequence<Indexes...>, const Ts &... a,
                                           const Ts &... b)
{
    return simd_cast<Return>(extract_interleaved<Indexes, Ts...>(a..., b...)...);
}
2408 template <typename Return, typename... Ts>
2409 Vc_INTRINSIC Vc_CONST Return
2410  simd_cast_interleaved_argument_order(const Ts &... a, const Ts &... b)
2411 {
2412  using seq = make_index_sequence<sizeof...(Ts)*2>;
2413  return simd_cast_interleaved_argument_order_1<Return, Ts...>(seq(), a..., b...);
2414 }
2415 
// conditional_assign {{{1
// Generates one conditional_assign overload per compound-assignment Operator:
// applies `lhs(mask) op_ rhs`, i.e. the write-masked form of the operator, so
// only the lanes selected by mask are modified. The enable_if picks the
// instantiation whose Operator tag O matches name_.
#define Vc_CONDITIONAL_ASSIGN(name_, op_)                                                \
    template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M,  \
              typename U>                                                                \
    Vc_INTRINSIC enable_if<O == Operator::name_, void> conditional_assign(               \
        SimdArray<T, N, V, VN> &lhs, M &&mask, U &&rhs)                                  \
    {                                                                                    \
        lhs(mask) op_ rhs;                                                               \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_CONDITIONAL_ASSIGN(         Assign,  =);
Vc_CONDITIONAL_ASSIGN(     PlusAssign, +=);
Vc_CONDITIONAL_ASSIGN(    MinusAssign, -=);
Vc_CONDITIONAL_ASSIGN( MultiplyAssign, *=);
Vc_CONDITIONAL_ASSIGN(   DivideAssign, /=);
Vc_CONDITIONAL_ASSIGN(RemainderAssign, %=);
Vc_CONDITIONAL_ASSIGN(      XorAssign, ^=);
Vc_CONDITIONAL_ASSIGN(      AndAssign, &=);
Vc_CONDITIONAL_ASSIGN(       OrAssign, |=);
Vc_CONDITIONAL_ASSIGN( LeftShiftAssign,<<=);
Vc_CONDITIONAL_ASSIGN(RightShiftAssign,>>=);
#undef Vc_CONDITIONAL_ASSIGN
2438 
// Generates the increment/decrement flavors of conditional_assign: the masked
// expression expr_ is evaluated and its result returned as a SimdArray.
#define Vc_CONDITIONAL_ASSIGN(name_, expr_)                                              \
    template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M>  \
    Vc_INTRINSIC enable_if<O == Operator::name_, SimdArray<T, N, V, VN>>                 \
    conditional_assign(SimdArray<T, N, V, VN> &lhs, M &&mask)                            \
    {                                                                                    \
        return expr_;                                                                    \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_CONDITIONAL_ASSIGN(PostIncrement, lhs(mask)++);
Vc_CONDITIONAL_ASSIGN( PreIncrement, ++lhs(mask));
Vc_CONDITIONAL_ASSIGN(PostDecrement, lhs(mask)--);
Vc_CONDITIONAL_ASSIGN( PreDecrement, --lhs(mask));
#undef Vc_CONDITIONAL_ASSIGN
2452 // transpose_impl {{{1
2453 namespace Common
2454 {
// 4x4 transpose of atomic SimdArrays (N == native vector size): each input
// and output wraps exactly one native vector V, so unwrap everything via
// internal_data and forward to the native-vector transpose_impl overload.
template <typename T, size_t N, typename V>
inline void transpose_impl(
    TransposeTag<4, 4>, SimdArray<T, N, V, N> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, N, V, N>, SimdArray<T, N, V, N>,
                         SimdArray<T, N, V, N>, SimdArray<T, N, V, N>> &proxy)
{
    // pointers to the native vectors inside the four output SimdArrays
    V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
                            &internal_data(*r[2]), &internal_data(*r[3])};
    transpose_impl(TransposeTag<4, 4>(), &r2[0],
                   TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
                                              internal_data(std::get<1>(proxy.in)),
                                              internal_data(std::get<2>(proxy.in)),
                                              internal_data(std::get<3>(proxy.in))});
}
2469 
// 2x4 transpose: four 2-element inputs become two 4-element outputs. lo
// collects the first component of each input, hi the second component,
// written scalar-by-scalar through the nested internal_data0/1 accessors.
template <typename T, typename V>
inline void transpose_impl(
    TransposeTag<2, 4>, SimdArray<T, 4, V, 1> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>,
                         SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>> &proxy)
{
    auto &lo = *r[0];
    auto &hi = *r[1];
    internal_data0(internal_data0(lo)) = internal_data0(std::get<0>(proxy.in));
    internal_data1(internal_data0(lo)) = internal_data0(std::get<1>(proxy.in));
    internal_data0(internal_data1(lo)) = internal_data0(std::get<2>(proxy.in));
    internal_data1(internal_data1(lo)) = internal_data0(std::get<3>(proxy.in));
    internal_data0(internal_data0(hi)) = internal_data1(std::get<0>(proxy.in));
    internal_data1(internal_data0(hi)) = internal_data1(std::get<1>(proxy.in));
    internal_data0(internal_data1(hi)) = internal_data1(std::get<2>(proxy.in));
    internal_data1(internal_data1(hi)) = internal_data1(std::get<3>(proxy.in));
}
2487 
// 4x4 transpose of single-element SimdArrays. Same body as the atomic
// overload above; it exists as a separate, fully pinned overload — for
// SimdArray<T, 1, V, 1> both the <T, N, V, N> and <T, N, V, 1> templates
// would otherwise match (presumably ambiguously — this overload is more
// specialized than either).
template <typename T, typename V>
inline void transpose_impl(
    TransposeTag<4, 4>, SimdArray<T, 1, V, 1> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, 1, V, 1>, SimdArray<T, 1, V, 1>,
                         SimdArray<T, 1, V, 1>, SimdArray<T, 1, V, 1>> &proxy)
{
    V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
                            &internal_data(*r[2]), &internal_data(*r[3])};
    transpose_impl(TransposeTag<4, 4>(), &r2[0],
                   TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
                                              internal_data(std::get<1>(proxy.in)),
                                              internal_data(std::get<2>(proxy.in)),
                                              internal_data(std::get<3>(proxy.in))});
}
2502 
// 4x4 transpose of scalar-backed SimdArrays, decomposed into two 2x4
// transposes: outputs 0-1 are fed from the left halves (internal_data0) of
// all four inputs, outputs 2-3 from the right halves. NOTE(review): the
// fixed H = SimdArray<T, 2> implies the halves are 2-wide, i.e. this
// overload appears to assume N == 4 — confirm against the dispatch sites.
template <typename T, size_t N, typename V>
inline void transpose_impl(
    TransposeTag<4, 4>, SimdArray<T, N, V, 1> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>,
                         SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>> &proxy)
{
    SimdArray<T, N, V, 1> *Vc_RESTRICT r0[4 / 2] = {r[0], r[1]};
    SimdArray<T, N, V, 1> *Vc_RESTRICT r1[4 / 2] = {r[2], r[3]};
    using H = SimdArray<T, 2>;
    transpose_impl(TransposeTag<2, 4>(), &r0[0],
                   TransposeProxy<H, H, H, H>{internal_data0(std::get<0>(proxy.in)),
                                              internal_data0(std::get<1>(proxy.in)),
                                              internal_data0(std::get<2>(proxy.in)),
                                              internal_data0(std::get<3>(proxy.in))});
    transpose_impl(TransposeTag<2, 4>(), &r1[0],
                   TransposeProxy<H, H, H, H>{internal_data1(std::get<0>(proxy.in)),
                                              internal_data1(std::get<1>(proxy.in)),
                                              internal_data1(std::get<2>(proxy.in)),
                                              internal_data1(std::get<3>(proxy.in))});
}
2523 
2524 /* TODO:
2525 template <typename T, std::size_t N, typename V, std::size_t VSize>
2526 inline enable_if<(N > VSize), void> transpose_impl(
2527  std::array<SimdArray<T, N, V, VSize> * Vc_RESTRICT, 4> & r,
2528  const TransposeProxy<SimdArray<T, N, V, VSize>, SimdArray<T, N, V, VSize>,
2529  SimdArray<T, N, V, VSize>, SimdArray<T, N, V, VSize>> &proxy)
2530 {
2531  typedef SimdArray<T, N, V, VSize> SA;
2532  std::array<typename SA::storage_type0 * Vc_RESTRICT, 4> r0 = {
2533  {&internal_data0(*r[0]), &internal_data0(*r[1]), &internal_data0(*r[2]),
2534  &internal_data0(*r[3])}};
2535  transpose_impl(
2536  r0, TransposeProxy<typename SA::storage_type0, typename SA::storage_type0,
2537  typename SA::storage_type0, typename SA::storage_type0>{
2538  internal_data0(std::get<0>(proxy.in)),
2539  internal_data0(std::get<1>(proxy.in)),
2540  internal_data0(std::get<2>(proxy.in)),
2541  internal_data0(std::get<3>(proxy.in))});
2542 
2543  std::array<typename SA::storage_type1 * Vc_RESTRICT, 4> r1 = {
2544  {&internal_data1(*r[0]), &internal_data1(*r[1]), &internal_data1(*r[2]),
2545  &internal_data1(*r[3])}};
2546  transpose_impl(
2547  r1, TransposeProxy<typename SA::storage_type1, typename SA::storage_type1,
2548  typename SA::storage_type1, typename SA::storage_type1>{
2549  internal_data1(std::get<0>(proxy.in)),
2550  internal_data1(std::get<1>(proxy.in)),
2551  internal_data1(std::get<2>(proxy.in)),
2552  internal_data1(std::get<3>(proxy.in))});
2553 }
2554 */
2555 } // namespace Common
2556 
// Traits static assertions {{{1
// Verify that has_no_allocated_data holds for SimdArray through every
// cv-qualifier and reference combination.
static_assert(Traits::has_no_allocated_data<const volatile Vc::SimdArray<int, 4> &>::value, "");
static_assert(Traits::has_no_allocated_data<const volatile Vc::SimdArray<int, 4>>::value, "");
static_assert(Traits::has_no_allocated_data<volatile Vc::SimdArray<int, 4> &>::value, "");
static_assert(Traits::has_no_allocated_data<volatile Vc::SimdArray<int, 4>>::value, "");
static_assert(Traits::has_no_allocated_data<const Vc::SimdArray<int, 4> &>::value, "");
static_assert(Traits::has_no_allocated_data<const Vc::SimdArray<int, 4>>::value, "");
static_assert(Traits::has_no_allocated_data<Vc::SimdArray<int, 4>>::value, "");
static_assert(Traits::has_no_allocated_data<Vc::SimdArray<int, 4> &&>::value, "");
2566 // }}}1
2568 
2569 } // namespace Vc_VERSIONED_NAMESPACE
2570 
2571 // numeric_limits {{{1
2572 namespace std
2573 {
2574 template <typename T, size_t N, typename V, size_t VN>
2575 struct numeric_limits<Vc::SimdArray<T, N, V, VN>> : public numeric_limits<T> {
2576 private:
2577  using R = Vc::SimdArray<T, N, V, VN>;
2578 
2579 public:
2580  static Vc_ALWAYS_INLINE Vc_CONST R max() noexcept { return numeric_limits<T>::max(); }
2581  static Vc_ALWAYS_INLINE Vc_CONST R min() noexcept { return numeric_limits<T>::min(); }
2582  static Vc_ALWAYS_INLINE Vc_CONST R lowest() noexcept
2583  {
2584  return numeric_limits<T>::lowest();
2585  }
2586  static Vc_ALWAYS_INLINE Vc_CONST R epsilon() noexcept
2587  {
2588  return numeric_limits<T>::epsilon();
2589  }
2590  static Vc_ALWAYS_INLINE Vc_CONST R round_error() noexcept
2591  {
2592  return numeric_limits<T>::round_error();
2593  }
2594  static Vc_ALWAYS_INLINE Vc_CONST R infinity() noexcept
2595  {
2596  return numeric_limits<T>::infinity();
2597  }
2598  static Vc_ALWAYS_INLINE Vc_CONST R quiet_NaN() noexcept
2599  {
2600  return numeric_limits<T>::quiet_NaN();
2601  }
2602  static Vc_ALWAYS_INLINE Vc_CONST R signaling_NaN() noexcept
2603  {
2604  return numeric_limits<T>::signaling_NaN();
2605  }
2606  static Vc_ALWAYS_INLINE Vc_CONST R denorm_min() noexcept
2607  {
2608  return numeric_limits<T>::denorm_min();
2609  }
2610 };
2611 } // namespace std
2612 //}}}1
2613 
2614 #endif // VC_COMMON_SIMDARRAY_H_
2615 
2616 // vim: foldmethod=marker
SimdArray< T, N, V, M > ceil(const SimdArray< T, N, V, M > &x)
Applies the std:: ceil function component-wise and concurrently.
Definition: simdarray.h:1687
Vc::Vector< T > frexp(const Vc::Vector< T > &x, Vc::SimdArray< int, size()> *e)
Convert floating-point number to fractional and integral components.
Vc::Vector< T > log2(const Vc::Vector< T > &v)
Vc::Vector< T > exp(const Vc::Vector< T > &v)
static SimdArray generate(const G &gen)
Generate a vector object from return values of gen (static variant of fill).
Definition: simdarray.h:668
SimdArray< T, N, V, M > floor(const SimdArray< T, N, V, M > &x)
Applies the std:: floor function component-wise and concurrently.
Definition: simdarray.h:1692
The main vector class for expressing data parallelism.
Definition: types.h:44
constexpr VectorSpecialInitializerIndexesFromZero IndexesFromZero
The special object Vc::IndexesFromZero can be used to construct Vector objects initialized to values ...
Definition: types.h:94
Vc::Vector< T > sin(const Vc::Vector< T > &v)
Vc::Vector< T > cos(const Vc::Vector< T > &v)
Vc::Vector< T > min(const Vc::Vector< T > &x, const Vc::Vector< T > &y)
Vc::Vector< T > reciprocal(const Vc::Vector< T > &v)
Returns the reciprocal of v.
SimdArray rotated(int amount) const
Rotate vector entries to the left by amount.
Definition: simdarray.h:1188
Vc::Vector< T > ldexp(Vc::Vector< T > x, Vc::SimdArray< int, size()> e)
Multiply floating-point number by integral power of 2.
std::ostream & operator<<(std::ostream &out, const Vc::Vector< T, Abi > &v)
Prints the contents of a vector into a stream object.
Definition: IO:117
UnalignedTag DefaultLoadTag
The default load tag type uses unaligned (non-streaming) loads.
Vc::Vector< T > abs(const Vc::Vector< T > &v)
Returns the absolute value of v.
SimdArray apply(F &&f, const mask_type &k) const
As above, but skip the entries where mask is not set.
Definition: simdarray.h:1053
result_vector_type< L, R > operator-(L &&lhs, R &&rhs)
Applies - component-wise and concurrently.
Definition: simdarray.h:1611
Vc::Vector< T > max(const Vc::Vector< T > &x, const Vc::Vector< T > &y)
Definition: vector.h:257
SimdArray< T, N, V, M > min(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std:: min function component-wise and concurrently.
Definition: simdarray.h:1747
static SimdArray IndexesFromZero()
Returns a vector with the entries initialized to 0, 1, 2, 3, 4, 5, ...
Definition: simdarray.h:656
SimdArray< T, N, V, M > max(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std:: max function component-wise and concurrently.
Definition: simdarray.h:1748
Vc::Vector< T > log(const Vc::Vector< T > &v)
SimdArray shifted(int amount) const
Shift vector entries to the left by amount; shifting in zeros.
Definition: simdarray.h:1060
Vc::Vector< T > fma(Vc::Vector< T > a, Vc::Vector< T > b, Vc::Vector< T > c)
Multiplies a with b and then adds c, without rounding between the multiplication and the addition...
Data-parallel arithmetic type with user-defined number of elements.
Definition: simdarray.h:565
Data-parallel mask type with user-defined number of boolean elements.
Definition: simdarrayfwd.h:121
Vc::Vector< T > round(const Vc::Vector< T > &v)
Returns the closest integer to v; 0.5 is rounded to even.
SimdArray apply(F &&f) const
Call f on every entry of the vector and return the results as a new vector.
Definition: simdarray.h:1048
static SimdArray Zero()
Returns a vector with the entries initialized to zero.
Definition: simdarray.h:644
Vc::Vector< T > rsqrt(const Vc::Vector< T > &v)
Returns the reciprocal square root of v.
SimdMaskArray< T, N, V, M > isnegative(const SimdArray< T, N, V, M > &x)
Applies the std:: isnegative function component-wise and concurrently.
Definition: simdarray.h:1719
Vc::Vector< T > log10(const Vc::Vector< T > &v)
Common::WriteMaskedVector< SimdArray, mask_type > operator()(const mask_type &mask)
Writemask the vector before an assignment.
Definition: simdarray.h:984
result_vector_type< L, R > operator+(L &&lhs, R &&rhs)
Applies + component-wise and concurrently.
Definition: simdarray.h:1611
SimdArray< T, N, V, M > trunc(const SimdArray< T, N, V, M > &x)
Applies the std:: trunc function component-wise and concurrently.
Definition: simdarray.h:1746
SimdArray< T, N, V, M > exponent(const SimdArray< T, N, V, M > &x)
Applies the std:: exponent function component-wise and concurrently.
Definition: simdarray.h:1691
Vc::Vector< T > atan2(const Vc::Vector< T > &y, const Vc::Vector< T > &x)
Calculates the angle given the lengths of the opposite and adjacent legs in a right triangle...
static constexpr std::size_t size()
Returns N, the number of scalar components in an object of this type.
Definition: simdarray.h:618
value_type EntryType
The type of the elements (i.e. T)
Definition: simdarray.h:627
SimdArray< T, N, V, M > copysign(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std:: copysign function component-wise and concurrently.
Definition: simdarray.h:1688
constexpr AlignedTag Aligned
Use this object for a flags parameter to request aligned loads and stores.
Vc::Vector< T > atan(const Vc::Vector< T > &v)
Vc::Vector< T > asin(const Vc::Vector< T > &v)
SimdArray operator+() const
Returns a copy of itself.
Definition: simdarray.h:882
void assign(Adapter< S, T, N > &a, size_t i, const S &x)
Assigns one scalar object x to a SIMD slot at offset i in the simdized object a.
Definition: simdize.h:1008
void gather(const MT *mem, const IT &indexes)
Gather function.
Definition: simdarray.h:203
Vc::Mask< T > isfinite(const Vc::Vector< T > &x)
SimdArray(value_type a)
Broadcast Constructor.
Definition: simdarray.h:693
The main SIMD mask class.
Definition: mask.h:41
Vc::Mask< T > isnan(const Vc::Vector< T > &x)
static SimdArray Random()
Returns a vector with pseudo-random entries.
Definition: simdarray.h:662
constexpr VectorSpecialInitializerZero Zero
The special object Vc::Zero can be used to construct Vector and Mask objects initialized to zero/fals...
Definition: types.h:84
Adapter< S, T, N > shifted(const Adapter< S, T, N > &a, int shift)
Returns a new vectorized object where each entry is shifted by shift.
Definition: simdize.h:1069
void scatter(MT *mem, IT &&indexes) const
Scatter function.
Definition: simdarray.h:99
T value_type
The type of the elements (i.e. T)
Definition: simdarray.h:600
Vector Classes Namespace.
Definition: cpuid.h:32
constexpr VectorSpecialInitializerOne One
The special object Vc::One can be used to construct Vector and Mask objects initialized to one/true...
Definition: types.h:89
value_type operator[](size_t index) const noexcept
This operator can be used to read scalar entries of the vector.
Definition: simdarray.h:976
constexpr std::size_t MemoryAlignment
Specifies the most conservative memory alignment necessary for aligned loads and stores of Vector typ...
Definition: vector.h:218
To simd_cast(From &&x, enable_if< std::is_same< To, Traits::decay< From >>::value >=nullarg)
Casts the argument x from type From to type To.
Definition: simd_cast.h:52
reference operator[](size_t i) noexcept
This operator can be used to modify scalar entries of the vector.
Definition: simdarray.h:969
void sincos(const SimdArray< T, N > &x, SimdArray< T, N > *sin, SimdArray< T, N > *cos)
Determines sine and cosine concurrently and component-wise on x.
Definition: simdarray.h:1741
SimdArray reversed() const
Returns a vector with all components reversed.
Definition: simdarray.h:1271
static SimdArray One()
Returns a vector with the entries initialized to one.
Definition: simdarray.h:650
SimdArray sorted() const
Return a sorted copy of the vector.
Definition: simdarray.h:1293
Vc::Vector< T > sqrt(const Vc::Vector< T > &v)
Returns the square root of v.
constexpr UnalignedTag Unaligned
Use this object for a flags parameter to request unaligned loads and stores.
SimdMaskArray< T, N, V, M > isinf(const SimdArray< T, N, V, M > &x)
Applies the std:: isinf function component-wise and concurrently.
Definition: simdarray.h:1701