Vc  1.3.0
SIMD Vector Classes for C++
simdarray.h
1 /* This file is part of the Vc library. {{{
2 Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>
3 
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions are met:
6  * Redistributions of source code must retain the above copyright
7  notice, this list of conditions and the following disclaimer.
8  * Redistributions in binary form must reproduce the above copyright
9  notice, this list of conditions and the following disclaimer in the
10  documentation and/or other materials provided with the distribution.
11  * Neither the names of contributing organizations nor the
12  names of its contributors may be used to endorse or promote products
13  derived from this software without specific prior written permission.
14 
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 
26 }}}*/
27 
28 #ifndef VC_COMMON_SIMDARRAY_H_
29 #define VC_COMMON_SIMDARRAY_H_
30 
31 //#define Vc_DEBUG_SIMD_CAST 1
32 //#define Vc_DEBUG_SORTED 1
33 #if defined Vc_DEBUG_SIMD_CAST || defined Vc_DEBUG_SORTED
34 #include <Vc/IO>
35 #endif
36 
37 #include <array>
38 
39 #include "writemaskedvector.h"
40 #include "simdarrayhelper.h"
41 #include "simdmaskarray.h"
42 #include "utility.h"
43 #include "interleave.h"
44 #include "indexsequence.h"
45 #include "transpose.h"
46 #include "macros.h"
47 
48 namespace Vc_VERSIONED_NAMESPACE
49 {
50 // internal namespace (product & sum helper) {{{1
51 namespace internal
52 {
53 template <typename T> T Vc_INTRINSIC Vc_PURE product_helper_(const T &l, const T &r) { return l * r; }
54 template <typename T> T Vc_INTRINSIC Vc_PURE sum_helper_(const T &l, const T &r) { return l + r; }
55 } // namespace internal
56 
57 // min & max declarations {{{1
58 template <typename T, std::size_t N, typename V, std::size_t M>
59 inline SimdArray<T, N, V, M> min(const SimdArray<T, N, V, M> &x,
60  const SimdArray<T, N, V, M> &y);
61 template <typename T, std::size_t N, typename V, std::size_t M>
62 inline SimdArray<T, N, V, M> max(const SimdArray<T, N, V, M> &x,
63  const SimdArray<T, N, V, M> &y);
64 
65 // SimdArray class {{{1
68 
69 // atomic SimdArray {{{1
70 #define Vc_CURRENT_CLASS_NAME SimdArray
71 
80 template <typename T, std::size_t N, typename VectorType_>
81 class SimdArray<T, N, VectorType_, N>
82 {
83  static_assert(std::is_same<T, double>::value || std::is_same<T, float>::value ||
84  std::is_same<T, int32_t>::value ||
85  std::is_same<T, uint32_t>::value ||
86  std::is_same<T, int16_t>::value ||
87  std::is_same<T, uint16_t>::value,
88  "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, "
89  "int16_t, uint16_t }");
90 
91 public:
92  using VectorType = VectorType_;
93  using vector_type = VectorType;
94  using storage_type = vector_type;
95  using vectorentry_type = typename vector_type::VectorEntryType;
96  using value_type = T;
97  using mask_type = SimdMaskArray<T, N, vector_type>;
98  using index_type = SimdArray<int, N>;
99  static constexpr std::size_t size() { return N; }
100  using Mask = mask_type;
101  using MaskType = Mask;
102  using MaskArgument = const MaskType &;
103  using VectorEntryType = vectorentry_type;
104  using EntryType = value_type;
105  using IndexType = index_type;
106  using AsArg = const SimdArray &;
107  using reference = Detail::ElementReference<SimdArray>;
108  static constexpr std::size_t Size = size();
109  static constexpr std::size_t MemoryAlignment = storage_type::MemoryAlignment;
110 
111  // zero init
112 #ifndef Vc_MSVC // bogus error C2580
113  Vc_INTRINSIC SimdArray() = default;
114 #endif
115 
116  // default copy ctor/operator
117  Vc_INTRINSIC SimdArray(const SimdArray &) = default;
118  Vc_INTRINSIC SimdArray(SimdArray &&) = default;
119  Vc_INTRINSIC SimdArray &operator=(const SimdArray &) = default;
120 
121  // broadcast
122  Vc_INTRINSIC SimdArray(const value_type &a) : data(a) {}
123  Vc_INTRINSIC SimdArray(value_type &a) : data(a) {}
124  Vc_INTRINSIC SimdArray(value_type &&a) : data(a) {}
125  template <
126  typename U,
127  typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
128  Vc_INTRINSIC SimdArray(U a)
129  : SimdArray(static_cast<value_type>(a))
130  {
131  }
132 
133  // implicit casts
134  template <typename U, typename V>
135  Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x, enable_if<N == V::Size> = nullarg)
136  : data(simd_cast<vector_type>(internal_data(x)))
137  {
138  }
139  template <typename U, typename V>
140  Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x,
141  enable_if<(N > V::Size && N <= 2 * V::Size)> = nullarg)
142  : data(simd_cast<vector_type>(internal_data(internal_data0(x)), internal_data(internal_data1(x))))
143  {
144  }
145  template <typename U, typename V>
146  Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x,
147  enable_if<(N > 2 * V::Size && N <= 4 * V::Size)> = nullarg)
148  : data(simd_cast<vector_type>(internal_data(internal_data0(internal_data0(x))),
149  internal_data(internal_data1(internal_data0(x))),
150  internal_data(internal_data0(internal_data1(x))),
151  internal_data(internal_data1(internal_data1(x)))))
152  {
153  }
154 
155  template <typename V, std::size_t Pieces, std::size_t Index>
156  Vc_INTRINSIC SimdArray(Common::Segment<V, Pieces, Index> &&x)
157  : data(simd_cast<vector_type, Index>(x.data))
158  {
159  }
160 
161  Vc_INTRINSIC SimdArray(const std::initializer_list<value_type> &init)
162  : data(init.begin(), Vc::Unaligned)
163  {
164 #if defined Vc_CXX14 && 0 // doesn't compile yet
165  static_assert(init.size() == size(), "The initializer_list argument to "
166  "SimdArray<T, N> must contain exactly N "
167  "values.");
168 #else
169  Vc_ASSERT(init.size() == size());
170 #endif
171  }
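  // [Editorial example, not part of the original header] The constructors above allow
  // broadcasting a single value and brace-initialization with exactly N values (the
  // latter is checked by the Vc_ASSERT). A minimal usage sketch, assuming <Vc/Vc> is
  // included:
  //
  //     Vc::SimdArray<float, 8> a(1.5f);                    // all eight entries == 1.5f
  //     Vc::SimdArray<float, 8> b{0, 1, 2, 3, 4, 5, 6, 7};  // exactly N initializers
  //     Vc::SimdArray<int, 4> c = 7;                        // int broadcast overload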
172 
173  // implicit conversion from underlying vector_type
174  template <
175  typename V,
176  typename = enable_if<Traits::is_simd_vector<V>::value && !Traits::isSimdArray<V>::value>>
177  explicit Vc_INTRINSIC SimdArray(const V &x)
178  : data(simd_cast<vector_type>(x))
179  {
180  }
182  // implicit conversion to Vector<U, AnyAbi> if Vector<U, AnyAbi>::size() == N and
183  // T is implicitly convertible to U
184  template <
185  typename U, typename A,
186  typename = enable_if<std::is_convertible<T, U>::value && Vector<U, A>::Size == N>>
187  Vc_INTRINSIC operator Vector<U, A>() const
188  {
189  return simd_cast<Vector<U, A>>(data);
190  }
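  // [Editorial example, not part of the original header] When the element count matches
  // the native vector width, the conversion operator above lets a SimdArray be used where
  // a Vc::Vector is expected. Sketch:
  //
  //     Vc::SimdArray<float, Vc::float_v::size()> x(2.f);
  //     Vc::float_v v = x;   // implicit conversion; sizes match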
191 
192 #include "gatherinterface.h"
193 #include "scatterinterface.h"
194 
195  // forward all remaining ctors
196  template <typename... Args,
197  typename = enable_if<!Traits::is_cast_arguments<Args...>::value &&
198  !Traits::is_gather_signature<Args...>::value &&
199  !Traits::is_initializer_list<Args...>::value>>
200  explicit Vc_INTRINSIC SimdArray(Args &&... args)
201  : data(std::forward<Args>(args)...)
202  {
203  }
204 
205  template <std::size_t Offset>
206  explicit Vc_INTRINSIC SimdArray(
207  Common::AddOffset<VectorSpecialInitializerIndexesFromZero, Offset>)
208  : data(Vc::IndexesFromZero)
209  {
210  data += value_type(Offset);
211  }
213  Vc_INTRINSIC void setZero() { data.setZero(); }
214  Vc_INTRINSIC void setZero(mask_type k) { data.setZero(internal_data(k)); }
215  Vc_INTRINSIC void setZeroInverted() { data.setZeroInverted(); }
216  Vc_INTRINSIC void setZeroInverted(mask_type k) { data.setZeroInverted(internal_data(k)); }
217 
218  Vc_INTRINSIC void setQnan() { data.setQnan(); }
219  Vc_INTRINSIC void setQnan(mask_type m) { data.setQnan(internal_data(m)); }
220 
221  // internal: execute specified Operation
222  template <typename Op, typename... Args>
223  static Vc_INTRINSIC SimdArray fromOperation(Op op, Args &&... args)
224  {
225  SimdArray r;
226  Common::unpackArgumentsAuto(op, r.data, std::forward<Args>(args)...);
227  return r;
228  }
229 
230  template <typename Op, typename... Args>
231  static Vc_INTRINSIC void callOperation(Op op, Args &&... args)
232  {
233  Common::unpackArgumentsAuto(op, nullptr, std::forward<Args>(args)...);
234  }
235 
236  static Vc_INTRINSIC SimdArray Zero()
237  {
238  return SimdArray(Vc::Zero);
239  }
240  static Vc_INTRINSIC SimdArray One()
241  {
242  return SimdArray(Vc::One);
243  }
244  static Vc_INTRINSIC SimdArray IndexesFromZero()
245  {
246  return SimdArray(Vc::IndexesFromZero);
247  }
248  static Vc_INTRINSIC SimdArray Random()
249  {
250  return fromOperation(Common::Operations::random());
251  }
252 
253  template <typename... Args> Vc_INTRINSIC void load(Args &&... args)
254  {
255  data.load(std::forward<Args>(args)...);
256  }
257 
258  template <typename... Args> Vc_INTRINSIC void store(Args &&... args) const
259  {
260  data.store(std::forward<Args>(args)...);
261  }
262 
263  Vc_INTRINSIC mask_type operator!() const
264  {
265  return {!data};
266  }
267 
268  Vc_INTRINSIC SimdArray operator-() const
269  {
270  return {-data};
271  }
272 
274  Vc_INTRINSIC SimdArray operator+() const { return *this; }
275 
276  Vc_INTRINSIC SimdArray operator~() const
277  {
278  return {~data};
279  }
280 
281  template <typename U,
282  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
283  Vc_INTRINSIC Vc_CONST SimdArray operator<<(U x) const
284  {
285  return {data << x};
286  }
287  template <typename U,
288  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
289  Vc_INTRINSIC SimdArray &operator<<=(U x)
290  {
291  data <<= x;
292  return *this;
293  }
294  template <typename U,
295  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
296  Vc_INTRINSIC Vc_CONST SimdArray operator>>(U x) const
297  {
298  return {data >> x};
299  }
300  template <typename U,
301  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
302  Vc_INTRINSIC SimdArray &operator>>=(U x)
303  {
304  data >>= x;
305  return *this;
306  }
307 
308 #define Vc_BINARY_OPERATOR_(op) \
309  Vc_INTRINSIC Vc_CONST SimdArray operator op(const SimdArray &rhs) const \
310  { \
311  return {data op rhs.data}; \
312  } \
313  Vc_INTRINSIC SimdArray &operator op##=(const SimdArray &rhs) \
314  { \
315  data op## = rhs.data; \
316  return *this; \
317  }
318  Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_);
319  Vc_ALL_BINARY(Vc_BINARY_OPERATOR_);
320  Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_);
321 #undef Vc_BINARY_OPERATOR_
322 
323 #define Vc_COMPARES(op) \
324  Vc_INTRINSIC mask_type operator op(const SimdArray &rhs) const \
325  { \
326  return {data op rhs.data}; \
327  }
328  Vc_ALL_COMPARES(Vc_COMPARES);
329 #undef Vc_COMPARES
330 
332  Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC MaskType isNegative() const
333  {
334  return {isnegative(data)};
335  }
336 
337 private:
338  friend reference;
339  Vc_INTRINSIC static value_type get(const SimdArray &o, int i) noexcept
340  {
341  return o.data[i];
342  }
343  template <typename U>
344  Vc_INTRINSIC static void set(SimdArray &o, int i, U &&v) noexcept(
345  noexcept(std::declval<value_type &>() = v))
346  {
347  o.data[i] = v;
348  }
349 
350 public:
351  Vc_INTRINSIC reference operator[](size_t i) noexcept
352  {
353  static_assert(noexcept(reference{std::declval<SimdArray &>(), int()}), "");
354  return {*this, int(i)};
355  }
356  Vc_INTRINSIC value_type operator[](size_t i) const noexcept
357  {
358  return get(*this, int(i));
359  }
360 
361  Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(const mask_type &k)
362  {
363  return {*this, k};
364  }
365 
366  Vc_INTRINSIC void assign(const SimdArray &v, const mask_type &k)
367  {
368  data.assign(v.data, internal_data(k));
369  }
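  // [Editorial example, not part of the original header] Comparisons return a mask_type,
  // which drives the write-masking operator() above (the usual Vc idiom) and assign().
  // Sketch:
  //
  //     void clampNegative(Vc::SimdArray<float, 8> &x)
  //     {
  //         x(x < 0.f) = 0.f;   // only the lanes where the mask is true are written
  //     }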
370 
371  // reductions ////////////////////////////////////////////////////////
372 #define Vc_REDUCTION_FUNCTION_(name_) \
373  Vc_INTRINSIC Vc_PURE value_type name_() const { return data.name_(); } \
374  Vc_INTRINSIC Vc_PURE value_type name_(mask_type mask) const \
375  { \
376  return data.name_(internal_data(mask)); \
377  } \
378  Vc_NOTHING_EXPECTING_SEMICOLON
379  Vc_REDUCTION_FUNCTION_(min);
380  Vc_REDUCTION_FUNCTION_(max);
381  Vc_REDUCTION_FUNCTION_(product);
382  Vc_REDUCTION_FUNCTION_(sum);
383 #undef Vc_REDUCTION_FUNCTION_
384  Vc_INTRINSIC Vc_PURE SimdArray partialSum() const { return data.partialSum(); }
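  // [Editorial example, not part of the original header] The generated reductions forward
  // to the underlying vector; the masked overloads reduce only the selected lanes. Sketch:
  //
  //     Vc::SimdArray<float, 8> x(1.f);
  //     float total    = x.sum();          // 8.0f
  //     float positive = x.sum(x > 0.f);   // masked reduction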
385 
386  template <typename F> Vc_INTRINSIC SimdArray apply(F &&f) const
387  {
388  return {data.apply(std::forward<F>(f))};
389  }
390  template <typename F> Vc_INTRINSIC SimdArray apply(F &&f, const mask_type &k) const
391  {
392  return {data.apply(std::forward<F>(f), k)};
393  }
394 
395  Vc_INTRINSIC SimdArray shifted(int amount) const
396  {
397  return {data.shifted(amount)};
398  }
399 
400  template <std::size_t NN>
401  Vc_INTRINSIC SimdArray shifted(int amount, const SimdArray<value_type, NN> &shiftIn)
402  const
403  {
404  return {data.shifted(amount, simd_cast<VectorType>(shiftIn))};
405  }
406 
407  Vc_INTRINSIC SimdArray rotated(int amount) const
408  {
409  return {data.rotated(amount)};
410  }
411 
413  Vc_DEPRECATED("use exponent(x) instead") Vc_INTRINSIC SimdArray exponent() const
414  {
415  return {exponent(data)};
416  }
417 
418  Vc_INTRINSIC SimdArray interleaveLow(SimdArray x) const
419  {
420  return {data.interleaveLow(x.data)};
421  }
422  Vc_INTRINSIC SimdArray interleaveHigh(SimdArray x) const
423  {
424  return {data.interleaveHigh(x.data)};
425  }
426 
427  Vc_INTRINSIC SimdArray reversed() const
428  {
429  return {data.reversed()};
430  }
431 
432  Vc_INTRINSIC SimdArray sorted() const
433  {
434  return {data.sorted()};
435  }
436 
437  template <typename G> static Vc_INTRINSIC SimdArray generate(const G &gen)
438  {
439  return {VectorType::generate(gen)};
440  }
441 
442  Vc_DEPRECATED("use copysign(x, y) instead") Vc_INTRINSIC SimdArray
443  copySign(const SimdArray &reference) const
444  {
445  return {Vc::copysign(data, reference.data)};
446  }
447 
448  friend VectorType &internal_data<>(SimdArray &x);
449  friend const VectorType &internal_data<>(const SimdArray &x);
450 
452  Vc_INTRINSIC SimdArray(VectorType &&x) : data(std::move(x)) {}
453 
454  Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(storage_type));
455 
456 private:
457  // The alignas attribute attached to the class declaration above is ignored by ICC
458  // 17.0.0 (at least). So just move the alignas attribute down here where it works for
459  // all compilers.
460  alignas(static_cast<std::size_t>(
461  Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value * sizeof(VectorType_) /
462  VectorType_::size()>::value)) storage_type data;
463 };
464 template <typename T, std::size_t N, typename VectorType> constexpr std::size_t SimdArray<T, N, VectorType, N>::Size;
465 template <typename T, std::size_t N, typename VectorType>
466 constexpr std::size_t SimdArray<T, N, VectorType, N>::MemoryAlignment;
467 template <typename T, std::size_t N, typename VectorType>
468 #ifndef Vc_MSVC
469 Vc_INTRINSIC
470 #endif
471 VectorType &internal_data(SimdArray<T, N, VectorType, N> &x)
472 {
473  return x.data;
474 }
475 template <typename T, std::size_t N, typename VectorType>
476 #ifndef Vc_MSVC
477 Vc_INTRINSIC
478 #endif
479 const VectorType &internal_data(const SimdArray<T, N, VectorType, N> &x)
480 {
481  return x.data;
482 }
483 
484 // unpackIfSegment {{{2
485 template <typename T> T unpackIfSegment(T &&x) { return std::forward<T>(x); }
486 template <typename T, size_t Pieces, size_t Index>
487 auto unpackIfSegment(Common::Segment<T, Pieces, Index> &&x) -> decltype(x.asSimdArray())
488 {
489  return x.asSimdArray();
490 }
491 
492 // gatherImplementation {{{2
493 template <typename T, std::size_t N, typename VectorType>
494 template <typename MT, typename IT>
495 inline void SimdArray<T, N, VectorType, N>::gatherImplementation(const MT *mem,
496  IT &&indexes)
497 {
498  data.gather(mem, unpackIfSegment(std::forward<IT>(indexes)));
499 }
500 template <typename T, std::size_t N, typename VectorType>
501 template <typename MT, typename IT>
502 inline void SimdArray<T, N, VectorType, N>::gatherImplementation(const MT *mem,
503  IT &&indexes,
504  MaskArgument mask)
505 {
506  data.gather(mem, unpackIfSegment(std::forward<IT>(indexes)), mask);
507 }
509 // scatterImplementation {{{2
510 template <typename T, std::size_t N, typename VectorType>
511 template <typename MT, typename IT>
512 inline void SimdArray<T, N, VectorType, N>::scatterImplementation(MT *mem,
513  IT &&indexes) const
514 {
515  data.scatter(mem, unpackIfSegment(std::forward<IT>(indexes)));
516 }
517 template <typename T, std::size_t N, typename VectorType>
518 template <typename MT, typename IT>
519 inline void SimdArray<T, N, VectorType, N>::scatterImplementation(MT *mem,
520  IT &&indexes,
521  MaskArgument mask) const
522 {
523  data.scatter(mem, unpackIfSegment(std::forward<IT>(indexes)), mask);
524 }
525 
526 // generic SimdArray {{{1
559 template <typename T, size_t N, typename V, size_t Wt> class SimdArray
560 {
561  static_assert(std::is_same<T, double>::value ||
562  std::is_same<T, float>::value ||
563  std::is_same<T, int32_t>::value ||
564  std::is_same<T, uint32_t>::value ||
565  std::is_same<T, int16_t>::value ||
566  std::is_same<T, uint16_t>::value, "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, int16_t, uint16_t }");
567  static_assert(
568  // either the EntryType and VectorEntryType of the main V are equal
569  std::is_same<typename V::EntryType, typename V::VectorEntryType>::value ||
570  // or N is a multiple of V::size()
571  (N % V::size() == 0),
572  "SimdArray<(un)signed short, N> on MIC only works correctly for N = k * "
573  "MIC::(u)short_v::size(), i.e. k * 16.");
574 
575  using my_traits = SimdArrayTraits<T, N>;
576  static constexpr std::size_t N0 = my_traits::N0;
577  static constexpr std::size_t N1 = my_traits::N1;
578  using Split = Common::Split<N0>;
579  template <typename U, std::size_t K> using CArray = U[K];
580 
581 public:
582  using storage_type0 = typename my_traits::storage_type0;
583  using storage_type1 = typename my_traits::storage_type1;
584  static_assert(storage_type0::size() == N0, "");
585 
589  using vector_type = V;
590  using vectorentry_type = typename storage_type0::vectorentry_type;
591  typedef vectorentry_type alias_type Vc_MAY_ALIAS;
592 
594  using value_type = T;
595 
597  using mask_type = SimdMaskArray<T, N, vector_type>;
598 
600  using index_type = SimdArray<int, N>;
601 
612  static constexpr std::size_t size() { return N; }
613 
615  using Mask = mask_type;
617  using MaskType = Mask;
618  using MaskArgument = const MaskType &;
619  using VectorEntryType = vectorentry_type;
621  using EntryType = value_type;
623  using IndexType = index_type;
624  using AsArg = const SimdArray &;
625 
626  using reference = Detail::ElementReference<SimdArray>;
627 
629  static constexpr std::size_t MemoryAlignment =
630  storage_type0::MemoryAlignment > storage_type1::MemoryAlignment
631  ? storage_type0::MemoryAlignment
632  : storage_type1::MemoryAlignment;
633 
636 
638  static Vc_INTRINSIC SimdArray Zero()
639  {
640  return SimdArray(Vc::Zero);
641  }
642 
644  static Vc_INTRINSIC SimdArray One()
645  {
646  return SimdArray(Vc::One);
647  }
648 
650  static Vc_INTRINSIC SimdArray IndexesFromZero()
651  {
652  return SimdArray(Vc::IndexesFromZero);
653  }
654 
656  static Vc_INTRINSIC SimdArray Random()
657  {
658  return fromOperation(Common::Operations::random());
659  }
660 
662  template <typename G> static Vc_INTRINSIC SimdArray generate(const G &gen) // {{{2
663  {
664  auto tmp = storage_type0::generate(gen); // GCC bug: the order of evaluation in
665  // an initializer list is well-defined
666  // (front to back), but GCC 4.8 doesn't
667  // implement this correctly. Therefore
668  // we enforce correct order.
669  return {std::move(tmp),
670  storage_type1::generate([&](std::size_t i) { return gen(i + N0); })};
671  }
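  // [Editorial example, not part of the original header] generate() calls gen(i) for
  // i = 0 .. N-1, with the second half offset by N0 as coded above. Sketch:
  //
  //     auto iota = Vc::SimdArray<int, 7>::generate(
  //         [](std::size_t i) { return int(i); });   // {0, 1, 2, 3, 4, 5, 6}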
673 
676 
678 #ifndef Vc_MSVC // bogus error C2580
679  SimdArray() = default;
680 #endif
681 
685 
687  Vc_INTRINSIC SimdArray(value_type a) : data0(a), data1(a) {}
688  template <
689  typename U,
690  typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
691  SimdArray(U a)
692  : SimdArray(static_cast<value_type>(a))
693  {
694  }
696 
697  // default copy ctor/operator
698  SimdArray(const SimdArray &) = default;
699  SimdArray(SimdArray &&) = default;
700  SimdArray &operator=(const SimdArray &) = default;
701 
702  // load ctor
703  template <typename U,
704  typename Flags = DefaultLoadTag,
705  typename = enable_if<Traits::is_load_store_flag<Flags>::value>>
706  explicit Vc_INTRINSIC SimdArray(const U *mem, Flags f = Flags())
707  : data0(mem, f), data1(mem + storage_type0::size(), f)
708  {
709  }
710 
711 // MSVC does overload resolution differently and takes the const U *mem overload (I hope)
712 #ifndef Vc_MSVC
713 
719  template <typename U, std::size_t Extent, typename Flags = DefaultLoadTag,
720  typename = enable_if<Traits::is_load_store_flag<Flags>::value>>
721  explicit Vc_INTRINSIC SimdArray(CArray<U, Extent> &mem, Flags f = Flags())
722  : data0(&mem[0], f), data1(&mem[storage_type0::size()], f)
723  {
724  }
728  template <typename U, std::size_t Extent, typename Flags = DefaultLoadTag,
729  typename = enable_if<Traits::is_load_store_flag<Flags>::value>>
730  explicit Vc_INTRINSIC SimdArray(const CArray<U, Extent> &mem, Flags f = Flags())
731  : data0(&mem[0], f), data1(&mem[storage_type0::size()], f)
732  {
733  }
734 #endif
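  // [Editorial example, not part of the original header] The load constructors above read
  // N consecutive values starting at mem; the alignment flag is chosen by the caller.
  // Sketch:
  //
  //     alignas(Vc::SimdArray<float, 8>::MemoryAlignment) float buf[8] = {};
  //     Vc::SimdArray<float, 8> v(buf, Vc::Aligned);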
735 
736  // initializer list
737  Vc_INTRINSIC SimdArray(const std::initializer_list<value_type> &init)
738  : data0(init.begin(), Vc::Unaligned)
739  , data1(init.begin() + storage_type0::size(), Vc::Unaligned)
740  {
741 #if defined Vc_CXX14 && 0 // doesn't compile yet
742  static_assert(init.size() == size(), "The initializer_list argument to "
743  "SimdArray<T, N> must contain exactly N "
744  "values.");
745 #else
746  Vc_ASSERT(init.size() == size());
747 #endif
748  }
749 
750 #include "gatherinterface.h"
751 #include "scatterinterface.h"
752 
753  // forward all remaining ctors
754  template <typename... Args,
755  typename = enable_if<!Traits::is_cast_arguments<Args...>::value &&
756  !Traits::is_initializer_list<Args...>::value &&
757  !Traits::is_gather_signature<Args...>::value &&
758  !Traits::is_load_arguments<Args...>::value>>
759  explicit Vc_INTRINSIC SimdArray(Args &&... args)
760  : data0(Split::lo(args)...) // no forward here - it could move and thus
761  // break the next line
762  , data1(Split::hi(std::forward<Args>(args))...)
763  {
764  }
765 
766  // explicit casts
767  template <typename W>
768  Vc_INTRINSIC explicit SimdArray(
769  W &&x,
770  enable_if<(Traits::is_simd_vector<W>::value && Traits::simd_vector_size<W>::value == N &&
771  !(std::is_convertible<Traits::entry_type_of<W>, T>::value &&
772  Traits::isSimdArray<W>::value))> = nullarg)
773  : data0(Split::lo(x)), data1(Split::hi(x))
774  {
775  }
776 
777  // implicit casts
778  template <typename W>
779  Vc_INTRINSIC SimdArray(
780  W &&x,
781  enable_if<(Traits::isSimdArray<W>::value && Traits::simd_vector_size<W>::value == N &&
782  std::is_convertible<Traits::entry_type_of<W>, T>::value)> = nullarg)
783  : data0(Split::lo(x)), data1(Split::hi(x))
784  {
785  }
786 
787  // implicit conversion to Vector<U, AnyAbi> if Vector<U, AnyAbi>::size() == N and
788  // T is implicitly convertible to U
789  template <
790  typename U, typename A,
791  typename = enable_if<std::is_convertible<T, U>::value && Vector<U, A>::Size == N>>
792  operator Vector<U, A>() const
793  {
794  return simd_cast<Vector<U, A>>(data0, data1);
795  }
796 
798 
799  Vc_INTRINSIC void setZero()
800  {
801  data0.setZero();
802  data1.setZero();
803  }
804  Vc_INTRINSIC void setZero(const mask_type &k)
805  {
806  data0.setZero(Split::lo(k));
807  data1.setZero(Split::hi(k));
808  }
809  Vc_INTRINSIC void setZeroInverted()
810  {
811  data0.setZeroInverted();
812  data1.setZeroInverted();
813  }
814  Vc_INTRINSIC void setZeroInverted(const mask_type &k)
815  {
816  data0.setZeroInverted(Split::lo(k));
817  data1.setZeroInverted(Split::hi(k));
818  }
819 
820 
821  Vc_INTRINSIC void setQnan() {
822  data0.setQnan();
823  data1.setQnan();
824  }
825  Vc_INTRINSIC void setQnan(const mask_type &m) {
826  data0.setQnan(Split::lo(m));
827  data1.setQnan(Split::hi(m));
828  }
829 
831  template <typename Op, typename... Args>
832  static Vc_INTRINSIC SimdArray fromOperation(Op op, Args &&... args)
833  {
834  SimdArray r = {
835  storage_type0::fromOperation(op, Split::lo(args)...), // no forward here - it
836  // could move and thus
837  // break the next line
838  storage_type1::fromOperation(op, Split::hi(std::forward<Args>(args))...)};
839  return r;
840  }
841 
843  template <typename Op, typename... Args>
844  static Vc_INTRINSIC void callOperation(Op op, Args &&... args)
845  {
846  storage_type0::callOperation(op, Split::lo(args)...);
847  storage_type1::callOperation(op, Split::hi(std::forward<Args>(args))...);
848  }
849 
850 
851  template <typename U, typename... Args> Vc_INTRINSIC void load(const U *mem, Args &&... args)
852  {
853  data0.load(mem, Split::lo(args)...); // no forward here - it could move and thus
854  // break the next line
855  data1.load(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
856  }
857 
858  template <typename U, typename... Args> Vc_INTRINSIC void store(U *mem, Args &&... args) const
859  {
860  data0.store(mem, Split::lo(args)...); // no forward here - it could move and thus
861  // break the next line
862  data1.store(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
863  }
864 
865  Vc_INTRINSIC mask_type operator!() const
866  {
867  return {!data0, !data1};
868  }
869 
870  Vc_INTRINSIC SimdArray operator-() const
871  {
872  return {-data0, -data1};
873  }
874 
876  Vc_INTRINSIC SimdArray operator+() const { return *this; }
877 
878  Vc_INTRINSIC SimdArray operator~() const
879  {
880  return {~data0, ~data1};
881  }
882 
883  // left/right shift operators {{{2
884  template <typename U,
885  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
886  Vc_INTRINSIC Vc_CONST SimdArray operator<<(U x) const
887  {
888  return {data0 << x, data1 << x};
889  }
890  template <typename U,
891  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
892  Vc_INTRINSIC SimdArray &operator<<=(U x)
893  {
894  data0 <<= x;
895  data1 <<= x;
896  return *this;
897  }
898  template <typename U,
899  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
900  Vc_INTRINSIC Vc_CONST SimdArray operator>>(U x) const
901  {
902  return {data0 >> x, data1 >> x};
903  }
904  template <typename U,
905  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
906  Vc_INTRINSIC SimdArray &operator>>=(U x)
907  {
908  data0 >>= x;
909  data1 >>= x;
910  return *this;
911  }
912 
913  // binary operators {{{2
914 #define Vc_BINARY_OPERATOR_(op) \
915  Vc_INTRINSIC Vc_CONST SimdArray operator op(const SimdArray &rhs) const \
916  { \
917  return {data0 op rhs.data0, data1 op rhs.data1}; \
918  } \
919  Vc_INTRINSIC SimdArray &operator op##=(const SimdArray &rhs) \
920  { \
921  data0 op## = rhs.data0; \
922  data1 op## = rhs.data1; \
923  return *this; \
924  }
925  Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_);
926  Vc_ALL_BINARY(Vc_BINARY_OPERATOR_);
927  Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_);
928 #undef Vc_BINARY_OPERATOR_
929 
930 #define Vc_COMPARES(op) \
931  Vc_INTRINSIC mask_type operator op(const SimdArray &rhs) const \
932  { \
933  return {data0 op rhs.data0, data1 op rhs.data1}; \
934  }
935  Vc_ALL_COMPARES(Vc_COMPARES);
936 #undef Vc_COMPARES
937 
938  // operator[] {{{2
941 
942 private:
943  friend reference;
944  Vc_INTRINSIC static value_type get(const SimdArray &o, int i) noexcept
945  {
946  return reinterpret_cast<const alias_type *>(&o)[i];
947  }
948  template <typename U>
949  Vc_INTRINSIC static void set(SimdArray &o, int i, U &&v) noexcept(
950  noexcept(std::declval<value_type &>() = v))
951  {
952  reinterpret_cast<alias_type *>(&o)[i] = v;
953  }
954 
955 public:
957  Vc_INTRINSIC reference operator[](size_t i) noexcept
958  {
959  static_assert(noexcept(reference{std::declval<SimdArray &>(), int()}), "");
960  return {*this, int(i)};
961  }
962 
964  Vc_INTRINSIC value_type operator[](size_t index) const noexcept
965  {
966  return get(*this, int(index));
967  }
969 
970  // operator(){{{2
972  Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(
973  const mask_type &mask)
974  {
975  return {*this, mask};
976  }
977 
979  Vc_INTRINSIC void assign(const SimdArray &v, const mask_type &k) //{{{2
980  {
981  data0.assign(v.data0, internal_data0(k));
982  data1.assign(v.data1, internal_data1(k));
983  }
984 
985  // reductions {{{2
986 #define Vc_REDUCTION_FUNCTION_(name_, binary_fun_, scalar_fun_) \
987 private: \
988  template <typename ForSfinae = void> \
989  Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value && \
990  storage_type0::Size == storage_type1::Size, \
991  value_type> name_##_impl() const \
992  { \
993  return binary_fun_(data0, data1).name_(); \
994  } \
995  \
996  template <typename ForSfinae = void> \
997  Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value && \
998  storage_type0::Size != storage_type1::Size, \
999  value_type> name_##_impl() const \
1000  { \
1001  return scalar_fun_(data0.name_(), data1.name_()); \
1002  } \
1003  \
1004 public: \
1005  \
1006  Vc_INTRINSIC value_type name_() const { return name_##_impl(); } \
1007  \
1008  Vc_INTRINSIC value_type name_(const mask_type &mask) const \
1009  { \
1010  if (Vc_IS_UNLIKELY(Split::lo(mask).isEmpty())) { \
1011  return data1.name_(Split::hi(mask)); \
1012  } else if (Vc_IS_UNLIKELY(Split::hi(mask).isEmpty())) { \
1013  return data0.name_(Split::lo(mask)); \
1014  } else { \
1015  return scalar_fun_(data0.name_(Split::lo(mask)), \
1016  data1.name_(Split::hi(mask))); \
1017  } \
1018  } \
1019  Vc_NOTHING_EXPECTING_SEMICOLON
1020  Vc_REDUCTION_FUNCTION_(min, Vc::min, std::min);
1021  Vc_REDUCTION_FUNCTION_(max, Vc::max, std::max);
1022  Vc_REDUCTION_FUNCTION_(product, internal::product_helper_, internal::product_helper_);
1023  Vc_REDUCTION_FUNCTION_(sum, internal::sum_helper_, internal::sum_helper_);
1024 #undef Vc_REDUCTION_FUNCTION_
1025  Vc_INTRINSIC Vc_PURE SimdArray partialSum() const //{{{2
1027  {
1028  auto ps0 = data0.partialSum();
1029  auto tmp = data1;
1030  tmp[0] += ps0[data0.size() - 1];
1031  return {std::move(ps0), tmp.partialSum()};
1032  }
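  // [Editorial example, not part of the original header] partialSum() is an inclusive
  // prefix sum; the carry from the first half is added into the second half before its
  // own prefix sum, as coded above. Sketch:
  //
  //     Vc::SimdArray<int, 4> x{1, 2, 3, 4};
  //     auto p = x.partialSum();   // {1, 3, 6, 10}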
1033 
1034  // apply {{{2
1036  template <typename F> inline SimdArray apply(F &&f) const
1037  {
1038  return {data0.apply(f), data1.apply(f)};
1039  }
1041  template <typename F> inline SimdArray apply(F &&f, const mask_type &k) const
1042  {
1043  return {data0.apply(f, Split::lo(k)), data1.apply(f, Split::hi(k))};
1044  }
1045 
1046  // shifted {{{2
1048  inline SimdArray shifted(int amount) const
1049  {
1050  constexpr int SSize = Size;
1051  constexpr int SSize0 = storage_type0::Size;
1052  constexpr int SSize1 = storage_type1::Size;
1053  if (amount == 0) {
1054  return *this;
1055  }
1056  if (amount < 0) {
1057  if (amount > -SSize0) {
1058  return {data0.shifted(amount), data1.shifted(amount, data0)};
1059  }
1060  if (amount == -SSize0) {
1061  return {storage_type0::Zero(), simd_cast<storage_type1>(data0)};
1062  }
1063  if (amount < -SSize0) {
1064  return {storage_type0::Zero(), simd_cast<storage_type1>(data0.shifted(
1065  amount + SSize0))};
1066  }
1067  return Zero();
1068  } else {
1069  if (amount >= SSize) {
1070  return Zero();
1071  } else if (amount >= SSize0) {
1072  return {
1073  simd_cast<storage_type0>(data1).shifted(amount - SSize0),
1074  storage_type1::Zero()};
1075  } else if (amount >= SSize1) {
1076  return {data0.shifted(amount, data1), storage_type1::Zero()};
1077  } else {
1078  return {data0.shifted(amount, data1), data1.shifted(amount)};
1079  }
1080  }
1081  }
1082 
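  // [Editorial example, not part of the original header] shifted(amount) moves entries
  // towards index 0 for positive amounts and fills the vacated lanes with zero; negative
  // amounts shift the other way. Sketch:
  //
  //     Vc::SimdArray<int, 4> x{1, 2, 3, 4};
  //     auto y = x.shifted(1);    // {2, 3, 4, 0}
  //     auto z = x.shifted(-1);   // {0, 1, 2, 3}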
1083  template <std::size_t NN>
1084  inline enable_if<
1085  !(std::is_same<storage_type0, storage_type1>::value && // not bisectable
1086  N == NN),
1087  SimdArray>
1088  shifted(int amount, const SimdArray<value_type, NN> &shiftIn) const
1089  {
1090  constexpr int SSize = Size;
1091  if (amount < 0) {
1092  return SimdArray::generate([&](int i) -> value_type {
1093  i += amount;
1094  if (i >= 0) {
1095  return operator[](i);
1096  } else if (i >= -SSize) {
1097  return shiftIn[i + SSize];
1098  }
1099  return 0;
1100  });
1101  }
1102  return SimdArray::generate([&](int i) -> value_type {
1103  i += amount;
1104  if (i < SSize) {
1105  return operator[](i);
1106  } else if (i < 2 * SSize) {
1107  return shiftIn[i - SSize];
1108  }
1109  return 0;
1110  });
1111  }
1112 
1113 private:
1114  // workaround for MSVC not understanding the simpler and shorter boolean
1115  // expression directly in the enable_if below
1116  template <std::size_t NN> struct bisectable_shift
1117  : public std::integral_constant<bool,
1118  std::is_same<storage_type0, storage_type1>::value && // bisectable
1119  N == NN>
1120  {
1121  };
1122 
1123 public:
1124  template <std::size_t NN>
1125  inline SimdArray shifted(enable_if<bisectable_shift<NN>::value, int> amount,
1126  const SimdArray<value_type, NN> &shiftIn) const
1127  {
1128  constexpr int SSize = Size;
1129  if (amount < 0) {
1130  if (amount > -static_cast<int>(storage_type0::Size)) {
1131  return {data0.shifted(amount, internal_data1(shiftIn)),
1132  data1.shifted(amount, data0)};
1133  }
1134  if (amount == -static_cast<int>(storage_type0::Size)) {
1135  return {storage_type0(internal_data1(shiftIn)), storage_type1(data0)};
1136  }
1137  if (amount > -SSize) {
1138  return {
1139  internal_data1(shiftIn)
1140  .shifted(amount + static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
1141  data0.shifted(amount + static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
1142  }
1143  if (amount == -SSize) {
1144  return shiftIn;
1145  }
1146  if (amount > -2 * SSize) {
1147  return shiftIn.shifted(amount + SSize);
1148  }
1149  }
1150  if (amount == 0) {
1151  return *this;
1152  }
1153  if (amount < static_cast<int>(storage_type0::Size)) {
1154  return {data0.shifted(amount, data1),
1155  data1.shifted(amount, internal_data0(shiftIn))};
1156  }
1157  if (amount == static_cast<int>(storage_type0::Size)) {
1158  return {storage_type0(data1), storage_type1(internal_data0(shiftIn))};
1159  }
1160  if (amount < SSize) {
1161  return {data1.shifted(amount - static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
1162  internal_data0(shiftIn)
1163  .shifted(amount - static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
1164  }
1165  if (amount == SSize) {
1166  return shiftIn;
1167  }
1168  if (amount < 2 * SSize) {
1169  return shiftIn.shifted(amount - SSize);
1170  }
1171  return Zero();
1172  }
1173 
1174  // rotated {{{2
1176  Vc_INTRINSIC SimdArray rotated(int amount) const
1177  {
1178  amount %= int(size());
1179  if (amount == 0) {
1180  return *this;
1181  } else if (amount < 0) {
1182  amount += size();
1183  }
1184 
1185 #ifdef Vc_MSVC
1186  // MSVC fails to find a SimdArray::shifted function with 2 arguments. So use
1187  // store -> load to implement the function instead.
1189  alignas(MemoryAlignment) T tmp[N + data0.size()];
1190  data0.store(&tmp[0], Vc::Aligned);
1191  data1.store(&tmp[data0.size()], Vc::Aligned);
1192  data0.store(&tmp[N], Vc::Unaligned);
1193  SimdArray r;
1194  r.data0.load(&tmp[amount], Vc::Unaligned);
1195  r.data1.load(&tmp[(amount + data0.size()) % size()], Vc::Unaligned);
1196  return r;
1197 #else
1198  auto &&d0cvtd = simd_cast<storage_type1>(data0);
1199  auto &&d1cvtd = simd_cast<storage_type0>(data1);
1200  constexpr int size0 = storage_type0::size();
1201  constexpr int size1 = storage_type1::size();
1202 
1203  if (amount == size0 && std::is_same<storage_type0, storage_type1>::value) {
1204  return {std::move(d1cvtd), std::move(d0cvtd)};
1205  } else if (amount < size1) {
1206  return {data0.shifted(amount, d1cvtd), data1.shifted(amount, d0cvtd)};
1207  } else if (amount == size1) {
1208  return {data0.shifted(amount, d1cvtd), std::move(d0cvtd)};
1209  } else if (int(size()) - amount < size1) {
1210  return {data0.shifted(amount - int(size()), d1cvtd.shifted(size1 - size0)),
1211  data1.shifted(amount - int(size()), data0.shifted(size0 - size1))};
1212  } else if (int(size()) - amount == size1) {
1213  return {data0.shifted(-size1, d1cvtd.shifted(size1 - size0)),
1214  simd_cast<storage_type1>(data0.shifted(size0 - size1))};
1215  } else if (amount <= size0) {
1216  return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
1217  simd_cast<storage_type1>(data0.shifted(amount - size1))};
1218  } else {
1219  return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
1220  simd_cast<storage_type1>(data0.shifted(amount - size1, d1cvtd))};
1221  }
1222  return *this;
1223 #endif
1224  }
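  // [Editorial example, not part of the original header] rotated(amount) is a cyclic
  // shift: entries pushed out at the front re-enter at the back. Sketch:
  //
  //     Vc::SimdArray<int, 4> x{1, 2, 3, 4};
  //     auto r = x.rotated(1);   // {2, 3, 4, 1}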
1225 
1226  // interleaveLow/-High {{{2
1228  Vc_INTRINSIC SimdArray interleaveLow(const SimdArray &x) const
1229  {
1230  // return data0[0], x.data0[0], data0[1], x.data0[1], ...
1231  return {data0.interleaveLow(x.data0),
1232  simd_cast<storage_type1>(data0.interleaveHigh(x.data0))};
1233  }
1235  Vc_INTRINSIC SimdArray interleaveHigh(const SimdArray &x) const
1236  {
1237  return interleaveHighImpl(
1238  x,
1239  std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
1240  }
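  // [Editorial example, not part of the original header] interleaveLow() zips the lower
  // halves of the two operands, interleaveHigh() the upper halves. Sketch:
  //
  //     Vc::SimdArray<int, 4> a{0, 1, 2, 3}, b{10, 11, 12, 13};
  //     auto lo = a.interleaveLow(b);    // {0, 10, 1, 11}
  //     auto hi = a.interleaveHigh(b);   // {2, 12, 3, 13}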
1241 
1242 private:
1244  Vc_INTRINSIC SimdArray interleaveHighImpl(const SimdArray &x, std::true_type) const
1245  {
1246  return {data1.interleaveLow(x.data1), data1.interleaveHigh(x.data1)};
1247  }
1249  inline SimdArray interleaveHighImpl(const SimdArray &x, std::false_type) const
1250  {
1251  return {data0.interleaveHigh(x.data0)
1252  .shifted(storage_type1::Size,
1253  simd_cast<storage_type0>(data1.interleaveLow(x.data1))),
1254  data1.interleaveHigh(x.data1)};
1255  }
1256 
1257 public:
1259  inline SimdArray reversed() const //{{{2
1260  {
1261  if (std::is_same<storage_type0, storage_type1>::value) {
1262  return {simd_cast<storage_type0>(data1).reversed(),
1263  simd_cast<storage_type1>(data0).reversed()};
1264  } else {
1265 #ifdef Vc_MSVC
1266  // MSVC fails to find a SimdArray::shifted function with 2 arguments. So use
1267  // store -> load to implement the function instead.
1269  alignas(MemoryAlignment) T tmp[N];
1270  data1.reversed().store(&tmp[0], Vc::Aligned);
1271  data0.reversed().store(&tmp[data1.size()], Vc::Unaligned);
1272  return SimdArray{&tmp[0], Vc::Aligned};
1273 #else
1274  return {data0.shifted(storage_type1::Size, data1).reversed(),
1275  simd_cast<storage_type1>(data0.reversed().shifted(
1276  storage_type0::Size - storage_type1::Size))};
1277 #endif
1278  }
1279  }
1281  inline SimdArray sorted() const //{{{2
1282  {
1283  return sortedImpl(
1284  std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
1285  }
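  // [Editorial example, not part of the original header] sorted() returns the entries in
  // ascending order; the two-half merge is implemented below. Sketch:
  //
  //     Vc::SimdArray<int, 4> x{3, 1, 4, 2};
  //     auto s = x.sorted();   // {1, 2, 3, 4}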
1286 
1288  Vc_INTRINSIC SimdArray sortedImpl(std::true_type) const
1289  {
1290 #ifdef Vc_DEBUG_SORTED
1291  std::cerr << "-- " << data0 << data1 << '\n';
1292 #endif
1293  const auto a = data0.sorted();
1294  const auto b = data1.sorted().reversed();
1295  const auto lo = Vc::min(a, b);
1296  const auto hi = Vc::max(a, b);
1297  return {lo.sorted(), hi.sorted()};
1298  }
1299 
1301  Vc_INTRINSIC SimdArray sortedImpl(std::false_type) const
1302  {
1303  using SortableArray =
1304  SimdArray<value_type, Common::NextPowerOfTwo<size()>::value>;
1305  auto sortable = simd_cast<SortableArray>(*this);
1306  for (std::size_t i = Size; i < SortableArray::Size; ++i) {
1307  using limits = std::numeric_limits<value_type>;
1308  if (limits::has_infinity) {
1309  sortable[i] = limits::infinity();
1310  } else {
1311  sortable[i] = std::numeric_limits<value_type>::max();
1312  }
1313  }
1314  return simd_cast<SimdArray>(sortable.sorted());
1315 
1316  /* The following implementation appears to be less efficient. But this may need further
1317  * work.
1318  const auto a = data0.sorted();
1319  const auto b = data1.sorted();
1320 #ifdef Vc_DEBUG_SORTED
1321  std::cerr << "== " << a << b << '\n';
1322 #endif
1323  auto aIt = Vc::begin(a);
1324  auto bIt = Vc::begin(b);
1325  const auto aEnd = Vc::end(a);
1326  const auto bEnd = Vc::end(b);
1327  return SimdArray::generate([&](std::size_t) {
1328  if (aIt == aEnd) {
1329  return *(bIt++);
1330  }
1331  if (bIt == bEnd) {
1332  return *(aIt++);
1333  }
1334  if (*aIt < *bIt) {
1335  return *(aIt++);
1336  } else {
1337  return *(bIt++);
1338  }
1339  });
1340  */
1341  }
1342 
1345 
1348  static constexpr std::size_t Size = size();
1349 
1351  Vc_DEPRECATED("use exponent(x) instead") Vc_INTRINSIC SimdArray exponent() const
1352  {
1353  return {exponent(data0), exponent(data1)};
1354  }
1355 
1357  Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC MaskType isNegative() const
1358  {
1359  return {isnegative(data0), isnegative(data1)};
1360  }
1361 
1363  Vc_DEPRECATED("use copysign(x, y) instead") Vc_INTRINSIC SimdArray
1364  copySign(const SimdArray &reference) const
1365  {
1366  return {Vc::copysign(data0, reference.data0),
1367  Vc::copysign(data1, reference.data1)};
1368  }
1370 
1371  // internal_data0/1 {{{2
1372  friend storage_type0 &internal_data0<>(SimdArray &x);
1373  friend storage_type1 &internal_data1<>(SimdArray &x);
1374  friend const storage_type0 &internal_data0<>(const SimdArray &x);
1375  friend const storage_type1 &internal_data1<>(const SimdArray &x);
1376 
1378  Vc_INTRINSIC SimdArray(storage_type0 &&x, storage_type1 &&y) //{{{2
1379  : data0(std::move(x)), data1(std::move(y))
1380  {
1381  }
1382 
1383  Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(storage_type0));
1384 
1385 private: //{{{2
1386  // The alignas attribute attached to the class declaration above is ignored by ICC
1387  // 17.0.0 (at least). So just move the alignas attribute down here where it works for
1388  // all compilers.
1389  alignas(static_cast<std::size_t>(
1390  Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value * sizeof(V) /
1391  V::size()>::value)) storage_type0 data0;
1392  storage_type1 data1;
1393 };
1394 #undef Vc_CURRENT_CLASS_NAME
1395 template <typename T, std::size_t N, typename V, std::size_t M>
1396 constexpr std::size_t SimdArray<T, N, V, M>::Size;
1397 template <typename T, std::size_t N, typename V, std::size_t M>
1398 constexpr std::size_t SimdArray<T, N, V, M>::MemoryAlignment;
1399 
1400 // gatherImplementation {{{2
1401 template <typename T, std::size_t N, typename VectorType, std::size_t M>
1402 template <typename MT, typename IT>
1403 inline void SimdArray<T, N, VectorType, M>::gatherImplementation(const MT *mem,
1404  IT &&indexes)
1405 {
1406  data0.gather(mem, Split::lo(Common::Operations::gather(),
1407  indexes)); // don't forward indexes - it could move and
1408  // thus break the next line
1409  data1.gather(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)));
1410 }
1411 template <typename T, std::size_t N, typename VectorType, std::size_t M>
1412 template <typename MT, typename IT>
1413 inline void SimdArray<T, N, VectorType, M>::gatherImplementation(const MT *mem,
1414  IT &&indexes, MaskArgument mask)
1415 {
1416  data0.gather(mem, Split::lo(Common::Operations::gather(), indexes),
1417  Split::lo(mask)); // don't forward indexes - it could move and
1418  // thus break the next line
1419  data1.gather(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)),
1420  Split::hi(mask));
1421 }
1422 
1423 // scatterImplementation {{{2
1424 template <typename T, std::size_t N, typename VectorType, std::size_t M>
1425 template <typename MT, typename IT>
1426 inline void SimdArray<T, N, VectorType, M>::scatterImplementation(MT *mem,
1427  IT &&indexes) const
1428 {
1429  data0.scatter(mem, Split::lo(Common::Operations::gather(),
1430  indexes)); // don't forward indexes - it could move and
1431  // thus break the next line
1432  data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)));
1433 }
1434 template <typename T, std::size_t N, typename VectorType, std::size_t M>
1435 template <typename MT, typename IT>
1436 inline void SimdArray<T, N, VectorType, M>::scatterImplementation(MT *mem,
1437  IT &&indexes, MaskArgument mask) const
1438 {
1439  data0.scatter(mem, Split::lo(Common::Operations::gather(), indexes),
1440  Split::lo(mask)); // don't forward indexes - it could move and
1441  // thus break the next line
1442  data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)),
1443  Split::hi(mask));
1444 }
1445 
1446 // internal_data0/1 (SimdArray) {{{1
1448 template <typename T, std::size_t N, typename V, std::size_t M>
1449 #ifndef Vc_MSVC
1450 Vc_INTRINSIC
1451 #endif
1452 typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
1453  SimdArray<T, N, V, M> &x)
1454 {
1455  return x.data0;
1456 }
1458 template <typename T, std::size_t N, typename V, std::size_t M>
1459 #ifndef Vc_MSVC
1460 Vc_INTRINSIC
1461 #endif
1462 typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
1463  SimdArray<T, N, V, M> &x)
1464 {
1465  return x.data1;
1466 }
1468 template <typename T, std::size_t N, typename V, std::size_t M>
1469 #ifndef Vc_MSVC
1470 Vc_INTRINSIC
1471 #endif
1472 const typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
1473  const SimdArray<T, N, V, M> &x)
1474 {
1475  return x.data0;
1476 }
1478 template <typename T, std::size_t N, typename V, std::size_t M>
1479 #ifndef Vc_MSVC
1480 Vc_INTRINSIC
1481 #endif
1482 const typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
1483  const SimdArray<T, N, V, M> &x)
1484 {
1485  return x.data1;
1486 }
1487 
1488 // MSVC workaround for SimdArray(storage_type0, storage_type1) ctor{{{1
1489 // MSVC sometimes stores x to data1. By first broadcasting 0 and then assigning y
1490 // in the body the bug is suppressed.
1491 #if defined Vc_MSVC && defined Vc_IMPL_SSE
1492 template <>
1493 Vc_INTRINSIC SimdArray<double, 8, SSE::Vector<double>, 2>::SimdArray(
1494  SimdArray<double, 4> &&x, SimdArray<double, 4> &&y)
1495  : data0(x), data1(0)
1496 {
1497  data1 = y;
1498 }
1499 #endif
1500 
1501 // binary operators {{{1
1502 namespace result_vector_type_internal
1503 {
1504 template <typename T>
1505 using type = typename std::remove_cv<typename std::remove_reference<T>::type>::type;
1506 
1507 template <typename T>
1508 using is_integer_larger_than_int = std::integral_constant<
1509  bool, std::is_integral<T>::value &&(sizeof(T) > sizeof(int) ||
1510  std::is_same<T, long>::value ||
1511  std::is_same<T, unsigned long>::value)>;
1512 
1513 template <
1514  typename L, typename R,
1515  std::size_t N = Traits::isSimdArray<L>::value ? Traits::simd_vector_size<L>::value
1516  : Traits::simd_vector_size<R>::value,
1517  bool =
1518  (Traits::isSimdArray<L>::value ||
1519  Traits::isSimdArray<R>::value) // one of the operands must be a SimdArray
1520  && !std::is_same<type<L>, type<R>>::value // if the operands are of the same type
1521  // use the member function
1522  &&
1523  ((std::is_arithmetic<type<L>>::value &&
1524  !is_integer_larger_than_int<type<L>>::value) ||
1525  (std::is_arithmetic<type<R>>::value &&
1526  !is_integer_larger_than_int<type<R>>::value) // one of the operands is a scalar
1527  // type
1528  ||
1529  ( // or one of the operands is Vector<T> with Vector<T>::size() ==
1530  // SimdArray::size()
1531  Traits::simd_vector_size<L>::value == Traits::simd_vector_size<R>::value))>
1534 struct evaluate;
1535 
1536 template <typename L, typename R, std::size_t N> struct evaluate<L, R, N, true>
1537 {
1538 private:
1539  using LScalar = Traits::entry_type_of<L>;
1540  using RScalar = Traits::entry_type_of<R>;
1541 
1542  template <bool B, typename True, typename False>
1543  using conditional = typename std::conditional<B, True, False>::type;
1544 
1545 public:
1546  // In principle we want the exact same rules for SimdArray<T> ⨉ SimdArray<U> as the standard
1547  // defines for T ⨉ U. BUT: short ⨉ short returns int (because all integral types smaller than
1548  // int are promoted to int before any operation). This would imply that SIMD types with integral
1549  // types smaller than int are more or less useless - and you could use SimdArray<int> from the
1550  // start. Therefore we special-case those operations where the scalar type of both operands is
1551  // integral and smaller than int.
1552  // In addition to that there is no generic support for 64-bit int SIMD types. Therefore
1553  // promotion to a 64-bit integral type (including `long` because it can potentially have 64
1554  // bits) also is not done. But if one of the operands is a scalar type that is larger than int
1555  // then the operator is disabled altogether. We do not want an implicit demotion.
1556  using type = SimdArray<
1557  conditional<(std::is_integral<LScalar>::value &&std::is_integral<RScalar>::value &&
1558  sizeof(LScalar) < sizeof(int) &&
1559  sizeof(RScalar) < sizeof(int)),
1560  conditional<(sizeof(LScalar) == sizeof(RScalar)),
1561  conditional<std::is_unsigned<LScalar>::value, LScalar, RScalar>,
1562  conditional<(sizeof(LScalar) > sizeof(RScalar)), LScalar, RScalar>>,
1563  decltype(std::declval<LScalar>() + std::declval<RScalar>())>,
1564  N>;
1565 };
1566 
1567 } // namespace result_vector_type_internal
1568 
1569 template <typename L, typename R>
1570 using result_vector_type = typename result_vector_type_internal::evaluate<L, R>::type;
1571 
1572 static_assert(
1573  std::is_same<result_vector_type<short int, Vc::SimdArray<short unsigned int, 32ul>>,
1575  "result_vector_type does not work");
1576 
1577 #define Vc_BINARY_OPERATORS_(op_) \
1578  \
1579  template <typename L, typename R> \
1580  Vc_INTRINSIC result_vector_type<L, R> operator op_(L &&lhs, R &&rhs) \
1581  { \
1582  using Return = result_vector_type<L, R>; \
1583  return Return(std::forward<L>(lhs)) op_ Return(std::forward<R>(rhs)); \
1584  }
1585 
1602 Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATORS_);
1604 Vc_ALL_BINARY(Vc_BINARY_OPERATORS_);
1606 #undef Vc_BINARY_OPERATORS_
1607 #define Vc_BINARY_OPERATORS_(op_) \
1608  \
1609  template <typename L, typename R> \
1610  Vc_INTRINSIC typename result_vector_type<L, R>::mask_type operator op_(L &&lhs, \
1611  R &&rhs) \
1612  { \
1613  using Promote = result_vector_type<L, R>; \
1614  return Promote(std::forward<L>(lhs)) op_ Promote(std::forward<R>(rhs)); \
1615  }
1616 
1633 Vc_ALL_COMPARES(Vc_BINARY_OPERATORS_);
1636 #undef Vc_BINARY_OPERATORS_
1637 
1638 // math functions {{{1
1639 #define Vc_FORWARD_UNARY_OPERATOR(name_) \
1640  \
1641  template <typename T, std::size_t N, typename V, std::size_t M> \
1642  inline SimdArray<T, N, V, M> name_(const SimdArray<T, N, V, M> &x) \
1643  { \
1644  return SimdArray<T, N, V, M>::fromOperation( \
1645  Common::Operations::Forward_##name_(), x); \
1646  } \
1647  Vc_NOTHING_EXPECTING_SEMICOLON
1648 
1649 #define Vc_FORWARD_UNARY_BOOL_OPERATOR(name_) \
1650  \
1651  template <typename T, std::size_t N, typename V, std::size_t M> \
1652  inline SimdMaskArray<T, N, V, M> name_(const SimdArray<T, N, V, M> &x) \
1653  { \
1654  return SimdMaskArray<T, N, V, M>::fromOperation( \
1655  Common::Operations::Forward_##name_(), x); \
1656  } \
1657  Vc_NOTHING_EXPECTING_SEMICOLON
1658 
1659 #define Vc_FORWARD_BINARY_OPERATOR(name_) \
1660  \
1661  template <typename T, std::size_t N, typename V, std::size_t M> \
1662  inline SimdArray<T, N, V, M> name_(const SimdArray<T, N, V, M> &x, \
1663  const SimdArray<T, N, V, M> &y) \
1664  { \
1665  return SimdArray<T, N, V, M>::fromOperation( \
1666  Common::Operations::Forward_##name_(), x, y); \
1667  } \
1668  Vc_NOTHING_EXPECTING_SEMICOLON
1669 
1674 Vc_FORWARD_UNARY_OPERATOR(abs);
1676 Vc_FORWARD_UNARY_OPERATOR(asin);
1677 Vc_FORWARD_UNARY_OPERATOR(atan);
1678 Vc_FORWARD_BINARY_OPERATOR(atan2);
1679 Vc_FORWARD_UNARY_OPERATOR(ceil);
1680 Vc_FORWARD_BINARY_OPERATOR(copysign);
1681 Vc_FORWARD_UNARY_OPERATOR(cos);
1682 Vc_FORWARD_UNARY_OPERATOR(exp);
1683 Vc_FORWARD_UNARY_OPERATOR(exponent);
1684 Vc_FORWARD_UNARY_OPERATOR(floor);
1686 template <typename T, std::size_t N>
1687 inline SimdArray<T, N> fma(const SimdArray<T, N> &a, const SimdArray<T, N> &b,
1688  const SimdArray<T, N> &c)
1689 {
1690  return SimdArray<T, N>::fromOperation(Common::Operations::Forward_fma(), a, b, c);
1691 }
1692 Vc_FORWARD_UNARY_BOOL_OPERATOR(isfinite);
1693 Vc_FORWARD_UNARY_BOOL_OPERATOR(isinf);
1694 Vc_FORWARD_UNARY_BOOL_OPERATOR(isnan);
1695 #if defined Vc_MSVC && defined Vc_IMPL_SSE
1696 inline SimdMaskArray<double, 8, SSE::Vector<double>, 2> isnan(
1697  const SimdArray<double, 8, SSE::Vector<double>, 2> &x)
1698 {
1699  using V = SSE::Vector<double>;
1700  const SimdArray<double, 4, V, 2> &x0 = internal_data0(x);
1701  const SimdArray<double, 4, V, 2> &x1 = internal_data1(x);
1702  SimdMaskArray<double, 4, V, 2> r0;
1703  SimdMaskArray<double, 4, V, 2> r1;
1704  internal_data(internal_data0(r0)) = isnan(internal_data(internal_data0(x0)));
1705  internal_data(internal_data1(r0)) = isnan(internal_data(internal_data1(x0)));
1706  internal_data(internal_data0(r1)) = isnan(internal_data(internal_data0(x1)));
1707  internal_data(internal_data1(r1)) = isnan(internal_data(internal_data1(x1)));
1708  return {std::move(r0), std::move(r1)};
1709 }
1710 #endif
1711 Vc_FORWARD_UNARY_BOOL_OPERATOR(isnegative);
1713 template <typename T, std::size_t N>
1714 inline SimdArray<T, N> frexp(const SimdArray<T, N> &x, SimdArray<int, N> *e)
1715 {
1716  return SimdArray<T, N>::fromOperation(Common::Operations::Forward_frexp(), x, e);
1717 }
1719 template <typename T, std::size_t N>
1720 inline SimdArray<T, N> ldexp(const SimdArray<T, N> &x, const SimdArray<int, N> &e)
1721 {
1722  return SimdArray<T, N>::fromOperation(Common::Operations::Forward_ldexp(), x, e);
1723 }
1724 Vc_FORWARD_UNARY_OPERATOR(log);
1725 Vc_FORWARD_UNARY_OPERATOR(log10);
1726 Vc_FORWARD_UNARY_OPERATOR(log2);
1727 Vc_FORWARD_UNARY_OPERATOR(reciprocal);
1728 Vc_FORWARD_UNARY_OPERATOR(round);
1729 Vc_FORWARD_UNARY_OPERATOR(rsqrt);
1730 Vc_FORWARD_UNARY_OPERATOR(sin);
1732 template <typename T, std::size_t N>
1733 void sincos(const SimdArray<T, N> &x, SimdArray<T, N> *sin, SimdArray<T, N> *cos)
1734 {
1735  SimdArray<T, N>::callOperation(Common::Operations::Forward_sincos(), x, sin, cos);
1736 }
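// [Editorial example, not part of the original header] The forwarded math functions work
// component-wise on whole SimdArrays; sincos is assumed to follow the usual Vc convention
// of pointer output parameters. Sketch:
//
//     Vc::SimdArray<float, 8> x(0.5f), s, c;
//     Vc::sincos(x, &s, &c);        // s = sin(x), c = cos(x), lane by lane
//     auto y = Vc::fma(x, x, x);    // x * x + x, lane by lane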
1737 Vc_FORWARD_UNARY_OPERATOR(sqrt);
1738 Vc_FORWARD_UNARY_OPERATOR(trunc);
1739 Vc_FORWARD_BINARY_OPERATOR(min);
1740 Vc_FORWARD_BINARY_OPERATOR(max);
1742 #undef Vc_FORWARD_UNARY_OPERATOR
1743 #undef Vc_FORWARD_UNARY_BOOL_OPERATOR
1744 #undef Vc_FORWARD_BINARY_OPERATOR
1745 
1746 // simd_cast {{{1
1747 #ifdef Vc_MSVC
1748 #define Vc_DUMMY_ARG0 , int = 0
1749 #define Vc_DUMMY_ARG1 , long = 0
1750 #define Vc_DUMMY_ARG2 , short = 0
1751 #define Vc_DUMMY_ARG3 , char = '0'
1752 #define Vc_DUMMY_ARG4 , unsigned = 0u
1753 #define Vc_DUMMY_ARG5 , unsigned short = 0u
1754 #else
1755 #define Vc_DUMMY_ARG0
1756 #define Vc_DUMMY_ARG1
1757 #define Vc_DUMMY_ARG2
1758 #define Vc_DUMMY_ARG3
1759 #define Vc_DUMMY_ARG4
1760 #define Vc_DUMMY_ARG5
1761 #endif // Vc_MSVC
1762 
1763 // simd_cast_impl_smaller_input {{{2
1764 // The following function can be implemented without the sizeof...(From) overload.
1765 // However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
1766 // function in two works around the issue.
1767 template <typename Return, std::size_t N, typename T, typename... From>
1768 Vc_INTRINSIC Vc_CONST enable_if<sizeof...(From) != 0, Return>
1769 simd_cast_impl_smaller_input(const From &... xs, const T &last)
1770 {
1771  Return r = simd_cast<Return>(xs...);
1772  for (size_t i = 0; i < N; ++i) {
1773  r[i + N * sizeof...(From)] = static_cast<typename Return::EntryType>(last[i]);
1774  }
1775  return r;
1776 }
1777 template <typename Return, std::size_t N, typename T>
1778 Vc_INTRINSIC Vc_CONST Return simd_cast_impl_smaller_input(const T &last)
1779 {
1780  Return r = Return();
1781  for (size_t i = 0; i < N; ++i) {
1782  r[i] = static_cast<typename Return::EntryType>(last[i]);
1783  }
1784  return r;
1785 }
1786 template <typename Return, std::size_t N, typename T, typename... From>
1787 Vc_INTRINSIC Vc_CONST enable_if<sizeof...(From) != 0, Return> simd_cast_impl_larger_input(
1788  const From &... xs, const T &last)
1789 {
1790  Return r = simd_cast<Return>(xs...);
1791  for (size_t i = N * sizeof...(From); i < Return::Size; ++i) {
1792  r[i] = static_cast<typename Return::EntryType>(last[i - N * sizeof...(From)]);
1793  }
1794  return r;
1795 }
1796 template <typename Return, std::size_t N, typename T>
1797 Vc_INTRINSIC Vc_CONST Return simd_cast_impl_larger_input(const T &last)
1798 {
1799  Return r = Return();
1800  for (size_t i = 0; i < Return::size(); ++i) {
1801  r[i] = static_cast<typename Return::EntryType>(last[i]);
1802  }
1803  return r;
1804 }
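// [Editorial example, not part of the original header] These helpers back the user-visible
// simd_cast, which converts element types and repartitions lanes between SimdArrays of
// possibly different widths. Sketch:
//
//     Vc::SimdArray<float, 8> f(1.5f);
//     auto i = Vc::simd_cast<Vc::SimdArray<int, 8>>(f);   // lane-wise float -> int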
1805 
1806 // simd_cast_without_last (declaration) {{{2
1807 template <typename Return, typename T, typename... From>
1808 Vc_INTRINSIC_L Vc_CONST_L Return
1809  simd_cast_without_last(const From &... xs, const T &) Vc_INTRINSIC_R Vc_CONST_R;
1810 
1811 // are_all_types_equal {{{2
1812 template <typename... Ts> struct are_all_types_equal;
1813 template <typename T>
1814 struct are_all_types_equal<T> : public std::integral_constant<bool, true>
1815 {
1816 };
1817 template <typename T0, typename T1, typename... Ts>
1818 struct are_all_types_equal<T0, T1, Ts...>
1819  : public std::integral_constant<
1820  bool, std::is_same<T0, T1>::value && are_all_types_equal<T1, Ts...>::value>
1821 {
1822 };
1823 
1824 // simd_cast_interleaved_argument_order (declarations) {{{2
1844 template <typename Return, typename... Ts>
1845 Vc_INTRINSIC Vc_CONST Return
1846  simd_cast_interleaved_argument_order(const Ts &... a, const Ts &... b);
1847 
1848 // simd_cast_with_offset (declarations and one impl) {{{2
1849 // offset == 0 {{{3
1850 template <typename Return, std::size_t offset, typename From, typename... Froms>
1851 Vc_INTRINSIC Vc_CONST
1852  enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
1853  simd_cast_with_offset(const From &x, const Froms &... xs);
1854 // offset > 0 && offset divisible by Return::Size {{{3
1855 template <typename Return, std::size_t offset, typename From>
1856 Vc_INTRINSIC Vc_CONST
1857  enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0), Return>
1858  simd_cast_with_offset(const From &x);
1859 // offset > 0 && offset NOT divisible && Return is non-atomic simd(mask)array {{{3
1860 template <typename Return, std::size_t offset, typename From>
1861 Vc_INTRINSIC Vc_CONST
1862  enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
1863  ((Traits::isSimdArray<Return>::value &&
1864  !Traits::isAtomicSimdArray<Return>::value) ||
1865  (Traits::isSimdMaskArray<Return>::value &&
1866  !Traits::isAtomicSimdMaskArray<Return>::value))),
1867  Return>
1868  simd_cast_with_offset(const From &x);
1869 // offset > 0 && offset NOT divisible && Return is atomic simd(mask)array {{{3
1870 template <typename Return, std::size_t offset, typename From>
1871 Vc_INTRINSIC Vc_CONST
1872  enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
1873  ((Traits::isSimdArray<Return>::value &&
1874  Traits::isAtomicSimdArray<Return>::value) ||
1875  (Traits::isSimdMaskArray<Return>::value &&
1876  Traits::isAtomicSimdMaskArray<Return>::value))),
1877  Return>
1878  simd_cast_with_offset(const From &x);
1879 // offset > first argument (drops first arg) {{{3
1880 template <typename Return, std::size_t offset, typename From, typename... Froms>
1881 Vc_INTRINSIC Vc_CONST enable_if<
1882  (are_all_types_equal<From, Froms...>::value && From::Size <= offset), Return>
1883  simd_cast_with_offset(const From &, const Froms &... xs)
1884 {
1885  return simd_cast_with_offset<Return, offset - From::Size>(xs...);
1886 }
1887 
1888 // offset > first and only argument (returns Zero) {{{3
1889 template <typename Return, std::size_t offset, typename From>
1890 Vc_INTRINSIC Vc_CONST enable_if<(From::Size <= offset), Return> simd_cast_with_offset(
1891  const From &)
1892 {
1893  return Return::Zero();
1894 }
1895 
1896 // first_type_of {{{2
1897 template <typename T, typename... Ts> struct first_type_of_impl
1898 {
1899  using type = T;
1900 };
1901 template <typename... Ts> using first_type_of = typename first_type_of_impl<Ts...>::type;
1902 
1903 // simd_cast_drop_arguments (declarations) {{{2
1904 template <typename Return, typename From>
1905 Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x);
1906 template <typename Return, typename... Froms>
1907 Vc_INTRINSIC Vc_CONST
1908  enable_if<(are_all_types_equal<Froms...>::value &&
1909  sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
1910  Return>
1911  simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x);
1912 // The following function can be implemented without the sizeof...(From) overload.
1913 // However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
1914 // function in two works around the issue.
1915 template <typename Return, typename From, typename... Froms>
1916 Vc_INTRINSIC Vc_CONST enable_if<
1917  (are_all_types_equal<From, Froms...>::value &&
1918  (1 + sizeof...(Froms)) * From::Size >= Return::Size && sizeof...(Froms) != 0),
1919  Return>
1920 simd_cast_drop_arguments(Froms... xs, From x, From);
1921 template <typename Return, typename From>
1922 Vc_INTRINSIC Vc_CONST
1923  enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
1924  simd_cast_drop_arguments(From x, From);
1925 
1926 namespace
1927 {
1928 #ifdef Vc_DEBUG_SIMD_CAST
1929 void debugDoNothing(const std::initializer_list<void *> &) {}
1930 template <typename T0, typename... Ts>
1931 inline void vc_debug_(const char *prefix, const char *suffix, const T0 &arg0,
1932  const Ts &... args)
1933 {
1934  std::cerr << prefix << arg0;
1935  debugDoNothing({&(std::cerr << ", " << args)...});
1936  std::cerr << suffix;
1937 }
1938 #else
1939 template <typename T0, typename... Ts>
1940 Vc_INTRINSIC void vc_debug_(const char *, const char *, const T0 &, const Ts &...)
1941 {
1942 }
1943 #endif
1944 } // unnamed namespace
1945 
1946 // is_less trait{{{2
1947 template <size_t A, size_t B>
1948 struct is_less : public std::integral_constant<bool, (A < B)> {
1949 };
1950 
1951 // is_power_of_2 trait{{{2
1952 template <size_t N>
1953 struct is_power_of_2 : public std::integral_constant<bool, ((N - 1) & N) == 0> {
1954 };
1955 
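Illustration of the bit trick (not part of the header): a power of two has a single set bit, so clearing the lowest set bit via (N - 1) & N yields zero. Note that N == 0 also passes the test; in this header the trait is only instantiated with the positive element count N of a SimdArray, so the edge case is harmless.

    static_assert(is_power_of_2<1>::value && is_power_of_2<8>::value, "");
    static_assert(!is_power_of_2<6>::value, "");
    static_assert(is_power_of_2<0>::value, "0 also satisfies ((N - 1) & N) == 0");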
1956 // simd_cast<T>(xs...) to SimdArray/-mask {{{2
1957 #define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_) \
1958  template <typename Return, typename T, typename A, typename... Froms> \
1959  Vc_INTRINSIC Vc_CONST enable_if< \
1960  (Traits::isAtomic##SimdArrayType_<Return>::value && \
1961  is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value && \
1962  are_all_types_equal<NativeType_<T, A>, Froms...>::value), \
1963  Return> \
1964  simd_cast(NativeType_<T, A> x, Froms... xs) \
1965  { \
1966  vc_debug_("simd_cast{1}(", ")\n", x, xs...); \
1967  return {simd_cast<typename Return::storage_type>(x, xs...)}; \
1968  } \
1969  template <typename Return, typename T, typename A, typename... Froms> \
1970  Vc_INTRINSIC Vc_CONST enable_if< \
1971  (Traits::isAtomic##SimdArrayType_<Return>::value && \
1972  !is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value && \
1973  are_all_types_equal<NativeType_<T, A>, Froms...>::value), \
1974  Return> \
1975  simd_cast(NativeType_<T, A> x, Froms... xs) \
1976  { \
1977  vc_debug_("simd_cast{2}(", ")\n", x, xs...); \
1978  return {simd_cast_without_last<Return, NativeType_<T, A>, Froms...>(x, xs...)}; \
1979  } \
1980  template <typename Return, typename T, typename A, typename... Froms> \
1981  Vc_INTRINSIC Vc_CONST \
1982  enable_if<(Traits::is##SimdArrayType_<Return>::value && \
1983  !Traits::isAtomic##SimdArrayType_<Return>::value && \
1984  is_less<Common::left_size<Return::Size>(), \
1985  NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value && \
1986  are_all_types_equal<NativeType_<T, A>, Froms...>::value), \
1987  Return> \
1988  simd_cast(NativeType_<T, A> x, Froms... xs) \
1989  { \
1990  vc_debug_("simd_cast{3}(", ")\n", x, xs...); \
1991  using R0 = typename Return::storage_type0; \
1992  using R1 = typename Return::storage_type1; \
1993  return {simd_cast_drop_arguments<R0, Froms...>(x, xs...), \
1994  simd_cast_with_offset<R1, R0::Size>(x, xs...)}; \
1995  } \
1996  template <typename Return, typename T, typename A, typename... Froms> \
1997  Vc_INTRINSIC Vc_CONST \
1998  enable_if<(Traits::is##SimdArrayType_<Return>::value && \
1999  !Traits::isAtomic##SimdArrayType_<Return>::value && \
2000  !is_less<Common::left_size<Return::Size>(), \
2001  NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value && \
2002  are_all_types_equal<NativeType_<T, A>, Froms...>::value), \
2003  Return> \
2004  simd_cast(NativeType_<T, A> x, Froms... xs) \
2005  { \
2006  vc_debug_("simd_cast{4}(", ")\n", x, xs...); \
2007  using R0 = typename Return::storage_type0; \
2008  using R1 = typename Return::storage_type1; \
2009  return {simd_cast<R0>(x, xs...), R1::Zero()}; \
2010  } \
2011  Vc_NOTHING_EXPECTING_SEMICOLON
2012 
2013 Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
2014 Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
2015 #undef Vc_SIMDARRAY_CASTS
2016 
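A hedged sketch of what this block of overloads enables: concatenating native Vc::Vector values into a wider SimdArray (function and variable names are illustrative only):

    void native_to_simdarray(Vc::float_v a, Vc::float_v b)
    {
        using A = Vc::SimdArray<float, 2 * Vc::float_v::size()>;
        const A joined = Vc::simd_cast<A>(a, b);  // a fills the low half, b the high half
        (void)joined;
    }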
2017 // simd_cast<SimdArray/-mask, offset>(V) {{{2
2018 #define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_) \
2019  /* SIMD Vector/Mask to atomic SimdArray/simdmaskarray */ \
2020  template <typename Return, int offset, typename T, typename A> \
2021  Vc_INTRINSIC Vc_CONST \
2022  enable_if<Traits::isAtomic##SimdArrayType_<Return>::value, Return> \
2023  simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG0) \
2024  { \
2025  vc_debug_("simd_cast{offset, atomic}(", ")\n", offset, x); \
2026  return {simd_cast<typename Return::storage_type, offset>(x)}; \
2027  } \
2028  /* both halves of Return array are extracted from argument */ \
2029  template <typename Return, int offset, typename T, typename A> \
2030  Vc_INTRINSIC Vc_CONST \
2031  enable_if<(Traits::is##SimdArrayType_<Return>::value && \
2032  !Traits::isAtomic##SimdArrayType_<Return>::value && \
2033  Return::Size * offset + Common::left_size<Return::Size>() < \
2034  NativeType_<T, A>::Size), \
2035  Return> \
2036  simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG1) \
2037  { \
2038  vc_debug_("simd_cast{offset, split Return}(", ")\n", offset, x); \
2039  using R0 = typename Return::storage_type0; \
2040  constexpr int entries_offset = offset * Return::Size; \
2041  constexpr int entries_offset_right = entries_offset + R0::Size; \
2042  return { \
2043  simd_cast_with_offset<typename Return::storage_type0, entries_offset>(x), \
2044  simd_cast_with_offset<typename Return::storage_type1, entries_offset_right>( \
2045  x)}; \
2046  } \
2047  /* SIMD Vector/Mask to non-atomic SimdArray/simdmaskarray */ \
2048  /* right half of Return array is zero */ \
2049  template <typename Return, int offset, typename T, typename A> \
2050  Vc_INTRINSIC Vc_CONST \
2051  enable_if<(Traits::is##SimdArrayType_<Return>::value && \
2052  !Traits::isAtomic##SimdArrayType_<Return>::value && \
2053  Return::Size * offset + Common::left_size<Return::Size>() >= \
2054  NativeType_<T, A>::Size), \
2055  Return> \
2056  simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG2) \
2057  { \
2058  vc_debug_("simd_cast{offset, R1::Zero}(", ")\n", offset, x); \
2059  using R0 = typename Return::storage_type0; \
2060  using R1 = typename Return::storage_type1; \
2061  constexpr int entries_offset = offset * Return::Size; \
2062  return {simd_cast_with_offset<R0, entries_offset>(x), R1::Zero()}; \
2063  } \
2064  Vc_NOTHING_EXPECTING_SEMICOLON
2065 
2066 Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
2067 Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
2068 #undef Vc_SIMDARRAY_CASTS
2069 
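A hedged sketch of the offset form: simd_cast<Return, offset>(v) treats v as a sequence of Return-sized chunks and converts chunk number offset. The example assumes Vc::float_v::size() is at least 2, i.e. a non-scalar implementation (names are illustrative only):

    void split_native(Vc::float_v v)
    {
        using Half = Vc::SimdArray<float, Vc::float_v::size() / 2>;
        const Half lo = Vc::simd_cast<Half, 0>(v);  // entries [0, Half::size())
        const Half hi = Vc::simd_cast<Half, 1>(v);  // entries [Half::size(), 2 * Half::size())
        (void)lo; (void)hi;
    }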
2070 // simd_cast<T>(xs...) from SimdArray/-mask {{{2
2071 #define Vc_SIMDARRAY_CASTS(SimdArrayType_) \
2072  /* indivisible SimdArrayType_ */ \
2073  template <typename Return, typename T, std::size_t N, typename V, typename... From> \
2074  Vc_INTRINSIC Vc_CONST \
2075  enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value && \
2076  (sizeof...(From) == 0 || N * sizeof...(From) < Return::Size) && \
2077  !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value), \
2078  Return> \
2079  simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs) \
2080  { \
2081  vc_debug_("simd_cast{indivisible}(", ")\n", x0, xs...); \
2082  return simd_cast<Return>(internal_data(x0), internal_data(xs)...); \
2083  } \
2084  /* indivisible SimdArrayType_ && can drop arguments from the end */ \
2085  template <typename Return, typename T, std::size_t N, typename V, typename... From> \
2086  Vc_INTRINSIC Vc_CONST \
2087  enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value && \
2088  (sizeof...(From) > 0 && (N * sizeof...(From) >= Return::Size)) && \
2089  !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value), \
2090  Return> \
2091  simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs) \
2092  { \
2093  vc_debug_("simd_cast{indivisible2}(", ")\n", x0, xs...); \
2094  return simd_cast_without_last<Return, \
2095  typename SimdArrayType_<T, N, V, N>::storage_type, \
2096  typename From::storage_type...>( \
2097  internal_data(x0), internal_data(xs)...); \
2098  } \
2099  /* bisectable SimdArrayType_ (N = 2^n) && never too large */ \
2100  template <typename Return, typename T, std::size_t N, typename V, std::size_t M, \
2101  typename... From> \
2102  Vc_INTRINSIC Vc_CONST enable_if< \
2103  (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value && \
2104  !std::is_same<Return, SimdArrayType_<T, N, V, M>>::value && \
2105  is_less<N * sizeof...(From), Return::Size>::value && is_power_of_2<N>::value), \
2106  Return> \
2107  simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs) \
2108  { \
2109  vc_debug_("simd_cast{bisectable}(", ")\n", x0, xs...); \
2110  return simd_cast_interleaved_argument_order< \
2111  Return, typename SimdArrayType_<T, N, V, M>::storage_type0, \
2112  typename From::storage_type0...>(internal_data0(x0), internal_data0(xs)..., \
2113  internal_data1(x0), internal_data1(xs)...); \
2114  } \
2115  /* bisectable SimdArrayType_ (N = 2^n) && input so large that at least the last \
2116  * input can be dropped */ \
2117  template <typename Return, typename T, std::size_t N, typename V, std::size_t M, \
2118  typename... From> \
2119  Vc_INTRINSIC Vc_CONST enable_if< \
2120  (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value && \
2121  !is_less<N * sizeof...(From), Return::Size>::value && is_power_of_2<N>::value), \
2122  Return> \
2123  simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs) \
2124  { \
2125  vc_debug_("simd_cast{bisectable2}(", ")\n", x0, xs...); \
2126  return simd_cast_without_last<Return, SimdArrayType_<T, N, V, M>, From...>( \
2127  x0, xs...); \
2128  } \
2129  /* remaining SimdArrayType_ input never larger (N != 2^n) */ \
2130  template <typename Return, typename T, std::size_t N, typename V, std::size_t M, \
2131  typename... From> \
2132  Vc_INTRINSIC Vc_CONST enable_if< \
2133  (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value && \
2134  N * (1 + sizeof...(From)) <= Return::Size && !is_power_of_2<N>::value), \
2135  Return> \
2136  simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs) \
2137  { \
2138  vc_debug_("simd_cast{remaining}(", ")\n", x0, xs...); \
2139  return simd_cast_impl_smaller_input<Return, N, SimdArrayType_<T, N, V, M>, \
2140  From...>(x0, xs...); \
2141  } \
2142  /* remaining SimdArrayType_ input larger (N != 2^n) */ \
2143  template <typename Return, typename T, std::size_t N, typename V, std::size_t M, \
2144  typename... From> \
2145  Vc_INTRINSIC Vc_CONST enable_if< \
2146  (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value && \
2147  N * (1 + sizeof...(From)) > Return::Size && !is_power_of_2<N>::value), \
2148  Return> \
2149  simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs) \
2150  { \
2151  vc_debug_("simd_cast{remaining2}(", ")\n", x0, xs...); \
2152  return simd_cast_impl_larger_input<Return, N, SimdArrayType_<T, N, V, M>, \
2153  From...>(x0, xs...); \
2154  } \
2155  /* a single bisectable SimdArrayType_ (N = 2^n) too large */ \
2156  template <typename Return, typename T, std::size_t N, typename V, std::size_t M> \
2157  Vc_INTRINSIC Vc_CONST \
2158  enable_if<(N != M && N >= 2 * Return::Size && is_power_of_2<N>::value), Return> \
2159  simd_cast(const SimdArrayType_<T, N, V, M> &x) \
2160  { \
2161  vc_debug_("simd_cast{single bisectable}(", ")\n", x); \
2162  return simd_cast<Return>(internal_data0(x)); \
2163  } \
2164  template <typename Return, typename T, std::size_t N, typename V, std::size_t M> \
2165  Vc_INTRINSIC Vc_CONST enable_if<(N != M && N > Return::Size && \
2166  N < 2 * Return::Size && is_power_of_2<N>::value), \
2167  Return> \
2168  simd_cast(const SimdArrayType_<T, N, V, M> &x) \
2169  { \
2170  vc_debug_("simd_cast{single bisectable2}(", ")\n", x); \
2171  return simd_cast<Return>(internal_data0(x), internal_data1(x)); \
2172  } \
2173  Vc_NOTHING_EXPECTING_SEMICOLON
2174 
2175 Vc_SIMDARRAY_CASTS(SimdArray);
2176 Vc_SIMDARRAY_CASTS(SimdMaskArray);
2177 #undef Vc_SIMDARRAY_CASTS
2178 
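A hedged sketch of the most common use of this block: element-wise type conversion from one SimdArray to another of the same width (names are illustrative only):

    void to_int(const Vc::SimdArray<float, 8> &x)
    {
        // converts entry by entry, truncating like static_cast<int>
        const auto i = Vc::simd_cast<Vc::SimdArray<int, 8>>(x);
        (void)i;
    }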
2179 // simd_cast<T, offset>(SimdArray/-mask) {{{2
2180 #define Vc_SIMDARRAY_CASTS(SimdArrayType_) \
2181  /* offset == 0 is like without offset */ \
2182  template <typename Return, int offset, typename T, std::size_t N, typename V, \
2183  std::size_t M> \
2184  Vc_INTRINSIC Vc_CONST enable_if<(offset == 0), Return> simd_cast( \
2185  const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG0) \
2186  { \
2187  vc_debug_("simd_cast{offset == 0}(", ")\n", offset, x); \
2188  return simd_cast<Return>(x); \
2189  } \
2190  /* forward to V */ \
2191  template <typename Return, int offset, typename T, std::size_t N, typename V> \
2192  Vc_INTRINSIC Vc_CONST enable_if<(offset != 0), Return> simd_cast( \
2193  const SimdArrayType_<T, N, V, N> &x Vc_DUMMY_ARG1) \
2194  { \
2195  vc_debug_("simd_cast{offset, forward}(", ")\n", offset, x); \
2196  return simd_cast<Return, offset>(internal_data(x)); \
2197  } \
2198  /* convert from right member of SimdArray */ \
2199  template <typename Return, int offset, typename T, std::size_t N, typename V, \
2200  std::size_t M> \
2201  Vc_INTRINSIC Vc_CONST \
2202  enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() && \
2203  offset != 0 && Common::left_size<N>() % Return::Size == 0), \
2204  Return> \
2205  simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG2) \
2206  { \
2207  vc_debug_("simd_cast{offset, right}(", ")\n", offset, x); \
2208  return simd_cast<Return, offset - Common::left_size<N>() / Return::Size>( \
2209  internal_data1(x)); \
2210  } \
2211  /* same as above except for odd cases where offset * Return::Size doesn't fit the \
2212  * left side of the SimdArray */ \
2213  template <typename Return, int offset, typename T, std::size_t N, typename V, \
2214  std::size_t M> \
2215  Vc_INTRINSIC Vc_CONST \
2216  enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() && \
2217  offset != 0 && Common::left_size<N>() % Return::Size != 0), \
2218  Return> \
2219  simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG3) \
2220  { \
2221  vc_debug_("simd_cast{offset, right, nofit}(", ")\n", offset, x); \
2222  return simd_cast_with_offset<Return, \
2223  offset * Return::Size - Common::left_size<N>()>( \
2224  internal_data1(x)); \
2225  } \
2226  /* convert from left member of SimdArray */ \
2227  template <typename Return, int offset, typename T, std::size_t N, typename V, \
2228  std::size_t M> \
2229  Vc_INTRINSIC Vc_CONST enable_if< \
2230  (N != M && /*offset * Return::Size < Common::left_size<N>() &&*/ \
2231  offset != 0 && (offset + 1) * Return::Size <= Common::left_size<N>()), \
2232  Return> \
2233  simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG4) \
2234  { \
2235  vc_debug_("simd_cast{offset, left}(", ")\n", offset, x); \
2236  return simd_cast<Return, offset>(internal_data0(x)); \
2237  } \
2238  /* fallback to copying scalars */ \
2239  template <typename Return, int offset, typename T, std::size_t N, typename V, \
2240  std::size_t M> \
2241  Vc_INTRINSIC Vc_CONST \
2242  enable_if<(N != M && (offset * Return::Size < Common::left_size<N>()) && \
2243  offset != 0 && (offset + 1) * Return::Size > Common::left_size<N>()), \
2244  Return> \
2245  simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG5) \
2246  { \
2247  vc_debug_("simd_cast{offset, copy scalars}(", ")\n", offset, x); \
2248  using R = typename Return::EntryType; \
2249  Return r = Return::Zero(); \
2250  for (std::size_t i = offset * Return::Size; \
2251  i < std::min(N, (offset + 1) * Return::Size); ++i) { \
2252  r[i - offset * Return::Size] = static_cast<R>(x[i]); \
2253  } \
2254  return r; \
2255  } \
2256  Vc_NOTHING_EXPECTING_SEMICOLON
2257 Vc_SIMDARRAY_CASTS(SimdArray);
2258 Vc_SIMDARRAY_CASTS(SimdMaskArray);
2259 #undef Vc_SIMDARRAY_CASTS
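A hedged sketch of the offset overloads above: extracting one Return-sized chunk out of a wider SimdArray (names are illustrative only):

    void extract_quarter(const Vc::SimdArray<float, 16> &x)
    {
        // chunk 1 of four, i.e. entries 4..7 of x
        const auto q1 = Vc::simd_cast<Vc::SimdArray<float, 4>, 1>(x);
        (void)q1;
    }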
2260 // simd_cast_drop_arguments (definitions) {{{2
2261 template <typename Return, typename From>
2262 Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x)
2263 {
2264  return simd_cast<Return>(x);
2265 }
2266 template <typename Return, typename... Froms>
2267 Vc_INTRINSIC Vc_CONST
2268  enable_if<(are_all_types_equal<Froms...>::value &&
2269  sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
2270  Return>
2271  simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x)
2272 {
2273  return simd_cast<Return>(xs..., x);
2274 }
2275 // The following function can be implemented without the sizeof...(From) overload.
2276 // However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
2277 // function in two works around the issue.
2278 template <typename Return, typename From, typename... Froms>
2279 Vc_INTRINSIC Vc_CONST enable_if<
2280  (are_all_types_equal<From, Froms...>::value &&
2281  (1 + sizeof...(Froms)) * From::Size >= Return::Size && sizeof...(Froms) != 0),
2282  Return>
2283 simd_cast_drop_arguments(Froms... xs, From x, From)
2284 {
2285  return simd_cast_drop_arguments<Return, Froms...>(xs..., x);
2286 }
2287 template <typename Return, typename From>
2288 Vc_INTRINSIC Vc_CONST
2289  enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
2290  simd_cast_drop_arguments(From x, From)
2291 {
2292  return simd_cast_drop_arguments<Return>(x);
2293 }
2294 
2295 // simd_cast_with_offset (definitions) {{{2
2296 template <typename Return, std::size_t offset, typename From>
2297 Vc_INTRINSIC Vc_CONST
2298  enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0),
2299  Return> simd_cast_with_offset(const From &x)
2300 {
2301  return simd_cast<Return, offset / Return::Size>(x);
2302 }
2303 template <typename Return, std::size_t offset, typename From>
2304 Vc_INTRINSIC Vc_CONST
2305  enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
2306  ((Traits::isSimdArray<Return>::value &&
2307  !Traits::isAtomicSimdArray<Return>::value) ||
2308  (Traits::isSimdMaskArray<Return>::value &&
2309  !Traits::isAtomicSimdMaskArray<Return>::value))),
2310  Return>
2311  simd_cast_with_offset(const From &x)
2312 {
2313  using R0 = typename Return::storage_type0;
2314  using R1 = typename Return::storage_type1;
2315  return {simd_cast_with_offset<R0, offset>(x),
2316  simd_cast_with_offset<R1, offset + R0::Size>(x)};
2317 }
2318 template <typename Return, std::size_t offset, typename From>
2319 Vc_INTRINSIC Vc_CONST
2320  enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
2321  ((Traits::isSimdArray<Return>::value &&
2322  Traits::isAtomicSimdArray<Return>::value) ||
2323  (Traits::isSimdMaskArray<Return>::value &&
2324  Traits::isAtomicSimdMaskArray<Return>::value))),
2325  Return>
2326  simd_cast_with_offset(const From &x)
2327 {
2328  return simd_cast<Return, offset / Return::Size>(x.shifted(offset % Return::Size));
2329 }
2330 template <typename Return, std::size_t offset, typename From, typename... Froms>
2331 Vc_INTRINSIC Vc_CONST
2332  enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
2333  simd_cast_with_offset(const From &x, const Froms &... xs)
2334 {
2335  return simd_cast<Return>(x, xs...);
2336 }
2337 
2338 // simd_cast_without_last (definition) {{{2
2339 template <typename Return, typename T, typename... From>
2340 Vc_INTRINSIC Vc_CONST Return simd_cast_without_last(const From &... xs, const T &)
2341 {
2342  return simd_cast<Return>(xs...);
2343 }
2344 
2345 // simd_cast_interleaved_argument_order (definitions) {{{2
2346 
2347 #ifdef Vc_MSVC
2348 // MSVC doesn't see that the Ts pack below can be empty and thus complains when extract_interleaved
2349 // is called with only 2 arguments. These overloads here are *INCORRECT standard C++*, but they make
2350 // MSVC do the right thing.
2351 template <std::size_t I, typename T0>
2352 Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(const T0 &a0, const T0 &)
2353 {
2354  return a0;
2355 }
2356 template <std::size_t I, typename T0>
2357 Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(const T0 &, const T0 &b0)
2358 {
2359  return b0;
2360 }
2361 #endif // Vc_MSVC
2362 
2364 template <std::size_t I, typename T0, typename... Ts>
2365 Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(const T0 &a0,
2366  const Ts &...,
2367  const T0 &,
2368  const Ts &...)
2369 {
2370  return a0;
2371 }
2373 template <std::size_t I, typename T0, typename... Ts>
2374 Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(const T0 &,
2375  const Ts &...,
2376  const T0 &b0,
2377  const Ts &...)
2378 {
2379  return b0;
2380 }
2382 template <std::size_t I, typename T0, typename... Ts>
2383 Vc_INTRINSIC Vc_CONST enable_if<(I > 1), T0> extract_interleaved(const T0 &,
2384  const Ts &... a,
2385  const T0 &,
2386  const Ts &... b)
2387 {
2388  return extract_interleaved<I - 2, Ts...>(a..., b...);
2389 }
2391 template <typename Return, typename... Ts, std::size_t... Indexes>
2392 Vc_INTRINSIC Vc_CONST Return
2393  simd_cast_interleaved_argument_order_1(index_sequence<Indexes...>, const Ts &... a,
2394  const Ts &... b)
2395 {
2396  return simd_cast<Return>(extract_interleaved<Indexes, Ts...>(a..., b...)...);
2397 }
2400 template <typename Return, typename... Ts>
2401 Vc_INTRINSIC Vc_CONST Return
2402  simd_cast_interleaved_argument_order(const Ts &... a, const Ts &... b)
2403 {
2404  using seq = make_index_sequence<sizeof...(Ts)*2>;
2405  return simd_cast_interleaved_argument_order_1<Return, Ts...>(seq(), a..., b...);
2406 }
2407 
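The net effect of the interleaved argument order: for bisectable inputs the low halves of all arguments are forwarded first, followed by all high halves, and extract_interleaved restores the original low/high pairing before the final cast. A hedged sketch showing that the result therefore keeps the natural entry order of the arguments (names are illustrative only):

    void concatenate(const Vc::SimdArray<float, 8> &x0, const Vc::SimdArray<float, 8> &x1)
    {
        const auto r = Vc::simd_cast<Vc::SimdArray<float, 16>>(x0, x1);
        // r == {x0[0], ..., x0[7], x1[0], ..., x1[7]}
        (void)r;
    }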
2408 // conditional_assign {{{1
2409 #define Vc_CONDITIONAL_ASSIGN(name_, op_) \
2410  template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M, \
2411  typename U> \
2412  Vc_INTRINSIC enable_if<O == Operator::name_, void> conditional_assign( \
2413  SimdArray<T, N, V, VN> &lhs, M &&mask, U &&rhs) \
2414  { \
2415  lhs(mask) op_ rhs; \
2416  } \
2417  Vc_NOTHING_EXPECTING_SEMICOLON
2418 Vc_CONDITIONAL_ASSIGN( Assign, =);
2419 Vc_CONDITIONAL_ASSIGN( PlusAssign, +=);
2420 Vc_CONDITIONAL_ASSIGN( MinusAssign, -=);
2421 Vc_CONDITIONAL_ASSIGN( MultiplyAssign, *=);
2422 Vc_CONDITIONAL_ASSIGN( DivideAssign, /=);
2423 Vc_CONDITIONAL_ASSIGN( RemainderAssign, %=);
2424 Vc_CONDITIONAL_ASSIGN( XorAssign, ^=);
2425 Vc_CONDITIONAL_ASSIGN( AndAssign, &=);
2426 Vc_CONDITIONAL_ASSIGN( OrAssign, |=);
2427 Vc_CONDITIONAL_ASSIGN( LeftShiftAssign,<<=);
2428 Vc_CONDITIONAL_ASSIGN(RightShiftAssign,>>=);
2429 #undef Vc_CONDITIONAL_ASSIGN
2430 
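These overloads are the hooks used for masked assignment on SimdArray; the write-mask syntax defined earlier in this header expands to the same lhs(mask) op rhs form. A minimal sketch (names illustrative only):

    void clamp_negative(Vc::SimdArray<float, 8> &x)
    {
        const auto negative = x < Vc::SimdArray<float, 8>::Zero();
        x(negative) = Vc::SimdArray<float, 8>::Zero();  // assigns only where the mask is true
    }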
2431 #define Vc_CONDITIONAL_ASSIGN(name_, expr_) \
2432  template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M> \
2433  Vc_INTRINSIC enable_if<O == Operator::name_, SimdArray<T, N, V, VN>> \
2434  conditional_assign(SimdArray<T, N, V, VN> &lhs, M &&mask) \
2435  { \
2436  return expr_; \
2437  } \
2438  Vc_NOTHING_EXPECTING_SEMICOLON
2439 Vc_CONDITIONAL_ASSIGN(PostIncrement, lhs(mask)++);
2440 Vc_CONDITIONAL_ASSIGN( PreIncrement, ++lhs(mask));
2441 Vc_CONDITIONAL_ASSIGN(PostDecrement, lhs(mask)--);
2442 Vc_CONDITIONAL_ASSIGN( PreDecrement, --lhs(mask));
2443 #undef Vc_CONDITIONAL_ASSIGN
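The unary forms cover masked increment and decrement. A minimal sketch (names illustrative only):

    void count_hits(Vc::SimdArray<int, 8> &histogram, const Vc::SimdMaskArray<int, 8> &hit)
    {
        ++histogram(hit);  // only the entries selected by `hit` are incremented
    }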
2444 // transpose_impl {{{1
2445 namespace Common
2446 {
2447 template <typename T, size_t N, typename V>
2448 inline void transpose_impl(
2449  TransposeTag<4, 4>, SimdArray<T, N, V, N> *Vc_RESTRICT r[],
2450  const TransposeProxy<SimdArray<T, N, V, N>, SimdArray<T, N, V, N>,
2451  SimdArray<T, N, V, N>, SimdArray<T, N, V, N>> &proxy)
2452 {
2453  V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
2454  &internal_data(*r[2]), &internal_data(*r[3])};
2455  transpose_impl(TransposeTag<4, 4>(), &r2[0],
2456  TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
2457  internal_data(std::get<1>(proxy.in)),
2458  internal_data(std::get<2>(proxy.in)),
2459  internal_data(std::get<3>(proxy.in))});
2460 }
2461 
2462 template <typename T, typename V>
2463 inline void transpose_impl(
2464  TransposeTag<2, 4>, SimdArray<T, 4, V, 1> *Vc_RESTRICT r[],
2465  const TransposeProxy<SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>,
2466  SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>> &proxy)
2467 {
2468  auto &lo = *r[0];
2469  auto &hi = *r[1];
2470  internal_data0(internal_data0(lo)) = internal_data0(std::get<0>(proxy.in));
2471  internal_data1(internal_data0(lo)) = internal_data0(std::get<1>(proxy.in));
2472  internal_data0(internal_data1(lo)) = internal_data0(std::get<2>(proxy.in));
2473  internal_data1(internal_data1(lo)) = internal_data0(std::get<3>(proxy.in));
2474  internal_data0(internal_data0(hi)) = internal_data1(std::get<0>(proxy.in));
2475  internal_data1(internal_data0(hi)) = internal_data1(std::get<1>(proxy.in));
2476  internal_data0(internal_data1(hi)) = internal_data1(std::get<2>(proxy.in));
2477  internal_data1(internal_data1(hi)) = internal_data1(std::get<3>(proxy.in));
2478 }
2479 
2480 template <typename T, typename V>
2481 inline void transpose_impl(
2482  TransposeTag<4, 4>, SimdArray<T, 1, V, 1> *Vc_RESTRICT r[],
2483  const TransposeProxy<SimdArray<T, 1, V, 1>, SimdArray<T, 1, V, 1>,
2484  SimdArray<T, 1, V, 1>, SimdArray<T, 1, V, 1>> &proxy)
2485 {
2486  V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
2487  &internal_data(*r[2]), &internal_data(*r[3])};
2488  transpose_impl(TransposeTag<4, 4>(), &r2[0],
2489  TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
2490  internal_data(std::get<1>(proxy.in)),
2491  internal_data(std::get<2>(proxy.in)),
2492  internal_data(std::get<3>(proxy.in))});
2493 }
2494 
2495 template <typename T, size_t N, typename V>
2496 inline void transpose_impl(
2497  TransposeTag<4, 4>, SimdArray<T, N, V, 1> *Vc_RESTRICT r[],
2498  const TransposeProxy<SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>,
2499  SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>> &proxy)
2500 {
2501  SimdArray<T, N, V, 1> *Vc_RESTRICT r0[4 / 2] = {r[0], r[1]};
2502  SimdArray<T, N, V, 1> *Vc_RESTRICT r1[4 / 2] = {r[2], r[3]};
2503  using H = SimdArray<T, 2>;
2504  transpose_impl(TransposeTag<2, 4>(), &r0[0],
2505  TransposeProxy<H, H, H, H>{internal_data0(std::get<0>(proxy.in)),
2506  internal_data0(std::get<1>(proxy.in)),
2507  internal_data0(std::get<2>(proxy.in)),
2508  internal_data0(std::get<3>(proxy.in))});
2509  transpose_impl(TransposeTag<2, 4>(), &r1[0],
2510  TransposeProxy<H, H, H, H>{internal_data1(std::get<0>(proxy.in)),
2511  internal_data1(std::get<1>(proxy.in)),
2512  internal_data1(std::get<2>(proxy.in)),
2513  internal_data1(std::get<3>(proxy.in))});
2514 }
2515 
2516 /* TODO:
2517 template <typename T, std::size_t N, typename V, std::size_t VSize>
2518 inline enable_if<(N > VSize), void> transpose_impl(
2519  std::array<SimdArray<T, N, V, VSize> * Vc_RESTRICT, 4> & r,
2520  const TransposeProxy<SimdArray<T, N, V, VSize>, SimdArray<T, N, V, VSize>,
2521  SimdArray<T, N, V, VSize>, SimdArray<T, N, V, VSize>> &proxy)
2522 {
2523  typedef SimdArray<T, N, V, VSize> SA;
2524  std::array<typename SA::storage_type0 * Vc_RESTRICT, 4> r0 = {
2525  {&internal_data0(*r[0]), &internal_data0(*r[1]), &internal_data0(*r[2]),
2526  &internal_data0(*r[3])}};
2527  transpose_impl(
2528  r0, TransposeProxy<typename SA::storage_type0, typename SA::storage_type0,
2529  typename SA::storage_type0, typename SA::storage_type0>{
2530  internal_data0(std::get<0>(proxy.in)),
2531  internal_data0(std::get<1>(proxy.in)),
2532  internal_data0(std::get<2>(proxy.in)),
2533  internal_data0(std::get<3>(proxy.in))});
2534 
2535  std::array<typename SA::storage_type1 * Vc_RESTRICT, 4> r1 = {
2536  {&internal_data1(*r[0]), &internal_data1(*r[1]), &internal_data1(*r[2]),
2537  &internal_data1(*r[3])}};
2538  transpose_impl(
2539  r1, TransposeProxy<typename SA::storage_type1, typename SA::storage_type1,
2540  typename SA::storage_type1, typename SA::storage_type1>{
2541  internal_data1(std::get<0>(proxy.in)),
2542  internal_data1(std::get<1>(proxy.in)),
2543  internal_data1(std::get<2>(proxy.in)),
2544  internal_data1(std::get<3>(proxy.in))});
2545 }
2546 */
2547 } // namespace Common
2548 
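For orientation, these transpose_impl overloads are reached through the transpose()/tie() helpers declared in transpose.h (included at the top of this header). The sketch below assumes those helper names; they are not defined in this file:

    void transpose_4x4(const Vc::SimdArray<float, 4> &x0, const Vc::SimdArray<float, 4> &x1,
                       const Vc::SimdArray<float, 4> &x2, const Vc::SimdArray<float, 4> &x3,
                       Vc::SimdArray<float, 4> &r0, Vc::SimdArray<float, 4> &r1,
                       Vc::SimdArray<float, 4> &r2, Vc::SimdArray<float, 4> &r3)
    {
        // assumed API from transpose.h: tie() collects the output references,
        // transpose() builds the TransposeProxy consumed by transpose_impl above
        Vc::tie(r0, r1, r2, r3) = Vc::transpose(x0, x1, x2, x3);
        // afterwards ri[j] == xj[i], i.e. rows and columns are exchanged
    }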
2549 // Traits static assertions {{{1
2550 static_assert(Traits::has_no_allocated_data<const volatile Vc::SimdArray<int, 4> &>::value, "");
2551 static_assert(Traits::has_no_allocated_data<const volatile Vc::SimdArray<int, 4>>::value, "");
2552 static_assert(Traits::has_no_allocated_data<volatile Vc::SimdArray<int, 4> &>::value, "");
2553 static_assert(Traits::has_no_allocated_data<volatile Vc::SimdArray<int, 4>>::value, "");
2554 static_assert(Traits::has_no_allocated_data<const Vc::SimdArray<int, 4> &>::value, "");
2555 static_assert(Traits::has_no_allocated_data<const Vc::SimdArray<int, 4>>::value, "");
2556 static_assert(Traits::has_no_allocated_data<Vc::SimdArray<int, 4>>::value, "");
2557 static_assert(Traits::has_no_allocated_data<Vc::SimdArray<int, 4> &&>::value, "");
2558 // }}}1
2560 
2561 } // namespace Vc_VERSIONED_NAMESPACE
2562 
2563 // numeric_limits {{{1
2564 namespace std
2565 {
2566 template <typename T, size_t N, typename V, size_t VN>
2567 struct numeric_limits<Vc::SimdArray<T, N, V, VN>> : public numeric_limits<T> {
2568 private:
2569  using R = Vc::SimdArray<T, N, V, VN>;
2570 
2571 public:
2572  static Vc_ALWAYS_INLINE Vc_CONST R max() noexcept { return numeric_limits<T>::max(); }
2573  static Vc_ALWAYS_INLINE Vc_CONST R min() noexcept { return numeric_limits<T>::min(); }
2574  static Vc_ALWAYS_INLINE Vc_CONST R lowest() noexcept
2575  {
2576  return numeric_limits<T>::lowest();
2577  }
2578  static Vc_ALWAYS_INLINE Vc_CONST R epsilon() noexcept
2579  {
2580  return numeric_limits<T>::epsilon();
2581  }
2582  static Vc_ALWAYS_INLINE Vc_CONST R round_error() noexcept
2583  {
2584  return numeric_limits<T>::round_error();
2585  }
2586  static Vc_ALWAYS_INLINE Vc_CONST R infinity() noexcept
2587  {
2588  return numeric_limits<T>::infinity();
2589  }
2590  static Vc_ALWAYS_INLINE Vc_CONST R quiet_NaN() noexcept
2591  {
2592  return numeric_limits<T>::quiet_NaN();
2593  }
2594  static Vc_ALWAYS_INLINE Vc_CONST R signaling_NaN() noexcept
2595  {
2596  return numeric_limits<T>::signaling_NaN();
2597  }
2598  static Vc_ALWAYS_INLINE Vc_CONST R denorm_min() noexcept
2599  {
2600  return numeric_limits<T>::denorm_min();
2601  }
2602 };
2603 } // namespace std
2604 //}}}1
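A brief usage note on the specialization above: the scalar limits are broadcast into full SimdArray objects, so the usual numeric_limits idioms keep working for these types. A minimal sketch:

    #include <limits>

    using A = Vc::SimdArray<float, 8>;
    const A lowest  = std::numeric_limits<A>::lowest();   // every entry = std::numeric_limits<float>::lowest()
    const A epsilon = std::numeric_limits<A>::epsilon();  // every entry = std::numeric_limits<float>::epsilon()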
2605 
2606 #endif // VC_COMMON_SIMDARRAY_H_
2607 
2608 // vim: foldmethod=marker