// Vc 1.0.0-dev — SIMD Vector Classes for C++ — simdarray.h
1 /* This file is part of the Vc library. {{{
2 Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7  * Redistributions of source code must retain the above copyright
8  notice, this list of conditions and the following disclaimer.
9  * Redistributions in binary form must reproduce the above copyright
10  notice, this list of conditions and the following disclaimer in the
11  documentation and/or other materials provided with the distribution.
12  * Neither the names of contributing organizations nor the
13  names of its contributors may be used to endorse or promote products
14  derived from this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
20 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 }}}*/
28 
29 #ifndef VC_COMMON_SIMDARRAY_H_
30 #define VC_COMMON_SIMDARRAY_H_
31 
32 //#define Vc_DEBUG_SIMD_CAST 1
33 //#define Vc_DEBUG_SORTED 1
34 #if defined Vc_DEBUG_SIMD_CAST || defined Vc_DEBUG_SORTED
35 #include <Vc/IO>
36 #endif
37 
38 #include <array>
39 
40 #include "writemaskedvector.h"
41 #include "simdarrayhelper.h"
42 #include "simdmaskarray.h"
43 #include "utility.h"
44 #include "interleave.h"
45 #include "indexsequence.h"
46 #include "transpose.h"
47 #include "macros.h"
48 
49 namespace Vc_VERSIONED_NAMESPACE
50 {
// internal namespace (min/max helper) {{{1
namespace internal
{
// Declares elementwise min/max overloads for both SimdArray specializations:
// the generic one (wrapped count M) and the atomic one (M == N). The
// definitions are provided elsewhere; only the declarations are needed here.
#define Vc_DECLARE_BINARY_FUNCTION__(name__)                                             \
    template <typename T, std::size_t N, typename V, std::size_t M>                      \
    SimdArray<T, N, V, M> Vc_INTRINSIC_L Vc_PURE_L                                       \
        name__(const SimdArray<T, N, V, M> &l, const SimdArray<T, N, V, M> &r)           \
            Vc_INTRINSIC_R Vc_PURE_R;                                                    \
    template <typename T, std::size_t N, typename V>                                     \
    SimdArray<T, N, V, N> Vc_INTRINSIC_L Vc_PURE_L                                       \
        name__(const SimdArray<T, N, V, N> &l, const SimdArray<T, N, V, N> &r)           \
            Vc_INTRINSIC_R Vc_PURE_R;
Vc_DECLARE_BINARY_FUNCTION__(min)
Vc_DECLARE_BINARY_FUNCTION__(max)
#undef Vc_DECLARE_BINARY_FUNCTION__

// Generic min: start from l and use Vc write-masking to overwrite, in x,
// exactly those lanes where r compares smaller. Works for any T supporting
// where()-masked assignment (vectors as well as scalars).
template <typename T> Vc_INTRINSIC Vc_PURE T min(const T &l, const T &r)
{
    T x = l;
    where(r < l) | x = r;
    return x;
}
// Generic max: same scheme as min, with the comparison inverted.
template <typename T> Vc_INTRINSIC Vc_PURE T max(const T &l, const T &r)
{
    T x = l;
    where(r > l) | x = r;
    return x;
}
// Binary-functor helpers used as the combining step of the reduction macros
// in the generic SimdArray (product/sum over the two storage halves).
template <typename T> T Vc_INTRINSIC Vc_PURE product_helper__(const T &l, const T &r) { return l * r; }
template <typename T> T Vc_INTRINSIC Vc_PURE sum_helper__(const T &l, const T &r) { return l + r; }
} // namespace internal
82 
83 // SimdArray class {{{1
86 
// atomic SimdArray {{{1
// Vc_CURRENT_CLASS_NAME is presumably consumed by the interface headers
// included inside the class body (gatherinterface.h) to name the class they
// are generating members for — TODO confirm against gatherinterface.h.
#define Vc_CURRENT_CLASS_NAME SimdArray

// "Atomic" specialization of SimdArray: the case where the requested width N
// is covered by a single native SIMD vector (fourth template argument equals
// N). Every member simply forwards to the single wrapped vector `data`.
//
// The alignas expression computes nextPowerOfTwo(N) * bytes-per-element and
// clamps the result to at most 128 bytes via `((x - 1) & 127) + 1`.
template <typename T, std::size_t N, typename VectorType_>
class alignas(
    ((Common::nextPowerOfTwo(N) * (sizeof(VectorType_) / VectorType_::size()) - 1) & 127) +
    1) SimdArray<T, N, VectorType_, N>
{
    // Restrict the element type to the arithmetic types Vc supports natively.
    static_assert(std::is_same<T, double>::value || std::is_same<T, float>::value ||
                      std::is_same<T, int32_t>::value ||
                      std::is_same<T, uint32_t>::value ||
                      std::is_same<T, int16_t>::value ||
                      std::is_same<T, uint16_t>::value,
                  "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, "
                  "int16_t, uint16_t }");

public:
    // The wrapped native vector type; `storage_type` is what `data` is
    // declared as.
    using VectorType = VectorType_;
    using vector_type = VectorType;
    using storage_type = vector_type;
    using vectorentry_type = typename vector_type::VectorEntryType;
    using value_type = T;
    using mask_type = SimdMaskArray<T, N, vector_type>;
    using index_type = SimdArray<int, N>;
    static constexpr std::size_t size() { return N; }
    // Legacy/camel-case aliases kept for interface compatibility with Vector.
    using Mask = mask_type;
    using MaskType = Mask;
    using MaskArgument = const MaskType &;
    using VectorEntryType = vectorentry_type;
    using EntryType = value_type;
    using IndexType = index_type;
    using AsArg = const SimdArray &;
    static constexpr std::size_t Size = size();
    static constexpr std::size_t MemoryAlignment = storage_type::MemoryAlignment;

    // zero init
    Vc_INTRINSIC SimdArray() = default;

    // default copy ctor/operator
    Vc_INTRINSIC SimdArray(const SimdArray &) = default;
    Vc_INTRINSIC SimdArray(SimdArray &&) = default;
    Vc_INTRINSIC SimdArray &operator=(const SimdArray &) = default;

    // broadcast: fill all N lanes with the value a (const-lvalue, non-const
    // lvalue, and rvalue overloads).
    Vc_INTRINSIC SimdArray(const value_type &a) : data(a) {}
    Vc_INTRINSIC SimdArray(value_type &a) : data(a) {}
    Vc_INTRINSIC SimdArray(value_type &&a) : data(a) {}
    // Allow broadcast from a plain int literal when value_type is not int
    // (e.g. SimdArray<float, N> x(0);) by converting through value_type.
    template <
        typename U,
        typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
    Vc_INTRINSIC SimdArray(U a)
        : SimdArray(static_cast<value_type>(a))
    {
    }

    // implicit casts
    // Converting constructors from SimdArray of equal width N but different
    // element/vector type. The source may be built from 1, 2, or 4 native
    // vectors; each overload feeds the corresponding pieces to simd_cast.
    template <typename U, typename V>
    Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x, enable_if<N == V::size()> = nullarg)
        : data(simd_cast<vector_type>(internal_data(x)))
    {
    }
    template <typename U, typename V>
    Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x,
                           enable_if<(N > V::size() && N <= 2 * V::size())> = nullarg)
        : data(simd_cast<vector_type>(internal_data(internal_data0(x)), internal_data(internal_data1(x))))
    {
    }
    template <typename U, typename V>
    Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x,
                           enable_if<(N > 2 * V::size() && N <= 4 * V::size())> = nullarg)
        : data(simd_cast<vector_type>(internal_data(internal_data0(internal_data0(x))),
                                      internal_data(internal_data1(internal_data0(x))),
                                      internal_data(internal_data0(internal_data1(x))),
                                      internal_data(internal_data1(internal_data1(x)))))
    {
    }

    // Construct from a Segment (a piece of a larger vector); Index selects
    // which part of x.data the simd_cast extracts.
    template <typename V, std::size_t Pieces, std::size_t Index>
    Vc_INTRINSIC SimdArray(Common::Segment<V, Pieces, Index> &&x)
        : data(simd_cast<vector_type, Index>(x.data))
    {
    }

    // Initializer-list construction: unaligned load from the list's backing
    // storage. The list must contain exactly N values (runtime-asserted; the
    // constexpr check is disabled because it does not compile yet).
    Vc_INTRINSIC SimdArray(const std::initializer_list<value_type> &init)
        : data(init.begin(), Vc::Unaligned)
    {
#if defined Vc_CXX14 && 0 // doesn't compile yet
        static_assert(init.size() == size(), "The initializer_list argument to "
                                             "SimdArray<T, N> must contain exactly N "
                                             "values.");
#else
        Vc_ASSERT(init.size() == size());
#endif
    }

    // implicit conversion from underlying vector_type
    template <
        typename V,
        typename = enable_if<Traits::is_simd_vector<V>::value && !Traits::isSimdArray<V>::value>>
    explicit Vc_INTRINSIC SimdArray(const V &x)
        : data(simd_cast<vector_type>(x))
    {
    }

    // implicit conversion to Vector<U, AnyAbi> for if Vector<U, AnyAbi>::size() == N and
    // T implicitly convertible to U
    template <typename V,
              typename = enable_if<
                  Traits::is_simd_vector<V>::value && !Traits::isSimdArray<V>::value &&
                  std::is_convertible<T, typename V::EntryType>::value && V::size() == N>>
    Vc_INTRINSIC operator V() const
    {
        return simd_cast<V>(*this);
    }

#include "gatherinterface.h"

    // forward all remaining ctors
    // Catch-all: anything that is not a cast, gather, or initializer_list
    // signature is perfect-forwarded to the wrapped vector's constructors.
    template <typename... Args,
              typename = enable_if<!Traits::is_cast_arguments<Args...>::value &&
                                   !Traits::is_gather_signature<Args...>::value &&
                                   !Traits::is_initializer_list<Args...>::value>>
    explicit Vc_INTRINSIC SimdArray(Args &&... args)
        : data(std::forward<Args>(args)...)
    {
    }

    // IndexesFromZero with a compile-time offset: build 0..N-1, then add the
    // offset at runtime.
    template <std::size_t Offset>
    explicit Vc_INTRINSIC SimdArray(
        Common::AddOffset<VectorSpecialInitializerIndexesFromZero, Offset>)
        : data(Vc::IndexesFromZero)
    {
        data += value_type(Offset);
    }

    // Zeroing, optionally write-masked; masks are unwrapped via internal_data.
    Vc_INTRINSIC void setZero() { data.setZero(); }
    Vc_INTRINSIC void setZero(mask_type k) { data.setZero(internal_data(k)); }
    Vc_INTRINSIC void setZeroInverted() { data.setZeroInverted(); }
    Vc_INTRINSIC void setZeroInverted(mask_type k) { data.setZeroInverted(internal_data(k)); }

    // internal: execute specified Operation
    template <typename Op, typename... Args>
    static Vc_INTRINSIC SimdArray fromOperation(Op op, Args &&... args)
    {
        SimdArray r;
        op(r.data, Common::actual_value(op, std::forward<Args>(args))...);
        return r;
    }

    // Named constructors mirroring the Vector interface.
    static Vc_INTRINSIC SimdArray Zero()
    {
        return SimdArray(Vc::Zero);
    }
    static Vc_INTRINSIC SimdArray One()
    {
        return SimdArray(Vc::One);
    }
    static Vc_INTRINSIC SimdArray IndexesFromZero()
    {
        return SimdArray(Vc::IndexesFromZero);
    }
    static Vc_INTRINSIC SimdArray Random()
    {
        return fromOperation(Common::Operations::random());
    }

    // Loads/stores forward verbatim to the wrapped vector.
    template <typename... Args> Vc_INTRINSIC void load(Args &&... args)
    {
        data.load(std::forward<Args>(args)...);
    }

    template <typename... Args> Vc_INTRINSIC void store(Args &&... args) const
    {
        data.store(std::forward<Args>(args)...);
    }

    // Unary operators: lanewise negation/complement; ! yields a mask.
    Vc_INTRINSIC mask_type operator!() const
    {
        return {!data};
    }

    Vc_INTRINSIC SimdArray operator-() const
    {
        return {-data};
    }

    Vc_INTRINSIC SimdArray operator~() const
    {
        return {~data};
    }

    // Shifts by a scalar count; only enabled for integral element types.
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC Vc_CONST SimdArray operator<<(U x) const
    {
        return {data << x};
    }
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC SimdArray &operator<<=(U x)
    {
        data <<= x;
        return *this;
    }
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC Vc_CONST SimdArray operator>>(U x) const
    {
        return {data >> x};
    }
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC SimdArray &operator>>=(U x)
    {
        data >>= x;
        return *this;
    }

// Generates op and op= for all arithmetic, bitwise, and shift operators;
// each forwards lanewise to the wrapped vector.
#define Vc_BINARY_OPERATOR_(op)                                                          \
    Vc_INTRINSIC Vc_CONST SimdArray operator op(const SimdArray &rhs) const              \
    {                                                                                    \
        return {data op rhs.data};                                                       \
    }                                                                                    \
    Vc_INTRINSIC SimdArray &operator op##=(const SimdArray &rhs)                         \
    {                                                                                    \
        data op## = rhs.data;                                                            \
        return *this;                                                                    \
    }
    Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_)
    Vc_ALL_BINARY(Vc_BINARY_OPERATOR_)
    Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_)
#undef Vc_BINARY_OPERATOR_

// Generates all comparison operators; each returns a lanewise mask.
#define Vc_COMPARES(op)                                                                  \
    Vc_INTRINSIC mask_type operator op(const SimdArray &rhs) const                       \
    {                                                                                    \
        return {data op rhs.data};                                                       \
    }
    Vc_ALL_COMPARES(Vc_COMPARES)
#undef Vc_COMPARES

    // Element access: the non-const overload returns whatever reference-like
    // type the wrapped vector's operator[] yields; the const overload returns
    // by value.
    Vc_INTRINSIC decltype(std::declval<vector_type &>()[0]) operator[](std::size_t i)
    {
        return data[i];
    }
    Vc_INTRINSIC value_type operator[](std::size_t i) const { return data[i]; }

    // Write-masked view: v(k) = x assigns only where k is set.
    Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(const mask_type &k)
    {
        return {this, k};
    }

    Vc_INTRINSIC void assign(const SimdArray &v, const mask_type &k)
    {
        data.assign(v.data, internal_data(k));
    }

    // reductions ////////////////////////////////////////////////////////
// Generates min/max/product/sum, each with an unmasked and a masked overload
// forwarding to the wrapped vector's reduction.
#define Vc_REDUCTION_FUNCTION__(name__)                                                  \
    Vc_INTRINSIC Vc_PURE value_type name__() const { return data.name__(); }             \
                                                                                         \
    Vc_INTRINSIC Vc_PURE value_type name__(mask_type mask) const                         \
    {                                                                                    \
        return data.name__(internal_data(mask));                                         \
    }
    Vc_REDUCTION_FUNCTION__(min)
    Vc_REDUCTION_FUNCTION__(max)
    Vc_REDUCTION_FUNCTION__(product)
    Vc_REDUCTION_FUNCTION__(sum)
#undef Vc_REDUCTION_FUNCTION__
    Vc_INTRINSIC Vc_PURE SimdArray partialSum() const { return data.partialSum(); }

    // this = this * factor + summand, lanewise.
    Vc_INTRINSIC void fusedMultiplyAdd(const SimdArray &factor, const SimdArray &summand)
    {
        data.fusedMultiplyAdd(internal_data(factor), internal_data(summand));
    }

    template <typename F> Vc_INTRINSIC SimdArray apply(F &&f) const
    {
        return {data.apply(std::forward<F>(f))};
    }
    // NOTE(review): the masked overload passes `k` (a SimdMaskArray) straight
    // to the wrapped vector's apply, whereas sibling masked members use
    // internal_data(k) — verify the intended mask conversion.
    template <typename F> Vc_INTRINSIC SimdArray apply(F &&f, const mask_type &k) const
    {
        return {data.apply(std::forward<F>(f), k)};
    }

    Vc_INTRINSIC SimdArray shifted(int amount) const
    {
        return {data.shifted(amount)};
    }

    // Shift with carry-in from another (possibly differently sized) SimdArray.
    template <std::size_t NN>
    Vc_INTRINSIC SimdArray shifted(int amount, const SimdArray<value_type, NN> &shiftIn)
        const
    {
        return {data.shifted(amount, simd_cast<VectorType>(shiftIn))};
    }

    Vc_INTRINSIC SimdArray rotated(int amount) const
    {
        return {data.rotated(amount)};
    }

    Vc_INTRINSIC SimdArray interleaveLow(SimdArray x) const
    {
        return {data.interleaveLow(x.data)};
    }
    Vc_INTRINSIC SimdArray interleaveHigh(SimdArray x) const
    {
        return {data.interleaveHigh(x.data)};
    }

    Vc_INTRINSIC SimdArray reversed() const
    {
        return {data.reversed()};
    }

    Vc_INTRINSIC SimdArray sorted() const
    {
        return {data.sorted()};
    }

    // Fill from a generator callable: element i gets gen(i).
    template <typename G> static Vc_INTRINSIC SimdArray generate(const G &gen)
    {
        return {VectorType::generate(gen)};
    }

    // Free-function access to the wrapped native vector (see definitions
    // below the class).
    friend VectorType &internal_data<>(SimdArray &x);
    friend const VectorType &internal_data<>(const SimdArray &x);

    // Wrap an rvalue native vector directly.
    Vc_INTRINSIC SimdArray(VectorType &&x) : data(std::move(x)) {}
private:
    storage_type data;  // the single native SIMD vector backing all N lanes
};
432 template <typename T, std::size_t N, typename VectorType> constexpr std::size_t SimdArray<T, N, VectorType, N>::Size;
433 template <typename T, std::size_t N, typename VectorType>
// Grants free-function access to the wrapped native vector of an atomic
// SimdArray (declared as a friend inside the class). Mutable overload.
template <typename T, std::size_t N, typename VectorType>
Vc_INTRINSIC VectorType &internal_data(SimdArray<T, N, VectorType, N> &x)
{
    return x.data;
}
// Const overload: read-only access to the wrapped native vector of an atomic
// SimdArray (declared as a friend inside the class).
template <typename T, std::size_t N, typename VectorType>
Vc_INTRINSIC const VectorType &internal_data(const SimdArray<T, N, VectorType, N> &x)
{
    return x.data;
}
445 
// gatherImplementation {{{2
// Unmasked gather for the atomic specialization: load the elements addressed
// by mem[indexes[i]] into lane i, forwarding straight to the native vector's
// gather. Declared by the in-class #include "gatherinterface.h".
template <typename T, std::size_t N, typename VectorType>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, N>::gatherImplementation(const MT *mem,
                                                                 IT &&indexes)
{
    data.gather(mem, std::forward<IT>(indexes));
}
// Masked gather for the atomic specialization: only lanes selected by `mask`
// are loaded from mem[indexes[i]]; the rest of `data` is left untouched by
// the underlying masked gather.
template <typename T, std::size_t N, typename VectorType>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, N>::gatherImplementation(const MT *mem,
                                                                 IT &&indexes,
                                                                 MaskArgument mask)
{
    data.gather(mem, std::forward<IT>(indexes), mask);
}
462 
463 // generic SimdArray {{{1
464 template <typename T, std::size_t N, typename VectorType, std::size_t>
465 class alignas(
467  ((Common::nextPowerOfTwo(N) * (sizeof(VectorType) / VectorType::size()) - 1) & 127) +
469  1) SimdArray
470 {
471  static_assert(std::is_same<T, double>::value ||
472  std::is_same<T, float>::value ||
473  std::is_same<T, int32_t>::value ||
474  std::is_same<T, uint32_t>::value ||
475  std::is_same<T, int16_t>::value ||
476  std::is_same<T, uint16_t>::value, "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, int16_t, uint16_t }");
477 
478  using my_traits = SimdArrayTraits<T, N>;
479  static constexpr std::size_t N0 = my_traits::N0;
480  static constexpr std::size_t N1 = my_traits::N1;
481  using Split = Common::Split<N0>;
482 
483 public:
484  using storage_type0 = typename my_traits::storage_type0;
485  using storage_type1 = typename my_traits::storage_type1;
486  static_assert(storage_type0::size() == N0, "");
487 
488  using vector_type = VectorType;
489  using vectorentry_type = typename storage_type0::vectorentry_type;
490  typedef vectorentry_type alias_type Vc_MAY_ALIAS;
491  using value_type = T;
492  using mask_type = SimdMaskArray<T, N, vector_type>;
493  using index_type = SimdArray<int, N>;
494  static constexpr std::size_t size() { return N; }
495  using Mask = mask_type;
496  using MaskType = Mask;
497  using MaskArgument = const MaskType &;
498  using VectorEntryType = vectorentry_type;
499  using EntryType = value_type;
500  using IndexType = index_type;
501  using AsArg = const SimdArray &;
502  static constexpr std::size_t Size = size();
503  static constexpr std::size_t MemoryAlignment =
507 
509 
510  // zero init
511  SimdArray() = default;
512 
513  // default copy ctor/operator
514  SimdArray(const SimdArray &) = default;
515  SimdArray(SimdArray &&) = default;
516  SimdArray &operator=(const SimdArray &) = default;
517 
518  // broadcast
519  Vc_INTRINSIC SimdArray(value_type a) : data0(a), data1(a) {}
520  template <
521  typename U,
522  typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
523  SimdArray(U a)
524  : SimdArray(static_cast<value_type>(a))
525  {
526  }
527 
528  // load ctor
529  template <typename U,
530  typename Flags = DefaultLoadTag,
531  typename = enable_if<Traits::is_load_store_flag<Flags>::value>>
532  explicit Vc_INTRINSIC SimdArray(const U *mem, Flags f = Flags())
533  : data0(mem, f), data1(mem + storage_type0::size(), f)
534  {
535  }
536 
537  // initializer list
538  Vc_INTRINSIC SimdArray(const std::initializer_list<value_type> &init)
539  : data0(init.begin(), Vc::Unaligned)
540  , data1(init.begin() + storage_type0::size(), Vc::Unaligned)
541  {
542 #if defined Vc_CXX14 && 0 // doesn't compile yet
543  static_assert(init.size() == size(), "The initializer_list argument to "
544  "SimdArray<T, N> must contain exactly N "
545  "values.");
546 #else
547  Vc_ASSERT(init.size() == size());
548 #endif
549  }
550 
551 #include "gatherinterface.h"
552 
553  // forward all remaining ctors
554  template <typename... Args,
555  typename = enable_if<!Traits::is_cast_arguments<Args...>::value &&
556  !Traits::is_initializer_list<Args...>::value &&
557  !Traits::is_gather_signature<Args...>::value &&
558  !Traits::is_load_arguments<Args...>::value>>
559  explicit Vc_INTRINSIC SimdArray(Args &&... args)
560  : data0(Split::lo(args)...) // no forward here - it could move and thus
561  // break the next line
562  , data1(Split::hi(std::forward<Args>(args))...)
563  {
564  }
565 
566  // explicit casts
567  template <typename V>
568  Vc_INTRINSIC explicit SimdArray(
569  V &&x,
570  enable_if<(Traits::is_simd_vector<V>::value && Traits::simd_vector_size<V>::value == N &&
571  !(std::is_convertible<Traits::entry_type_of<V>, T>::value &&
572  Traits::isSimdArray<V>::value))> = nullarg)
573  : data0(Split::lo(x)), data1(Split::hi(x))
574  {
575  }
576 
577  // implicit casts
578  template <typename V>
579  Vc_INTRINSIC SimdArray(
580  V &&x,
581  enable_if<(Traits::isSimdArray<V>::value && Traits::simd_vector_size<V>::value == N &&
582  std::is_convertible<Traits::entry_type_of<V>, T>::value)> = nullarg)
583  : data0(Split::lo(x)), data1(Split::hi(x))
584  {
585  }
586 
587  // implicit conversion to Vector<U, AnyAbi> for if Vector<U, AnyAbi>::size() == N and
588  // T implicitly convertible to U
589  template <typename V,
590  typename = enable_if<
591  Traits::is_simd_vector<V>::value && !Traits::isSimdArray<V>::value &&
592  std::is_convertible<T, typename V::EntryType>::value && V::size() == N>>
593  operator V() const
594  {
595  return simd_cast<V>(*this);
596  }
597 
599 
600  Vc_INTRINSIC void setZero()
601  {
602  data0.setZero();
603  data1.setZero();
604  }
605  Vc_INTRINSIC void setZero(const mask_type &k)
606  {
607  data0.setZero(Split::lo(k));
608  data1.setZero(Split::hi(k));
609  }
610  Vc_INTRINSIC void setZeroInverted()
611  {
612  data0.setZeroInverted();
613  data1.setZeroInverted();
614  }
615  Vc_INTRINSIC void setZeroInverted(const mask_type &k)
616  {
617  data0.setZeroInverted(Split::lo(k));
618  data1.setZeroInverted(Split::hi(k));
619  }
620 
621  // internal: execute specified Operation
622  template <typename Op, typename... Args>
623  static Vc_INTRINSIC SimdArray fromOperation(Op op, Args &&... args)
624  {
625  SimdArray r = {
626  storage_type0::fromOperation(op, Split::lo(args)...), // no forward here - it
627  // could move and thus
628  // break the next line
629  storage_type1::fromOperation(op, Split::hi(std::forward<Args>(args))...)};
630  return r;
631  }
632 
633  static Vc_INTRINSIC SimdArray Zero()
634  {
635  return SimdArray(Vc::Zero);
636  }
637  static Vc_INTRINSIC SimdArray One()
638  {
639  return SimdArray(Vc::One);
640  }
641  static Vc_INTRINSIC SimdArray IndexesFromZero()
642  {
643  return SimdArray(Vc::IndexesFromZero);
644  }
645  static Vc_INTRINSIC SimdArray Random()
646  {
647  return fromOperation(Common::Operations::random());
648  }
649 
650  template <typename U, typename... Args> Vc_INTRINSIC void load(const U *mem, Args &&... args)
651  {
652  data0.load(mem, Split::lo(args)...); // no forward here - it could move and thus
653  // break the next line
654  data1.load(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
655  }
656 
657  template <typename U, typename... Args> Vc_INTRINSIC void store(U *mem, Args &&... args) const
658  {
659  data0.store(mem, Split::lo(args)...); // no forward here - it could move and thus
660  // break the next line
661  data1.store(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
662  }
663 
664  Vc_INTRINSIC mask_type operator!() const
665  {
666  return {!data0, !data1};
667  }
668 
669  Vc_INTRINSIC SimdArray operator-() const
670  {
671  return {-data0, -data1};
672  }
673 
674  Vc_INTRINSIC SimdArray operator~() const
675  {
676  return {~data0, ~data1};
677  }
678 
679  // left/right shift operators {{{2
680  template <typename U,
681  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
682  Vc_INTRINSIC Vc_CONST SimdArray operator<<(U x) const
683  {
684  return {data0 << x, data1 << x};
685  }
686  template <typename U,
687  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
688  Vc_INTRINSIC SimdArray &operator<<=(U x)
689  {
690  data0 <<= x;
691  data1 <<= x;
692  return *this;
693  }
694  template <typename U,
695  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
696  Vc_INTRINSIC Vc_CONST SimdArray operator>>(U x) const
697  {
698  return {data0 >> x, data1 >> x};
699  }
700  template <typename U,
701  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
702  Vc_INTRINSIC SimdArray &operator>>=(U x)
703  {
704  data0 >>= x;
705  data1 >>= x;
706  return *this;
707  }
708 
709  // binary operators {{{2
710 #define Vc_BINARY_OPERATOR_(op) \
711  Vc_INTRINSIC Vc_CONST SimdArray operator op(const SimdArray &rhs) const \
712  { \
713  return {data0 op rhs.data0, data1 op rhs.data1}; \
714  } \
715  Vc_INTRINSIC SimdArray &operator op##=(const SimdArray &rhs) \
716  { \
717  data0 op## = rhs.data0; \
718  data1 op## = rhs.data1; \
719  return *this; \
720  }
721  Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_)
722  Vc_ALL_BINARY(Vc_BINARY_OPERATOR_)
723  Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_)
724 #undef Vc_BINARY_OPERATOR_
725 
726 #define Vc_COMPARES(op) \
727  Vc_INTRINSIC mask_type operator op(const SimdArray &rhs) const \
728  { \
729  return {data0 op rhs.data0, data1 op rhs.data1}; \
730  }
731  Vc_ALL_COMPARES(Vc_COMPARES)
732 #undef Vc_COMPARES
733 
734  // operator[] {{{2
735  Vc_INTRINSIC value_type operator[](std::size_t i) const
736  {
737  const auto tmp = reinterpret_cast<const alias_type *>(this);
738  return tmp[i];
739  }
740 
741  Vc_INTRINSIC alias_type &operator[](std::size_t i)
742  {
743  auto tmp = reinterpret_cast<alias_type *>(this);
744  return tmp[i];
745  }
746 
747  Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(const mask_type &k) //{{{2
748  {
749  return {this, k};
750  }
751 
752  Vc_INTRINSIC void assign(const SimdArray &v, const mask_type &k) //{{{2
753  {
754  data0.assign(v.data0, internal_data0(k));
755  data1.assign(v.data1, internal_data1(k));
756  }
757 
758  // reductions {{{2
759 #define Vc_REDUCTION_FUNCTION__(name__, binary_fun__) \
760  template <typename ForSfinae = void> \
761  Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value && \
762  storage_type0::size() == storage_type1::size(), \
763  value_type> \
764  name__() const \
765  { \
766  return binary_fun__(data0, data1).name__(); \
767  } \
768  \
769  template <typename ForSfinae = void> \
770  Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value && \
771  storage_type0::size() != storage_type1::size(), \
772  value_type> \
773  name__() const \
774  { \
775  return binary_fun__(data0.name__(), data1.name__()); \
776  } \
777  \
778  Vc_INTRINSIC value_type name__(const mask_type &mask) const \
779  { \
780  if (Vc_IS_UNLIKELY(Split::lo(mask).isEmpty())) { \
781  return data1.name__(Split::hi(mask)); \
782  } else if (Vc_IS_UNLIKELY(Split::hi(mask).isEmpty())) { \
783  return data0.name__(Split::lo(mask)); \
784  } else { \
785  return binary_fun__(data0.name__(Split::lo(mask)), \
786  data1.name__(Split::hi(mask))); \
787  } \
788  }
789  Vc_REDUCTION_FUNCTION__(min, Vc::internal::min)
790  Vc_REDUCTION_FUNCTION__(max, Vc::internal::max)
791  Vc_REDUCTION_FUNCTION__(product, internal::product_helper__)
792  Vc_REDUCTION_FUNCTION__(sum, internal::sum_helper__)
793 #undef Vc_REDUCTION_FUNCTION__
794  Vc_INTRINSIC Vc_PURE SimdArray partialSum() const //{{{2
795  {
796  auto ps0 = data0.partialSum();
797  auto tmp = data1;
798  tmp[0] += ps0[data0.size() - 1];
799  return {std::move(ps0), tmp.partialSum()};
800  }
801 
802  void fusedMultiplyAdd(const SimdArray &factor, const SimdArray &summand) //{{{2
803  {
804  data0.fusedMultiplyAdd(Split::lo(factor), Split::lo(summand));
805  data1.fusedMultiplyAdd(Split::hi(factor), Split::hi(summand));
806  }
807 
808  // apply {{{2
809  template <typename F> Vc_INTRINSIC SimdArray apply(F &&f) const
810  {
811  return {data0.apply(f), data1.apply(f)};
812  }
813  template <typename F> Vc_INTRINSIC SimdArray apply(F &&f, const mask_type &k) const
814  {
815  return {data0.apply(f, Split::lo(k)), data1.apply(f, Split::hi(k))};
816  }
817 
818  // shifted {{{2
819  inline SimdArray shifted(int amount) const
820  {
821  constexpr int SSize = Size;
822  constexpr int SSize0 = storage_type0::Size;
823  constexpr int SSize1 = storage_type1::Size;
824  if (amount == 0) {
825  return *this;
826  }
827  if (amount < 0) {
828  if (amount > -SSize0) {
829  return {data0.shifted(amount), data1.shifted(amount, data0)};
830  }
831  if (amount == -SSize0) {
832  return {storage_type0::Zero(), simd_cast<storage_type1>(data0)};
833  }
834  if (amount < -SSize0) {
835  return {storage_type0::Zero(), simd_cast<storage_type1>(data0.shifted(
836  amount + SSize0))};
837  }
838  return Zero();
839  } else {
840  if (amount >= SSize) {
841  return Zero();
842  } else if (amount >= SSize0) {
843  return {
844  simd_cast<storage_type0>(data1).shifted(amount - SSize0),
846  } else if (amount >= SSize1) {
847  return {data0.shifted(amount, data1), storage_type1::Zero()};
848  } else {
849  return {data0.shifted(amount, data1), data1.shifted(amount)};
850  }
851  }
852  }
853 
854  template <std::size_t NN>
855  inline enable_if<
856  !(std::is_same<storage_type0, storage_type1>::value && // not bisectable
857  N == NN),
858  SimdArray>
859  shifted(int amount, const SimdArray<value_type, NN> &shiftIn) const
860  {
861  constexpr int SSize = Size;
862  if (amount < 0) {
863  return SimdArray::generate([&](int i) -> value_type {
864  i += amount;
865  if (i >= 0) {
866  return operator[](i);
867  } else if (i >= -SSize) {
868  return shiftIn[i + SSize];
869  }
870  return 0;
871  });
872  }
873  return SimdArray::generate([&](int i) -> value_type {
874  i += amount;
875  if (i < SSize) {
876  return operator[](i);
877  } else if (i < 2 * SSize) {
878  return shiftIn[i - SSize];
879  }
880  return 0;
881  });
882  }
883 
884  template <std::size_t NN>
885  inline
886  enable_if<(std::is_same<storage_type0, storage_type1>::value && // bisectable
887  N == NN),
888  SimdArray>
889  shifted(int amount, const SimdArray<value_type, NN> &shiftIn) const
890  {
891  constexpr int SSize = Size;
892  if (amount < 0) {
893  if (amount > -static_cast<int>(storage_type0::Size)) {
894  return {data0.shifted(amount, internal_data1(shiftIn)),
895  data1.shifted(amount, data0)};
896  }
897  if (amount == -static_cast<int>(storage_type0::Size)) {
898  return {storage_type0(internal_data1(shiftIn)), storage_type1(data0)};
899  }
900  if (amount > -SSize) {
901  return {
902  internal_data1(shiftIn)
903  .shifted(amount + static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
904  data0.shifted(amount + static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
905  }
906  if (amount == -SSize) {
907  return shiftIn;
908  }
909  if (amount > -2 * SSize) {
910  return shiftIn.shifted(amount + SSize);
911  }
912  }
913  if (amount == 0) {
914  return *this;
915  }
916  if (amount < static_cast<int>(storage_type0::Size)) {
917  return {data0.shifted(amount, data1),
918  data1.shifted(amount, internal_data0(shiftIn))};
919  }
920  if (amount == static_cast<int>(storage_type0::Size)) {
921  return {storage_type0(data1), storage_type1(internal_data0(shiftIn))};
922  }
923  if (amount < SSize) {
924  return {data1.shifted(amount - static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
925  internal_data0(shiftIn)
926  .shifted(amount - static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
927  }
928  if (amount == SSize) {
929  return shiftIn;
930  }
931  if (amount < 2 * SSize) {
932  return shiftIn.shifted(amount - SSize);
933  }
934  return Zero();
935  }
936 
    // rotated {{{2
    /// Returns a copy with all entries rotated down by \p amount positions;
    /// entries shifted out at the front re-enter at the back. A negative
    /// \p amount rotates in the opposite direction.
    Vc_INTRINSIC SimdArray rotated(int amount) const
    {
        // Normalize amount into [0, size()).
        amount %= int(size());
        if (amount == 0) {
            return *this;
        } else if (amount < 0) {
            amount += size();
        }

        // Cross-converted copies of the two halves; required because the two
        // storage types may have different widths.
        auto &&d0cvtd = simd_cast<storage_type1>(data0);
        auto &&d1cvtd = simd_cast<storage_type0>(data1);
        constexpr int size0 = storage_type0::size();
        constexpr int size1 = storage_type1::size();

        if (amount == size0 && std::is_same<storage_type0, storage_type1>::value) {
            // Equal halves rotated by exactly one half width: swap the halves.
            return {std::move(d1cvtd), std::move(d0cvtd)};
        } else if (amount < size1) {
            // Rotation stays within each half; shift the other half's data in.
            return {data0.shifted(amount, d1cvtd), data1.shifted(amount, d0cvtd)};
        } else if (amount == size1) {
            // data1 moves wholly to the front; data0 becomes the new tail.
            return {data0.shifted(amount, d1cvtd), std::move(d0cvtd)};
        } else if (int(size()) - amount < size1) {
            // Rotation close to a full revolution: shift backwards instead.
            return {data0.shifted(amount - int(size()), d1cvtd.shifted(size1 - size0)),
                    data1.shifted(amount - int(size()), data0.shifted(size0 - size1))};
        } else if (int(size()) - amount == size1) {
            return {data0.shifted(-size1, d1cvtd.shifted(size1 - size0)),
                    simd_cast<storage_type1>(data0.shifted(size0 - size1))};
        } else if (amount <= size0) {
            return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
                    simd_cast<storage_type1>(data0.shifted(amount - size1))};
        } else {
            return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
                    simd_cast<storage_type1>(data0.shifted(amount - size1, d1cvtd))};
        }
        // Unreachable: every branch above returns. Kept to silence
        // "control reaches end of non-void function" warnings on some compilers.
        return *this;
    }
973 
    // interleaveLow/-High {{{2
    /// Returns the low half of the interleaving of *this with \p x:
    /// {(*this)[0], x[0], (*this)[1], x[1], ...}
    Vc_INTRINSIC SimdArray interleaveLow(const SimdArray &x) const
    {
        // return data0[0], x.data0[0], data0[1], x.data0[1], ...
        return {data0.interleaveLow(x.data0),
                simd_cast<storage_type1>(data0.interleaveHigh(x.data0))};
    }
    /// Returns the high half of the interleaving of *this with \p x.
    /// Tag-dispatches on whether the two storage halves have equal width.
    Vc_INTRINSIC SimdArray interleaveHigh(const SimdArray &x) const
    {
        return interleaveHighImpl(
            x,
            std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
    }
987 
private:
    /// interleaveHigh implementation for equal-sized halves: only data1
    /// contributes to the high half of the interleaving.
    Vc_INTRINSIC SimdArray interleaveHighImpl(const SimdArray &x, std::true_type) const
    {
        return {data1.interleaveLow(x.data1), data1.interleaveHigh(x.data1)};
    }
    /// interleaveHigh implementation for differently sized halves: part of
    /// data0's interleave still belongs to the result's left half and must be
    /// combined with the start of data1's interleave.
    inline SimdArray interleaveHighImpl(const SimdArray &x, std::false_type) const
    {
        return {data0.interleaveHigh(x.data0)
                    .shifted(storage_type1::Size,
                             simd_cast<storage_type0>(data1.interleaveLow(x.data1))),
                data1.interleaveHigh(x.data1)};
    }
1000 
public:
    /// Returns a copy of *this with the order of all entries reversed.
    inline SimdArray reversed() const //{{{2
    {
        if (std::is_same<storage_type0, storage_type1>::value) {
            // Equal halves: reverse each half and swap them.
            return {simd_cast<storage_type0>(data1).reversed(),
                    simd_cast<storage_type1>(data0).reversed()};
        } else {
            // Unequal halves: shift data1 into data0 to form the reversed left
            // part; the entries that fall off data0 form the reversed right part.
            return {data0.shifted(storage_type1::Size, data1).reversed(),
                    simd_cast<storage_type1>(data0.reversed().shifted(
                        storage_type0::Size - storage_type1::Size))};
        }
    }
    /// Returns a copy with all entries sorted in ascending order.
    /// Tag-dispatches on whether the two storage halves have equal width.
    inline SimdArray sorted() const //{{{2
    {
        return sortedImpl(
            std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
    }
1018 
    /// Sort implementation for equal-sized halves (bitonic-style merge):
    /// sort both halves, reverse one, take the elementwise min/max pair,
    /// and sort each of those once more.
    Vc_INTRINSIC SimdArray sortedImpl(std::true_type) const
    {
#ifdef Vc_DEBUG_SORTED
        std::cerr << "-- " << data0 << data1 << '\n';
#endif
        const auto a = data0.sorted();
        const auto b = data1.sorted().reversed();
        const auto lo = internal::min(a, b);
        const auto hi = internal::max(a, b);
        return {lo.sorted(), hi.sorted()};
    }
1030 
1031  Vc_INTRINSIC SimdArray sortedImpl(std::false_type) const
1032  {
1033  using SortableArray = SimdArray<value_type, Common::nextPowerOfTwo(size())>;
1034  auto sortable = simd_cast<SortableArray>(*this);
1035  for (std::size_t i = Size; i < SortableArray::Size; ++i) {
1036  using limits = std::numeric_limits<value_type>;
1037  if (limits::has_infinity) {
1038  sortable[i] = limits::infinity();
1039  } else {
1040  sortable[i] = std::numeric_limits<value_type>::max();
1041  }
1042  }
1043  return simd_cast<SimdArray>(sortable.sorted());
1044 
1045  /* The following implementation appears to be less efficient. But this may need further
1046  * work.
1047  const auto a = data0.sorted();
1048  const auto b = data1.sorted();
1049 #ifdef Vc_DEBUG_SORTED
1050  std::cerr << "== " << a << b << '\n';
1051 #endif
1052  auto aIt = Vc::begin(a);
1053  auto bIt = Vc::begin(b);
1054  const auto aEnd = Vc::end(a);
1055  const auto bEnd = Vc::end(b);
1056  return SimdArray::generate([&](std::size_t) {
1057  if (aIt == aEnd) {
1058  return *(bIt++);
1059  }
1060  if (bIt == bEnd) {
1061  return *(aIt++);
1062  }
1063  if (*aIt < *bIt) {
1064  return *(aIt++);
1065  } else {
1066  return *(bIt++);
1067  }
1068  });
1069  */
1070  }
1071 
    /// Constructs a SimdArray whose entry i is initialized to gen(i).
    /// \p gen is invoked for ascending indices 0 .. size()-1.
    template <typename G> static Vc_INTRINSIC SimdArray generate(const G &gen) // {{{2
    {
        auto tmp = storage_type0::generate(gen);  // GCC bug: the order of evaluation in
                                                  // an initializer list is well-defined
                                                  // (front to back), but GCC 4.8 doesn't
                                                  // implement this correctly. Therefore
                                                  // we enforce correct order.
        return {std::move(tmp),
                storage_type1::generate([&](std::size_t i) { return gen(i + N0); })};
    }
1082 
    // internal_data0/1 {{{2
    // Grant the free internal_data0/1 accessor functions (defined further
    // down in this file) access to the private storage halves.
    friend storage_type0 &internal_data0<>(SimdArray &x);
    friend storage_type1 &internal_data1<>(SimdArray &x);
    friend const storage_type0 &internal_data0<>(const SimdArray &x);
    friend const storage_type1 &internal_data1<>(const SimdArray &x);

    /// Internal constructor moving the two storage halves directly into place.
    Vc_INTRINSIC SimdArray(storage_type0 &&x, storage_type1 &&y) //{{{2
        : data0(std::move(x)), data1(std::move(y))
    {
    }
private: //{{{2
    // The two SIMD vectors that together hold the entries of this array.
    storage_type0 data0;
    storage_type1 data1;
};
1098 #undef Vc_CURRENT_CLASS_NAME
// Out-of-class definition of the static constexpr Size member (required for
// odr-use before C++17 inline variables).
template <typename T, std::size_t N, typename VectorType, std::size_t M> constexpr std::size_t SimdArray<T, N, VectorType, M>::Size;
1100 template <typename T, std::size_t N, typename VectorType, std::size_t M>
1102 
// gatherImplementation {{{2
/// Gathers values from \p mem at the positions given by \p indexes, splitting
/// the index vector between the two storage halves via Split::lo/hi.
template <typename T, std::size_t N, typename VectorType, std::size_t M>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, M>::gatherImplementation(const MT *mem,
                                                                 IT &&indexes)
{
    data0.gather(mem, Split::lo(Common::Operations::gather(),
                                indexes));  // don't forward indexes - it could move and
                                            // thus break the next line
    data1.gather(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)));
}
/// Masked gather: like the overload above, but only entries whose mask bit is
/// set are loaded; mask and indexes are both split between the two halves.
template <typename T, std::size_t N, typename VectorType, std::size_t M>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, M>::gatherImplementation(const MT *mem,
                                                                 IT &&indexes, MaskArgument mask)
{
    data0.gather(mem, Split::lo(Common::Operations::gather(), indexes),
                 Split::lo(mask));  // don't forward indexes - it could move and
                                    // thus break the next line
    data1.gather(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)),
                 Split::hi(mask));
}
1125 
// internal_data0/1 (SimdArray) {{{1
/// Accessor for the first (left) storage half of a bisected SimdArray.
template <typename T, std::size_t N, typename V, std::size_t M>
Vc_INTRINSIC typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
    SimdArray<T, N, V, M> &x)
{
    return x.data0;
}
/// Accessor for the second (right) storage half of a bisected SimdArray.
template <typename T, std::size_t N, typename V, std::size_t M>
Vc_INTRINSIC typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
    SimdArray<T, N, V, M> &x)
{
    return x.data1;
}
/// Const accessor for the first (left) storage half.
template <typename T, std::size_t N, typename V, std::size_t M>
Vc_INTRINSIC const typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
    const SimdArray<T, N, V, M> &x)
{
    return x.data0;
}
/// Const accessor for the second (right) storage half.
template <typename T, std::size_t N, typename V, std::size_t M>
Vc_INTRINSIC const typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
    const SimdArray<T, N, V, M> &x)
{
    return x.data1;
}
1151 
// binary operators {{{1
namespace result_vector_type_internal
{
// Strips cv-qualifiers and references.
template <typename T>
using type = typename std::remove_cv<typename std::remove_reference<T>::type>::type;

// True for integral types that could require 64-bit SIMD support (larger than
// int, or `long`/`unsigned long`, which may be 64 bits on some platforms).
template <typename T>
using is_integer_larger_than_int = std::integral_constant<
    bool, std::is_integral<T>::value &&(sizeof(T) > sizeof(int) ||
                                        std::is_same<T, long>::value ||
                                        std::is_same<T, unsigned long>::value)>;

// evaluate<L, R, N, bool>: only the `true` specialization below exists, so the
// non-type bool parameter acts as the enable/disable switch for the mixed-type
// operators defined via Vc_BINARY_OPERATORS_ further down.
template <
    typename L, typename R, std::size_t N = Traits::isSimdArray<L>::value
                                                ? Traits::simd_vector_size<L>::value
                                                : Traits::simd_vector_size<R>::value,
    bool = (Traits::isSimdArray<L>::value ||
            Traits::isSimdArray<R>::value)  // one of the operands must be a SimdArray
           &&
           !std::is_same<type<L>, type<R>>::value  // if the operands are of the same type
                                                   // use the member function
           &&
           ((std::is_arithmetic<type<L>>::value &&
             !is_integer_larger_than_int<type<L>>::value) ||
            (std::is_arithmetic<type<R>>::value &&
             !is_integer_larger_than_int<
                 type<R>>::value)  // one of the operands is a scalar type
            ||
            (Traits::is_simd_vector<L>::value && !Traits::isSimdArray<L>::value) ||
            (Traits::is_simd_vector<R>::value &&
             !Traits::isSimdArray<R>::value)  // or one of the operands is Vector<T>
            ) > struct evaluate;

template <typename L, typename R, std::size_t N> struct evaluate<L, R, N, true>
{
private:
    using LScalar = Traits::entry_type_of<L>;
    using RScalar = Traits::entry_type_of<R>;

    // Shorthand that applies ::type directly (std::conditional_t is C++14).
    template <bool B, typename True, typename False>
    using conditional = typename std::conditional<B, True, False>::type;

public:
    // In principle we want the exact same rules for SimdArray<T> ⨉ SimdArray<U> as the standard
    // defines for T ⨉ U. BUT: short ⨉ short returns int (because all integral types smaller than
    // int are promoted to int before any operation). This would imply that SIMD types with integral
    // types smaller than int are more or less useless - and you could use SimdArray<int> from the
    // start. Therefore we special-case those operations where the scalar type of both operands is
    // integral and smaller than int.
    // In addition to that there is no generic support for 64-bit int SIMD types. Therefore
    // promotion to a 64-bit integral type (including `long` because it can potentially have 64
    // bits) also is not done. But if one of the operands is a scalar type that is larger than int
    // then the operator is disabled altogether. We do not want an implicit demotion.
    using type = SimdArray<
        conditional<(std::is_integral<LScalar>::value &&std::is_integral<RScalar>::value &&
                     sizeof(LScalar) < sizeof(int) &&
                     sizeof(RScalar) < sizeof(int)),
                    // both integral and smaller than int: equal sizes prefer the
                    // unsigned type, otherwise the larger type wins
                    conditional<(sizeof(LScalar) == sizeof(RScalar)),
                                conditional<std::is_unsigned<LScalar>::value, LScalar, RScalar>,
                                conditional<(sizeof(LScalar) > sizeof(RScalar)), LScalar, RScalar>>,
                    // otherwise: use the standard arithmetic promotion rules
                    decltype(std::declval<LScalar>() + std::declval<RScalar>())>,
        N>;
};

}  // namespace result_vector_type_internal
1217 
/// The SimdArray type that results from a mixed binary operation on L and R.
template <typename L, typename R>
using result_vector_type = typename result_vector_type_internal::evaluate<L, R>::type;

// Sanity check: short ⨉ SimdArray<ushort, 32> must yield SimdArray<ushort, 32>
// (integral promotion to int is deliberately suppressed for small int types).
static_assert(
    std::is_same<result_vector_type<short int, Vc::SimdArray<short unsigned int, 32ul>>,
                 Vc::SimdArray<short unsigned int, 32ul>>::value,
    "result_vector_type does not work");
1225 
// Arithmetic and bitwise operators for mixed SimdArray/scalar/Vector operand
// combinations: both operands are converted to the common result_vector_type
// (see evaluate<> above) and the member operator of that type does the work.
#define Vc_BINARY_OPERATORS_(op__)                                                       \
    template <typename L, typename R>                                                    \
    Vc_INTRINSIC result_vector_type<L, R> operator op__(L &&lhs, R &&rhs)                \
    {                                                                                    \
        using Return = result_vector_type<L, R>;                                         \
        return Return(std::forward<L>(lhs)) op__ Return(std::forward<R>(rhs));           \
    }
Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATORS_)
Vc_ALL_BINARY(Vc_BINARY_OPERATORS_)
#undef Vc_BINARY_OPERATORS_
// Comparison operators: same promotion rules, but the result is the mask type
// of the promoted vector type.
#define Vc_BINARY_OPERATORS_(op__)                                                       \
    template <typename L, typename R>                                                    \
    Vc_INTRINSIC typename result_vector_type<L, R>::mask_type operator op__(L &&lhs,     \
                                                                            R &&rhs)     \
    {                                                                                    \
        using Promote = result_vector_type<L, R>;                                        \
        return Promote(std::forward<L>(lhs)) op__ Promote(std::forward<R>(rhs));         \
    }
Vc_ALL_COMPARES(Vc_BINARY_OPERATORS_)
#undef Vc_BINARY_OPERATORS_
1246 
// math functions {{{1
/// Elementwise absolute value of \p x.
template <typename T, std::size_t N> SimdArray<T, N> abs(const SimdArray<T, N> &x)
{
    return SimdArray<T, N>::fromOperation(Common::Operations::Abs(), x);
}
/// Elementwise NaN test of \p x; returns the corresponding mask type.
template <typename T, std::size_t N> SimdMaskArray<T, N> isnan(const SimdArray<T, N> &x)
{
    return SimdMaskArray<T, N>::fromOperation(Common::Operations::Isnan(), x);
}
/// Elementwise frexp: returns the significands of \p x and stores the
/// exponents to \p e.
template <typename T, std::size_t N>
SimdArray<T, N> frexp(const SimdArray<T, N> &x, SimdArray<int, N> *e)
{
    return SimdArray<T, N>::fromOperation(Common::Operations::Frexp(), x, e);
}
/// Elementwise ldexp: scales each entry of \p x by 2 to the power of the
/// corresponding entry of \p e.
template <typename T, std::size_t N>
SimdArray<T, N> ldexp(const SimdArray<T, N> &x, const SimdArray<int, N> &e)
{
    return SimdArray<T, N>::fromOperation(Common::Operations::Ldexp(), x, e);
}
1266 
// simd_cast {{{1
// simd_cast_impl_smaller_input {{{2
// Converts inputs whose width N is smaller than Return's width by casting the
// leading arguments and appending the entries of `last` scalar by scalar.
// The following function can be implemented without the sizeof...(From) overload.
// However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
// function in two works around the issue.
template <typename Return, std::size_t N, typename T, typename... From>
Vc_INTRINSIC Vc_CONST enable_if<sizeof...(From) != 0, Return>
simd_cast_impl_smaller_input(const From &... xs, const T &last)
{
    Return r = simd_cast<Return>(xs...);
    // Append the entries of `last` after the N * sizeof...(From) entries
    // already filled from xs.
    for (size_t i = 0; i < N; ++i) {
        r[i + N * sizeof...(From)] = static_cast<typename Return::EntryType>(last[i]);
    }
    return r;
}
/// Single-input overload: copy all N entries of `last` scalar by scalar.
template <typename Return, std::size_t N, typename T>
Vc_INTRINSIC Vc_CONST Return simd_cast_impl_smaller_input(const T &last)
{
    Return r = Return();
    for (size_t i = 0; i < N; ++i) {
        r[i] = static_cast<typename Return::EntryType>(last[i]);
    }
    return r;
}
// simd_cast_impl_larger_input: like the smaller_input variant above, but the
// inputs (of width N) are at least as wide as Return, so only as many entries
// of `last` are copied as still fit into Return.
template <typename Return, std::size_t N, typename T, typename... From>
Vc_INTRINSIC Vc_CONST enable_if<sizeof...(From) != 0, Return> simd_cast_impl_larger_input(
    const From &... xs, const T &last)
{
    Return r = simd_cast<Return>(xs...);
    // Fill the remaining entries of r from the front of `last`.
    for (size_t i = N * sizeof...(From); i < Return::Size; ++i) {
        r[i] = static_cast<typename Return::EntryType>(last[i - N * sizeof...(From)]);
    }
    return r;
}
/// Single-input overload: copy the first Return::size() entries of `last`.
template <typename Return, std::size_t N, typename T>
Vc_INTRINSIC Vc_CONST Return simd_cast_impl_larger_input(const T &last)
{
    Return r = Return();
    for (size_t i = 0; i < Return::size(); ++i) {
        r[i] = static_cast<typename Return::EntryType>(last[i]);
    }
    return r;
}
1310 
// simd_cast_without_last (declaration) {{{2
// Casts all arguments except the trailing one to Return; the last argument is
// surplus input that would not fit into Return anyway.
template <typename Return, typename T, typename... From>
Vc_INTRINSIC_L Vc_CONST_L Return
    simd_cast_without_last(const From &... xs, const T &) Vc_INTRINSIC_R Vc_CONST_R;
1315 
// are_all_types_equal {{{2
// Trait: true iff every type in the pack is the same type.
template <typename... Ts> struct are_all_types_equal;
// Base case: a single type is trivially "all equal".
template <typename T>
struct are_all_types_equal<T> : public std::true_type
{
};
// Recursion: the leading pair must match and the remainder must be all equal.
template <typename T0, typename T1, typename... Rest>
struct are_all_types_equal<T0, T1, Rest...>
    : public std::integral_constant<bool, std::is_same<T0, T1>::value &&
                                              are_all_types_equal<T1, Rest...>::value>
{
};
1328 
// simd_cast_interleaved_argument_order (declarations) {{{2
// Takes two equally typed packs a... and b... and performs the cast as if the
// arguments were interleaved a0, b0, a1, b1, ... — used by the bisectable
// simd_cast overloads below, which pass all data0 halves followed by all
// data1 halves.
template <typename Return, typename... Ts>
Vc_INTRINSIC Vc_CONST Return
    simd_cast_interleaved_argument_order(const Ts &... a, const Ts &... b);
1352 
// simd_cast_with_offset (declarations and one impl) {{{2
// simd_cast_with_offset<Return, offset>(xs...) converts the inputs to Return,
// skipping the first `offset` scalar entries of the concatenated inputs.
// offset == 0 {{{3
template <typename Return, std::size_t offset, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
    simd_cast_with_offset(const From &x, const Froms &... xs);
// offset > 0 && offset divisible by Return::Size {{{3
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0), Return>
    simd_cast_with_offset(const From &x);
// offset > 0 && offset NOT divisible && Return is non-atomic simd(mask)array {{{3
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
               ((Traits::isSimdArray<Return>::value &&
                 !Traits::isAtomicSimdArray<Return>::value) ||
                (Traits::isSimdMaskArray<Return>::value &&
                 !Traits::isAtomicSimdMaskArray<Return>::value))),
              Return>
    simd_cast_with_offset(const From &x);
// offset > 0 && offset NOT divisible && Return is atomic simd(mask)array {{{3
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
               ((Traits::isSimdArray<Return>::value &&
                 Traits::isAtomicSimdArray<Return>::value) ||
                (Traits::isSimdMaskArray<Return>::value &&
                 Traits::isAtomicSimdMaskArray<Return>::value))),
              Return>
    simd_cast_with_offset(const From &x);
// offset > first argument (drops first arg) {{{3
// The first input lies entirely before the requested offset: discard it and
// recurse with the offset reduced by its width.
template <typename Return, std::size_t offset, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST enable_if<
    (are_all_types_equal<From, Froms...>::value && From::Size <= offset), Return>
    simd_cast_with_offset(const From &, const Froms &... xs)
{
    return simd_cast_with_offset<Return, offset - From::Size>(xs...);
}
1392 
// offset > first and only argument (returns Zero) {{{3
// The offset skips past the only remaining input: nothing left to convert.
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST enable_if<(From::Size <= offset), Return> simd_cast_with_offset(
    const From &)
{
    return Return::Zero();
}
1400 
// first_type_of {{{2
// Helper that names the leading type of a non-empty template parameter pack.
template <typename Head, typename... Tail> struct first_type_of_impl
{
    using type = Head;
};
// Alias: the first type in Ts...
template <typename... Ts> using first_type_of = typename first_type_of_impl<Ts...>::type;
1407 
// simd_cast_drop_arguments (declarations) {{{2
// simd_cast_drop_arguments<Return>(xs...) casts only as many leading inputs as
// are needed to fill Return and ignores trailing surplus arguments.
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x);
// All inputs are still needed: nothing to drop yet.
template <typename Return, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<Froms...>::value &&
               sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
              Return>
    simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x);
// The following function can be implemented without the sizeof...(From) overload.
// However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
// function in two works around the issue.
template <typename Return, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST enable_if<
    (are_all_types_equal<From, Froms...>::value &&
     (1 + sizeof...(Froms)) * From::Size >= Return::Size && sizeof...(Froms) != 0),
    Return>
simd_cast_drop_arguments(Froms... xs, From x, From);
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
    simd_cast_drop_arguments(From x, From);
1430 
// NOTE(review): an unnamed namespace in a header gives each translation unit
// its own copy of vc_debug_ — apparently intentional here, but worth
// confirming against the project's conventions.
namespace
{
#ifdef Vc_DEBUG_SIMD_CAST
// Sink used to force left-to-right evaluation of the pack expansion below.
void debugDoNothing(const std::initializer_list<void *> &) {}
// Prints prefix, all arguments (comma-separated), and suffix to std::cerr.
template <typename T0, typename... Ts>
inline void vc_debug_(const char *prefix, const char *suffix, const T0 &arg0,
                      const Ts &... args)
{
    std::cerr << prefix << arg0;
    debugDoNothing({&(std::cerr << ", " << args)...});
    std::cerr << suffix;
}
#else
// Debugging disabled: no-op overload that optimizes away entirely.
template <typename T0, typename... Ts>
Vc_INTRINSIC void vc_debug_(const char *, const char *, const T0 &, const Ts &...)
{
}
#endif
}  // unnamed namespace
1450 
// simd_cast<T>(xs...) to SimdArray/-mask {{{2
// Converts one or more SIMD vectors/masks (all of the same type) into a
// SimdArray or SimdMaskArray. Four overloads per array kind:
//  {1} atomic Return, inputs narrower than Return -> cast into the single storage
//  {2} atomic Return, inputs overfull             -> drop the last input
//  {3} bisected Return, inputs reach right half   -> fill both storage halves
//  {4} bisected Return, inputs fit left half      -> right half is Zero
#define Vc_SIMDARRAY_CASTS(SimdArrayType__, trait_name__)                                \
    template <typename Return, typename From, typename... Froms>                         \
    Vc_INTRINSIC Vc_CONST enable_if<(Traits::isAtomic##SimdArrayType__<Return>::value && \
                                     !Traits::is##SimdArrayType__<From>::value &&        \
                                     Traits::is_simd_##trait_name__<From>::value &&      \
                                     From::Size * sizeof...(Froms) < Return::Size &&     \
                                     are_all_types_equal<From, Froms...>::value),        \
                                    Return>                                              \
    simd_cast(From x, Froms... xs)                                                       \
    {                                                                                    \
        vc_debug_("simd_cast{1}(", ")\n", x, xs...);                                     \
        return {simd_cast<typename Return::storage_type>(x, xs...)};                     \
    }                                                                                    \
    template <typename Return, typename From, typename... Froms>                         \
    Vc_INTRINSIC Vc_CONST enable_if<(Traits::isAtomic##SimdArrayType__<Return>::value && \
                                     !Traits::is##SimdArrayType__<From>::value &&        \
                                     Traits::is_simd_##trait_name__<From>::value &&      \
                                     From::Size * sizeof...(Froms) >= Return::Size &&    \
                                     are_all_types_equal<From, Froms...>::value),        \
                                    Return>                                              \
    simd_cast(From x, Froms... xs)                                                       \
    {                                                                                    \
        vc_debug_("simd_cast{2}(", ")\n", x, xs...);                                     \
        return {simd_cast_without_last<Return, From, Froms...>(x, xs...)};               \
    }                                                                                    \
    template <typename Return, typename From, typename... Froms>                         \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType__<Return>::value &&                         \
                   !Traits::isAtomic##SimdArrayType__<Return>::value &&                  \
                   !Traits::is##SimdArrayType__<From>::value &&                          \
                   Traits::is_simd_##trait_name__<From>::value &&                        \
                   Common::left_size(Return::Size) <                                     \
                       From::Size * (1 + sizeof...(Froms)) &&                            \
                   are_all_types_equal<From, Froms...>::value),                          \
                  Return>                                                                \
    simd_cast(From x, Froms... xs)                                                       \
    {                                                                                    \
        vc_debug_("simd_cast{3}(", ")\n", x, xs...);                                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        return {simd_cast_drop_arguments<R0, Froms...>(x, xs...),                        \
                simd_cast_with_offset<R1, R0::Size>(x, xs...)};                          \
    }                                                                                    \
    template <typename Return, typename From, typename... Froms>                         \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType__<Return>::value &&                         \
                   !Traits::isAtomic##SimdArrayType__<Return>::value &&                  \
                   !Traits::is##SimdArrayType__<From>::value &&                          \
                   Traits::is_simd_##trait_name__<From>::value &&                        \
                   Common::left_size(Return::Size) >=                                    \
                       From::Size * (1 + sizeof...(Froms)) &&                            \
                   are_all_types_equal<From, Froms...>::value),                          \
                  Return>                                                                \
    simd_cast(From x, Froms... xs)                                                       \
    {                                                                                    \
        vc_debug_("simd_cast{4}(", ")\n", x, xs...);                                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        return {simd_cast<R0>(x, xs...), R1::Zero()};                                    \
    }
// Instantiate the overload set for value arrays and mask arrays.
Vc_SIMDARRAY_CASTS(SimdArray, vector)
Vc_SIMDARRAY_CASTS(SimdMaskArray, mask)
#undef Vc_SIMDARRAY_CASTS
1515 
// simd_cast<SimdArray/-mask, offset>(V) {{{2
// Converts a SIMD vector/mask into a SimdArray/-mask, reading the source at a
// chunk offset (offset is counted in multiples of Return::Size entries).
#define Vc_SIMDARRAY_CASTS(SimdArrayType__, trait_name__)                                \
    /* SIMD Vector/Mask to atomic SimdArray/simdmaskarray */                             \
    template <typename Return, int offset, typename From>                                \
    Vc_INTRINSIC Vc_CONST enable_if<(Traits::isAtomic##SimdArrayType__<Return>::value && \
                                     !Traits::is##SimdArrayType__<From>::value &&        \
                                     Traits::is_simd_##trait_name__<From>::value),       \
                                    Return>                                              \
    simd_cast(From x)                                                                    \
    {                                                                                    \
        vc_debug_("simd_cast{offset, atomic}(", ")\n", offset, x);                       \
        return {simd_cast<typename Return::storage_type, offset>(x)};                    \
    }                                                                                    \
    /* both halves of Return array are extracted from argument */                        \
    template <typename Return, int offset, typename From>                                \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType__<Return>::value &&                         \
                   !Traits::isAtomic##SimdArrayType__<Return>::value &&                  \
                   !Traits::is##SimdArrayType__<From>::value &&                          \
                   Traits::is_simd_##trait_name__<From>::value &&                        \
                   Return::Size * offset + Common::left_size(Return::Size) <             \
                       From::Size),                                                      \
                  Return>                                                                \
    simd_cast(From x)                                                                    \
    {                                                                                    \
        vc_debug_("simd_cast{offset, split Return}(", ")\n", offset, x);                 \
        using R0 = typename Return::storage_type0;                                       \
        constexpr int entries_offset = offset * Return::Size;                            \
        constexpr int entries_offset_right = entries_offset + R0::Size;                  \
        return {                                                                         \
            simd_cast_with_offset<typename Return::storage_type0, entries_offset>(x),    \
            simd_cast_with_offset<typename Return::storage_type1, entries_offset_right>( \
                x)};                                                                     \
    }                                                                                    \
    /* SIMD Vector/Mask to non-atomic SimdArray/simdmaskarray */                         \
    /* right half of Return array is zero */                                             \
    template <typename Return, int offset, typename From>                                \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType__<Return>::value &&                         \
                   !Traits::isAtomic##SimdArrayType__<Return>::value &&                  \
                   !Traits::is##SimdArrayType__<From>::value &&                          \
                   Traits::is_simd_##trait_name__<From>::value &&                        \
                   Return::Size * offset + Common::left_size(Return::Size) >=            \
                       From::Size),                                                      \
                  Return>                                                                \
    simd_cast(From x)                                                                    \
    {                                                                                    \
        vc_debug_("simd_cast{offset, R1::Zero}(", ")\n", offset, x);                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        constexpr int entries_offset = offset * Return::Size;                            \
        return {simd_cast_with_offset<R0, entries_offset>(x), R1::Zero()};               \
    }
// Instantiate for value arrays and mask arrays.
Vc_SIMDARRAY_CASTS(SimdArray, vector)
Vc_SIMDARRAY_CASTS(SimdMaskArray, mask)
#undef Vc_SIMDARRAY_CASTS
1572 
// simd_cast<T>(xs...) from SimdArray/-mask {{{2
// Conversions whose *inputs* are SimdArray/SimdMaskArray: unwraps the inputs'
// storage and recurses into the simd_cast overloads above. The overload set
// distinguishes atomic inputs (N == M), bisectable power-of-two sizes, and
// non-power-of-two sizes handled entry-wise.
#define Vc_SIMDARRAY_CASTS(SimdArrayType__)                                              \
    /* indivisible SimdArrayType__ */                                                    \
    template <typename Return, typename T, std::size_t N, typename V, typename... From>  \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(are_all_types_equal<SimdArrayType__<T, N, V, N>, From...>::value &&   \
                   (sizeof...(From) == 0 || N * sizeof...(From) < Return::Size) &&       \
                   !std::is_same<Return, SimdArrayType__<T, N, V, N>>::value),           \
                  Return>                                                                \
    simd_cast(const SimdArrayType__<T, N, V, N> &x0, const From &... xs)                 \
    {                                                                                    \
        vc_debug_("simd_cast{indivisible}(", ")\n", x0, xs...);                          \
        return simd_cast<Return>(internal_data(x0), internal_data(xs)...);               \
    }                                                                                    \
    /* indivisible SimdArrayType__ && can drop arguments from the end */                 \
    template <typename Return, typename T, std::size_t N, typename V, typename... From>  \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(are_all_types_equal<SimdArrayType__<T, N, V, N>, From...>::value &&   \
                   (sizeof...(From) > 0 && (N * sizeof...(From) >= Return::Size)) &&     \
                   !std::is_same<Return, SimdArrayType__<T, N, V, N>>::value),           \
                  Return>                                                                \
    simd_cast(const SimdArrayType__<T, N, V, N> &x0, const From &... xs)                 \
    {                                                                                    \
        vc_debug_("simd_cast{indivisible2}(", ")\n", x0, xs...);                         \
        return simd_cast_without_last<                                                   \
            Return, typename SimdArrayType__<T, N, V, N>::storage_type,                  \
            typename From::storage_type...>(internal_data(x0), internal_data(xs)...);    \
    }                                                                                    \
    /* bisectable SimdArrayType__ (N = 2^n) && never too large */                        \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M &&                                                             \
                   are_all_types_equal<SimdArrayType__<T, N, V, M>, From...>::value &&   \
                   N * sizeof...(From) < Return::Size && ((N - 1) & N) == 0),            \
                  Return>                                                                \
    simd_cast(const SimdArrayType__<T, N, V, M> &x0, const From &... xs)                 \
    {                                                                                    \
        vc_debug_("simd_cast{bisectable}(", ")\n", x0, xs...);                           \
        return simd_cast_interleaved_argument_order<                                     \
            Return, typename SimdArrayType__<T, N, V, M>::storage_type0,                 \
            typename From::storage_type0...>(internal_data0(x0), internal_data0(xs)..., \
                                             internal_data1(x0), internal_data1(xs)...); \
    }                                                                                    \
    /* bisectable SimdArrayType__ (N = 2^n) && input so large that at least the last     \
     * input can be dropped */                                                           \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M &&                                                             \
                   are_all_types_equal<SimdArrayType__<T, N, V, M>, From...>::value &&   \
                   N * sizeof...(From) >= Return::Size && ((N - 1) & N) == 0),           \
                  Return>                                                                \
    simd_cast(const SimdArrayType__<T, N, V, M> &x0, const From &... xs)                 \
    {                                                                                    \
        vc_debug_("simd_cast{bisectable2}(", ")\n", x0, xs...);                          \
        return simd_cast_without_last<Return, SimdArrayType__<T, N, V, M>, From...>(     \
            x0, xs...);                                                                  \
    }                                                                                    \
    /* remaining SimdArrayType__ input never larger (N != 2^n) */                        \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M &&                                                             \
                   are_all_types_equal<SimdArrayType__<T, N, V, M>, From...>::value &&   \
                   N * (1 + sizeof...(From)) <= Return::Size && ((N - 1) & N) != 0),     \
                  Return>                                                                \
    simd_cast(const SimdArrayType__<T, N, V, M> &x0, const From &... xs)                 \
    {                                                                                    \
        vc_debug_("simd_cast{remaining}(", ")\n", x0, xs...);                            \
        return simd_cast_impl_smaller_input<Return, N, SimdArrayType__<T, N, V, M>,      \
                                            From...>(x0, xs...);                         \
    }                                                                                    \
    /* remaining SimdArrayType__ input larger (N != 2^n) */                              \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M &&                                                             \
                   are_all_types_equal<SimdArrayType__<T, N, V, M>, From...>::value &&   \
                   N * (1 + sizeof...(From)) > Return::Size && ((N - 1) & N) != 0),      \
                  Return>                                                                \
    simd_cast(const SimdArrayType__<T, N, V, M> &x0, const From &... xs)                 \
    {                                                                                    \
        vc_debug_("simd_cast{remaining2}(", ")\n", x0, xs...);                           \
        return simd_cast_impl_larger_input<Return, N, SimdArrayType__<T, N, V, M>,       \
                                           From...>(x0, xs...);                          \
    }                                                                                    \
    /* a single bisectable SimdArrayType__ (N = 2^n) too large */                        \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M>     \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && N >= 2 * Return::Size && ((N - 1) & N) == 0), Return>       \
        simd_cast(const SimdArrayType__<T, N, V, M> &x)                                  \
    {                                                                                    \
        vc_debug_("simd_cast{single bisectable}(", ")\n", x);                            \
        return simd_cast<Return>(internal_data0(x));                                     \
    }                                                                                    \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M>     \
    Vc_INTRINSIC Vc_CONST enable_if<(N != M && N > Return::Size &&                       \
                                     N < 2 * Return::Size && ((N - 1) & N) == 0),        \
                                    Return>                                              \
    simd_cast(const SimdArrayType__<T, N, V, M> &x)                                      \
    {                                                                                    \
        vc_debug_("simd_cast{single bisectable2}(", ")\n", x);                           \
        return simd_cast<Return>(internal_data0(x), internal_data1(x));                  \
    }
// Instantiate for value arrays and mask arrays.
Vc_SIMDARRAY_CASTS(SimdArray)
Vc_SIMDARRAY_CASTS(SimdMaskArray)
#undef Vc_SIMDARRAY_CASTS
1681 
// simd_cast<T, offset>(SimdArray/-mask) {{{2
// Offset conversions whose input is a SimdArray/SimdMaskArray: selects the
// storage half containing the requested chunk, or falls back to copying the
// overlapping entries one by one when the chunk straddles the split.
#define Vc_SIMDARRAY_CASTS(SimdArrayType__)                                              \
    /* offset == 0 is like without offset */                                             \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST enable_if<(offset == 0), Return> simd_cast(                    \
        const SimdArrayType__<T, N, V, M> &x)                                            \
    {                                                                                    \
        vc_debug_("simd_cast{offset == 0}(", ")\n", offset, x);                          \
        return simd_cast<Return>(x);                                                     \
    }                                                                                    \
    /* forward to V */                                                                   \
    template <typename Return, int offset, typename T, std::size_t N, typename V>        \
    Vc_INTRINSIC Vc_CONST enable_if<(offset != 0), Return> simd_cast(                    \
        const SimdArrayType__<T, N, V, N> &x)                                            \
    {                                                                                    \
        vc_debug_("simd_cast{offset, forward}(", ")\n", offset, x);                      \
        return simd_cast<Return, offset>(internal_data(x));                              \
    }                                                                                    \
    /* convert from right member of SimdArray */                                         \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && offset * Return::Size >= Common::left_size(N) &&            \
                   offset != 0 && Common::left_size(N) % Return::Size == 0),             \
                  Return>                                                                \
        simd_cast(const SimdArrayType__<T, N, V, M> &x)                                  \
    {                                                                                    \
        vc_debug_("simd_cast{offset, right}(", ")\n", offset, x);                        \
        return simd_cast<Return, offset - Common::left_size(N) / Return::Size>(          \
            internal_data1(x));                                                          \
    }                                                                                    \
    /* same as above except for odd cases where offset * Return::Size doesn't fit the    \
     * left side of the SimdArray */                                                     \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && offset * Return::Size >= Common::left_size(N) &&            \
                   offset != 0 && Common::left_size(N) % Return::Size != 0),             \
                  Return>                                                                \
        simd_cast(const SimdArrayType__<T, N, V, M> &x)                                  \
    {                                                                                    \
        vc_debug_("simd_cast{offset, right, nofit}(", ")\n", offset, x);                 \
        return simd_cast_with_offset<Return,                                             \
                                     offset * Return::Size - Common::left_size(N)>(      \
            internal_data1(x));                                                          \
    }                                                                                    \
    /* convert from left member of SimdArray */                                          \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && /*offset * Return::Size < Common::left_size(N) &&*/         \
                   offset != 0 && (offset + 1) * Return::Size <= Common::left_size(N)),  \
                  Return>                                                                \
        simd_cast(const SimdArrayType__<T, N, V, M> &x)                                  \
    {                                                                                    \
        vc_debug_("simd_cast{offset, left}(", ")\n", offset, x);                         \
        return simd_cast<Return, offset>(internal_data0(x));                             \
    }                                                                                    \
    /* fallback to copying scalars */                                                    \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && (offset * Return::Size < Common::left_size(N)) &&           \
                   offset != 0 && (offset + 1) * Return::Size > Common::left_size(N)),   \
                  Return>                                                                \
        simd_cast(const SimdArrayType__<T, N, V, M> &x)                                  \
    {                                                                                    \
        vc_debug_("simd_cast{offset, copy scalars}(", ")\n", offset, x);                 \
        using R = typename Return::EntryType;                                            \
        Return r = Return::Zero();                                                       \
        for (std::size_t i = offset * Return::Size;                                     \
             i < std::min(N, (offset + 1) * Return::Size); ++i) {                        \
            r[i - offset * Return::Size] = static_cast<R>(x[i]);                         \
        }                                                                                \
        return r;                                                                        \
    }
// Instantiate for value arrays and mask arrays.
Vc_SIMDARRAY_CASTS(SimdArray)
Vc_SIMDARRAY_CASTS(SimdMaskArray)
#undef Vc_SIMDARRAY_CASTS
1762 // simd_cast_drop_arguments (definitions) {{{2
1763 template <typename Return, typename From>
1764 Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x)
1765 {
1766  return simd_cast<Return>(x);
1767 }
// All collected arguments (xs... plus the trailing x) together are still
// smaller than Return, so nothing more needs dropping: forward the whole pack
// to the plain simd_cast.
1768 template <typename Return, typename... Froms>
1769 Vc_INTRINSIC Vc_CONST
1770  enable_if<(are_all_types_equal<Froms...>::value &&
1771  sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
1772  Return>
1773  simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x)
1774 {
1775  return simd_cast<Return>(xs..., x);
1776 }
1777 // The following function can be implemented without the sizeof...(From) overload.
1778 // However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
1779 // function in two works around the issue.
// Recursive case: the arguments over-cover Return, so drop the last (unnamed)
// argument and recurse until the remaining pack fits.
1780 template <typename Return, typename From, typename... Froms>
1781 Vc_INTRINSIC Vc_CONST enable_if<
1782  (are_all_types_equal<From, Froms...>::value &&
1783  (1 + sizeof...(Froms)) * From::Size >= Return::Size && sizeof...(Froms) != 0),
1784  Return>
1785 simd_cast_drop_arguments(Froms... xs, From x, From)
1786 {
1787  return simd_cast_drop_arguments<Return, Froms...>(xs..., x);
1788 }
// Two-argument base case of the ICC workaround split: a single From already
// covers Return, so drop the second argument and cast the first alone.
1789 template <typename Return, typename From>
1790 Vc_INTRINSIC Vc_CONST
1791  enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
1792  simd_cast_drop_arguments(From x, From)
1793 {
1794  return simd_cast_drop_arguments<Return>(x);
1795 }
1796 
1797 // simd_cast_with_offset (definitions) {{{2
1798  template <typename Return, std::size_t offset, typename From>
1799  Vc_INTRINSIC Vc_CONST
1800  enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0),
1801  Return> simd_cast_with_offset(const From &x)
1802 {
1803  return simd_cast<Return, offset / Return::Size>(x);
1804 }
// Unaligned offset with a non-atomic SimdArray/SimdMaskArray Return: build the
// two storage halves separately, the second half with its offset advanced by
// the width of the first half.
1805 template <typename Return, std::size_t offset, typename From>
1806 Vc_INTRINSIC Vc_CONST
1807  enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
1808  ((Traits::isSimdArray<Return>::value &&
1809  !Traits::isAtomicSimdArray<Return>::value) ||
1810  (Traits::isSimdMaskArray<Return>::value &&
1811  !Traits::isAtomicSimdMaskArray<Return>::value))),
1812  Return>
1813  simd_cast_with_offset(const From &x)
1814 {
1815  using R0 = typename Return::storage_type0;
1816  using R1 = typename Return::storage_type1;
1817  return {simd_cast_with_offset<R0, offset>(x),
1818  simd_cast_with_offset<R1, offset + R0::Size>(x)};
1819 }
// Unaligned offset with an atomic (single native vector) Return: first shift
// the source by the misalignment (offset % Return::Size), then perform the now
// chunk-aligned cast.
1820 template <typename Return, std::size_t offset, typename From>
1821 Vc_INTRINSIC Vc_CONST
1822  enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
1823  ((Traits::isSimdArray<Return>::value &&
1824  Traits::isAtomicSimdArray<Return>::value) ||
1825  (Traits::isSimdMaskArray<Return>::value &&
1826  Traits::isAtomicSimdMaskArray<Return>::value))),
1827  Return>
1828  simd_cast_with_offset(const From &x)
1829 {
1830  return simd_cast<Return, offset / Return::Size>(x.shifted(offset % Return::Size));
1831 }
1832 template <typename Return, std::size_t offset, typename From, typename... Froms>
1833 Vc_INTRINSIC Vc_CONST
1834  enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
1835  simd_cast_with_offset(const From &x, const Froms &... xs)
1836 {
1837  return simd_cast<Return>(x, xs...);
1838 }
1839 
1840 // simd_cast_without_last (definition) {{{2
1841 template <typename Return, typename T, typename... From>
1842 Vc_INTRINSIC Vc_CONST Return simd_cast_without_last(const From &... xs, const T &)
1843 {
1844  return simd_cast<Return>(xs...);
1845 }
1846 
1847 // simd_cast_interleaved_argument_order (definitions) {{{2
1848 
// extract_interleaved<I>(a0, a..., b0, b...) selects the I-th element of the
// interleaved sequence a0, b0, a1, b1, ...: I == 0 yields the head of the
// first pack, I == 1 the head of the second, and I > 1 drops both heads and
// recurses with I - 2.
1850 template <std::size_t I, typename T0, typename... Ts>
1851 Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(const T0 &a0,
1852  const Ts &...,
1853  const T0 &,
1854  const Ts &...)
1855 {
1856  return a0;
1857 }
// I == 1: the head of the second pack.
1859 template <std::size_t I, typename T0, typename... Ts>
1860 Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(const T0 &,
1861  const Ts &...,
1862  const T0 &b0,
1863  const Ts &...)
1864 {
1865  return b0;
1866 }
// I > 1: drop the head of each pack and recurse.
1868 template <std::size_t I, typename T0, typename... Ts>
1869 Vc_INTRINSIC Vc_CONST enable_if<(I > 1), T0> extract_interleaved(const T0 &,
1870  const Ts &... a,
1871  const T0 &,
1872  const Ts &... b)
1873 {
1874  return extract_interleaved<I - 2, Ts...>(a..., b...);
1875 }
// Expands Indexes... so that extract_interleaved picks elements alternately
// from a... and b..., then casts the interleaved pack in one go.
1878 template <typename Return, typename... Ts, std::size_t... Indexes>
1879 Vc_INTRINSIC Vc_CONST Return
1880  simd_cast_interleaved_argument_order_1(index_sequence<Indexes...>, const Ts &... a,
1881  const Ts &... b)
1882 {
1883  return simd_cast<Return>(extract_interleaved<Indexes, Ts...>(a..., b...)...);
1884 }
// Entry point: casts the arguments reordered as a0, b0, a1, b1, ... by handing
// an index sequence of length 2 * sizeof...(Ts) to the helper above.
1887 template <typename Return, typename... Ts>
1888 Vc_INTRINSIC Vc_CONST Return
1889  simd_cast_interleaved_argument_order(const Ts &... a, const Ts &... b)
1890 {
1891  using seq = make_index_sequence<sizeof...(Ts)*2>;
1892  return simd_cast_interleaved_argument_order_1<Return, Ts...>(seq(), a..., b...);
1893 }
1894 
1895 // binary min/max functions (internal) {{{1
1896 namespace internal
1897 {
// Generates element-wise binary min/max for SimdArray: the generic (two-member
// storage, M != N) overload recurses into both halves; the atomic (M == N)
// overload forwards to the native vector's min/max.
1898 #define Vc_BINARY_FUNCTION__(name__) \
1899  template <typename T, std::size_t N, typename V, std::size_t M> \
1900  SimdArray<T, N, V, M> Vc_INTRINSIC Vc_PURE \
1901  name__(const SimdArray<T, N, V, M> &l, const SimdArray<T, N, V, M> &r) \
1902  { \
1903  return {name__(internal_data0(l), internal_data0(r)), \
1904  name__(internal_data1(l), internal_data1(r))}; \
1905  } \
1906  template <typename T, std::size_t N, typename V> \
1907  SimdArray<T, N, V, N> Vc_INTRINSIC Vc_PURE \
1908  name__(const SimdArray<T, N, V, N> &l, const SimdArray<T, N, V, N> &r) \
1909  { \
1910  return SimdArray<T, N, V, N>{name__(internal_data(l), internal_data(r))}; \
1911  }
1912 Vc_BINARY_FUNCTION__(min)
1913 Vc_BINARY_FUNCTION__(max)
1914 #undef Vc_BINARY_FUNCTION__
1915 } // namespace internal
1916 // conditional_assign {{{1
// conditional_assign<Op>(lhs, mask, rhs) dispatches a compound assignment to
// the write-masked view of lhs; e.g. conditional_assign<Operator::PlusAssign>
// performs lhs(mask) += rhs. One overload is generated per Operator value.
1917 #define Vc_CONDITIONAL_ASSIGN(name__, op__) \
1918  template <Operator O, typename T, std::size_t N, typename M, typename U> \
1919  Vc_INTRINSIC enable_if<O == Operator::name__, void> conditional_assign( \
1920  SimdArray<T, N> &lhs, M &&mask, U &&rhs) \
1921  { \
1922  lhs(mask) op__ rhs; \
1923  }
1924 Vc_CONDITIONAL_ASSIGN( Assign, =)
1925 Vc_CONDITIONAL_ASSIGN( PlusAssign, +=)
1926 Vc_CONDITIONAL_ASSIGN( MinusAssign, -=)
1927 Vc_CONDITIONAL_ASSIGN( MultiplyAssign, *=)
1928 Vc_CONDITIONAL_ASSIGN( DivideAssign, /=)
1929 Vc_CONDITIONAL_ASSIGN( RemainderAssign, %=)
1930 Vc_CONDITIONAL_ASSIGN( XorAssign, ^=)
1931 Vc_CONDITIONAL_ASSIGN( AndAssign, &=)
1932 Vc_CONDITIONAL_ASSIGN( OrAssign, |=)
1933 Vc_CONDITIONAL_ASSIGN( LeftShiftAssign,<<=)
1934 Vc_CONDITIONAL_ASSIGN(RightShiftAssign,>>=)
1935 #undef Vc_CONDITIONAL_ASSIGN
1936 
// Unary variants: masked increment/decrement. These return the value of the
// expression (e.g. lhs(mask)++) as a SimdArray<T, N>.
1937 #define Vc_CONDITIONAL_ASSIGN(name__, expr__) \
1938  template <Operator O, typename T, std::size_t N, typename M> \
1939  Vc_INTRINSIC enable_if<O == Operator::name__, SimdArray<T, N>> conditional_assign( \
1940  SimdArray<T, N> &lhs, M &&mask) \
1941  { \
1942  return expr__; \
1943  }
1944 Vc_CONDITIONAL_ASSIGN(PostIncrement, lhs(mask)++)
1945 Vc_CONDITIONAL_ASSIGN( PreIncrement, ++lhs(mask))
1946 Vc_CONDITIONAL_ASSIGN(PostDecrement, lhs(mask)--)
1947 Vc_CONDITIONAL_ASSIGN( PreDecrement, --lhs(mask))
1948 #undef Vc_CONDITIONAL_ASSIGN
1949 // transpose_impl {{{1
1950 namespace Common
1951 {
// 4x4 transpose where each SimdArray wraps exactly one native vector (M == N):
// strip the SimdArray wrappers from both the outputs and the proxy inputs and
// forward to the native-vector transpose_impl overload.
1952  template <int L, typename T, std::size_t N, typename V>
1953  inline enable_if<L == 4, void> transpose_impl(
1954  SimdArray<T, N, V, N> * Vc_RESTRICT r[],
1955  const TransposeProxy<SimdArray<T, N, V, N>, SimdArray<T, N, V, N>,
1956  SimdArray<T, N, V, N>, SimdArray<T, N, V, N>> &proxy)
1957  {
1958  V *Vc_RESTRICT r2[L] = {&internal_data(*r[0]), &internal_data(*r[1]),
1959  &internal_data(*r[2]), &internal_data(*r[3])};
1960  transpose_impl<L>(
1961  &r2[0], TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
1962  internal_data(std::get<1>(proxy.in)),
1963  internal_data(std::get<2>(proxy.in)),
1964  internal_data(std::get<3>(proxy.in))});
1965  }
// L == 2 with scalar storage: four 2-entry inputs are scattered into two
// 4-entry outputs via scalar moves between the nested storage halves.
// NOTE(review): the outputs are SimdArray<T, 4, V, 1> while the inputs are
// SimdArray<T, 2, V, 1> — each output row collects one lane from all four
// inputs; confirm against the transpose_impl<2> callers below.
1966  template <int L, typename T, typename V>
1967  inline enable_if<(L == 2), void> transpose_impl(
1968  SimdArray<T, 4, V, 1> *Vc_RESTRICT r[],
1969  const TransposeProxy<SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>,
1970  SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>> &proxy)
1971  {
1972  auto &lo = *r[0];
1973  auto &hi = *r[1];
1974  internal_data0(internal_data0(lo)) = internal_data0(std::get<0>(proxy.in));
1975  internal_data1(internal_data0(lo)) = internal_data0(std::get<1>(proxy.in));
1976  internal_data0(internal_data1(lo)) = internal_data0(std::get<2>(proxy.in));
1977  internal_data1(internal_data1(lo)) = internal_data0(std::get<3>(proxy.in));
1978  internal_data0(internal_data0(hi)) = internal_data1(std::get<0>(proxy.in));
1979  internal_data1(internal_data0(hi)) = internal_data1(std::get<1>(proxy.in));
1980  internal_data0(internal_data1(hi)) = internal_data1(std::get<2>(proxy.in));
1981  internal_data1(internal_data1(hi)) = internal_data1(std::get<3>(proxy.in));
1982  }
// L == 4 with scalar storage (one entry per V): split the four rows into two
// pairs and transpose each pair of storage halves via the L == 2 overload.
// NOTE(review): `using H = SimdArray<T, 2>` presumes the storage halves of
// SimdArray<T, N, V, 1> are 2 entries wide, i.e. effectively N == 4 despite
// the looser (N > 1) constraint — TODO confirm against instantiation sites.
1983  template <int L, typename T, std::size_t N, typename V>
1984  inline enable_if<(L == 4 && N > 1), void> transpose_impl(
1985  SimdArray<T, N, V, 1> *Vc_RESTRICT r[],
1986  const TransposeProxy<SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>,
1987  SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>> &proxy)
1988  {
1989  SimdArray<T, N, V, 1> *Vc_RESTRICT r0[L / 2] = {r[0], r[1]};
1990  SimdArray<T, N, V, 1> *Vc_RESTRICT r1[L / 2] = {r[2], r[3]};
1991  using H = SimdArray<T, 2>;
1992  transpose_impl<2>(
1993  &r0[0], TransposeProxy<H, H, H, H>{internal_data0(std::get<0>(proxy.in)),
1994  internal_data0(std::get<1>(proxy.in)),
1995  internal_data0(std::get<2>(proxy.in)),
1996  internal_data0(std::get<3>(proxy.in))});
1997  transpose_impl<2>(
1998  &r1[0], TransposeProxy<H, H, H, H>{internal_data1(std::get<0>(proxy.in)),
1999  internal_data1(std::get<1>(proxy.in)),
2000  internal_data1(std::get<2>(proxy.in)),
2001  internal_data1(std::get<3>(proxy.in))});
2002  }
2003  /* TODO:
2004  template <typename T, std::size_t N, typename V, std::size_t VSize>
2005  inline enable_if<(N > VSize), void> transpose_impl(
2006  std::array<SimdArray<T, N, V, VSize> * Vc_RESTRICT, 4> & r,
2007  const TransposeProxy<SimdArray<T, N, V, VSize>, SimdArray<T, N, V, VSize>,
2008  SimdArray<T, N, V, VSize>, SimdArray<T, N, V, VSize>> &proxy)
2009  {
2010  typedef SimdArray<T, N, V, VSize> SA;
2011  std::array<typename SA::storage_type0 * Vc_RESTRICT, 4> r0 = {
2012  {&internal_data0(*r[0]), &internal_data0(*r[1]), &internal_data0(*r[2]),
2013  &internal_data0(*r[3])}};
2014  transpose_impl(
2015  r0, TransposeProxy<typename SA::storage_type0, typename SA::storage_type0,
2016  typename SA::storage_type0, typename SA::storage_type0>{
2017  internal_data0(std::get<0>(proxy.in)),
2018  internal_data0(std::get<1>(proxy.in)),
2019  internal_data0(std::get<2>(proxy.in)),
2020  internal_data0(std::get<3>(proxy.in))});
2021 
2022  std::array<typename SA::storage_type1 * Vc_RESTRICT, 4> r1 = {
2023  {&internal_data1(*r[0]), &internal_data1(*r[1]), &internal_data1(*r[2]),
2024  &internal_data1(*r[3])}};
2025  transpose_impl(
2026  r1, TransposeProxy<typename SA::storage_type1, typename SA::storage_type1,
2027  typename SA::storage_type1, typename SA::storage_type1>{
2028  internal_data1(std::get<0>(proxy.in)),
2029  internal_data1(std::get<1>(proxy.in)),
2030  internal_data1(std::get<2>(proxy.in)),
2031  internal_data1(std::get<3>(proxy.in))});
2032  }
2033  */
2034 } // namespace Common
2035 
2036 // Traits static assertions {{{1
// Compile-time checks that Traits::has_no_allocated_data sees through every
// cv-qualifier and reference category applied to SimdArray.
2037 static_assert(Traits::has_no_allocated_data<const volatile Vc::SimdArray<int, 4> &>::value, "");
2038 static_assert(Traits::has_no_allocated_data<const volatile Vc::SimdArray<int, 4>>::value, "");
2039 static_assert(Traits::has_no_allocated_data<volatile Vc::SimdArray<int, 4> &>::value, "");
2040 static_assert(Traits::has_no_allocated_data<volatile Vc::SimdArray<int, 4>>::value, "");
2041 static_assert(Traits::has_no_allocated_data<const Vc::SimdArray<int, 4> &>::value, "");
2042 static_assert(Traits::has_no_allocated_data<const Vc::SimdArray<int, 4>>::value, "");
2043 static_assert(Traits::has_no_allocated_data<Vc::SimdArray<int, 4>>::value, "");
2044 static_assert(Traits::has_no_allocated_data<Vc::SimdArray<int, 4> &&>::value, "");
2045 // }}}1
2047 
2048 } // namespace Vc_VERSIONED_NAMESPACE
#endif // VC_COMMON_SIMDARRAY_H_
// vim: foldmethod=marker
Vc::frexp
Vc::Vector< T > frexp(const Vc::Vector< T > &x, Vc::SimdArray< int, size()> *e)
Convert floating-point number to fractional and integral components.
Vc::IndexesFromZero
constexpr VectorSpecialInitializerIndexesFromZero IndexesFromZero
The special object Vc::IndexesFromZero can be used to construct Vector objects initialized to the values 0, 1, 2, 3, 4, and so on.
Definition: types.h:95
Vc::min
Vc::Vector< T > min(const Vc::Vector< T > &x, const Vc::Vector< T > &y)
Vc::ldexp
Vc::Vector< T > ldexp(Vc::Vector< T > x, Vc::SimdArray< int, size()> e)
Multiply floating-point number by integral power of 2.
Vc::operator<<
std::ostream & operator<<(std::ostream &out, const Vc::Vector< T, Abi > &v)
Prints the contents of a vector into a stream object.
Definition: IO:118
Vc::DefaultLoadTag
UnalignedTag DefaultLoadTag
The default load tag type uses unaligned (non-streaming) loads.
Definition: loadstoreflags.h:167
Vc::abs
Vc::Vector< T > abs(const Vc::Vector< T > &v)
Returns the absolute value of v.
Vc::max
Vc::Vector< T > max(const Vc::Vector< T > &x, const Vc::Vector< T > &y)
std
Definition: vector.h:258
Vc::simd_cast
enable_if< std::is_same< To, Traits::decay< From > >::value, To > simd_cast(From &&x)
Casts the argument x from type From to type To.
Definition: simd_cast.h:49
Vc::vector
Common::AdaptSubscriptOperator< std::vector< T, Allocator >> vector
An adapted std::vector container with an additional subscript operator which implements gather and scatter operations.
Definition: vector:51
Vc::SimdizeDetail::assign
void assign(Adapter< S, T, N > &a, size_t i, const S &x)
Assigns one scalar object x to a SIMD slot at offset i in the simdized object a.
Definition: simdize.h:941
Vc::isnan
Vc::Mask< T > isnan(const Vc::Vector< T > &x)
Vc::Zero
constexpr VectorSpecialInitializerZero Zero
The special object Vc::Zero can be used to construct Vector and Mask objects initialized to zero/false.
Definition: types.h:85
Vc::SimdizeDetail::shifted
Adapter< S, T, N > shifted(const Adapter< S, T, N > &a, int shift)
Returns a new vectorized object where each entry is shifted by shift.
Definition: simdize.h:1002
Vc
Vector Classes Namespace.
Definition: cpuid.h:33
Vc::One
constexpr VectorSpecialInitializerOne One
The special object Vc::One can be used to construct Vector and Mask objects initialized to one/true...
Definition: types.h:90
Vc::MemoryAlignment
constexpr std::size_t MemoryAlignment
Specifies the most conservative memory alignment necessary for aligned loads and stores of Vector typ...
Definition: vector.h:219
Vc::where
constexpr WhereImpl::WhereMask< M > where(const M &mask)
Conditional assignment.
Definition: where.h:230
Vc::Unaligned
constexpr UnalignedTag Unaligned
Use this object for a flags parameter to request unaligned loads and stores.
Definition: loadstoreflags.h:197