29 #ifndef VC_COMMON_SIMDARRAY_H_
30 #define VC_COMMON_SIMDARRAY_H_
34 #if defined Vc_DEBUG_SIMD_CAST || defined Vc_DEBUG_SORTED
40 #include "writemaskedvector.h"
41 #include "simdarrayhelper.h"
42 #include "simdmaskarray.h"
44 #include "interleave.h"
45 #include "indexsequence.h"
46 #include "transpose.h"
49 namespace Vc_VERSIONED_NAMESPACE
// Forward-declares two overloads of the binary function name__, both taking
// two SimdArray operands of identical type and returning that same type:
//  - one for the general SimdArray<T, N, V, M>, and
//  - one for SimdArray<T, N, V, N> (fourth template argument equal to N).
// The macro is expanded for min and max and then undefined again.
54 #define Vc_DECLARE_BINARY_FUNCTION__(name__) \
55 template <typename T, std::size_t N, typename V, std::size_t M> \
56 SimdArray<T, N, V, M> Vc_INTRINSIC_L Vc_PURE_L \
57 name__(const SimdArray<T, N, V, M> &l, const SimdArray<T, N, V, M> &r) \
58 Vc_INTRINSIC_R Vc_PURE_R; \
59 template <typename T, std::size_t N, typename V> \
60 SimdArray<T, N, V, N> Vc_INTRINSIC_L Vc_PURE_L \
61 name__(const SimdArray<T, N, V, N> &l, const SimdArray<T, N, V, N> &r) \
62 Vc_INTRINSIC_R Vc_PURE_R;
63 Vc_DECLARE_BINARY_FUNCTION__(
min)
64 Vc_DECLARE_BINARY_FUNCTION__(
max)
65 #undef Vc_DECLARE_BINARY_FUNCTION__
67 template <
typename T> Vc_INTRINSIC Vc_PURE T
min(
const T &l,
const T &r)
73 template <
typename T> Vc_INTRINSIC Vc_PURE T
max(
const T &l,
const T &r)
// Binary helper returning l * r. It is passed as the combining function of
// the `product` reduction generated by Vc_REDUCTION_FUNCTION__ further down.
79 template <
typename T> T Vc_INTRINSIC Vc_PURE product_helper__(
const T &l,
const T &r) {
return l * r; }
// Binary helper returning l + r. It is passed as the combining function of
// the `sum` reduction generated by Vc_REDUCTION_FUNCTION__ further down.
80 template <
typename T> T Vc_INTRINSIC Vc_PURE sum_helper__(
const T &l,
const T &r) {
return l + r; }
88 #define Vc_CURRENT_CLASS_NAME SimdArray
98 template <
typename T, std::
size_t N,
typename VectorType_>
101 ((Common::nextPowerOfTwo(N) * (sizeof(VectorType_) / VectorType_::size()) - 1) & 127) +
103 1) SimdArray<T, N, VectorType_, N>
// Compile-time guard: T must be one of double, float, int32_t, uint32_t,
// int16_t or uint16_t.
105 static_assert(std::is_same<T, double>::value || std::is_same<T, float>::value ||
106 std::is_same<T, int32_t>::value ||
107 std::is_same<T, uint32_t>::value ||
108 std::is_same<T, int16_t>::value ||
109 std::is_same<T, uint16_t>::value,
110 "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, "
111 "int16_t, uint16_t }");
// In this specialization the storage is the single vector type VectorType_;
// VectorType, vector_type and storage_type all name it.
114 using VectorType = VectorType_;
115 using vector_type = VectorType;
116 using storage_type = vector_type;
// Entry type as declared by the underlying vector type.
117 using vectorentry_type =
typename vector_type::VectorEntryType;
// Element type exposed to users.
118 using value_type = T;
// Mask type with the same T, N and vector type.
119 using mask_type = SimdMaskArray<T, N, vector_type>;
// Index type: a SimdArray of int with the same number of entries.
120 using index_type = SimdArray<int, N>;
// Number of entries; always N.
121 static constexpr std::size_t size() {
return N; }
// Alternative spellings of the types above.
122 using Mask = mask_type;
123 using MaskType = Mask;
124 using MaskArgument =
const MaskType &;
125 using VectorEntryType = vectorentry_type;
126 using EntryType = value_type;
127 using IndexType = index_type;
128 using AsArg =
const SimdArray &;
// Same value as size(), available as a static data member.
129 static constexpr std::size_t Size = size();
// Default construction, copy/move construction and copy assignment are all
// compiler-generated.
133 Vc_INTRINSIC SimdArray() =
default;
136 Vc_INTRINSIC SimdArray(
const SimdArray &) =
default;
137 Vc_INTRINSIC SimdArray(SimdArray &&) =
default;
138 Vc_INTRINSIC SimdArray &operator=(
const SimdArray &) =
default;
// Broadcast constructors: initialize `data` from a single scalar value.
// NOTE(review): the non-const lvalue and rvalue overloads are presumably here
// so that such arguments do not select the variadic forwarding constructor
// declared later in this class - confirm.
141 Vc_INTRINSIC SimdArray(
const value_type &a) : data(a) {}
142 Vc_INTRINSIC SimdArray(value_type &a) : data(a) {}
143 Vc_INTRINSIC SimdArray(value_type &&a) : data(a) {}
146 typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
147 Vc_INTRINSIC SimdArray(U a)
148 : SimdArray(static_cast<value_type>(a))
153 template <
typename U,
typename V>
154 Vc_INTRINSIC SimdArray(
const SimdArray<U, N, V> &x, enable_if<N == V::size()> = nullarg)
155 : data(
simd_cast<vector_type>(internal_data(x)))
158 template <
typename U,
typename V>
159 Vc_INTRINSIC SimdArray(
const SimdArray<U, N, V> &x,
160 enable_if<(N > V::size() && N <= 2 * V::size())> = nullarg)
161 : data(
simd_cast<vector_type>(internal_data(internal_data0(x)), internal_data(internal_data1(x))))
164 template <
typename U,
typename V>
165 Vc_INTRINSIC SimdArray(
const SimdArray<U, N, V> &x,
166 enable_if<(N > 2 * V::size() && N <= 4 * V::size())> = nullarg)
167 : data(
simd_cast<vector_type>(internal_data(internal_data0(internal_data0(x))),
168 internal_data(internal_data1(internal_data0(x))),
169 internal_data(internal_data0(internal_data1(x))),
170 internal_data(internal_data1(internal_data1(x)))))
174 template <
typename V, std::
size_t Pieces, std::
size_t Index>
175 Vc_INTRINSIC SimdArray(Common::Segment<V, Pieces, Index> &&x)
176 : data(
simd_cast<vector_type, Index>(x.data))
180 Vc_INTRINSIC SimdArray(
const std::initializer_list<value_type> &init)
183 #if defined Vc_CXX14 && 0 // doesn't compile yet
184 static_assert(init.size() == size(),
"The initializer_list argument to "
185 "SimdArray<T, N> must contain exactly N "
188 Vc_ASSERT(init.size() == size());
195 typename = enable_if<Traits::is_simd_vector<V>::value && !Traits::isSimdArray<V>::value>>
196 explicit Vc_INTRINSIC SimdArray(
const V &x)
203 template <
typename V,
204 typename = enable_if<
205 Traits::is_simd_vector<V>::value && !Traits::isSimdArray<V>::value &&
206 std::is_convertible<T, typename V::EntryType>::value && V::size() == N>>
207 Vc_INTRINSIC
operator V()
const
212 #include "gatherinterface.h"
215 template <
typename... Args,
216 typename = enable_if<!Traits::is_cast_arguments<Args...>::value &&
217 !Traits::is_gather_signature<Args...>::value &&
218 !Traits::is_initializer_list<Args...>::value>>
219 explicit Vc_INTRINSIC SimdArray(Args &&... args)
220 : data(
std::forward<Args>(args)...)
224 template <std::
size_t Offset>
225 explicit Vc_INTRINSIC SimdArray(
226 Common::AddOffset<VectorSpecialInitializerIndexesFromZero, Offset>)
229 data += value_type(Offset);
// Forwards to setZero() of the wrapped vector.
232 Vc_INTRINSIC
void setZero() { data.setZero(); }
// Masked variant: forwards to setZero() of the wrapped vector, passing the
// mask's underlying data obtained via internal_data(k).
233 Vc_INTRINSIC
void setZero(mask_type k) { data.setZero(internal_data(k)); }
// Forwards to setZeroInverted() of the wrapped vector.
234 Vc_INTRINSIC
void setZeroInverted() { data.setZeroInverted(); }
// Masked variant: forwards to setZeroInverted() of the wrapped vector, passing
// the mask's underlying data obtained via internal_data(k).
235 Vc_INTRINSIC
void setZeroInverted(mask_type k) { data.setZeroInverted(internal_data(k)); }
238 template <
typename Op,
typename... Args>
239 static Vc_INTRINSIC SimdArray fromOperation(Op op, Args &&... args)
242 op(r.data, Common::actual_value(op, std::forward<Args>(args))...);
246 static Vc_INTRINSIC SimdArray
Zero()
250 static Vc_INTRINSIC SimdArray
One()
258 static Vc_INTRINSIC SimdArray Random()
260 return fromOperation(Common::Operations::random());
263 template <
typename... Args> Vc_INTRINSIC
void load(Args &&... args)
265 data.load(std::forward<Args>(args)...);
268 template <
typename... Args> Vc_INTRINSIC
void store(Args &&... args)
const
270 data.store(std::forward<Args>(args)...);
273 Vc_INTRINSIC mask_type operator!()
const
278 Vc_INTRINSIC SimdArray operator-()
const
283 Vc_INTRINSIC SimdArray operator~()
const
288 template <
typename U,
289 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
290 Vc_INTRINSIC Vc_CONST SimdArray
operator<<(U x)
const
294 template <
typename U,
295 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
296 Vc_INTRINSIC SimdArray &operator<<=(U x)
301 template <
typename U,
302 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
303 Vc_INTRINSIC Vc_CONST SimdArray operator>>(U x)
const
307 template <
typename U,
308 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
309 Vc_INTRINSIC SimdArray &operator>>=(U x)
// Generates, for one operator token `op`, the member `operator op` (returns a
// new SimdArray built from `data op rhs.data`) and the compound assignment
// `operator op=` (applies `op=` to `data`). Expanded for every operator listed
// by Vc_ALL_ARITHMETICS, Vc_ALL_BINARY and Vc_ALL_SHIFTS, then undefined.
315 #define Vc_BINARY_OPERATOR_(op) \
316 Vc_INTRINSIC Vc_CONST SimdArray operator op(const SimdArray &rhs) const \
318 return {data op rhs.data}; \
320 Vc_INTRINSIC SimdArray &operator op##=(const SimdArray &rhs) \
322 data op## = rhs.data; \
325 Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_)
326 Vc_ALL_BINARY(Vc_BINARY_OPERATOR_)
327 Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_)
328 #undef Vc_BINARY_OPERATOR_
// Generates, for one comparison token `op`, the member `operator op`, which
// returns a mask_type built from `data op rhs.data`. Expanded for every
// operator listed by Vc_ALL_COMPARES.
330 #define Vc_COMPARES(op) \
331 Vc_INTRINSIC mask_type operator op(const SimdArray &rhs) const \
333 return {data op rhs.data}; \
335 Vc_ALL_COMPARES(Vc_COMPARES)
338 Vc_INTRINSIC decltype(std::declval<vector_type &>()[0]) operator[](
std::
size_t i)
// Read-only element access: returns entry i of the wrapped vector by value.
342 Vc_INTRINSIC value_type operator[](std::size_t i)
const {
return data[i]; }
344 Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(
const mask_type &k)
349 Vc_INTRINSIC
void assign(
const SimdArray &v,
const mask_type &k)
351 data.assign(v.data, internal_data(k));
// Generates two member functions named name__: an unmasked one returning
// data.name__(), and a masked one returning data.name__(internal_data(mask)).
// Expanded for min, max, product and sum, then undefined.
355 #define Vc_REDUCTION_FUNCTION__(name__) \
356 Vc_INTRINSIC Vc_PURE value_type name__() const { return data.name__(); } \
358 Vc_INTRINSIC Vc_PURE value_type name__(mask_type mask) const \
360 return data.name__(internal_data(mask)); \
362 Vc_REDUCTION_FUNCTION__(
min)
363 Vc_REDUCTION_FUNCTION__(
max)
364 Vc_REDUCTION_FUNCTION__(product)
365 Vc_REDUCTION_FUNCTION__(sum)
366 #undef Vc_REDUCTION_FUNCTION__
// Returns a SimdArray constructed from the wrapped vector's partialSum().
367 Vc_INTRINSIC Vc_PURE SimdArray partialSum()
const {
return data.partialSum(); }
369 Vc_INTRINSIC
void fusedMultiplyAdd(
const SimdArray &factor,
const SimdArray &summand)
371 data.fusedMultiplyAdd(internal_data(factor), internal_data(summand));
374 template <
typename F> Vc_INTRINSIC SimdArray apply(F &&f)
const
376 return {data.apply(std::forward<F>(f))};
378 template <
typename F> Vc_INTRINSIC SimdArray apply(F &&f,
const mask_type &k)
const
380 return {data.apply(std::forward<F>(f), k)};
383 Vc_INTRINSIC SimdArray
shifted(
int amount)
const
385 return {data.shifted(amount)};
388 template <std::
size_t NN>
389 Vc_INTRINSIC SimdArray
shifted(
int amount,
const SimdArray<value_type, NN> &shiftIn)
392 return {data.shifted(amount, simd_cast<VectorType>(shiftIn))};
395 Vc_INTRINSIC SimdArray rotated(
int amount)
const
397 return {data.rotated(amount)};
400 Vc_INTRINSIC SimdArray interleaveLow(SimdArray x)
const
402 return {data.interleaveLow(x.data)};
404 Vc_INTRINSIC SimdArray interleaveHigh(SimdArray x)
const
406 return {data.interleaveHigh(x.data)};
409 Vc_INTRINSIC SimdArray reversed()
const
411 return {data.reversed()};
414 Vc_INTRINSIC SimdArray sorted()
const
416 return {data.sorted()};
419 template <
typename G>
static Vc_INTRINSIC SimdArray generate(
const G &gen)
421 return {VectorType::generate(gen)};
// The free internal_data() function templates (mutable and const overloads)
// are friends of this class.
424 friend VectorType &internal_data<>(SimdArray &x);
425 friend const VectorType &internal_data<>(
const SimdArray &x);
// Constructs the array by moving an object of the wrapped vector type into
// `data`.
428 Vc_INTRINSIC SimdArray(VectorType &&x) : data(
std::move(x)) {}
432 template <
typename T, std::
size_t N,
typename VectorType> constexpr std::size_t SimdArray<T, N, VectorType, N>::Size;
433 template <
typename T, std::
size_t N,
typename VectorType>
435 template <
typename T, std::
size_t N,
typename VectorType>
436 Vc_INTRINSIC VectorType &internal_data(SimdArray<T, N, VectorType, N> &x)
440 template <
typename T, std::
size_t N,
typename VectorType>
441 Vc_INTRINSIC
const VectorType &internal_data(
const SimdArray<T, N, VectorType, N> &x)
447 template <
typename T, std::
size_t N,
typename VectorType>
448 template <
typename MT,
typename IT>
449 inline void SimdArray<T, N, VectorType, N>::gatherImplementation(
const MT *mem,
452 data.gather(mem, std::forward<IT>(indexes));
454 template <
typename T, std::
size_t N,
typename VectorType>
455 template <
typename MT,
typename IT>
456 inline void SimdArray<T, N, VectorType, N>::gatherImplementation(
const MT *mem,
460 data.gather(mem, std::forward<IT>(indexes), mask);
464 template <
typename T, std::
size_t N,
typename VectorType, std::
size_t>
467 ((Common::nextPowerOfTwo(N) * (sizeof(VectorType) / VectorType::size()) - 1) & 127) +
// Compile-time guard: T must be one of double, float, int32_t, uint32_t,
// int16_t or uint16_t.
471 static_assert(std::is_same<T, double>::value ||
472 std::is_same<T, float>::value ||
473 std::is_same<T, int32_t>::value ||
474 std::is_same<T, uint32_t>::value ||
475 std::is_same<T, int16_t>::value ||
476 std::is_same<T, uint16_t>::value,
"SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, int16_t, uint16_t }");
// SimdArrayTraits<T, N> supplies the sizes (N0, N1) and the storage types of
// the two parts this array is composed of.
478 using my_traits = SimdArrayTraits<T, N>;
479 static constexpr std::size_t N0 = my_traits::N0;
480 static constexpr std::size_t N1 = my_traits::N1;
// Helper used throughout the class (Split::lo / Split::hi) to obtain the
// argument for the first and second part respectively.
481 using Split = Common::Split<N0>;
484 using storage_type0 =
typename my_traits::storage_type0;
485 using storage_type1 =
typename my_traits::storage_type1;
// The first part must hold exactly N0 entries.
486 static_assert(storage_type0::size() == N0,
"");
488 using vector_type = VectorType;
489 using vectorentry_type =
typename storage_type0::vectorentry_type;
// Entry type carrying the Vc_MAY_ALIAS attribute; operator[] reinterprets
// `this` as a pointer to this type.
490 typedef vectorentry_type alias_type Vc_MAY_ALIAS;
// Element type exposed to users.
491 using value_type = T;
// Mask type with the same T, N and vector type.
492 using mask_type = SimdMaskArray<T, N, vector_type>;
// Index type: a SimdArray of int with the same number of entries.
493 using index_type = SimdArray<int, N>;
// Number of entries; always N.
494 static constexpr std::size_t size() {
return N; }
// Alternative spellings of the types above.
495 using Mask = mask_type;
496 using MaskType = Mask;
497 using MaskArgument =
const MaskType &;
498 using VectorEntryType = vectorentry_type;
499 using EntryType = value_type;
500 using IndexType = index_type;
501 using AsArg =
const SimdArray &;
// Same value as size(), available as a static data member.
502 static constexpr std::size_t Size = size();
503 static constexpr std::size_t MemoryAlignment =
// Default construction, copy/move construction and copy assignment are all
// compiler-generated.
511 SimdArray() =
default;
514 SimdArray(
const SimdArray &) =
default;
515 SimdArray(SimdArray &&) =
default;
516 SimdArray &operator=(
const SimdArray &) =
default;
// Broadcast constructor: both parts are initialized from the same scalar.
519 Vc_INTRINSIC SimdArray(value_type a) : data0(a), data1(a) {}
522 typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
524 : SimdArray(static_cast<value_type>(a))
529 template <
typename U,
531 typename = enable_if<Traits::is_load_store_flag<Flags>::value>>
532 explicit Vc_INTRINSIC SimdArray(
const U *mem, Flags f = Flags())
533 : data0(mem, f), data1(mem + storage_type0::size(), f)
538 Vc_INTRINSIC SimdArray(
const std::initializer_list<value_type> &init)
540 , data1(init.begin() + storage_type0::size(),
Vc::
Unaligned)
542 #if defined Vc_CXX14 && 0 // doesn't compile yet
543 static_assert(init.size() == size(),
"The initializer_list argument to "
544 "SimdArray<T, N> must contain exactly N "
547 Vc_ASSERT(init.size() == size());
551 #include "gatherinterface.h"
554 template <
typename... Args,
555 typename = enable_if<!Traits::is_cast_arguments<Args...>::value &&
556 !Traits::is_initializer_list<Args...>::value &&
557 !Traits::is_gather_signature<Args...>::value &&
558 !Traits::is_load_arguments<Args...>::value>>
559 explicit Vc_INTRINSIC SimdArray(Args &&... args)
560 : data0(Split::lo(args)...)
562 , data1(Split::hi(
std::forward<Args>(args))...)
567 template <
typename V>
568 Vc_INTRINSIC
explicit SimdArray(
570 enable_if<(Traits::is_simd_vector<V>::value && Traits::simd_vector_size<V>::value == N &&
571 !(std::is_convertible<Traits::entry_type_of<V>, T>::value &&
572 Traits::isSimdArray<V>::value))> = nullarg)
573 : data0(Split::lo(x)), data1(Split::hi(x))
578 template <
typename V>
579 Vc_INTRINSIC SimdArray(
581 enable_if<(Traits::isSimdArray<V>::value && Traits::simd_vector_size<V>::value == N &&
582 std::is_convertible<Traits::entry_type_of<V>, T>::value)> = nullarg)
583 : data0(Split::lo(x)), data1(Split::hi(x))
589 template <
typename V,
590 typename = enable_if<
591 Traits::is_simd_vector<V>::value && !Traits::isSimdArray<V>::value &&
592 std::is_convertible<T, typename V::EntryType>::value && V::size() == N>>
600 Vc_INTRINSIC
void setZero()
// Masked setZero: forwards to setZero() of each part, giving data0 the
// Split::lo portion of the mask and data1 the Split::hi portion.
605 Vc_INTRINSIC
void setZero(
const mask_type &k)
607 data0.setZero(Split::lo(k));
608 data1.setZero(Split::hi(k));
// Forwards to setZeroInverted() of both parts.
610 Vc_INTRINSIC
void setZeroInverted()
612 data0.setZeroInverted();
613 data1.setZeroInverted();
// Masked setZeroInverted: forwards to setZeroInverted() of each part, giving
// data0 the Split::lo portion of the mask and data1 the Split::hi portion.
615 Vc_INTRINSIC
void setZeroInverted(
const mask_type &k)
617 data0.setZeroInverted(Split::lo(k));
618 data1.setZeroInverted(Split::hi(k));
622 template <
typename Op,
typename... Args>
623 static Vc_INTRINSIC SimdArray fromOperation(Op op, Args &&... args)
626 storage_type0::fromOperation(op, Split::lo(args)...),
629 storage_type1::fromOperation(op, Split::hi(std::forward<Args>(args))...)};
633 static Vc_INTRINSIC SimdArray
Zero()
637 static Vc_INTRINSIC SimdArray
One()
645 static Vc_INTRINSIC SimdArray Random()
647 return fromOperation(Common::Operations::random());
650 template <
typename U,
typename... Args> Vc_INTRINSIC
void load(
const U *mem, Args &&... args)
652 data0.load(mem, Split::lo(args)...);
654 data1.load(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
657 template <
typename U,
typename... Args> Vc_INTRINSIC
void store(U *mem, Args &&... args)
const
659 data0.store(mem, Split::lo(args)...);
661 data1.store(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
// Logical negation: applies operator! to each part and returns the two results
// combined into a mask_type.
664 Vc_INTRINSIC mask_type operator!()
const
666 return {!data0, !data1};
// Unary minus: applies operator- to each part and returns the two results
// combined into a new SimdArray.
669 Vc_INTRINSIC SimdArray operator-()
const
671 return {-data0, -data1};
// Bitwise complement: applies operator~ to each part and returns the two
// results combined into a new SimdArray.
674 Vc_INTRINSIC SimdArray operator~()
const
676 return {~data0, ~data1};
680 template <
typename U,
681 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
682 Vc_INTRINSIC Vc_CONST SimdArray
operator<<(U x)
const
684 return {data0 << x, data1 << x};
686 template <
typename U,
687 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
688 Vc_INTRINSIC SimdArray &operator<<=(U x)
694 template <
typename U,
695 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
696 Vc_INTRINSIC Vc_CONST SimdArray operator>>(U x)
const
698 return {data0 >> x, data1 >> x};
700 template <
typename U,
701 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
702 Vc_INTRINSIC SimdArray &operator>>=(U x)
// Generates, for one operator token `op`, the member `operator op` (applies
// `op` to the matching parts of *this and rhs and returns the pair as a new
// SimdArray) and the compound assignment `operator op=` (applies `op=` to
// data0 and data1). Expanded for every operator listed by Vc_ALL_ARITHMETICS,
// Vc_ALL_BINARY and Vc_ALL_SHIFTS, then undefined.
710 #define Vc_BINARY_OPERATOR_(op) \
711 Vc_INTRINSIC Vc_CONST SimdArray operator op(const SimdArray &rhs) const \
713 return {data0 op rhs.data0, data1 op rhs.data1}; \
715 Vc_INTRINSIC SimdArray &operator op##=(const SimdArray &rhs) \
717 data0 op## = rhs.data0; \
718 data1 op## = rhs.data1; \
721 Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_)
722 Vc_ALL_BINARY(Vc_BINARY_OPERATOR_)
723 Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_)
724 #undef Vc_BINARY_OPERATOR_
// Generates, for one comparison token `op`, the member `operator op`, which
// compares the matching parts of *this and rhs and returns the two results
// combined into a mask_type. Expanded for every operator listed by
// Vc_ALL_COMPARES.
726 #define Vc_COMPARES(op) \
727 Vc_INTRINSIC mask_type operator op(const SimdArray &rhs) const \
729 return {data0 op rhs.data0, data1 op rhs.data1}; \
731 Vc_ALL_COMPARES(Vc_COMPARES)
735 Vc_INTRINSIC value_type operator[](std::size_t i)
const
737 const auto tmp =
reinterpret_cast<const alias_type *
>(
this);
741 Vc_INTRINSIC alias_type &operator[](std::size_t i)
743 auto tmp =
reinterpret_cast<alias_type *
>(
this);
747 Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(
const mask_type &k)
// Masked assignment from v: forwards to assign() of each part, pairing data0
// with v.data0 and internal_data0(k), and data1 with v.data1 and
// internal_data1(k).
752 Vc_INTRINSIC
void assign(
const SimdArray &v,
const mask_type &k)
754 data0.assign(v.data0, internal_data0(k));
755 data1.assign(v.data1, internal_data1(k));
// Generates the reduction member name__ in three flavours:
//  - unmasked, selected when both parts have the same size: the parts are
//    first combined with binary_fun__ and the result is then reduced;
//  - unmasked, selected when the part sizes differ: each part is reduced on its
//    own and the two scalars are combined with binary_fun__;
//  - masked: if the Split::lo part of the mask is empty only data1 is reduced,
//    if the Split::hi part is empty only data0 is reduced, otherwise both
//    masked reductions are combined with binary_fun__.
// The expansions visible here are for max, product and sum; the macro is
// undefined afterwards.
759 #define Vc_REDUCTION_FUNCTION__(name__, binary_fun__) \
760 template <typename ForSfinae = void> \
761 Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value && \
762 storage_type0::size() == storage_type1::size(), \
766 return binary_fun__(data0, data1).name__(); \
769 template <typename ForSfinae = void> \
770 Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value && \
771 storage_type0::size() != storage_type1::size(), \
775 return binary_fun__(data0.name__(), data1.name__()); \
778 Vc_INTRINSIC value_type name__(const mask_type &mask) const \
780 if (Vc_IS_UNLIKELY(Split::lo(mask).isEmpty())) { \
781 return data1.name__(Split::hi(mask)); \
782 } else if (Vc_IS_UNLIKELY(Split::hi(mask).isEmpty())) { \
783 return data0.name__(Split::lo(mask)); \
785 return binary_fun__(data0.name__(Split::lo(mask)), \
786 data1.name__(Split::hi(mask))); \
790 Vc_REDUCTION_FUNCTION__(
max,
Vc::internal::max)
791 Vc_REDUCTION_FUNCTION__(product, internal::product_helper__)
792 Vc_REDUCTION_FUNCTION__(sum, internal::sum_helper__)
793 #undef Vc_REDUCTION_FUNCTION__
794 Vc_INTRINSIC Vc_PURE SimdArray partialSum() const
796 auto ps0 = data0.partialSum();
798 tmp[0] += ps0[data0.size() - 1];
799 return {std::move(ps0), tmp.partialSum()};
// Forwards to fusedMultiplyAdd() of each part: data0 receives the Split::lo
// portions of factor and summand, data1 the Split::hi portions.
802 void fusedMultiplyAdd(
const SimdArray &factor,
const SimdArray &summand)
804 data0.fusedMultiplyAdd(Split::lo(factor), Split::lo(summand));
805 data1.fusedMultiplyAdd(Split::hi(factor), Split::hi(summand));
809 template <
typename F> Vc_INTRINSIC SimdArray apply(F &&f)
const
811 return {data0.apply(f), data1.apply(f)};
813 template <
typename F> Vc_INTRINSIC SimdArray apply(F &&f,
const mask_type &k)
const
815 return {data0.apply(f, Split::lo(k)), data1.apply(f, Split::hi(k))};
819 inline SimdArray
shifted(
int amount)
const
821 constexpr
int SSize = Size;
822 constexpr
int SSize0 = storage_type0::Size;
823 constexpr
int SSize1 = storage_type1::Size;
828 if (amount > -SSize0) {
829 return {data0.shifted(amount), data1.shifted(amount, data0)};
831 if (amount == -SSize0) {
834 if (amount < -SSize0) {
840 if (amount >= SSize) {
842 }
else if (amount >= SSize0) {
846 }
else if (amount >= SSize1) {
849 return {data0.shifted(amount, data1), data1.shifted(amount)};
854 template <std::
size_t NN>
856 !(std::is_same<storage_type0, storage_type1>::value &&
859 shifted(
int amount,
const SimdArray<value_type, NN> &shiftIn)
const
861 constexpr
int SSize = Size;
863 return SimdArray::generate([&](
int i) -> value_type {
866 return operator[](i);
867 }
else if (i >= -SSize) {
868 return shiftIn[i + SSize];
873 return SimdArray::generate([&](
int i) -> value_type {
876 return operator[](i);
877 }
else if (i < 2 * SSize) {
878 return shiftIn[i - SSize];
884 template <std::
size_t NN>
886 enable_if<(std::is_same<storage_type0, storage_type1>::value &&
889 shifted(
int amount,
const SimdArray<value_type, NN> &shiftIn)
const
891 constexpr
int SSize = Size;
893 if (amount > -static_cast<int>(storage_type0::Size)) {
894 return {data0.shifted(amount, internal_data1(shiftIn)),
895 data1.shifted(amount, data0)};
897 if (amount == -static_cast<int>(storage_type0::Size)) {
898 return {storage_type0(internal_data1(shiftIn)), storage_type1(data0)};
900 if (amount > -SSize) {
902 internal_data1(shiftIn)
903 .shifted(amount + static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
904 data0.shifted(amount + static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
906 if (amount == -SSize) {
909 if (amount > -2 * SSize) {
910 return shiftIn.shifted(amount + SSize);
916 if (amount < static_cast<int>(storage_type0::Size)) {
917 return {data0.shifted(amount, data1),
918 data1.shifted(amount, internal_data0(shiftIn))};
920 if (amount == static_cast<int>(storage_type0::Size)) {
921 return {storage_type0(data1), storage_type1(internal_data0(shiftIn))};
923 if (amount < SSize) {
924 return {data1.shifted(amount - static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
925 internal_data0(shiftIn)
926 .shifted(amount - static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
928 if (amount == SSize) {
931 if (amount < 2 * SSize) {
932 return shiftIn.shifted(amount - SSize);
938 Vc_INTRINSIC SimdArray rotated(
int amount)
const
940 amount %= int(size());
943 }
else if (amount < 0) {
947 auto &&d0cvtd =
simd_cast<storage_type1>(data0);
948 auto &&d1cvtd =
simd_cast<storage_type0>(data1);
949 constexpr
int size0 = storage_type0::size();
950 constexpr
int size1 = storage_type1::size();
952 if (amount == size0 && std::is_same<storage_type0, storage_type1>::value) {
953 return {std::move(d1cvtd), std::move(d0cvtd)};
954 }
else if (amount < size1) {
955 return {data0.shifted(amount, d1cvtd), data1.shifted(amount, d0cvtd)};
956 }
else if (amount == size1) {
957 return {data0.shifted(amount, d1cvtd), std::move(d0cvtd)};
958 }
else if (
int(size()) - amount < size1) {
959 return {data0.shifted(amount -
int(size()), d1cvtd.shifted(size1 - size0)),
960 data1.shifted(amount -
int(size()), data0.shifted(size0 - size1))};
961 }
else if (
int(size()) - amount == size1) {
962 return {data0.shifted(-size1, d1cvtd.shifted(size1 - size0)),
963 simd_cast<storage_type1>(data0.shifted(size0 - size1))};
964 }
else if (amount <= size0) {
965 return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
966 simd_cast<storage_type1>(data0.shifted(amount - size1))};
968 return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
969 simd_cast<storage_type1>(data0.shifted(amount - size1, d1cvtd))};
// Both parts of the result are built from the first parts of *this and x
// only: the first is data0.interleaveLow(x.data0), the second is
// data0.interleaveHigh(x.data0) converted to storage_type1.
975 Vc_INTRINSIC SimdArray interleaveLow(
const SimdArray &x)
const
978 return {data0.interleaveLow(x.data0),
979 simd_cast<storage_type1>(data0.interleaveHigh(x.data0))};
981 Vc_INTRINSIC SimdArray interleaveHigh(
const SimdArray &x)
const
983 return interleaveHighImpl(
985 std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
// Overload selected with std::true_type (interleaveHigh dispatches on whether
// storage_type0::Size == storage_type1::Size). Both parts of the result are
// built from the second parts of *this and x only.
989 Vc_INTRINSIC SimdArray interleaveHighImpl(
const SimdArray &x, std::true_type)
const
991 return {data1.interleaveLow(x.data1), data1.interleaveHigh(x.data1)};
// Overload selected with std::false_type (part sizes differ). The first part of
// the result is data0.interleaveHigh(x.data0) shifted by storage_type1::Size,
// with data1.interleaveLow(x.data1) (converted to storage_type0) as the
// shift-in argument; the second part is data1.interleaveHigh(x.data1).
993 inline SimdArray interleaveHighImpl(
const SimdArray &x, std::false_type)
const
995 return {data0.interleaveHigh(x.data0)
996 .shifted(storage_type1::Size,
997 simd_cast<storage_type0>(data1.interleaveLow(x.data1))),
998 data1.interleaveHigh(x.data1)};
1002 inline SimdArray reversed() const
1004 if (std::is_same<storage_type0, storage_type1>::value) {
1005 return {
simd_cast<storage_type0>(data1).reversed(),
1006 simd_cast<storage_type1>(data0).reversed()};
1008 return {data0.shifted(storage_type1::Size, data1).reversed(),
1009 simd_cast<storage_type1>(data0.reversed().shifted(
1010 storage_type0::Size - storage_type1::Size))};
1013 inline SimdArray sorted() const
1016 std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
1019 Vc_INTRINSIC SimdArray sortedImpl(std::true_type)
const
1021 #ifdef Vc_DEBUG_SORTED
1022 std::cerr <<
"-- " << data0 << data1 <<
'\n';
1024 const auto a = data0.sorted();
1025 const auto b = data1.sorted().reversed();
1028 return {lo.sorted(), hi.sorted()};
1031 Vc_INTRINSIC SimdArray sortedImpl(std::false_type)
const
1033 using SortableArray = SimdArray<value_type, Common::nextPowerOfTwo(size())>;
1034 auto sortable =
simd_cast<SortableArray>(*this);
1035 for (std::size_t i = Size; i < SortableArray::Size; ++i) {
1036 using limits = std::numeric_limits<value_type>;
1037 if (limits::has_infinity) {
1038 sortable[i] = limits::infinity();
1043 return simd_cast<SimdArray>(sortable.sorted());
// Builds a SimdArray from a generator callable. The first part is produced by
// storage_type0::generate(gen); the second part by storage_type1::generate
// with a wrapper that calls gen(i + N0), i.e. with the index offset by the
// size of the first part.
1072 template <
typename G>
static Vc_INTRINSIC SimdArray generate(
const G &gen)
1074 auto tmp = storage_type0::generate(gen);
1079 return {std::move(tmp),
1080 storage_type1::generate([&](std::size_t i) {
return gen(i + N0); })};
// The free internal_data0() / internal_data1() function templates (mutable and
// const overloads) are friends of this class.
1084 friend storage_type0 &internal_data0<>(SimdArray &x);
1085 friend storage_type1 &internal_data1<>(SimdArray &x);
1086 friend const storage_type0 &internal_data0<>(
const SimdArray &x);
1087 friend const storage_type1 &internal_data1<>(
const SimdArray &x);
// Constructs the array from its two parts, moving x into data0 and y into
// data1.
1090 Vc_INTRINSIC SimdArray(storage_type0 &&x, storage_type1 &&y)
1091 : data0(
std::move(x)), data1(
std::move(y))
// The two parts that together hold the entries of this array.
1095 storage_type0 data0;
1096 storage_type1 data1;
1098 #undef Vc_CURRENT_CLASS_NAME
1099 template <
typename T, std::
size_t N,
typename VectorType, std::
size_t M> constexpr std::size_t SimdArray<T, N, VectorType, M>::Size;
1100 template <
typename T, std::
size_t N,
typename VectorType, std::
size_t M>
1104 template <
typename T, std::
size_t N,
typename VectorType, std::
size_t M>
1105 template <
typename MT,
typename IT>
1106 inline void SimdArray<T, N, VectorType, M>::gatherImplementation(
const MT *mem,
1109 data0.gather(mem, Split::lo(Common::Operations::gather(),
1112 data1.gather(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)));
1114 template <
typename T, std::
size_t N,
typename VectorType, std::
size_t M>
1115 template <
typename MT,
typename IT>
1116 inline void SimdArray<T, N, VectorType, M>::gatherImplementation(
const MT *mem,
1117 IT &&indexes, MaskArgument mask)
1119 data0.gather(mem, Split::lo(Common::Operations::gather(), indexes),
1122 data1.gather(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)),
1127 template <
typename T, std::
size_t N,
typename V, std::
size_t M>
1128 Vc_INTRINSIC
typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
1129 SimdArray<T, N, V, M> &x)
1133 template <
typename T, std::
size_t N,
typename V, std::
size_t M>
1134 Vc_INTRINSIC
typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
1135 SimdArray<T, N, V, M> &x)
1139 template <
typename T, std::
size_t N,
typename V, std::
size_t M>
1140 Vc_INTRINSIC
const typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
1141 const SimdArray<T, N, V, M> &x)
1145 template <
typename T, std::
size_t N,
typename V, std::
size_t M>
1146 Vc_INTRINSIC
const typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
1147 const SimdArray<T, N, V, M> &x)
1153 namespace result_vector_type_internal
// T with any reference removed first and top-level const/volatile removed
// afterwards.
1155 template <
typename T>
1156 using type =
typename std::remove_cv<typename std::remove_reference<T>::type>::type;
// Integral constant that is true when T is an integral type and either
// sizeof(T) > sizeof(int) or T is exactly long or unsigned long (the latter two
// are included regardless of their size).
1158 template <
typename T>
1159 using is_integer_larger_than_int = std::integral_constant<
1160 bool, std::is_integral<T>::value &&(
sizeof(T) >
sizeof(
int) ||
1161 std::is_same<T, long>::value ||
1162 std::is_same<T, unsigned long>::value)>;
1165 typename L,
typename R, std::size_t N = Traits::isSimdArray<L>::value
1166 ? Traits::simd_vector_size<L>::value
1167 : Traits::simd_vector_size<R>::value,
1168 bool = (Traits::isSimdArray<L>::value ||
1169 Traits::isSimdArray<R>::value)
1171 !std::is_same<type<L>, type<R>>::value
1174 ((std::is_arithmetic<type<L>>::value &&
1175 !is_integer_larger_than_int<type<L>>::value) ||
1176 (std::is_arithmetic<type<R>>::value &&
1177 !is_integer_larger_than_int<
1180 (Traits::is_simd_vector<L>::value && !Traits::isSimdArray<L>::value) ||
1181 (Traits::is_simd_vector<R>::value &&
1182 !Traits::isSimdArray<R>::value)
1183 ) >
struct evaluate;
1185 template <
typename L,
typename R, std::
size_t N>
struct evaluate<L, R, N, true>
1188 using LScalar = Traits::entry_type_of<L>;
1189 using RScalar = Traits::entry_type_of<R>;
1191 template <
bool B,
typename True,
typename False>
1192 using conditional =
typename std::conditional<B, True, False>::type;
1205 using type = SimdArray<
1206 conditional<(std::is_integral<LScalar>::value &&std::is_integral<RScalar>::value &&
1207 sizeof(LScalar) <
sizeof(
int) &&
1208 sizeof(RScalar) <
sizeof(
int)),
1209 conditional<(
sizeof(LScalar) ==
sizeof(RScalar)),
1210 conditional<std::is_unsigned<LScalar>::value, LScalar, RScalar>,
1211 conditional<(sizeof(LScalar) >
sizeof(RScalar)), LScalar, RScalar>>,
1212 decltype(std::declval<LScalar>() + std::declval<RScalar>())>,
// Convenience alias for the nested `type` of
// result_vector_type_internal::evaluate<L, R>. It is the return type of the
// mixed-operand operator templates generated by Vc_BINARY_OPERATORS_.
1218 template <
typename L,
typename R>
1219 using result_vector_type =
typename result_vector_type_internal::evaluate<L, R>::type;
1222 std::is_same<result_vector_type<
short int, Vc::SimdArray<short unsigned int, 32ul>>,
1223 Vc::SimdArray<short unsigned int, 32ul>>::value,
1224 "result_vector_type does not work");
// Generates a free `operator op__` template for operands of types L and R:
// both operands are forwarded into result_vector_type<L, R> and op__ is then
// applied to the two converted values. Expanded for every operator listed by
// Vc_ALL_ARITHMETICS and Vc_ALL_BINARY, then undefined.
1226 #define Vc_BINARY_OPERATORS_(op__) \
1227 template <typename L, typename R> \
1228 Vc_INTRINSIC result_vector_type<L, R> operator op__(L &&lhs, R &&rhs) \
1230 using Return = result_vector_type<L, R>; \
1231 return Return(std::forward<L>(lhs)) op__ Return(std::forward<R>(rhs)); \
1233 Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATORS_)
1234 Vc_ALL_BINARY(Vc_BINARY_OPERATORS_)
1235 #undef Vc_BINARY_OPERATORS_
// Generates a free comparison `operator op__` template for operands of types L
// and R: both operands are forwarded into result_vector_type<L, R> and
// compared; the return type is that type's mask_type. Expanded for every
// operator listed by Vc_ALL_COMPARES, then undefined.
1236 #define Vc_BINARY_OPERATORS_(op__) \
1237 template <typename L, typename R> \
1238 Vc_INTRINSIC typename result_vector_type<L, R>::mask_type operator op__(L &&lhs, \
1241 using Promote = result_vector_type<L, R>; \
1242 return Promote(std::forward<L>(lhs)) op__ Promote(std::forward<R>(rhs)); \
1244 Vc_ALL_COMPARES(Vc_BINARY_OPERATORS_)
1245 #undef Vc_BINARY_OPERATORS_
// abs for SimdArray: delegates to SimdArray<T, N>::fromOperation with the
// Common::Operations::Abs operation and x as its argument.
1248 template <
typename T, std::
size_t N> SimdArray<T, N>
abs(
const SimdArray<T, N> &x)
1250 return SimdArray<T, N>::fromOperation(Common::Operations::Abs(), x);
// isnan for SimdArray: delegates to SimdMaskArray<T, N>::fromOperation with
// the Common::Operations::Isnan operation and x as its argument; the result is
// a mask.
1252 template <
typename T, std::
size_t N> SimdMaskArray<T, N>
isnan(
const SimdArray<T, N> &x)
1254 return SimdMaskArray<T, N>::fromOperation(Common::Operations::Isnan(), x);
// frexp for SimdArray: delegates to SimdArray<T, N>::fromOperation with the
// Common::Operations::Frexp operation, passing x and the pointer e through.
// NOTE(review): presumably *e receives the exponents, as with std::frexp -
// confirm against Common::Operations::Frexp.
1256 template <
typename T, std::
size_t N>
1257 SimdArray<T, N>
frexp(
const SimdArray<T, N> &x, SimdArray<int, N> *e)
1259 return SimdArray<T, N>::fromOperation(Common::Operations::Frexp(), x, e);
// ldexp for SimdArray: delegates to SimdArray<T, N>::fromOperation with the
// Common::Operations::Ldexp operation, passing x and e through.
1261 template <
typename T, std::
size_t N>
1262 SimdArray<T, N>
ldexp(
const SimdArray<T, N> &x,
const SimdArray<int, N> &e)
1264 return SimdArray<T, N>::fromOperation(Common::Operations::Ldexp(), x, e);
// Scalar fallback, overload for two or more inputs (sizeof...(From) != 0).
// The visible loop copies the N entries of `last` into r, starting at index
// N * sizeof...(From), converting each one to Return::EntryType.
// NOTE(review): the statement that initialises r from the leading arguments
// xs... and the return statement are not visible in this excerpt.
1272 template <
typename Return, std::size_t N,
typename T,
typename... From>
1273 Vc_INTRINSIC Vc_CONST enable_if<
sizeof...(From) != 0, Return>
1274 simd_cast_impl_smaller_input(
const From &... xs,
const T &last)
1277 for (
size_t i = 0; i < N; ++i) {
1278 r[i + N *
sizeof...(From)] = static_cast<typename Return::EntryType>(last[i]);
// Single-input overload: r starts as a value-initialised Return, and entries
// [0, N) are copied from `last` with a static_cast to Return::EntryType.
1282 template <
typename Return, std::
size_t N,
typename T>
1283 Vc_INTRINSIC Vc_CONST Return simd_cast_impl_smaller_input(
const T &last)
1285 Return r = Return();
1286 for (
size_t i = 0; i < N; ++i) {
1287 r[i] =
static_cast<typename Return::EntryType
>(last[i]);
// Scalar fallback, overload for two or more inputs (sizeof...(From) != 0).
// The visible loop fills r from index N * sizeof...(From) up to Return::Size,
// reading `last` at the index rebased to zero, so only as many entries of
// `last` are read as still fit into Return.
// NOTE(review): the statement that initialises r from xs... and the return
// statement are not visible in this excerpt.
1291 template <
typename Return, std::size_t N,
typename T,
typename... From>
1292 Vc_INTRINSIC Vc_CONST enable_if<
sizeof...(From) != 0, Return> simd_cast_impl_larger_input(
1293 const From &... xs,
const T &last)
1296 for (
size_t i = N *
sizeof...(From); i < Return::Size; ++i) {
1297 r[i] =
static_cast<typename Return::EntryType
>(last[i - N *
sizeof...(From)]);
// Single-input overload: r starts as a value-initialised Return, and its
// Return::size() entries are copied from the front of `last`. The template
// parameter N is not used by the visible lines.
1301 template <
typename Return, std::
size_t N,
typename T>
1302 Vc_INTRINSIC Vc_CONST Return simd_cast_impl_larger_input(
const T &last)
1304 Return r = Return();
1305 for (
size_t i = 0; i < Return::size(); ++i) {
1306 r[i] =
static_cast<typename Return::EntryType
>(last[i]);
// Forward declaration. Takes the pack xs... followed by one extra, unnamed
// argument of type T; the definition appears later in the file.
1312 template <
typename Return,
typename T,
typename... From>
1313 Vc_INTRINSIC_L Vc_CONST_L Return
1314 simd_cast_without_last(
const From &... xs,
const T &) Vc_INTRINSIC_R Vc_CONST_R;
// Compile-time predicate over a type list.
//  - a single type yields true;
//  - for T0, T1, Ts... the value is is_same<T0, T1> combined (&&) with the
//    predicate applied to T1, Ts..., i.e. every neighbouring pair must match.
// The primary template is only declared, so the visible specialisations do not
// cover an empty list.
1317 template <typename... Ts> struct are_all_types_equal;
1318 template <typename T>
1319 struct are_all_types_equal<T> : public
std::integral_constant<
bool, true>
1322 template <
typename T0,
typename T1,
typename... Ts>
1323 struct are_all_types_equal<T0, T1, Ts...>
1324 :
public std::integral_constant<
1325 bool, std::is_same<T0, T1>::value && are_all_types_equal<T1, Ts...>::value>
// Forward declaration. Receives two argument packs of identical types, a...
// followed by b...; the definition appears later in the file.
1349 template <
typename Return,
typename... Ts>
1350 Vc_INTRINSIC Vc_CONST Return
1351 simd_cast_interleaved_argument_order(
const Ts &... a,
const Ts &... b);
// Declarations of simd_cast_with_offset<Return, offset>(...). `offset` is a
// std::size_t and is compared directly against From::Size. The overloads are
// selected through enable_if.

// offset == 0, one or more arguments that all have the same type.
1355 template <
typename Return, std::size_t offset,
typename From,
typename... Froms>
1356 Vc_INTRINSIC Vc_CONST
1357 enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
1358 simd_cast_with_offset(
const From &x,
const Froms &... xs);
// 0 < offset < From::Size, and offset is a multiple of Return::Size.
1360 template <
typename Return, std::
size_t offset,
typename From>
1361 Vc_INTRINSIC Vc_CONST
1362 enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0), Return>
1363 simd_cast_with_offset(
const From &x);
// 0 < offset < From::Size, offset is not a multiple of Return::Size, and
// Return is a SimdArray / SimdMaskArray for which the isAtomic trait is false.
1365 template <
typename Return, std::
size_t offset,
typename From>
1366 Vc_INTRINSIC Vc_CONST
1367 enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
1368 ((Traits::isSimdArray<Return>::value &&
1369 !Traits::isAtomicSimdArray<Return>::value) ||
1370 (Traits::isSimdMaskArray<Return>::value &&
1371 !Traits::isAtomicSimdMaskArray<Return>::value))),
1373 simd_cast_with_offset(
const From &x);
// Same offset conditions, but Return is a SimdArray / SimdMaskArray for which
// the isAtomic trait is true.
1375 template <
typename Return, std::
size_t offset,
typename From>
1376 Vc_INTRINSIC Vc_CONST
1377 enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
1378 ((Traits::isSimdArray<Return>::value &&
1379 Traits::isAtomicSimdArray<Return>::value) ||
1380 (Traits::isSimdMaskArray<Return>::value &&
1381 Traits::isAtomicSimdMaskArray<Return>::value))),
1383 simd_cast_with_offset(
const From &x);
// offset >= From::Size with several equal-typed arguments: the first argument
// is ignored and the call recurses on xs... with offset reduced by From::Size.
1385 template <
typename Return, std::size_t offset,
typename From,
typename... Froms>
1386 Vc_INTRINSIC Vc_CONST enable_if<
1387 (are_all_types_equal<From, Froms...>::value && From::Size <= offset), Return>
1388 simd_cast_with_offset(
const From &,
const Froms &... xs)
1390 return simd_cast_with_offset<Return, offset - From::Size>(xs...);
// offset >= From::Size with a single argument.
// NOTE(review): the parameter list and body of this overload are not visible
// in this excerpt.
1394 template <
typename Return, std::
size_t offset,
typename From>
1395 Vc_INTRINSIC Vc_CONST enable_if<(From::Size <= offset), Return> simd_cast_with_offset(
// first_type_of<Ts...> is an alias for first_type_of_impl<Ts...>::type.
// NOTE(review): the body of first_type_of_impl is not visible in this excerpt;
// judging by the name and the parameter list <T, Ts...>, `type` is presumably T.
1402 template <
typename T,
typename... Ts>
struct first_type_of_impl
1406 template <
typename... Ts>
using first_type_of =
typename first_type_of_impl<Ts...>::type;
// Declarations of simd_cast_drop_arguments<Return>(...). All arguments are
// taken by value; the definitions appear later in the file.

// Single argument, unconstrained.
1409 template <
typename Return,
typename From>
1410 Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x);
// Enabled when all Froms are the same type and sizeof...(Froms) times their
// Size is still smaller than Return::Size. Takes xs... plus one more argument
// of that same type.
1411 template <
typename Return,
typename... Froms>
1412 Vc_INTRINSIC Vc_CONST
1413 enable_if<(are_all_types_equal<Froms...>::value &&
1414 sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
1416 simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x);
// Enabled when (1 + sizeof...(Froms)) * From::Size already reaches
// Return::Size and Froms is non-empty. The trailing argument is unnamed.
1420 template <
typename Return,
typename From,
typename... Froms>
1421 Vc_INTRINSIC Vc_CONST enable_if<
1422 (are_all_types_equal<From, Froms...>::value &&
1423 (1 +
sizeof...(Froms)) * From::Size >= Return::Size &&
sizeof...(Froms) != 0),
1425 simd_cast_drop_arguments(Froms... xs, From x, From);
// Two arguments where one From already reaches Return::Size. The second
// argument is unnamed.
1426 template <
typename Return,
typename From>
1427 Vc_INTRINSIC Vc_CONST
1428 enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
1429 simd_cast_drop_arguments(From x, From);
// Debug tracing helper used by the simd_cast overloads.
// With Vc_DEBUG_SIMD_CAST defined, vc_debug_ writes to std::cerr: the prefix,
// the first argument, every further argument preceded by ", ", and finally the
// suffix. debugDoNothing is an empty function taking an initializer_list; the
// braced list passed to it holds the expansion of the `std::cerr << ", " << args`
// pack.
// NOTE(review): the #else / #endif lines and the body of the second overload
// are not visible in this excerpt; presumably it is the no-op used when
// debugging is disabled.
1433 #ifdef Vc_DEBUG_SIMD_CAST
1434 void debugDoNothing(
const std::initializer_list<void *> &) {}
1435 template <
typename T0,
typename... Ts>
1436 inline void vc_debug_(
const char *prefix,
const char *suffix,
const T0 &arg0,
1439 std::cerr << prefix << arg0;
1440 debugDoNothing({&(std::cerr <<
", " << args)...});
1441 std::cerr << suffix;
1444 template <
typename T0,
typename... Ts>
1445 Vc_INTRINSIC
void vc_debug_(
const char *,
const char *,
const T0 &,
const Ts &...)
// simd_cast overloads converting one or more equal-typed SIMD vectors (or
// masks) that are not themselves SimdArray / SimdMaskArray into a SimdArray /
// SimdMaskArray. Instantiated once for (SimdArray, vector) and once for
// (SimdMaskArray, mask). The overloads are identified by their debug tag:
//  {1} Return satisfies the isAtomic trait and
//      From::Size * sizeof...(Froms) < Return::Size:
//      brace-initialise Return from simd_cast<Return::storage_type>(x, xs...).
//  {2} Return satisfies the isAtomic trait and
//      From::Size * sizeof...(Froms) >= Return::Size:
//      delegate to simd_cast_without_last.
//  {3} Return does not satisfy the isAtomic trait and
//      left_size(Return::Size) < From::Size * (1 + sizeof...(Froms)):
//      first member from simd_cast_drop_arguments<R0>, second member from
//      simd_cast_with_offset<R1, R0::Size>.
//  {4} Return does not satisfy the isAtomic trait and
//      left_size(Return::Size) >= From::Size * (1 + sizeof...(Froms)):
//      first member from simd_cast<R0>(x, xs...), second member R1::Zero().
1452 #define Vc_SIMDARRAY_CASTS(SimdArrayType__, trait_name__) \
1453 template <typename Return, typename From, typename... Froms> \
1454 Vc_INTRINSIC Vc_CONST enable_if<(Traits::isAtomic##SimdArrayType__<Return>::value && \
1455 !Traits::is##SimdArrayType__<From>::value && \
1456 Traits::is_simd_##trait_name__<From>::value && \
1457 From::Size * sizeof...(Froms) < Return::Size && \
1458 are_all_types_equal<From, Froms...>::value), \
1460 simd_cast(From x, Froms... xs) \
1462 vc_debug_("simd_cast{1}(", ")\n", x, xs...); \
1463 return {simd_cast<typename Return::storage_type>(x, xs...)}; \
1465 template <typename Return, typename From, typename... Froms> \
1466 Vc_INTRINSIC Vc_CONST enable_if<(Traits::isAtomic##SimdArrayType__<Return>::value && \
1467 !Traits::is##SimdArrayType__<From>::value && \
1468 Traits::is_simd_##trait_name__<From>::value && \
1469 From::Size * sizeof...(Froms) >= Return::Size && \
1470 are_all_types_equal<From, Froms...>::value), \
1472 simd_cast(From x, Froms... xs) \
1474 vc_debug_("simd_cast{2}(", ")\n", x, xs...); \
1475 return {simd_cast_without_last<Return, From, Froms...>(x, xs...)}; \
1477 template <typename Return, typename From, typename... Froms> \
1478 Vc_INTRINSIC Vc_CONST \
1479 enable_if<(Traits::is##SimdArrayType__<Return>::value && \
1480 !Traits::isAtomic##SimdArrayType__<Return>::value && \
1481 !Traits::is##SimdArrayType__<From>::value && \
1482 Traits::is_simd_##trait_name__<From>::value && \
1483 Common::left_size(Return::Size) < \
1484 From::Size * (1 + sizeof...(Froms)) && \
1485 are_all_types_equal<From, Froms...>::value), \
1487 simd_cast(From x, Froms... xs) \
1489 vc_debug_("simd_cast{3}(", ")\n", x, xs...); \
1490 using R0 = typename Return::storage_type0; \
1491 using R1 = typename Return::storage_type1; \
1492 return {simd_cast_drop_arguments<R0, Froms...>(x, xs...), \
1493 simd_cast_with_offset<R1, R0::Size>(x, xs...)}; \
1495 template <typename Return, typename From, typename... Froms> \
1496 Vc_INTRINSIC Vc_CONST \
1497 enable_if<(Traits::is##SimdArrayType__<Return>::value && \
1498 !Traits::isAtomic##SimdArrayType__<Return>::value && \
1499 !Traits::is##SimdArrayType__<From>::value && \
1500 Traits::is_simd_##trait_name__<From>::value && \
1501 Common::left_size(Return::Size) >= \
1502 From::Size * (1 + sizeof...(Froms)) && \
1503 are_all_types_equal<From, Froms...>::value), \
1505 simd_cast(From x, Froms... xs) \
1507 vc_debug_("simd_cast{4}(", ")\n", x, xs...); \
1508 using R0 = typename Return::storage_type0; \
1509 using R1 = typename Return::storage_type1; \
1510 return {simd_cast<R0>(x, xs...), R1::Zero()}; \
1512 Vc_SIMDARRAY_CASTS(SimdArray,
vector)
1513 Vc_SIMDARRAY_CASTS(SimdMaskArray, mask)
1514 #undef Vc_SIMDARRAY_CASTS
// simd_cast<Return, offset>(x) overloads converting a single SIMD vector (or
// mask) that is not a SimdArray / SimdMaskArray into a SimdArray /
// SimdMaskArray. `offset` counts in units of Return::Size: the visible code
// computes the entry offset as offset * Return::Size. Debug tags:
//  {offset, atomic}       Return satisfies the isAtomic trait; forwards to
//                         simd_cast<Return::storage_type, offset>(x).
//  {offset, split Return} Return does not satisfy the isAtomic trait; the two
//                         members are cast with simd_cast_with_offset at
//                         entries_offset and entries_offset + R0::Size.
//  {offset, R1::Zero}     Return does not satisfy the isAtomic trait; only the
//                         first member is cast, the second is R1::Zero().
// NOTE(review): several lines of this macro (function signatures and the
// right-hand side of the size comparisons that choose between the last two
// overloads) are not visible in this excerpt.
1517 #define Vc_SIMDARRAY_CASTS(SimdArrayType__, trait_name__) \
1519 template <typename Return, int offset, typename From> \
1520 Vc_INTRINSIC Vc_CONST enable_if<(Traits::isAtomic##SimdArrayType__<Return>::value && \
1521 !Traits::is##SimdArrayType__<From>::value && \
1522 Traits::is_simd_##trait_name__<From>::value), \
1526 vc_debug_("simd_cast{offset, atomic}(", ")\n", offset, x); \
1527 return {simd_cast<typename Return::storage_type, offset>(x)}; \
1530 template <typename Return, int offset, typename From> \
1531 Vc_INTRINSIC Vc_CONST \
1532 enable_if<(Traits::is##SimdArrayType__<Return>::value && \
1533 !Traits::isAtomic##SimdArrayType__<Return>::value && \
1534 !Traits::is##SimdArrayType__<From>::value && \
1535 Traits::is_simd_##trait_name__<From>::value && \
1536 Return::Size * offset + Common::left_size(Return::Size) < \
1541 vc_debug_("simd_cast{offset, split Return}(", ")\n", offset, x); \
1542 using R0 = typename Return::storage_type0; \
1543 constexpr int entries_offset = offset * Return::Size; \
1544 constexpr int entries_offset_right = entries_offset + R0::Size; \
1546 simd_cast_with_offset<typename Return::storage_type0, entries_offset>(x), \
1547 simd_cast_with_offset<typename Return::storage_type1, entries_offset_right>( \
1552 template <typename Return, int offset, typename From> \
1553 Vc_INTRINSIC Vc_CONST \
1554 enable_if<(Traits::is##SimdArrayType__<Return>::value && \
1555 !Traits::isAtomic##SimdArrayType__<Return>::value && \
1556 !Traits::is##SimdArrayType__<From>::value && \
1557 Traits::is_simd_##trait_name__<From>::value && \
1558 Return::Size * offset + Common::left_size(Return::Size) >= \
1563 vc_debug_("simd_cast{offset, R1::Zero}(", ")\n", offset, x); \
1564 using R0 = typename Return::storage_type0; \
1565 using R1 = typename Return::storage_type1; \
1566 constexpr int entries_offset = offset * Return::Size; \
1567 return {simd_cast_with_offset<R0, entries_offset>(x), R1::Zero()}; \
1569 Vc_SIMDARRAY_CASTS(SimdArray,
vector)
1570 Vc_SIMDARRAY_CASTS(SimdMaskArray, mask)
1571 #undef Vc_SIMDARRAY_CASTS
// simd_cast overloads taking one or more equal-typed SimdArray / SimdMaskArray
// arguments as input. Instantiated for SimdArray and SimdMaskArray. Debug tags:
//  {indivisible}        input is SimdArrayType__<T, N, V, N>, and there is
//                       either one argument or N * sizeof...(From) <
//                       Return::Size: cast the internal_data of all arguments.
//  {indivisible2}       same input kind, more than one argument and
//                       N * sizeof...(From) >= Return::Size: delegate to
//                       simd_cast_without_last on the internal_data.
//  {bisectable}         N != M, N a power of two ((N - 1) & N == 0) and
//                       N * sizeof...(From) < Return::Size: cast all
//                       internal_data0 parts followed by all internal_data1
//                       parts through simd_cast_interleaved_argument_order.
//  {bisectable2}        as above but N * sizeof...(From) >= Return::Size:
//                       delegate to simd_cast_without_last.
//  {remaining}          N != M, N not a power of two, total input size
//                       <= Return::Size: simd_cast_impl_smaller_input.
//  {remaining2}         N != M, N not a power of two, total input size
//                       > Return::Size: simd_cast_impl_larger_input.
//  {single bisectable}  one argument, N >= 2 * Return::Size, N a power of
//                       two: cast internal_data0(x) only.
//  {single bisectable2} one argument, Return::Size < N < 2 * Return::Size, N a
//                       power of two: cast internal_data0(x) and
//                       internal_data1(x) together.
// NOTE(review): some template-header and argument lines of this macro are not
// visible in this excerpt.
1574 #define Vc_SIMDARRAY_CASTS(SimdArrayType__) \
1576 template <typename Return, typename T, std::size_t N, typename V, typename... From> \
1577 Vc_INTRINSIC Vc_CONST \
1578 enable_if<(are_all_types_equal<SimdArrayType__<T, N, V, N>, From...>::value && \
1579 (sizeof...(From) == 0 || N * sizeof...(From) < Return::Size) && \
1580 !std::is_same<Return, SimdArrayType__<T, N, V, N>>::value), \
1582 simd_cast(const SimdArrayType__<T, N, V, N> &x0, const From &... xs) \
1584 vc_debug_("simd_cast{indivisible}(", ")\n", x0, xs...); \
1585 return simd_cast<Return>(internal_data(x0), internal_data(xs)...); \
1588 template <typename Return, typename T, std::size_t N, typename V, typename... From> \
1589 Vc_INTRINSIC Vc_CONST \
1590 enable_if<(are_all_types_equal<SimdArrayType__<T, N, V, N>, From...>::value && \
1591 (sizeof...(From) > 0 && (N * sizeof...(From) >= Return::Size)) && \
1592 !std::is_same<Return, SimdArrayType__<T, N, V, N>>::value), \
1594 simd_cast(const SimdArrayType__<T, N, V, N> &x0, const From &... xs) \
1596 vc_debug_("simd_cast{indivisible2}(", ")\n", x0, xs...); \
1597 return simd_cast_without_last< \
1598 Return, typename SimdArrayType__<T, N, V, N>::storage_type, \
1599 typename From::storage_type...>(internal_data(x0), internal_data(xs)...); \
1602 template <typename Return, typename T, std::size_t N, typename V, std::size_t M, \
1604 Vc_INTRINSIC Vc_CONST \
1605 enable_if<(N != M && \
1606 are_all_types_equal<SimdArrayType__<T, N, V, M>, From...>::value && \
1607 N * sizeof...(From) < Return::Size && ((N - 1) & N) == 0), \
1609 simd_cast(const SimdArrayType__<T, N, V, M> &x0, const From &... xs) \
1611 vc_debug_("simd_cast{bisectable}(", ")\n", x0, xs...); \
1612 return simd_cast_interleaved_argument_order< \
1613 Return, typename SimdArrayType__<T, N, V, M>::storage_type0, \
1614 typename From::storage_type0...>(internal_data0(x0), internal_data0(xs)..., \
1615 internal_data1(x0), internal_data1(xs)...); \
1619 template <typename Return, typename T, std::size_t N, typename V, std::size_t M, \
1621 Vc_INTRINSIC Vc_CONST \
1622 enable_if<(N != M && \
1623 are_all_types_equal<SimdArrayType__<T, N, V, M>, From...>::value && \
1624 N * sizeof...(From) >= Return::Size && ((N - 1) & N) == 0), \
1626 simd_cast(const SimdArrayType__<T, N, V, M> &x0, const From &... xs) \
1628 vc_debug_("simd_cast{bisectable2}(", ")\n", x0, xs...); \
1629 return simd_cast_without_last<Return, SimdArrayType__<T, N, V, M>, From...>( \
1633 template <typename Return, typename T, std::size_t N, typename V, std::size_t M, \
1635 Vc_INTRINSIC Vc_CONST \
1636 enable_if<(N != M && \
1637 are_all_types_equal<SimdArrayType__<T, N, V, M>, From...>::value && \
1638 N * (1 + sizeof...(From)) <= Return::Size && ((N - 1) & N) != 0), \
1640 simd_cast(const SimdArrayType__<T, N, V, M> &x0, const From &... xs) \
1642 vc_debug_("simd_cast{remaining}(", ")\n", x0, xs...); \
1643 return simd_cast_impl_smaller_input<Return, N, SimdArrayType__<T, N, V, M>, \
1644 From...>(x0, xs...); \
1647 template <typename Return, typename T, std::size_t N, typename V, std::size_t M, \
1649 Vc_INTRINSIC Vc_CONST \
1650 enable_if<(N != M && \
1651 are_all_types_equal<SimdArrayType__<T, N, V, M>, From...>::value && \
1652 N * (1 + sizeof...(From)) > Return::Size && ((N - 1) & N) != 0), \
1654 simd_cast(const SimdArrayType__<T, N, V, M> &x0, const From &... xs) \
1656 vc_debug_("simd_cast{remaining2}(", ")\n", x0, xs...); \
1657 return simd_cast_impl_larger_input<Return, N, SimdArrayType__<T, N, V, M>, \
1658 From...>(x0, xs...); \
1661 template <typename Return, typename T, std::size_t N, typename V, std::size_t M> \
1662 Vc_INTRINSIC Vc_CONST \
1663 enable_if<(N != M && N >= 2 * Return::Size && ((N - 1) & N) == 0), Return> \
1664 simd_cast(const SimdArrayType__<T, N, V, M> &x) \
1666 vc_debug_("simd_cast{single bisectable}(", ")\n", x); \
1667 return simd_cast<Return>(internal_data0(x)); \
1669 template <typename Return, typename T, std::size_t N, typename V, std::size_t M> \
1670 Vc_INTRINSIC Vc_CONST enable_if<(N != M && N > Return::Size && \
1671 N < 2 * Return::Size && ((N - 1) & N) == 0), \
1673 simd_cast(const SimdArrayType__<T, N, V, M> &x) \
1675 vc_debug_("simd_cast{single bisectable2}(", ")\n", x); \
1676 return simd_cast<Return>(internal_data0(x), internal_data1(x)); \
1678 Vc_SIMDARRAY_CASTS(SimdArray)
1679 Vc_SIMDARRAY_CASTS(SimdMaskArray)
1680 #undef Vc_SIMDARRAY_CASTS
// simd_cast<Return, offset>(x) overloads taking a single SimdArray /
// SimdMaskArray as input. `offset` counts in units of Return::Size. Debug tags:
//  {offset == 0}          forwards to simd_cast<Return>(x).
//  {offset, forward}      input is SimdArrayType__<T, N, V, N>, offset != 0:
//                         forwards to simd_cast<Return, offset> on
//                         internal_data(x).
//  {offset, right}        N != M, requested range starts at or after
//                         left_size(N), and left_size(N) is a multiple of
//                         Return::Size: cast from internal_data1(x) with the
//                         offset reduced by left_size(N) / Return::Size.
//  {offset, right, nofit} same, but left_size(N) is not a multiple of
//                         Return::Size: simd_cast_with_offset on
//                         internal_data1(x) with an entry offset of
//                         offset * Return::Size - left_size(N).
//  {offset, left}         N != M and the requested range ends at or before
//                         left_size(N): cast from internal_data0(x).
//  {offset, copy scalars} N != M and the requested range straddles
//                         left_size(N): start from Return::Zero() and copy
//                         entries one by one up to
//                         min(N, (offset + 1) * Return::Size).
// NOTE(review): some template-header lines and the tail of the last overload
// are not visible in this excerpt.
1683 #define Vc_SIMDARRAY_CASTS(SimdArrayType__) \
1685 template <typename Return, int offset, typename T, std::size_t N, typename V, \
1687 Vc_INTRINSIC Vc_CONST enable_if<(offset == 0), Return> simd_cast( \
1688 const SimdArrayType__<T, N, V, M> &x) \
1690 vc_debug_("simd_cast{offset == 0}(", ")\n", offset, x); \
1691 return simd_cast<Return>(x); \
1694 template <typename Return, int offset, typename T, std::size_t N, typename V> \
1695 Vc_INTRINSIC Vc_CONST enable_if<(offset != 0), Return> simd_cast( \
1696 const SimdArrayType__<T, N, V, N> &x) \
1698 vc_debug_("simd_cast{offset, forward}(", ")\n", offset, x); \
1699 return simd_cast<Return, offset>(internal_data(x)); \
1702 template <typename Return, int offset, typename T, std::size_t N, typename V, \
1704 Vc_INTRINSIC Vc_CONST \
1705 enable_if<(N != M && offset * Return::Size >= Common::left_size(N) && \
1706 offset != 0 && Common::left_size(N) % Return::Size == 0), \
1708 simd_cast(const SimdArrayType__<T, N, V, M> &x) \
1710 vc_debug_("simd_cast{offset, right}(", ")\n", offset, x); \
1711 return simd_cast<Return, offset - Common::left_size(N) / Return::Size>( \
1712 internal_data1(x)); \
1716 template <typename Return, int offset, typename T, std::size_t N, typename V, \
1718 Vc_INTRINSIC Vc_CONST \
1719 enable_if<(N != M && offset * Return::Size >= Common::left_size(N) && \
1720 offset != 0 && Common::left_size(N) % Return::Size != 0), \
1722 simd_cast(const SimdArrayType__<T, N, V, M> &x) \
1724 vc_debug_("simd_cast{offset, right, nofit}(", ")\n", offset, x); \
1725 return simd_cast_with_offset<Return, \
1726 offset * Return::Size - Common::left_size(N)>( \
1727 internal_data1(x)); \
1730 template <typename Return, int offset, typename T, std::size_t N, typename V, \
1732 Vc_INTRINSIC Vc_CONST \
1733 enable_if<(N != M && \
1734 offset != 0 && (offset + 1) * Return::Size <= Common::left_size(N)), \
1736 simd_cast(const SimdArrayType__<T, N, V, M> &x) \
1738 vc_debug_("simd_cast{offset, left}(", ")\n", offset, x); \
1739 return simd_cast<Return, offset>(internal_data0(x)); \
1742 template <typename Return, int offset, typename T, std::size_t N, typename V, \
1744 Vc_INTRINSIC Vc_CONST \
1745 enable_if<(N != M && (offset * Return::Size < Common::left_size(N)) && \
1746 offset != 0 && (offset + 1) * Return::Size > Common::left_size(N)), \
1748 simd_cast(const SimdArrayType__<T, N, V, M> &x) \
1750 vc_debug_("simd_cast{offset, copy scalars}(", ")\n", offset, x); \
1751 using R = typename Return::EntryType; \
1752 Return r = Return::Zero(); \
1753 for (std::size_t i = offset * Return::Size; \
1754 i < std::min(N, (offset + 1) * Return::Size); ++i) { \
1755 r[i - offset * Return::Size] = static_cast<R>(x[i]); \
1759 Vc_SIMDARRAY_CASTS(SimdArray)
1760 Vc_SIMDARRAY_CASTS(SimdMaskArray)
1761 #undef Vc_SIMDARRAY_CASTS
// Definitions of simd_cast_drop_arguments<Return>(...).

// Single-argument overload.
// NOTE(review): its body is not visible in this excerpt.
1763 template <
typename Return,
typename From>
1764 Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x)
// Selected while sizeof...(Froms) * Size is still below Return::Size.
// NOTE(review): its body is not visible in this excerpt.
1768 template <
typename Return,
typename... Froms>
1769 Vc_INTRINSIC Vc_CONST
1770 enable_if<(are_all_types_equal<Froms...>::value &&
1771 sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
1773 simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x)
// The arguments already reach Return::Size: discard the unnamed trailing
// argument and recurse with xs..., x.
1780 template <
typename Return,
typename From,
typename... Froms>
1781 Vc_INTRINSIC Vc_CONST enable_if<
1782 (are_all_types_equal<From, Froms...>::value &&
1783 (1 +
sizeof...(Froms)) * From::Size >= Return::Size &&
sizeof...(Froms) != 0),
1785 simd_cast_drop_arguments(Froms... xs, From x, From)
1787 return simd_cast_drop_arguments<Return, Froms...>(xs..., x);
// Two arguments where the first alone reaches Return::Size: discard the second
// and call the single-argument form.
1789 template <
typename Return,
typename From>
1790 Vc_INTRINSIC Vc_CONST
1791 enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
1792 simd_cast_drop_arguments(From x, From)
1794 return simd_cast_drop_arguments<Return>(x);
// Definitions of simd_cast_with_offset<Return, offset>(...), where `offset`
// is a std::size_t that is compared directly against From::Size.

// offset is a non-zero multiple of Return::Size: translate it into the
// chunk-indexed form simd_cast<Return, offset / Return::Size>(x).
1798 template <
typename Return, std::
size_t offset,
typename From>
1799 Vc_INTRINSIC Vc_CONST
1800 enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0),
1801 Return> simd_cast_with_offset(
const From &x)
1803 return simd_cast<Return, offset / Return::Size>(x);
// offset is not a multiple of Return::Size and the isAtomic trait is false for
// Return: cast the two members separately, the first at `offset` and the
// second at `offset + R0::Size`.
1805 template <
typename Return, std::
size_t offset,
typename From>
1806 Vc_INTRINSIC Vc_CONST
1807 enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
1808 ((Traits::isSimdArray<Return>::value &&
1809 !Traits::isAtomicSimdArray<Return>::value) ||
1810 (Traits::isSimdMaskArray<Return>::value &&
1811 !Traits::isAtomicSimdMaskArray<Return>::value))),
1813 simd_cast_with_offset(
const From &x)
1815 using R0 =
typename Return::storage_type0;
1816 using R1 =
typename Return::storage_type1;
1817 return {simd_cast_with_offset<R0, offset>(x),
1818 simd_cast_with_offset<R1, offset + R0::Size>(x)};
// offset is not a multiple of Return::Size and the isAtomic trait is true for
// Return: call x.shifted(offset % Return::Size) and cast the result with
// simd_cast<Return, offset / Return::Size>.
1820 template <
typename Return, std::
size_t offset,
typename From>
1821 Vc_INTRINSIC Vc_CONST
1822 enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
1823 ((Traits::isSimdArray<Return>::value &&
1824 Traits::isAtomicSimdArray<Return>::value) ||
1825 (Traits::isSimdMaskArray<Return>::value &&
1826 Traits::isAtomicSimdMaskArray<Return>::value))),
1828 simd_cast_with_offset(
const From &x)
1830 return simd_cast<Return, offset / Return::Size>(x.shifted(offset % Return::Size));
// offset == 0 with one or more equal-typed arguments.
// NOTE(review): the body is not visible in this excerpt.
1832 template <
typename Return, std::size_t offset,
typename From,
typename... Froms>
1833 Vc_INTRINSIC Vc_CONST
1834 enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
1835 simd_cast_with_offset(
const From &x,
const Froms &... xs)
// Definition of simd_cast_without_last: receives xs... plus one unnamed
// trailing argument of type T.
// NOTE(review): the body is not visible in this excerpt; given the name and
// the unnamed parameter, it presumably casts xs... and ignores the last
// argument.
1841 template <
typename Return,
typename T,
typename... From>
1842 Vc_INTRINSIC Vc_CONST Return simd_cast_without_last(
const From &... xs,
const T &)
// extract_interleaved<I>(...) has three overloads selected on I: I == 0,
// I == 1 and I > 1, each returning a T0. The I > 1 overload recurses with
// I - 2 on the remaining packs a..., b....
// NOTE(review): most of the parameter lists and the bodies of the I == 0 and
// I == 1 overloads are not visible in this excerpt.
1850 template <std::size_t I,
typename T0,
typename... Ts>
1851 Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(
const T0 &a0,
1859 template <std::size_t I,
typename T0,
typename... Ts>
1860 Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(
const T0 &,
1868 template <std::size_t I,
typename T0,
typename... Ts>
1869 Vc_INTRINSIC Vc_CONST enable_if<(I > 1), T0> extract_interleaved(
const T0 &,
1874 return extract_interleaved<I - 2, Ts...>(a..., b...);
// Helper: for every index in the index_sequence, picks one argument via
// extract_interleaved<Index>(a..., b...) and passes the picked arguments, in
// index order, to simd_cast<Return>.
// NOTE(review): the end of the parameter list is not visible in this excerpt.
1878 template <
typename Return,
typename... Ts, std::size_t... Indexes>
1879 Vc_INTRINSIC Vc_CONST Return
1880 simd_cast_interleaved_argument_order_1(index_sequence<Indexes...>,
const Ts &... a,
1883 return simd_cast<Return>(extract_interleaved<Indexes, Ts...>(a..., b...)...);
// Entry point: builds an index sequence of length 2 * sizeof...(Ts) and
// delegates to simd_cast_interleaved_argument_order_1.
1887 template <
typename Return,
typename... Ts>
1888 Vc_INTRINSIC Vc_CONST Return
1889 simd_cast_interleaved_argument_order(
const Ts &... a,
const Ts &... b)
1891 using seq = make_index_sequence<
sizeof...(Ts)*2>;
1892 return simd_cast_interleaved_argument_order_1<Return, Ts...>(seq(), a..., b...);
// Defines min() and max() for SimdArray.
//  - SimdArray<T, N, V, M>: apply the function to the internal_data0 parts and
//    to the internal_data1 parts, and brace-initialise the result from both.
//  - SimdArray<T, N, V, N>: apply the function to internal_data(l) and
//    internal_data(r) and wrap the result in a SimdArray<T, N, V, N>.
1898 #define Vc_BINARY_FUNCTION__(name__) \
1899 template <typename T, std::size_t N, typename V, std::size_t M> \
1900 SimdArray<T, N, V, M> Vc_INTRINSIC Vc_PURE \
1901 name__(const SimdArray<T, N, V, M> &l, const SimdArray<T, N, V, M> &r) \
1903 return {name__(internal_data0(l), internal_data0(r)), \
1904 name__(internal_data1(l), internal_data1(r))}; \
1906 template <typename T, std::size_t N, typename V> \
1907 SimdArray<T, N, V, N> Vc_INTRINSIC Vc_PURE \
1908 name__(const SimdArray<T, N, V, N> &l, const SimdArray<T, N, V, N> &r) \
1910 return SimdArray<T, N, V, N>{name__(internal_data(l), internal_data(r))}; \
1912 Vc_BINARY_FUNCTION__(
min)
1913 Vc_BINARY_FUNCTION__(max)
1914 #undef Vc_BINARY_FUNCTION__
// conditional_assign<Operator::X>(lhs, mask, rhs) for SimdArray<T, N>: one
// overload per compound-assignment operator, each enabled only for its own
// Operator enumerator. The body applies the operator to the masked view
// lhs(mask) with rhs as the right-hand operand; the return type is void.
1917 #define Vc_CONDITIONAL_ASSIGN(name__, op__) \
1918 template <Operator O, typename T, std::size_t N, typename M, typename U> \
1919 Vc_INTRINSIC enable_if<O == Operator::name__, void> conditional_assign( \
1920 SimdArray<T, N> &lhs, M &&mask, U &&rhs) \
1922 lhs(mask) op__ rhs; \
1924 Vc_CONDITIONAL_ASSIGN( Assign, =)
1925 Vc_CONDITIONAL_ASSIGN( PlusAssign, +=)
1926 Vc_CONDITIONAL_ASSIGN( MinusAssign, -=)
1927 Vc_CONDITIONAL_ASSIGN( MultiplyAssign, *=)
1928 Vc_CONDITIONAL_ASSIGN( DivideAssign, /=)
1929 Vc_CONDITIONAL_ASSIGN( RemainderAssign, %=)
1930 Vc_CONDITIONAL_ASSIGN( XorAssign, ^=)
1931 Vc_CONDITIONAL_ASSIGN( AndAssign, &=)
1932 Vc_CONDITIONAL_ASSIGN( OrAssign, |=)
1933 Vc_CONDITIONAL_ASSIGN( LeftShiftAssign,<<=)
1934 Vc_CONDITIONAL_ASSIGN(RightShiftAssign,>>=)
1935 #undef Vc_CONDITIONAL_ASSIGN
// conditional_assign<Operator::X>(lhs, mask) for the four increment and
// decrement operators. Each overload is enabled only for its own Operator
// enumerator and is declared to return SimdArray<T, N>; the expression passed
// as expr__ acts on the masked view lhs(mask).
// NOTE(review): the line of the macro body that uses expr__ is not visible in
// this excerpt; presumably it is `return expr__;`.
1937 #define Vc_CONDITIONAL_ASSIGN(name__, expr__) \
1938 template <Operator O, typename T, std::size_t N, typename M> \
1939 Vc_INTRINSIC enable_if<O == Operator::name__, SimdArray<T, N>> conditional_assign( \
1940 SimdArray<T, N> &lhs, M &&mask) \
1944 Vc_CONDITIONAL_ASSIGN(PostIncrement, lhs(mask)++)
1945 Vc_CONDITIONAL_ASSIGN( PreIncrement, ++lhs(mask))
1946 Vc_CONDITIONAL_ASSIGN(PostDecrement, lhs(mask)--)
1947 Vc_CONDITIONAL_ASSIGN( PreDecrement, --lhs(mask))
1948 #undef Vc_CONDITIONAL_ASSIGN
// transpose_impl for four SimdArray<T, N, V, N> inputs (L == 4). Collects
// pointers to the internal_data of the four outputs in r2 and wraps the
// internal_data of the four inputs in a TransposeProxy<V, V, V, V>.
// NOTE(review): the call that receives &r2[0] and the proxy is not visible in
// this excerpt; presumably it is transpose_impl for the underlying type V.
1952 template <
int L,
typename T, std::
size_t N,
typename V>
1953 inline enable_if<L == 4, void> transpose_impl(
1954 SimdArray<T, N, V, N> * Vc_RESTRICT r[],
1955 const TransposeProxy<SimdArray<T, N, V, N>, SimdArray<T, N, V, N>,
1956 SimdArray<T, N, V, N>, SimdArray<T, N, V, N>> &proxy)
1958 V *Vc_RESTRICT r2[L] = {&internal_data(*r[0]), &internal_data(*r[1]),
1959 &internal_data(*r[2]), &internal_data(*r[3])};
1961 &r2[0], TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
1962 internal_data(std::get<1>(proxy.in)),
1963 internal_data(std::get<2>(proxy.in)),
1964 internal_data(std::get<3>(proxy.in))});
// transpose_impl for L == 2: four SimdArray<T, 2, V, 1> inputs, outputs of
// type SimdArray<T, 4, V, 1>. The internal_data0 part of input k is stored
// into the k-th quarter of `lo`, and the internal_data1 part of input k into
// the k-th quarter of `hi`.
// NOTE(review): the declarations of `lo` and `hi` are not visible in this
// excerpt; presumably they refer to *r[0] and *r[1].
1966 template <
int L,
typename T,
typename V>
1967 inline enable_if<(L == 2), void> transpose_impl(
1968 SimdArray<T, 4, V, 1> *Vc_RESTRICT r[],
1969 const TransposeProxy<SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>,
1970 SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>> &proxy)
1974 internal_data0(internal_data0(lo)) = internal_data0(std::get<0>(proxy.in));
1975 internal_data1(internal_data0(lo)) = internal_data0(std::get<1>(proxy.in));
1976 internal_data0(internal_data1(lo)) = internal_data0(std::get<2>(proxy.in));
1977 internal_data1(internal_data1(lo)) = internal_data0(std::get<3>(proxy.in));
1978 internal_data0(internal_data0(hi)) = internal_data1(std::get<0>(proxy.in));
1979 internal_data1(internal_data0(hi)) = internal_data1(std::get<1>(proxy.in));
1980 internal_data0(internal_data1(hi)) = internal_data1(std::get<2>(proxy.in));
1981 internal_data1(internal_data1(hi)) = internal_data1(std::get<3>(proxy.in));
// transpose_impl for L == 4 and SimdArray<T, N, V, 1> with N > 1. The output
// pointers are split into r0 = {r[0], r[1]} and r1 = {r[2], r[3]}. Two
// TransposeProxy<H, H, H, H> objects (H = SimdArray<T, 2>) are built: one from
// the internal_data0 parts of the four inputs, used together with r0, and one
// from the internal_data1 parts, used together with r1.
// NOTE(review): the two calls that receive these arguments are not visible in
// this excerpt; presumably they are transpose_impl<2>.
1983 template <
int L,
typename T, std::
size_t N,
typename V>
1984 inline enable_if<(L == 4 && N > 1),
void> transpose_impl(
1985 SimdArray<T, N, V, 1> *Vc_RESTRICT r[],
1986 const TransposeProxy<SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>,
1987 SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>> &proxy)
1989 SimdArray<T, N, V, 1> *Vc_RESTRICT r0[L / 2] = {r[0], r[1]};
1990 SimdArray<T, N, V, 1> *Vc_RESTRICT r1[L / 2] = {r[2], r[3]};
1991 using H = SimdArray<T, 2>;
1993 &r0[0], TransposeProxy<H, H, H, H>{internal_data0(std::get<0>(proxy.in)),
1994 internal_data0(std::get<1>(proxy.in)),
1995 internal_data0(std::get<2>(proxy.in)),
1996 internal_data0(std::get<3>(proxy.in))});
1998 &r1[0], TransposeProxy<H, H, H, H>{internal_data1(std::get<0>(proxy.in)),
1999 internal_data1(std::get<1>(proxy.in)),
2000 internal_data1(std::get<2>(proxy.in)),
2001 internal_data1(std::get<3>(proxy.in))});
// Compile-time checks that Traits::has_no_allocated_data is true for
// Vc::SimdArray<int, 4> under the const / volatile / reference variations
// listed below.
2037 static_assert(Traits::has_no_allocated_data<
const volatile Vc::SimdArray<int, 4> &>::value,
"");
2038 static_assert(Traits::has_no_allocated_data<
const volatile Vc::SimdArray<int, 4>>::value,
"");
2039 static_assert(Traits::has_no_allocated_data<
volatile Vc::SimdArray<int, 4> &>::value,
"");
2040 static_assert(Traits::has_no_allocated_data<
volatile Vc::SimdArray<int, 4>>::value,
"");
2041 static_assert(Traits::has_no_allocated_data<
const Vc::SimdArray<int, 4> &>::value,
"");
2042 static_assert(Traits::has_no_allocated_data<
const Vc::SimdArray<int, 4>>::value,
"");
2043 static_assert(Traits::has_no_allocated_data<Vc::SimdArray<int, 4>>::value,
"");
2044 static_assert(Traits::has_no_allocated_data<Vc::SimdArray<int, 4> &&>::value,
"");