28 #ifndef VC_COMMON_SIMDARRAY_H_
29 #define VC_COMMON_SIMDARRAY_H_
33 #if defined Vc_DEBUG_SIMD_CAST || defined Vc_DEBUG_SORTED
39 #include "writemaskedvector.h"
40 #include "simdarrayhelper.h"
41 #include "simdmaskarray.h"
43 #include "interleave.h"
44 #include "indexsequence.h"
45 #include "transpose.h"
48 namespace Vc_VERSIONED_NAMESPACE
53 template <
typename T> T Vc_INTRINSIC Vc_PURE product_helper_(
const T &l,
const T &r) {
return l * r; }
54 template <
typename T> T Vc_INTRINSIC Vc_PURE sum_helper_(
const T &l,
const T &r) {
return l + r; }
58 template <
typename T, std::
size_t N,
typename V, std::
size_t M>
59 inline SimdArray<T, N, V, M>
min(
const SimdArray<T, N, V, M> &x,
60 const SimdArray<T, N, V, M> &y);
61 template <
typename T, std::
size_t N,
typename V, std::
size_t M>
62 inline SimdArray<T, N, V, M>
max(
const SimdArray<T, N, V, M> &x,
63 const SimdArray<T, N, V, M> &y);
70 #define Vc_CURRENT_CLASS_NAME SimdArray
80 template <
typename T, std::
size_t N,
typename VectorType_>
81 class SimdArray<T, N, VectorType_, N>
83 static_assert(std::is_same<T, double>::value || std::is_same<T, float>::value ||
84 std::is_same<T, int32_t>::value ||
85 std::is_same<T, uint32_t>::value ||
86 std::is_same<T, int16_t>::value ||
87 std::is_same<T, uint16_t>::value,
88 "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, "
89 "int16_t, uint16_t }");
92 using VectorType = VectorType_;
93 using vector_type = VectorType;
94 using storage_type = vector_type;
95 using vectorentry_type =
typename vector_type::VectorEntryType;
97 using mask_type = SimdMaskArray<T, N, vector_type>;
98 using index_type = SimdArray<int, N>;
99 static constexpr std::size_t size() {
return N; }
100 using Mask = mask_type;
101 using MaskType =
Mask;
102 using MaskArgument =
const MaskType &;
103 using VectorEntryType = vectorentry_type;
104 using EntryType = value_type;
105 using IndexType = index_type;
107 using reference = Detail::ElementReference<SimdArray>;
108 static constexpr std::size_t Size = size();
112 #ifndef Vc_MSVC // bogus error C2580
123 Vc_INTRINSIC
SimdArray(value_type &a) : data(a) {}
124 Vc_INTRINSIC SimdArray(value_type &&a) : data(a) {}
127 typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
128 Vc_INTRINSIC SimdArray(U a)
129 : SimdArray(static_cast<value_type>(a))
134 template <
typename U,
typename V>
136 : data(simd_cast<vector_type>(internal_data(x)))
139 template <
typename U,
typename V>
141 enable_if<(N > V::Size && N <= 2 * V::Size)> = nullarg)
142 : data(simd_cast<vector_type>(internal_data(internal_data0(x)), internal_data(internal_data1(x))))
145 template <
typename U,
typename V>
146 Vc_INTRINSIC SimdArray(
const SimdArray<U, N, V> &x,
147 enable_if<(N > 2 * V::Size && N <= 4 * V::Size)> = nullarg)
148 : data(simd_cast<vector_type>(internal_data(internal_data0(internal_data0(x))),
149 internal_data(internal_data1(internal_data0(x))),
150 internal_data(internal_data0(internal_data1(x))),
151 internal_data(internal_data1(internal_data1(x)))))
155 template <
typename V, std::
size_t Pieces, std::
size_t Index>
156 Vc_INTRINSIC
SimdArray(Common::Segment<V, Pieces, Index> &&x)
157 : data(simd_cast<vector_type, Index>(x.data))
161 Vc_INTRINSIC
SimdArray(
const std::initializer_list<value_type> &init)
164 #if defined Vc_CXX14 && 0 // doesn't compile yet
165 static_assert(init.size() == size(),
"The initializer_list argument to "
166 "SimdArray<T, N> must contain exactly N "
169 Vc_ASSERT(init.size() == size());
176 typename = enable_if<Traits::is_simd_vector<V>::value && !Traits::isSimdArray<V>::value>>
177 explicit Vc_INTRINSIC SimdArray(
const V &x)
178 : data(simd_cast<vector_type>(x))
185 typename U,
typename A,
192 #include "gatherinterface.h"
193 #include "scatterinterface.h"
196 template <
typename... Args,
197 typename = enable_if<!Traits::is_cast_arguments<Args...>::value &&
198 !Traits::is_gather_signature<Args...>::value &&
199 !Traits::is_initializer_list<Args...>::value>>
200 explicit Vc_INTRINSIC SimdArray(Args &&... args)
201 : data(
std::forward<Args>(args)...)
205 template <std::
size_t Offset>
206 explicit Vc_INTRINSIC SimdArray(
207 Common::AddOffset<VectorSpecialInitializerIndexesFromZero, Offset>)
213 Vc_INTRINSIC
void setZero() { data.setZero(); }
214 Vc_INTRINSIC
void setZero(mask_type k) { data.setZero(internal_data(k)); }
215 Vc_INTRINSIC
void setZeroInverted() { data.setZeroInverted(); }
216 Vc_INTRINSIC
void setZeroInverted(mask_type k) { data.setZeroInverted(internal_data(k)); }
218 Vc_INTRINSIC
void setQnan() { data.setQnan(); }
219 Vc_INTRINSIC
void setQnan(mask_type m) { data.setQnan(internal_data(m)); }
222 template <
typename Op,
typename... Args>
223 static Vc_INTRINSIC SimdArray fromOperation(Op op, Args &&... args)
226 Common::unpackArgumentsAuto(op, r.data, std::forward<Args>(args)...);
230 template <
typename Op,
typename... Args>
231 static Vc_INTRINSIC
void callOperation(Op op, Args &&... args)
233 Common::unpackArgumentsAuto(op,
nullptr, std::forward<Args>(args)...);
236 static Vc_INTRINSIC SimdArray
Zero()
240 static Vc_INTRINSIC SimdArray
One()
248 static Vc_INTRINSIC SimdArray Random()
250 return fromOperation(Common::Operations::random());
253 template <
typename... Args> Vc_INTRINSIC
void load(Args &&... args)
255 data.load(std::forward<Args>(args)...);
258 template <
typename... Args> Vc_INTRINSIC
void store(Args &&... args)
const
260 data.store(std::forward<Args>(args)...);
263 Vc_INTRINSIC mask_type operator!()
const
274 Vc_INTRINSIC SimdArray
operator+()
const {
return *
this; }
276 Vc_INTRINSIC SimdArray operator~()
const
281 template <
typename U,
282 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
283 Vc_INTRINSIC Vc_CONST SimdArray
operator<<(U x)
const
287 template <
typename U,
288 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
289 Vc_INTRINSIC SimdArray &operator<<=(U x)
294 template <
typename U,
295 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
296 Vc_INTRINSIC Vc_CONST SimdArray operator>>(U x)
const
300 template <
typename U,
301 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
302 Vc_INTRINSIC SimdArray &operator>>=(U x)
308 #define Vc_BINARY_OPERATOR_(op) \
309 Vc_INTRINSIC Vc_CONST SimdArray operator op(const SimdArray &rhs) const \
311 return {data op rhs.data}; \
313 Vc_INTRINSIC SimdArray &operator op##=(const SimdArray &rhs) \
315 data op## = rhs.data; \
318 Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_);
319 Vc_ALL_BINARY(Vc_BINARY_OPERATOR_);
320 Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_);
321 #undef Vc_BINARY_OPERATOR_
323 #define Vc_COMPARES(op) \
324 Vc_INTRINSIC mask_type operator op(const SimdArray &rhs) const \
326 return {data op rhs.data}; \
328 Vc_ALL_COMPARES(Vc_COMPARES);
332 Vc_DEPRECATED(
"use isnegative(x) instead") Vc_INTRINSIC
MaskType isNegative()
const
339 Vc_INTRINSIC
static value_type get(
const SimdArray &o,
int i) noexcept
343 template <
typename U>
344 Vc_INTRINSIC
static void set(SimdArray &o,
int i, U &&v) noexcept(
345 noexcept(
std::declval<value_type &>() = v))
357 Vc_INTRINSIC reference operator[](
size_t i) noexcept
359 static_assert(noexcept(reference{std::declval<SimdArray &>(),
int()}),
"");
360 return {*
this, int(i)};
364 return get(*
this, int(i));
367 Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(
const mask_type &k)
372 Vc_INTRINSIC
void assign(
const SimdArray &v,
const mask_type &k)
374 data.assign(v.data, internal_data(k));
378 #define Vc_REDUCTION_FUNCTION_(name_) \
379 Vc_INTRINSIC Vc_PURE value_type name_() const { return data.name_(); } \
380 Vc_INTRINSIC Vc_PURE value_type name_(mask_type mask) const \
382 return data.name_(internal_data(mask)); \
384 Vc_NOTHING_EXPECTING_SEMICOLON
385 Vc_REDUCTION_FUNCTION_(
min);
386 Vc_REDUCTION_FUNCTION_(
max);
387 Vc_REDUCTION_FUNCTION_(product);
388 Vc_REDUCTION_FUNCTION_(sum);
389 #undef Vc_REDUCTION_FUNCTION_
390 Vc_INTRINSIC Vc_PURE SimdArray partialSum()
const {
return data.partialSum(); }
392 template <
typename F> Vc_INTRINSIC SimdArray apply(F &&f)
const
394 return {data.apply(std::forward<F>(f))};
396 template <
typename F> Vc_INTRINSIC SimdArray apply(F &&f,
const mask_type &k)
const
398 return {data.apply(std::forward<F>(f), k)};
401 Vc_INTRINSIC SimdArray
shifted(
int amount)
const
403 return {data.shifted(amount)};
406 template <std::
size_t NN>
407 Vc_INTRINSIC SimdArray
shifted(
int amount,
const SimdArray<value_type, NN> &shiftIn)
410 return {data.shifted(amount, simd_cast<VectorType>(shiftIn))};
413 Vc_INTRINSIC SimdArray rotated(
int amount)
const
415 return {data.rotated(amount)};
419 Vc_DEPRECATED(
"use exponent(x) instead") Vc_INTRINSIC SimdArray
exponent()
const
424 Vc_INTRINSIC SimdArray interleaveLow(SimdArray x)
const
426 return {data.interleaveLow(x.data)};
428 Vc_INTRINSIC SimdArray interleaveHigh(SimdArray x)
const
430 return {data.interleaveHigh(x.data)};
433 Vc_INTRINSIC SimdArray reversed()
const
435 return {data.reversed()};
438 Vc_INTRINSIC SimdArray sorted()
const
440 return {data.sorted()};
443 template <
typename G>
static Vc_INTRINSIC SimdArray generate(
const G &gen)
445 return {VectorType::generate(gen)};
448 Vc_DEPRECATED(
"use copysign(x, y) instead") Vc_INTRINSIC SimdArray
449 copySign(const SimdArray &reference)
const
454 friend VectorType &internal_data<>(SimdArray &x);
455 friend const VectorType &internal_data<>(
const SimdArray &x);
458 Vc_INTRINSIC SimdArray(VectorType &&x) : data(
std::move(x)) {}
460 Vc_FREE_STORE_OPERATORS_ALIGNED(
alignof(storage_type));
466 alignas(
static_cast<std::size_t
>(
467 Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value *
sizeof(VectorType_) /
468 VectorType_::size()>::value)) storage_type data;
471 template <
typename T, std::
size_t N,
typename VectorType>
473 template <
typename T, std::
size_t N,
typename VectorType>
481 template <
typename T, std::
size_t N,
typename VectorType>
// Generic overload: passes the argument through unchanged. For an lvalue
// argument T is deduced as a reference type, so the same object is returned by
// reference; for an rvalue argument the value is moved into the returned T.
template <typename T>
T unpackIfSegment(T &&x)
{
    // Same cast that std::forward<T> performs.
    return static_cast<T &&>(x);
}
492 template <
typename T,
size_t Pieces,
size_t Index>
493 auto unpackIfSegment(Common::Segment<T, Pieces, Index> &&x) -> decltype(x.asSimdArray())
495 return x.asSimdArray();
499 template <
typename T, std::
size_t N,
typename VectorType>
500 template <
typename MT,
typename IT>
501 inline void SimdArray<T, N, VectorType, N>::gatherImplementation(
const MT *mem,
504 data.gather(mem, unpackIfSegment(indexes));
506 template <
typename T, std::
size_t N,
typename VectorType>
507 template <
typename MT,
typename IT>
512 data.
gather(mem, unpackIfSegment(indexes), mask);
516 template <
typename T, std::
size_t N,
typename VectorType>
517 template <
typename MT,
typename IT>
521 data.
scatter(mem, unpackIfSegment(std::forward<IT>(indexes)));
523 template <
typename T, std::
size_t N,
typename VectorType>
524 template <
typename MT,
typename IT>
525 inline void SimdArray<T, N, VectorType, N>::scatterImplementation(MT *mem,
527 MaskArgument mask)
const
529 data.scatter(mem, unpackIfSegment(std::forward<IT>(indexes)), mask);
565 template <
typename T,
size_t N,
typename V,
size_t Wt>
class SimdArray
567 static_assert(std::is_same<T, double>::value ||
568 std::is_same<T, float>::value ||
569 std::is_same<T, int32_t>::value ||
570 std::is_same<T, uint32_t>::value ||
571 std::is_same<T, int16_t>::value ||
572 std::is_same<T, uint16_t>::value,
"SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, int16_t, uint16_t }");
575 std::is_same<typename V::EntryType, typename V::VectorEntryType>::value ||
577 (N % V::size() == 0),
578 "SimdArray<(un)signed short, N> on MIC only works correctly for N = k * "
579 "MIC::(u)short_v::size(), i.e. k * 16.");
581 using my_traits = SimdArrayTraits<T, N>;
582 static constexpr std::size_t N0 = my_traits::N0;
583 static constexpr std::size_t N1 = my_traits::N1;
584 using Split = Common::Split<N0>;
585 template <
typename U, std::
size_t K>
using CArray = U[K];
590 static_assert(storage_type0::size() == N0,
"");
595 using vector_type = V;
596 using vectorentry_type =
typename storage_type0::vectorentry_type;
597 typedef vectorentry_type alias_type Vc_MAY_ALIAS;
618 static constexpr std::size_t
size() {
return N; }
625 using VectorEntryType = vectorentry_type;
630 using AsArg =
const SimdArray &;
632 using reference = Detail::ElementReference<SimdArray>;
635 static constexpr std::size_t MemoryAlignment =
644 static Vc_INTRINSIC SimdArray
Zero()
650 static Vc_INTRINSIC SimdArray
One()
664 return fromOperation(Common::Operations::random());
668 template <
typename G>
static Vc_INTRINSIC SimdArray
generate(
const G &gen)
670 auto tmp = storage_type0::generate(gen);
675 return {std::move(tmp),
676 storage_type1::generate([&](std::size_t i) {
return gen(i + N0); })};
684 #ifndef Vc_MSVC // bogus error C2580
685 SimdArray() =
default;
696 typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
698 : SimdArray(static_cast<value_type>(a))
704 SimdArray(
const SimdArray &) =
default;
705 SimdArray(SimdArray &&) =
default;
706 SimdArray &operator=(
const SimdArray &) =
default;
709 template <
typename U,
710 typename Flags = DefaultLoadTag,
711 typename = enable_if<Traits::is_load_store_flag<Flags>::value>>
712 explicit Vc_INTRINSIC SimdArray(
const U *mem, Flags f = Flags())
713 : data0(mem, f), data1(mem + storage_type0::size(), f)
725 template <
typename U, std::size_t Extent,
typename Flags =
DefaultLoadTag,
726 typename = enable_if<Traits::is_load_store_flag<Flags>::value>>
727 explicit Vc_INTRINSIC SimdArray(CArray<U, Extent> &mem, Flags f = Flags())
728 : data0(&mem[0], f), data1(&mem[storage_type0::size()], f)
734 template <
typename U, std::size_t Extent,
typename Flags =
DefaultLoadTag,
735 typename = enable_if<Traits::is_load_store_flag<Flags>::value>>
736 explicit Vc_INTRINSIC SimdArray(
const CArray<U, Extent> &mem, Flags f = Flags())
737 : data0(&mem[0], f), data1(&mem[storage_type0::size()], f)
743 Vc_INTRINSIC SimdArray(
const std::initializer_list<value_type> &init)
745 , data1(init.begin() + storage_type0::size(),
Vc::
Unaligned)
747 #if defined Vc_CXX14 && 0 // doesn't compile yet
748 static_assert(init.size() == size(),
"The initializer_list argument to "
749 "SimdArray<T, N> must contain exactly N "
752 Vc_ASSERT(init.size() == size());
756 #include "gatherinterface.h"
757 #include "scatterinterface.h"
760 template <
typename... Args,
761 typename = enable_if<!Traits::is_cast_arguments<Args...>::value &&
762 !Traits::is_initializer_list<Args...>::value &&
763 !Traits::is_gather_signature<Args...>::value &&
764 !Traits::is_load_arguments<Args...>::value>>
765 explicit Vc_INTRINSIC SimdArray(Args &&... args)
766 : data0(Split::lo(args)...)
768 , data1(Split::hi(
std::forward<Args>(args))...)
773 template <
typename W>
774 Vc_INTRINSIC
explicit SimdArray(
776 enable_if<(Traits::is_simd_vector<W>::value && Traits::simd_vector_size<W>::value == N &&
777 !(std::is_convertible<Traits::entry_type_of<W>, T>::value &&
778 Traits::isSimdArray<W>::value))> = nullarg)
779 : data0(Split::lo(x)), data1(Split::hi(x))
784 template <
typename W>
785 Vc_INTRINSIC SimdArray(
787 enable_if<(Traits::isSimdArray<W>::value && Traits::simd_vector_size<W>::value == N &&
788 std::is_convertible<Traits::entry_type_of<W>, T>::value)> = nullarg)
789 : data0(Split::lo(x)), data1(Split::hi(x))
796 typename U,
typename A,
797 typename = enable_if<std::is_convertible<T, U>::value && Vector<U, A>::Size == N>>
798 operator Vector<U, A>()
const
800 return simd_cast<Vector<U, A>>(data0, data1);
805 Vc_INTRINSIC
void setZero()
810 Vc_INTRINSIC
void setZero(
const mask_type &k)
812 data0.setZero(Split::lo(k));
813 data1.setZero(Split::hi(k));
815 Vc_INTRINSIC
void setZeroInverted()
817 data0.setZeroInverted();
818 data1.setZeroInverted();
820 Vc_INTRINSIC
void setZeroInverted(
const mask_type &k)
822 data0.setZeroInverted(Split::lo(k));
823 data1.setZeroInverted(Split::hi(k));
827 Vc_INTRINSIC
void setQnan() {
831 Vc_INTRINSIC
void setQnan(
const mask_type &m) {
832 data0.setQnan(Split::lo(m));
833 data1.setQnan(Split::hi(m));
837 template <
typename Op,
typename... Args>
838 static Vc_INTRINSIC SimdArray fromOperation(Op op, Args &&... args)
841 storage_type0::fromOperation(op, Split::lo(args)...),
844 storage_type1::fromOperation(op, Split::hi(std::forward<Args>(args))...)};
849 template <
typename Op,
typename... Args>
850 static Vc_INTRINSIC
void callOperation(Op op, Args &&... args)
852 storage_type0::callOperation(op, Split::lo(args)...);
853 storage_type1::callOperation(op, Split::hi(std::forward<Args>(args))...);
857 template <
typename U,
typename... Args> Vc_INTRINSIC
void load(
const U *mem, Args &&... args)
859 data0.load(mem, Split::lo(args)...);
861 data1.load(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
864 template <
typename U,
typename... Args> Vc_INTRINSIC
void store(U *mem, Args &&... args)
const
866 data0.store(mem, Split::lo(args)...);
868 data1.store(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
871 Vc_INTRINSIC mask_type operator!()
const
873 return {!data0, !data1};
878 return {-data0, -data1};
882 Vc_INTRINSIC SimdArray
operator+()
const {
return *
this; }
884 Vc_INTRINSIC SimdArray operator~()
const
886 return {~data0, ~data1};
890 template <
typename U,
891 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
892 Vc_INTRINSIC Vc_CONST SimdArray
operator<<(U x)
const
894 return {data0 << x, data1 << x};
896 template <
typename U,
897 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
898 Vc_INTRINSIC SimdArray &operator<<=(U x)
904 template <
typename U,
905 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
906 Vc_INTRINSIC Vc_CONST SimdArray operator>>(U x)
const
908 return {data0 >> x, data1 >> x};
910 template <
typename U,
911 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
912 Vc_INTRINSIC SimdArray &operator>>=(U x)
920 #define Vc_BINARY_OPERATOR_(op) \
921 Vc_INTRINSIC Vc_CONST SimdArray operator op(const SimdArray &rhs) const \
923 return {data0 op rhs.data0, data1 op rhs.data1}; \
925 Vc_INTRINSIC SimdArray &operator op##=(const SimdArray &rhs) \
927 data0 op## = rhs.data0; \
928 data1 op## = rhs.data1; \
931 Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_);
932 Vc_ALL_BINARY(Vc_BINARY_OPERATOR_);
933 Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_);
934 #undef Vc_BINARY_OPERATOR_
936 #define Vc_COMPARES(op) \
937 Vc_INTRINSIC mask_type operator op(const SimdArray &rhs) const \
939 return {data0 op rhs.data0, data1 op rhs.data1}; \
941 Vc_ALL_COMPARES(Vc_COMPARES);
950 Vc_INTRINSIC
static value_type
get(
const SimdArray &o,
int i) noexcept
952 return reinterpret_cast<const alias_type *
>(&o)[i];
954 template <
typename U>
955 Vc_INTRINSIC
static void set(SimdArray &o,
int i, U &&v) noexcept(
956 noexcept(
std::declval<value_type &>() = v))
958 reinterpret_cast<alias_type *
>(&o)[i] = v;
971 static_assert(noexcept(reference{std::declval<SimdArray &>(),
int()}),
"");
972 return {*
this, int(i)};
978 return get(*
this, int(index));
984 Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type>
operator()(
987 return {*
this, mask};
991 Vc_INTRINSIC
void assign(
const SimdArray &v,
const mask_type &k)
993 data0.assign(v.data0, internal_data0(k));
994 data1.assign(v.data1, internal_data1(k));
998 #define Vc_REDUCTION_FUNCTION_(name_, binary_fun_, scalar_fun_) \
1000 template <typename ForSfinae = void> \
1001 Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value && \
1002 storage_type0::Size == storage_type1::Size, \
1003 value_type> name_##_impl() const \
1005 return binary_fun_(data0, data1).name_(); \
1008 template <typename ForSfinae = void> \
1009 Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value && \
1010 storage_type0::Size != storage_type1::Size, \
1011 value_type> name_##_impl() const \
1013 return scalar_fun_(data0.name_(), data1.name_()); \
1018 Vc_INTRINSIC value_type name_() const { return name_##_impl(); } \
1020 Vc_INTRINSIC value_type name_(const mask_type &mask) const \
1022 if (Vc_IS_UNLIKELY(Split::lo(mask).isEmpty())) { \
1023 return data1.name_(Split::hi(mask)); \
1024 } else if (Vc_IS_UNLIKELY(Split::hi(mask).isEmpty())) { \
1025 return data0.name_(Split::lo(mask)); \
1027 return scalar_fun_(data0.name_(Split::lo(mask)), \
1028 data1.name_(Split::hi(mask))); \
1031 Vc_NOTHING_EXPECTING_SEMICOLON
1034 Vc_REDUCTION_FUNCTION_(product, internal::product_helper_, internal::product_helper_);
1035 Vc_REDUCTION_FUNCTION_(sum, internal::sum_helper_, internal::sum_helper_);
1036 #undef Vc_REDUCTION_FUNCTION_
1037 Vc_INTRINSIC Vc_PURE SimdArray partialSum() const
1040 auto ps0 = data0.partialSum();
1042 tmp[0] += ps0[data0.size() - 1];
1043 return {std::move(ps0), tmp.partialSum()};
1048 template <
typename F>
inline SimdArray
apply(F &&f)
const
1055 return {data0.
apply(f, Split::lo(k)), data1.
apply(f, Split::hi(k))};
1062 constexpr
int SSize = Size;
1063 constexpr
int SSize0 = storage_type0::Size;
1064 constexpr
int SSize1 = storage_type1::Size;
1069 if (amount > -SSize0) {
1072 if (amount == -SSize0) {
1075 if (amount < -SSize0) {
1081 if (amount >= SSize) {
1083 }
else if (amount >= SSize0) {
1085 simd_cast<storage_type0>(data1).
shifted(amount - SSize0),
1087 }
else if (amount >= SSize1) {
1090 return {data0.shifted(amount, data1), data1.shifted(amount)};
1095 template <std::
size_t NN>
1097 !(std::is_same<storage_type0, storage_type1>::value &&
1102 constexpr
int SSize = Size;
1104 return SimdArray::generate([&](
int i) ->
value_type {
1107 return operator[](i);
1108 }
else if (i >= -SSize) {
1109 return shiftIn[i + SSize];
1114 return SimdArray::generate([&](
int i) ->
value_type {
1117 return operator[](i);
1118 }
else if (i < 2 * SSize) {
1119 return shiftIn[i - SSize];
1128 template <std::
size_t NN>
struct bisectable_shift
1129 :
public std::integral_constant<bool,
1130 std::is_same<storage_type0, storage_type1>::value &&
1136 template <std::
size_t NN>
1137 inline SimdArray
shifted(enable_if<bisectable_shift<NN>::value,
int> amount,
1140 constexpr
int SSize = Size;
1142 if (amount > -static_cast<int>(storage_type0::Size)) {
1143 return {data0.shifted(amount, internal_data1(shiftIn)),
1144 data1.shifted(amount, data0)};
1146 if (amount == -static_cast<int>(storage_type0::Size)) {
1147 return {storage_type0(internal_data1(shiftIn)), storage_type1(data0)};
1149 if (amount > -SSize) {
1151 internal_data1(shiftIn)
1152 .shifted(amount + static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
1153 data0.shifted(amount + static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
1155 if (amount == -SSize) {
1158 if (amount > -2 * SSize) {
1159 return shiftIn.shifted(amount + SSize);
1165 if (amount < static_cast<int>(storage_type0::Size)) {
1166 return {data0.shifted(amount, data1),
1167 data1.shifted(amount, internal_data0(shiftIn))};
1169 if (amount == static_cast<int>(storage_type0::Size)) {
1170 return {storage_type0(data1), storage_type1(internal_data0(shiftIn))};
1172 if (amount < SSize) {
1173 return {data1.shifted(amount - static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
1174 internal_data0(shiftIn)
1175 .shifted(amount - static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
1177 if (amount == SSize) {
1180 if (amount < 2 * SSize) {
1181 return shiftIn.shifted(amount - SSize);
1190 amount %= int(size());
1193 }
else if (amount < 0) {
1207 r.data1.load(&tmp[(amount + data0.size()) % size()],
Vc::Unaligned);
1210 auto &&d0cvtd = simd_cast<storage_type1>(data0);
1211 auto &&d1cvtd = simd_cast<storage_type0>(data1);
1212 constexpr
int size0 = storage_type0::size();
1213 constexpr
int size1 = storage_type1::size();
1215 if (amount == size0 && std::is_same<storage_type0, storage_type1>::value) {
1216 return {std::move(d1cvtd), std::move(d0cvtd)};
1217 }
else if (amount < size1) {
1218 return {data0.shifted(amount, d1cvtd), data1.shifted(amount, d0cvtd)};
1219 }
else if (amount == size1) {
1220 return {data0.shifted(amount, d1cvtd), std::move(d0cvtd)};
1221 }
else if (
int(size()) - amount < size1) {
1222 return {data0.shifted(amount -
int(size()), d1cvtd.shifted(size1 - size0)),
1223 data1.shifted(amount -
int(size()), data0.shifted(size0 - size1))};
1224 }
else if (
int(size()) - amount == size1) {
1225 return {data0.shifted(-size1, d1cvtd.shifted(size1 - size0)),
1226 simd_cast<storage_type1>(data0.shifted(size0 - size1))};
1227 }
else if (amount <= size0) {
1228 return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
1229 simd_cast<storage_type1>(data0.shifted(amount - size1))};
1231 return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
1232 simd_cast<storage_type1>(data0.shifted(amount - size1, d1cvtd))};
1240 Vc_INTRINSIC SimdArray interleaveLow(
const SimdArray &x)
const
1243 return {data0.interleaveLow(x.data0),
1244 simd_cast<storage_type1>(data0.interleaveHigh(x.data0))};
1247 Vc_INTRINSIC SimdArray interleaveHigh(
const SimdArray &x)
const
1249 return interleaveHighImpl(
1251 std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
1256 Vc_INTRINSIC SimdArray interleaveHighImpl(
const SimdArray &x, std::true_type)
const
1258 return {data1.interleaveLow(x.data1), data1.interleaveHigh(x.data1)};
1261 inline SimdArray interleaveHighImpl(
const SimdArray &x, std::false_type)
const
1263 return {data0.interleaveHigh(x.data0)
1264 .shifted(storage_type1::Size,
1265 simd_cast<storage_type0>(data1.interleaveLow(x.data1))),
1266 data1.interleaveHigh(x.data1)};
1273 if (std::is_same<storage_type0, storage_type1>::value) {
1274 return {simd_cast<storage_type0>(data1).reversed(),
1275 simd_cast<storage_type1>(data0).reversed()};
1286 return {data0.shifted(storage_type1::Size, data1).reversed(),
1287 simd_cast<storage_type1>(data0.reversed().shifted(
1288 storage_type0::Size - storage_type1::Size))};
1296 std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
1300 Vc_INTRINSIC SimdArray sortedImpl(std::true_type)
const
1302 #ifdef Vc_DEBUG_SORTED
1303 std::cerr <<
"-- " << data0 << data1 <<
'\n';
1305 const auto a = data0.sorted();
1306 const auto b = data1.sorted().reversed();
1307 const auto lo =
Vc::min(a, b);
1308 const auto hi =
Vc::max(a, b);
1309 return {lo.sorted(), hi.sorted()};
1313 Vc_INTRINSIC SimdArray sortedImpl(std::false_type)
const
1315 using SortableArray =
1316 SimdArray<value_type, Common::NextPowerOfTwo<size()>::value>;
1317 auto sortable = simd_cast<SortableArray>(*this);
1318 for (std::size_t i = Size; i < SortableArray::Size; ++i) {
1319 using limits = std::numeric_limits<value_type>;
1320 if (limits::has_infinity) {
1321 sortable[i] = limits::infinity();
1326 return simd_cast<SimdArray>(sortable.sorted());
1360 static constexpr std::size_t Size = size();
1363 Vc_DEPRECATED(
"use exponent(x) instead") Vc_INTRINSIC SimdArray
exponent()
const
1369 Vc_DEPRECATED(
"use isnegative(x) instead") Vc_INTRINSIC
MaskType isNegative()
const
1375 Vc_DEPRECATED(
"use copysign(x, y) instead") Vc_INTRINSIC SimdArray
1376 copySign(const SimdArray &reference)
const
1384 friend storage_type0 &internal_data0<>(SimdArray &x);
1385 friend storage_type1 &internal_data1<>(SimdArray &x);
1386 friend const storage_type0 &internal_data0<>(
const SimdArray &x);
1387 friend const storage_type1 &internal_data1<>(
const SimdArray &x);
1390 Vc_INTRINSIC SimdArray(storage_type0 &&x, storage_type1 &&y)
1391 : data0(
std::move(x)), data1(
std::move(y))
1395 Vc_FREE_STORE_OPERATORS_ALIGNED(
alignof(storage_type0));
1401 alignas(
static_cast<std::size_t
>(
1402 Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value *
sizeof(V) /
1403 V::size()>::value)) storage_type0 data0;
1404 storage_type1 data1;
1406 #undef Vc_CURRENT_CLASS_NAME
1407 template <
typename T, std::
size_t N,
typename V, std::
size_t M>
1409 template <
typename T, std::
size_t N,
typename V, std::
size_t M>
1413 template <
typename T, std::
size_t N,
typename VectorType, std::
size_t M>
1414 template <
typename MT,
typename IT>
1418 data0.
gather(mem, Split::lo(Common::Operations::gather(), indexes));
1419 data1.gather(mem, Split::hi(Common::Operations::gather(), indexes));
1421 template <
typename T, std::
size_t N,
typename VectorType, std::
size_t M>
1422 template <
typename MT,
typename IT>
1423 inline void SimdArray<T, N, VectorType, M>::gatherImplementation(
const MT *mem,
1427 data0.gather(mem, Split::lo(Common::Operations::gather(), indexes), Split::lo(mask));
1428 data1.gather(mem, Split::hi(Common::Operations::gather(), indexes), Split::hi(mask));
1432 template <
typename T, std::
size_t N,
typename VectorType, std::
size_t M>
1433 template <
typename MT,
typename IT>
1434 inline void SimdArray<T, N, VectorType, M>::scatterImplementation(MT *mem,
1437 data0.scatter(mem, Split::lo(Common::Operations::gather(),
1440 data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)));
1442 template <
typename T, std::
size_t N,
typename VectorType, std::
size_t M>
1443 template <
typename MT,
typename IT>
1444 inline void SimdArray<T, N, VectorType, M>::scatterImplementation(MT *mem,
1445 IT &&indexes, MaskArgument mask)
const
1447 data0.scatter(mem, Split::lo(Common::Operations::gather(), indexes),
1450 data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)),
1456 template <
typename T, std::
size_t N,
typename V, std::
size_t M>
1460 typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
1461 SimdArray<T, N, V, M> &x)
1466 template <
typename T, std::
size_t N,
typename V, std::
size_t M>
1470 typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
1471 SimdArray<T, N, V, M> &x)
1476 template <
typename T, std::
size_t N,
typename V, std::
size_t M>
1480 const typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
1481 const SimdArray<T, N, V, M> &x)
1486 template <
typename T, std::
size_t N,
typename V, std::
size_t M>
1490 const typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
1491 const SimdArray<T, N, V, M> &x)
1499 #if defined Vc_MSVC && defined Vc_IMPL_SSE
1501 Vc_INTRINSIC SimdArray<double, 8, SSE::Vector<double>, 2>::SimdArray(
1502 SimdArray<double, 4> &&x, SimdArray<double, 4> &&y)
1503 : data0(x), data1(0)
1510 namespace result_vector_type_internal
1512 template <
typename T>
1513 using type =
typename std::remove_cv<typename std::remove_reference<T>::type>::type;
1515 template <
typename T>
1516 using is_integer_larger_than_int = std::integral_constant<
1517 bool, std::is_integral<T>::value &&(
sizeof(T) >
sizeof(
int) ||
1518 std::is_same<T, long>::value ||
1519 std::is_same<T, unsigned long>::value)>;
1522 typename L,
typename R,
1523 std::size_t N = Traits::isSimdArray<L>::value ? Traits::simd_vector_size<L>::value
1524 : Traits::simd_vector_size<R>::value,
1526 (Traits::isSimdArray<L>::value ||
1527 Traits::isSimdArray<R>::value)
1528 && !std::is_same<type<L>, type<R>>::value
1531 ((std::is_arithmetic<type<L>>::value &&
1532 !is_integer_larger_than_int<type<L>>::value) ||
1533 (std::is_arithmetic<type<R>>::value &&
1534 !is_integer_larger_than_int<type<R>>::value)
1539 Traits::simd_vector_size<L>::value == Traits::simd_vector_size<R>::value &&
1540 ((Traits::is_simd_vector<L>::value && !Traits::isSimdArray<L>::value) ||
1541 (Traits::is_simd_vector<R>::value && !Traits::isSimdArray<R>::value))))>
// Specialization of `evaluate` for the case where the trailing bool parameter
// is true. It computes the SimdArray type used as the common type of L and R.
//
// The element type of the result is chosen as follows (LScalar/RScalar are the
// entry types of L and R):
//  - If both are integral and both are smaller than int:
//      * same size      -> LScalar if LScalar is unsigned, otherwise RScalar;
//      * different size -> whichever of the two is larger.
//  - Otherwise          -> the type of the expression LScalar + RScalar.
// NOTE(review): the remaining template arguments of the resulting SimdArray
// (after the element type) are on lines missing from this listing.
1544 template <
typename L,
typename R, std::
size_t N>
struct evaluate<L, R, N, true>
1547 using LScalar = Traits::entry_type_of<L>;
1548 using RScalar = Traits::entry_type_of<R>;
// Local shorthand for std::conditional<...>::type.
1550 template <
bool B,
typename True,
typename False>
1551 using conditional =
typename std::conditional<B, True, False>::type;
1564 using type = SimdArray<
1565 conditional<(std::is_integral<LScalar>::value &&std::is_integral<RScalar>::value &&
1566 sizeof(LScalar) <
sizeof(
int) &&
1567 sizeof(RScalar) <
sizeof(
int)),
1568 conditional<(
sizeof(LScalar) ==
sizeof(RScalar)),
1569 conditional<std::is_unsigned<LScalar>::value, LScalar, RScalar>,
1570 conditional<(sizeof(LScalar) >
sizeof(RScalar)), LScalar, RScalar>>,
1571 decltype(std::declval<LScalar>() + std::declval<RScalar>())>,
// Convenience alias: the `type` member of result_vector_type_internal::evaluate
// instantiated with L and R (remaining parameters take their defaults).
1577 template <
typename L,
typename R>
1578 using result_vector_type =
typename result_vector_type_internal::evaluate<L, R>::type;
1583 "result_vector_type does not work");
// Generator for mixed-type binary operators. For the operator token op_ it
// defines a function template `operator op_(L &&lhs, R &&rhs)` whose return
// type is result_vector_type<L, R>; both operands are perfectly forwarded into
// a conversion to that type and op_ is then applied to the converted values.
// The macro is expanded through Vc_ALL_ARITHMETICS and Vc_ALL_BINARY and
// undefined again afterwards.
1585 #define Vc_BINARY_OPERATORS_(op_) \
1587 template <typename L, typename R> \
1588 Vc_INTRINSIC result_vector_type<L, R> operator op_(L &&lhs, R &&rhs) \
1590 using Return = result_vector_type<L, R>; \
1591 return Return(std::forward<L>(lhs)) op_ Return(std::forward<R>(rhs)); \
1610 Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATORS_);
1612 Vc_ALL_BINARY(Vc_BINARY_OPERATORS_);
1614 #undef Vc_BINARY_OPERATORS_
// Generator for mixed-type comparison operators. Same promotion scheme as the
// arithmetic variant (both operands are converted to result_vector_type<L, R>
// before op_ is applied), but the declared return type is that type's
// mask_type. The macro is expanded through Vc_ALL_COMPARES and undefined again
// afterwards.
1615 #define Vc_BINARY_OPERATORS_(op_) \
1617 template <typename L, typename R> \
1618 Vc_INTRINSIC typename result_vector_type<L, R>::mask_type operator op_(L &&lhs, \
1621 using Promote = result_vector_type<L, R>; \
1622 return Promote(std::forward<L>(lhs)) op_ Promote(std::forward<R>(rhs)); \
1641 Vc_ALL_COMPARES(Vc_BINARY_OPERATORS_);
1644 #undef Vc_BINARY_OPERATORS_
// Generates a free function `name_(const SimdArray<T, N, V, M> &x)` returning
// a SimdArray of the same type. The result is built with
// SimdArray::fromOperation, passing a default-constructed
// Common::Operations::Forward_<name_> object and x.
1647 #define Vc_FORWARD_UNARY_OPERATOR(name_) \
1649 template <typename T, std::size_t N, typename V, std::size_t M> \
1650 inline SimdArray<T, N, V, M> name_(const SimdArray<T, N, V, M> &x) \
1652 return SimdArray<T, N, V, M>::fromOperation( \
1653 Common::Operations::Forward_##name_(), x); \
1655 Vc_NOTHING_EXPECTING_SEMICOLON
// Like Vc_FORWARD_UNARY_OPERATOR, but for predicates: the generated function
// `name_(const SimdArray<T, N, V, M> &x)` returns a SimdMaskArray<T, N, V, M>
// built with SimdMaskArray::fromOperation from a default-constructed
// Common::Operations::Forward_<name_> object and x.
1657 #define Vc_FORWARD_UNARY_BOOL_OPERATOR(name_) \
1659 template <typename T, std::size_t N, typename V, std::size_t M> \
1660 inline SimdMaskArray<T, N, V, M> name_(const SimdArray<T, N, V, M> &x) \
1662 return SimdMaskArray<T, N, V, M>::fromOperation( \
1663 Common::Operations::Forward_##name_(), x); \
1665 Vc_NOTHING_EXPECTING_SEMICOLON
// Generates a free function `name_(x, y)` for two SimdArray arguments of the
// same type, returning that type. The result is built with
// SimdArray::fromOperation, passing a default-constructed
// Common::Operations::Forward_<name_> object followed by x and y.
1667 #define Vc_FORWARD_BINARY_OPERATOR(name_) \
1669 template <typename T, std::size_t N, typename V, std::size_t M> \
1670 inline SimdArray<T, N, V, M> name_(const SimdArray<T, N, V, M> &x, \
1671 const SimdArray<T, N, V, M> &y) \
1673 return SimdArray<T, N, V, M>::fromOperation( \
1674 Common::Operations::Forward_##name_(), x, y); \
1676 Vc_NOTHING_EXPECTING_SEMICOLON
1682 Vc_FORWARD_UNARY_OPERATOR(
abs);
1689 Vc_FORWARD_UNARY_OPERATOR(
cos);
1690 Vc_FORWARD_UNARY_OPERATOR(
exp);
1694 template <
typename T, std::
size_t N>
// Compiled only when both Vc_MSVC and Vc_IMPL_SSE are defined: hand-written
// overload for an argument of type SimdArray<double, 8, SSE::Vector<double>, 2>.
// The visible statements apply isnan to each of the four innermost vectors
// reachable through internal_data0/internal_data1 of x0 and x1, store the
// results into the matching positions of r0 and r1, and return the pair
// {r0, r1}.
// NOTE(review): the function name/return type and the declarations of x0, x1,
// r0 and r1 are on lines missing from this listing - presumably this is the
// isnan overload and x0/x1 are the two halves of x; confirm against the full
// header.
1703 #if defined Vc_MSVC && defined Vc_IMPL_SSE
1705 const SimdArray<
double, 8, SSE::Vector<double>, 2> &x)
1707 using V = SSE::Vector<double>;
1712 internal_data(internal_data0(r0)) =
isnan(internal_data(internal_data0(x0)));
1713 internal_data(internal_data1(r0)) =
isnan(internal_data(internal_data1(x0)));
1714 internal_data(internal_data0(r1)) =
isnan(internal_data(internal_data0(x1)));
1715 internal_data(internal_data1(r1)) =
isnan(internal_data(internal_data1(x1)));
1716 return {std::move(r0), std::move(r1)};
1721 template <
typename T, std::
size_t N>
1727 template <
typename T, std::
size_t N>
1732 Vc_FORWARD_UNARY_OPERATOR(
log);
1738 Vc_FORWARD_UNARY_OPERATOR(
sin);
1740 template <
typename T, std::
size_t N>
1747 Vc_FORWARD_BINARY_OPERATOR(
min);
1748 Vc_FORWARD_BINARY_OPERATOR(
max);
1750 #undef Vc_FORWARD_UNARY_OPERATOR
1751 #undef Vc_FORWARD_UNARY_BOOL_OPERATOR
1752 #undef Vc_FORWARD_BINARY_OPERATOR
// Vc_DUMMY_ARG0..5 are appended to the parameter lists of the offset-taking
// simd_cast overloads further down in this file.
// Two alternative sets of definitions follow. The preprocessor conditional
// that selects between them is on lines missing from this listing.
//
// Set 1: each macro expands to one extra, defaulted parameter; every macro
// uses a different parameter type, so the overloads using them get distinct
// signatures.
1756 #define Vc_DUMMY_ARG0 , int = 0
1757 #define Vc_DUMMY_ARG1 , long = 0
1758 #define Vc_DUMMY_ARG2 , short = 0
1759 #define Vc_DUMMY_ARG3 , char = '0'
1760 #define Vc_DUMMY_ARG4 , unsigned = 0u
1761 #define Vc_DUMMY_ARG5 , unsigned short = 0u
// Set 2: every macro expands to nothing, i.e. no extra parameter is added.
1763 #define Vc_DUMMY_ARG0
1764 #define Vc_DUMMY_ARG1
1765 #define Vc_DUMMY_ARG2
1766 #define Vc_DUMMY_ARG3
1767 #define Vc_DUMMY_ARG4
1768 #define Vc_DUMMY_ARG5
// Variadic overload (enabled only when at least one From argument precedes
// `last`). The visible loop copies the N entries of `last`, converted with
// static_cast to Return::EntryType, into r starting at index
// N * sizeof...(From), i.e. behind the slots of the preceding arguments.
// NOTE(review): the initialization of r and the return statement are on lines
// missing from this listing - presumably r is first filled from xs... and then
// returned; confirm against the full header.
1775 template <
typename Return, std::size_t N,
typename T,
typename... From>
1776 Vc_INTRINSIC Vc_CONST enable_if<
sizeof...(From) != 0, Return>
1777 simd_cast_impl_smaller_input(
const From &... xs,
const T &last)
1780 for (
size_t i = 0; i < N; ++i) {
1781 r[i + N *
sizeof...(From)] = static_cast<typename Return::EntryType>(last[i]);
// Single-argument overload: starts from a value-initialized Return and copies
// the first N entries of `last` into r[0..N), converting each entry with
// static_cast to Return::EntryType.
// NOTE(review): the closing lines (presumably `return r;`) are missing from
// this listing.
1785 template <
typename Return, std::
size_t N,
typename T>
1786 Vc_INTRINSIC Vc_CONST Return simd_cast_impl_smaller_input(
const T &last)
1788 Return r = Return();
1789 for (
size_t i = 0; i < N; ++i) {
1790 r[i] =
static_cast<typename Return::EntryType
>(last[i]);
// Variadic overload (enabled only when at least one From argument precedes
// `last`). The visible loop fills the tail of r: for every index i from
// N * sizeof...(From) up to (excluding) Return::Size it stores
// last[i - N * sizeof...(From)], converted with static_cast to
// Return::EntryType. Entries of `last` beyond the end of r are never read.
// NOTE(review): the initialization of r and the return statement are on lines
// missing from this listing - presumably r is first filled from xs...; confirm.
1794 template <
typename Return, std::size_t N,
typename T,
typename... From>
1795 Vc_INTRINSIC Vc_CONST enable_if<
sizeof...(From) != 0, Return> simd_cast_impl_larger_input(
1796 const From &... xs,
const T &last)
1799 for (
size_t i = N *
sizeof...(From); i < Return::Size; ++i) {
1800 r[i] =
static_cast<typename Return::EntryType
>(last[i - N *
sizeof...(From)]);
// Single-argument overload: starts from a value-initialized Return and copies
// the first Return::size() entries of `last` into r, converting each entry
// with static_cast to Return::EntryType. The template parameter N is not used
// by the visible statements.
// NOTE(review): the closing lines (presumably `return r;`) are missing from
// this listing.
1804 template <
typename Return, std::
size_t N,
typename T>
1805 Vc_INTRINSIC Vc_CONST Return simd_cast_impl_larger_input(
const T &last)
1807 Return r = Return();
1808 for (
size_t i = 0; i < Return::size(); ++i) {
1809 r[i] =
static_cast<typename Return::EntryType
>(last[i]);
// Forward declaration. The function takes a pack of From arguments followed by
// one argument of type T; that final parameter is unnamed in the declaration.
// The attribute macros are split into *_L (before) and *_R (after the
// declarator) variants.
1815 template <
typename Return,
typename T,
typename... From>
1816 Vc_INTRINSIC_L Vc_CONST_L Return
1817 simd_cast_without_last(
const From &... xs,
const T &) Vc_INTRINSIC_R Vc_CONST_R;
// Compile-time predicate over a type list: true when all listed types are the
// same type.
1820 template <typename... Ts> struct are_all_types_equal;
// Base case: a list with a single type is trivially "all equal".
1821 template <typename T>
1822 struct are_all_types_equal<T> : public
std::integral_constant<
bool, true>
// Recursive case: the first two types must be identical, and the list with
// the first type removed must itself satisfy the predicate.
1825 template <
typename T0,
typename T1,
typename... Ts>
1826 struct are_all_types_equal<T0, T1, Ts...>
1827 :
public std::integral_constant<
1828 bool, std::is_same<T0, T1>::value && are_all_types_equal<T1, Ts...>::value>
// Forward declaration. Takes two argument packs of identical types: first all
// `a` arguments, then all `b` arguments. The definition appears later in this
// file.
1852 template <
typename Return,
typename... Ts>
1853 Vc_INTRINSIC Vc_CONST Return
1854 simd_cast_interleaved_argument_order(
const Ts &... a,
const Ts &... b);
// Forward declarations of the simd_cast_with_offset overload set. `offset` is
// compared against From::Size and Return::Size in the enable_if conditions
// below, i.e. it is measured in entries of the input.
//
// Overload for offset == 0 with one or more inputs that all have the same type.
1858 template <
typename Return, std::size_t offset,
typename From,
typename... Froms>
1859 Vc_INTRINSIC Vc_CONST
1860 enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
1861 simd_cast_with_offset(
const From &x,
const Froms &... xs);
// Overload for a single input where 0 < offset < From::Size and offset is a
// multiple of Return::Size.
1863 template <
typename Return, std::
size_t offset,
typename From>
1864 Vc_INTRINSIC Vc_CONST
1865 enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0), Return>
1866 simd_cast_with_offset(
const From &x);
// Overload for a single input where 0 < offset < From::Size, offset is NOT a
// multiple of Return::Size, and Return is a non-atomic SimdArray or
// SimdMaskArray.
1868 template <
typename Return, std::
size_t offset,
typename From>
1869 Vc_INTRINSIC Vc_CONST
1870 enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
1871 ((Traits::isSimdArray<Return>::value &&
1872 !Traits::isAtomicSimdArray<Return>::value) ||
1873 (Traits::isSimdMaskArray<Return>::value &&
1874 !Traits::isAtomicSimdMaskArray<Return>::value))),
1876 simd_cast_with_offset(
const From &x);
// Same offset conditions as the previous overload, but for a Return type that
// is an atomic SimdArray or SimdMaskArray.
1878 template <
typename Return, std::
size_t offset,
typename From>
1879 Vc_INTRINSIC Vc_CONST
1880 enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
1881 ((Traits::isSimdArray<Return>::value &&
1882 Traits::isAtomicSimdArray<Return>::value) ||
1883 (Traits::isSimdMaskArray<Return>::value &&
1884 Traits::isAtomicSimdMaskArray<Return>::value))),
1886 simd_cast_with_offset(
const From &x);
// Overload for several same-typed inputs where the requested offset is at
// least From::Size: the first input is not needed, so it is dropped (the
// parameter is unnamed) and the call is repeated on the remaining inputs with
// the offset reduced by From::Size.
1888 template <
typename Return, std::size_t offset,
typename From,
typename... Froms>
1889 Vc_INTRINSIC Vc_CONST enable_if<
1890 (are_all_types_equal<From, Froms...>::value && From::Size <= offset), Return>
1891 simd_cast_with_offset(
const From &,
const Froms &... xs)
1893 return simd_cast_with_offset<Return, offset - From::Size>(xs...);
1897 template <
typename Return, std::
size_t offset,
typename From>
1898 Vc_INTRINSIC Vc_CONST enable_if<(From::Size <= offset), Return> simd_cast_with_offset(
// Helper trait taking a non-empty type list (T, Ts...).
// NOTE(review): its body is on lines missing from this listing; judging by the
// name and by the alias below it presumably exposes `type = T` - confirm.
1905 template <
typename T,
typename... Ts>
struct first_type_of_impl
// Alias for the `type` member of first_type_of_impl<Ts...>.
1909 template <
typename... Ts>
using first_type_of =
typename first_type_of_impl<Ts...>::type;
// Forward declarations of the simd_cast_drop_arguments overload set; the
// definitions appear later in this file.
//
// Single-input overload.
1912 template <
typename Return,
typename From>
1913 Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x);
// Overload enabled when all Froms are the same type and the pack xs alone
// provides fewer entries than Return::Size
// (sizeof...(Froms) * Size < Return::Size); takes one further argument x of
// that same type.
1914 template <
typename Return,
typename... Froms>
1915 Vc_INTRINSIC Vc_CONST
1916 enable_if<(are_all_types_equal<Froms...>::value &&
1917 sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
1919 simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x);
// Overload enabled when the pack is non-empty, all types are equal and
// (1 + sizeof...(Froms)) * From::Size >= Return::Size; its last parameter is
// unnamed.
1923 template <
typename Return,
typename From,
typename... Froms>
1924 Vc_INTRINSIC Vc_CONST enable_if<
1925 (are_all_types_equal<From, Froms...>::value &&
1926 (1 +
sizeof...(Froms)) * From::Size >= Return::Size &&
sizeof...(Froms) != 0),
1928 simd_cast_drop_arguments(Froms... xs, From x, From);
// Two-argument overload enabled when From::Size >= Return::Size; its second
// parameter is unnamed.
1929 template <
typename Return,
typename From>
1930 Vc_INTRINSIC Vc_CONST
1931 enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
1932 simd_cast_drop_arguments(From x, From);
1936 #ifdef Vc_DEBUG_SIMD_CAST
// Intentionally empty sink. vc_debug_ passes it a braced list built from a
// pack expansion; evaluating that list is what performs the per-argument
// output, and this function merely accepts (and ignores) the resulting
// pointers.
//
// Declared `inline` because this definition lives in a header: without it,
// every translation unit that includes the header with Vc_DEBUG_SIMD_CAST
// defined would emit its own external definition and the program would fail
// to link with a multiple-definition error.
inline void debugDoNothing(const std::initializer_list<void *> &) {}
// Debug tracing helper (this variant follows the `#ifdef Vc_DEBUG_SIMD_CAST`
// line above). Writes to std::cerr, in this order: `prefix`, the first
// argument, every further argument preceded by ", ", and finally `suffix`.
// The further arguments are printed by expanding the pack inside the braced
// list handed to debugDoNothing; that call exists only to host the expansion.
// NOTE(review): the end of the parameter list (the `args` pack) is on a line
// missing from this listing.
1938 template <
typename T0,
typename... Ts>
1939 inline void vc_debug_(
const char *prefix,
const char *suffix,
const T0 &arg0,
1942 std::cerr << prefix << arg0;
1943 debugDoNothing({&(std::cerr <<
", " << args)...});
1944 std::cerr << suffix;
// Silent counterpart of the tracing helper: same parameter types, but every
// parameter is unnamed, so none of the arguments can be used.
// NOTE(review): the preprocessor branch this belongs to and the (presumably
// empty) body are on lines missing from this listing.
1947 template <
typename T0,
typename... Ts>
1948 Vc_INTRINSIC
void vc_debug_(
const char *,
const char *,
const T0 &,
const Ts &...)
// is_less<A, B>: integral_constant<bool> that is true when A < B. Wrapping the
// comparison in a type keeps a bare `<` out of template argument lists where
// it is used below.
1955 template <
size_t A,
size_t B>
1956 struct is_less :
public std::integral_constant<bool, (A < B)> {
// is_power_of_2: true when N has no bit in common with N - 1.
// NOTE(review): the template header declaring N is on a line missing from this
// listing. For N == 0 the expression also evaluates to true.
1961 struct is_power_of_2 : public std::integral_constant<bool, ((N - 1) & N) == 0> {
// simd_cast overloads converting one or more native NativeType_<T, A> objects
// (all of the same type) into a SimdArrayType_ `Return`. Each overload is
// tagged by the label it passes to vc_debug_:
//  {1} Return is atomic and the trailing arguments xs alone provide fewer
//      entries than Return::Size: forward all arguments to
//      simd_cast<Return::storage_type>.
//  {2} Return is atomic and the condition of {1} does not hold: forward to
//      simd_cast_without_last.
//  {3} Return is not atomic and all arguments together provide more entries
//      than the left half of Return (Common::left_size): the first half is
//      produced by simd_cast_drop_arguments<R0>, the second half by
//      simd_cast_with_offset<R1, R0::Size>.
//  {4} Return is not atomic and the condition of {3} does not hold: the first
//      half is simd_cast<R0>(x, xs...), the second half is R1::Zero().
// The macro is expanded for (SimdArray, Vc::Vector) and (SimdMaskArray,
// Vc::Mask) and then undefined.
1965 #define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_) \
1966 template <typename Return, typename T, typename A, typename... Froms> \
1967 Vc_INTRINSIC Vc_CONST enable_if< \
1968 (Traits::isAtomic##SimdArrayType_<Return>::value && \
1969 is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value && \
1970 are_all_types_equal<NativeType_<T, A>, Froms...>::value), \
1972 simd_cast(NativeType_<T, A> x, Froms... xs) \
1974 vc_debug_("simd_cast{1}(", ")\n", x, xs...); \
1975 return {simd_cast<typename Return::storage_type>(x, xs...)}; \
1977 template <typename Return, typename T, typename A, typename... Froms> \
1978 Vc_INTRINSIC Vc_CONST enable_if< \
1979 (Traits::isAtomic##SimdArrayType_<Return>::value && \
1980 !is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value && \
1981 are_all_types_equal<NativeType_<T, A>, Froms...>::value), \
1983 simd_cast(NativeType_<T, A> x, Froms... xs) \
1985 vc_debug_("simd_cast{2}(", ")\n", x, xs...); \
1986 return {simd_cast_without_last<Return, NativeType_<T, A>, Froms...>(x, xs...)}; \
1988 template <typename Return, typename T, typename A, typename... Froms> \
1989 Vc_INTRINSIC Vc_CONST \
1990 enable_if<(Traits::is##SimdArrayType_<Return>::value && \
1991 !Traits::isAtomic##SimdArrayType_<Return>::value && \
1992 is_less<Common::left_size<Return::Size>(), \
1993 NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value && \
1994 are_all_types_equal<NativeType_<T, A>, Froms...>::value), \
1996 simd_cast(NativeType_<T, A> x, Froms... xs) \
1998 vc_debug_("simd_cast{3}(", ")\n", x, xs...); \
1999 using R0 = typename Return::storage_type0; \
2000 using R1 = typename Return::storage_type1; \
2001 return {simd_cast_drop_arguments<R0, Froms...>(x, xs...), \
2002 simd_cast_with_offset<R1, R0::Size>(x, xs...)}; \
2004 template <typename Return, typename T, typename A, typename... Froms> \
2005 Vc_INTRINSIC Vc_CONST \
2006 enable_if<(Traits::is##SimdArrayType_<Return>::value && \
2007 !Traits::isAtomic##SimdArrayType_<Return>::value && \
2008 !is_less<Common::left_size<Return::Size>(), \
2009 NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value && \
2010 are_all_types_equal<NativeType_<T, A>, Froms...>::value), \
2012 simd_cast(NativeType_<T, A> x, Froms... xs) \
2014 vc_debug_("simd_cast{4}(", ")\n", x, xs...); \
2015 using R0 = typename Return::storage_type0; \
2016 using R1 = typename Return::storage_type1; \
2017 return {simd_cast<R0>(x, xs...), R1::Zero()}; \
2019 Vc_NOTHING_EXPECTING_SEMICOLON
2021 Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
2022 Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
2023 #undef Vc_SIMDARRAY_CASTS
// simd_cast<Return, offset> overloads converting a part of ONE native
// NativeType_<T, A> object into a SimdArrayType_ `Return`. `offset` counts in
// units of Return::Size (the entry offset is offset * Return::Size). The three
// overloads carry different Vc_DUMMY_ARG parameters and are tagged by their
// vc_debug_ label:
//  {offset, atomic}       Return is atomic: forward to
//                         simd_cast<Return::storage_type, offset>.
//  {offset, split Return} Return is not atomic and the input still has entries
//                         beyond the left half of the requested window: both
//                         halves are produced by simd_cast_with_offset, at
//                         entries_offset and entries_offset + R0::Size.
//  {offset, R1::Zero}     Return is not atomic and the input ends within the
//                         left half: the first half comes from
//                         simd_cast_with_offset<R0, entries_offset>, the
//                         second half is R1::Zero().
// The macro is expanded for (SimdArray, Vc::Vector) and (SimdMaskArray,
// Vc::Mask) and then undefined.
2026 #define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_) \
2028 template <typename Return, int offset, typename T, typename A> \
2029 Vc_INTRINSIC Vc_CONST \
2030 enable_if<Traits::isAtomic##SimdArrayType_<Return>::value, Return> \
2031 simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG0) \
2033 vc_debug_("simd_cast{offset, atomic}(", ")\n", offset, x); \
2034 return {simd_cast<typename Return::storage_type, offset>(x)}; \
2037 template <typename Return, int offset, typename T, typename A> \
2038 Vc_INTRINSIC Vc_CONST \
2039 enable_if<(Traits::is##SimdArrayType_<Return>::value && \
2040 !Traits::isAtomic##SimdArrayType_<Return>::value && \
2041 Return::Size * offset + Common::left_size<Return::Size>() < \
2042 NativeType_<T, A>::Size), \
2044 simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG1) \
2046 vc_debug_("simd_cast{offset, split Return}(", ")\n", offset, x); \
2047 using R0 = typename Return::storage_type0; \
2048 constexpr int entries_offset = offset * Return::Size; \
2049 constexpr int entries_offset_right = entries_offset + R0::Size; \
2051 simd_cast_with_offset<typename Return::storage_type0, entries_offset>(x), \
2052 simd_cast_with_offset<typename Return::storage_type1, entries_offset_right>( \
2057 template <typename Return, int offset, typename T, typename A> \
2058 Vc_INTRINSIC Vc_CONST \
2059 enable_if<(Traits::is##SimdArrayType_<Return>::value && \
2060 !Traits::isAtomic##SimdArrayType_<Return>::value && \
2061 Return::Size * offset + Common::left_size<Return::Size>() >= \
2062 NativeType_<T, A>::Size), \
2064 simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG2) \
2066 vc_debug_("simd_cast{offset, R1::Zero}(", ")\n", offset, x); \
2067 using R0 = typename Return::storage_type0; \
2068 using R1 = typename Return::storage_type1; \
2069 constexpr int entries_offset = offset * Return::Size; \
2070 return {simd_cast_with_offset<R0, entries_offset>(x), R1::Zero()}; \
2072 Vc_NOTHING_EXPECTING_SEMICOLON
2074 Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
2075 Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
2076 #undef Vc_SIMDARRAY_CASTS
// simd_cast overloads whose INPUT is one or more SimdArrayType_ objects of
// identical type. Each overload is tagged by the label it passes to vc_debug_:
//  {indivisible}        input is SimdArrayType_<T, N, V, N>, Return differs
//                       from the input type, and either there is a single
//                       argument or the trailing arguments alone provide fewer
//                       than Return::Size entries: unwrap every argument with
//                       internal_data and call simd_cast<Return>.
//  {indivisible2}       same input kind, but the trailing arguments alone
//                       already provide >= Return::Size entries: unwrap and
//                       call simd_cast_without_last.
//  {bisectable}         input has N != M, N is a power of 2, and the trailing
//                       arguments alone provide fewer than Return::Size
//                       entries: pass all first halves (internal_data0)
//                       followed by all second halves (internal_data1) to
//                       simd_cast_interleaved_argument_order.
//  {bisectable2}        as above but the trailing arguments alone suffice:
//                       call simd_cast_without_last.
//  {remaining}          N != M, N is not a power of 2, and all arguments
//                       together provide <= Return::Size entries: call
//                       simd_cast_impl_smaller_input.
//  {remaining2}         N != M, N is not a power of 2, and the arguments
//                       provide more than Return::Size entries: call
//                       simd_cast_impl_larger_input.
//  {single bisectable}  one argument, N != M, power of 2, N >= 2 * Return::Size:
//                       only the first half is converted.
//  {single bisectable2} one argument, N != M, power of 2,
//                       Return::Size < N < 2 * Return::Size: both halves are
//                       passed to simd_cast<Return>.
// The macro is expanded for SimdArray and SimdMaskArray and then undefined.
2079 #define Vc_SIMDARRAY_CASTS(SimdArrayType_) \
2081 template <typename Return, typename T, std::size_t N, typename V, typename... From> \
2082 Vc_INTRINSIC Vc_CONST \
2083 enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value && \
2084 (sizeof...(From) == 0 || N * sizeof...(From) < Return::Size) && \
2085 !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value), \
2087 simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs) \
2089 vc_debug_("simd_cast{indivisible}(", ")\n", x0, xs...); \
2090 return simd_cast<Return>(internal_data(x0), internal_data(xs)...); \
2093 template <typename Return, typename T, std::size_t N, typename V, typename... From> \
2094 Vc_INTRINSIC Vc_CONST \
2095 enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value && \
2096 (sizeof...(From) > 0 && (N * sizeof...(From) >= Return::Size)) && \
2097 !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value), \
2099 simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs) \
2101 vc_debug_(
"simd_cast{indivisible2}(",
")\n", x0, xs...); \
2102 return simd_cast_without_last<Return, \
2103 typename SimdArrayType_<T, N, V, N>::storage_type, \
2104 typename From::storage_type...>( \
2105 internal_data(x0), internal_data(xs)...); \
2108 template <
typename Return,
typename T, std::size_t N,
typename V, std::size_t M, \
2110 Vc_INTRINSIC Vc_CONST enable_if< \
2111 (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value && \
2112 !std::is_same<Return, SimdArrayType_<T, N, V, M>>::value && \
2113 is_less<N *
sizeof...(From), Return::Size>::value && is_power_of_2<N>::value), \
2115 simd_cast(
const SimdArrayType_<T, N, V, M> &x0,
const From &... xs) \
2117 vc_debug_(
"simd_cast{bisectable}(",
")\n", x0, xs...); \
2118 return simd_cast_interleaved_argument_order< \
2119 Return,
typename SimdArrayType_<T, N, V, M>::storage_type0, \
2120 typename From::storage_type0...>(internal_data0(x0), internal_data0(xs)..., \
2121 internal_data1(x0), internal_data1(xs)...); \
2125 template <
typename Return,
typename T, std::size_t N,
typename V, std::size_t M, \
2127 Vc_INTRINSIC Vc_CONST enable_if< \
2128 (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value && \
2129 !is_less<N *
sizeof...(From), Return::Size>::value && is_power_of_2<N>::value), \
2131 simd_cast(
const SimdArrayType_<T, N, V, M> &x0,
const From &... xs) \
2133 vc_debug_(
"simd_cast{bisectable2}(",
")\n", x0, xs...); \
2134 return simd_cast_without_last<Return, SimdArrayType_<T, N, V, M>, From...>( \
2138 template <
typename Return,
typename T, std::size_t N,
typename V, std::size_t M, \
2140 Vc_INTRINSIC Vc_CONST enable_if< \
2141 (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value && \
2142 N * (1 +
sizeof...(From)) <= Return::Size && !is_power_of_2<N>::value), \
2144 simd_cast(
const SimdArrayType_<T, N, V, M> &x0,
const From &... xs) \
2146 vc_debug_(
"simd_cast{remaining}(",
")\n", x0, xs...); \
2147 return simd_cast_impl_smaller_input<Return, N, SimdArrayType_<T, N, V, M>, \
2148 From...>(x0, xs...); \
2151 template <
typename Return,
typename T, std::size_t N,
typename V, std::size_t M, \
2153 Vc_INTRINSIC Vc_CONST enable_if< \
2154 (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value && \
2155 N * (1 +
sizeof...(From)) > Return::Size && !is_power_of_2<N>::value), \
2157 simd_cast(
const SimdArrayType_<T, N, V, M> &x0,
const From &... xs) \
2159 vc_debug_(
"simd_cast{remaining2}(",
")\n", x0, xs...); \
2160 return simd_cast_impl_larger_input<Return, N, SimdArrayType_<T, N, V, M>, \
2161 From...>(x0, xs...); \
2164 template <typename Return, typename T, std::size_t N, typename V, std::size_t M> \
2165 Vc_INTRINSIC Vc_CONST \
2166 enable_if<(N != M && N >= 2 * Return::Size && is_power_of_2<N>::value), Return> \
2167 simd_cast(
const SimdArrayType_<T, N, V, M> &x) \
2169 vc_debug_(
"simd_cast{single bisectable}(",
")\n", x); \
2170 return
simd_cast<Return>(internal_data0(x)); \
2172 template <typename Return, typename T, std::size_t N, typename V, std::size_t M> \
2173 Vc_INTRINSIC Vc_CONST enable_if<(N != M && N > Return::Size && \
2174 N < 2 * Return::Size && is_power_of_2<N>::value), \
2176 simd_cast(
const SimdArrayType_<T, N, V, M> &x) \
2178 vc_debug_(
"simd_cast{single bisectable2}(",
")\n", x); \
2179 return
simd_cast<Return>(internal_data0(x), internal_data1(x)); \
2181 Vc_NOTHING_EXPECTING_SEMICOLON
2183 Vc_SIMDARRAY_CASTS(SimdArray);
2184 Vc_SIMDARRAY_CASTS(SimdMaskArray);
2185 #undef Vc_SIMDARRAY_CASTS
// simd_cast<Return, offset> overloads whose input is ONE SimdArrayType_
// object. `offset` counts in units of Return::Size. The overloads carry
// different Vc_DUMMY_ARG parameters and are tagged by their vc_debug_ label:
//  {offset == 0}            forward to the offset-less simd_cast<Return>(x).
//  {offset, forward}        offset != 0 and the input is
//                           SimdArrayType_<T, N, V, N>: unwrap with
//                           internal_data and keep the same offset.
//  {offset, right}          N != M, the window starts at or after the left
//                           half (Common::left_size<N>()) and the left half is
//                           a multiple of Return::Size: cast from
//                           internal_data1 with the offset reduced by
//                           left_size / Return::Size.
//  {offset, right, nofit}   as above but the left half is not a multiple of
//                           Return::Size: use simd_cast_with_offset on
//                           internal_data1 with an entry offset of
//                           offset * Return::Size - left_size.
//  {offset, left}           the window ends within the left half: cast from
//                           internal_data0 with the same offset. (The first
//                           line of this overload's condition is missing from
//                           this listing.)
//  {offset, copy scalars}   N != M and the window straddles the boundary
//                           between the halves: start from Return::Zero() and
//                           copy entry by entry, with static_cast to
//                           Return::EntryType, for input indexes from
//                           offset * Return::Size up to (excluding)
//                           min(N, (offset + 1) * Return::Size).
// The macro is expanded for SimdArray and SimdMaskArray and then undefined.
2188 #define Vc_SIMDARRAY_CASTS(SimdArrayType_) \
2190 template <typename Return, int offset, typename T, std::size_t N, typename V, \
2192 Vc_INTRINSIC Vc_CONST enable_if<(offset == 0), Return> simd_cast( \
2193 const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG0) \
2195 vc_debug_("simd_cast{offset == 0}(", ")\n", offset, x); \
2196 return simd_cast<Return>(x); \
2199 template <typename Return, int offset, typename T, std::size_t N, typename V> \
2200 Vc_INTRINSIC Vc_CONST enable_if<(offset != 0), Return> simd_cast( \
2201 const SimdArrayType_<T, N, V, N> &x Vc_DUMMY_ARG1) \
2203 vc_debug_("simd_cast{offset, forward}(", ")\n", offset, x); \
2204 return simd_cast<Return, offset>(internal_data(x)); \
2207 template <typename Return, int offset, typename T, std::size_t N, typename V, \
2209 Vc_INTRINSIC Vc_CONST \
2210 enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() && \
2211 offset != 0 && Common::left_size<N>() % Return::Size == 0), \
2213 simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG2) \
2215 vc_debug_("simd_cast{offset, right}(", ")\n", offset, x); \
2216 return simd_cast<Return, offset - Common::left_size<N>() / Return::Size>( \
2217 internal_data1(x)); \
2221 template <typename Return, int offset, typename T, std::size_t N, typename V, \
2223 Vc_INTRINSIC Vc_CONST \
2224 enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() && \
2225 offset != 0 && Common::left_size<N>() % Return::Size != 0), \
2227 simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG3) \
2229 vc_debug_("simd_cast{offset, right, nofit}(", ")\n", offset, x); \
2230 return simd_cast_with_offset<Return, \
2231 offset * Return::Size - Common::left_size<N>()>( \
2232 internal_data1(x)); \
2235 template <typename Return, int offset, typename T, std::size_t N, typename V, \
2237 Vc_INTRINSIC Vc_CONST enable_if< \
2239 offset != 0 && (offset + 1) * Return::Size <= Common::left_size<N>()), \
2241 simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG4) \
2243 vc_debug_("simd_cast{offset, left}(", ")\n", offset, x); \
2244 return simd_cast<Return, offset>(internal_data0(x)); \
2247 template <typename Return, int offset, typename T, std::size_t N, typename V, \
2249 Vc_INTRINSIC Vc_CONST \
2250 enable_if<(N != M && (offset * Return::Size < Common::left_size<N>()) && \
2251 offset != 0 && (offset + 1) * Return::Size > Common::left_size<N>()), \
2253 simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG5) \
2255 vc_debug_("simd_cast{offset, copy scalars}(", ")\n", offset, x); \
2256 using R = typename Return::EntryType; \
2257 Return r = Return::Zero(); \
2258 for (std::size_t i = offset * Return::Size; \
2259 i < std::min(N, (offset + 1) * Return::Size); ++i) { \
2260 r[i - offset * Return::Size] = static_cast<R>(x[i]); \
2264 Vc_NOTHING_EXPECTING_SEMICOLON
2265 Vc_SIMDARRAY_CASTS(SimdArray);
2266 Vc_SIMDARRAY_CASTS(SimdMaskArray);
2267 #undef Vc_SIMDARRAY_CASTS
// Definitions of the simd_cast_drop_arguments overload set: the overloads
// with an unnamed trailing parameter discard that argument and recurse.
//
// Single-input overload.
// NOTE(review): its body is on lines missing from this listing - presumably it
// forwards x to simd_cast<Return>; confirm.
2269 template <
typename Return,
typename From>
2270 Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x)
// Overload for the case where the pack xs alone provides fewer entries than
// Return::Size, so the additional argument x is still needed.
// NOTE(review): its body is on lines missing from this listing.
2274 template <
typename Return,
typename... Froms>
2275 Vc_INTRINSIC Vc_CONST
2276 enable_if<(are_all_types_equal<Froms...>::value &&
2277 sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
2279 simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x)
// Overload with an unnamed trailing parameter: that last argument is discarded
// and the function recurses with the remaining arguments (xs..., x).
2286 template <
typename Return,
typename From,
typename... Froms>
2287 Vc_INTRINSIC Vc_CONST enable_if<
2288 (are_all_types_equal<From, Froms...>::value &&
2289 (1 +
sizeof...(Froms)) * From::Size >= Return::Size &&
sizeof...(Froms) != 0),
2291 simd_cast_drop_arguments(Froms... xs, From x, From)
2293 return simd_cast_drop_arguments<Return, Froms...>(xs..., x);
// Two-argument overload for From::Size >= Return::Size: the second argument is
// discarded and the single-input overload is called with x.
2295 template <
typename Return,
typename From>
2296 Vc_INTRINSIC Vc_CONST
2297 enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
2298 simd_cast_drop_arguments(From x, From)
2300 return simd_cast_drop_arguments<Return>(x);
// Single input, 0 < offset < From::Size, offset a multiple of Return::Size:
// the entry offset is converted to a chunk index (offset / Return::Size) and
// handed to the offset-taking simd_cast.
2304 template <
typename Return, std::
size_t offset,
typename From>
2305 Vc_INTRINSIC Vc_CONST
2306 enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0),
2307 Return> simd_cast_with_offset(
const From &x)
2309 return simd_cast<Return, offset / Return::Size>(x);
// Single input, 0 < offset < From::Size, offset NOT a multiple of
// Return::Size, and Return is a non-atomic SimdArray/SimdMaskArray: the result
// is assembled from its two storage halves, each produced by a recursive
// simd_cast_with_offset call - the first at `offset`, the second at
// `offset + R0::Size`.
2311 template <
typename Return, std::
size_t offset,
typename From>
2312 Vc_INTRINSIC Vc_CONST
2313 enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
2314 ((Traits::isSimdArray<Return>::value &&
2315 !Traits::isAtomicSimdArray<Return>::value) ||
2316 (Traits::isSimdMaskArray<Return>::value &&
2317 !Traits::isAtomicSimdMaskArray<Return>::value))),
2319 simd_cast_with_offset(
const From &x)
2321 using R0 =
typename Return::storage_type0;
2322 using R1 =
typename Return::storage_type1;
2323 return {simd_cast_with_offset<R0, offset>(x),
2324 simd_cast_with_offset<R1, offset + R0::Size>(x)};
// Single input, 0 < offset < From::Size, offset NOT a multiple of
// Return::Size, and Return is an atomic SimdArray/SimdMaskArray: the input is
// first passed through x.shifted(offset % Return::Size) to account for the
// remainder, then the offset-taking simd_cast is called with the chunk index
// offset / Return::Size.
2326 template <
typename Return, std::
size_t offset,
typename From>
2327 Vc_INTRINSIC Vc_CONST
2328 enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
2329 ((Traits::isSimdArray<Return>::value &&
2330 Traits::isAtomicSimdArray<Return>::value) ||
2331 (Traits::isSimdMaskArray<Return>::value &&
2332 Traits::isAtomicSimdMaskArray<Return>::value))),
2334 simd_cast_with_offset(
const From &x)
2336 return simd_cast<Return, offset / Return::Size>(x.shifted(offset % Return::Size));
// Overload for offset == 0 with one or more same-typed inputs.
// NOTE(review): the body is on lines missing from this listing - presumably it
// forwards all arguments to simd_cast<Return>; confirm.
2338 template <
typename Return, std::size_t offset,
typename From,
typename... Froms>
2339 Vc_INTRINSIC Vc_CONST
2340 enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
2341 simd_cast_with_offset(
const From &x,
const Froms &... xs)
// Definition matching the earlier forward declaration: takes a pack of From
// arguments plus one trailing argument of type T whose parameter is unnamed,
// so the trailing argument cannot be used.
// NOTE(review): the body is on lines missing from this listing - presumably it
// forwards xs... to simd_cast<Return>; confirm.
2347 template <
typename Return,
typename T,
typename... From>
2348 Vc_INTRINSIC Vc_CONST Return simd_cast_without_last(
const From &... xs,
const T &)
// extract_interleaved<I>(a..., b...): overload set selected on the index I.
// NOTE(review): the bodies of most overloads and parts of the variadic
// parameter lists are on lines missing from this listing; the remarks below
// state only what the visible signatures show.
//
// I == 0, one (a0, b0) pair: only a0 is named.
2359 template <std::
size_t I,
typename T0>
2360 Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(
const T0 &a0,
const T0 &)
// I == 1, one (a0, b0) pair: only b0 is named.
2364 template <std::
size_t I,
typename T0>
2365 Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(
const T0 &,
const T0 &b0)
// I == 0, variadic: the first parameter (a0) is named.
2372 template <std::size_t I,
typename T0,
typename... Ts>
2373 Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(
const T0 &a0,
// I == 1, variadic: the first parameter is unnamed.
2381 template <std::size_t I,
typename T0,
typename... Ts>
2382 Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(
const T0 &,
// I > 1, variadic: recurses with the index reduced by 2 on the packs a... and
// b..., i.e. without the leading element of each sequence.
2390 template <std::size_t I,
typename T0,
typename... Ts>
2391 Vc_INTRINSIC Vc_CONST enable_if<(I > 1), T0> extract_interleaved(
const T0 &,
2396 return extract_interleaved<I - 2, Ts...>(a..., b...);
// Helper carrying an index_sequence: for every index in Indexes it calls
// extract_interleaved<index>(a..., b...) and passes the resulting values, in
// index order, to simd_cast<Return>.
// NOTE(review): the end of the parameter list (the `b` pack) is on a line
// missing from this listing.
2399 template <
typename Return,
typename... Ts, std::size_t... Indexes>
2400 Vc_INTRINSIC Vc_CONST Return
2401 simd_cast_interleaved_argument_order_1(index_sequence<Indexes...>,
const Ts &... a,
2404 return simd_cast<Return>(extract_interleaved<Indexes, Ts...>(a..., b...)...);
// Entry point taking all `a` arguments followed by all `b` arguments. Builds
// an index sequence with 2 * sizeof...(Ts) entries (one per argument) and
// delegates to simd_cast_interleaved_argument_order_1.
2408 template <
typename Return,
typename... Ts>
2409 Vc_INTRINSIC Vc_CONST Return
2410 simd_cast_interleaved_argument_order(
const Ts &... a,
const Ts &... b)
2412 using seq = make_index_sequence<
sizeof...(Ts)*2>;
2413 return simd_cast_interleaved_argument_order_1<Return, Ts...>(seq(), a..., b...);
// Generates conditional_assign overloads for SimdArray that take a right-hand
// side. Each overload is enabled only when the Operator template argument O
// equals Operator::name_, and applies the compound assignment op_ through the
// write-masked view lhs(mask). The macro is expanded once per assignment
// operator listed below and then undefined.
2417 #define Vc_CONDITIONAL_ASSIGN(name_, op_) \
2418 template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M, \
2420 Vc_INTRINSIC enable_if<O == Operator::name_, void> conditional_assign( \
2421 SimdArray<T, N, V, VN> &lhs, M &&mask, U &&rhs) \
2423 lhs(mask) op_ rhs; \
2425 Vc_NOTHING_EXPECTING_SEMICOLON
2426 Vc_CONDITIONAL_ASSIGN( Assign, =);
2427 Vc_CONDITIONAL_ASSIGN( PlusAssign, +=);
2428 Vc_CONDITIONAL_ASSIGN( MinusAssign, -=);
2429 Vc_CONDITIONAL_ASSIGN( MultiplyAssign, *=);
2430 Vc_CONDITIONAL_ASSIGN( DivideAssign, /=);
2431 Vc_CONDITIONAL_ASSIGN( RemainderAssign, %=);
2432 Vc_CONDITIONAL_ASSIGN( XorAssign, ^=);
2433 Vc_CONDITIONAL_ASSIGN( AndAssign, &=);
2434 Vc_CONDITIONAL_ASSIGN( OrAssign, |=);
2435 Vc_CONDITIONAL_ASSIGN( LeftShiftAssign,<<=);
2436 Vc_CONDITIONAL_ASSIGN(RightShiftAssign,>>=);
2437 #undef Vc_CONDITIONAL_ASSIGN
// Generates conditional_assign overloads for SimdArray without a right-hand
// side (increment/decrement). Each overload is enabled only when O equals
// Operator::name_ and is declared to return a SimdArray<T, N, V, VN>. The
// expansions below pass the masked pre/post increment/decrement expression as
// expr_.
// NOTE(review): the macro body is on lines missing from this listing -
// presumably it returns expr_; confirm against the full header.
2439 #define Vc_CONDITIONAL_ASSIGN(name_, expr_) \
2440 template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M> \
2441 Vc_INTRINSIC enable_if<O == Operator::name_, SimdArray<T, N, V, VN>> \
2442 conditional_assign(SimdArray<T, N, V, VN> &lhs, M &&mask) \
2446 Vc_NOTHING_EXPECTING_SEMICOLON
2447 Vc_CONDITIONAL_ASSIGN(PostIncrement, lhs(mask)++);
2448 Vc_CONDITIONAL_ASSIGN( PreIncrement, ++lhs(mask));
2449 Vc_CONDITIONAL_ASSIGN(PostDecrement, lhs(mask)--);
2450 Vc_CONDITIONAL_ASSIGN( PreDecrement, --lhs(mask));
2451 #undef Vc_CONDITIONAL_ASSIGN
// 4x4 transpose for SimdArray<T, N, V, N>, i.e. arrays wrapping exactly one
// vector of type V. The output pointers and the four proxy inputs are
// unwrapped with internal_data and the work is delegated to the 4x4
// transpose_impl overload for V.
2455 template <
typename T,
size_t N,
typename V>
2456 inline void transpose_impl(
2457 TransposeTag<4, 4>, SimdArray<T, N, V, N> *Vc_RESTRICT r[],
2458 const TransposeProxy<SimdArray<T, N, V, N>, SimdArray<T, N, V, N>,
2459 SimdArray<T, N, V, N>, SimdArray<T, N, V, N>> &proxy)
2461 V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
2462 &internal_data(*r[2]), &internal_data(*r[3])};
2463 transpose_impl(TransposeTag<4, 4>(), &r2[0],
2464 TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
2465 internal_data(std::get<1>(proxy.in)),
2466 internal_data(std::get<2>(proxy.in)),
2467 internal_data(std::get<3>(proxy.in))});
// TransposeTag<2, 4> overload: four SimdArray<T, 2, V, 1> inputs, outputs of
// type SimdArray<T, 4, V, 1>. The visible assignments gather the
// internal_data0 part of inputs 0..3 into the four leaf slots of `lo`, and the
// internal_data1 part of inputs 0..3 into the four leaf slots of `hi`.
// NOTE(review): the declarations of `lo` and `hi` are on lines missing from
// this listing - presumably they refer to *r[0] and *r[1]; confirm.
2470 template <
typename T,
typename V>
2471 inline void transpose_impl(
2472 TransposeTag<2, 4>, SimdArray<T, 4, V, 1> *Vc_RESTRICT r[],
2473 const TransposeProxy<SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>,
2474 SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>> &proxy)
2478 internal_data0(internal_data0(lo)) = internal_data0(std::get<0>(proxy.in));
2479 internal_data1(internal_data0(lo)) = internal_data0(std::get<1>(proxy.in));
2480 internal_data0(internal_data1(lo)) = internal_data0(std::get<2>(proxy.in));
2481 internal_data1(internal_data1(lo)) = internal_data0(std::get<3>(proxy.in));
2482 internal_data0(internal_data0(hi)) = internal_data1(std::get<0>(proxy.in));
2483 internal_data1(internal_data0(hi)) = internal_data1(std::get<1>(proxy.in));
2484 internal_data0(internal_data1(hi)) = internal_data1(std::get<2>(proxy.in));
2485 internal_data1(internal_data1(hi)) = internal_data1(std::get<3>(proxy.in));
// 4x4 transpose overload for SimdArray<T, 1, V, 1>. Same scheme as the
// SimdArray<T, N, V, N> overload: outputs and proxy inputs are unwrapped with
// internal_data and the 4x4 transpose_impl for V does the work.
2488 template <
typename T,
typename V>
2489 inline void transpose_impl(
2490 TransposeTag<4, 4>, SimdArray<T, 1, V, 1> *Vc_RESTRICT r[],
2491 const TransposeProxy<SimdArray<T, 1, V, 1>, SimdArray<T, 1, V, 1>,
2492 SimdArray<T, 1, V, 1>, SimdArray<T, 1, V, 1>> &proxy)
2494 V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
2495 &internal_data(*r[2]), &internal_data(*r[3])};
2496 transpose_impl(TransposeTag<4, 4>(), &r2[0],
2497 TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
2498 internal_data(std::get<1>(proxy.in)),
2499 internal_data(std::get<2>(proxy.in)),
2500 internal_data(std::get<3>(proxy.in))});
// 4x4 transpose overload for SimdArray<T, N, V, 1>. The work is split into two
// TransposeTag<2, 4> calls with H = SimdArray<T, 2> as the half type:
//  - outputs r[0], r[1] are computed from the internal_data0 halves of the four
//    inputs;
//  - outputs r[2], r[3] are computed from the internal_data1 halves.
2503 template <
typename T,
size_t N,
typename V>
2504 inline void transpose_impl(
2505 TransposeTag<4, 4>, SimdArray<T, N, V, 1> *Vc_RESTRICT r[],
2506 const TransposeProxy<SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>,
2507 SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>> &proxy)
2509 SimdArray<T, N, V, 1> *Vc_RESTRICT r0[4 / 2] = {r[0], r[1]};
2510 SimdArray<T, N, V, 1> *Vc_RESTRICT r1[4 / 2] = {r[2], r[3]};
2511 using H = SimdArray<T, 2>;
2512 transpose_impl(TransposeTag<2, 4>(), &r0[0],
2513 TransposeProxy<H, H, H, H>{internal_data0(std::get<0>(proxy.in)),
2514 internal_data0(std::get<1>(proxy.in)),
2515 internal_data0(std::get<2>(proxy.in)),
2516 internal_data0(std::get<3>(proxy.in))});
2517 transpose_impl(TransposeTag<2, 4>(), &r1[0],
2518 TransposeProxy<H, H, H, H>{internal_data1(std::get<0>(proxy.in)),
2519 internal_data1(std::get<1>(proxy.in)),
2520 internal_data1(std::get<2>(proxy.in)),
2521 internal_data1(std::get<3>(proxy.in))});
// numeric_limits specialization for Vc::SimdArray<T, N, V, VN>. It derives
// from numeric_limits<T>, and each member function visible below returns the
// corresponding numeric_limits<T> value, converted to the return type R.
// NOTE(review): the declaration of R and any members preceding lowest() are on
// lines missing from this listing - presumably R is the SimdArray type itself,
// so each scalar limit is broadcast to all entries; confirm against the full
// header.
2574 template <
typename T,
size_t N,
typename V,
size_t VN>
2575 struct numeric_limits<
Vc::SimdArray<T, N, V, VN>> :
public numeric_limits<T> {
2582 static Vc_ALWAYS_INLINE Vc_CONST R lowest() noexcept
2584 return numeric_limits<T>::lowest();
2586 static Vc_ALWAYS_INLINE Vc_CONST R epsilon() noexcept
2588 return numeric_limits<T>::epsilon();
2590 static Vc_ALWAYS_INLINE Vc_CONST R round_error() noexcept
2592 return numeric_limits<T>::round_error();
2594 static Vc_ALWAYS_INLINE Vc_CONST R infinity() noexcept
2596 return numeric_limits<T>::infinity();
2598 static Vc_ALWAYS_INLINE Vc_CONST R quiet_NaN() noexcept
2600 return numeric_limits<T>::quiet_NaN();
2602 static Vc_ALWAYS_INLINE Vc_CONST R signaling_NaN() noexcept
2604 return numeric_limits<T>::signaling_NaN();
2606 static Vc_ALWAYS_INLINE Vc_CONST R denorm_min() noexcept
2608 return numeric_limits<T>::denorm_min();
2614 #endif // VC_COMMON_SIMDARRAY_H_
SimdArray< T, N, V, M > ceil(const SimdArray< T, N, V, M > &x)
Applies the std::ceil function component-wise and concurrently.
Vc::Vector< T > frexp(const Vc::Vector< T > &x, Vc::SimdArray< int, size()> *e)
Convert floating-point number to fractional and integral components.
Vc::Vector< T > log2(const Vc::Vector< T > &v)
Vc::Vector< T > exp(const Vc::Vector< T > &v)
static SimdArray generate(const G &gen)
Generate a vector object from return values of gen (static variant of fill).
SimdArray< T, N, V, M > floor(const SimdArray< T, N, V, M > &x)
Applies the std::floor function component-wise and concurrently.
The main vector class for expressing data parallelism.
constexpr VectorSpecialInitializerIndexesFromZero IndexesFromZero
The special object Vc::IndexesFromZero can be used to construct Vector objects initialized to values 0, 1, 2, 3, 4, ...
Vc::Vector< T > sin(const Vc::Vector< T > &v)
Vc::Vector< T > cos(const Vc::Vector< T > &v)
Vc::Vector< T > min(const Vc::Vector< T > &x, const Vc::Vector< T > &y)
Vc::Vector< T > reciprocal(const Vc::Vector< T > &v)
Returns the reciprocal of v.
SimdArray rotated(int amount) const
Rotate vector entries to the left by amount.
Vc::Vector< T > ldexp(Vc::Vector< T > x, Vc::SimdArray< int, size()> e)
Multiply floating-point number by integral power of 2.
std::ostream & operator<<(std::ostream &out, const Vc::Vector< T, Abi > &v)
Prints the contents of a vector into a stream object.
UnalignedTag DefaultLoadTag
The default load tag type uses unaligned (non-streaming) loads.
Vc::Vector< T > abs(const Vc::Vector< T > &v)
Returns the absolute value of v.
SimdArray apply(F &&f, const mask_type &k) const
As above, but skip the entries where mask is not set.
result_vector_type< L, R > operator-(L &&lhs, R &&rhs)
Applies - component-wise and concurrently.
Vc::Vector< T > max(const Vc::Vector< T > &x, const Vc::Vector< T > &y)
SimdArray< T, N, V, M > min(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std::min function component-wise and concurrently.
static SimdArray IndexesFromZero()
Returns a vector with the entries initialized to 0, 1, 2, 3, 4, 5, ...
SimdArray< T, N, V, M > max(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std::max function component-wise and concurrently.
Vc::Vector< T > log(const Vc::Vector< T > &v)
SimdArray shifted(int amount) const
Shift vector entries to the left by amount; shifting in zeros.
Vc::Vector< T > fma(Vc::Vector< T > a, Vc::Vector< T > b, Vc::Vector< T > c)
Multiplies a with b and then adds c, without rounding between the multiplication and the addition.
Data-parallel arithmetic type with user-defined number of elements.
Data-parallel mask type with user-defined number of boolean elements.
Vc::Vector< T > round(const Vc::Vector< T > &v)
Returns the closest integer to v; 0.5 is rounded to even.
SimdArray apply(F &&f) const
Call f on every entry of the vector and return the results as a new vector.
static SimdArray Zero()
Returns a vector with the entries initialized to zero.
Vc::Vector< T > rsqrt(const Vc::Vector< T > &v)
Returns the reciprocal square root of v.
SimdMaskArray< T, N, V, M > isnegative(const SimdArray< T, N, V, M > &x)
Applies the isnegative function component-wise and concurrently.
Vc::Vector< T > log10(const Vc::Vector< T > &v)
Common::WriteMaskedVector< SimdArray, mask_type > operator()(const mask_type &mask)
Writemask the vector before an assignment.
result_vector_type< L, R > operator+(L &&lhs, R &&rhs)
Applies + component-wise and concurrently.
SimdArray< T, N, V, M > trunc(const SimdArray< T, N, V, M > &x)
Applies the std::trunc function component-wise and concurrently.
SimdArray< T, N, V, M > exponent(const SimdArray< T, N, V, M > &x)
Applies the exponent function component-wise and concurrently.
Vc::Vector< T > atan2(const Vc::Vector< T > &y, const Vc::Vector< T > &x)
Calculates the angle given the lengths of the opposite and adjacent legs in a right triangle.
static constexpr std::size_t size()
Returns N, the number of scalar components in an object of this type.
value_type EntryType
The type of the elements (i.e. T)
SimdArray< T, N, V, M > copysign(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std::copysign function component-wise and concurrently.
constexpr AlignedTag Aligned
Use this object for a flags parameter to request aligned loads and stores.
Vc::Vector< T > atan(const Vc::Vector< T > &v)
Vc::Vector< T > asin(const Vc::Vector< T > &v)
SimdArray operator+() const
Returns a copy of itself.
void assign(Adapter< S, T, N > &a, size_t i, const S &x)
Assigns one scalar object x to a SIMD slot at offset i in the simdized object a.
void gather(const MT *mem, const IT &indexes)
Gather function.
Vc::Mask< T > isfinite(const Vc::Vector< T > &x)
SimdArray(value_type a)
Broadcast Constructor.
The main SIMD mask class.
Vc::Mask< T > isnan(const Vc::Vector< T > &x)
static SimdArray Random()
Returns a vector with pseudo-random entries.
constexpr VectorSpecialInitializerZero Zero
The special object Vc::Zero can be used to construct Vector and Mask objects initialized to zero/false.
Adapter< S, T, N > shifted(const Adapter< S, T, N > &a, int shift)
Returns a new vectorized object where each entry is shifted by shift.
void scatter(MT *mem, IT &&indexes) const
Scatter function.
T value_type
The type of the elements (i.e. T)
Vector Classes Namespace.
constexpr VectorSpecialInitializerOne One
The special object Vc::One can be used to construct Vector and Mask objects initialized to one/true.
value_type operator[](size_t index) const noexcept
This operator can be used to read scalar entries of the vector.
constexpr std::size_t MemoryAlignment
Specifies the most conservative memory alignment necessary for aligned loads and stores of Vector types.
To simd_cast(From &&x, enable_if< std::is_same< To, Traits::decay< From >>::value >=nullarg)
Casts the argument x from type From to type To.
reference operator[](size_t i) noexcept
This operator can be used to modify scalar entries of the vector.
void sincos(const SimdArray< T, N > &x, SimdArray< T, N > *sin, SimdArray< T, N > *cos)
Determines sine and cosine concurrently and component-wise on x.
SimdArray reversed() const
Returns a vector with all components reversed.
static SimdArray One()
Returns a vector with the entries initialized to one.
SimdArray sorted() const
Return a sorted copy of the vector.
Vc::Vector< T > sqrt(const Vc::Vector< T > &v)
Returns the square root of v.
constexpr UnalignedTag Unaligned
Use this object for a flags parameter to request unaligned loads and stores.
SimdMaskArray< T, N, V, M > isinf(const SimdArray< T, N, V, M > &x)
Applies the std::isinf function component-wise and concurrently.