Vc  1.3.2-dev
SIMD Vector Classes for C++
simdarrayfwd.h
1 /* This file is part of the Vc library. {{{
2 Copyright © 2014-2015 Matthias Kretz <kretz@kde.org>
3 
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions are met:
6  * Redistributions of source code must retain the above copyright
7  notice, this list of conditions and the following disclaimer.
8  * Redistributions in binary form must reproduce the above copyright
9  notice, this list of conditions and the following disclaimer in the
10  documentation and/or other materials provided with the distribution.
11  * Neither the names of contributing organizations nor the
12  names of its contributors may be used to endorse or promote products
13  derived from this software without specific prior written permission.
14 
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 
26 }}}*/
27 
28 #ifndef VC_COMMON_SIMDARRAYFWD_H_
29 #define VC_COMMON_SIMDARRAYFWD_H_
30 
31 #include "../scalar/types.h"
32 #include "../sse/types.h"
33 #include "../avx/types.h"
34 #include "../mic/types.h"
35 
36 #include "utility.h"
37 #include "macros.h"
38 
39 namespace Vc_VERSIONED_NAMESPACE
40 {
41 namespace Common
42 {
/*select_best_vector_type{{{*/
/**
 * \internal
 * Walks \p Typelist from left to right and yields the first type \c T whose
 * \c T::Size is not larger than \p N. The last entry of the list is the
 * unconditional fallback: it is selected even if its \c Size exceeds \p N.
 */
template <std::size_t N, typename... Typelist> struct select_best_vector_type_impl;

// Only one candidate left: take it, regardless of its Size.
template <std::size_t N, typename T> struct select_best_vector_type_impl<N, T>
{
    using type = T;
};

// Several candidates: skip T when it holds more than N entries, otherwise stop at T.
template <std::size_t N, typename T, typename... Typelist>
struct select_best_vector_type_impl<N, T, Typelist...>
{
    using type = typename std::conditional<
        (N < T::Size), typename select_best_vector_type_impl<N, Typelist...>::type,
        T>::type;
};
62 template <typename T, std::size_t N>
63 using select_best_vector_type =
64  typename select_best_vector_type_impl<N,
65 #ifdef Vc_IMPL_AVX2
66  Vc::AVX2::Vector<T>,
67  Vc::SSE::Vector<T>,
68  Vc::Scalar::Vector<T>
69 #elif defined(Vc_IMPL_AVX)
70  Vc::AVX::Vector<T>,
71  Vc::SSE::Vector<T>,
72  Vc::Scalar::Vector<T>
73 #elif defined(Vc_IMPL_Scalar)
74  Vc::Scalar::Vector<T>
75 #elif defined(Vc_IMPL_SSE)
76  Vc::SSE::Vector<T>,
77  Vc::Scalar::Vector<T>
78 #elif defined(Vc_IMPL_MIC)
79  Vc::MIC::Vector<T>,
80  Vc::Scalar::Vector<T>
81 #endif
82  >::type;
83 //}}}
85 } // namespace Common
86 
87 // === having SimdArray<T, N> in the Vc namespace leads to a ABI bug ===
88 //
89 // SimdArray<double, 4> can be { double[4] }, { __m128d[2] }, or { __m256d } even though the type
90 // is the same.
91 // The question is, what should SimdArray focus on?
92 // a) A type that makes interfacing between different implementations possible?
93 // b) Or a type that makes fixed size SIMD easier and efficient?
94 //
95 // a) can be achieved by using a union with T[N] as one member. But this may have more serious
96 // performance implications than only less efficient parameter passing (because compilers have a
97 // much harder time wrt. aliasing issues). Also alignment would need to be set to the sizeof in
98 // order to be compatible with targets with larger alignment requirements.
99 // But, the in-memory representation of masks is not portable. Thus, at the latest with AVX-512,
100 // there would be a problem with requiring SimdMaskArray<T, N> to be an ABI compatible type.
101 // AVX-512 uses one bit per boolean, whereas SSE/AVX use sizeof(T) Bytes per boolean. Conversion
102 // between the two representations is not a trivial operation. Therefore choosing one or the other
103 // representation will have a considerable impact for the targets that do not use this
104 // representation. Since the future probably belongs to one bit per boolean representation, I would
105 // go with that choice.
106 //
107 // b) requires that SimdArray<T, N> != SimdArray<T, N> if
108 // SimdArray<T, N>::vector_type != SimdArray<T, N>::vector_type
109 //
110 // Therefore use SimdArray<T, N, V>, where V follows from the above.
111 template <typename T, size_t N, typename V = Common::select_best_vector_type<T, N>,
112  size_t Wt = V::Size // this last parameter is only used for specialization of N
113  // == VectorSize
114  >
115 class SimdArray;
116 
117 template <typename T, size_t N, typename V = Common::select_best_vector_type<T, N>,
118  size_t Wt = V::Size // this last parameter is only used for specialization of N
119  // == VectorSize
120  >
122 
127 template <typename T, std::size_t N> struct SimdArrayTraits {
128  static constexpr std::size_t N0 = Common::left_size<N>();
129  static constexpr std::size_t N1 = Common::right_size<N>();
130 
133 };
134 
135 template <typename T, std::size_t N, typename VectorType, std::size_t VectorSize>
136 Vc_INTRINSIC_L typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
137  SimdArray<T, N, VectorType, VectorSize> &x) Vc_INTRINSIC_R;
138 template <typename T, std::size_t N, typename VectorType, std::size_t VectorSize>
139 Vc_INTRINSIC_L typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
140  SimdArray<T, N, VectorType, VectorSize> &x) Vc_INTRINSIC_R;
141 template <typename T, std::size_t N, typename VectorType, std::size_t VectorSize>
142 Vc_INTRINSIC_L const typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
143  const SimdArray<T, N, VectorType, VectorSize> &x) Vc_INTRINSIC_R;
144 template <typename T, std::size_t N, typename VectorType, std::size_t VectorSize>
145 Vc_INTRINSIC_L const typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
146  const SimdArray<T, N, VectorType, VectorSize> &x) Vc_INTRINSIC_R;
147 
148 template <typename T, std::size_t N, typename V>
149 Vc_INTRINSIC_L V &internal_data(SimdArray<T, N, V, N> &x) Vc_INTRINSIC_R;
150 template <typename T, std::size_t N, typename V>
151 Vc_INTRINSIC_L const V &internal_data(const SimdArray<T, N, V, N> &x) Vc_INTRINSIC_R;
152 
153 namespace Traits
154 {
155 template <typename T, std::size_t N, typename V> struct is_atomic_simdarray_internal<SimdArray<T, N, V, N>> : public std::true_type {};
156 template <typename T, std::size_t N, typename V> struct is_atomic_simd_mask_array_internal<SimdMaskArray<T, N, V, N>> : public std::true_type {};
157 
158 template <typename T, std::size_t N, typename VectorType, std::size_t M> struct is_simdarray_internal<SimdArray<T, N, VectorType, M>> : public std::true_type {};
159 template <typename T, std::size_t N, typename VectorType, std::size_t M> struct is_simd_mask_array_internal<SimdMaskArray<T, N, VectorType, M>> : public std::true_type {};
160 template <typename T, std::size_t N, typename V, std::size_t M> struct is_integral_internal <SimdArray<T, N, V, M>, false> : public std::is_integral<T> {};
161 template <typename T, std::size_t N, typename V, std::size_t M> struct is_floating_point_internal<SimdArray<T, N, V, M>, false> : public std::is_floating_point<T> {};
162 template <typename T, std::size_t N, typename V, std::size_t M> struct is_signed_internal <SimdArray<T, N, V, M>, false> : public std::is_signed<T> {};
163 template <typename T, std::size_t N, typename V, std::size_t M> struct is_unsigned_internal <SimdArray<T, N, V, M>, false> : public std::is_unsigned<T> {};
164 
165 template<typename T, std::size_t N> struct has_no_allocated_data_impl<Vc::SimdArray<T, N>> : public std::true_type {};
166 } // namespace Traits
167 
168 } // namespace Vc
169 
170 #endif // VC_COMMON_SIMDARRAYFWD_H_
171 
172 // vim: foldmethod=marker
Data-parallel arithmetic type with user-defined number of elements.
Definition: simdarray.h:565
Data-parallel mask type with user-defined number of boolean elements.
Definition: simdarrayfwd.h:121
Vector Classes Namespace.
Definition: cpuid.h:32