Vc  1.4.1
SIMD Vector Classes for C++
trigonometric.h
1 /* This file is part of the Vc library. {{{
2 Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>
3 
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions are met:
6  * Redistributions of source code must retain the above copyright
7  notice, this list of conditions and the following disclaimer.
8  * Redistributions in binary form must reproduce the above copyright
9  notice, this list of conditions and the following disclaimer in the
10  documentation and/or other materials provided with the distribution.
11  * Neither the names of contributing organizations nor the
12  names of its contributors may be used to endorse or promote products
13  derived from this software without specific prior written permission.
14 
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 
26 }}}*/
27 
28 #ifndef VC_COMMON_TRIGONOMETRIC_H_
29 #define VC_COMMON_TRIGONOMETRIC_H_
30 
31 #include "macros.h"
32 
// Declarations of the externally provided vector sin/cos routines called by
// the sin_dispatch/cos_dispatch overloads further down. They are only declared
// when Vc_HAVE_LIBMVEC is defined. Each function has a packed-float and a
// packed-double entry point, once for 128-bit arguments (__m128/__m128d) and
// once for 256-bit arguments (__m256/__m256d).
33 #ifdef Vc_HAVE_LIBMVEC
34 extern "C" {
35 __m128 _ZGVbN4v_sinf(__m128);
36 __m128d _ZGVbN2v_sin(__m128d);
37 __m128 _ZGVbN4v_cosf(__m128);
38 __m128d _ZGVbN2v_cos(__m128d);
39 __m256 _ZGVdN8v_sinf(__m256);
40 __m256d _ZGVdN4v_sin(__m256d);
41 __m256 _ZGVdN8v_cosf(__m256);
42 __m256d _ZGVdN4v_cos(__m256d);
43 }
44 #endif
45 
46 namespace Vc_VERSIONED_NAMESPACE
47 {
48 namespace Detail
49 {
// Maps an Implementation enumerator to the value used when selecting the
// trigonometric implementation. The primary template passes Impl through
// unchanged.
50 template<Vc::Implementation Impl> struct MapImpl { enum Dummy { Value = Impl }; };
// SSE4.2 takes the value of the SSE4.1 mapping, so both select the same
// trigonometric implementation.
51 template<> struct MapImpl<Vc::SSE42Impl> { enum Dummy { Value = MapImpl<Vc::SSE41Impl>::Value }; };
52 
// Implementation tag handed to Common::Trigonometric. It is ImplementationT
// instantiated with the MapImpl-mapped value of Impl.
53 template<Vc::Implementation Impl> using TrigonometricImplementation =
54  ImplementationT<MapImpl<Impl>::Value
// Only when both XOP and FMA4 are enabled are their instruction flags added to
// the mapped value.
55 #if defined(Vc_IMPL_XOP) && defined(Vc_IMPL_FMA4)
56  + Vc::XopInstructions
57  + Vc::Fma4Instructions
58 #endif
59  >;
60 } // namespace Detail
61 
62 namespace Common
63 {
// Collection of static trigonometric function templates, parameterized on an
// implementation tag Impl. Only the declarations appear here; the definitions
// are not part of this listing.
64 template<typename Impl> struct Trigonometric
65 {
66  template<typename T> static T sin(const T &_x);
67  template<typename T> static T cos(const T &_x);
// sincos returns nothing; it receives two pointers of type T* for its results.
68  template<typename T> static void sincos(const T &_x, T *_sin, T *_cos);
69  template<typename T> static T asin (const T &_x);
70  template<typename T> static T atan (const T &_x);
// Argument order is (y, x).
71  template<typename T> static T atan2(const T &y, const T &x);
72 };
73 } // namespace Common
74 
75 #if defined Vc_IMPL_SSE || defined DOXYGEN
76 // this is either SSE, AVX, or AVX2
77 namespace Detail
78 {
// Resolves the Common::Trigonometric instantiation for a vector ABI:
//   VectorAbi::Sse -> SSE42Impl
//   VectorAbi::Avx -> AVXImpl
//   anything else  -> ScalarImpl
// The element type T takes no part in the selection.
79 template <typename T, typename Abi>
80 using Trig = Common::Trigonometric<Detail::TrigonometricImplementation<
81  (std::is_same<Abi, VectorAbi::Sse>::value
82  ? SSE42Impl
83  : std::is_same<Abi, VectorAbi::Avx>::value ? AVXImpl : ScalarImpl)>>;
84 } // namespace Detail
85 
86 #ifdef Vc_HAVE_LIBMVEC
// Overload set that routes a raw SIMD register to the external routine
// matching its type, so the generic sin()/cos() wrappers can call a single
// name. These four overloads handle the 128-bit register types.
87 Vc_INTRINSIC __m128 sin_dispatch(__m128 x) { return ::_ZGVbN4v_sinf(x); }
88 Vc_INTRINSIC __m128d sin_dispatch(__m128d x) { return ::_ZGVbN2v_sin (x); }
89 Vc_INTRINSIC __m128 cos_dispatch(__m128 x) { return ::_ZGVbN4v_cosf(x); }
90 Vc_INTRINSIC __m128d cos_dispatch(__m128d x) { return ::_ZGVbN2v_cos (x); }
// The 256-bit overloads are only provided when Vc_IMPL_AVX is defined.
91 #ifdef Vc_IMPL_AVX
92 Vc_INTRINSIC __m256 sin_dispatch(__m256 x) { return ::_ZGVdN8v_sinf(x); }
93 Vc_INTRINSIC __m256d sin_dispatch(__m256d x) { return ::_ZGVdN4v_sin (x); }
94 Vc_INTRINSIC __m256 cos_dispatch(__m256 x) { return ::_ZGVdN8v_cosf(x); }
95 Vc_INTRINSIC __m256d cos_dispatch(__m256d x) { return ::_ZGVdN4v_cos (x); }
96 #endif
97 
// Returns the sine of all input values in x (Vc_HAVE_LIBMVEC variant).
// The vector's underlying data() is handed to the sin_dispatch overload that
// matches its register type; the result is converted back to the vector type.
98 template <typename T, typename Abi>
99 Vc_INTRINSIC Vector<T, detail::not_fixed_size_abi<Abi>> sin(const Vector<T, Abi> &x)
100 {
101  return sin_dispatch(x.data());
102 }
// Returns the cosine of all input values in x (Vc_HAVE_LIBMVEC variant).
// The vector's underlying data() is handed to the cos_dispatch overload that
// matches its register type; the result is converted back to the vector type.
103 template <typename T, typename Abi>
104 Vc_INTRINSIC Vector<T, detail::not_fixed_size_abi<Abi>> cos(const Vector<T, Abi> &x)
105 {
106  return cos_dispatch(x.data());
107 }
108 #else
109 
// Returns the sine of all input values in x.
// Used when Vc_HAVE_LIBMVEC is not defined: forwards to the sin member of the
// Common::Trigonometric instantiation that Detail::Trig selects for Abi.
133 template <typename T, typename Abi>
134 Vc_INTRINSIC Vector<T, detail::not_fixed_size_abi<Abi>> sin(const Vector<T, Abi> &x)
135 {
136  return Detail::Trig<T, Abi>::sin(x);
137 }
138 
// Returns the cosine of all input values in x.
// Used when Vc_HAVE_LIBMVEC is not defined: forwards to the cos member of the
// Common::Trigonometric instantiation that Detail::Trig selects for Abi.
150 template <typename T, typename Abi>
151 Vc_INTRINSIC Vector<T, detail::not_fixed_size_abi<Abi>> cos(const Vector<T, Abi> &x)
152 {
153  return Detail::Trig<T, Abi>::cos(x);
154 }
155 #endif
156 
// Returns the arcsine of all input values in x.
// Forwards to the asin member of the Common::Trigonometric instantiation that
// Detail::Trig selects for Abi.
167 template <typename T, typename Abi>
168 Vc_INTRINSIC Vector<T, detail::not_fixed_size_abi<Abi>> asin(const Vector<T, Abi> &x)
169 {
170  return Detail::Trig<T, Abi>::asin(x);
171 }
172 
// Returns the arctangent of all input values in x.
// Forwards to the atan member of the Common::Trigonometric instantiation that
// Detail::Trig selects for Abi.
182 template <typename T, typename Abi>
183 Vc_INTRINSIC Vector<T, detail::not_fixed_size_abi<Abi>> atan(const Vector<T, Abi> &x)
184 {
185  return Detail::Trig<T, Abi>::atan(x);
186 }
187 
// Returns the two-argument arctangent of all input values in y and x.
// Note the argument order (y first, then x). Forwards to the atan2 member of
// the Common::Trigonometric instantiation that Detail::Trig selects for Abi.
198 template <typename T, typename Abi>
199 Vc_INTRINSIC Vector<T, detail::not_fixed_size_abi<Abi>> atan2(const Vector<T, Abi> &y,
200  const Vector<T, Abi> &x)
201 {
202  return Detail::Trig<T, Abi>::atan2(y, x);
203 }
204 
// Computes sine and cosine of all input values in x in a single call.
// The results are written through the sin and cos pointers; both pointers are
// passed on unchecked to the sincos member of the Common::Trigonometric
// instantiation that Detail::Trig selects for Abi.
216 template <typename T, typename Abi>
217 Vc_INTRINSIC void sincos(const Vector<T, Abi> &x,
218  Vector<T, detail::not_fixed_size_abi<Abi>> *sin,
219  Vector<T, Abi> *cos)
220 {
221  Detail::Trig<T, Abi>::sincos(x, sin, cos);
222 }
223 #endif
224 } // namespace Vc_VERSIONED_NAMESPACE
225 
226 #endif // VC_COMMON_TRIGONOMETRIC_H_
The main vector class for expressing data parallelism.
Definition: fwddecl.h:53
Vector< T, detail::not_fixed_size_abi< Abi > > sin(const Vector< T, Abi > &x)
Returns the sine of all input values in x.
Vector< T, detail::not_fixed_size_abi< Abi > > asin(const Vector< T, Abi > &x)
Returns the arcsine of all input values in x.
void sincos(const Vector< T, Abi > &x, Vector< T, detail::not_fixed_size_abi< Abi >> *sin, Vector< T, Abi > *cos)
Vector< T, detail::not_fixed_size_abi< Abi > > cos(const Vector< T, Abi > &x)
Returns the cosine of all input values in x.
Support for XOP instructions.
Definition: global.h:505
Vector< T, detail::not_fixed_size_abi< Abi > > atan(const Vector< T, Abi > &x)
Returns the arctangent of all input values in x.
Vector< T, detail::not_fixed_size_abi< Abi > > atan2(const Vector< T, Abi > &y, const Vector< T, Abi > &x)
Returns the two-argument arctangent (atan2) of all input values in y and x.
x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1 + SSE4.2
Definition: global.h:479
Support for FMA4 instructions.
Definition: global.h:503
Vector Classes Namespace.
Definition: dox.h:584
Uses only fundamental types.
Definition: global.h:469
x86 AVX
Definition: global.h:481