Vc  1.1.0
SIMD Vector Classes for C++
math.h
1 /* This file is part of the Vc library. {{{
2 Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7  * Redistributions of source code must retain the above copyright
8  notice, this list of conditions and the following disclaimer.
9  * Redistributions in binary form must reproduce the above copyright
10  notice, this list of conditions and the following disclaimer in the
11  documentation and/or other materials provided with the distribution.
12  * Neither the names of contributing organizations nor the
13  names of its contributors may be used to endorse or promote products
14  derived from this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
20 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 }}}*/
28 
29 #ifndef VC_COMMON_MATH_H_
30 #define VC_COMMON_MATH_H_
31 
32 #define Vc_COMMON_MATH_H_INTERNAL 1
33 
34 #include "trigonometric.h"
35 
36 #include "const.h"
37 #include "macros.h"
38 
39 namespace Vc_VERSIONED_NAMESPACE
40 {
41 #ifdef Vc_IMPL_SSE
42 // for SSE, AVX, and AVX2
43 #include "logarithm.h"
44 #include "exponential.h"
45 #ifdef Vc_IMPL_AVX
46 inline AVX::double_v exp(Vc_ALIGNED_PARAMETER(AVX::double_v) _x)
47 {
48  AVX::Vector<double> x = _x;
49  typedef AVX::Vector<double> V;
50  typedef V::Mask M;
51  typedef AVX::Const<double> C;
52 
53  const M overflow = x > Vc::Detail::doubleConstant< 1, 0x0006232bdd7abcd2ull, 9>(); // max log
54  const M underflow = x < Vc::Detail::doubleConstant<-1, 0x0006232bdd7abcd2ull, 9>(); // min log
55 
56  V px = floor(C::log2_e() * x + 0.5);
57  __m128i tmp = _mm256_cvttpd_epi32(px.data());
58  const SimdArray<int, V::Size> n = SSE::int_v{tmp};
59  x -= px * C::ln2_large(); //Vc::Detail::doubleConstant<1, 0x00062e4000000000ull, -1>(); // ln2
60  x -= px * C::ln2_small(); //Vc::Detail::doubleConstant<1, 0x0007f7d1cf79abcaull, -20>(); // ln2
61 
62  const double P[] = {
63  Vc::Detail::doubleConstant<1, 0x000089cdd5e44be8ull, -13>(),
64  Vc::Detail::doubleConstant<1, 0x000f06d10cca2c7eull, -6>(),
65  Vc::Detail::doubleConstant<1, 0x0000000000000000ull, 0>()
66  };
67  const double Q[] = {
68  Vc::Detail::doubleConstant<1, 0x00092eb6bc365fa0ull, -19>(),
69  Vc::Detail::doubleConstant<1, 0x0004ae39b508b6c0ull, -9>(),
70  Vc::Detail::doubleConstant<1, 0x000d17099887e074ull, -3>(),
71  Vc::Detail::doubleConstant<1, 0x0000000000000000ull, 1>()
72  };
73  const V x2 = x * x;
74  px = x * ((P[0] * x2 + P[1]) * x2 + P[2]);
75  x = px / ((((Q[0] * x2 + Q[1]) * x2 + Q[2]) * x2 + Q[3]) - px);
76  x = V::One() + 2.0 * x;
77 
78  x = ldexp(x, n); // == x * 2ⁿ
79 
80  x(overflow) = std::numeric_limits<double>::infinity();
81  x.setZero(underflow);
82 
83  return x;
84  }
85 #endif // Vc_IMPL_AVX
86 
87 inline SSE::double_v exp(SSE::double_v::AsArg _x) {
88  SSE::Vector<double> x = _x;
89  typedef SSE::Vector<double> V;
90  typedef V::Mask M;
91  typedef SSE::Const<double> C;
92 
93  const M overflow = x > Vc::Detail::doubleConstant< 1, 0x0006232bdd7abcd2ull, 9>(); // max log
94  const M underflow = x < Vc::Detail::doubleConstant<-1, 0x0006232bdd7abcd2ull, 9>(); // min log
95 
96  V px = floor(C::log2_e() * x + 0.5);
97  SimdArray<int, V::Size> n;
98  _mm_storel_epi64(reinterpret_cast<__m128i *>(&n), _mm_cvttpd_epi32(px.data()));
99  x -= px * C::ln2_large(); //Vc::Detail::doubleConstant<1, 0x00062e4000000000ull, -1>(); // ln2
100  x -= px * C::ln2_small(); //Vc::Detail::doubleConstant<1, 0x0007f7d1cf79abcaull, -20>(); // ln2
101 
102  const double P[] = {
103  Vc::Detail::doubleConstant<1, 0x000089cdd5e44be8ull, -13>(),
104  Vc::Detail::doubleConstant<1, 0x000f06d10cca2c7eull, -6>(),
105  Vc::Detail::doubleConstant<1, 0x0000000000000000ull, 0>()
106  };
107  const double Q[] = {
108  Vc::Detail::doubleConstant<1, 0x00092eb6bc365fa0ull, -19>(),
109  Vc::Detail::doubleConstant<1, 0x0004ae39b508b6c0ull, -9>(),
110  Vc::Detail::doubleConstant<1, 0x000d17099887e074ull, -3>(),
111  Vc::Detail::doubleConstant<1, 0x0000000000000000ull, 1>()
112  };
113  const V x2 = x * x;
114  px = x * ((P[0] * x2 + P[1]) * x2 + P[2]);
115  x = px / ((((Q[0] * x2 + Q[1]) * x2 + Q[2]) * x2 + Q[3]) - px);
116  x = V::One() + 2.0 * x;
117 
118  x = ldexp(x, n); // == x * 2ⁿ
119 
120  x(overflow) = std::numeric_limits<double>::infinity();
121  x.setZero(underflow);
122 
123  return x;
124  }
125 
126 #endif
127 } // namespace Vc
128 
129 #undef Vc_COMMON_MATH_H_INTERNAL
130 
131 #endif // VC_COMMON_MATH_H_
Vc::Vector< T > exp(const Vc::Vector< T > &v)
Vc::Vector< T > ldexp(Vc::Vector< T > x, Vc::SimdArray< int, size()> e)
Multiply floating-point number by integral power of 2.
Vector< int > int_v
vector of signed integers
Definition: vector.h:60
Vector< double > double_v
vector of double precision
Definition: vector.h:56
constexpr VectorSpecialInitializerOne One
The special object Vc::One can be used to construct Vector and Mask objects initialized to one/true...
Definition: types.h:90