Vc  1.3.2-dev
SIMD Vector Classes for C++
logarithm.h
1 /* This file is part of the Vc library. {{{
2 Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>
3 
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions are met:
6  * Redistributions of source code must retain the above copyright
7  notice, this list of conditions and the following disclaimer.
8  * Redistributions in binary form must reproduce the above copyright
9  notice, this list of conditions and the following disclaimer in the
10  documentation and/or other materials provided with the distribution.
11  * Neither the names of contributing organizations nor the
12  names of its contributors may be used to endorse or promote products
13  derived from this software without specific prior written permission.
14 
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 
26 }}}*/
27 
28 /* The log implementations are based on code from Julien Pommier which carries the following
29  copyright information:
30  */
31 /*
32  Inspired by Intel Approximate Math library, and based on the
33  corresponding algorithms of the cephes math library
34 */
35 /* Copyright (C) 2007 Julien Pommier
36 
37  This software is provided 'as-is', without any express or implied
38  warranty. In no event will the authors be held liable for any damages
39  arising from the use of this software.
40 
41  Permission is granted to anyone to use this software for any purpose,
42  including commercial applications, and to alter it and redistribute it
43  freely, subject to the following restrictions:
44 
45  1. The origin of this software must not be misrepresented; you must not
46  claim that you wrote the original software. If you use this software
47  in a product, an acknowledgment in the product documentation would be
48  appreciated but is not required.
49  2. Altered source versions must be plainly marked as such, and must not be
50  misrepresented as being the original software.
51  3. This notice may not be removed or altered from any source distribution.
52 
53  (this is the zlib license)
54 */
55 
56 #ifdef Vc_COMMON_MATH_H_INTERNAL
57 
58 enum LogarithmBase {
59  BaseE, Base10, Base2
60 };
61 
62 namespace Detail
63 {
64 template <typename T, typename Abi>
65 using Const = typename std::conditional<std::is_same<Abi, VectorAbi::Avx>::value,
66  AVX::Const<T>, SSE::Const<T>>::type;
67 
68 template<LogarithmBase Base>
69 struct LogImpl
70 {
71  template<typename T, typename Abi> static Vc_ALWAYS_INLINE void log_series(Vector<T, Abi> &Vc_RESTRICT x, typename Vector<T, Abi>::AsArg exponent) {
72  typedef Vector<T, Abi> V;
73  typedef Detail::Const<T, Abi> C;
74  // Taylor series around x = 2^exponent
75  // f(x) = ln(x) → exponent * ln(2) → C::ln2_small + C::ln2_large
76  // f'(x) = x⁻¹ → x → 1
77  // f''(x) = - x⁻² → -x² / 2 → C::_1_2()
78  // = 2!x⁻³ → x³ / 3 → C::P(8)
79  // = -3!x⁻⁴ → -x⁴ / 4 → C::P(7)
80  // = 4!x⁻⁵ → x⁵ / 5 → C::P(6)
81  // ...
82  // The high order coefficients are adjusted to reduce the error that occurs from ommission
83  // of higher order terms.
84  // P(0) is the smallest term and |x| < 1 ⇒ |xⁿ| > |xⁿ⁺¹|
85  // The order of additions must go from smallest to largest terms
86  const V x2 = x * x; // 0 → 4
87 #ifdef Vc_LOG_ILP
88  V y2 = (C::P(6) * /*4 → 8*/ x2 + /* 8 → 11*/ C::P(7) * /*1 → 5*/ x) + /*11 → 14*/ C::P(8);
89  V y0 = (C::P(0) * /*5 → 9*/ x2 + /* 9 → 12*/ C::P(1) * /*2 → 6*/ x) + /*12 → 15*/ C::P(2);
90  V y1 = (C::P(3) * /*6 → 10*/ x2 + /*10 → 13*/ C::P(4) * /*3 → 7*/ x) + /*13 → 16*/ C::P(5);
91  const V x3 = x2 * x; // 7 → 11
92  const V x6 = x3 * x3; // 11 → 15
93  const V x9 = x6 * x3; // 15 → 19
94  V y = (y0 * /*19 → 23*/ x9 + /*23 → 26*/ y1 * /*16 → 20*/ x6) + /*26 → 29*/ y2 * /*14 → 18*/ x3;
95 #elif defined Vc_LOG_ILP2
96  /*
97  * name start done
98  * movaps %xmm0, %xmm1 ; x 0 1
99  * movaps %xmm0, %xmm2 ; x 0 1
100  * mulps %xmm1, %xmm1 ; x2 1 5 *xmm1
101  * movaps <P8>, %xmm15 ; y8 1 2
102  * mulps %xmm1, %xmm2 ; x3 5 9 *xmm2
103  * movaps %xmm1, %xmm3 ; x2 5 6
104  * movaps %xmm1, %xmm4 ; x2 5 6
105  * mulps %xmm3, %xmm3 ; x4 6 10 *xmm3
106  * movaps %xmm2, %xmm5 ; x3 9 10
107  * movaps %xmm2, %xmm6 ; x3 9 10
108  * mulps %xmm2, %xmm4 ; x5 9 13 *xmm4
109  * movaps %xmm3, %xmm7 ; x4 10 11
110  * movaps %xmm3, %xmm8 ; x4 10 11
111  * movaps %xmm3, %xmm9 ; x4 10 11
112  * mulps %xmm5, %xmm5 ; x6 10 14 *xmm5
113  * mulps %xmm3, %xmm6 ; x7 11 15 *xmm6
114  * mulps %xmm7, %xmm7 ; x8 12 16 *xmm7
115  * movaps %xmm4, %xmm10 ; x5 13 14
116  * mulps %xmm4, %xmm8 ; x9 13 17 *xmm8
117  * mulps %xmm5, %xmm10 ; x11 14 18 *xmm10
118  * mulps %xmm5, %xmm9 ; x10 15 19 *xmm9
119  * mulps <P0>, %xmm10 ; y0 18 22
120  * mulps <P1>, %xmm9 ; y1 19 23
121  * mulps <P2>, %xmm8 ; y2 20 24
122  * mulps <P3>, %xmm7 ; y3 21 25
123  * addps %xmm10, %xmm9 ; y 23 26
124  * addps %xmm9, %xmm8 ; y 26 29
125  * addps %xmm8, %xmm7 ; y 29 32
126  */
127  const V x3 = x2 * x; // 4 → 8
128  const V x4 = x2 * x2; // 5 → 9
129  const V x5 = x2 * x3; // 8 → 12
130  const V x6 = x3 * x3; // 9 → 13
131  const V x7 = x4 * x3; //
132  const V x8 = x4 * x4;
133  const V x9 = x5 * x4;
134  const V x10 = x5 * x5;
135  const V x11 = x5 * x6; // 13 → 17
136  V y = C::P(0) * x11 + C::P(1) * x10 + C::P(2) * x9 + C::P(3) * x8 + C::P(4) * x7
137  + C::P(5) * x6 + C::P(6) * x5 + C::P(7) * x4 + C::P(8) * x3;
138 #else
139  V y = C::P(0);
140  Vc::Common::unrolled_loop<int, 1, 9>([&](int i) { y = y * x + C::P(i); });
141  y *= x * x2;
142 #endif
143  switch (Base) {
144  case BaseE:
145  // ln(2) is split in two parts to increase precision (i.e. ln2_small + ln2_large = ln(2))
146  y += exponent * C::ln2_small();
147  y -= x2 * C::_1_2(); // [0, 0.25[
148  x += y;
149  x += exponent * C::ln2_large();
150  break;
151  case Base10:
152  y += exponent * C::ln2_small();
153  y -= x2 * C::_1_2(); // [0, 0.25[
154  x += y;
155  x += exponent * C::ln2_large();
156  x *= C::log10_e();
157  break;
158  case Base2:
159  {
160  const V x_ = x;
161  x *= C::log2_e();
162  y *= C::log2_e();
163  y -= x_ * x * C::_1_2(); // [0, 0.25[
164  x += y;
165  x += exponent;
166  break;
167  }
168  }
169  }
170 
171 template <typename Abi>
172 static Vc_ALWAYS_INLINE void log_series(Vector<double, Abi> &Vc_RESTRICT x,
173  typename Vector<double, Abi>::AsArg exponent)
174 {
175  typedef Vector<double, Abi> V;
176  typedef Detail::Const<double, Abi> C;
177  const V x2 = x * x;
178  V y = C::P(0);
179  V y2 = C::Q(0) + x;
180  Vc::Common::unrolled_loop<int, 1, 5>([&](int i) {
181  y = y * x + C::P(i);
182  y2 = y2 * x + C::Q(i);
183  });
184  y2 = x / y2;
185  y = y * x + C::P(5);
186  y = x2 * y * y2;
187  // TODO: refactor the following with the float implementation:
188  switch (Base) {
189  case BaseE:
190  // ln(2) is split in two parts to increase precision (i.e. ln2_small + ln2_large = ln(2))
191  y += exponent * C::ln2_small();
192  y -= x2 * C::_1_2(); // [0, 0.25[
193  x += y;
194  x += exponent * C::ln2_large();
195  break;
196  case Base10:
197  y += exponent * C::ln2_small();
198  y -= x2 * C::_1_2(); // [0, 0.25[
199  x += y;
200  x += exponent * C::ln2_large();
201  x *= C::log10_e();
202  break;
203  case Base2:
204  {
205  const V x_ = x;
206  x *= C::log2_e();
207  y *= C::log2_e();
208  y -= x_ * x * C::_1_2(); // [0, 0.25[
209  x += y;
210  x += exponent;
211  break;
212  }
213  }
214  }
215 
216 template <typename T, typename Abi, typename V = Vector<T, Abi>>
217 static inline Vector<T, Abi> calc(V _x)
218 {
219  typedef typename V::Mask M;
220  typedef Detail::Const<T, Abi> C;
221 
222  V x(_x);
223 
224  const M invalidMask = x < V::Zero();
225  const M infinityMask = x == V::Zero();
226  const M denormal = x <= C::min();
227 
228  x(denormal) *= V(Vc::Detail::doubleConstant<1, 0, 54>()); // 2²⁵
229  V exponent = Detail::exponent(x.data()); // = ⎣log₂(x)⎦
230  exponent(denormal) -= 54;
231 
232  x.setZero(C::exponentMask()); // keep only the fractional part ⇒ x ∈ [1, 2[
233  x = Detail::operator|(x,
234  C::_1_2()); // and set the exponent to 2⁻¹ ⇒ x ∈ [½, 1[
235 
236  // split calculation in two cases:
237  // A: x ∈ [½, √½[
238  // B: x ∈ [√½, 1[
239  // √½ defines the point where Δe(x) := log₂(x) - ⎣log₂(x)⎦ = ½, i.e.
240  // log₂(√½) - ⎣log₂(√½)⎦ = ½ * -1 - ⎣½ * -1⎦ = -½ + 1 = ½
241 
242  const M smallX = x < C::_1_sqrt2();
243  x(smallX) += x; // => x ∈ [√½, 1[ ∪ [1.5, 1 + √½[
244  x -= V::One(); // => x ∈ [√½ - 1, 0[ ∪ [0.5, √½[
245  exponent(!smallX) += V::One();
246 
247  log_series(x, exponent); // A: (ˣ⁄₂ᵉ - 1, e) B: (ˣ⁄₂ᵉ⁺¹ - 1, e + 1)
248 
249  x.setQnan(invalidMask); // x < 0 → NaN
250  x(infinityMask) = C::neginf(); // x = 0 → -∞
251 
252  return x;
253  }
254 };
255 } // namespace Detail
256 
257 template <typename T, typename Abi>
258 Vc_INTRINSIC Vc_CONST Vector<T, Abi> log(const Vector<T, Abi> &x)
259 {
260  return Detail::LogImpl<BaseE>::calc<T, Abi>(x);
261 }
262 template <typename T, typename Abi>
263 Vc_INTRINSIC Vc_CONST Vector<T, Abi> log10(const Vector<T, Abi> &x)
264 {
265  return Detail::LogImpl<Base10>::calc<T, Abi>(x);
266 }
267 template <typename T, typename Abi>
268 Vc_INTRINSIC Vc_CONST Vector<T, Abi> log2(const Vector<T, Abi> &x)
269 {
270  return Detail::LogImpl<Base2>::calc<T, Abi>(x);
271 }
272 
273 #endif // Vc_COMMON_MATH_H_INTERNAL
Vc::Vector< T > log2(const Vc::Vector< T > &v)
Vc::Vector< T > min(const Vc::Vector< T > &x, const Vc::Vector< T > &y)
result_vector_type< L, R > operator|(L &&lhs, R &&rhs)
Applies | component-wise and concurrently.
Definition: simdarray.h:1612
Vc::Vector< T > log(const Vc::Vector< T > &v)
Vc::Vector< T > log10(const Vc::Vector< T > &v)
SimdArray< T, N, V, M > exponent(const SimdArray< T, N, V, M > &x)
Applies the std:: exponent function component-wise and concurrently.
Definition: simdarray.h:1691
constexpr VectorSpecialInitializerZero Zero
The special object Vc::Zero can be used to construct Vector and Mask objects initialized to zero/fals...
Definition: types.h:84
constexpr VectorSpecialInitializerOne One
The special object Vc::One can be used to construct Vector and Mask objects initialized to one/true...
Definition: types.h:89