Vc  1.0.0-dev
SIMD Vector Classes for C++
logarithm.h
1 /* This file is part of the Vc library. {{{
2 Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7  * Redistributions of source code must retain the above copyright
8  notice, this list of conditions and the following disclaimer.
9  * Redistributions in binary form must reproduce the above copyright
10  notice, this list of conditions and the following disclaimer in the
11  documentation and/or other materials provided with the distribution.
12  * Neither the names of contributing organizations nor the
13  names of its contributors may be used to endorse or promote products
14  derived from this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
20 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 }}}*/
28 
29 /* The log implementations are based on code from Julien Pommier which carries the following
30  copyright information:
31  */
32 /*
33  Inspired by Intel Approximate Math library, and based on the
34  corresponding algorithms of the cephes math library
35 */
36 /* Copyright (C) 2007 Julien Pommier
37 
38  This software is provided 'as-is', without any express or implied
39  warranty. In no event will the authors be held liable for any damages
40  arising from the use of this software.
41 
42  Permission is granted to anyone to use this software for any purpose,
43  including commercial applications, and to alter it and redistribute it
44  freely, subject to the following restrictions:
45 
46  1. The origin of this software must not be misrepresented; you must not
47  claim that you wrote the original software. If you use this software
48  in a product, an acknowledgment in the product documentation would be
49  appreciated but is not required.
50  2. Altered source versions must be plainly marked as such, and must not be
51  misrepresented as being the original software.
52  3. This notice may not be removed or altered from any source distribution.
53 
54  (this is the zlib license)
55 */
56 
57 #ifdef Vc_COMMON_MATH_H_INTERNAL
58 
59 enum LogarithmBase {
60  BaseE, Base10, Base2
61 };
62 
63 namespace Detail
64 {
65 template <typename T, typename Abi>
66 using Const = typename std::conditional<std::is_same<Abi, VectorAbi::Avx>::value,
67  AVX::Const<T>, SSE::Const<T>>::type;
68 
69 template<LogarithmBase Base>
70 struct LogImpl
71 {
72  template<typename T, typename Abi> static Vc_ALWAYS_INLINE void log_series(Vector<T, Abi> &Vc_RESTRICT x, typename Vector<T, Abi>::AsArg exponent) {
73  typedef Vector<T, Abi> V;
74  typedef Detail::Const<T, Abi> C;
75  // Taylor series around x = 2^exponent
76  // f(x) = ln(x) → exponent * ln(2) → C::ln2_small + C::ln2_large
77  // f'(x) = x⁻¹ → x → 1
78  // f''(x) = - x⁻² → -x² / 2 → C::_1_2()
79  // = 2!x⁻³ → x³ / 3 → C::P(8)
80  // = -3!x⁻⁴ → -x⁴ / 4 → C::P(7)
81  // = 4!x⁻⁵ → x⁵ / 5 → C::P(6)
82  // ...
83  // The high order coefficients are adjusted to reduce the error that occurs from ommission
84  // of higher order terms.
85  // P(0) is the smallest term and |x| < 1 ⇒ |xⁿ| > |xⁿ⁺¹|
86  // The order of additions must go from smallest to largest terms
87  const V x2 = x * x; // 0 → 4
88 #ifdef Vc_LOG_ILP
89  V y2 = (C::P(6) * /*4 → 8*/ x2 + /* 8 → 11*/ C::P(7) * /*1 → 5*/ x) + /*11 → 14*/ C::P(8);
90  V y0 = (C::P(0) * /*5 → 9*/ x2 + /* 9 → 12*/ C::P(1) * /*2 → 6*/ x) + /*12 → 15*/ C::P(2);
91  V y1 = (C::P(3) * /*6 → 10*/ x2 + /*10 → 13*/ C::P(4) * /*3 → 7*/ x) + /*13 → 16*/ C::P(5);
92  const V x3 = x2 * x; // 7 → 11
93  const V x6 = x3 * x3; // 11 → 15
94  const V x9 = x6 * x3; // 15 → 19
95  V y = (y0 * /*19 → 23*/ x9 + /*23 → 26*/ y1 * /*16 → 20*/ x6) + /*26 → 29*/ y2 * /*14 → 18*/ x3;
96 #elif defined Vc_LOG_ILP2
97  /*
98  * name start done
99  * movaps %xmm0, %xmm1 ; x 0 1
100  * movaps %xmm0, %xmm2 ; x 0 1
101  * mulps %xmm1, %xmm1 ; x2 1 5 *xmm1
102  * movaps <P8>, %xmm15 ; y8 1 2
103  * mulps %xmm1, %xmm2 ; x3 5 9 *xmm2
104  * movaps %xmm1, %xmm3 ; x2 5 6
105  * movaps %xmm1, %xmm4 ; x2 5 6
106  * mulps %xmm3, %xmm3 ; x4 6 10 *xmm3
107  * movaps %xmm2, %xmm5 ; x3 9 10
108  * movaps %xmm2, %xmm6 ; x3 9 10
109  * mulps %xmm2, %xmm4 ; x5 9 13 *xmm4
110  * movaps %xmm3, %xmm7 ; x4 10 11
111  * movaps %xmm3, %xmm8 ; x4 10 11
112  * movaps %xmm3, %xmm9 ; x4 10 11
113  * mulps %xmm5, %xmm5 ; x6 10 14 *xmm5
114  * mulps %xmm3, %xmm6 ; x7 11 15 *xmm6
115  * mulps %xmm7, %xmm7 ; x8 12 16 *xmm7
116  * movaps %xmm4, %xmm10 ; x5 13 14
117  * mulps %xmm4, %xmm8 ; x9 13 17 *xmm8
118  * mulps %xmm5, %xmm10 ; x11 14 18 *xmm10
119  * mulps %xmm5, %xmm9 ; x10 15 19 *xmm9
120  * mulps <P0>, %xmm10 ; y0 18 22
121  * mulps <P1>, %xmm9 ; y1 19 23
122  * mulps <P2>, %xmm8 ; y2 20 24
123  * mulps <P3>, %xmm7 ; y3 21 25
124  * addps %xmm10, %xmm9 ; y 23 26
125  * addps %xmm9, %xmm8 ; y 26 29
126  * addps %xmm8, %xmm7 ; y 29 32
127  */
128  const V x3 = x2 * x; // 4 → 8
129  const V x4 = x2 * x2; // 5 → 9
130  const V x5 = x2 * x3; // 8 → 12
131  const V x6 = x3 * x3; // 9 → 13
132  const V x7 = x4 * x3; //
133  const V x8 = x4 * x4;
134  const V x9 = x5 * x4;
135  const V x10 = x5 * x5;
136  const V x11 = x5 * x6; // 13 → 17
137  V y = C::P(0) * x11 + C::P(1) * x10 + C::P(2) * x9 + C::P(3) * x8 + C::P(4) * x7
138  + C::P(5) * x6 + C::P(6) * x5 + C::P(7) * x4 + C::P(8) * x3;
139 #else
140  V y = C::P(0);
141  Vc::Common::unrolled_loop<int, 1, 9>([&](int i) { y = y * x + C::P(i); });
142  y *= x * x2;
143 #endif
144  switch (Base) {
145  case BaseE:
146  // ln(2) is split in two parts to increase precision (i.e. ln2_small + ln2_large = ln(2))
147  y += exponent * C::ln2_small();
148  y -= x2 * C::_1_2(); // [0, 0.25[
149  x += y;
150  x += exponent * C::ln2_large();
151  break;
152  case Base10:
153  y += exponent * C::ln2_small();
154  y -= x2 * C::_1_2(); // [0, 0.25[
155  x += y;
156  x += exponent * C::ln2_large();
157  x *= C::log10_e();
158  break;
159  case Base2:
160  {
161  const V x_ = x;
162  x *= C::log2_e();
163  y *= C::log2_e();
164  y -= x_ * x * C::_1_2(); // [0, 0.25[
165  x += y;
166  x += exponent;
167  break;
168  }
169  }
170  }
171 
172 template <typename Abi>
173 static Vc_ALWAYS_INLINE void log_series(Vector<double, Abi> &Vc_RESTRICT x,
174  typename Vector<double, Abi>::AsArg exponent)
175 {
176  typedef Vector<double, Abi> V;
177  typedef Detail::Const<double, Abi> C;
178  const V x2 = x * x;
179  V y = C::P(0);
180  V y2 = C::Q(0) + x;
181  Vc::Common::unrolled_loop<int, 1, 5>([&](int i) {
182  y = y * x + C::P(i);
183  y2 = y2 * x + C::Q(i);
184  });
185  y2 = x / y2;
186  y = y * x + C::P(5);
187  y = x2 * y * y2;
188  // TODO: refactor the following with the float implementation:
189  switch (Base) {
190  case BaseE:
191  // ln(2) is split in two parts to increase precision (i.e. ln2_small + ln2_large = ln(2))
192  y += exponent * C::ln2_small();
193  y -= x2 * C::_1_2(); // [0, 0.25[
194  x += y;
195  x += exponent * C::ln2_large();
196  break;
197  case Base10:
198  y += exponent * C::ln2_small();
199  y -= x2 * C::_1_2(); // [0, 0.25[
200  x += y;
201  x += exponent * C::ln2_large();
202  x *= C::log10_e();
203  break;
204  case Base2:
205  {
206  const V x_ = x;
207  x *= C::log2_e();
208  y *= C::log2_e();
209  y -= x_ * x * C::_1_2(); // [0, 0.25[
210  x += y;
211  x += exponent;
212  break;
213  }
214  }
215  }
216 
217 template <typename T, typename Abi, typename V = Vector<T, Abi>>
218 static inline Vector<T, Abi> calc(Vc_ALIGNED_PARAMETER(V) _x)
219 {
220  typedef typename V::Mask M;
221  typedef Detail::Const<T, Abi> C;
222 
223  V x(_x);
224 
225  const M invalidMask = x < V::Zero();
226  const M infinityMask = x == V::Zero();
227  const M denormal = x <= C::min();
228 
229  x(denormal) *= V(Vc::Detail::doubleConstant<1, 0, 54>()); // 2²⁵
230  V exponent = Detail::exponent(x.data()); // = ⎣log₂(x)⎦
231  exponent(denormal) -= 54;
232 
233  x.setZero(C::exponentMask()); // keep only the fractional part ⇒ x ∈ [1, 2[
234  x |= C::_1_2(); // and set the exponent to 2⁻¹ ⇒ x ∈ [½, 1[
235 
236  // split calculation in two cases:
237  // A: x ∈ [½, √½[
238  // B: x ∈ [√½, 1[
239  // √½ defines the point where Δe(x) := log₂(x) - ⎣log₂(x)⎦ = ½, i.e.
240  // log₂(√½) - ⎣log₂(√½)⎦ = ½ * -1 - ⎣½ * -1⎦ = -½ + 1 = ½
241 
242  const M smallX = x < C::_1_sqrt2();
243  x(smallX) += x; // => x ∈ [√½, 1[ ∪ [1.5, 1 + √½[
244  x -= V::One(); // => x ∈ [√½ - 1, 0[ ∪ [0.5, √½[
245  exponent(!smallX) += V::One();
246 
247  log_series(x, exponent); // A: (ˣ⁄₂ᵉ - 1, e) B: (ˣ⁄₂ᵉ⁺¹ - 1, e + 1)
248 
249  x.setQnan(invalidMask); // x < 0 → NaN
250  x(infinityMask) = C::neginf(); // x = 0 → -∞
251 
252  return x;
253  }
254 };
255 } // namespace Detail
256 
257 template <typename T, typename Abi>
258 Vc_INTRINSIC Vc_CONST Vector<T, Abi> log(const Vector<T, Abi> &x)
259 {
260  return Detail::LogImpl<BaseE>::calc<T, Abi>(x);
261 }
262 template <typename T, typename Abi>
263 Vc_INTRINSIC Vc_CONST Vector<T, Abi> log10(const Vector<T, Abi> &x)
264 {
265  return Detail::LogImpl<Base10>::calc<T, Abi>(x);
266 }
267 template <typename T, typename Abi>
268 Vc_INTRINSIC Vc_CONST Vector<T, Abi> log2(const Vector<T, Abi> &x)
269 {
270  return Detail::LogImpl<Base2>::calc<T, Abi>(x);
271 }
272 
273 #endif // Vc_COMMON_MATH_H_INTERNAL
Vc::Vector< T > log2(const Vc::Vector< T > &v)
Vc::Vector< T > min(const Vc::Vector< T > &x, const Vc::Vector< T > &y)
Vc::Vector< T > log(const Vc::Vector< T > &v)
Vc::Vector< T > log10(const Vc::Vector< T > &v)
constexpr VectorSpecialInitializerZero Zero
The special object Vc::Zero can be used to construct Vector and Mask objects initialized to zero/fals...
Definition: types.h:85
constexpr VectorSpecialInitializerOne One
The special object Vc::One can be used to construct Vector and Mask objects initialized to one/true...
Definition: types.h:90