Vc  1.1.0
SIMD Vector Classes for C++
global.h
1 /* This file is part of the Vc library. {{{
2 Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7  * Redistributions of source code must retain the above copyright
8  notice, this list of conditions and the following disclaimer.
9  * Redistributions in binary form must reproduce the above copyright
10  notice, this list of conditions and the following disclaimer in the
11  documentation and/or other materials provided with the distribution.
12  * Neither the names of contributing organizations nor the
13  names of its contributors may be used to endorse or promote products
14  derived from this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
20 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 }}}*/
28 
29 #ifndef VC_GLOBAL_H_
30 #define VC_GLOBAL_H_
31 
32 #include <cstdint>
33 
34 #ifdef DOXYGEN
35 
// Documentation-only branch (DOXYGEN is defined): each macro below is defined with its
// real expansion and, with one exception, immediately undefined again.
// NOTE(review): presumably the define/undef pairs exist only so that Doxygen has something
// to attach the macro documentation to - confirm.
48 #define Vc_ICC __INTEL_COMPILER_BUILD_DATE
49 #undef Vc_ICC
50 
57 #define Vc_CLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
58 #undef Vc_CLANG
59 
// NOTE(review): Vc_GCC is the only macro in this branch without a matching #undef -
// confirm whether that is intentional.
66 #define Vc_GCC (__GNUC__ * 0x10000 + __GNUC_MINOR__ * 0x100 + __GNUC_PATCHLEVEL__)
67 
74 #define Vc_MSVC _MSC_FULL_VER
75 #undef Vc_MSVC
76 
86 #define Vc_PASSING_VECTOR_BY_VALUE_IS_BROKEN 1
87 #undef Vc_PASSING_VECTOR_BY_VALUE_IS_BROKEN
88 
89 
90 #else // DOXYGEN
91 
92 // Compiler defines
// Exactly one of Vc_ICC, Vc_OPEN64, Vc_CLANG, Vc_GCC, Vc_MSVC or Vc_UNSUPPORTED_COMPILER
// is defined by this chain. ICC, Open64 and Clang are tested before __GNUC__
// (NOTE(review): presumably because those compilers define __GNUC__ as well - confirm).
// Vc_CLANG and Vc_GCC encode the version as major * 0x10000 + minor * 0x100 + patchlevel,
// so version 4.7.0 compares as 0x40700.
93 #ifdef __INTEL_COMPILER
94 #define Vc_ICC __INTEL_COMPILER_BUILD_DATE
95 #elif defined(__OPENCC__)
96 #define Vc_OPEN64 1
97 #elif defined(__clang__)
98 #define Vc_CLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
99 #elif defined(__GNUC__)
100 #define Vc_GCC (__GNUC__ * 0x10000 + __GNUC_MINOR__ * 0x100 + __GNUC_PATCHLEVEL__)
101 #elif defined(_MSC_VER)
102 #define Vc_MSVC _MSC_FULL_VER
103 #else
104 #define Vc_UNSUPPORTED_COMPILER 1
105 #endif
106 
// Language-standard gate: a __cplusplus value below 201103 is a hard error, except for
// MSVC with _MSC_FULL_VER >= 160000000, which is let through. Vc_CXX14 is defined when
// __cplusplus reports 201402L or newer.
107 #if __cplusplus < 201103
108 #if (defined Vc_MSVC && Vc_MSVC >= 160000000)
109 // these compilers still work, even if they don't define __cplusplus as expected
110 #else
111 #error "Vc requires support for C++11."
112 #endif
113 #elif __cplusplus >= 201402L
114 # define Vc_CXX14 1
115 #endif
116 
117 // Features/Quirks defines
118 #if defined Vc_MSVC && defined _WIN32
119 // the Win32 ABI can't handle function parameters with alignment >= 16
120 #define Vc_PASSING_VECTOR_BY_VALUE_IS_BROKEN 1
121 #endif
// Vc_GNU_ASM is keyed on __GNUC__ (not on Vc_GCC), so it is set for every compiler that
// defines __GNUC__. Users can opt out by defining Vc_NO_INLINE_ASM.
122 #if defined(__GNUC__) && !defined(Vc_NO_INLINE_ASM)
123 #define Vc_GNU_ASM 1
124 #endif
125 
// For MSVC versions below 180000000 the constexpr keyword is replaced by a macro expanding
// to `inline __forceinline` (unless a constexpr macro already exists), and Vc_NO_NOEXCEPT
// is defined.
126 #if defined(Vc_MSVC) && Vc_MSVC < 180000000
127 // MSVC doesn't know constexpr and noexcept
128 // first include the check that forbids macroizing keywords >:)
129 #include <xkeycheck.h>
130 #ifndef constexpr
131 #define constexpr inline __forceinline
132 #endif
133 #define Vc_NO_NOEXCEPT 1
134 #endif
135 
136 
// max_align_t availability:
//  - GCC >= 4.7 (0x40700): Vc_HAVE_MAX_ALIGN_T
//  - any compiler that is not GCC, Clang or ICC: Vc_HAVE_STD_MAX_ALIGN_T
//  - Clang, ICC and older GCC: neither macro is defined
137 #ifdef Vc_GCC
138 # if Vc_GCC >= 0x40700 // && Vc_GCC < 0x408000)
139 // ::max_align_t was introduced with GCC 4.7. std::max_align_t took a bit longer.
140 # define Vc_HAVE_MAX_ALIGN_T 1
141 # endif
142 #elif !defined(Vc_CLANG) && !defined(Vc_ICC)
143 // Clang/ICC don't provide max_align_t at all
144 # define Vc_HAVE_STD_MAX_ALIGN_T 1
145 #endif
146 
// Vc_USE_BUILTIN_VECTOR_TYPES is set for GCC and Clang only.
147 #if defined(Vc_GCC) || defined(Vc_CLANG)
148 #define Vc_USE_BUILTIN_VECTOR_TYPES 1
149 #endif
150 
151 /* Define the following strings to a unique integer, which is the only type the preprocessor can
152  * compare. This allows using -DVc_IMPL=SSE3: the preprocessor will then consider Vc_IMPL and SSE3
153  * to be equal. Of course, it is important to undefine the strings later on!
154  */
// Base implementation selectors: each value lies within IMPL_MASK (the upper 12 bits).
155 #define Scalar 0x00100000
156 #define SSE 0x00200000
157 #define SSE2 0x00300000
158 #define SSE3 0x00400000
159 #define SSSE3 0x00500000
160 #define SSE4_1 0x00600000
161 #define SSE4_2 0x00700000
162 #define AVX 0x00800000
163 #define AVX2 0x00900000
164 #define MIC 0x00A00000
165 
// Extension flags: one bit each, all within EXT_MASK (the lower 20 bits), so they can be
// tested individually with `Vc_IMPL & FLAG`.
166 #define XOP 0x00000001
167 #define FMA4 0x00000002
168 #define F16C 0x00000004
169 #define POPCNT 0x00000008
170 #define SSE4a 0x00000010
171 #define FMA 0x00000020
172 #define BMI2 0x00000040
173 
// Masks separating the base selector from the extension flags.
174 #define IMPL_MASK 0xFFF00000
175 #define EXT_MASK 0x000FFFFF
176 
// MSVC: synthesize the GCC-style __SSE__ / __SSE2__ macros that the detection logic below
// tests. They are derived from _M_IX86_FP when it is defined, otherwise from _M_AMD64.
177 #ifdef Vc_MSVC
178 # ifdef _M_IX86_FP
179 # if _M_IX86_FP >= 1
180 # ifndef __SSE__
181 # define __SSE__ 1
182 # endif
183 # endif
184 # if _M_IX86_FP >= 2
185 # ifndef __SSE2__
186 # define __SSE2__ 1
187 # endif
188 # endif
189 # elif defined(_M_AMD64)
190 // If the target is x86_64 then SSE2 is guaranteed
191 # ifndef __SSE__
192 # define __SSE__ 1
193 # endif
194 # ifndef __SSE2__
195 # define __SSE2__ 1
196 # endif
197 # endif
198 #endif
199 
// ICC: define __POPCNT__ when it is missing but __SSE4_2__ or __SSE4A__ is defined.
200 #if defined Vc_ICC && !defined __POPCNT__
201 # if defined __SSE4_2__ || defined __SSE4A__
202 # define __POPCNT__ 1
203 # endif
204 #endif
205 
// Reject the pre-1.0 spelling of the implementation-selection macro outright.
206 #ifdef VC_IMPL
207 #error "You are using the old VC_IMPL macro. Since Vc 1.0 all Vc macros start with Vc_, i.e. a lower-case 'c'"
208 #endif
209 
210 #ifndef Vc_IMPL
211 
// No Vc_IMPL given: derive the implementation from the compiler's target macros.
// __MIC__, __AVX2__ and __AVX__ are checked first, in that order. Only if none of them is
// defined are the individual SSE levels examined.
212 # if defined(__MIC__)
213 # define Vc_IMPL_MIC 1
214 # elif defined(__AVX2__)
215 # define Vc_IMPL_AVX2 1
216 # define Vc_IMPL_AVX 1
217 # elif defined(__AVX__)
218 # define Vc_IMPL_AVX 1
219 # else
// Each SSE level is enabled independently from its own target macro. Any level from
// SSE2 upwards also defines Vc_IMPL_SSE.
220 # if defined(__SSE4_2__)
221 # define Vc_IMPL_SSE 1
222 # define Vc_IMPL_SSE4_2 1
223 # endif
224 # if defined(__SSE4_1__)
225 # define Vc_IMPL_SSE 1
226 # define Vc_IMPL_SSE4_1 1
227 # endif
228 # if defined(__SSE3__)
229 # define Vc_IMPL_SSE 1
230 # define Vc_IMPL_SSE3 1
231 # endif
232 # if defined(__SSSE3__)
233 # define Vc_IMPL_SSE 1
234 # define Vc_IMPL_SSSE3 1
235 # endif
236 # if defined(__SSE2__)
237 # define Vc_IMPL_SSE 1
238 # define Vc_IMPL_SSE2 1
239 # endif
240 
// Nothing SIMD-capable was detected: fall back to the scalar implementation.
241 # if defined(Vc_IMPL_SSE)
242  // nothing
243 # else
244 # define Vc_IMPL_Scalar 1
245 # endif
246 # endif
// Extension flags are taken from the compiler's target macros, but only for non-scalar
// builds.
247 # if !defined(Vc_IMPL_Scalar)
248 # ifdef __FMA4__
249 # define Vc_IMPL_FMA4 1
250 # endif
251 # ifdef __XOP__
252 # define Vc_IMPL_XOP 1
253 # endif
254 # ifdef __F16C__
255 # define Vc_IMPL_F16C 1
256 # endif
257 # ifdef __POPCNT__
258 # define Vc_IMPL_POPCNT 1
259 # endif
260 # ifdef __SSE4A__
261 # define Vc_IMPL_SSE4a 1
262 # endif
263 # ifdef __FMA__
264 # define Vc_IMPL_FMA 1
265 # endif
266 # ifdef __BMI2__
267 # define Vc_IMPL_BMI2 1
268 # endif
269 # endif
270 
271 #else // Vc_IMPL
272 
// Explicit Vc_IMPL: the bits under IMPL_MASK select the base implementation. A specific
// SSE level (SSE2 ... SSE4_2) also enables every lower SSE level.
273 # if (Vc_IMPL & IMPL_MASK) == MIC // MIC supersedes everything else
274 # define Vc_IMPL_MIC 1
275 # ifdef __POPCNT__
276 # define Vc_IMPL_POPCNT 1
277 # endif
278 # elif (Vc_IMPL & IMPL_MASK) == AVX2 // AVX2 supersedes SSE
279 # define Vc_IMPL_AVX2 1
280 # define Vc_IMPL_AVX 1
281 # elif (Vc_IMPL & IMPL_MASK) == AVX // AVX supersedes SSE
282 # define Vc_IMPL_AVX 1
283 # elif (Vc_IMPL & IMPL_MASK) == Scalar
284 # define Vc_IMPL_Scalar 1
285 # elif (Vc_IMPL & IMPL_MASK) == SSE4_2
286 # define Vc_IMPL_SSE4_2 1
287 # define Vc_IMPL_SSE4_1 1
288 # define Vc_IMPL_SSSE3 1
289 # define Vc_IMPL_SSE3 1
290 # define Vc_IMPL_SSE2 1
291 # define Vc_IMPL_SSE 1
292 # elif (Vc_IMPL & IMPL_MASK) == SSE4_1
293 # define Vc_IMPL_SSE4_1 1
294 # define Vc_IMPL_SSSE3 1
295 # define Vc_IMPL_SSE3 1
296 # define Vc_IMPL_SSE2 1
297 # define Vc_IMPL_SSE 1
298 # elif (Vc_IMPL & IMPL_MASK) == SSSE3
299 # define Vc_IMPL_SSSE3 1
300 # define Vc_IMPL_SSE3 1
301 # define Vc_IMPL_SSE2 1
302 # define Vc_IMPL_SSE 1
303 # elif (Vc_IMPL & IMPL_MASK) == SSE3
304 # define Vc_IMPL_SSE3 1
305 # define Vc_IMPL_SSE2 1
306 # define Vc_IMPL_SSE 1
307 # elif (Vc_IMPL & IMPL_MASK) == SSE2
308 # define Vc_IMPL_SSE2 1
309 # define Vc_IMPL_SSE 1
// Plain "SSE" names no specific level: enable whichever levels the compiler's target
// macros advertise.
310 # elif (Vc_IMPL & IMPL_MASK) == SSE
311 # define Vc_IMPL_SSE 1
312 # if defined(__SSE4_2__)
313 # define Vc_IMPL_SSE4_2 1
314 # endif
315 # if defined(__SSE4_1__)
316 # define Vc_IMPL_SSE4_1 1
317 # endif
318 # if defined(__SSE3__)
319 # define Vc_IMPL_SSE3 1
320 # endif
321 # if defined(__SSSE3__)
322 # define Vc_IMPL_SSSE3 1
323 # endif
324 # if defined(__SSE2__)
325 # define Vc_IMPL_SSE2 1
326 # endif
327 # elif (Vc_IMPL & IMPL_MASK) == 0 && (Vc_IMPL & SSE4a)
328  // this is for backward compatibility only where SSE4a was included in the main
329  // line of available SIMD instruction sets
330 # define Vc_IMPL_SSE3 1
331 # define Vc_IMPL_SSE2 1
332 # define Vc_IMPL_SSE 1
333 # endif
// Extension flags: each one is enabled when its bit is set in Vc_IMPL.
334 # if (Vc_IMPL & XOP)
335 # define Vc_IMPL_XOP 1
336 # endif
337 # if (Vc_IMPL & FMA4)
338 # define Vc_IMPL_FMA4 1
339 # endif
340 # if (Vc_IMPL & F16C)
341 # define Vc_IMPL_F16C 1
342 # endif
// POPCNT is the exception: besides the explicit bit, it is also enabled from the
// compiler's __POPCNT__ macro for any non-scalar build.
343 # if (!defined(Vc_IMPL_Scalar) && defined(__POPCNT__)) || (Vc_IMPL & POPCNT)
344 # define Vc_IMPL_POPCNT 1
345 # endif
346 # if (Vc_IMPL & SSE4a)
347 # define Vc_IMPL_SSE4a 1
348 # endif
349 # if (Vc_IMPL & FMA)
350 # define Vc_IMPL_FMA 1
351 # endif
352 # if (Vc_IMPL & BMI2)
353 # define Vc_IMPL_BMI2 1
354 # endif
// Vc_IMPL has been fully decoded into Vc_IMPL_* macros; remove it.
355 # undef Vc_IMPL
356 
357 #endif // Vc_IMPL
358 
359 // If AVX is enabled in the compiler it will use VEX coding for the SIMD instructions.
// Keyed on the compiler's __AVX__ macro, not on the selected Vc_IMPL_AVX.
360 #ifdef __AVX__
361 # define Vc_USE_VEX_CODING 1
362 #endif
363 
364 #ifdef Vc_IMPL_AVX
365 // if we have AVX then we also have all SSE intrinsics
366 # define Vc_IMPL_SSE4_2 1
367 # define Vc_IMPL_SSE4_1 1
368 # define Vc_IMPL_SSSE3 1
369 # define Vc_IMPL_SSE3 1
370 # define Vc_IMPL_SSE2 1
371 # define Vc_IMPL_SSE 1
372 #endif
373 
// Workaround for clang 3.6.x: drop Vc_IMPL_AVX and Vc_IMPL_AVX2 again. The Vc_IMPL_SSE*
// macros defined just above remain, so the build continues with the SSE implementation.
374 #if defined(Vc_CLANG) && Vc_CLANG >= 0x30600 && Vc_CLANG < 0x30700
375 # if defined(Vc_IMPL_AVX)
376 # warning "clang 3.6.x miscompiles AVX code, frequently losing 50% of the data. Vc will fall back to SSE4 instead."
377 # undef Vc_IMPL_AVX
378 # if defined(Vc_IMPL_AVX2)
379 # undef Vc_IMPL_AVX2
380 # endif
381 # endif
382 #endif
383 
// Sanity check: at least one base implementation must have been selected, and an SSE
// selection must include SSE2.
384 # if !defined(Vc_IMPL_Scalar) && !defined(Vc_IMPL_SSE) && !defined(Vc_IMPL_AVX) && !defined(Vc_IMPL_MIC)
385 # error "No suitable Vc implementation was selected! Probably Vc_IMPL was set to an invalid value."
386 # elif defined(Vc_IMPL_SSE) && !defined(Vc_IMPL_SSE2)
387 # error "SSE requested but no SSE2 support. Vc needs at least SSE2!"
388 # endif
389 
// Remove the temporary helper macros used to decode Vc_IMPL (base selectors, extension
// flags and the two masks) so that these short names do not stay defined as macros
// after this point.
390 #undef Scalar
391 #undef SSE
392 #undef SSE2
393 #undef SSE3
394 #undef SSSE3
395 #undef SSE4_1
396 #undef SSE4_2
397 #undef AVX
398 #undef AVX2
399 #undef MIC
400 
401 #undef XOP
402 #undef FMA4
403 #undef F16C
404 #undef POPCNT
405 #undef SSE4a
406 #undef FMA
407 #undef BMI2
408 
409 #undef IMPL_MASK
410 #undef EXT_MASK
411 
// Define exactly one Vc_DEFAULT_IMPL_* macro, picking the first match in the order
// MIC, AVX2, AVX, SSE, Scalar. Reaching the #else means none of the Vc_IMPL_* base
// macros is defined, which the earlier sanity check should already have rejected.
412 #ifdef Vc_IMPL_MIC
413 #define Vc_DEFAULT_IMPL_MIC
414 #elif defined Vc_IMPL_AVX2
415 #define Vc_DEFAULT_IMPL_AVX2
416 #elif defined Vc_IMPL_AVX
417 #define Vc_DEFAULT_IMPL_AVX
418 #elif defined Vc_IMPL_SSE
419 #define Vc_DEFAULT_IMPL_SSE
420 #elif defined Vc_IMPL_Scalar
421 #define Vc_DEFAULT_IMPL_Scalar
422 #else
423 #error "Preprocessor logic broken. Please report a bug."
424 #endif
425 
// All Vc symbols live in the versioned namespace Vc_1; `Vc` is a namespace alias for it.
// The empty namespace definition exists so that the alias can be declared here.
426 #define Vc_VERSIONED_NAMESPACE Vc_1
427 
428 namespace Vc_VERSIONED_NAMESPACE {}
429 namespace Vc = Vc_VERSIONED_NAMESPACE;
430 
431 #endif // DOXYGEN
432 
433 namespace Vc_VERSIONED_NAMESPACE
434 {
435 
// Integer aliases declared inside the Vc namespace, defined directly in terms of the
// fundamental types rather than the <cstdint> typedefs.
// NOTE(review): this assumes short / int / long long are 16 / 32 / 64 bits wide on every
// supported target - confirm.
436 typedef signed char int8_t;
437 typedef unsigned char uint8_t;
438 typedef signed short int16_t;
439 typedef unsigned short uint16_t;
440 typedef signed int int32_t;
441 typedef unsigned int uint32_t;
442 typedef signed long long int64_t;
443 typedef unsigned long long uint64_t;
444 
470 };
471 
481 enum Implementation : std::uint_least32_t { // TODO: make enum class
500  ImplementationMask = 0xfff
501 };
502 
513 enum ExtraInstructions : std::uint_least32_t { // TODO: make enum class
517  Fma4Instructions = 0x02000,
519  XopInstructions = 0x04000,
523  Sse4aInstructions = 0x10000,
525  FmaInstructions = 0x20000,
527  VexInstructions = 0x40000,
529  Bmi2Instructions = 0x80000,
530  // PclmulqdqInstructions,
531  // AesInstructions,
532  // RdrandInstructions
533  ExtraInstructionsMask = 0xfffff000u
534 };
535 
/// Compile-time description of an implementation, encoded in the \p Features bit mask:
/// the bits under ImplementationMask hold an Implementation value, the bits under
/// ExtraInstructionsMask hold ExtraInstructions flags.
545 template <unsigned int Features> struct ImplementationT {
    /// Returns the currently used Vc::Implementation.
547  static constexpr Implementation current()
548  {
549  return static_cast<Implementation>(Features & ImplementationMask);
550  }
    /// Returns whether \p impl is the current Vc::Implementation.
552  static constexpr bool is(Implementation impl)
553  {
554  return static_cast<unsigned int>(impl) == current();
555  }
    /// Returns whether the current Vc::Implementation implements at least \p low and at
    /// most \p high. Both bounds are inclusive.
560  static constexpr bool is_between(Implementation low, Implementation high)
561  {
562  return static_cast<unsigned int>(low) <= current() &&
563  static_cast<unsigned int>(high) >= current();
564  }
    /// Returns whether the current code would run on a CPU providing \p extraInstructions,
    /// i.e. whether every extra-instruction bit set in \p Features is also set in
    /// \p extraInstructions.
568  static constexpr bool runs_on(unsigned int extraInstructions)
569  {
570  return (extraInstructions & Features & ExtraInstructionsMask) ==
571  (Features & ExtraInstructionsMask);
572  }
573 };
580 using CurrentImplementation = ImplementationT<
581 #ifdef Vc_IMPL_Scalar
582  ScalarImpl
583 #elif defined(Vc_IMPL_MIC)
584  MICImpl
585 #elif defined(Vc_IMPL_AVX2)
586  AVX2Impl
587 #elif defined(Vc_IMPL_AVX)
588  AVXImpl
589 #elif defined(Vc_IMPL_SSE4_2)
590  SSE42Impl
591 #elif defined(Vc_IMPL_SSE4_1)
592  SSE41Impl
593 #elif defined(Vc_IMPL_SSSE3)
594  SSSE3Impl
595 #elif defined(Vc_IMPL_SSE3)
596  SSE3Impl
597 #elif defined(Vc_IMPL_SSE2)
598  SSE2Impl
599 #endif
600 #ifdef Vc_IMPL_SSE4a
602 #ifdef Vc_IMPL_XOP
604 #ifdef Vc_IMPL_FMA4
606 #endif
607 #endif
608 #endif
609 #ifdef Vc_IMPL_POPCNT
611 #endif
612 #ifdef Vc_IMPL_FMA
614 #endif
615 #ifdef Vc_IMPL_BMI2
617 #endif
618 #ifdef Vc_USE_VEX_CODING
620 #endif
621  >;
622 
623 } // namespace Vc
624 
625 // TODO: clean up headers (e.g. math.h) to remove the following:
626 #ifndef Vc_ENABLE_FLOAT_BIT_OPERATORS
627 #define Vc_ENABLE_FLOAT_BIT_OPERATORS 1
628 #endif
629 
630 #include "version.h"
631 
632 #endif // VC_GLOBAL_H_
633 
634 // vim: foldmethod=marker
Intel Xeon Phi.
Definition: global.h:499
ExtraInstructions
The list of available instructions is not easily described by a linear list of instruction sets...
Definition: global.h:513
Align on boundary of page sizes (e.g.
Definition: global.h:469
static constexpr bool is_between(Implementation low, Implementation high)
Returns whether the current Vc::Implementation implements at least low and at most high...
Definition: global.h:560
Support for FMA instructions (3 operand variant)
Definition: global.h:525
Implementation
Enum to identify a certain SIMD instruction set.
Definition: global.h:481
static constexpr bool runs_on(unsigned int extraInstructions)
Returns whether the current code would run on a CPU providing extraInstructions.
Definition: global.h:568
This class identifies the specific implementation Vc uses in the current translation unit in terms of...
Definition: global.h:545
Support for BMI2 instructions.
Definition: global.h:529
Support for XOP instructions.
Definition: global.h:519
MallocAlignment
Enum that specifies the alignment and padding restrictions to use for memory allocation with Vc::mall...
Definition: global.h:451
Support for the population count instruction.
Definition: global.h:521
Support for SSE4a instructions.
Definition: global.h:523
static constexpr bool is(Implementation impl)
Returns whether impl is the current Vc::Implementation.
Definition: global.h:552
x86 SSE + SSE2
Definition: global.h:485
ImplementationT< > CurrentImplementation
Identifies the Vc implementation used in the current translation unit.
Definition: global.h:621
x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1 + SSE4.2
Definition: global.h:493
Align on boundary of cache line sizes (e.g.
Definition: global.h:463
Support for ternary instruction coding (VEX)
Definition: global.h:527
x86 SSE + SSE2 + SSE3 + SSSE3
Definition: global.h:489
x86 SSE + SSE2 + SSE3
Definition: global.h:487
Support for FMA4 instructions.
Definition: global.h:517
Vector Classes Namespace.
Definition: cpuid.h:33
static constexpr Implementation current()
Returns the currently used Vc::Implementation.
Definition: global.h:547
uses only fundamental types
Definition: global.h:483
Align on boundary of vector sizes (e.g.
Definition: global.h:457
x86 AVX + AVX2
Definition: global.h:497
Support for float16 conversions in hardware.
Definition: global.h:515
x86 AVX
Definition: global.h:495
x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1
Definition: global.h:491