Vc  1.3.0
SIMD Vector Classes for C++
global.h
1 /* This file is part of the Vc library. {{{
2 Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>
3 
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions are met:
6  * Redistributions of source code must retain the above copyright
7  notice, this list of conditions and the following disclaimer.
8  * Redistributions in binary form must reproduce the above copyright
9  notice, this list of conditions and the following disclaimer in the
10  documentation and/or other materials provided with the distribution.
11  * Neither the names of contributing organizations nor the
12  names of its contributors may be used to endorse or promote products
13  derived from this software without specific prior written permission.
14 
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 
26 }}}*/
27 
28 #ifndef VC_GLOBAL_H_
29 #define VC_GLOBAL_H_
30 
31 #include <cstdint>
32 
33 #ifdef DOXYGEN
34 
// Documentation-only branch (DOXYGEN is defined): each compiler identification
// macro is defined with the same expression as in the real detection chain of
// the non-DOXYGEN branch and, with the exception of Vc_GCC, undefined again on
// the following line.
// NOTE(review): Vc_GCC is the only macro that stays defined here - confirm that
// this is intentional.
47 #define Vc_ICC __INTEL_COMPILER_BUILD_DATE
48 #undef Vc_ICC
49 
// Clang version encoded as major * 0x10000 + minor * 0x100 + patchlevel.
56 #define Vc_CLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
57 #undef Vc_CLANG
58 
// Same encoding as Vc_CLANG, but a separate macro name.
65 #define Vc_APPLECLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
66 #undef Vc_APPLECLANG
67 
// GCC version encoded as major * 0x10000 + minor * 0x100 + patchlevel.
74 #define Vc_GCC (__GNUC__ * 0x10000 + __GNUC_MINOR__ * 0x100 + __GNUC_PATCHLEVEL__)
75 
// MSVC is identified by the value of _MSC_FULL_VER.
82 #define Vc_MSVC _MSC_FULL_VER
83 #undef Vc_MSVC
84 
85 
86 #else // DOXYGEN
87 
88 // Compiler defines
// Exactly one of Vc_ICC, Vc_OPEN64, Vc_APPLECLANG, Vc_CLANG, Vc_GCC, Vc_MSVC or
// Vc_UNSUPPORTED_COMPILER is defined by this chain. The order of the tests is
// significant: __INTEL_COMPILER, __OPENCC__ and __clang__ are checked before
// __GNUC__ (presumably because those compilers also define __GNUC__ - TODO
// confirm).
// The clang and GCC macros encode the version as
// major * 0x10000 + minor * 0x100 + patchlevel, so e.g. 0x30600 stands for 3.6.0.
89 #ifdef __INTEL_COMPILER
90 #define Vc_ICC __INTEL_COMPILER_BUILD_DATE
91 #elif defined(__OPENCC__)
92 #define Vc_OPEN64 1
93 #elif defined(__clang__) && defined(__APPLE__) && __clang_major__ >= 6
94 // this is going to break :-(
// NOTE(review): presumably refers to telling Apple's clang apart via
// __clang_major__ >= 6 - confirm what exactly is expected to break.
95 #define Vc_APPLECLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
96 #elif defined(__clang__)
97 #define Vc_CLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
98 #elif defined(__GNUC__)
99 #define Vc_GCC (__GNUC__ * 0x10000 + __GNUC_MINOR__ * 0x100 + __GNUC_PATCHLEVEL__)
100 #elif defined(_MSC_VER)
101 #define Vc_MSVC _MSC_FULL_VER
102 #else
103 #define Vc_UNSUPPORTED_COMPILER 1
104 #endif
105 
// Language standard check. Compilation is aborted when __cplusplus reports less
// than C++11, unless the compiler is MSVC with _MSC_VER >= 1900, which is
// exempt from the __cplusplus test (presumably because that compiler does not
// report an up-to-date __cplusplus value - TODO confirm).
// Vc_CXX14 is defined for __cplusplus >= 201402L and, in addition, Vc_CXX17 for
// __cplusplus > 201700L.
106 #if __cplusplus < 201103 && (!defined Vc_MSVC || _MSC_VER < 1900)
107 # error "Vc requires support for C++11."
108 #elif __cplusplus >= 201402L
109 # define Vc_CXX14 1
110 # if __cplusplus > 201700L
111 # define Vc_CXX17 1
112 # endif
113 #endif
114 
// Vc_GNU_ASM is defined for every compiler that defines __GNUC__, unless the
// user opts out by defining Vc_NO_INLINE_ASM.
115 #if defined(__GNUC__) && !defined(Vc_NO_INLINE_ASM)
116 #define Vc_GNU_ASM 1
117 #endif
118 
// max_align_t availability:
//  - GCC:                      Vc_HAVE_MAX_ALIGN_T
//  - neither Vc_CLANG nor ICC: Vc_HAVE_STD_MAX_ALIGN_T
//  - Vc_CLANG / Vc_ICC:        neither macro
// NOTE(review): judging by the names, the first macro presumably refers to
// ::max_align_t and the second to std::max_align_t - confirm.
// NOTE(review): Vc_APPLECLANG is not excluded by the #elif, so Apple clang ends
// up with Vc_HAVE_STD_MAX_ALIGN_T - confirm that this is intended.
119 #ifdef Vc_GCC
120 # define Vc_HAVE_MAX_ALIGN_T 1
121 #elif !defined(Vc_CLANG) && !defined(Vc_ICC)
122 // Clang/ICC don't provide max_align_t at all
123 # define Vc_HAVE_STD_MAX_ALIGN_T 1
124 #endif
125 
// Vc_USE_BUILTIN_VECTOR_TYPES is defined for GCC, clang and Apple clang.
126 #if defined(Vc_GCC) || defined(Vc_CLANG) || defined Vc_APPLECLANG
127 #define Vc_USE_BUILTIN_VECTOR_TYPES 1
128 #endif
129 
// Calling-convention macros: on MSVC they expand to __cdecl and __vectorcall,
// on every other compiler they expand to nothing.
130 #ifdef Vc_MSVC
131 # define Vc_CDECL __cdecl
132 # define Vc_VDECL __vectorcall
133 #else
134 # define Vc_CDECL
135 # define Vc_VDECL
136 #endif
137 
// Temporary constants used to decode a user-supplied Vc_IMPL value. All of them
// are removed again with #undef further down in this header.
138 /* Define the following strings to a unique integer, which is the only type the preprocessor can
139  * compare. This allows using -DVc_IMPL=SSE3. The preprocessor will then consider Vc_IMPL and SSE3
140  * to be equal. Of course, it is important to undefine the strings later on!
141  */
// Base implementations: the values only use bits covered by IMPL_MASK.
142 #define Scalar 0x00100000
143 #define SSE 0x00200000
144 #define SSE2 0x00300000
145 #define SSE3 0x00400000
146 #define SSSE3 0x00500000
147 #define SSE4_1 0x00600000
148 #define SSE4_2 0x00700000
149 #define AVX 0x00800000
150 #define AVX2 0x00900000
151 #define MIC 0x00A00000
152 
// Extension flags: one bit each, so each can be tested with a bitwise AND.
153 #define XOP 0x00000001
154 #define FMA4 0x00000002
155 #define F16C 0x00000004
156 #define POPCNT 0x00000008
157 #define SSE4a 0x00000010
158 #define FMA 0x00000020
159 #define BMI2 0x00000040
160 
// IMPL_MASK selects the base-implementation bits, EXT_MASK the extension bits.
// NOTE(review): EXT_MASK is defined and later undefined but not otherwise used
// in the visible part of this header.
161 #define IMPL_MASK 0xFFF00000
162 #define EXT_MASK 0x000FFFFF
163 
// On MSVC, define __SSE__ / __SSE2__ (unless already defined) from the MSVC
// target macros, so that the implementation selection below can test the same
// macros for every compiler:
//  - _M_IX86_FP >= 1 -> __SSE__
//  - _M_IX86_FP >= 2 -> __SSE2__
//  - _M_AMD64 (only checked when _M_IX86_FP is undefined) -> both
164 #ifdef Vc_MSVC
165 # ifdef _M_IX86_FP
166 # if _M_IX86_FP >= 1
167 # ifndef __SSE__
168 # define __SSE__ 1
169 # endif
170 # endif
171 # if _M_IX86_FP >= 2
172 # ifndef __SSE2__
173 # define __SSE2__ 1
174 # endif
175 # endif
176 # elif defined(_M_AMD64)
177 // If the target is x86_64 then SSE2 is guaranteed
178 # ifndef __SSE__
179 # define __SSE__ 1
180 # endif
181 # ifndef __SSE2__
182 # define __SSE2__ 1
183 # endif
184 # endif
185 #endif
186 
// With ICC, define __POPCNT__ when it is missing but __SSE4_2__ or __SSE4A__ is
// defined.
187 #if defined Vc_ICC && !defined __POPCNT__
188 # if defined __SSE4_2__ || defined __SSE4A__
189 # define __POPCNT__ 1
190 # endif
191 #endif
192 
// Catch users of the old upper-case spelling VC_IMPL with a hard error instead
// of silently ignoring their setting.
193 #ifdef VC_IMPL
194 #error "You are using the old VC_IMPL macro. Since Vc 1.0 all Vc macros start with Vc_, i.e. a lower-case 'c'"
195 #endif
196 
// Implementation selection. Defines the Vc_IMPL_* feature macros in one of two
// ways:
//  - Vc_IMPL not defined: derive everything from the instruction-set macros
//    predefined by the compiler (__MIC__, __AVX2__, __AVX__, __SSE*__, ...).
//  - Vc_IMPL defined: decode the user-supplied value with the temporary
//    constants defined above (Scalar, SSE, ..., IMPL_MASK) and undefine Vc_IMPL
//    afterwards.
197 #ifndef Vc_IMPL
198 
// Base implementation: MIC, then AVX2 (which also sets AVX), then AVX;
// otherwise every SSE level the compiler reports is recorded individually.
199 # if defined(__MIC__)
200 # define Vc_IMPL_MIC 1
201 # elif defined(__AVX2__)
202 # define Vc_IMPL_AVX2 1
203 # define Vc_IMPL_AVX 1
204 # elif defined(__AVX__)
205 # define Vc_IMPL_AVX 1
206 # else
207 # if defined(__SSE4_2__)
208 # define Vc_IMPL_SSE 1
209 # define Vc_IMPL_SSE4_2 1
210 # endif
211 # if defined(__SSE4_1__)
212 # define Vc_IMPL_SSE 1
213 # define Vc_IMPL_SSE4_1 1
214 # endif
215 # if defined(__SSE3__)
216 # define Vc_IMPL_SSE 1
217 # define Vc_IMPL_SSE3 1
218 # endif
219 # if defined(__SSSE3__)
220 # define Vc_IMPL_SSE 1
221 # define Vc_IMPL_SSSE3 1
222 # endif
223 # if defined(__SSE2__)
224 # define Vc_IMPL_SSE 1
225 # define Vc_IMPL_SSE2 1
226 # endif
227 
// Vc_IMPL_SSE is only set by the SSE2-or-later tests above; if none of them
// matched, fall back to the Scalar implementation.
228 # if defined(Vc_IMPL_SSE)
229  // nothing
230 # else
231 # define Vc_IMPL_Scalar 1
232 # endif
233 # endif
// Extension flags are taken from the compiler macros, but only when a SIMD
// implementation (i.e. not Scalar) was selected.
234 # if !defined(Vc_IMPL_Scalar)
235 # ifdef __FMA4__
236 # define Vc_IMPL_FMA4 1
237 # endif
238 # ifdef __XOP__
239 # define Vc_IMPL_XOP 1
240 # endif
241 # ifdef __F16C__
242 # define Vc_IMPL_F16C 1
243 # endif
244 # ifdef __POPCNT__
245 # define Vc_IMPL_POPCNT 1
246 # endif
247 # ifdef __SSE4A__
248 # define Vc_IMPL_SSE4a 1
249 # endif
250 # ifdef __FMA__
251 # define Vc_IMPL_FMA 1
252 # endif
253 # ifdef __BMI2__
254 # define Vc_IMPL_BMI2 1
255 # endif
256 # endif
257 
258 #else // Vc_IMPL
259 
// Explicit request: the bits selected by IMPL_MASK name the base
// implementation. Each requested SSE level also defines all lower SSE levels.
260 # if (Vc_IMPL & IMPL_MASK) == MIC // MIC supersedes everything else
261 # define Vc_IMPL_MIC 1
262 # ifdef __POPCNT__
263 # define Vc_IMPL_POPCNT 1
264 # endif
265 # elif (Vc_IMPL & IMPL_MASK) == AVX2 // AVX2 supersedes SSE
266 # define Vc_IMPL_AVX2 1
267 # define Vc_IMPL_AVX 1
268 # elif (Vc_IMPL & IMPL_MASK) == AVX // AVX supersedes SSE
269 # define Vc_IMPL_AVX 1
270 # elif (Vc_IMPL & IMPL_MASK) == Scalar
271 # define Vc_IMPL_Scalar 1
272 # elif (Vc_IMPL & IMPL_MASK) == SSE4_2
273 # define Vc_IMPL_SSE4_2 1
274 # define Vc_IMPL_SSE4_1 1
275 # define Vc_IMPL_SSSE3 1
276 # define Vc_IMPL_SSE3 1
277 # define Vc_IMPL_SSE2 1
278 # define Vc_IMPL_SSE 1
279 # elif (Vc_IMPL & IMPL_MASK) == SSE4_1
280 # define Vc_IMPL_SSE4_1 1
281 # define Vc_IMPL_SSSE3 1
282 # define Vc_IMPL_SSE3 1
283 # define Vc_IMPL_SSE2 1
284 # define Vc_IMPL_SSE 1
285 # elif (Vc_IMPL & IMPL_MASK) == SSSE3
286 # define Vc_IMPL_SSSE3 1
287 # define Vc_IMPL_SSE3 1
288 # define Vc_IMPL_SSE2 1
289 # define Vc_IMPL_SSE 1
290 # elif (Vc_IMPL & IMPL_MASK) == SSE3
291 # define Vc_IMPL_SSE3 1
292 # define Vc_IMPL_SSE2 1
293 # define Vc_IMPL_SSE 1
294 # elif (Vc_IMPL & IMPL_MASK) == SSE2
295 # define Vc_IMPL_SSE2 1
296 # define Vc_IMPL_SSE 1
// A plain "SSE" request does not fix the SSE level; the levels are taken from
// the compiler's predefined macros instead.
297 # elif (Vc_IMPL & IMPL_MASK) == SSE
298 # define Vc_IMPL_SSE 1
299 # if defined(__SSE4_2__)
300 # define Vc_IMPL_SSE4_2 1
301 # endif
302 # if defined(__SSE4_1__)
303 # define Vc_IMPL_SSE4_1 1
304 # endif
305 # if defined(__SSE3__)
306 # define Vc_IMPL_SSE3 1
307 # endif
308 # if defined(__SSSE3__)
309 # define Vc_IMPL_SSSE3 1
310 # endif
311 # if defined(__SSE2__)
312 # define Vc_IMPL_SSE2 1
313 # endif
314 # elif (Vc_IMPL & IMPL_MASK) == 0 && (Vc_IMPL & SSE4a)
315  // this is for backward compatibility only where SSE4a was included in the main
316  // line of available SIMD instruction sets
317 # define Vc_IMPL_SSE3 1
318 # define Vc_IMPL_SSE2 1
319 # define Vc_IMPL_SSE 1
320 # endif
// Extension flags: each one is enabled when its bit is set in Vc_IMPL. POPCNT
// is additionally enabled when the compiler defines __POPCNT__ and the Scalar
// implementation was not selected.
321 # if (Vc_IMPL & XOP)
322 # define Vc_IMPL_XOP 1
323 # endif
324 # if (Vc_IMPL & FMA4)
325 # define Vc_IMPL_FMA4 1
326 # endif
327 # if (Vc_IMPL & F16C)
328 # define Vc_IMPL_F16C 1
329 # endif
330 # if (!defined(Vc_IMPL_Scalar) && defined(__POPCNT__)) || (Vc_IMPL & POPCNT)
331 # define Vc_IMPL_POPCNT 1
332 # endif
333 # if (Vc_IMPL & SSE4a)
334 # define Vc_IMPL_SSE4a 1
335 # endif
336 # if (Vc_IMPL & FMA)
337 # define Vc_IMPL_FMA 1
338 # endif
339 # if (Vc_IMPL & BMI2)
340 # define Vc_IMPL_BMI2 1
341 # endif
// Vc_IMPL has been fully decoded into Vc_IMPL_* macros and is removed here.
342 # undef Vc_IMPL
343 
344 #endif // Vc_IMPL
345 
346 // If AVX is enabled in the compiler it will use VEX coding for the SIMD instructions.
// Note that this tests the compiler's __AVX__ macro, not the selected
// Vc_IMPL_AVX, so it is independent of an explicit Vc_IMPL request.
347 #ifdef __AVX__
348 # define Vc_USE_VEX_CODING 1
349 #endif
350 
// Selecting AVX (or AVX2, which always sets Vc_IMPL_AVX as well) enables every
// SSE level macro in addition.
351 #ifdef Vc_IMPL_AVX
352 // if we have AVX then we also have all SSE intrinsics
353 # define Vc_IMPL_SSE4_2 1
354 # define Vc_IMPL_SSE4_1 1
355 # define Vc_IMPL_SSSE3 1
356 # define Vc_IMPL_SSE3 1
357 # define Vc_IMPL_SSE2 1
358 # define Vc_IMPL_SSE 1
359 #endif
360 
// Workaround for clang versions in [3.6.0, 3.7.0): if AVX was selected, emit a
// warning and remove Vc_IMPL_AVX and Vc_IMPL_AVX2 again. The Vc_IMPL_SSE*
// macros that were defined together with Vc_IMPL_AVX stay in place, which is
// what makes the build fall back to the SSE implementation.
361 #if defined(Vc_CLANG) && Vc_CLANG >= 0x30600 && Vc_CLANG < 0x30700
362 # if defined(Vc_IMPL_AVX)
363 # warning "clang 3.6.x miscompiles AVX code, frequently losing 50% of the data. Vc will fall back to SSE4 instead."
364 # undef Vc_IMPL_AVX
365 # if defined(Vc_IMPL_AVX2)
366 # undef Vc_IMPL_AVX2
367 # endif
368 # endif
369 #endif
370 
// Sanity checks on the outcome of the selection above: at least one of the
// Scalar, SSE, AVX or MIC implementations must have been chosen, and an SSE
// selection must include SSE2.
371 # if !defined(Vc_IMPL_Scalar) && !defined(Vc_IMPL_SSE) && !defined(Vc_IMPL_AVX) && !defined(Vc_IMPL_MIC)
372 # error "No suitable Vc implementation was selected! Probably Vc_IMPL was set to an invalid value."
373 # elif defined(Vc_IMPL_SSE) && !defined(Vc_IMPL_SSE2)
374 # error "SSE requested but no SSE2 support. Vc needs at least SSE2!"
375 # endif
376 
// Remove the temporary constants that were only needed to decode Vc_IMPL, so
// that these short names do not leak into code including this header.
// Base implementation ids:
377 #undef Scalar
378 #undef SSE
379 #undef SSE2
380 #undef SSE3
381 #undef SSSE3
382 #undef SSE4_1
383 #undef SSE4_2
384 #undef AVX
385 #undef AVX2
386 #undef MIC
387 
// Extension flags:
388 #undef XOP
389 #undef FMA4
390 #undef F16C
391 #undef POPCNT
392 #undef SSE4a
393 #undef FMA
394 #undef BMI2
395 
// Bit masks:
396 #undef IMPL_MASK
397 #undef EXT_MASK
398 
// Define exactly one (empty) Vc_DEFAULT_IMPL_* macro for the selected
// implementation, checked in the order MIC, AVX2, AVX, SSE, Scalar. Reaching
// the #else branch means none of the Vc_IMPL_* macros is set, which the checks
// earlier in this header should already have rejected.
399 #ifdef Vc_IMPL_MIC
400 #define Vc_DEFAULT_IMPL_MIC
401 #elif defined Vc_IMPL_AVX2
402 #define Vc_DEFAULT_IMPL_AVX2
403 #elif defined Vc_IMPL_AVX
404 #define Vc_DEFAULT_IMPL_AVX
405 #elif defined Vc_IMPL_SSE
406 #define Vc_DEFAULT_IMPL_SSE
407 #elif defined Vc_IMPL_Scalar
408 #define Vc_DEFAULT_IMPL_Scalar
409 #else
410 #error "Preprocessor logic broken. Please report a bug."
411 #endif
412 
// The library's declarations are placed in the versioned namespace Vc_1; the
// namespace is opened once (empty) so that the alias `Vc` can be declared for
// it right away.
413 #define Vc_VERSIONED_NAMESPACE Vc_1
414 
415 namespace Vc_VERSIONED_NAMESPACE {}
416 namespace Vc = Vc_VERSIONED_NAMESPACE;
417 
418 #endif // DOXYGEN
419 
420 namespace Vc_VERSIONED_NAMESPACE
421 {
422 
// Integer aliases declared inside the Vc namespace, mapped directly onto
// fundamental types (they are not taken from <cstdint>).
// NOTE(review): the names imply 8/16/32/64-bit widths, which assumes an 8-bit
// char, 16-bit short, 32-bit int and 64-bit long long - confirm for all
// supported targets.
423 typedef signed char int8_t;
424 typedef unsigned char uint8_t;
425 typedef signed short int16_t;
426 typedef unsigned short uint16_t;
427 typedef signed int int32_t;
428 typedef unsigned int uint32_t;
429 typedef signed long long int64_t;
430 typedef unsigned long long uint64_t;
431 
457 };
458 
// Enum to identify a certain SIMD instruction set.
// NOTE(review): this listing does not show the enumerator lines between 468
// and 487; only the mask is visible here.
468 enum Implementation : std::uint_least32_t { // TODO: make enum class
// Bit mask covering the low 12 bits; ImplementationT::current() applies it to
// a feature word to extract the Implementation value.
487  ImplementationMask = 0xfff
488 };
489 
// Flags for instruction-set extensions that are not easily described by a
// linear list of instruction sets. Every flag is a single bit above the bits
// covered by ImplementationMask (0xfff).
// NOTE(review): this listing does not show the enumerator lines 502 and 508.
500 enum ExtraInstructions : std::uint_least32_t { // TODO: make enum class
// Support for FMA4 instructions.
504  Fma4Instructions = 0x02000,
// Support for XOP instructions.
506  XopInstructions = 0x04000,
// Support for SSE4a instructions.
510  Sse4aInstructions = 0x10000,
// Support for FMA instructions (3 operand variant).
512  FmaInstructions = 0x20000,
// Support for ternary instruction coding (VEX).
514  VexInstructions = 0x40000,
// Support for BMI2 instructions.
516  Bmi2Instructions = 0x80000,
517  // PclmulqdqInstructions,
518  // AesInstructions,
519  // RdrandInstructions
// Mask selecting all bits outside ImplementationMask (0xfff).
520  ExtraInstructionsMask = 0xfffff000u
521 };
522 
// Identifies the implementation Vc uses in the current translation unit.
// `Features` is a bit set: the bits selected by ImplementationMask hold an
// Implementation value, the bits selected by ExtraInstructionsMask hold
// ExtraInstructions flags. All members are static constexpr.
532 template <unsigned int Features> struct ImplementationT {
// Returns the currently used Vc::Implementation, i.e. Features masked with
// ImplementationMask.
534  static constexpr Implementation current()
535  {
536  return static_cast<Implementation>(Features & ImplementationMask);
537  }
// Returns whether impl is the current Vc::Implementation.
539  static constexpr bool is(Implementation impl)
540  {
541  return static_cast<unsigned int>(impl) == current();
542  }
// Returns whether the current Vc::Implementation lies in the inclusive range
// [low, high] when compared by numeric value.
547  static constexpr bool is_between(Implementation low, Implementation high)
548  {
549  return static_cast<unsigned int>(low) <= current() &&
550  static_cast<unsigned int>(high) >= current();
551  }
// Returns whether the current code would run on a CPU providing
// extraInstructions: true iff every extra-instruction bit set in Features is
// also set in extraInstructions.
555  static constexpr bool runs_on(unsigned int extraInstructions)
556  {
557  return (extraInstructions & Features & ExtraInstructionsMask) ==
558  (Features & ExtraInstructionsMask);
559  }
560 };
568 #ifdef Vc_IMPL_Scalar
569  ScalarImpl
570 #elif defined(Vc_IMPL_MIC)
571  MICImpl
572 #elif defined(Vc_IMPL_AVX2)
573  AVX2Impl
574 #elif defined(Vc_IMPL_AVX)
575  AVXImpl
576 #elif defined(Vc_IMPL_SSE4_2)
577  SSE42Impl
578 #elif defined(Vc_IMPL_SSE4_1)
579  SSE41Impl
580 #elif defined(Vc_IMPL_SSSE3)
581  SSSE3Impl
582 #elif defined(Vc_IMPL_SSE3)
583  SSE3Impl
584 #elif defined(Vc_IMPL_SSE2)
585  SSE2Impl
586 #endif
587 #ifdef Vc_IMPL_SSE4a
589 #ifdef Vc_IMPL_XOP
591 #ifdef Vc_IMPL_FMA4
593 #endif
594 #endif
595 #endif
596 #ifdef Vc_IMPL_POPCNT
598 #endif
599 #ifdef Vc_IMPL_FMA
601 #endif
602 #ifdef Vc_IMPL_BMI2
604 #endif
605 #ifdef Vc_USE_VEX_CODING
607 #endif
608  >;
609 
610 } // namespace Vc
611 
612 #include "version.h"
613 
614 #endif // VC_GLOBAL_H_
615 
616 // vim: foldmethod=marker
Intel Xeon Phi.
Definition: global.h:486
ExtraInstructions
The list of available instructions is not easily described by a linear list of instruction sets...
Definition: global.h:500
Align on boundary of page sizes.
Definition: global.h:456
static constexpr bool is_between(Implementation low, Implementation high)
Returns whether the current Vc::Implementation implements at least low and at most high...
Definition: global.h:547
Support for FMA instructions (3 operand variant)
Definition: global.h:512
Implementation
Enum to identify a certain SIMD instruction set.
Definition: global.h:468
static constexpr bool runs_on(unsigned int extraInstructions)
Returns whether the current code would run on a CPU providing extraInstructions.
Definition: global.h:555
This class identifies the specific implementation Vc uses in the current translation unit in terms of...
Definition: global.h:532
Support for BMI2 instructions.
Definition: global.h:516
Support for XOP instructions.
Definition: global.h:506
MallocAlignment
Enum that specifies the alignment and padding restrictions to use for memory allocation with Vc::malloc.
Definition: global.h:438
Support for the population count instruction.
Definition: global.h:508
Support for SSE4a instructions.
Definition: global.h:510
static constexpr bool is(Implementation impl)
Returns whether impl is the current Vc::Implementation.
Definition: global.h:539
x86 SSE + SSE2
Definition: global.h:472
x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1 + SSE4.2
Definition: global.h:480
Align on boundary of cache line sizes.
Definition: global.h:450
Support for ternary instruction coding (VEX)
Definition: global.h:514
x86 SSE + SSE2 + SSE3 + SSSE3
Definition: global.h:476
x86 SSE + SSE2 + SSE3
Definition: global.h:474
Support for FMA4 instructions.
Definition: global.h:504
Vector Classes Namespace.
Definition: cpuid.h:32
static constexpr Implementation current()
Returns the currently used Vc::Implementation.
Definition: global.h:534
uses only fundamental types
Definition: global.h:470
Align on boundary of vector sizes.
Definition: global.h:444
x86 AVX + AVX2
Definition: global.h:484
Support for float16 conversions in hardware.
Definition: global.h:502
x86 AVX
Definition: global.h:482
x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1
Definition: global.h:478