Vc  1.3.2-dev
SIMD Vector Classes for C++
global.h
1 /* This file is part of the Vc library. {{{
2 Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>
3 
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions are met:
6  * Redistributions of source code must retain the above copyright
7  notice, this list of conditions and the following disclaimer.
8  * Redistributions in binary form must reproduce the above copyright
9  notice, this list of conditions and the following disclaimer in the
10  documentation and/or other materials provided with the distribution.
11  * Neither the names of contributing organizations nor the
12  names of its contributors may be used to endorse or promote products
13  derived from this software without specific prior written permission.
14 
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 
26 }}}*/
27 
// Include guard: the remainder of this header is processed only once per
// translation unit.
28 #ifndef VC_GLOBAL_H_
29 #define VC_GLOBAL_H_
30 
31 #include <cstdint>
32 
// Documentation-only branch, taken when DOXYGEN is defined. The compiler
// identification macros are defined here and then undefined again right away.
// NOTE(review): in this listing Vc_GCC has no matching #undef, so it stays
// defined whenever DOXYGEN is defined - confirm whether that is intended.
33 #ifdef DOXYGEN
34 
47 #define Vc_ICC __INTEL_COMPILER_BUILD_DATE
48 #undef Vc_ICC
49 
56 #define Vc_CLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
57 #undef Vc_CLANG
58 
65 #define Vc_APPLECLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
66 #undef Vc_APPLECLANG
67 
74 #define Vc_GCC (__GNUC__ * 0x10000 + __GNUC_MINOR__ * 0x100 + __GNUC_PATCHLEVEL__)
75 
82 #define Vc_MSVC _MSC_FULL_VER
83 #undef Vc_MSVC
84 
85 
86 #else // DOXYGEN
87 
88 // Compiler defines
// Exactly one branch of the chain below is taken, so exactly one of Vc_ICC,
// Vc_OPEN64, Vc_APPLECLANG, Vc_CLANG, Vc_GCC, Vc_MSVC or
// Vc_UNSUPPORTED_COMPILER is defined by it. The clang and GCC variants encode
// the version as major * 0x10000 + minor * 0x100 + patchlevel.
// The vendor-specific tests come before the generic __GNUC__ test
// (presumably because those compilers also define __GNUC__ - TODO confirm).
89 #ifdef __INTEL_COMPILER
90 #define Vc_ICC __INTEL_COMPILER_BUILD_DATE
91 #elif defined(__OPENCC__)
92 #define Vc_OPEN64 1
93 #elif defined(__clang__) && defined(__APPLE__) && __clang_major__ >= 6
94 // this is going to break :-(
// NOTE(review): presumably Apple's clang uses its own version numbering, which
// is why it gets a separate macro from Vc_CLANG - confirm.
95 #define Vc_APPLECLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
96 #elif defined(__clang__)
97 #define Vc_CLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
98 #elif defined(__GNUC__)
99 #define Vc_GCC (__GNUC__ * 0x10000 + __GNUC_MINOR__ * 0x100 + __GNUC_PATCHLEVEL__)
100 #elif defined(_MSC_VER)
101 #define Vc_MSVC _MSC_FULL_VER
102 #else
103 #define Vc_UNSUPPORTED_COMPILER 1
104 #endif
105 
// For Vc_GCC >= 0x60000 (GCC 6.0.0 in the encoding used by Vc_GCC) the current
// diagnostic state is pushed and -Wignored-attributes is silenced.
// Vc_RESET_DIAGNOSTICS then expands to the matching "diagnostic pop"; for all
// other compilers it expands to nothing.
106 #if defined Vc_GCC && Vc_GCC >= 0x60000
107 #define Vc_RESET_DIAGNOSTICS _Pragma("GCC diagnostic pop")
108 #pragma GCC diagnostic push
109 #pragma GCC diagnostic ignored "-Wignored-attributes"
110 #else
111 #define Vc_RESET_DIAGNOSTICS
112 #endif
113 
// Language standard check. A __cplusplus value below 201103 is rejected unless
// the compiler is MSVC with _MSC_VER >= 1900. Vc_CXX14 is defined for
// __cplusplus >= 201402L, and additionally Vc_CXX17 for __cplusplus > 201700L.
114 #if __cplusplus < 201103 && (!defined Vc_MSVC || _MSC_VER < 1900)
115 # error "Vc requires support for C++11."
116 #elif __cplusplus >= 201402L
117 # define Vc_CXX14 1
118 # if __cplusplus > 201700L
119 # define Vc_CXX17 1
120 # endif
121 #endif
122 
// Vc_GNU_ASM: set for every compiler defining __GNUC__, unless the user opts
// out by defining Vc_NO_INLINE_ASM.
123 #if defined(__GNUC__) && !defined(Vc_NO_INLINE_ASM)
124 #define Vc_GNU_ASM 1
125 #endif
126 
// max_align_t availability: Vc_GCC gets Vc_HAVE_MAX_ALIGN_T; every other
// compiler except Vc_CLANG and Vc_ICC gets Vc_HAVE_STD_MAX_ALIGN_T.
// NOTE(review): Vc_APPLECLANG is not excluded by the #elif and therefore ends
// up with Vc_HAVE_STD_MAX_ALIGN_T - confirm that this is intended.
127 #ifdef Vc_GCC
128 # define Vc_HAVE_MAX_ALIGN_T 1
129 #elif !defined(Vc_CLANG) && !defined(Vc_ICC)
130 // Clang/ICC don't provide max_align_t at all
131 # define Vc_HAVE_STD_MAX_ALIGN_T 1
132 #endif
133 
// Vc_USE_BUILTIN_VECTOR_TYPES: set for GCC, clang and Apple clang.
134 #if defined(Vc_GCC) || defined(Vc_CLANG) || defined Vc_APPLECLANG
135 #define Vc_USE_BUILTIN_VECTOR_TYPES 1
136 #endif
137 
// Calling-convention macros: __cdecl / __vectorcall on MSVC, empty elsewhere.
138 #ifdef Vc_MSVC
139 # define Vc_CDECL __cdecl
140 # define Vc_VDECL __vectorcall
141 #else
142 # define Vc_CDECL
143 # define Vc_VDECL
144 #endif
145 
146 /* Define the following strings to a unique integer, which is the only type the preprocessor can
147  * compare. This makes it possible to use -DVc_IMPL=SSE3. The preprocessor will then consider Vc_IMPL and SSE3
148  * to be equal. Of course, it is important to undefine the strings later on!
149  */
// Base instruction-set selectors. Each value lies in the bits covered by
// IMPL_MASK and is compared with (Vc_IMPL & IMPL_MASK) further down.
150 #define Scalar 0x00100000
151 #define SSE 0x00200000
152 #define SSE2 0x00300000
153 #define SSE3 0x00400000
154 #define SSSE3 0x00500000
155 #define SSE4_1 0x00600000
156 #define SSE4_2 0x00700000
157 #define AVX 0x00800000
158 #define AVX2 0x00900000
159 #define MIC 0x00A00000
160 
// Extension flags. Each one is a single bit in the low part of the value and
// is tested individually with (Vc_IMPL & <flag>) further down.
161 #define XOP 0x00000001
162 #define FMA4 0x00000002
163 #define F16C 0x00000004
164 #define POPCNT 0x00000008
165 #define SSE4a 0x00000010
166 #define FMA 0x00000020
167 #define BMI2 0x00000040
168 
// IMPL_MASK selects the base selector bits, EXT_MASK the extension flag bits.
// NOTE(review): EXT_MASK is defined and later undefined but not otherwise
// referenced in this listing.
169 #define IMPL_MASK 0xFFF00000
170 #define EXT_MASK 0x000FFFFF
171 
// On MSVC, provide the __SSE__ / __SSE2__ macros that the detection logic
// below tests, unless they are already defined:
//  - with _M_IX86_FP available: >= 1 defines __SSE__, >= 2 defines __SSE2__;
//  - otherwise, with _M_AMD64 defined, both are defined.
172 #ifdef Vc_MSVC
173 # ifdef _M_IX86_FP
174 # if _M_IX86_FP >= 1
175 # ifndef __SSE__
176 # define __SSE__ 1
177 # endif
178 # endif
179 # if _M_IX86_FP >= 2
180 # ifndef __SSE2__
181 # define __SSE2__ 1
182 # endif
183 # endif
184 # elif defined(_M_AMD64)
185 // If the target is x86_64 then SSE2 is guaranteed
186 # ifndef __SSE__
187 # define __SSE__ 1
188 # endif
189 # ifndef __SSE2__
190 # define __SSE2__ 1
191 # endif
192 # endif
193 #endif
194 
// On ICC, define __POPCNT__ when it is missing but __SSE4_2__ or __SSE4A__ is
// defined.
195 #if defined Vc_ICC && !defined __POPCNT__
196 # if defined __SSE4_2__ || defined __SSE4A__
197 # define __POPCNT__ 1
198 # endif
199 #endif
200 
// Reject the pre-1.0 spelling of the selection macro (upper-case 'C').
201 #ifdef VC_IMPL
202 #error "You are using the old VC_IMPL macro. Since Vc 1.0 all Vc macros start with Vc_, i.e. a lower-case 'c'"
203 #endif
204 
// Auto-detection branch: the user did not define Vc_IMPL, so the Vc_IMPL_*
// macros are derived from the instruction-set macros predefined by the
// compiler (or by the MSVC/ICC fix-ups above).
205 #ifndef Vc_IMPL
206 
// Base implementation: MIC, then AVX2 (which also sets Vc_IMPL_AVX), then AVX.
207 # if defined(__MIC__)
208 # define Vc_IMPL_MIC 1
209 # elif defined(__AVX2__)
210 # define Vc_IMPL_AVX2 1
211 # define Vc_IMPL_AVX 1
212 # elif defined(__AVX__)
213 # define Vc_IMPL_AVX 1
214 # else
// No MIC/AVX: every SSE level is tested independently. Vc_IMPL_SSE may be
// defined several times here, always with the same replacement text.
215 # if defined(__SSE4_2__)
216 # define Vc_IMPL_SSE 1
217 # define Vc_IMPL_SSE4_2 1
218 # endif
219 # if defined(__SSE4_1__)
220 # define Vc_IMPL_SSE 1
221 # define Vc_IMPL_SSE4_1 1
222 # endif
223 # if defined(__SSE3__)
224 # define Vc_IMPL_SSE 1
225 # define Vc_IMPL_SSE3 1
226 # endif
227 # if defined(__SSSE3__)
228 # define Vc_IMPL_SSE 1
229 # define Vc_IMPL_SSSE3 1
230 # endif
231 # if defined(__SSE2__)
232 # define Vc_IMPL_SSE 1
233 # define Vc_IMPL_SSE2 1
234 # endif
235 
// Fall back to the scalar implementation when no SSE level was detected.
236 # if defined(Vc_IMPL_SSE)
237  // nothing
238 # else
239 # define Vc_IMPL_Scalar 1
240 # endif
241 # endif
// Extension flags are only picked up for non-scalar implementations.
242 # if !defined(Vc_IMPL_Scalar)
243 # ifdef __FMA4__
244 # define Vc_IMPL_FMA4 1
245 # endif
246 # ifdef __XOP__
247 # define Vc_IMPL_XOP 1
248 # endif
249 # ifdef __F16C__
250 # define Vc_IMPL_F16C 1
251 # endif
252 # ifdef __POPCNT__
253 # define Vc_IMPL_POPCNT 1
254 # endif
255 # ifdef __SSE4A__
256 # define Vc_IMPL_SSE4a 1
257 # endif
258 # ifdef __FMA__
259 # define Vc_IMPL_FMA 1
260 # endif
261 # ifdef __BMI2__
262 # define Vc_IMPL_BMI2 1
263 # endif
264 # endif
265 
// Explicit branch: the user defined Vc_IMPL. Its IMPL_MASK bits pick the base
// implementation and its low bits are tested one by one as extension flags.
266 #else // Vc_IMPL
267 
268 # if (Vc_IMPL & IMPL_MASK) == MIC // MIC supersedes everything else
269 # define Vc_IMPL_MIC 1
270 # ifdef __POPCNT__
271 # define Vc_IMPL_POPCNT 1
272 # endif
273 # elif (Vc_IMPL & IMPL_MASK) == AVX2 // AVX2 supersedes SSE
274 # define Vc_IMPL_AVX2 1
275 # define Vc_IMPL_AVX 1
276 # elif (Vc_IMPL & IMPL_MASK) == AVX // AVX supersedes SSE
277 # define Vc_IMPL_AVX 1
278 # elif (Vc_IMPL & IMPL_MASK) == Scalar
279 # define Vc_IMPL_Scalar 1
// Each explicitly requested SSE level also defines every lower level.
280 # elif (Vc_IMPL & IMPL_MASK) == SSE4_2
281 # define Vc_IMPL_SSE4_2 1
282 # define Vc_IMPL_SSE4_1 1
283 # define Vc_IMPL_SSSE3 1
284 # define Vc_IMPL_SSE3 1
285 # define Vc_IMPL_SSE2 1
286 # define Vc_IMPL_SSE 1
287 # elif (Vc_IMPL & IMPL_MASK) == SSE4_1
288 # define Vc_IMPL_SSE4_1 1
289 # define Vc_IMPL_SSSE3 1
290 # define Vc_IMPL_SSE3 1
291 # define Vc_IMPL_SSE2 1
292 # define Vc_IMPL_SSE 1
293 # elif (Vc_IMPL & IMPL_MASK) == SSSE3
294 # define Vc_IMPL_SSSE3 1
295 # define Vc_IMPL_SSE3 1
296 # define Vc_IMPL_SSE2 1
297 # define Vc_IMPL_SSE 1
298 # elif (Vc_IMPL & IMPL_MASK) == SSE3
299 # define Vc_IMPL_SSE3 1
300 # define Vc_IMPL_SSE2 1
301 # define Vc_IMPL_SSE 1
302 # elif (Vc_IMPL & IMPL_MASK) == SSE2
303 # define Vc_IMPL_SSE2 1
304 # define Vc_IMPL_SSE 1
// Plain "SSE" requests the SSE family without naming a level: the levels are
// then taken from the compiler's predefined macros.
305 # elif (Vc_IMPL & IMPL_MASK) == SSE
306 # define Vc_IMPL_SSE 1
307 # if defined(__SSE4_2__)
308 # define Vc_IMPL_SSE4_2 1
309 # endif
310 # if defined(__SSE4_1__)
311 # define Vc_IMPL_SSE4_1 1
312 # endif
313 # if defined(__SSE3__)
314 # define Vc_IMPL_SSE3 1
315 # endif
316 # if defined(__SSSE3__)
317 # define Vc_IMPL_SSSE3 1
318 # endif
319 # if defined(__SSE2__)
320 # define Vc_IMPL_SSE2 1
321 # endif
322 # elif (Vc_IMPL & IMPL_MASK) == 0 && (Vc_IMPL & SSE4a)
323  // this is for backward compatibility only where SSE4a was included in the main
324  // line of available SIMD instruction sets
325 # define Vc_IMPL_SSE3 1
326 # define Vc_IMPL_SSE2 1
327 # define Vc_IMPL_SSE 1
328 # endif
// Extension flags requested through the low bits of Vc_IMPL.
329 # if (Vc_IMPL & XOP)
330 # define Vc_IMPL_XOP 1
331 # endif
332 # if (Vc_IMPL & FMA4)
333 # define Vc_IMPL_FMA4 1
334 # endif
335 # if (Vc_IMPL & F16C)
336 # define Vc_IMPL_F16C 1
337 # endif
// POPCNT is enabled either by the explicit flag or, for non-scalar
// implementations, by the compiler's __POPCNT__ macro.
338 # if (!defined(Vc_IMPL_Scalar) && defined(__POPCNT__)) || (Vc_IMPL & POPCNT)
339 # define Vc_IMPL_POPCNT 1
340 # endif
341 # if (Vc_IMPL & SSE4a)
342 # define Vc_IMPL_SSE4a 1
343 # endif
344 # if (Vc_IMPL & FMA)
345 # define Vc_IMPL_FMA 1
346 # endif
347 # if (Vc_IMPL & BMI2)
348 # define Vc_IMPL_BMI2 1
349 # endif
// Vc_IMPL has been fully translated into Vc_IMPL_* macros; drop it.
350 # undef Vc_IMPL
351 
352 #endif // Vc_IMPL
353 
354 // If AVX is enabled in the compiler it will use VEX coding for the SIMD instructions.
// This depends on the compiler's __AVX__ macro only, not on the selected
// Vc_IMPL_* macros.
355 #ifdef __AVX__
356 # define Vc_USE_VEX_CODING 1
357 #endif
358 
359 #ifdef Vc_IMPL_AVX
360 // if we have AVX then we also have all SSE intrinsics
361 # define Vc_IMPL_SSE4_2 1
362 # define Vc_IMPL_SSE4_1 1
363 # define Vc_IMPL_SSSE3 1
364 # define Vc_IMPL_SSE3 1
365 # define Vc_IMPL_SSE2 1
366 # define Vc_IMPL_SSE 1
367 #endif
368 
// clang 3.6.x workaround (0x30600 <= Vc_CLANG < 0x30700). This must stay after
// the block above: the SSE macros defined there remain in place once the AVX
// macros are removed, which is what makes the fallback land on SSE4.
369 #if defined(Vc_CLANG) && Vc_CLANG >= 0x30600 && Vc_CLANG < 0x30700
370 # if defined(Vc_IMPL_AVX)
371 # warning "clang 3.6.x miscompiles AVX code, frequently losing 50% of the data. Vc will fall back to SSE4 instead."
372 # undef Vc_IMPL_AVX
373 # if defined(Vc_IMPL_AVX2)
374 # undef Vc_IMPL_AVX2
375 # endif
376 # endif
377 #endif
378 
// Sanity checks: some implementation must have been selected, and an SSE
// selection must include at least SSE2.
379 # if !defined(Vc_IMPL_Scalar) && !defined(Vc_IMPL_SSE) && !defined(Vc_IMPL_AVX) && !defined(Vc_IMPL_MIC)
380 # error "No suitable Vc implementation was selected! Probably Vc_IMPL was set to an invalid value."
381 # elif defined(Vc_IMPL_SSE) && !defined(Vc_IMPL_SSE2)
382 # error "SSE requested but no SSE2 support. Vc needs at least SSE2!"
383 # endif
384 
// Remove the temporary selector macros again. They were only needed so the
// preprocessor could compare Vc_IMPL against them; short names such as SSE or
// AVX should not remain defined as macros after this header.
385 #undef Scalar
386 #undef SSE
387 #undef SSE2
388 #undef SSE3
389 #undef SSSE3
390 #undef SSE4_1
391 #undef SSE4_2
392 #undef AVX
393 #undef AVX2
394 #undef MIC
395 
396 #undef XOP
397 #undef FMA4
398 #undef F16C
399 #undef POPCNT
400 #undef SSE4a
401 #undef FMA
402 #undef BMI2
403 
404 #undef IMPL_MASK
405 #undef EXT_MASK
406 
// Define exactly one Vc_DEFAULT_IMPL_* macro (with empty replacement text),
// chosen from the Vc_IMPL_* macros in the order MIC, AVX2, AVX, SSE, Scalar.
// The final #error is a guard for the case that none of them is defined.
407 #ifdef Vc_IMPL_MIC
408 #define Vc_DEFAULT_IMPL_MIC
409 #elif defined Vc_IMPL_AVX2
410 #define Vc_DEFAULT_IMPL_AVX2
411 #elif defined Vc_IMPL_AVX
412 #define Vc_DEFAULT_IMPL_AVX
413 #elif defined Vc_IMPL_SSE
414 #define Vc_DEFAULT_IMPL_SSE
415 #elif defined Vc_IMPL_Scalar
416 #define Vc_DEFAULT_IMPL_Scalar
417 #else
418 #error "Preprocessor logic broken. Please report a bug."
419 #endif
420 
// The library's declarations go into the versioned namespace Vc_1; "Vc" is a
// namespace alias for it. The empty namespace definition declares Vc_1 so
// that the alias on the following line has a namespace to refer to.
421 #define Vc_VERSIONED_NAMESPACE Vc_1
422 
423 namespace Vc_VERSIONED_NAMESPACE {}
424 namespace Vc = Vc_VERSIONED_NAMESPACE;
425 
426 #endif // DOXYGEN
427 
428 namespace Vc_VERSIONED_NAMESPACE
429 {
430 
// Integer aliases declared inside the Vc namespace, spelled with fundamental
// types rather than taken from <cstdint>.
// NOTE(review): the names imply 8/16/32/64-bit widths, which assumes an 8-bit
// char, 16-bit short, 32-bit int and 64-bit long long - confirm for every
// supported target.
431 typedef signed char int8_t;
432 typedef unsigned char uint8_t;
433 typedef signed short int16_t;
434 typedef unsigned short uint16_t;
435 typedef signed int int32_t;
436 typedef unsigned int uint32_t;
437 typedef signed long long int64_t;
438 typedef unsigned long long uint64_t;
439 
465 };
466 
// Identifies the base SIMD implementation; the underlying type is
// std::uint_least32_t.
// NOTE(review): this listing skips source lines 477-494, so the individual
// enumerators (ScalarImpl, SSE2Impl, ... as used by CurrentImplementation)
// are not visible here - consult the full header for their definitions.
476 enum Implementation : std::uint_least32_t { // TODO: make enum class
// Mask for the low 12 bits; ImplementationT::current() applies it to extract
// the Implementation part of a feature word.
495  ImplementationMask = 0xfff
496 };
497 
// Bit flags for instruction-set extensions; the underlying type is
// std::uint_least32_t. Every visible flag is a single bit at or above 0x01000,
// i.e. outside the bits covered by ImplementationMask (0xfff).
// NOTE(review): the listing skips several source lines in this enum and the
// values 0x01000 and 0x08000 do not appear, so presumably two more flags exist
// in the full header - confirm.
508 enum ExtraInstructions : std::uint_least32_t { // TODO: make enum class
512  Fma4Instructions = 0x02000,
514  XopInstructions = 0x04000,
518  Sse4aInstructions = 0x10000,
520  FmaInstructions = 0x20000,
522  VexInstructions = 0x40000,
524  Bmi2Instructions = 0x80000,
525  // PclmulqdqInstructions,
526  // AesInstructions,
527  // RdrandInstructions
// All bits above the low 12; together with ImplementationMask this covers a
// full 32-bit feature word.
528  ExtraInstructionsMask = 0xfffff000u
529 };
530 
540 template <unsigned int Features> struct ImplementationT {
542  static constexpr Implementation current()
543  {
544  return static_cast<Implementation>(Features & ImplementationMask);
545  }
547  static constexpr bool is(Implementation impl)
548  {
549  return static_cast<unsigned int>(impl) == current();
550  }
555  static constexpr bool is_between(Implementation low, Implementation high)
556  {
557  return static_cast<unsigned int>(low) <= current() &&
558  static_cast<unsigned int>(high) >= current();
559  }
563  static constexpr bool runs_on(unsigned int extraInstructions)
564  {
565  return (extraInstructions & Features & ExtraInstructionsMask) ==
566  (Features & ExtraInstructionsMask);
567  }
568 };
// Alias for ImplementationT instantiated according to the Vc_IMPL_* macros of
// the current translation unit. The first matching branch of the #ifdef chain
// supplies the base Implementation enumerator: Scalar is tested first, then
// MIC, AVX2, AVX and the SSE levels from SSE4.2 down to SSE2.
// NOTE(review): this listing skips source lines 596, 598, 600, 605, 608, 611
// and 614, so the extension #ifdef blocks below appear empty. Presumably each
// of them adds the matching ExtraInstructions flag to the template argument -
// confirm against the full header.
575 using CurrentImplementation = ImplementationT<
576 #ifdef Vc_IMPL_Scalar
577  ScalarImpl
578 #elif defined(Vc_IMPL_MIC)
579  MICImpl
580 #elif defined(Vc_IMPL_AVX2)
581  AVX2Impl
582 #elif defined(Vc_IMPL_AVX)
583  AVXImpl
584 #elif defined(Vc_IMPL_SSE4_2)
585  SSE42Impl
586 #elif defined(Vc_IMPL_SSE4_1)
587  SSE41Impl
588 #elif defined(Vc_IMPL_SSSE3)
589  SSSE3Impl
590 #elif defined(Vc_IMPL_SSE3)
591  SSE3Impl
592 #elif defined(Vc_IMPL_SSE2)
593  SSE2Impl
594 #endif
// The XOP test is nested inside the SSE4a test, and the FMA4 test inside the
// XOP test.
595 #ifdef Vc_IMPL_SSE4a
597 #ifdef Vc_IMPL_XOP
599 #ifdef Vc_IMPL_FMA4
601 #endif
602 #endif
603 #endif
604 #ifdef Vc_IMPL_POPCNT
606 #endif
607 #ifdef Vc_IMPL_FMA
609 #endif
610 #ifdef Vc_IMPL_BMI2
612 #endif
613 #ifdef Vc_USE_VEX_CODING
615 #endif
616  >;
617 
618 } // namespace Vc
619 
620 #include "version.h"
621 
622 #endif // VC_GLOBAL_H_
623 
624 // vim: foldmethod=marker
Intel Xeon Phi.
Definition: global.h:494
ExtraInstructions
The list of available instructions is not easily described by a linear list of instruction sets...
Definition: global.h:508
Align on boundary of page sizes (e.g.
Definition: global.h:464
static constexpr bool is_between(Implementation low, Implementation high)
Returns whether the current Vc::Implementation implements at least low and at most high...
Definition: global.h:555
Support for FMA instructions (3 operand variant)
Definition: global.h:520
Implementation
Enum to identify a certain SIMD instruction set.
Definition: global.h:476
static constexpr bool runs_on(unsigned int extraInstructions)
Returns whether the current code would run on a CPU providing extraInstructions.
Definition: global.h:563
This class identifies the specific implementation Vc uses in the current translation unit in terms of...
Definition: global.h:540
Support for BMI2 instructions.
Definition: global.h:524
Support for XOP instructions.
Definition: global.h:514
MallocAlignment
Enum that specifies the alignment and padding restrictions to use for memory allocation with Vc::mall...
Definition: global.h:446
Support for the population count instruction.
Definition: global.h:516
Support for SSE4a instructions.
Definition: global.h:518
static constexpr bool is(Implementation impl)
Returns whether impl is the current Vc::Implementation.
Definition: global.h:547
x86 SSE + SSE2
Definition: global.h:480
ImplementationT< > CurrentImplementation
Identifies the Vc implementation used in the current translation unit.
Definition: global.h:616
x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1 + SSE4.2
Definition: global.h:488
Align on boundary of cache line sizes (e.g.
Definition: global.h:458
Support for ternary instruction coding (VEX)
Definition: global.h:522
x86 SSE + SSE2 + SSE3 + SSSE3
Definition: global.h:484
x86 SSE + SSE2 + SSE3
Definition: global.h:482
Support for FMA4 instructions.
Definition: global.h:512
Vector Classes Namespace.
Definition: cpuid.h:32
static constexpr Implementation current()
Returns the currently used Vc::Implementation.
Definition: global.h:542
uses only fundamental types
Definition: global.h:478
Align on boundary of vector sizes (e.g.
Definition: global.h:452
x86 AVX + AVX2
Definition: global.h:492
Support for float16 conversions in hardware.
Definition: global.h:510
x86 AVX
Definition: global.h:490
x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1
Definition: global.h:486