Vc  1.3.2-dev
SIMD Vector Classes for C++
memory.h
1 /* This file is part of the Vc library. {{{
2 Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>
3 
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions are met:
6  * Redistributions of source code must retain the above copyright
7  notice, this list of conditions and the following disclaimer.
8  * Redistributions in binary form must reproduce the above copyright
9  notice, this list of conditions and the following disclaimer in the
10  documentation and/or other materials provided with the distribution.
11  * Neither the names of contributing organizations nor the
12  names of its contributors may be used to endorse or promote products
13  derived from this software without specific prior written permission.
14 
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 
26 }}}*/
27 
28 #ifndef VC_COMMON_MEMORY_H_
29 #define VC_COMMON_MEMORY_H_
30 
31 #include "memorybase.h"
32 #include <assert.h>
33 #include <algorithm>
34 #include <cstring>
35 #include <cstddef>
36 #include <initializer_list>
37 #include "memoryfwd.h"
38 #include "malloc.h"
39 #include "macros.h"
40 
41 namespace Vc_VERSIONED_NAMESPACE
42 {
74 template<typename T, Vc::MallocAlignment A>
75 Vc_ALWAYS_INLINE T *malloc(size_t n)
76 {
77  return static_cast<T *>(Common::malloc<A>(n * sizeof(T)));
78 }
79 
101 template<typename T>
102 Vc_ALWAYS_INLINE void free(T *p)
103 {
104  Common::free(p);
105 }
106 
107 namespace Common
108 {
/**
 * Compile-time helper that rounds \p Size up to the next multiple of
 * \c V::Size.
 *
 * The remainder is computed with a bit mask (\c V::Size - 1), so the result is
 * a true multiple only when \c V::Size is a power of two.
 *
 * \tparam V    Type providing the integral constant \c V::Size.
 * \tparam Size The number of entries that needs to be padded.
 */
template <typename V, size_t Size>
struct _MemorySizeCalculation
{
    enum AlignmentCalculations {
        Alignment     = V::Size,                  ///< granularity of the rounding
        AlignmentMask = Alignment - 1,            ///< selects the bits below Alignment
        MaskedSize    = Size & AlignmentMask,     ///< entries beyond the last full multiple
        Padding       = Alignment - MaskedSize,   ///< entries missing to the next multiple
        PaddedSize    = MaskedSize != 0 ? Size + Padding : Size  ///< Size rounded up
    };
};
119 
130 template <typename V, size_t Size1, size_t Size2, bool InitPadding>
131 #ifdef Vc_RECURSIVE_MEMORY
132 class Memory : public MemoryBase<V, Memory<V, Size1, Size2, InitPadding>, 2,
133  Memory<V, Size2, 0, InitPadding>>
134 #else
135 class Memory : public AlignedBase<V::MemoryAlignment>,
136  public MemoryBase<V, Memory<V, Size1, Size2, InitPadding>, 2,
137  Memory<V, Size2, 0, false>>
138 #endif
139 {
140 public:
141  typedef typename V::EntryType EntryType;
142 
143 private:
144 #ifdef Vc_RECURSIVE_MEMORY
146 #else
148 #endif
150  friend class MemoryBase<V, Memory<V, Size1, Size2, InitPadding>, 2, RowMemory>;
151  friend class MemoryDimensionBase<V, Memory<V, Size1, Size2, InitPadding>, 2,
152  RowMemory>;
153  enum : size_t {
154  Alignment = V::MemoryAlignment,
155  PaddedSize2 = _MemorySizeCalculation<V, Size2>::PaddedSize
156  };
157  alignas(static_cast<size_t>(Alignment)) // GCC complains about 'is not an
158  // integer constant' unless the
159  // static_cast is present
160 #ifdef Vc_RECURSIVE_MEMORY
161  RowMemory m_mem[Size1];
162 #else
163  EntryType m_mem[Size1][PaddedSize2];
164 #endif
165 
166  public:
167  using Base::vector;
168  enum Constants {
169  RowCount = Size1,
170  VectorsCount = PaddedSize2 / V::Size
171  };
172 
173 #ifdef Vc_RECURSIVE_MEMORY
174  Memory() = default;
175 #else
176  Memory()
177  {
178  if (InitPadding) {
179  if (Size1 > 32)
180  for (size_t i = 0; i < Size1; ++i) {
181  V::Zero().store(&m_mem[i][PaddedSize2 - V::Size], Vc::Streaming);
182  }
183  }
184  }
185 #endif
186 
192  static constexpr size_t rowsCount() { return RowCount; }
201  static constexpr size_t entriesCount() { return Size1 * Size2; }
207  static constexpr size_t vectorsCount() { return VectorsCount * Size1; }
208 
218  template<typename Parent, typename RM>
219  Vc_ALWAYS_INLINE Memory &operator=(const MemoryBase<V, Parent, 2, RM> &rhs) {
220  assert(vectorsCount() == rhs.vectorsCount());
221  Detail::copyVectors(*this, rhs);
222  return *this;
223  }
224 
225  Vc_ALWAYS_INLINE Memory &operator=(const Memory &rhs) {
226  Detail::copyVectors(*this, rhs);
227  return *this;
228  }
229 
237  inline Memory &operator=(const V &v) {
238  for (size_t i = 0; i < vectorsCount(); ++i) {
239  vector(i) = v;
240  }
241  return *this;
242  }
243 };
244 
288 template <typename V, size_t Size, bool InitPadding>
289 class Memory<V, Size, 0u, InitPadding> :
290 #ifndef Vc_RECURSIVE_MEMORY
291  public AlignedBase<V::MemoryAlignment>,
292 #endif
293  public MemoryBase<V, Memory<V, Size, 0u, InitPadding>, 1, void>
294  {
295  public:
296  typedef typename V::EntryType EntryType;
297  private:
299  friend class MemoryBase<V, Memory<V, Size, 0u, InitPadding>, 1, void>;
300  friend class MemoryDimensionBase<V, Memory<V, Size, 0u, InitPadding>, 1, void>;
301  enum : size_t {
302  Alignment = V::MemoryAlignment, // in Bytes
303  MaskedSize = Size & (V::Size - 1), // the fraction of Size that exceeds
304  // an integral multiple of V::Size
305  Padding = V::Size - MaskedSize,
306  PaddedSize = MaskedSize == 0 ? Size : Size + Padding
307  };
308  alignas(static_cast<size_t>(Alignment)) // GCC complains about 'is not an
309  // integer constant' unless the
310  // static_cast is present
311  EntryType m_mem[PaddedSize];
312 
313  public:
314  using Base::vector;
315  enum Constants {
316  EntriesCount = Size,
317  VectorsCount = PaddedSize / V::Size
318  };
319 
320  Memory()
321  {
322  if (InitPadding) {
323  Base::lastVector() = V::Zero();
324  }
325  }
326 
327  Memory(std::initializer_list<EntryType> init)
328  {
329  Vc_ASSERT(init.size() <= Size);
330  Base::lastVector() = V::Zero();
331  std::copy(init.begin(), init.end(), &m_mem[0]);
332  }
333 
356  static Vc_ALWAYS_INLINE Vc_CONST Memory<V, Size, 0u, false> &fromRawData(EntryType *ptr)
357  {
358  // DANGER! This placement new has to use the right address. If the compiler decides
359  // RowMemory requires padding before the actual data then the address has to be adjusted
360  // accordingly
361  char *addr = reinterpret_cast<char *>(ptr);
362  typedef Memory<V, Size, 0u, false> MM;
363  addr -= Vc_OFFSETOF(MM, m_mem);
364  return *new(addr) MM;
365  }
366 
372  static constexpr size_t entriesCount() { return EntriesCount; }
373 
379  static constexpr size_t vectorsCount() { return VectorsCount; }
380 
381  inline Memory(const Memory &rhs)
382  {
383  Detail::copyVectors(*this, rhs);
384  }
385 
386  template <size_t S> inline Memory(const Memory<V, S> &rhs)
387  {
388  assert(vectorsCount() == rhs.vectorsCount());
389  Detail::copyVectors(*this, rhs);
390  }
391 
392  inline Memory &operator=(const Memory &rhs)
393  {
394  Detail::copyVectors(*this, rhs);
395  return *this;
396  }
397 
398  template <size_t S> inline Memory &operator=(const Memory<V, S> &rhs)
399  {
400  assert(vectorsCount() == rhs.vectorsCount());
401  Detail::copyVectors(*this, rhs);
402  return *this;
403  }
404 
405  Vc_ALWAYS_INLINE Memory &operator=(const EntryType *rhs) {
406  std::memcpy(m_mem, rhs, entriesCount() * sizeof(EntryType));
407  return *this;
408  }
409  inline Memory &operator=(const V &v) {
410  for (size_t i = 0; i < vectorsCount(); ++i) {
411  vector(i) = v;
412  }
413  return *this;
414  }
415  };
416 
457  template<typename V> class Memory<V, 0u, 0u, true> : public MemoryBase<V, Memory<V, 0u, 0u, true>, 1, void>
458  {
459  public:
460  typedef typename V::EntryType EntryType;
461  private:
462  typedef MemoryBase<V, Memory<V>, 1, void> Base;
463  friend class MemoryBase<V, Memory<V>, 1, void>;
464  friend class MemoryDimensionBase<V, Memory<V>, 1, void>;
465  enum InternalConstants {
466  Alignment = V::Size,
467  AlignmentMask = Alignment - 1
468  };
469  size_t m_entriesCount;
470  size_t m_vectorsCount;
471  EntryType *m_mem;
472  size_t calcPaddedEntriesCount(size_t x)
473  {
474  size_t masked = x & AlignmentMask;
475  return (masked == 0 ? x : x + (Alignment - masked));
476  }
477  public:
478  using Base::vector;
479 
487  Vc_ALWAYS_INLINE Memory(size_t size)
488  : m_entriesCount(size),
489  m_vectorsCount(calcPaddedEntriesCount(m_entriesCount)),
490  m_mem(Vc::malloc<EntryType, Vc::AlignOnVector>(m_vectorsCount))
491  {
492  m_vectorsCount /= V::Size;
493  Base::lastVector() = V::Zero();
494  }
495 
503  template<typename Parent, typename RM>
504  Vc_ALWAYS_INLINE Memory(const MemoryBase<V, Parent, 1, RM> &rhs)
505  : m_entriesCount(rhs.entriesCount()),
506  m_vectorsCount(rhs.vectorsCount()),
507  m_mem(Vc::malloc<EntryType, Vc::AlignOnVector>(m_vectorsCount * V::Size))
508  {
509  Detail::copyVectors(*this, rhs);
510  }
511 
519  Vc_ALWAYS_INLINE Memory(const Memory &rhs)
520  : m_entriesCount(rhs.entriesCount()),
521  m_vectorsCount(rhs.vectorsCount()),
522  m_mem(Vc::malloc<EntryType, Vc::AlignOnVector>(m_vectorsCount * V::Size))
523  {
524  Detail::copyVectors(*this, rhs);
525  }
526 
530  Vc_ALWAYS_INLINE ~Memory()
531  {
532  Vc::free(m_mem);
533  }
534 
540  inline void swap(Memory &rhs) {
541  std::swap(m_mem, rhs.m_mem);
542  std::swap(m_entriesCount, rhs.m_entriesCount);
543  std::swap(m_vectorsCount, rhs.m_vectorsCount);
544  }
545 
549  Vc_ALWAYS_INLINE Vc_PURE size_t entriesCount() const { return m_entriesCount; }
550 
554  Vc_ALWAYS_INLINE Vc_PURE size_t vectorsCount() const { return m_vectorsCount; }
555 
565  template<typename Parent, typename RM>
566  Vc_ALWAYS_INLINE Memory &operator=(const MemoryBase<V, Parent, 1, RM> &rhs) {
567  assert(vectorsCount() == rhs.vectorsCount());
568  Detail::copyVectors(*this, rhs);
569  return *this;
570  }
571 
572  Vc_ALWAYS_INLINE Memory &operator=(const Memory &rhs) {
573  assert(vectorsCount() == rhs.vectorsCount());
574  Detail::copyVectors(*this, rhs);
575  return *this;
576  }
577 
587  Vc_ALWAYS_INLINE Memory &operator=(const EntryType *rhs) {
588  std::memcpy(m_mem, rhs, entriesCount() * sizeof(EntryType));
589  return *this;
590  }
591 };
592 
603 Vc_ALWAYS_INLINE void prefetchForOneRead(const void *addr)
604 {
605  Vc::Detail::prefetchForOneRead(addr, VectorAbi::Best<float>());
606 }
607 
620 Vc_ALWAYS_INLINE void prefetchForModify(const void *addr)
621 {
622  Vc::Detail::prefetchForModify(addr, VectorAbi::Best<float>());
623 }
624 
635 Vc_ALWAYS_INLINE void prefetchClose(const void *addr)
636 {
637  Vc::Detail::prefetchClose(addr, VectorAbi::Best<float>());
638 }
639 
650 Vc_ALWAYS_INLINE void prefetchMid(const void *addr)
651 {
652  Vc::Detail::prefetchMid(addr, VectorAbi::Best<float>());
653 }
654 
665 Vc_ALWAYS_INLINE void prefetchFar(const void *addr)
666 {
667  Vc::Detail::prefetchFar(addr, VectorAbi::Best<float>());
668 }
669 } // namespace Common
670 
671 using Common::Memory;
675 using Common::prefetchMid;
676 using Common::prefetchFar;
677 } // namespace Vc
678 
679 namespace std
680 {
681  template<typename V> Vc_ALWAYS_INLINE void swap(Vc::Memory<V> &a, Vc::Memory<V> &b) { a.swap(b); }
682 } // namespace std
683 
684 #endif // VC_COMMON_MEMORY_H_
void free(T *p)
Frees memory that was allocated with Vc::malloc.
Definition: memory.h:102
void prefetchClose(const void *addr)
Prefetch the cacheline containing addr to L1 cache.
Definition: memory.h:635
Memory & operator=(const EntryType *rhs)
Overwrite all entries with the values stored in the memory at rhs.
Definition: memory.h:587
void prefetchMid(const void *addr)
Prefetch the cacheline containing addr to L2 cache.
Definition: memory.h:650
Memory(size_t size)
Allocate enough memory to access size values of type V::EntryType.
Definition: memory.h:487
static constexpr size_t entriesCount()
Definition: memory.h:372
size_t vectorsCount() const
Definition: memorybase.h:385
void swap(Adapter< S, T, N > &a, std::size_t i, S &x)
Swaps one scalar object x with a SIMD slot at offset i in the simdized object a.
Definition: simdize.h:1106
void prefetchForModify(const void *addr)
Prefetch the cacheline containing addr for modification.
Definition: memory.h:620
Definition: vector.h:257
static constexpr size_t vectorsCount()
Definition: memory.h:207
A helper class for fixed-size two-dimensional arrays.
Definition: memory.h:135
Memory(const MemoryBase< V, Parent, 1, RM > &rhs)
Copy the memory into a new memory area.
Definition: memory.h:504
Memory & operator=(const MemoryBase< V, Parent, 1, RM > &rhs)
Overwrite all entries with the values stored in rhs.
Definition: memory.h:566
Common interface to all Memory classes, independent of allocation on the stack or heap.
Definition: memorybase.h:361
Common::AdaptSubscriptOperator< std::vector< T, Allocator >> vector
An adapted std::vector container with an additional subscript operator which implements gather and scatter operations.
Definition: vector:51
Helper class to ensure a given alignment.
Definition: alignedbase.h:67
Memory & operator=(const V &v)
Initialize all data with the given vector.
Definition: memory.h:237
void prefetchForOneRead(const void *addr)
Prefetch the cacheline containing addr for a single read access.
Definition: memory.h:603
void prefetchFar(const void *addr)
Prefetch the cacheline containing addr to L3 cache.
Definition: memory.h:665
Memory(const Memory &rhs)
Overload of the above function.
Definition: memory.h:519
static constexpr size_t entriesCount()
Definition: memory.h:201
T * malloc(size_t n)
Allocates memory on the Heap with alignment and padding suitable for vectorized access.
Definition: memory.h:75
constexpr StreamingTag Streaming
Use this object for a flags parameter to request streaming loads and stores.
constexpr VectorSpecialInitializerZero Zero
The special object Vc::Zero can be used to construct Vector and Mask objects initialized to zero/false.
Definition: types.h:84
Vector Classes Namespace.
Definition: cpuid.h:32
void swap(Memory &rhs)
Swap the contents and size information of two Memory objects.
Definition: memory.h:540
constexpr std::size_t MemoryAlignment
Specifies the most conservative memory alignment necessary for aligned loads and stores of Vector types.
Definition: vector.h:218
static Memory< V, Size, 0u, false > & fromRawData(EntryType *ptr)
Wrap existing data with the Memory convenience class.
Definition: memory.h:356
static constexpr size_t vectorsCount()
Definition: memory.h:379
Align on boundary of vector sizes.
Definition: global.h:452
static constexpr size_t rowsCount()
Definition: memory.h:192
Memory & operator=(const MemoryBase< V, Parent, 2, RM > &rhs)
Copies the data from a different object.
Definition: memory.h:219
~Memory()
Frees the memory which was allocated in the constructor.
Definition: memory.h:530