Vc  1.0.0-dev
SIMD Vector Classes for C++
memory.h
1 /* This file is part of the Vc library. {{{
2 Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7  * Redistributions of source code must retain the above copyright
8  notice, this list of conditions and the following disclaimer.
9  * Redistributions in binary form must reproduce the above copyright
10  notice, this list of conditions and the following disclaimer in the
11  documentation and/or other materials provided with the distribution.
12  * Neither the names of contributing organizations nor the
13  names of its contributors may be used to endorse or promote products
14  derived from this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
20 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 }}}*/
28 
29 #ifndef VC_COMMON_MEMORY_H_
30 #define VC_COMMON_MEMORY_H_
31 
32 #include "memorybase.h"
33 #include <assert.h>
34 #include <algorithm>
35 #include <cstring>
36 #include <cstddef>
37 #include <initializer_list>
38 #include "memoryfwd.h"
39 #include "malloc.h"
40 #include "macros.h"
41 
42 namespace Vc_VERSIONED_NAMESPACE
43 {
75 template<typename T, Vc::MallocAlignment A>
76 Vc_ALWAYS_INLINE T *malloc(size_t n)
77 {
78  return static_cast<T *>(Common::malloc<A>(n * sizeof(T)));
79 }
80 
102 template<typename T>
103 Vc_ALWAYS_INLINE void free(T *p)
104 {
105  Common::free(p);
106 }
107 
108 namespace Common
109 {
/**
 * Compile-time helper that rounds \p Size up to a whole number of vectors of
 * type \p V.
 *
 * \tparam V     vector type; only `V::Size` (entries per vector) is used
 * \tparam Size  number of entries that have to fit
 *
 * The enumerators expose the intermediate results:
 *  - \c Alignment      entries per vector (`V::Size`)
 *  - \c AlignmentMask  `Alignment - 1`
 *  - \c MaskedSize     entries of \p Size that exceed an integral multiple of
 *                      \c Alignment
 *  - \c Padding        entries needed to fill up the last, partial vector
 *  - \c PaddedSize     \p Size rounded up to a multiple of \c Alignment
 */
template<typename V, size_t Size> struct _MemorySizeCalculation
{
    enum AlignmentCalculations {
        Alignment = V::Size,
        AlignmentMask = Alignment - 1,
        // Use the remainder instead of `Size & AlignmentMask`: both agree when
        // Alignment is a power of two, but only the remainder yields a
        // PaddedSize that is a multiple of Alignment for every other width.
        MaskedSize = Size % Alignment,
        Padding = Alignment - MaskedSize,
        PaddedSize = MaskedSize == 0 ? Size : Size + Padding
    };
};
120 
131 template <typename V, size_t Size1, size_t Size2, bool InitPadding>
132 class Memory : public AlignedBase<V::MemoryAlignment>,
133  public MemoryBase<V, Memory<V, Size1, Size2, InitPadding>, 2,
134  Memory<V, Size2, 0, false>>
135 {
136  public:
137  typedef typename V::EntryType EntryType;
138  private:
140  friend class MemoryBase<V, Memory<V, Size1, Size2, InitPadding>, 2, Memory<V, Size2, 0, false> >;
141  friend class MemoryDimensionBase<V, Memory<V, Size1, Size2, InitPadding>, 2, Memory<V, Size2, 0, false> >;
142  enum : size_t {
143  Alignment = V::MemoryAlignment,
144  PaddedSize2 = _MemorySizeCalculation<V, Size2>::PaddedSize
145  };
146  alignas(static_cast<size_t>(Alignment)) // GCC complains about 'is not an
147  // integer constant' unless the
148  // static_cast is present
149  EntryType m_mem[Size1][PaddedSize2];
150 
151  public:
152  using Base::vector;
153  enum Constants {
154  RowCount = Size1,
155  VectorsCount = PaddedSize2 / V::Size
156  };
157 
158  Memory()
159  {
160  if (InitPadding) {
161  if (Size1 > 32)
162  for (size_t i = 0; i < Size1; ++i) {
163  V::Zero().store(&m_mem[i][PaddedSize2 - V::Size], Vc::Streaming);
164  }
165  }
166  }
167 
173  static constexpr size_t rowsCount() { return RowCount; }
182  static constexpr size_t entriesCount() { return Size1 * Size2; }
188  static constexpr size_t vectorsCount() { return VectorsCount * Size1; }
189 
199  template<typename Parent, typename RM>
200  Vc_ALWAYS_INLINE Memory &operator=(const MemoryBase<V, Parent, 2, RM> &rhs) {
201  assert(vectorsCount() == rhs.vectorsCount());
202  Detail::copyVectors(*this, rhs);
203  return *this;
204  }
205 
206  Vc_ALWAYS_INLINE Memory &operator=(const Memory &rhs) {
207  Detail::copyVectors(*this, rhs);
208  return *this;
209  }
210 
218  inline Memory &operator=(const V &v) {
219  for (size_t i = 0; i < vectorsCount(); ++i) {
220  vector(i) = v;
221  }
222  return *this;
223  }
224 };
225 
269 template <typename V, size_t Size, bool InitPadding>
270 class Memory<V, Size, 0u, InitPadding>
271  : public AlignedBase<V::MemoryAlignment>,
272  public MemoryBase<V, Memory<V, Size, 0u, InitPadding>, 1, void>
273  {
274  public:
275  typedef typename V::EntryType EntryType;
276  private:
278  friend class MemoryBase<V, Memory<V, Size, 0u, InitPadding>, 1, void>;
279  friend class MemoryDimensionBase<V, Memory<V, Size, 0u, InitPadding>, 1, void>;
280  enum : size_t {
281  Alignment = V::MemoryAlignment, // in Bytes
282  MaskedSize = Size & (V::Size - 1), // the fraction of Size that exceeds
283  // an integral multiple of V::Size
284  Padding = V::Size - MaskedSize,
285  PaddedSize = MaskedSize == 0 ? Size : Size + Padding
286  };
287  alignas(static_cast<size_t>(Alignment)) // GCC complains about 'is not an
288  // integer constant' unless the
289  // static_cast is present
290  EntryType m_mem[PaddedSize];
291 
292  public:
293  using Base::vector;
294  enum Constants {
295  EntriesCount = Size,
296  VectorsCount = PaddedSize / V::Size
297  };
298 
299  Memory()
300  {
301  if (InitPadding) {
302  Base::lastVector() = V::Zero();
303  }
304  }
305 
306  Memory(std::initializer_list<EntryType> init)
307  {
308  Vc_ASSERT(init.size() <= Size);
309  Base::lastVector() = V::Zero();
310  std::copy(init.begin(), init.end(), &m_mem[0]);
311  }
312 
335  static Vc_ALWAYS_INLINE Vc_CONST Memory<V, Size, 0u, false> &fromRawData(EntryType *ptr)
336  {
337  // DANGER! This placement new has to use the right address. If the compiler decides
338  // RowMemory requires padding before the actual data then the address has to be adjusted
339  // accordingly
340  char *addr = reinterpret_cast<char *>(ptr);
341  typedef Memory<V, Size, 0u, false> MM;
342  addr -= Vc_OFFSETOF(MM, m_mem);
343  return *new(addr) MM;
344  }
345 
351  static constexpr size_t entriesCount() { return EntriesCount; }
352 
358  static constexpr size_t vectorsCount() { return VectorsCount; }
359 
360  inline Memory(const Memory &rhs)
361  {
362  Detail::copyVectors(*this, rhs);
363  }
364 
365  template <size_t S> inline Memory(const Memory<V, S> &rhs)
366  {
367  assert(vectorsCount() == rhs.vectorsCount());
368  Detail::copyVectors(*this, rhs);
369  }
370 
371  inline Memory &operator=(const Memory &rhs)
372  {
373  Detail::copyVectors(*this, rhs);
374  return *this;
375  }
376 
377  template <size_t S> inline Memory &operator=(const Memory<V, S> &rhs)
378  {
379  assert(vectorsCount() == rhs.vectorsCount());
380  Detail::copyVectors(*this, rhs);
381  return *this;
382  }
383 
384  Vc_ALWAYS_INLINE Memory &operator=(const EntryType *rhs) {
385  std::memcpy(m_mem, rhs, entriesCount() * sizeof(EntryType));
386  return *this;
387  }
388  inline Memory &operator=(const V &v) {
389  for (size_t i = 0; i < vectorsCount(); ++i) {
390  vector(i) = v;
391  }
392  return *this;
393  }
394  };
395 
436  template<typename V> class Memory<V, 0u, 0u, true> : public MemoryBase<V, Memory<V, 0u, 0u, true>, 1, void>
437  {
438  public:
439  typedef typename V::EntryType EntryType;
440  private:
441  typedef MemoryBase<V, Memory<V>, 1, void> Base;
442  friend class MemoryBase<V, Memory<V>, 1, void>;
443  friend class MemoryDimensionBase<V, Memory<V>, 1, void>;
444  enum InternalConstants {
445  Alignment = V::Size,
446  AlignmentMask = Alignment - 1
447  };
448  size_t m_entriesCount;
449  size_t m_vectorsCount;
450  EntryType *m_mem;
451  size_t calcPaddedEntriesCount(size_t x)
452  {
453  size_t masked = x & AlignmentMask;
454  return (masked == 0 ? x : x + (Alignment - masked));
455  }
456  public:
457  using Base::vector;
458 
466  Vc_ALWAYS_INLINE Memory(size_t size)
467  : m_entriesCount(size),
468  m_vectorsCount(calcPaddedEntriesCount(m_entriesCount)),
469  m_mem(Vc::malloc<EntryType, Vc::AlignOnVector>(m_vectorsCount))
470  {
471  m_vectorsCount /= V::Size;
472  Base::lastVector() = V::Zero();
473  }
474 
482  template<typename Parent, typename RM>
483  Vc_ALWAYS_INLINE Memory(const MemoryBase<V, Parent, 1, RM> &rhs)
484  : m_entriesCount(rhs.entriesCount()),
485  m_vectorsCount(rhs.vectorsCount()),
486  m_mem(Vc::malloc<EntryType, Vc::AlignOnVector>(m_vectorsCount * V::Size))
487  {
488  Detail::copyVectors(*this, rhs);
489  }
490 
498  Vc_ALWAYS_INLINE Memory(const Memory &rhs)
499  : m_entriesCount(rhs.entriesCount()),
500  m_vectorsCount(rhs.vectorsCount()),
501  m_mem(Vc::malloc<EntryType, Vc::AlignOnVector>(m_vectorsCount * V::Size))
502  {
503  Detail::copyVectors(*this, rhs);
504  }
505 
509  Vc_ALWAYS_INLINE ~Memory()
510  {
511  Vc::free(m_mem);
512  }
513 
519  inline void swap(Memory &rhs) {
520  std::swap(m_mem, rhs.m_mem);
521  std::swap(m_entriesCount, rhs.m_entriesCount);
522  std::swap(m_vectorsCount, rhs.m_vectorsCount);
523  }
524 
528  Vc_ALWAYS_INLINE Vc_PURE size_t entriesCount() const { return m_entriesCount; }
529 
533  Vc_ALWAYS_INLINE Vc_PURE size_t vectorsCount() const { return m_vectorsCount; }
534 
544  template<typename Parent, typename RM>
545  Vc_ALWAYS_INLINE Memory &operator=(const MemoryBase<V, Parent, 1, RM> &rhs) {
546  assert(vectorsCount() == rhs.vectorsCount());
547  Detail::copyVectors(*this, rhs);
548  return *this;
549  }
550 
551  Vc_ALWAYS_INLINE Memory &operator=(const Memory &rhs) {
552  assert(vectorsCount() == rhs.vectorsCount());
553  Detail::copyVectors(*this, rhs);
554  return *this;
555  }
556 
566  Vc_ALWAYS_INLINE Memory &operator=(const EntryType *rhs) {
567  std::memcpy(m_mem, rhs, entriesCount() * sizeof(EntryType));
568  return *this;
569  }
570 };
571 
582 Vc_ALWAYS_INLINE void prefetchForOneRead(const void *addr)
583 {
584  Vc::Detail::prefetchForOneRead(addr, VectorAbi::Best<float>());
585 }
586 
599 Vc_ALWAYS_INLINE void prefetchForModify(const void *addr)
600 {
601  Vc::Detail::prefetchForModify(addr, VectorAbi::Best<float>());
602 }
603 
614 Vc_ALWAYS_INLINE void prefetchClose(const void *addr)
615 {
616  Vc::Detail::prefetchClose(addr, VectorAbi::Best<float>());
617 }
618 
629 Vc_ALWAYS_INLINE void prefetchMid(const void *addr)
630 {
631  Vc::Detail::prefetchMid(addr, VectorAbi::Best<float>());
632 }
633 
644 Vc_ALWAYS_INLINE void prefetchFar(const void *addr)
645 {
646  Vc::Detail::prefetchFar(addr, VectorAbi::Best<float>());
647 }
648 } // namespace Common
649 
650 using Common::Memory;
654 using Common::prefetchMid;
655 using Common::prefetchFar;
656 } // namespace Vc
657 
658 namespace std
659 {
660  template<typename V> Vc_ALWAYS_INLINE void swap(Vc::Memory<V> &a, Vc::Memory<V> &b) { a.swap(b); }
661 } // namespace std
662 
663 #endif // VC_COMMON_MEMORY_H_
void free(T *p)
Frees memory that was allocated with Vc::malloc.
Definition: memory.h:103
void prefetchClose(const void *addr)
Prefetch the cacheline containing addr to L1 cache.
Definition: memory.h:614
Memory & operator=(const EntryType *rhs)
Overwrite all entries with the values stored in the memory at rhs.
Definition: memory.h:566
void prefetchMid(const void *addr)
Prefetch the cacheline containing addr to L2 cache.
Definition: memory.h:629
Memory(size_t size)
Allocate enough memory to access size values of type V::EntryType.
Definition: memory.h:466
static constexpr size_t entriesCount()
Definition: memory.h:351
size_t vectorsCount() const
Definition: memorybase.h:373
void swap(Adapter< S, T, N > &a, std::size_t i, S &x)
Swaps one scalar object x with a SIMD slot at offset i in the simdized object a.
Definition: simdize.h:1036
void prefetchForModify(const void *addr)
Prefetch the cacheline containing addr for modification.
Definition: memory.h:599
Definition: vector.h:258
static constexpr size_t vectorsCount()
Definition: memory.h:188
A helper class for fixed-size two-dimensional arrays.
Definition: memory.h:132
Memory(const MemoryBase< V, Parent, 1, RM > &rhs)
Copy the memory into a new memory area.
Definition: memory.h:483
Memory & operator=(const MemoryBase< V, Parent, 1, RM > &rhs)
Overwrite all entries with the values stored in rhs.
Definition: memory.h:545
Common interface to all Memory classes, independent of allocation on the stack or heap.
Definition: memorybase.h:349
Common::AdaptSubscriptOperator< std::vector< T, Allocator >> vector
An adapted std::vector container with an additional subscript operator which implements gather and scatter operations.
Definition: vector:51
Helper class to ensure a given alignment.
Definition: alignedbase.h:68
Memory & operator=(const V &v)
Initialize all data with the given vector.
Definition: memory.h:218
void prefetchForOneRead(const void *addr)
Prefetch the cacheline containing addr for a single read access.
Definition: memory.h:582
void prefetchFar(const void *addr)
Prefetch the cacheline containing addr to L3 cache.
Definition: memory.h:644
Memory(const Memory &rhs)
Overload of the above function.
Definition: memory.h:498
static constexpr size_t entriesCount()
Definition: memory.h:182
T * malloc(size_t n)
Allocates memory on the Heap with alignment and padding suitable for vectorized access.
Definition: memory.h:76
constexpr StreamingTag Streaming
Use this object for a flags parameter to request streaming loads and stores.
constexpr VectorSpecialInitializerZero Zero
The special object Vc::Zero can be used to construct Vector and Mask objects initialized to zero/false.
Definition: types.h:85
Vector Classes Namespace.
Definition: cpuid.h:33
void swap(Memory &rhs)
Swap the contents and size information of two Memory objects.
Definition: memory.h:519
constexpr std::size_t MemoryAlignment
Specifies the most conservative memory alignment necessary for aligned loads and stores of Vector types.
Definition: vector.h:219
static Memory< V, Size, 0u, false > & fromRawData(EntryType *ptr)
Wrap existing data with the Memory convenience class.
Definition: memory.h:335
static constexpr size_t vectorsCount()
Definition: memory.h:358
Align on boundary of vector sizes (e.g. 16 Bytes on SSE platforms).
Definition: global.h:457
static constexpr size_t rowsCount()
Definition: memory.h:173
Memory & operator=(const MemoryBase< V, Parent, 2, RM > &rhs)
Copies the data from a different object.
Definition: memory.h:200
~Memory()
Frees the memory which was allocated in the constructor.
Definition: memory.h:509