Vc  1.3.2-dev
SIMD Vector Classes for C++
scatterimplementation.h
1 /* This file is part of the Vc library. {{{
2 Copyright © 2014-2015 Matthias Kretz <kretz@kde.org>
3 
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions are met:
6  * Redistributions of source code must retain the above copyright
7  notice, this list of conditions and the following disclaimer.
8  * Redistributions in binary form must reproduce the above copyright
9  notice, this list of conditions and the following disclaimer in the
10  documentation and/or other materials provided with the distribution.
11  * Neither the names of contributing organizations nor the
12  names of its contributors may be used to endorse or promote products
13  derived from this software without specific prior written permission.
14 
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 
26 }}}*/
27 
28 #ifndef VC_COMMON_SCATTERIMPLEMENTATION_H_
29 #define VC_COMMON_SCATTERIMPLEMENTATION_H_
30 
31 #include "gatherimplementation.h"
32 #include "macros.h"
33 
34 namespace Vc_VERSIONED_NAMESPACE
35 {
36 namespace Common
37 {
38 
39 template <typename V, typename MT, typename IT>
40 Vc_ALWAYS_INLINE void executeScatter(SetIndexZeroT,
41  V &v,
42  MT *mem,
43  IT indexes,
44  typename V::MaskArgument mask)
45 {
46  indexes.setZeroInverted(static_cast<typename IT::Mask>(mask));
47  // Huh?
48  const V tmp(mem, indexes);
49  where(mask) | v = tmp;
50 }
51 
52 template <typename V, typename MT, typename IT>
53 Vc_ALWAYS_INLINE void executeScatter(SimpleLoopT,
54  V &v,
55  MT *mem,
56  const IT &indexes,
57  typename V::MaskArgument mask)
58 {
59  if (Vc_IS_UNLIKELY(mask.isEmpty())) {
60  return;
61  }
62  Common::unrolled_loop<std::size_t, 0, V::Size>([&](std::size_t i) {
63  if (mask[i])
64  mem[indexes[i]] = v[i];
65  });
66 }
67 
68 template <typename V, typename MT, typename IT>
69 Vc_ALWAYS_INLINE void executeScatter(BitScanLoopT,
70  V &v,
71  MT *mem,
72  const IT &indexes,
73  typename V::MaskArgument mask)
74 {
75  size_t bits = mask.toInt();
76  while (Vc_IS_LIKELY(bits > 0)) {
77  size_t i, j;
78  asm("bsf %[bits],%[i]\n\t"
79  "bsr %[bits],%[j]\n\t"
80  "btr %[i],%[bits]\n\t"
81  "btr %[j],%[bits]\n\t"
82  : [i] "=r"(i), [j] "=r"(j), [bits] "+r"(bits));
83  mem[indexes[i]] = v[i];
84  mem[indexes[j]] = v[j];
85  }
86 
87  /* Alternative from Vc::SSE (0.7)
88  int bits = mask.toInt();
89  while (bits) {
90  const int i = _bit_scan_forward(bits);
91  bits ^= (1 << i); // btr?
92  mem[indexes[i]] = v[i];
93  }
94  */
95 }
96 
97 template <typename V, typename MT, typename IT>
98 Vc_ALWAYS_INLINE void executeScatter(PopcntSwitchT,
99  V &v,
100  MT *mem,
101  const IT &indexes,
102  typename V::MaskArgument mask,
103  enable_if<V::Size == 16> = nullarg)
104 {
105  unsigned int bits = mask.toInt();
106  unsigned int low, high = 0;
107  switch (Vc::Detail::popcnt16(bits)) {
108  case 16:
109  v.scatter(mem, indexes);
110  break;
111  case 15:
112  low = _bit_scan_forward(bits);
113  bits ^= 1 << low;
114  mem[indexes[low]] = v[low];
115  case 14:
116  high = _bit_scan_reverse(bits);
117  mem[indexes[high]] = v[high];
118  high = (1 << high);
119  case 13:
120  low = _bit_scan_forward(bits);
121  bits ^= high | (1 << low);
122  mem[indexes[low]] = v[low];
123  case 12:
124  high = _bit_scan_reverse(bits);
125  mem[indexes[high]] = v[high];
126  high = (1 << high);
127  case 11:
128  low = _bit_scan_forward(bits);
129  bits ^= high | (1 << low);
130  mem[indexes[low]] = v[low];
131  case 10:
132  high = _bit_scan_reverse(bits);
133  mem[indexes[high]] = v[high];
134  high = (1 << high);
135  case 9:
136  low = _bit_scan_forward(bits);
137  bits ^= high | (1 << low);
138  mem[indexes[low]] = v[low];
139  case 8:
140  high = _bit_scan_reverse(bits);
141  mem[indexes[high]] = v[high];
142  high = (1 << high);
143  case 7:
144  low = _bit_scan_forward(bits);
145  bits ^= high | (1 << low);
146  mem[indexes[low]] = v[low];
147  case 6:
148  high = _bit_scan_reverse(bits);
149  mem[indexes[high]] = v[high];
150  high = (1 << high);
151  case 5:
152  low = _bit_scan_forward(bits);
153  bits ^= high | (1 << low);
154  mem[indexes[low]] = v[low];
155  case 4:
156  high = _bit_scan_reverse(bits);
157  mem[indexes[high]] = v[high];
158  high = (1 << high);
159  case 3:
160  low = _bit_scan_forward(bits);
161  bits ^= high | (1 << low);
162  mem[indexes[low]] = v[low];
163  case 2:
164  high = _bit_scan_reverse(bits);
165  mem[indexes[high]] = v[high];
166  case 1:
167  low = _bit_scan_forward(bits);
168  mem[indexes[low]] = v[low];
169  case 0:
170  break;
171  }
172 }
173 template <typename V, typename MT, typename IT>
174 Vc_ALWAYS_INLINE void executeScatter(PopcntSwitchT,
175  V &v,
176  MT *mem,
177  const IT &indexes,
178  typename V::MaskArgument mask,
179  enable_if<V::Size == 8> = nullarg)
180 {
181  unsigned int bits = mask.toInt();
182  unsigned int low, high = 0;
183  switch (Vc::Detail::popcnt8(bits)) {
184  case 8:
185  v.scatter(mem, indexes);
186  break;
187  case 7:
188  low = _bit_scan_forward(bits);
189  bits ^= 1 << low;
190  mem[indexes[low]] = v[low];
191  case 6:
192  high = _bit_scan_reverse(bits);
193  mem[indexes[high]] = v[high];
194  high = (1 << high);
195  case 5:
196  low = _bit_scan_forward(bits);
197  bits ^= high | (1 << low);
198  mem[indexes[low]] = v[low];
199  case 4:
200  high = _bit_scan_reverse(bits);
201  mem[indexes[high]] = v[high];
202  high = (1 << high);
203  case 3:
204  low = _bit_scan_forward(bits);
205  bits ^= high | (1 << low);
206  mem[indexes[low]] = v[low];
207  case 2:
208  high = _bit_scan_reverse(bits);
209  mem[indexes[high]] = v[high];
210  case 1:
211  low = _bit_scan_forward(bits);
212  mem[indexes[low]] = v[low];
213  case 0:
214  break;
215  }
216 }
217 template <typename V, typename MT, typename IT>
218 Vc_ALWAYS_INLINE void executeScatter(PopcntSwitchT,
219  V &v,
220  MT *mem,
221  const IT &indexes,
222  typename V::MaskArgument mask,
223  enable_if<V::Size == 4> = nullarg)
224 {
225  unsigned int bits = mask.toInt();
226  unsigned int low, high = 0;
227  switch (Vc::Detail::popcnt4(bits)) {
228  case 4:
229  v.scatter(mem, indexes);
230  break;
231  case 3:
232  low = _bit_scan_forward(bits);
233  bits ^= 1 << low;
234  mem[indexes[low]] = v[low];
235  case 2:
236  high = _bit_scan_reverse(bits);
237  mem[indexes[high]] = v[high];
238  case 1:
239  low = _bit_scan_forward(bits);
240  mem[indexes[low]] = v[low];
241  case 0:
242  break;
243  }
244 }
245 template <typename V, typename MT, typename IT>
246 Vc_ALWAYS_INLINE void executeScatter(PopcntSwitchT,
247  V &v,
248  MT *mem,
249  const IT &indexes,
250  typename V::MaskArgument mask,
251  enable_if<V::Size == 2> = nullarg)
252 {
253  unsigned int bits = mask.toInt();
254  unsigned int low;
255  switch (Vc::Detail::popcnt4(bits)) {
256  case 2:
257  v.scatter(mem, indexes);
258  break;
259  case 1:
260  low = _bit_scan_forward(bits);
261  mem[indexes[low]] = v[low];
262  case 0:
263  break;
264  }
265 }
266 
267 } // namespace Common
268 } // namespace Vc
269 
270 #endif // VC_COMMON_SCATTERIMPLEMENTATION_H_
constexpr WhereImpl::WhereMask< M > where(const M &mask)
Conditional assignment.
Definition: where.h:229