Vc  1.0.0-dev
SIMD Vector Classes for C++
gatherimplementation.h
1 /* This file is part of the Vc library. {{{
2 Copyright © 2014-2015 Matthias Kretz <kretz@kde.org>
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7  * Redistributions of source code must retain the above copyright
8  notice, this list of conditions and the following disclaimer.
9  * Redistributions in binary form must reproduce the above copyright
10  notice, this list of conditions and the following disclaimer in the
11  documentation and/or other materials provided with the distribution.
12  * Neither the names of contributing organizations nor the
13  names of its contributors may be used to endorse or promote products
14  derived from this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
20 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 }}}*/
28 
29 #ifndef VC_COMMON_GATHERIMPLEMENTATION_H_
30 #define VC_COMMON_GATHERIMPLEMENTATION_H_
31 
32 #include "macros.h"
33 
34 namespace Vc_VERSIONED_NAMESPACE
35 {
36 namespace Common
37 {
38 
/**
 * Selects one of the masked gather strategies implemented by the
 * executeGather overloads in this file.
 */
enum class GatherScatterImplementation : int {
    /// Visit every lane and load it only if its mask entry is set.
    SimpleLoop = 0,
    /// Adjust the indexes via setZeroInverted(mask), gather all lanes, then
    /// blend the result into the destination under the mask.
    SetIndexZero = 1,
    /// Iterate over the set bits of the mask using bit-scan instructions.
    BitScanLoop = 2,
    /// Switch on the number of set mask bits and load exactly that many lanes.
    PopcntSwitch = 3
};
45 
46 using SimpleLoopT = std::integral_constant<GatherScatterImplementation, GatherScatterImplementation::SimpleLoop>;
47 using SetIndexZeroT = std::integral_constant<GatherScatterImplementation, GatherScatterImplementation::SetIndexZero>;
48 using BitScanLoopT = std::integral_constant<GatherScatterImplementation, GatherScatterImplementation::BitScanLoop>;
49 using PopcntSwitchT = std::integral_constant<GatherScatterImplementation, GatherScatterImplementation::PopcntSwitch>;
50 
51 template <typename V, typename MT, typename IT>
52 Vc_ALWAYS_INLINE void executeGather(SetIndexZeroT,
53  V &v,
54  const MT *mem,
55  IT indexes,
56  typename V::MaskArgument mask)
57 {
58  indexes.setZeroInverted(static_cast<typename IT::Mask>(mask));
59  const V tmp(mem, indexes);
60  where(mask) | v = tmp;
61 }
62 
63 template <typename V, typename MT, typename IT>
64 Vc_ALWAYS_INLINE void executeGather(SimpleLoopT,
65  V &v,
66  const MT *mem,
67  const IT &indexes,
68  typename V::MaskArgument mask)
69 {
70  if (Vc_IS_UNLIKELY(mask.isEmpty())) {
71  return;
72  }
73  Common::unrolled_loop<std::size_t, 0, V::Size>([&](std::size_t i) {
74  if (mask[i])
75  v[i] = mem[indexes[i]];
76  });
77 }
78 
79 template <typename V, typename MT, typename IT>
80 Vc_ALWAYS_INLINE void executeGather(BitScanLoopT,
81  V &v,
82  const MT *mem,
83  const IT &indexes,
84  typename V::MaskArgument mask)
85 {
86  size_t bits = mask.toInt();
87  while (Vc_IS_LIKELY(bits > 0)) {
88  size_t i, j;
89  asm("bsf %[bits],%[i]\n\t"
90  "bsr %[bits],%[j]\n\t"
91  "btr %[i],%[bits]\n\t"
92  "btr %[j],%[bits]\n\t"
93  : [i] "=r"(i), [j] "=r"(j), [bits] "+r"(bits));
94  v[i] = mem[indexes[i]];
95  v[j] = mem[indexes[j]];
96  }
97 
98  /* Alternative from Vc::SSE (0.7)
99  int bits = mask.toInt();
100  while (bits) {
101  const int i = _bit_scan_forward(bits);
102  bits &= ~(1 << i); // btr?
103  d.set(i, ith_value(i));
104  }
105  */
106 }
107 
108 template <typename V, typename MT, typename IT>
109 Vc_ALWAYS_INLINE void executeGather(PopcntSwitchT,
110  V &v,
111  const MT *mem,
112  const IT &indexes,
113  typename V::MaskArgument mask,
114  enable_if<V::size() == 16> = nullarg)
115 {
116  unsigned int bits = mask.toInt();
117  unsigned int low, high = 0;
118  switch (Vc::Detail::popcnt16(bits)) {
119  case 16:
120  v.gather(mem, indexes);
121  break;
122  case 15:
123  low = _bit_scan_forward(bits);
124  bits ^= 1 << low;
125  v[low] = mem[indexes[low]];
126  case 14:
127  high = _bit_scan_reverse(bits);
128  v[high] = mem[indexes[high]];
129  high = (1 << high);
130  case 13:
131  low = _bit_scan_forward(bits);
132  bits ^= high | (1 << low);
133  v[low] = mem[indexes[low]];
134  case 12:
135  high = _bit_scan_reverse(bits);
136  v[high] = mem[indexes[high]];
137  high = (1 << high);
138  case 11:
139  low = _bit_scan_forward(bits);
140  bits ^= high | (1 << low);
141  v[low] = mem[indexes[low]];
142  case 10:
143  high = _bit_scan_reverse(bits);
144  v[high] = mem[indexes[high]];
145  high = (1 << high);
146  case 9:
147  low = _bit_scan_forward(bits);
148  bits ^= high | (1 << low);
149  v[low] = mem[indexes[low]];
150  case 8:
151  high = _bit_scan_reverse(bits);
152  v[high] = mem[indexes[high]];
153  high = (1 << high);
154  case 7:
155  low = _bit_scan_forward(bits);
156  bits ^= high | (1 << low);
157  v[low] = mem[indexes[low]];
158  case 6:
159  high = _bit_scan_reverse(bits);
160  v[high] = mem[indexes[high]];
161  high = (1 << high);
162  case 5:
163  low = _bit_scan_forward(bits);
164  bits ^= high | (1 << low);
165  v[low] = mem[indexes[low]];
166  case 4:
167  high = _bit_scan_reverse(bits);
168  v[high] = mem[indexes[high]];
169  high = (1 << high);
170  case 3:
171  low = _bit_scan_forward(bits);
172  bits ^= high | (1 << low);
173  v[low] = mem[indexes[low]];
174  case 2:
175  high = _bit_scan_reverse(bits);
176  v[high] = mem[indexes[high]];
177  case 1:
178  low = _bit_scan_forward(bits);
179  v[low] = mem[indexes[low]];
180  case 0:
181  break;
182  }
183 }
184 template <typename V, typename MT, typename IT>
185 Vc_ALWAYS_INLINE void executeGather(PopcntSwitchT,
186  V &v,
187  const MT *mem,
188  const IT &indexes,
189  typename V::MaskArgument mask,
190  enable_if<V::size() == 8> = nullarg)
191 {
192  unsigned int bits = mask.toInt();
193  unsigned int low, high = 0;
194  switch (Vc::Detail::popcnt8(bits)) {
195  case 8:
196  v.gather(mem, indexes);
197  break;
198  case 7:
199  low = _bit_scan_forward(bits);
200  bits ^= 1 << low;
201  v[low] = mem[indexes[low]];
202  case 6:
203  high = _bit_scan_reverse(bits);
204  v[high] = mem[indexes[high]];
205  high = (1 << high);
206  case 5:
207  low = _bit_scan_forward(bits);
208  bits ^= high | (1 << low);
209  v[low] = mem[indexes[low]];
210  case 4:
211  high = _bit_scan_reverse(bits);
212  v[high] = mem[indexes[high]];
213  high = (1 << high);
214  case 3:
215  low = _bit_scan_forward(bits);
216  bits ^= high | (1 << low);
217  v[low] = mem[indexes[low]];
218  case 2:
219  high = _bit_scan_reverse(bits);
220  v[high] = mem[indexes[high]];
221  case 1:
222  low = _bit_scan_forward(bits);
223  v[low] = mem[indexes[low]];
224  case 0:
225  break;
226  }
227 }
228 template <typename V, typename MT, typename IT>
229 Vc_ALWAYS_INLINE void executeGather(PopcntSwitchT,
230  V &v,
231  const MT *mem,
232  const IT &indexes,
233  typename V::MaskArgument mask,
234  enable_if<V::size() == 4> = nullarg)
235 {
236  unsigned int bits = mask.toInt();
237  unsigned int low, high = 0;
238  switch (Vc::Detail::popcnt4(bits)) {
239  case 4:
240  v.gather(mem, indexes);
241  break;
242  case 3:
243  low = _bit_scan_forward(bits);
244  bits ^= 1 << low;
245  v[low] = mem[indexes[low]];
246  case 2:
247  high = _bit_scan_reverse(bits);
248  v[high] = mem[indexes[high]];
249  case 1:
250  low = _bit_scan_forward(bits);
251  v[low] = mem[indexes[low]];
252  case 0:
253  break;
254  }
255 }
256 template <typename V, typename MT, typename IT>
257 Vc_ALWAYS_INLINE void executeGather(PopcntSwitchT,
258  V &v,
259  const MT *mem,
260  const IT &indexes,
261  typename V::MaskArgument mask,
262  enable_if<V::size() == 2> = nullarg)
263 {
264  unsigned int bits = mask.toInt();
265  unsigned int low;
266  switch (Vc::Detail::popcnt4(bits)) {
267  case 2:
268  v.gather(mem, indexes);
269  break;
270  case 1:
271  low = _bit_scan_forward(bits);
272  v[low] = mem[indexes[low]];
273  case 0:
274  break;
275  }
276 }
277 
278 } // namespace Common
279 } // namespace Vc
280 
281 #endif // VC_COMMON_GATHERIMPLEMENTATION_H_
constexpr WhereImpl::WhereMask< M > where(const M &mask)
Conditional assignment.
Definition: where.h:230