Vc  1.3.2-dev
SIMD Vector Classes for C++
gatherimplementation.h
1 /* This file is part of the Vc library. {{{
2 Copyright © 2014-2015 Matthias Kretz <kretz@kde.org>
3 
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions are met:
6  * Redistributions of source code must retain the above copyright
7  notice, this list of conditions and the following disclaimer.
8  * Redistributions in binary form must reproduce the above copyright
9  notice, this list of conditions and the following disclaimer in the
10  documentation and/or other materials provided with the distribution.
11  * Neither the names of contributing organizations nor the
12  names of its contributors may be used to endorse or promote products
13  derived from this software without specific prior written permission.
14 
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 
26 }}}*/
27 
28 #ifndef VC_COMMON_GATHERIMPLEMENTATION_H_
29 #define VC_COMMON_GATHERIMPLEMENTATION_H_
30 
31 #include "macros.h"
32 
33 namespace Vc_VERSIONED_NAMESPACE
34 {
35 namespace Common
36 {
37 
/// Selects the strategy an executeGather overload uses to perform a masked gather.
38 enum class GatherScatterImplementation : int {
39  SimpleLoop, ///< visit every lane and load the ones whose mask entry is set
40  SetIndexZero, ///< call setZeroInverted(mask) on a copy of the indexes, gather all lanes, assign under the mask
41  BitScanLoop, ///< loop over the set bits of mask.toInt() using bit-scan operations
42  PopcntSwitch ///< switch on the popcount of mask.toInt() into an unrolled chain of scalar loads
43 };
44 
// Tag types, one per GatherScatterImplementation enumerator. The executeGather overloads in
// this file take one of these as their first (unnamed) parameter, so the strategy is chosen
// by overload resolution.
45 using SimpleLoopT = std::integral_constant<GatherScatterImplementation, GatherScatterImplementation::SimpleLoop>;
46 using SetIndexZeroT = std::integral_constant<GatherScatterImplementation, GatherScatterImplementation::SetIndexZero>;
47 using BitScanLoopT = std::integral_constant<GatherScatterImplementation, GatherScatterImplementation::BitScanLoop>;
48 using PopcntSwitchT = std::integral_constant<GatherScatterImplementation, GatherScatterImplementation::PopcntSwitch>;
49 
50 template <typename V, typename MT, typename IT>
51 Vc_ALWAYS_INLINE void executeGather(SetIndexZeroT,
52  V &v,
53  const MT *mem,
54  IT &&indexes_,
55  typename V::MaskArgument mask)
56 {
57  auto indexes = std::forward<IT>(indexes_);
58  indexes.setZeroInverted(static_cast<decltype(!indexes)>(mask));
59  const V tmp(mem, indexes);
60  where(mask) | v = tmp;
61 }
62 
63 template <typename V, typename MT, typename IT>
64 Vc_ALWAYS_INLINE void executeGather(SimpleLoopT,
65  V &v,
66  const MT *mem,
67  const IT &indexes,
68  typename V::MaskArgument mask)
69 {
70  if (Vc_IS_UNLIKELY(mask.isEmpty())) {
71  return;
72  }
73  Common::unrolled_loop<std::size_t, 0, V::Size>([&](std::size_t i) {
74  if (mask[i])
75  v[i] = mem[indexes[i]];
76  });
77 }
78 
79 template <typename V, typename MT, typename IT>
80 Vc_ALWAYS_INLINE void executeGather(BitScanLoopT,
81  V &v,
82  const MT *mem,
83  const IT &indexes,
84  typename V::MaskArgument mask)
85 {
86 #ifdef Vc_GNU_ASM
87  size_t bits = mask.toInt();
88  while (Vc_IS_LIKELY(bits > 0)) {
89  size_t i, j;
90  asm("bsf %[bits],%[i]\n\t"
91  "bsr %[bits],%[j]\n\t"
92  "btr %[i],%[bits]\n\t"
93  "btr %[j],%[bits]\n\t"
94  : [i] "=r"(i), [j] "=r"(j), [bits] "+r"(bits));
95  v[i] = mem[indexes[i]];
96  v[j] = mem[indexes[j]];
97  }
98 #else
99  // Alternative from Vc::SSE (0.7)
100  int bits = mask.toInt();
101  while (bits) {
102  const int i = _bit_scan_forward(bits);
103  bits &= bits - 1;
104  v[i] = mem[indexes[i]];
105  }
106 #endif // Vc_GNU_ASM
107 }
108 
/**
 * Masked gather for V::Size == 16 (the defaulted enable_if parameter restricts the overload
 * to that size) that switches on Vc::Detail::popcnt16 of the mask bits.
 *
 * If the result is 16, the unmasked v.gather(mem, indexes) is used. Otherwise the switch
 * enters at the case equal to the result and falls through all lower cases; every case
 * performs one scalar load. Odd cases load the lane returned by _bit_scan_forward, even
 * cases the lane returned by _bit_scan_reverse.
 * NOTE(review): this relies on popcnt16 returning the number of set bits in the low 16 bits;
 * it is defined outside this file - confirm.
 *
 * \param v       destination vector; lanes whose mask bit is set are overwritten
 * \param mem     base pointer the indexes are applied to
 * \param indexes per-lane offsets into \p mem
 * \param mask    selects the lanes to load; read as a bitmask via mask.toInt()
 */
109 template <typename V, typename MT, typename IT>
110 Vc_ALWAYS_INLINE void executeGather(PopcntSwitchT,
111  V &v,
112  const MT *mem,
113  const IT &indexes,
114  typename V::MaskArgument mask,
115  enable_if<V::Size == 16> = nullarg)
116 {
117  unsigned int bits = mask.toInt();
     // high is reused as "bit mask still to be cleared from bits"; it starts as 0 so that
     // entering the switch at an odd case clears only the low bit.
118  unsigned int low, high = 0;
119  switch (Vc::Detail::popcnt16(bits)) {
120  case 16:
     // all lanes are active: use the unmasked gather
121  v.gather(mem, indexes);
122  break;
123  case 15:
124  low = _bit_scan_forward(bits);
125  bits ^= 1 << low; // clear the bit of the lane loaded on the next line
126  v[low] = mem[indexes[low]]; // fallthrough
127  case 14:
128  high = _bit_scan_reverse(bits);
129  v[high] = mem[indexes[high]];
     // turn the lane number into a bit mask; the next case removes it from bits
130  high = (1 << high); // fallthrough
131  case 13:
132  low = _bit_scan_forward(bits);
133  bits ^= high | (1 << low); // clear the pending high bit (if any) and the low bit
134  v[low] = mem[indexes[low]]; // fallthrough
135  case 12:
136  high = _bit_scan_reverse(bits);
137  v[high] = mem[indexes[high]];
138  high = (1 << high); // fallthrough
139  case 11:
140  low = _bit_scan_forward(bits);
141  bits ^= high | (1 << low);
142  v[low] = mem[indexes[low]]; // fallthrough
143  case 10:
144  high = _bit_scan_reverse(bits);
145  v[high] = mem[indexes[high]];
146  high = (1 << high); // fallthrough
147  case 9:
148  low = _bit_scan_forward(bits);
149  bits ^= high | (1 << low);
150  v[low] = mem[indexes[low]]; // fallthrough
151  case 8:
152  high = _bit_scan_reverse(bits);
153  v[high] = mem[indexes[high]];
154  high = (1 << high); // fallthrough
155  case 7:
156  low = _bit_scan_forward(bits);
157  bits ^= high | (1 << low);
158  v[low] = mem[indexes[low]]; // fallthrough
159  case 6:
160  high = _bit_scan_reverse(bits);
161  v[high] = mem[indexes[high]];
162  high = (1 << high); // fallthrough
163  case 5:
164  low = _bit_scan_forward(bits);
165  bits ^= high | (1 << low);
166  v[low] = mem[indexes[low]]; // fallthrough
167  case 4:
168  high = _bit_scan_reverse(bits);
169  v[high] = mem[indexes[high]];
170  high = (1 << high); // fallthrough
171  case 3:
172  low = _bit_scan_forward(bits);
173  bits ^= high | (1 << low);
174  v[low] = mem[indexes[low]]; // fallthrough
175  case 2:
176  high = _bit_scan_reverse(bits);
     // bits is not updated anymore: the last case scans from the opposite end
177  v[high] = mem[indexes[high]]; // fallthrough
178  case 1:
179  low = _bit_scan_forward(bits);
180  v[low] = mem[indexes[low]]; // fallthrough
181  case 0:
182  break;
183  }
184 }
/**
 * Masked gather for V::Size == 8 (the defaulted enable_if parameter restricts the overload
 * to that size) that switches on Vc::Detail::popcnt8 of the mask bits.
 *
 * If the result is 8, the unmasked v.gather(mem, indexes) is used. Otherwise the switch
 * enters at the case equal to the result and falls through all lower cases; every case
 * performs one scalar load. Odd cases load the lane returned by _bit_scan_forward, even
 * cases the lane returned by _bit_scan_reverse.
 * NOTE(review): this relies on popcnt8 returning the number of set bits in the low 8 bits;
 * it is defined outside this file - confirm.
 *
 * \param v       destination vector; lanes whose mask bit is set are overwritten
 * \param mem     base pointer the indexes are applied to
 * \param indexes per-lane offsets into \p mem
 * \param mask    selects the lanes to load; read as a bitmask via mask.toInt()
 */
185 template <typename V, typename MT, typename IT>
186 Vc_ALWAYS_INLINE void executeGather(PopcntSwitchT,
187  V &v,
188  const MT *mem,
189  const IT &indexes,
190  typename V::MaskArgument mask,
191  enable_if<V::Size == 8> = nullarg)
192 {
193  unsigned int bits = mask.toInt();
     // high is reused as "bit mask still to be cleared from bits"; it starts as 0 so that
     // entering the switch at an odd case clears only the low bit.
194  unsigned int low, high = 0;
195  switch (Vc::Detail::popcnt8(bits)) {
196  case 8:
     // all lanes are active: use the unmasked gather
197  v.gather(mem, indexes);
198  break;
199  case 7:
200  low = _bit_scan_forward(bits);
201  bits ^= 1 << low; // clear the bit of the lane loaded on the next line
202  v[low] = mem[indexes[low]]; // fallthrough
203  case 6:
204  high = _bit_scan_reverse(bits);
205  v[high] = mem[indexes[high]];
     // turn the lane number into a bit mask; the next case removes it from bits
206  high = (1 << high); // fallthrough
207  case 5:
208  low = _bit_scan_forward(bits);
209  bits ^= high | (1 << low); // clear the pending high bit (if any) and the low bit
210  v[low] = mem[indexes[low]]; // fallthrough
211  case 4:
212  high = _bit_scan_reverse(bits);
213  v[high] = mem[indexes[high]];
214  high = (1 << high); // fallthrough
215  case 3:
216  low = _bit_scan_forward(bits);
217  bits ^= high | (1 << low);
218  v[low] = mem[indexes[low]]; // fallthrough
219  case 2:
220  high = _bit_scan_reverse(bits);
     // bits is not updated anymore: the last case scans from the opposite end
221  v[high] = mem[indexes[high]]; // fallthrough
222  case 1:
223  low = _bit_scan_forward(bits);
224  v[low] = mem[indexes[low]]; // fallthrough
225  case 0:
226  break;
227  }
228 }
/**
 * Masked gather for V::Size == 4 (the defaulted enable_if parameter restricts the overload
 * to that size) that switches on Vc::Detail::popcnt4 of the mask bits.
 *
 * If the result is 4, the unmasked v.gather(mem, indexes) is used. Otherwise the switch
 * enters at the case equal to the result and falls through all lower cases; every case
 * performs one scalar load.
 * NOTE(review): this relies on popcnt4 returning the number of set bits in the low 4 bits;
 * it is defined outside this file - confirm.
 *
 * \param v       destination vector; lanes whose mask bit is set are overwritten
 * \param mem     base pointer the indexes are applied to
 * \param indexes per-lane offsets into \p mem
 * \param mask    selects the lanes to load; read as a bitmask via mask.toInt()
 */
229 template <typename V, typename MT, typename IT>
230 Vc_ALWAYS_INLINE void executeGather(PopcntSwitchT,
231  V &v,
232  const MT *mem,
233  const IT &indexes,
234  typename V::MaskArgument mask,
235  enable_if<V::Size == 4> = nullarg)
236 {
237  unsigned int bits = mask.toInt();
     // high is always assigned by _bit_scan_reverse before it is read in this overload
238  unsigned int low, high = 0;
239  switch (Vc::Detail::popcnt4(bits)) {
240  case 4:
     // all lanes are active: use the unmasked gather
241  v.gather(mem, indexes);
242  break;
243  case 3:
244  low = _bit_scan_forward(bits);
245  bits ^= 1 << low; // clear the bit so that case 1 finds the next lane
246  v[low] = mem[indexes[low]]; // fallthrough
247  case 2:
248  high = _bit_scan_reverse(bits);
     // bits is not updated here: the last case scans from the opposite end
249  v[high] = mem[indexes[high]]; // fallthrough
250  case 1:
251  low = _bit_scan_forward(bits);
252  v[low] = mem[indexes[low]]; // fallthrough
253  case 0:
254  break;
255  }
256 }
/**
 * Masked gather for V::Size == 2 (the defaulted enable_if parameter restricts the overload
 * to that size) that switches on the popcount of the mask bits.
 *
 * A result of 2 uses the unmasked v.gather(mem, indexes), a result of 1 loads the single
 * lane returned by _bit_scan_forward, and a result of 0 does nothing.
 *
 * \param v       destination vector; lanes whose mask bit is set are overwritten
 * \param mem     base pointer the indexes are applied to
 * \param indexes per-lane offsets into \p mem
 * \param mask    selects the lanes to load; read as a bitmask via mask.toInt()
 */
257 template <typename V, typename MT, typename IT>
258 Vc_ALWAYS_INLINE void executeGather(PopcntSwitchT,
259  V &v,
260  const MT *mem,
261  const IT &indexes,
262  typename V::MaskArgument mask,
263  enable_if<V::Size == 2> = nullarg)
264 {
265  unsigned int bits = mask.toInt();
266  unsigned int low;
     // NOTE(review): popcnt4 is used for a 2-lane mask; presumably mask.toInt() never has
     // bits above bit 1 set for this size, otherwise no case would match - confirm.
267  switch (Vc::Detail::popcnt4(bits)) {
268  case 2:
     // both lanes are active: use the unmasked gather
269  v.gather(mem, indexes);
270  break;
271  case 1:
272  low = _bit_scan_forward(bits);
273  v[low] = mem[indexes[low]]; // fallthrough
274  case 0:
275  break;
276  }
277 }
278 
279 } // namespace Common
280 } // namespace Vc
281 
282 #endif // VC_COMMON_GATHERIMPLEMENTATION_H_
constexpr WhereImpl::WhereMask< M > where(const M &mask)
Conditional assignment.
Definition: where.h:229