28 #ifndef VC_COMMON_GATHERIMPLEMENTATION_H_
29 #define VC_COMMON_GATHERIMPLEMENTATION_H_
33 namespace Vc_VERSIONED_NAMESPACE
// Identifies the strategy used to carry out a gather/scatter. The aliases below
// reference its enumerators SimpleLoop, SetIndexZero, BitScanLoop and PopcntSwitch.
38 enum class GatherScatterImplementation : int {
// Tag types: each alias wraps one enumerator in a std::integral_constant, so the
// executeGather overloads can be selected by the type of their first argument.
45 using SimpleLoopT = std::integral_constant<GatherScatterImplementation, GatherScatterImplementation::SimpleLoop>;
46 using SetIndexZeroT = std::integral_constant<GatherScatterImplementation, GatherScatterImplementation::SetIndexZero>;
47 using BitScanLoopT = std::integral_constant<GatherScatterImplementation, GatherScatterImplementation::BitScanLoop>;
48 using PopcntSwitchT = std::integral_constant<GatherScatterImplementation, GatherScatterImplementation::PopcntSwitch>;
// Masked gather, SetIndexZero strategy (selected by the SetIndexZeroT tag).
// Visible steps: take a local copy of the forwarded index object, call
// setZeroInverted() on it with the mask, construct a temporary V from mem and
// those indexes, then conditionally assign the temporary into v.
50 template <
typename V,
typename MT,
typename IT>
51 Vc_ALWAYS_INLINE
void executeGather(SetIndexZeroT,
55 typename V::MaskArgument mask)
// `auto` drops references, so `indexes` is a local object that can be modified
// without affecting the caller's argument.
57 auto indexes = std::forward<IT>(indexes_);
// The mask is converted to the type of the expression `!indexes` before it is
// passed on.
// NOTE(review): presumably this zeroes the index entries of lanes not selected
// by the mask, so the construction below reads mem[0] for them - confirm
// against the definition of setZeroInverted.
58 indexes.setZeroInverted(
static_cast<decltype(!indexes)
>(mask));
// Builds a temporary from mem and the adjusted indexes; no mask is passed here.
59 const V tmp(mem, indexes);
// Conditional assignment of tmp to v through where(mask).
60 where(mask) | v = tmp;
// Gather, SimpleLoop strategy (selected by the SimpleLoopT tag): uses
// Common::unrolled_loop to visit lane indexes and loads v[i] from mem[indexes[i]].
63 template <
typename V,
typename MT,
typename IT>
64 Vc_ALWAYS_INLINE
void executeGather(SimpleLoopT,
68 typename V::MaskArgument mask)
// Branch on mask.isEmpty(), hinted to the compiler as the unlikely case.
70 if (Vc_IS_UNLIKELY(mask.isEmpty())) {
// The lambda captures by reference and receives a std::size_t lane index.
// NOTE(review): with template arguments <std::size_t, 0, V::Size> this
// presumably invokes the lambda for every i in [0, V::Size) - confirm against
// the definition of unrolled_loop.
73 Common::unrolled_loop<std::size_t, 0, V::Size>([&](std::size_t i) {
// Element-wise load of one lane through its index.
75 v[i] = mem[indexes[i]];
// Gather, BitScanLoop strategy (selected by the BitScanLoopT tag): converts the
// mask to an integer bit field with mask.toInt() and loads only the lanes whose
// bit is located by a bit scan. Two variants are visible: an x86 inline-asm loop
// that handles two lanes per iteration, and one based on _bit_scan_forward.
79 template <
typename V,
typename MT,
typename IT>
80 Vc_ALWAYS_INLINE
void executeGather(BitScanLoopT,
84 typename V::MaskArgument mask)
// Variant 1: inline assembly.
87 size_t bits = mask.toInt();
// Loop while at least one bit is still set (hinted as the likely case).
88 while (Vc_IS_LIKELY(bits > 0)) {
// bsf writes the index of the lowest set bit of bits to i, bsr the index of the
// highest set bit to j; the two btr instructions then clear both bits in bits.
// i and j are write-only register outputs ("=r"); bits is read and written ("+r").
// When only one bit is left, i == j and the same lane is loaded twice below.
90 asm(
"bsf %[bits],%[i]\n\t"
91 "bsr %[bits],%[j]\n\t"
92 "btr %[i],%[bits]\n\t"
93 "btr %[j],%[bits]\n\t"
94 : [i]
"=r"(i), [j]
"=r"(j), [bits]
"+r"(bits));
// Load the lowest and the highest pending lane.
95 v[i] = mem[indexes[i]];
96 v[j] = mem[indexes[j]];
// Variant 2: a separate declaration of bits (as int) used with the
// _bit_scan_forward intrinsic.
// NOTE(review): the two variants are presumably selected by a preprocessor
// conditional - confirm.
100 int bits = mask.toInt();
// Index of the lowest set bit of bits.
102 const int i = _bit_scan_forward(bits);
104 v[i] = mem[indexes[i]];
// Gather, PopcntSwitch strategy for V::Size == 16 (the overload is enabled
// through the defaulted enable_if parameter). The mask is converted to an
// integer and the switch dispatches on Vc::Detail::popcnt16(bits). The visible
// statements alternate between loading the lane found by a forward bit scan
// (low) and the lane found by a reverse bit scan (high).
109 template <
typename V,
typename MT,
typename IT>
110 Vc_ALWAYS_INLINE
void executeGather(PopcntSwitchT,
114 typename V::MaskArgument mask,
115 enable_if<V::Size == 16> = nullarg)
117 unsigned int bits = mask.toInt();
// Only high is initialized here; low is assigned by a bit scan before each use.
118 unsigned int low, high = 0;
// NOTE(review): popcnt16 presumably returns the number of set bits among the
// low 16 bits of its argument - confirm.
119 switch (Vc::Detail::popcnt16(bits)) {
// Whole-vector gather through V::gather; no mask argument is passed.
121 v.gather(mem, indexes);
// Lowest set bit -> lane index, then load that lane.
124 low = _bit_scan_forward(bits);
126 v[low] = mem[indexes[low]];
// Highest set bit -> lane index, then load that lane.
128 high = _bit_scan_reverse(bits);
129 v[high] = mem[indexes[high]];
132 low = _bit_scan_forward(bits);
// XOR toggles bit `low` together with every bit that is set in `high`.
// NOTE(review): for this to remove the lane handled by the previous reverse
// scan, high must hold a bit mask rather than a lane index at this point -
// confirm. The same statement recurs in each following step.
133 bits ^= high | (1 << low);
134 v[low] = mem[indexes[low]];
136 high = _bit_scan_reverse(bits);
137 v[high] = mem[indexes[high]];
140 low = _bit_scan_forward(bits);
141 bits ^= high | (1 << low);
142 v[low] = mem[indexes[low]];
144 high = _bit_scan_reverse(bits);
145 v[high] = mem[indexes[high]];
148 low = _bit_scan_forward(bits);
149 bits ^= high | (1 << low);
150 v[low] = mem[indexes[low]];
152 high = _bit_scan_reverse(bits);
153 v[high] = mem[indexes[high]];
156 low = _bit_scan_forward(bits);
157 bits ^= high | (1 << low);
158 v[low] = mem[indexes[low]];
160 high = _bit_scan_reverse(bits);
161 v[high] = mem[indexes[high]];
164 low = _bit_scan_forward(bits);
165 bits ^= high | (1 << low);
166 v[low] = mem[indexes[low]];
168 high = _bit_scan_reverse(bits);
169 v[high] = mem[indexes[high]];
172 low = _bit_scan_forward(bits);
173 bits ^= high | (1 << low);
174 v[low] = mem[indexes[low]];
176 high = _bit_scan_reverse(bits);
177 v[high] = mem[indexes[high]];
// Final visible step: one more forward scan and load, with no update of bits.
179 low = _bit_scan_forward(bits);
180 v[low] = mem[indexes[low]];
// Gather, PopcntSwitch strategy for V::Size == 8 (the overload is enabled
// through the defaulted enable_if parameter). The mask is converted to an
// integer and the switch dispatches on Vc::Detail::popcnt8(bits). The visible
// statements alternate between loading the lane found by a forward bit scan
// (low) and the lane found by a reverse bit scan (high).
185 template <
typename V,
typename MT,
typename IT>
186 Vc_ALWAYS_INLINE
void executeGather(PopcntSwitchT,
190 typename V::MaskArgument mask,
191 enable_if<V::Size == 8> = nullarg)
193 unsigned int bits = mask.toInt();
// Only high is initialized here; low is assigned by a bit scan before each use.
194 unsigned int low, high = 0;
// NOTE(review): popcnt8 presumably returns the number of set bits among the
// low 8 bits of its argument - confirm.
195 switch (Vc::Detail::popcnt8(bits)) {
// Whole-vector gather through V::gather; no mask argument is passed.
197 v.gather(mem, indexes);
// Lowest set bit -> lane index, then load that lane.
200 low = _bit_scan_forward(bits);
202 v[low] = mem[indexes[low]];
// Highest set bit -> lane index, then load that lane.
204 high = _bit_scan_reverse(bits);
205 v[high] = mem[indexes[high]];
208 low = _bit_scan_forward(bits);
// XOR toggles bit `low` together with every bit that is set in `high`.
// NOTE(review): for this to remove the lane handled by the previous reverse
// scan, high must hold a bit mask rather than a lane index at this point -
// confirm. The same statement recurs in the following step.
209 bits ^= high | (1 << low);
210 v[low] = mem[indexes[low]];
212 high = _bit_scan_reverse(bits);
213 v[high] = mem[indexes[high]];
216 low = _bit_scan_forward(bits);
217 bits ^= high | (1 << low);
218 v[low] = mem[indexes[low]];
220 high = _bit_scan_reverse(bits);
221 v[high] = mem[indexes[high]];
// Final visible step: one more forward scan and load, with no update of bits.
223 low = _bit_scan_forward(bits);
224 v[low] = mem[indexes[low]];
// Gather, PopcntSwitch strategy for V::Size == 4 (the overload is enabled
// through the defaulted enable_if parameter). The mask is converted to an
// integer and the switch dispatches on Vc::Detail::popcnt4(bits). The visible
// statements load the lane found by a forward bit scan (low), the lane found by
// a reverse bit scan (high), and then another forward-scanned lane.
229 template <
typename V,
typename MT,
typename IT>
230 Vc_ALWAYS_INLINE
void executeGather(PopcntSwitchT,
234 typename V::MaskArgument mask,
235 enable_if<V::Size == 4> = nullarg)
237 unsigned int bits = mask.toInt();
// Only high is initialized here; low is assigned by a bit scan before each use.
238 unsigned int low, high = 0;
// NOTE(review): popcnt4 presumably returns the number of set bits among the
// low 4 bits of its argument - confirm.
239 switch (Vc::Detail::popcnt4(bits)) {
// Whole-vector gather through V::gather; no mask argument is passed.
241 v.gather(mem, indexes);
// Lowest set bit -> lane index, then load that lane.
244 low = _bit_scan_forward(bits);
246 v[low] = mem[indexes[low]];
// Highest set bit -> lane index, then load that lane.
248 high = _bit_scan_reverse(bits);
249 v[high] = mem[indexes[high]];
// Final visible step: one more forward scan and load, with no update of bits.
251 low = _bit_scan_forward(bits);
252 v[low] = mem[indexes[low]];
// Gather, PopcntSwitch strategy for V::Size == 2 (the overload is enabled
// through the defaulted enable_if parameter). The mask is converted to an
// integer and the switch dispatches on the bit count; the visible statements
// are a whole-vector gather and a single-lane load located by a forward bit scan.
257 template <
typename V,
typename MT,
typename IT>
258 Vc_ALWAYS_INLINE
void executeGather(PopcntSwitchT,
262 typename V::MaskArgument mask,
263 enable_if<V::Size == 2> = nullarg)
265 unsigned int bits = mask.toInt();
// NOTE(review): popcnt4 is used although V::Size == 2 - presumably equivalent
// because only the low two bits of bits can be set; confirm.
267 switch (Vc::Detail::popcnt4(bits)) {
// Whole-vector gather through V::gather; no mask argument is passed.
269 v.gather(mem, indexes);
// Lowest set bit -> lane index, then load that lane.
272 low = _bit_scan_forward(bits);
273 v[low] = mem[indexes[low]];
282 #endif // VC_COMMON_GATHERIMPLEMENTATION_H_
constexpr WhereImpl::WhereMask< M > where(const M &mask)
Conditional assignment.