29 #ifndef VC_COMMON_GATHERIMPLEMENTATION_H_
30 #define VC_COMMON_GATHERIMPLEMENTATION_H_
34 namespace Vc_VERSIONED_NAMESPACE
39 enum class GatherScatterImplementation : int {
// Tag types: each alias wraps one GatherScatterImplementation enumerator in a
// std::integral_constant. The executeGather overloads in this file take one of
// these tags as their first (unnamed) parameter, so the gather strategy is
// selected by overload resolution.
46 using SimpleLoopT = std::integral_constant<GatherScatterImplementation, GatherScatterImplementation::SimpleLoop>;
47 using SetIndexZeroT = std::integral_constant<GatherScatterImplementation, GatherScatterImplementation::SetIndexZero>;
48 using BitScanLoopT = std::integral_constant<GatherScatterImplementation, GatherScatterImplementation::BitScanLoop>;
49 using PopcntSwitchT = std::integral_constant<GatherScatterImplementation, GatherScatterImplementation::PopcntSwitch>;
// Masked gather, variant selected by the SetIndexZeroT tag.
// NOTE(review): the parameter lines between the tag and `mask`, and the
// function braces, are not shown in this listing. `v`, `mem` and `indexes`
// used below are presumably those parameters — confirm against the full header.
51 template <
typename V,
typename MT,
typename IT>
52 Vc_ALWAYS_INLINE
void executeGather(SetIndexZeroT,
56 typename V::MaskArgument mask)
// Adjust the index vector using the mask (converted to IT::Mask). Judging by
// the method name this zeroes the indexes of lanes the mask does not select,
// so the unmasked gather below would read mem[0] for them — TODO confirm.
58 indexes.setZeroInverted(static_cast<typename IT::Mask>(mask));
// Gather all lanes into a temporary through V's (mem, indexes) constructor.
59 const V tmp(mem, indexes);
// Conditional assignment: copy tmp into v under control of the mask.
60 where(mask) | v = tmp;
// Masked gather, variant selected by the SimpleLoopT tag: per-lane scalar
// loads driven by Common::unrolled_loop.
// NOTE(review): the parameter lines between the tag and `mask`, and several
// body lines, are not shown in this listing — confirm against the full header.
63 template <
typename V,
typename MT,
typename IT>
64 Vc_ALWAYS_INLINE
void executeGather(SimpleLoopT,
68 typename V::MaskArgument mask)
// Test for an empty mask, hinted as the unlikely case. The body of this branch
// is not visible here; presumably an early return — TODO confirm.
70 if (Vc_IS_UNLIKELY(mask.isEmpty())) {
// Invoke the lambda through Common::unrolled_loop, parameterized with the
// bounds 0 and V::Size; the lambda receives the lane index i.
73 Common::unrolled_loop<std::size_t, 0, V::Size>([&](std::size_t i) {
// Scalar load: lane i of v receives mem[indexes[i]].
// NOTE(review): embedded line 74 is missing right before this statement;
// presumably a per-lane mask test guards the load — confirm.
75 v[i] = mem[indexes[i]];
// Masked gather, variant selected by the BitScanLoopT tag: uses x86 bit-scan
// instructions (inline asm) to walk the set bits of the mask, handling the
// lowest and the highest remaining bit in each loop iteration.
// NOTE(review): the parameter lines between the tag and `mask`, the
// declarations of `i` and `j`, and the closing braces are not shown in this
// listing — confirm against the full header.
79 template <
typename V,
typename MT,
typename IT>
80 Vc_ALWAYS_INLINE
void executeGather(BitScanLoopT,
84 typename V::MaskArgument mask)
// Integer bit pattern of the mask, as returned by mask.toInt().
86 size_t bits = mask.toInt();
// Iterate while at least one bit is still set (hinted as the likely case).
87 while (Vc_IS_LIKELY(bits > 0)) {
// bsf/bsr store the index of the lowest/highest set bit of `bits` in i/j;
// the two btr instructions then clear bit i and bit j in `bits`.
// If only a single bit is left, i and j name the same bit and both loads
// below write the same lane.
89 asm(
"bsf %[bits],%[i]\n\t"
90 "bsr %[bits],%[j]\n\t"
91 "btr %[i],%[bits]\n\t"
92 "btr %[j],%[bits]\n\t"
93 : [i]
"=r"(i), [j]
"=r"(j), [bits]
"+r"(bits));
// Scalar loads for the two lanes found above.
94 v[i] = mem[indexes[i]];
95 v[j] = mem[indexes[j]];
// Masked gather, variant selected by the PopcntSwitchT tag. This overload is
// enabled only when V::size() == 16 (see the enable_if default argument).
// It switches on the number of set mask bits and then loads lanes one at a
// time, alternating between the lowest (_bit_scan_forward) and the highest
// (_bit_scan_reverse) remaining set bit.
// NOTE(review): this listing omits the remaining parameter lines, all `case`
// labels, any `break` statements and several statements (the embedded line
// numbers jump, e.g. 123 -> 125 and 128 -> 131). Control flow between the
// steps below therefore cannot be read off this listing — confirm against the
// full header before changing anything.
108 template <
typename V,
typename MT,
typename IT>
109 Vc_ALWAYS_INLINE
void executeGather(PopcntSwitchT,
113 typename V::MaskArgument mask,
114 enable_if<V::size() == 16> = nullarg)
// Integer bit pattern of the mask.
116 unsigned int bits = mask.toInt();
// Only `high` is initialized here; `low` is assigned before each use below.
117 unsigned int low, high = 0;
// Dispatch on the population count of the mask bits.
118 switch (Vc::Detail::popcnt16(bits)) {
// Whole-vector gather through V::gather (no per-lane work).
120 v.gather(mem, indexes);
// Per-lane steps: find a set bit, then load that lane from mem[indexes[lane]].
123 low = _bit_scan_forward(bits);
125 v[low] = mem[indexes[low]];
127 high = _bit_scan_reverse(bits);
128 v[high] = mem[indexes[high]];
131 low = _bit_scan_forward(bits);
// XOR removes the handled bits from `bits`. As shown, `high` would still hold
// a lane index here; presumably an omitted statement turns it into a bit mask
// first — TODO confirm against the full header.
132 bits ^= high | (1 << low);
133 v[low] = mem[indexes[low]];
135 high = _bit_scan_reverse(bits);
136 v[high] = mem[indexes[high]];
139 low = _bit_scan_forward(bits);
140 bits ^= high | (1 << low);
141 v[low] = mem[indexes[low]];
143 high = _bit_scan_reverse(bits);
144 v[high] = mem[indexes[high]];
147 low = _bit_scan_forward(bits);
148 bits ^= high | (1 << low);
149 v[low] = mem[indexes[low]];
151 high = _bit_scan_reverse(bits);
152 v[high] = mem[indexes[high]];
155 low = _bit_scan_forward(bits);
156 bits ^= high | (1 << low);
157 v[low] = mem[indexes[low]];
159 high = _bit_scan_reverse(bits);
160 v[high] = mem[indexes[high]];
163 low = _bit_scan_forward(bits);
164 bits ^= high | (1 << low);
165 v[low] = mem[indexes[low]];
167 high = _bit_scan_reverse(bits);
168 v[high] = mem[indexes[high]];
171 low = _bit_scan_forward(bits);
172 bits ^= high | (1 << low);
173 v[low] = mem[indexes[low]];
175 high = _bit_scan_reverse(bits);
176 v[high] = mem[indexes[high]];
// Final step: one more lowest-bit lookup and load; `bits` is not updated after it.
178 low = _bit_scan_forward(bits);
179 v[low] = mem[indexes[low]];
// Masked gather, variant selected by the PopcntSwitchT tag. This overload is
// enabled only when V::size() == 8 (see the enable_if default argument).
// It switches on the number of set mask bits and then loads lanes one at a
// time, alternating between the lowest (_bit_scan_forward) and the highest
// (_bit_scan_reverse) remaining set bit.
// NOTE(review): this listing omits the remaining parameter lines, all `case`
// labels, any `break` statements and several statements (the embedded line
// numbers jump, e.g. 199 -> 201 and 204 -> 207) — confirm against the full
// header before changing anything.
184 template <
typename V,
typename MT,
typename IT>
185 Vc_ALWAYS_INLINE
void executeGather(PopcntSwitchT,
189 typename V::MaskArgument mask,
190 enable_if<V::size() == 8> = nullarg)
// Integer bit pattern of the mask.
192 unsigned int bits = mask.toInt();
// Only `high` is initialized here; `low` is assigned before each use below.
193 unsigned int low, high = 0;
// Dispatch on the population count of the mask bits.
194 switch (Vc::Detail::popcnt8(bits)) {
// Whole-vector gather through V::gather (no per-lane work).
196 v.gather(mem, indexes);
// Per-lane steps: find a set bit, then load that lane from mem[indexes[lane]].
199 low = _bit_scan_forward(bits);
201 v[low] = mem[indexes[low]];
203 high = _bit_scan_reverse(bits);
204 v[high] = mem[indexes[high]];
207 low = _bit_scan_forward(bits);
// XOR removes the handled bits from `bits`. As shown, `high` would still hold
// a lane index here; presumably an omitted statement turns it into a bit mask
// first — TODO confirm against the full header.
208 bits ^= high | (1 << low);
209 v[low] = mem[indexes[low]];
211 high = _bit_scan_reverse(bits);
212 v[high] = mem[indexes[high]];
215 low = _bit_scan_forward(bits);
216 bits ^= high | (1 << low);
217 v[low] = mem[indexes[low]];
219 high = _bit_scan_reverse(bits);
220 v[high] = mem[indexes[high]];
// Final step: one more lowest-bit lookup and load; `bits` is not updated after it.
222 low = _bit_scan_forward(bits);
223 v[low] = mem[indexes[low]];
// Masked gather, variant selected by the PopcntSwitchT tag. This overload is
// enabled only when V::size() == 4 (see the enable_if default argument).
// It switches on the number of set mask bits and loads lanes individually,
// using the lowest (_bit_scan_forward) and highest (_bit_scan_reverse) set bit.
// NOTE(review): this listing omits the remaining parameter lines, all `case`
// labels, any `break` statements and several statements (the embedded line
// numbers jump, e.g. 243 -> 245 and 248 -> 250) — confirm against the full
// header before changing anything.
228 template <
typename V,
typename MT,
typename IT>
229 Vc_ALWAYS_INLINE
void executeGather(PopcntSwitchT,
233 typename V::MaskArgument mask,
234 enable_if<V::size() == 4> = nullarg)
// Integer bit pattern of the mask.
236 unsigned int bits = mask.toInt();
// Only `high` is initialized here; `low` is assigned before each use below.
237 unsigned int low, high = 0;
// Dispatch on the population count of the mask bits.
238 switch (Vc::Detail::popcnt4(bits)) {
// Whole-vector gather through V::gather (no per-lane work).
240 v.gather(mem, indexes);
// Per-lane steps: find a set bit, then load that lane from mem[indexes[lane]].
243 low = _bit_scan_forward(bits);
245 v[low] = mem[indexes[low]];
247 high = _bit_scan_reverse(bits);
248 v[high] = mem[indexes[high]];
// Final step: one more lowest-bit lookup and load.
250 low = _bit_scan_forward(bits);
251 v[low] = mem[indexes[low]];
// Masked gather, variant selected by the PopcntSwitchT tag. This overload is
// enabled only when V::size() == 2 (see the enable_if default argument).
// NOTE(review): this listing omits the remaining parameter lines, the
// declaration of `low` (embedded line 265 is missing), all `case` labels and
// any `break` statements — confirm against the full header before changing
// anything.
256 template <
typename V,
typename MT,
typename IT>
257 Vc_ALWAYS_INLINE
void executeGather(PopcntSwitchT,
261 typename V::MaskArgument mask,
262 enable_if<V::size() == 2> = nullarg)
// Integer bit pattern of the mask.
264 unsigned int bits = mask.toInt();
// Dispatch on the population count of the mask bits. Note that this 2-lane
// overload calls the popcnt4 helper, the same one as the 4-lane overload.
266 switch (Vc::Detail::popcnt4(bits)) {
// Whole-vector gather through V::gather (no per-lane work).
268 v.gather(mem, indexes);
// Single-lane step: load the lane named by the lowest set mask bit.
271 low = _bit_scan_forward(bits);
272 v[low] = mem[indexes[low]];
281 #endif // VC_COMMON_GATHERIMPLEMENTATION_H_
constexpr WhereImpl::WhereMask< M > where(const M &mask)
Conditional assignment.