28 #ifndef VC_COMMON_SCATTERIMPLEMENTATION_H_
29 #define VC_COMMON_SCATTERIMPLEMENTATION_H_
31 #include "gatherimplementation.h"
34 namespace Vc_VERSIONED_NAMESPACE
// executeScatter overload selected by the SetIndexZeroT tag.
//
// Visible behavior: the index vector is modified through setZeroInverted() using the
// mask converted to IT::Mask, a temporary V is constructed from (mem, indexes), and
// that temporary is then assigned into v via where(mask).
//
// NOTE(review): the embedded listing numbers jump from 40 to 44, so the parameters
// between the tag and `mask` (v, mem and indexes are used below) and the braces are
// not visible in this view.
// NOTE(review): as shown, the body assigns into `v` and never stores to `mem`, which
// is the opposite direction of what the name "scatter" suggests — confirm whether
// this is intentional or left over from the gather implementation.
39 template <
typename V,
typename MT,
typename IT>
40 Vc_ALWAYS_INLINE
void executeScatter(SetIndexZeroT,
44 typename V::MaskArgument mask)
// presumably zeroes the index entries whose mask lane is not set, so that the
// following access only touches mem[0] for inactive lanes — TODO confirm
46 indexes.setZeroInverted(static_cast<typename IT::Mask>(mask));
// build a full vector from mem at the (sanitized) indexes
48 const V tmp(mem, indexes);
// masked assignment: only the lanes selected by `mask` of v receive tmp
49 where(mask) | v = tmp;
// executeScatter overload selected by the SimpleLoopT tag.
//
// Visible behavior: checks whether the mask is empty (hinted as the unlikely case),
// then runs a compile-time unrolled loop over the lane indexes [0, V::Size) whose
// body stores v[i] to mem[indexes[i]].
//
// NOTE(review): the embedded listing numbers skip 54-56, 60-61, 63 and 65-67, so the
// remaining parameters, the body of the isEmpty() branch (presumably an early
// return) and any per-lane mask test guarding the store are not visible here —
// verify against the full file.
52 template <
typename V,
typename MT,
typename IT>
53 Vc_ALWAYS_INLINE
void executeScatter(SimpleLoopT,
57 typename V::MaskArgument mask)
// fast exit path when no lane is selected
59 if (Vc_IS_UNLIKELY(mask.isEmpty())) {
// the lambda captures by reference; i is the lane index supplied by unrolled_loop
62 Common::unrolled_loop<std::size_t, 0, V::Size>([&](std::size_t i) {
// element-wise store of lane i to its target location
64 mem[indexes[i]] = v[i];
// executeScatter overload selected by the BitScanLoopT tag.
//
// Visible behavior: converts the mask to an integer bit field and loops while any
// bit is left. Each iteration uses x86 inline assembly to find the lowest set bit
// (bsf -> i) and the highest set bit (bsr -> j), clears both bits in `bits` (btr),
// and then stores lanes i and j of v to mem[indexes[i]] and mem[indexes[j]].
// Two lanes are therefore handled per iteration, working from both ends inwards.
//
// When exactly one bit remains, i and j are the same index: the second btr finds
// the bit already cleared and the same element is stored twice with the same value.
//
// NOTE(review): the embedded listing numbers skip 70-72 and 77, so the remaining
// parameters and the declarations of `i` and `j` are not visible in this view.
// NOTE(review): the asm is x86-specific; presumably this overload is only selected
// on x86 targets — confirm where BitScanLoopT is chosen.
68 template <
typename V,
typename MT,
typename IT>
69 Vc_ALWAYS_INLINE
void executeScatter(BitScanLoopT,
73 typename V::MaskArgument mask)
// one bit per lane; a set bit means the lane still has to be written
75 size_t bits = mask.toInt();
76 while (Vc_IS_LIKELY(bits > 0)) {
// operands: i and j are write-only outputs, bits is read and modified in place
78 asm(
"bsf %[bits],%[i]\n\t"
79 "bsr %[bits],%[j]\n\t"
80 "btr %[i],%[bits]\n\t"
81 "btr %[j],%[bits]\n\t"
82 : [i]
"=r"(i), [j]
"=r"(j), [bits]
"+r"(bits));
// store the lowest and the highest pending lane
83 mem[indexes[i]] = v[i];
84 mem[indexes[j]] = v[j];
// executeScatter overload selected by the PopcntSwitchT tag, enabled only when
// V::Size == 16 (via the defaulted enable_if parameter).
//
// Visible behavior: converts the mask to an integer, switches on the number of set
// bits (popcnt16), and either calls v.scatter(mem, indexes) or executes a sequence
// of scalar stores. The scalar sequence alternates between the lowest set bit
// (_bit_scan_forward -> low) and the highest set bit (_bit_scan_reverse -> high),
// storing the corresponding lane of v to mem[indexes[...]] each time.
//
// NOTE(review): the `case` labels, `break` statements and braces are not visible in
// this view (the embedded listing numbers skip e.g. 108, 110-111, 113, 115, 118-119),
// so the mapping from popcount value to entry point cannot be stated from here.
// Presumably the cases fall through so that a popcount of N executes N stores, and
// the unmasked v.scatter() is the all-lanes-set case — verify against the full file.
// NOTE(review): in the visible statements `high` holds a bit INDEX when it is OR-ed
// into the mask in `bits ^= high | (1 << low)`; a statement converting it to a bit
// mask may sit on one of the omitted lines — confirm.
97 template <
typename V,
typename MT,
typename IT>
98 Vc_ALWAYS_INLINE
void executeScatter(PopcntSwitchT,
102 typename V::MaskArgument mask,
103 enable_if<V::Size == 16> = nullarg)
// one bit per lane
105 unsigned int bits = mask.toInt();
// low is assigned before every use below; high starts at 0 so that the first
// `bits ^= high | ...` is well defined even if no high step ran before it
106 unsigned int low, high = 0;
107 switch (Vc::Detail::popcnt16(bits)) {
// unmasked scatter of the whole vector
109 v.scatter(mem, indexes);
// --- scalar path: store the lowest pending lane ...
112 low = _bit_scan_forward(bits);
114 mem[indexes[low]] = v[low];
// --- ... then the highest pending lane
116 high = _bit_scan_reverse(bits);
117 mem[indexes[high]] = v[high];
// from here on each low step also removes previously handled bits from `bits`
120 low = _bit_scan_forward(bits);
121 bits ^= high | (1 << low);
122 mem[indexes[low]] = v[low];
124 high = _bit_scan_reverse(bits);
125 mem[indexes[high]] = v[high];
128 low = _bit_scan_forward(bits);
129 bits ^= high | (1 << low);
130 mem[indexes[low]] = v[low];
132 high = _bit_scan_reverse(bits);
133 mem[indexes[high]] = v[high];
136 low = _bit_scan_forward(bits);
137 bits ^= high | (1 << low);
138 mem[indexes[low]] = v[low];
140 high = _bit_scan_reverse(bits);
141 mem[indexes[high]] = v[high];
144 low = _bit_scan_forward(bits);
145 bits ^= high | (1 << low);
146 mem[indexes[low]] = v[low];
148 high = _bit_scan_reverse(bits);
149 mem[indexes[high]] = v[high];
152 low = _bit_scan_forward(bits);
153 bits ^= high | (1 << low);
154 mem[indexes[low]] = v[low];
156 high = _bit_scan_reverse(bits);
157 mem[indexes[high]] = v[high];
160 low = _bit_scan_forward(bits);
161 bits ^= high | (1 << low);
162 mem[indexes[low]] = v[low];
164 high = _bit_scan_reverse(bits);
165 mem[indexes[high]] = v[high];
// final store: no further update of `bits` follows in the visible code
167 low = _bit_scan_forward(bits);
168 mem[indexes[low]] = v[low];
// executeScatter overload selected by the PopcntSwitchT tag, enabled only when
// V::Size == 8 (via the defaulted enable_if parameter).
//
// Visible behavior: converts the mask to an integer, switches on the number of set
// bits (popcnt8), and either calls v.scatter(mem, indexes) or executes scalar stores
// that alternate between the lowest set bit (_bit_scan_forward -> low) and the
// highest set bit (_bit_scan_reverse -> high).
//
// NOTE(review): `case` labels, `break` statements and braces are not visible in this
// view (the embedded listing numbers skip e.g. 184, 186-187, 189, 191, 194-195).
// Presumably the cases fall through so that a popcount of N executes N stores —
// verify against the full file.
// NOTE(review): in the visible statements `high` holds a bit INDEX when it is OR-ed
// into `bits ^= high | (1 << low)`; a conversion to a bit mask may be on an omitted
// line — confirm.
173 template <
typename V,
typename MT,
typename IT>
174 Vc_ALWAYS_INLINE
void executeScatter(PopcntSwitchT,
178 typename V::MaskArgument mask,
179 enable_if<V::Size == 8> = nullarg)
// one bit per lane
181 unsigned int bits = mask.toInt();
// high starts at 0 so the first `bits ^= high | ...` is well defined
182 unsigned int low, high = 0;
183 switch (Vc::Detail::popcnt8(bits)) {
// unmasked scatter of the whole vector
185 v.scatter(mem, indexes);
// scalar path: lowest pending lane, then highest pending lane, repeated
188 low = _bit_scan_forward(bits);
190 mem[indexes[low]] = v[low];
192 high = _bit_scan_reverse(bits);
193 mem[indexes[high]] = v[high];
196 low = _bit_scan_forward(bits);
197 bits ^= high | (1 << low);
198 mem[indexes[low]] = v[low];
200 high = _bit_scan_reverse(bits);
201 mem[indexes[high]] = v[high];
204 low = _bit_scan_forward(bits);
205 bits ^= high | (1 << low);
206 mem[indexes[low]] = v[low];
208 high = _bit_scan_reverse(bits);
209 mem[indexes[high]] = v[high];
// final store: no further update of `bits` follows in the visible code
211 low = _bit_scan_forward(bits);
212 mem[indexes[low]] = v[low];
// executeScatter overload selected by the PopcntSwitchT tag, enabled only when
// V::Size == 4 (via the defaulted enable_if parameter).
//
// Visible behavior: converts the mask to an integer, switches on the number of set
// bits (popcnt4), and either calls v.scatter(mem, indexes) or executes up to three
// scalar stores: lowest set bit, highest set bit, lowest set bit again.
//
// NOTE(review): `case` labels, `break` statements and braces are not visible in this
// view (the embedded listing numbers skip e.g. 228, 230-231, 233, 235, 238).
// Presumably the cases fall through so that a popcount of N executes N stores, and
// a statement clearing the first handled bit sits on an omitted line — verify
// against the full file.
217 template <
typename V,
typename MT,
typename IT>
218 Vc_ALWAYS_INLINE
void executeScatter(PopcntSwitchT,
222 typename V::MaskArgument mask,
223 enable_if<V::Size == 4> = nullarg)
// one bit per lane
225 unsigned int bits = mask.toInt();
// high is initialized to 0 here but only ever assigned (never read) in the
// statements visible below
226 unsigned int low, high = 0;
227 switch (Vc::Detail::popcnt4(bits)) {
// unmasked scatter of the whole vector
229 v.scatter(mem, indexes);
// scalar path: lowest pending lane
232 low = _bit_scan_forward(bits);
234 mem[indexes[low]] = v[low];
// highest pending lane
236 high = _bit_scan_reverse(bits);
237 mem[indexes[high]] = v[high];
// final store of the lowest pending lane
239 low = _bit_scan_forward(bits);
240 mem[indexes[low]] = v[low];
// executeScatter overload selected by the PopcntSwitchT tag, enabled only when
// V::Size == 2 (via the defaulted enable_if parameter).
//
// Visible behavior: converts the mask to an integer, switches on the number of set
// bits, and either calls v.scatter(mem, indexes) or stores the single lane found by
// _bit_scan_forward to mem[indexes[low]].
//
// NOTE(review): this overload calls popcnt4 although V::Size == 2, whereas the other
// PopcntSwitchT overloads use the helper matching their size (popcnt16 / popcnt8 /
// popcnt4). Presumably there is no 2-bit helper and the upper bits are zero —
// confirm.
// NOTE(review): the declaration of `low` and the `case` labels are not visible in
// this view (the embedded listing numbers skip 254, 256 and 258-259).
245 template <
typename V,
typename MT,
typename IT>
246 Vc_ALWAYS_INLINE
void executeScatter(PopcntSwitchT,
250 typename V::MaskArgument mask,
251 enable_if<V::Size == 2> = nullarg)
// one bit per lane
253 unsigned int bits = mask.toInt();
255 switch (Vc::Detail::popcnt4(bits)) {
// unmasked scatter of the whole vector
257 v.scatter(mem, indexes);
// single-lane store for the one set bit
260 low = _bit_scan_forward(bits);
261 mem[indexes[low]] = v[low];
270 #endif // VC_COMMON_SCATTERIMPLEMENTATION_H_
// Referenced above: constexpr WhereImpl::WhereMask<M> where(const M &mask)
// Conditional assignment.