29 #ifndef VC_COMMON_SCATTERIMPLEMENTATION_H_
30 #define VC_COMMON_SCATTERIMPLEMENTATION_H_
32 #include "gatherimplementation.h"
35 namespace Vc_VERSIONED_NAMESPACE
// executeScatter overload selected by the SetIndexZeroT tag.
// NOTE(review): this listing appears to be a lossy extract (the embedded line
// numbers skip), so the parameters between the tag and `mask`, as well as the
// function's braces, are not visible here.
40 template <
typename V,
typename MT,
typename IT>
41 Vc_ALWAYS_INLINE
void executeScatter(SetIndexZeroT,
45 typename V::MaskArgument mask)
// Adjust the indexes using the mask converted to the index vector's own mask
// type; presumably this zeroes the indexes of inactive lanes - confirm against
// IT::setZeroInverted.
47 indexes.setZeroInverted(static_cast<typename IT::Mask>(mask));
// NOTE(review): as visible, this constructs a V from (mem, indexes) and then
// assigns it into v under the mask, i.e. it looks like a read from mem rather
// than a store of v to mem - confirm this is intended for a scatter.
49 const V tmp(mem, indexes);
50 where(mask) | v = tmp;
// executeScatter overload selected by the SimpleLoopT tag: walks the lanes
// 0 .. V::Size-1 with an unrolled loop and stores v[i] to mem[indexes[i]].
// NOTE(review): the parameters between the tag and `mask`, the body of the
// isEmpty() branch, and any per-lane mask test inside the lambda are not
// visible in this extract - confirm against the full file.
53 template <
typename V,
typename MT,
typename IT>
54 Vc_ALWAYS_INLINE
void executeScatter(SimpleLoopT,
58 typename V::MaskArgument mask)
// Special-case an empty mask; the branch is annotated as the unlikely one.
60 if (Vc_IS_UNLIKELY(mask.isEmpty())) {
// The lambda captures by reference and is invoked with each lane index i.
63 Common::unrolled_loop<std::size_t, 0, V::Size>([&](std::size_t i) {
// Store lane i of v at the position named by lane i of indexes.
65 mem[indexes[i]] = v[i];
// executeScatter overload selected by the BitScanLoopT tag: converts the mask
// to an integer bit field and processes its set bits two at a time, from both
// ends, using x86 bit-scan instructions.
// NOTE(review): the parameters between the tag and `mask`, the declarations of
// `i` and `j`, and the closing braces are not visible in this extract.
69 template <
typename V,
typename MT,
typename IT>
70 Vc_ALWAYS_INLINE
void executeScatter(BitScanLoopT,
74 typename V::MaskArgument mask)
// One bit per lane (as produced by mask.toInt()).
76 size_t bits = mask.toInt();
// Loop until every set bit has been cleared by the asm block below.
77 while (Vc_IS_LIKELY(bits > 0)) {
// bsf: i = index of the lowest set bit; bsr: j = index of the highest set bit;
// btr (twice): clear bit i and bit j in `bits`.
// `i` and `j` are write-only outputs ("=r"); `bits` is read and written ("+r").
// When only one bit is left, i == j and the same lane is handled by both
// stores below.
79 asm(
"bsf %[bits],%[i]\n\t"
80 "bsr %[bits],%[j]\n\t"
81 "btr %[i],%[bits]\n\t"
82 "btr %[j],%[bits]\n\t"
83 : [i]
"=r"(i), [j]
"=r"(j), [bits]
"+r"(bits));
// Store the two selected lanes of v to their indexed positions in mem.
84 mem[indexes[i]] = v[i];
85 mem[indexes[j]] = v[j];
// executeScatter overload selected by the PopcntSwitchT tag, enabled only when
// V::size() == 16. It switches on the number of set mask bits (popcnt16) and
// then stores lanes one at a time, alternating between the lowest and the
// highest remaining set bit.
// NOTE(review): this listing appears to be a lossy extract; the parameters
// between the tag and `mask`, the `case` labels, any `break`s and the closing
// braces are not visible here - confirm the control flow against the full file.
98 template <
typename V,
typename MT,
typename IT>
99 Vc_ALWAYS_INLINE
void executeScatter(PopcntSwitchT,
103 typename V::MaskArgument mask,
104 enable_if<V::size() == 16> = nullarg)
// One bit per lane (as produced by mask.toInt()).
106 unsigned int bits = mask.toInt();
// `low` is left uninitialized here and assigned before each visible use;
// `high` starts at 0.
107 unsigned int low, high = 0;
108 switch (Vc::Detail::popcnt16(bits)) {
// First visible branch: store through a single v.scatter call (presumably the
// all-lanes-set case; its label is not visible).
110 v.scatter(mem, indexes);
// From here on the visible steps repeat one pattern:
//   low  = index of the lowest set bit  -> store lane `low`
//   high = index of the highest set bit -> store lane `high`
113 low = _bit_scan_forward(bits);
115 mem[indexes[low]] = v[low];
117 high = _bit_scan_reverse(bits);
118 mem[indexes[high]] = v[high];
121 low = _bit_scan_forward(bits);
// Toggle the handled bits out of `bits`.
// NOTE(review): `high` is OR-ed in as a bit mask here, yet the visible
// assignments store a bit index in it; the index-to-mask conversion is
// presumably on a line missing from this extract - confirm.
122 bits ^= high | (1 << low);
123 mem[indexes[low]] = v[low];
125 high = _bit_scan_reverse(bits);
126 mem[indexes[high]] = v[high];
129 low = _bit_scan_forward(bits);
130 bits ^= high | (1 << low);
131 mem[indexes[low]] = v[low];
133 high = _bit_scan_reverse(bits);
134 mem[indexes[high]] = v[high];
137 low = _bit_scan_forward(bits);
138 bits ^= high | (1 << low);
139 mem[indexes[low]] = v[low];
141 high = _bit_scan_reverse(bits);
142 mem[indexes[high]] = v[high];
145 low = _bit_scan_forward(bits);
146 bits ^= high | (1 << low);
147 mem[indexes[low]] = v[low];
149 high = _bit_scan_reverse(bits);
150 mem[indexes[high]] = v[high];
153 low = _bit_scan_forward(bits);
154 bits ^= high | (1 << low);
155 mem[indexes[low]] = v[low];
157 high = _bit_scan_reverse(bits);
158 mem[indexes[high]] = v[high];
161 low = _bit_scan_forward(bits);
162 bits ^= high | (1 << low);
163 mem[indexes[low]] = v[low];
165 high = _bit_scan_reverse(bits);
166 mem[indexes[high]] = v[high];
// Last visible step: store the lane of the lowest set bit; `bits` is not
// updated afterwards.
168 low = _bit_scan_forward(bits);
169 mem[indexes[low]] = v[low];
// executeScatter overload selected by the PopcntSwitchT tag, enabled only when
// V::size() == 8. It switches on the number of set mask bits (popcnt8) and then
// stores lanes one at a time, alternating between the lowest and the highest
// remaining set bit.
// NOTE(review): this listing appears to be a lossy extract; the parameters
// between the tag and `mask`, the `case` labels, any `break`s and the closing
// braces are not visible here - confirm the control flow against the full file.
174 template <
typename V,
typename MT,
typename IT>
175 Vc_ALWAYS_INLINE
void executeScatter(PopcntSwitchT,
179 typename V::MaskArgument mask,
180 enable_if<V::size() == 8> = nullarg)
// One bit per lane (as produced by mask.toInt()).
182 unsigned int bits = mask.toInt();
// `low` is assigned before each visible use; `high` starts at 0.
183 unsigned int low, high = 0;
184 switch (Vc::Detail::popcnt8(bits)) {
// First visible branch: store through a single v.scatter call (presumably the
// all-lanes-set case; its label is not visible).
186 v.scatter(mem, indexes);
// Lowest set bit -> store that lane.
189 low = _bit_scan_forward(bits);
191 mem[indexes[low]] = v[low];
// Highest set bit -> store that lane.
193 high = _bit_scan_reverse(bits);
194 mem[indexes[high]] = v[high];
197 low = _bit_scan_forward(bits);
// Toggle the handled bits out of `bits`.
// NOTE(review): `high` is used as a bit mask here although the visible
// assignments store a bit index in it; the conversion is presumably on a line
// missing from this extract - confirm.
198 bits ^= high | (1 << low);
199 mem[indexes[low]] = v[low];
201 high = _bit_scan_reverse(bits);
202 mem[indexes[high]] = v[high];
205 low = _bit_scan_forward(bits);
206 bits ^= high | (1 << low);
207 mem[indexes[low]] = v[low];
209 high = _bit_scan_reverse(bits);
210 mem[indexes[high]] = v[high];
// Last visible step: store the lane of the lowest set bit.
212 low = _bit_scan_forward(bits);
213 mem[indexes[low]] = v[low];
// executeScatter overload selected by the PopcntSwitchT tag, enabled only when
// V::size() == 4. It switches on the number of set mask bits (popcnt4) and then
// stores individual lanes found via bit scans.
// NOTE(review): this listing appears to be a lossy extract; the parameters
// between the tag and `mask`, the `case` labels, any `break`s, any updates of
// `bits` between the scans, and the closing braces are not visible here.
218 template <
typename V,
typename MT,
typename IT>
219 Vc_ALWAYS_INLINE
void executeScatter(PopcntSwitchT,
223 typename V::MaskArgument mask,
224 enable_if<V::size() == 4> = nullarg)
// One bit per lane (as produced by mask.toInt()).
226 unsigned int bits = mask.toInt();
// `low` is assigned before each visible use; `high` starts at 0.
227 unsigned int low, high = 0;
228 switch (Vc::Detail::popcnt4(bits)) {
// First visible branch: store through a single v.scatter call (presumably the
// all-lanes-set case; its label is not visible).
230 v.scatter(mem, indexes);
// Lowest set bit -> store that lane.
233 low = _bit_scan_forward(bits);
235 mem[indexes[low]] = v[low];
// Highest set bit -> store that lane.
237 high = _bit_scan_reverse(bits);
238 mem[indexes[high]] = v[high];
// Last visible step: store the lane of the lowest set bit.
240 low = _bit_scan_forward(bits);
241 mem[indexes[low]] = v[low];
// executeScatter overload selected by the PopcntSwitchT tag, enabled only when
// V::size() == 2. It switches on the number of set mask bits and either stores
// through v.scatter or stores the single lane found by a forward bit scan.
// NOTE(review): this listing appears to be a lossy extract; the parameters
// between the tag and `mask`, the declaration of `low`, the `case` labels, any
// `break`s and the closing braces are not visible here.
246 template <
typename V,
typename MT,
typename IT>
247 Vc_ALWAYS_INLINE
void executeScatter(PopcntSwitchT,
251 typename V::MaskArgument mask,
252 enable_if<V::size() == 2> = nullarg)
// One bit per lane (as produced by mask.toInt()).
254 unsigned int bits = mask.toInt();
// NOTE(review): popcnt4 is used although this overload is for two lanes;
// presumably harmless for a 2-bit mask - confirm no dedicated 2-bit helper is
// expected here.
256 switch (Vc::Detail::popcnt4(bits)) {
// First visible branch: store through a single v.scatter call (presumably the
// all-lanes-set case; its label is not visible).
258 v.scatter(mem, indexes);
// Otherwise visible: store the lane of the lowest set bit.
261 low = _bit_scan_forward(bits);
262 mem[indexes[low]] = v[low];
271 #endif // VC_COMMON_SCATTERIMPLEMENTATION_H_
// constexpr WhereImpl::WhereMask< M > where(const M &mask)
// Conditional assignment.