29 #ifndef VC_COMMON_GATHERIMPLEMENTATION_H_ 
   30 #define VC_COMMON_GATHERIMPLEMENTATION_H_ 
   34 namespace Vc_VERSIONED_NAMESPACE
 
   39 enum class GatherScatterImplementation : int {
 
   // Tag types: each alias wraps one GatherScatterImplementation enumerator in a
   // std::integral_constant. The executeGather overloads below take one of these
   // tags as their first (unnamed) parameter, so the gather strategy is selected
   // by overload resolution on the tag type.
   46 using SimpleLoopT   = std::integral_constant<GatherScatterImplementation, GatherScatterImplementation::SimpleLoop>;
 
   47 using SetIndexZeroT = std::integral_constant<GatherScatterImplementation, GatherScatterImplementation::SetIndexZero>;
 
   48 using BitScanLoopT  = std::integral_constant<GatherScatterImplementation, GatherScatterImplementation::BitScanLoop>;
 
   49 using PopcntSwitchT = std::integral_constant<GatherScatterImplementation, GatherScatterImplementation::PopcntSwitch>;
 
   // Masked gather, SetIndexZero strategy.
   //
   // Rewrites `indexes` via setZeroInverted(mask), performs one gather of all
   // lanes into a temporary vector, and then copies only the lanes selected by
   // `mask` into `v` through where(mask).
   //
   // NOTE(review): the parameter lines for `v`, `mem` and `indexes` are not
   // visible in this listing. Since `indexes` is modified here, it is presumably
   // taken by value -- confirm against the full header.
   51 template <
typename V, 
typename MT, 
typename IT>
 
   52 Vc_ALWAYS_INLINE 
void executeGather(SetIndexZeroT,
 
   56                                     typename V::MaskArgument mask)
 
   // Presumably sets every index whose mask lane is false to zero, so that the
   // full gather below only reads mem[0] for unselected lanes -- TODO confirm
   // setZeroInverted semantics.
   58     indexes.setZeroInverted(static_cast<typename IT::Mask>(mask));
 
   // Gather of all lanes through V's (mem, indexes) constructor.
   59     const V tmp(mem, indexes);
 
   // Conditional assignment: only lanes where `mask` is set are written to v.
   60     where(mask) | v = tmp;
 
   // Masked gather, SimpleLoop strategy.
   //
   // Checks for an empty mask first (marked as the unlikely case), then walks
   // the lanes 0 .. V::Size-1 with a compile-time unrolled loop and loads
   // v[i] = mem[indexes[i]] one lane at a time.
   //
   // NOTE(review): the body of the empty-mask branch and the per-lane mask test
   // that presumably guards the load are not visible in this listing -- confirm
   // against the full header.
   63 template <
typename V, 
typename MT, 
typename IT>
 
   64 Vc_ALWAYS_INLINE 
void executeGather(SimpleLoopT,
 
   68                                     typename V::MaskArgument mask)
 
   // Fast path for the case that no lane is selected.
   70     if (Vc_IS_UNLIKELY(mask.isEmpty())) {
 
   // The lambda captures v, mem, indexes (and mask) by reference; it is invoked
   // once per lane index i.
   73     Common::unrolled_loop<std::size_t, 0, V::Size>([&](std::size_t i) {
 
   75             v[i] = mem[indexes[i]];
 
   // Masked gather, BitScanLoop strategy (x86 inline assembly).
   //
   // Converts the mask to an integer bit field and, while any bit is still set,
   // extracts the lowest and the highest set bit in one asm statement, clears
   // both, and loads the two corresponding lanes. Each iteration therefore
   // handles up to two selected lanes.
   //
   // NOTE(review): the declarations of `i` and `j` and the remaining parameter
   // lines are not visible in this listing.
   79 template <
typename V, 
typename MT, 
typename IT>
 
   80 Vc_ALWAYS_INLINE 
void executeGather(BitScanLoopT,
 
   84                                     typename V::MaskArgument mask)
 
   86     size_t bits = mask.toInt();
 
   87     while (Vc_IS_LIKELY(bits > 0)) {
 
   // AT&T operand order (source, destination):
   //   bsf: i = index of the lowest set bit in bits
   //   bsr: j = index of the highest set bit in bits
   //   btr: clear bit i, then bit j, in bits
   // `bits` is a read-write operand ("+r"); i and j are write-only outputs.
   89         asm(
"bsf %[bits],%[i]\n\t" 
   90             "bsr %[bits],%[j]\n\t" 
   91             "btr %[i],%[bits]\n\t" 
   92             "btr %[j],%[bits]\n\t" 
   93             : [i] 
"=r"(i), [j] 
"=r"(j), [bits] 
"+r"(bits));
 
   // If only one bit was left, i == j and the same lane is simply loaded twice.
   94         v[i] = mem[indexes[i]];
 
   95         v[j] = mem[indexes[j]];
 
   // Masked gather, PopcntSwitch strategy, overload for 16-lane vectors
   // (enabled only when V::size() == 16).
   //
   // Switches on the number of set mask bits (popcnt16). One branch issues a
   // plain full gather via v.gather(mem, indexes); the remaining statements
   // alternately pick the lowest set bit (`low`, _bit_scan_forward) and the
   // highest set bit (`high`, _bit_scan_reverse), load that lane, and remove
   // the handled bits from `bits`.
   //
   // NOTE(review): the `case` labels, `break`s and several intermediate
   // statements are not visible in this listing. `high` is used as a lane index
   // in v[high] and as a bit pattern in `bits ^= high | (1 << low)`; presumably
   // an omitted statement converts it to a bit mask in between, and the cases
   // fall through from larger to smaller popcounts -- confirm against the full
   // header.
  108 template <
typename V, 
typename MT, 
typename IT>
 
  109 Vc_ALWAYS_INLINE 
void executeGather(PopcntSwitchT,
 
  113                                     typename V::MaskArgument mask,
 
  114                                     enable_if<V::size() == 16> = nullarg)
 
  116     unsigned int bits = mask.toInt();
 
  // `high` starts at 0, so `high | (1 << low)` reduces to the low bit alone as
  // long as no high lane has been recorded.
  117     unsigned int low, high = 0;
 
  118     switch (Vc::Detail::popcnt16(bits)) {
 
  // Unmasked gather of all lanes (presumably the all-bits-set case).
  120         v.gather(mem, indexes);
 
  123         low = _bit_scan_forward(bits);
 
  125         v[low] = mem[indexes[low]];
 
  127         high = _bit_scan_reverse(bits);
 
  128         v[high] = mem[indexes[high]];
 
  131         low = _bit_scan_forward(bits);
 
  // Remove the bits that have already been handled from the pending set.
  132         bits ^= high | (1 << low);
 
  133         v[low] = mem[indexes[low]];
 
  135         high = _bit_scan_reverse(bits);
 
  136         v[high] = mem[indexes[high]];
 
  139         low = _bit_scan_forward(bits);
 
  140         bits ^= high | (1 << low);
 
  141         v[low] = mem[indexes[low]];
 
  143         high = _bit_scan_reverse(bits);
 
  144         v[high] = mem[indexes[high]];
 
  147         low = _bit_scan_forward(bits);
 
  148         bits ^= high | (1 << low);
 
  149         v[low] = mem[indexes[low]];
 
  151         high = _bit_scan_reverse(bits);
 
  152         v[high] = mem[indexes[high]];
 
  155         low = _bit_scan_forward(bits);
 
  156         bits ^= high | (1 << low);
 
  157         v[low] = mem[indexes[low]];
 
  159         high = _bit_scan_reverse(bits);
 
  160         v[high] = mem[indexes[high]];
 
  163         low = _bit_scan_forward(bits);
 
  164         bits ^= high | (1 << low);
 
  165         v[low] = mem[indexes[low]];
 
  167         high = _bit_scan_reverse(bits);
 
  168         v[high] = mem[indexes[high]];
 
  171         low = _bit_scan_forward(bits);
 
  172         bits ^= high | (1 << low);
 
  173         v[low] = mem[indexes[low]];
 
  175         high = _bit_scan_reverse(bits);
 
  176         v[high] = mem[indexes[high]];
 
  // Final single-lane load; no further bit bookkeeping follows it.
  178         low = _bit_scan_forward(bits);
 
  179         v[low] = mem[indexes[low]];
 
   // Masked gather, PopcntSwitch strategy, overload for 8-lane vectors
   // (enabled only when V::size() == 8).
   //
   // Switches on the number of set mask bits (popcnt8). One branch issues a
   // plain full gather via v.gather(mem, indexes); the remaining statements
   // alternately load the lane of the lowest set bit (`low`) and of the highest
   // set bit (`high`) and remove handled bits from `bits`.
   //
   // NOTE(review): `case` labels, `break`s and some intermediate statements are
   // not visible in this listing; presumably the cases fall through from larger
   // to smaller popcounts and `high` is turned into a bit mask by an omitted
   // statement before it is used in `bits ^= high | (1 << low)` -- confirm
   // against the full header.
  184 template <
typename V, 
typename MT, 
typename IT>
 
  185 Vc_ALWAYS_INLINE 
void executeGather(PopcntSwitchT,
 
  189                                     typename V::MaskArgument mask,
 
  190                                     enable_if<V::size() == 8> = nullarg)
 
  192     unsigned int bits = mask.toInt();
 
  // `high` starts at 0, so `high | (1 << low)` reduces to the low bit alone as
  // long as no high lane has been recorded.
  193     unsigned int low, high = 0;
 
  194     switch (Vc::Detail::popcnt8(bits)) {
 
  // Unmasked gather of all lanes (presumably the all-bits-set case).
  196         v.gather(mem, indexes);
 
  199         low = _bit_scan_forward(bits);
 
  201         v[low] = mem[indexes[low]];
 
  203         high = _bit_scan_reverse(bits);
 
  204         v[high] = mem[indexes[high]];
 
  207         low = _bit_scan_forward(bits);
 
  // Remove the bits that have already been handled from the pending set.
  208         bits ^= high | (1 << low);
 
  209         v[low] = mem[indexes[low]];
 
  211         high = _bit_scan_reverse(bits);
 
  212         v[high] = mem[indexes[high]];
 
  215         low = _bit_scan_forward(bits);
 
  216         bits ^= high | (1 << low);
 
  217         v[low] = mem[indexes[low]];
 
  219         high = _bit_scan_reverse(bits);
 
  220         v[high] = mem[indexes[high]];
 
  // Final single-lane load; no further bit bookkeeping follows it.
  222         low = _bit_scan_forward(bits);
 
  223         v[low] = mem[indexes[low]];
 
   // Masked gather, PopcntSwitch strategy, overload for 4-lane vectors
   // (enabled only when V::size() == 4).
   //
   // Switches on the number of set mask bits (popcnt4). One branch issues a
   // plain full gather via v.gather(mem, indexes); the remaining statements
   // load the lane of the lowest set bit (`low`) and of the highest set bit
   // (`high`).
   //
   // NOTE(review): `case` labels, `break`s and the statement between the first
   // low-lane lookup and its load are not visible in this listing -- confirm
   // the exact control flow against the full header.
  228 template <
typename V, 
typename MT, 
typename IT>
 
  229 Vc_ALWAYS_INLINE 
void executeGather(PopcntSwitchT,
 
  233                                     typename V::MaskArgument mask,
 
  234                                     enable_if<V::size() == 4> = nullarg)
 
  236     unsigned int bits = mask.toInt();
 
  237     unsigned int low, high = 0;
 
  238     switch (Vc::Detail::popcnt4(bits)) {
 
  // Unmasked gather of all lanes (presumably the all-bits-set case).
  240         v.gather(mem, indexes);
 
  243         low = _bit_scan_forward(bits);
 
  245         v[low] = mem[indexes[low]];
 
  247         high = _bit_scan_reverse(bits);
 
  248         v[high] = mem[indexes[high]];
 
  // Final single-lane load.
  250         low = _bit_scan_forward(bits);
 
  251         v[low] = mem[indexes[low]];
 
   // Masked gather, PopcntSwitch strategy, overload for 2-lane vectors
   // (enabled only when V::size() == 2).
   //
   // Switches on the number of set mask bits; note that the count is taken with
   // popcnt4 here as well. One branch issues a plain full gather via
   // v.gather(mem, indexes); another loads the single lane found by
   // _bit_scan_forward.
   //
   // NOTE(review): the declaration of `low`, the `case` labels and the `break`s
   // are not visible in this listing -- confirm against the full header.
  256 template <
typename V, 
typename MT, 
typename IT>
 
  257 Vc_ALWAYS_INLINE 
void executeGather(PopcntSwitchT,
 
  261                                     typename V::MaskArgument mask,
 
  262                                     enable_if<V::size() == 2> = nullarg)
 
  264     unsigned int bits = mask.toInt();
 
  266     switch (Vc::Detail::popcnt4(bits)) {
 
  // Unmasked gather of all lanes (presumably the both-bits-set case).
  268         v.gather(mem, indexes);
 
  // Single selected lane: its index is the position of the lowest set bit.
  271         low = _bit_scan_forward(bits);
 
  272         v[low] = mem[indexes[low]];
 
  281 #endif // VC_COMMON_GATHERIMPLEMENTATION_H_ 
// Referenced declaration: constexpr WhereImpl::WhereMask< M > where(const M &mask)
// Brief: Conditional assignment.