Working notes - Indexing++

From Eigen
Revision as of 16:40, 21 December 2016 by Ggael (Talk | contribs)

Jump to: navigation, search

The goal of this page is to summarize the different ideas and working plan to (finally!) provide support for flexible row/column indexing in Eigen. See this bug report.

Needs

We aim to support various indexing mechanisms. For each dimension, we would like to be able to have any of:

  • all: as in A(:)
  • singleton: as in A(3) (already supported!)
  • index-array: as in A([3 1 5])
  • range-based: as in A(3:9) (partly supported via Block)
  • slicing: as in A(3:2:9)
  • negative indices: as in A(end-3) or A(3:2:end-1)
  • boolean masking: as in A(A>0)

All of this with a concise and unambiguous API! We should also be able to provide compile-time information such as lengths.

C++11 API

Since achieving a lean API in C++03 seems to be rather impossible, let's first see what could be done in C++11 and provide more verbose fallbacks for C++03.

Better than a long story, here is a showcase demo:

#include <iostream>
#include <array>
#include <valarray>
#include <vector>
#include <Eigen/Dense>
using namespace Eigen;
using namespace std;
 
struct all_t {};
static const all_t all{};
 
struct shifted_last {
  int offset;
};
 
struct last_t {
  shifted_last operator- (int offset) const { return shifted_last{offset}; }
};
static const last_t last{};
 
struct Range {
  Range(Index f, Index l) : m_first(f), m_last(l) {}
  Index m_first, m_last;
  Index size() const { return m_last-m_first+1; }
  Index operator[] (Index k) { return m_first + k; }
};
 
struct RangeLast {
  RangeLast(Index f, last_t) : m_first(f) {}
  Index m_first;
};
 
struct RangeShiftedLast {
  RangeShiftedLast(Index f, shifted_last l) : m_first(f), m_last(l) {}
  Index m_first;
  shifted_last m_last;
};
 
struct Slice {
  Slice(Index f, Index s, Index l) : m_first(f), m_step(s), m_last(l) {}
  Index m_first, m_step, m_last;
  Index size() const { return (m_last-m_first+m_step)/m_step; }
  Index operator[] (Index k) { return m_first + k*m_step; }
};
struct SliceLast {
  SliceLast(Index f, Index s, last_t) : m_first(f), m_step(s) {}
  Index m_first, m_step;
};
 
struct SliceShiftedLast {
  SliceShiftedLast(Index f, Index s, shifted_last l) : m_first(f), m_step(s), m_last(l) {}
  Index m_first, m_step;
  shifted_last m_last;
};
 
// NOTE that declaring value_type here is needed to make SFINAE works properly in case T does not exhibit value_type
// Another solution would be to specialize has_boolean_value_type and has_index_value_type for Range, Slice, etc.
template<typename T,typename value_type=typename T::value_type>
struct has_boolean_value_type {
  enum {
    value =  bool(internal::is_same<value_type,bool>::value)
#if EIGEN_COMP_CLANG
    // Workaround clang's issue:
    //  (valarray::operator<)::value_type is int instead of bool,
    //  so we check the return type of (valarray::operator<).operator[], which is bool!
          || bool(internal::is_same<typename internal::remove_all<decltype(declval<T>()[0])>::type,bool>::value)
#endif
  };
};
 
template<typename T> struct is_index { enum { value = std::is_integral<T>::value }; };
template<> struct is_index<bool> { enum { value = false }; };
 
template<typename T,typename value_type=typename T::value_type>
struct has_index_value_type {
  enum {
    value = is_index<value_type>::value
  };
};
 
struct MyMat {
 
  void operator()(Index k) { cout << "singleton: "; cout << k << '\n'; }
 
  void operator()(all_t) { cout << "all\n"; }
 
  void operator()(Range ids) { cout << "range-based: ";  print_indices(ids);  }
  void operator()(RangeLast ids) { return operator()({ids.m_first,m_size-1});  }
  void operator()(RangeShiftedLast ids) { return operator()({ids.m_first,m_size-ids.m_last.offset-1});  }
 
  void operator()(Slice ids) { cout << "slice-based: ";  print_indices(ids);  }
  void operator()(SliceLast ids) { return operator()(Slice{ids.m_first,ids.m_step,m_size-1});  }
  void operator()(SliceShiftedLast ids) { return operator()(Slice{ids.m_first,ids.m_step,m_size-ids.m_last.offset-1});  }
 
  template<typename Indices>
  typename internal::enable_if<has_index_value_type<Indices>::value>::type
  operator()(const Indices &ids) { cout << "index-based: "; print_indices(ids); }
 
  template<typename Mask>
  typename internal::enable_if<has_boolean_value_type<Mask>::value>::type
  operator()(const Mask &mask) {
    cout << "mask-based: ";
    for(int k=0; k<mask.size(); ++k)
      if(mask[k]) cout << k << " ";
    cout << "\n";
  }
 
protected:
  template<typename T>
  void print_indices(T& ids) {
    for(int k=0; k<ids.size(); ++k)
      cout << ids[k] << " ";
    cout << "\n";
  }
 
  Index m_size = 11;
};
 
int main()
{
  ArrayXd eia(10); eia.setRandom();
  valarray<double> vala(10); Map<ArrayXd>(&vala[0],10) = eia;
  ArrayXi eii(4); eii << 3, 1, 7, 5;
 
  MyMat mat;
  mat(5);           // singleton
  mat(all);         // all
  mat({3,9});       // range
  mat(Range{3,9});  // range
  mat({3,last});    // range
  mat({3,last-2});  // range
  mat({9,3});       // empty range (as in MatLab), need to use a slice:
  mat({9,-1,3});    // slice
  mat({9,-2,1});    // slice
  mat({3,2,9});     // slice
  mat({3,2,last});   // slice
  mat({3,3,last-3}); // slice
  mat(valarray<bool>{true,false,false,true});   // mask
  mat(vector<bool>{true,false,false,true});     // mask
  mat(eia>0);                                   // mask
  mat(vala>0.0);                                // mask
  mat(eii);                       // index-based
  mat(valarray<int>{5,2,5,6});    // index-based
  mat(array<int,4>{5,2,5,6});     // index-based
 
}

that returns:

singleton: 5
all
range-based: 3 4 5 6 7 8 9
range-based: 3 4 5 6 7 8 9
range-based: 3 4 5 6 7 8 9 10
range-based: 3 4 5 6 7 8
range-based:
slice-based: 9 8 7 6 5 4 3
slice-based: 9 7 5 3 1
slice-based: 3 5 7 9
slice-based: 3 5 7 9
slice-based: 3 6
mask-based: 0 3
mask-based: 0 3
mask-based: 2 4 7 8 9
mask-based: 2 4 7 8 9
index-based: 3 1 7 5
index-based: 5 2 5 6
index-based: 5 2 5 6


Here, we use the word 'last' instead of Matlab's 'end' to (1) reduce collision with std::end, and (2) make it more consistent with STL's end as 'last' refer to the last element whereas 'end' refers to the last+1.

Still need to work on compile-time sizes, and triple check for possible conflicts.


What does not work:

mat({5,2,6,7,1});                                // needs to explicitly use a valarray, array, or ...
mat({true,false,false,true,true,false,true});    // needs to explicitly use a valarray, array, or ...

C++03 fallback API

Internal implementation details

Performance

One could be tempted to leverage AVX2 gather/scatter instructions, but those are horribly slow. Better emulate them for now.