#include #include #include #include #include #include using namespace Eigen; static void computeDistances(Ref out, const std::vector & points) { auto np = points.size(); for (int i = 0; i < np; ++i) { for (int j = 0; j < np; ++j) { out(i, j) = (points[j] - points[i]).squaredNorm(); } } } static void performTest(int np, int nTrials) { // Create a sample set of points std::vector points; points.reserve(np); for (int i = 0; i < np; ++i) { float iFloat = (float)i; Vector4f point(0.0 + iFloat, 1.0 + iFloat, 2.0 + iFloat, 3.0 + iFloat); points.push_back(point); } MatrixXf result(np, np); clock_t begin = clock(); for (int i = 0; i < nTrials; ++i) { // Compute the (squared) distances between all the couples of points. computeDistances(result, points); } clock_t end = clock(); double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC; std::cout << "nTrials\t" << nTrials << "\tnPoints\t" << np << "\ttime\t" << elapsed_secs << "\r\n"; } int main(int ac, char* av[]) { performTest(1000, 200); getchar(); } Assembly obtained with Eigen 3.2.10: 00007FF6D64F12F0 add rcx,rcx 00007FF6D64F12F3 vmovups xmm1,xmmword ptr [rbx+rcx*8] 00007FF6D64F12F8 vsubps xmm1,xmm1,xmmword ptr [rbx+rax*8] 00007FF6D64F12FD vmulps xmm3,xmm1,xmm1 00007FF6D64F1301 vmovhlps xmm1,xmm3,xmm3 00007FF6D64F1305 vaddps xmm3,xmm1,xmm3 00007FF6D64F1309 vshufps xmm0,xmm3,xmm3,1 00007FF6D64F130E vaddss xmm2,xmm3,xmm0 00007FF6D64F1312 vmovss dword ptr [r10],xmm2 00007FF6D64F1317 inc r9d 00007FF6D64F131A lea r10,[r10+0FA0h] 00007FF6D64F1321 movsxd rcx,r9d 00007FF6D64F1324 cmp rcx,r11 00007FF6D64F1327 jb performTest+170h (07FF6D64F12F0h) Assembly obtained with Eigen 3.3.1: 00007FF6D13812E0 shl rcx,4 00007FF6D13812E4 add rcx,rbx 00007FF6D13812E7 mov qword ptr [rbp-79h],rcx 00007FF6D13812EB mov qword ptr [rbp-71h],rax 00007FF6D13812EF vmovups ymm0,ymmword ptr [rsp+28h] 00007FF6D13812F5 vmovups ymmword ptr [rbp-59h],ymm0 00007FF6D13812FA vmovups xmm0,xmmword ptr [rcx] 00007FF6D13812FE vsubps xmm1,xmm0,xmmword ptr [rax] 
00007FF6D1381302 vmulps xmm3,xmm1,xmm1
00007FF6D1381306 vhaddps xmm1,xmm3,xmm3
00007FF6D138130A vhaddps xmm3,xmm1,xmm1
00007FF6D138130E vmovss dword ptr [r9],xmm3
00007FF6D1381313 inc r8d
00007FF6D1381316 lea r9,[r9+0FA0h]
00007FF6D138131D movsxd rcx,r8d
00007FF6D1381320 cmp rcx,r11
00007FF6D1381323 jb performTest+150h (07FF6D13812E0h)