This bugzilla service is closed. All entries have been migrated to https://gitlab.com/libeigen/eigen
View | Details | Raw Unified | Return to bug 692
Collapse All | Expand All

(-)a/Eigen/src/Core/GenericPacketMath.h (-1 / +17 lines)
Lines 347-360 inline void palign(PacketType& first, co Link Here
347
template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
347
template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
348
{ return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
348
{ return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
349
349
350
template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
350
template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
351
{ return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
351
{ return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
352
352
353
#endif
353
#endif
354
354
355
356
/***************************************************************************
357
 * MetaPacket, that is a collection of N packets.
358
 * For performance reasons, N should be a small multiple of the packet size.
359
***************************************************************************/
360
template <typename Packet, int N> struct MetaPacket {
361
  Packet packet[N];
362
363
  // Initializes (through a pset1 operation) the ith packet with the value at from[i].
364
  // The from address must be aligned.
365
  EIGEN_STRONG_INLINE void broadcast_from(const typename unpacket_traits<Packet>::type* from) {
366
    for (int i = 0; i < N; ++i) {
367
      packet[i] = from[i];
368
    }
369
  }
370
};
371
355
} // end namespace internal
372
} // end namespace internal
356
373
357
} // end namespace Eigen
374
} // end namespace Eigen
358
375
359
#endif // EIGEN_GENERIC_PACKET_MATH_H
376
#endif // EIGEN_GENERIC_PACKET_MATH_H
360
(-)a/Eigen/src/Core/arch/AltiVec/Complex.h (+10 lines)
Lines 205-217 template<> EIGEN_STRONG_INLINE Packet2cf Link Here
205
  return Packet2cf(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_COMPLEX_REV))));
205
  return Packet2cf(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_COMPLEX_REV))));
206
}
206
}
207
207
208
template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x)
208
template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x)
209
{
209
{
210
  return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX_REV));
210
  return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX_REV));
211
}
211
}
212
212
213
template<int N> struct MetaPacket<Packet2cf, N> {
214
  Packet2cf packet[N];
215
216
  EIGEN_STRONG_INLINE void broadcast_from(const std::complex<float>* from) {
217
    for (int i = 0; i < N; ++i) {
218
      packet[i] = pset1<Packet2cf>(from+i);
219
    }
220
  }
221
};
222
213
} // end namespace internal
223
} // end namespace internal
214
224
215
} // end namespace Eigen
225
} // end namespace Eigen
216
226
217
#endif // EIGEN_COMPLEX_ALTIVEC_H
227
#endif // EIGEN_COMPLEX_ALTIVEC_H
(-)a/Eigen/src/Core/arch/AltiVec/PacketMath.h (+20 lines)
Lines 489-501 struct palign_impl<Offset,Packet4i> Link Here
489
{
489
{
490
  static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
490
  static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
491
  {
491
  {
492
    if (Offset!=0)
492
    if (Offset!=0)
493
      first = vec_sld(first, second, Offset*4);
493
      first = vec_sld(first, second, Offset*4);
494
  }
494
  }
495
};
495
};
496
496
497
template<int N> struct MetaPacket<Packet4f, N> {
498
  Packet4f packet[N];
499
500
  EIGEN_STRONG_INLINE void broadcast_from(const float* from) {
501
    for (int i = 0; i < N; ++i) {
502
      packet[i] = pset1<Packet4f>(from+i);
503
    }
504
  }
505
};
506
507
template<int N> struct MetaPacket<Packet4i, N> {
508
  Packet4i packet[N];
509
510
  EIGEN_STRONG_INLINE void broadcast_from(const int* from) {
511
    for (int i = 0; i < N; ++i) {
512
      packet[i] = pset1<Packet4i>(from+i);
513
    }
514
  }
515
};
516
497
} // end namespace internal
517
} // end namespace internal
498
518
499
} // end namespace Eigen
519
} // end namespace Eigen
500
520
501
#endif // EIGEN_PACKET_MATH_ALTIVEC_H
521
#endif // EIGEN_PACKET_MATH_ALTIVEC_H
(-)a/Eigen/src/Core/arch/NEON/Complex.h (+10 lines)
Lines 241-253 template<> EIGEN_STRONG_INLINE Packet2cf Link Here
241
241
242
  // this computes the norm
242
  // this computes the norm
243
  s = vmulq_f32(b.v, b.v);
243
  s = vmulq_f32(b.v, b.v);
244
  rev_s = vrev64q_f32(s);
244
  rev_s = vrev64q_f32(s);
245
245
246
  return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s)));
246
  return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s)));
247
}
247
}
248
248
249
template<int N> struct MetaPacket<Packet2cf, N> {
250
  Packet2cf packet[N];
251
252
  EIGEN_STRONG_INLINE void broadcast_from(const std::complex<float>* from) {
253
    for (int i = 0; i < N; ++i) {
254
      packet[i] = pset1<Packet2cf>(from+i);
255
    }
256
  }
257
};
258
249
} // end namespace internal
259
} // end namespace internal
250
260
251
} // end namespace Eigen
261
} // end namespace Eigen
252
262
253
#endif // EIGEN_COMPLEX_NEON_H
263
#endif // EIGEN_COMPLEX_NEON_H
(-)a/Eigen/src/Core/arch/NEON/PacketMath.h (+22 lines)
Lines 408-420 PALIGN_NEON(2,Packet4f,vextq_f32) Link Here
408
PALIGN_NEON(3,Packet4f,vextq_f32)
408
PALIGN_NEON(3,Packet4f,vextq_f32)
409
PALIGN_NEON(0,Packet4i,vextq_s32)
409
PALIGN_NEON(0,Packet4i,vextq_s32)
410
PALIGN_NEON(1,Packet4i,vextq_s32)
410
PALIGN_NEON(1,Packet4i,vextq_s32)
411
PALIGN_NEON(2,Packet4i,vextq_s32)
411
PALIGN_NEON(2,Packet4i,vextq_s32)
412
PALIGN_NEON(3,Packet4i,vextq_s32)
412
PALIGN_NEON(3,Packet4i,vextq_s32)
413
    
413
    
414
#undef PALIGN_NEON
414
#undef PALIGN_NEON
415
415
416
417
template<int N> struct MetaPacket<Packet4f, N> {
418
  Packet4f packet[N];
419
420
  EIGEN_STRONG_INLINE void broadcast_from(const float* from) {
421
    for (int i = 0; i < N; ++i) {
422
      packet[i] = pset1<Packet4f>(from+i);
423
    }
424
  }
425
};
426
427
template<int N> struct MetaPacket<Packet4i, N> {
428
  Packet4i packet[N];
429
430
  EIGEN_STRONG_INLINE void broadcast_from(const int* from) {
431
    for (int i = 0; i < N; ++i) {
432
      packet[i] = pset1<Packet4i>(from+i);
433
    }
434
  }
435
};
436
437
416
} // end namespace internal
438
} // end namespace internal
417
439
418
} // end namespace Eigen
440
} // end namespace Eigen
419
441
420
#endif // EIGEN_PACKET_MATH_NEON_H
442
#endif // EIGEN_PACKET_MATH_NEON_H
(-)a/Eigen/src/Core/arch/SSE/Complex.h (+30 lines)
Lines 430-442 template<> EIGEN_STRONG_INLINE Packet1cd Link Here
430
  return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1))));
430
  return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1))));
431
}
431
}
432
432
433
EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
433
EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
434
{
434
{
435
  return Packet1cd(preverse(x.v));
435
  return Packet1cd(preverse(x.v));
436
}
436
}
437
437
438
template<int N> struct MetaPacket<Packet2cf, N> {
439
  Packet2cf packet[N];
440
441
  EIGEN_STRONG_INLINE void broadcast_from(const std::complex<float>* from) {
442
    EIGEN_ASM_COMMENT("Start broadcast_from<Packet2cf>");
443
    const int k = N/2;
444
    for (int i = 0; i < k*2; i+=2) {
445
      packet[i+1] = pload<Packet2cf>(from+i);
446
      packet[i+0].v = vec4f_swizzle1(packet[i+1].v, 0,1,0,1);
447
      packet[i+1].v = vec4f_swizzle1(packet[i+1].v, 2,3,2,3);
448
    }
449
    for (int i = k*2; i < N; ++i) {
450
      packet[i] = pload1<Packet2cf>(from+i);
451
    }
452
    EIGEN_ASM_COMMENT("Done broadcast_from<Packet2cf>");
453
  }
454
};
455
456
template<int N> struct MetaPacket<Packet1cd, N> {
457
  Packet1cd packet[N];
458
459
  EIGEN_STRONG_INLINE void broadcast_from(const std::complex<double>* from) {
460
    EIGEN_ASM_COMMENT("Start broadcast_from<Packet1cd>");
461
    for (int i = 0; i < N; ++i) {
462
      packet[i] = pload1<Packet1cd>(from+i);
463
    }
464
    EIGEN_ASM_COMMENT("Done broadcast_from<Packet1cd>");
465
  }
466
};
467
438
} // end namespace internal
468
} // end namespace internal
439
469
440
} // end namespace Eigen
470
} // end namespace Eigen
441
471
442
#endif // EIGEN_COMPLEX_SSE_H
472
#endif // EIGEN_COMPLEX_SSE_H
(-)a/Eigen/src/Core/arch/SSE/PacketMath.h (+58 lines)
Lines 658-670 struct palign_impl<Offset,Packet2d> Link Here
658
    {
658
    {
659
      first = _mm_castps_pd(_mm_movehl_ps(_mm_castpd_ps(first),_mm_castpd_ps(first)));
659
      first = _mm_castps_pd(_mm_movehl_ps(_mm_castpd_ps(first),_mm_castpd_ps(first)));
660
      first = _mm_castps_pd(_mm_movelh_ps(_mm_castpd_ps(first),_mm_castpd_ps(second)));
660
      first = _mm_castps_pd(_mm_movelh_ps(_mm_castpd_ps(first),_mm_castpd_ps(second)));
661
    }
661
    }
662
  }
662
  }
663
};
663
};
664
#endif
664
#endif
665
665
666
template<int N> struct MetaPacket<Packet4f, N> {
667
  Packet4f packet[N];
668
669
  EIGEN_STRONG_INLINE void broadcast_from(const float* from) {
670
    EIGEN_ASM_COMMENT("Start broadcast_from<Packet4f>");
671
    const int k = N/4;
672
    for (int i = 0; i < k*4; i+=4) {
673
      packet[i+3] = pload<Packet4f>(from+i);
674
      packet[i+0] = vec4f_swizzle1(packet[i+3], 0,0,0,0);
675
      packet[i+1] = vec4f_swizzle1(packet[i+3], 1,1,1,1);
676
      packet[i+2] = vec4f_swizzle1(packet[i+3], 2,2,2,2);
677
      packet[i+3] = vec4f_swizzle1(packet[i+3], 3,3,3,3);
678
    }
679
    for (int i = k*4; i < N; ++i) {
680
      packet[i] = pload1<Packet4f>(from+i);
681
    }
682
    EIGEN_ASM_COMMENT("Done broadcast_from<Packet4f>");
683
  }
684
};
685
686
template<int N> struct MetaPacket<Packet2d, N> {
687
  Packet2d packet[N];
688
689
  EIGEN_STRONG_INLINE void broadcast_from(const double* from) {
690
    EIGEN_ASM_COMMENT("Start broadcast_from<Packet2d");
691
    const int k = N/2;
692
    for (int i = 0; i < k*2; i+=2) {
693
      packet[i+1] = pload<Packet2d>(from+i);
694
      packet[i+0] = vec2d_swizzle1(packet[i+1], 0,0);
695
      packet[i+1] = vec2d_swizzle1(packet[i+1], 1,1);
696
    }
697
    for (int i = k*2; i < N; ++i) {
698
      packet[i] = pload1<Packet2d>(from+i);
699
    }
700
    EIGEN_ASM_COMMENT("Done broadcast_from<Packet2d>");
701
  }
702
};
703
704
template<int N> struct MetaPacket<Packet4i, N> {
705
  Packet4i packet[N];
706
707
  EIGEN_STRONG_INLINE void broadcast_from(const int* from) {
708
    EIGEN_ASM_COMMENT("Start broadcast_from<Packet4i>");
709
    const int k = N/4;
710
    for (int i = 0; i < k*4; i+=4) {
711
      packet[i+3] = pload<Packet4i>(from+i);
712
      packet[i+0] = vec4i_swizzle1(packet[i+3], 0,0,0,0);
713
      packet[i+1] = vec4i_swizzle1(packet[i+3], 1,1,1,1);
714
      packet[i+2] = vec4i_swizzle1(packet[i+3], 2,2,2,2);
715
      packet[i+3] = vec4i_swizzle1(packet[i+3], 3,3,3,3);
716
    }
717
    for (int i = k*4; i < N; ++i) {
718
      packet[i] = pload1<Packet4i>(from+i);
719
    }
720
    EIGEN_ASM_COMMENT("Done broadcast_from<Packet4i>");
721
  }
722
};
723
666
} // end namespace internal
724
} // end namespace internal
667
725
668
} // end namespace Eigen
726
} // end namespace Eigen
669
727
670
#endif // EIGEN_PACKET_MATH_SSE_H
728
#endif // EIGEN_PACKET_MATH_SSE_H
(-)a/test/packetmath.cpp (+9 lines)
Lines 198-213 template<typename Scalar> void packetmat Link Here
198
  }
198
  }
199
  internal::pstore(data2, internal::preduxp(packets));
199
  internal::pstore(data2, internal::preduxp(packets));
200
  VERIFY(areApproxAbs(ref, data2, PacketSize, refvalue) && "internal::preduxp");
200
  VERIFY(areApproxAbs(ref, data2, PacketSize, refvalue) && "internal::preduxp");
201
201
202
  for (int i=0; i<PacketSize; ++i)
202
  for (int i=0; i<PacketSize; ++i)
203
    ref[i] = data1[PacketSize-i-1];
203
    ref[i] = data1[PacketSize-i-1];
204
  internal::pstore(data2, internal::preverse(internal::pload<Packet>(data1)));
204
  internal::pstore(data2, internal::preverse(internal::pload<Packet>(data1)));
205
  VERIFY(areApprox(ref, data2, PacketSize) && "internal::preverse");
205
  VERIFY(areApprox(ref, data2, PacketSize) && "internal::preverse");
206
207
  internal::MetaPacket<Packet, 2*PacketSize+1> meta;
208
  meta.broadcast_from(data1);
209
  for (int i = 0; i < PacketSize; ++i) {
210
    internal::pstore(data2, meta.packet[i]);
211
    for (int j = 0; j < PacketSize; ++j) {
212
      VERIFY(isApproxAbs(data2[j], data1[i], refvalue));
213
    }
214
  }
206
}
215
}
207
216
208
template<typename Scalar> void packetmath_real()
217
template<typename Scalar> void packetmath_real()
209
{
218
{
210
  using std::abs;
219
  using std::abs;
211
  typedef typename internal::packet_traits<Scalar>::type Packet;
220
  typedef typename internal::packet_traits<Scalar>::type Packet;
212
  const int PacketSize = internal::packet_traits<Scalar>::size;
221
  const int PacketSize = internal::packet_traits<Scalar>::size;
213
222

Return to bug 692