Lines 592-625
template<> EIGEN_STRONG_INLINE void pref
Link Here
|
592 |
|
592 |
|
593 |
// FIXME only store the 2 first elements ? |
593 |
// FIXME only store the 2 first elements ? |
594 |
template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(a, 0); } |
594 |
template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(a, 0); } |
595 |
|
595 |
|
596 |
template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { return vcombine_f64(vget_high_f64(a), vget_low_f64(a)); } |
596 |
template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { return vcombine_f64(vget_high_f64(a), vget_low_f64(a)); } |
597 |
|
597 |
|
598 |
template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vabsq_f64(a); } |
598 |
template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vabsq_f64(a); } |
599 |
|
599 |
|
|
|
600 |
// FIXME: gcc 4.9.2 and below handle float64x1_t as a float64_t, not a vector of float64_t |
601 |
// clang always handles it as a vector |
602 |
#if EIGEN_GNUC_PATCH_AT_MOST(4,9,2) |
600 |
template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return vget_low_f64(a) + vget_high_f64(a); } |
603 |
template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return vget_low_f64(a) + vget_high_f64(a); } |
|
|
604 |
#else |
605 |
template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return (vget_low_f64(a) + vget_high_f64(a))[0]; } |
606 |
#endif |
601 |
|
607 |
|
602 |
template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs) |
608 |
template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs) |
603 |
{ |
609 |
{ |
604 |
float64x2_t trn1, trn2; |
610 |
float64x2_t trn1, trn2; |
605 |
|
611 |
|
606 |
// NEON zip performs interleaving of the supplied vectors. |
612 |
// NEON zip performs interleaving of the supplied vectors. |
607 |
// We perform two interleaves in a row to acquire the transposed vector |
613 |
// We perform two interleaves in a row to acquire the transposed vector |
608 |
trn1 = vzip1q_f64(vecs[0], vecs[1]); |
614 |
trn1 = vzip1q_f64(vecs[0], vecs[1]); |
609 |
trn2 = vzip2q_f64(vecs[0], vecs[1]); |
615 |
trn2 = vzip2q_f64(vecs[0], vecs[1]); |
610 |
|
616 |
|
611 |
// Do the addition of the resulting vectors |
617 |
// Do the addition of the resulting vectors |
612 |
return vaddq_f64(trn1, trn2); |
618 |
return vaddq_f64(trn1, trn2); |
613 |
} |
619 |
} |
614 |
// Other reduction functions: |
620 |
// Other reduction functions: |
615 |
// mul |
621 |
// mul |
|
|
622 |
// FIXME: gcc 4.9.2 and below handle float64x1_t as a float64_t, not a vector of float64_t |
623 |
// clang always handles it as a vector |
624 |
#if EIGEN_GNUC_PATCH_AT_MOST(4,9,2) |
616 |
template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) { return vget_low_f64(a) * vget_high_f64(a); } |
625 |
template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) { return vget_low_f64(a) * vget_high_f64(a); } |
617 |
|
626 |
#else |
|
|
627 |
template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) { return (vget_low_f64(a) * vget_high_f64(a))[0]; } |
628 |
#endif |
618 |
// min |
629 |
// min |
619 |
template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(vpminq_f64(a, a), 0); } |
630 |
template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(vpminq_f64(a, a), 0); } |
620 |
|
631 |
|
621 |
// max |
632 |
// max |
622 |
template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(vpmaxq_f64(a, a), 0); } |
633 |
template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(vpmaxq_f64(a, a), 0); } |
623 |
|
634 |
|
624 |
// this PALIGN_NEON business is to work around a bug in LLVM Clang 3.0 causing incorrect compilation errors, |
635 |
// this PALIGN_NEON business is to work around a bug in LLVM Clang 3.0 causing incorrect compilation errors, |
625 |
// see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074 |
636 |
// see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074 |