Bugzilla – Attachment 960 Details for
Bug 1777
Make scalar and packet paths consistent
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Requests
|
Help
|
Log In
[x]
|
Forgot Password
Login:
[x]
This Bugzilla service is closed. All entries have been migrated to
https://gitlab.com/libeigen/eigen
[patch]
Patch to make the logistic function scalar-SIMD consistent
bug1777_logistic.diff (text/plain), 5.21 KB, created by
Gael Guennebaud
on 2019-11-14 15:08:39 UTC
(
hide
)
Description:
Patch to make the logistic function scalar-SIMD consistent
Filename:
MIME Type:
Creator:
Gael Guennebaud
Created:
2019-11-14 15:08:39 UTC
Size:
5.21 KB
patch
obsolete
>diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h >--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h >+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h >@@ -228,16 +228,20 @@ Packet pexp_float(const Packet _x) > y = pmadd(y, r, cst_cephes_exp_p5); > y = pmadd(y, r2, r); > y = padd(y, cst_1); > > // Return 2^m * exp(r). > return pmax(pldexp(y,m), _x); > } > >+// make it the default path for scalar float >+template<> >+float pexp(const float& a) { return pexp_float(a); } >+ > template <typename Packet> > EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS > EIGEN_UNUSED > Packet pexp_double(const Packet _x) > { > Packet x = _x; > > const Packet cst_1 = pset1<Packet>(1.0); >@@ -296,16 +300,20 @@ Packet pexp_double(const Packet _x) > x = pdiv(px, psub(qx, px)); > x = pmadd(cst_2, x, cst_1); > > // Construct the result 2^n * exp(g) = e * x. The max is used to catch > // non-finite values in the input. > return pmax(pldexp(x,fx), _x); > } > >+// make it the default path for scalar double >+template<> >+double pexp(const double& a) { return pexp_double(a); } >+ > // The following code is inspired by the following stack-overflow answer: > // https://stackoverflow.com/questions/30463616/payne-hanek-algorithm-implementation-in-c/30465751#30465751 > // It has been largely optimized: > // - By-pass calls to frexp. > // - Aligned loads of required 96 bits of 2/pi. This is accomplished by > // (1) balancing the mantissa and exponent to the required bits of 2/pi are > // aligned on 8-bits, and (2) replicating the storage of the bits of 2/pi. > // - Avoid a branch in rounding and extraction of the remaining fractional part. 
>diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h >--- a/Eigen/src/Core/functors/UnaryFunctors.h >+++ b/Eigen/src/Core/functors/UnaryFunctors.h >@@ -889,18 +889,17 @@ struct functor_traits<scalar_sign_op<Sca > /** \internal > * \brief Template functor to compute the logistic function of a scalar > * \sa class CwiseUnaryOp, ArrayBase::logistic() > */ > template <typename T> > struct scalar_logistic_op { > EIGEN_EMPTY_STRUCT_CTOR(scalar_logistic_op) > EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const { >- const T one = T(1); >- return one / (one + numext::exp(-x)); >+ return packetOp(x); > } > > template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE > Packet packetOp(const Packet& x) const { > const Packet one = pset1<Packet>(T(1)); > return pdiv(one, padd(one, pexp(pnegate(x)))); > } > }; >@@ -914,19 +913,17 @@ struct scalar_logistic_op { > * logistic is interpolated because it was easier to make the fit converge. > * > */ > > template <> > struct scalar_logistic_op<float> { > EIGEN_EMPTY_STRUCT_CTOR(scalar_logistic_op) > EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator()(const float& x) const { >- if (x < -18.0f) return 0.0f; >- else if (x > 18.0f) return 1.0f; >- else return 1.0f / (1.0f + numext::exp(-x)); >+ return packetOp(x); > } > > template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE > Packet packetOp(const Packet& _x) const { > // Clamp the inputs to the range [-18, 18] since anything outside > // this range is 0.0f or 1.0f in single-precision. > const Packet x = pmax(pmin(_x, pset1<Packet>(18.0)), pset1<Packet>(-18.0)); > >diff --git a/test/packetmath.cpp b/test/packetmath.cpp >--- a/test/packetmath.cpp >+++ b/test/packetmath.cpp >@@ -566,16 +566,31 @@ template<typename Scalar,typename Packet > if (PacketTraits::HasTanh) { > // NOTE this test migh fail with GCC prior to 6.3, see MathFunctionsImpl.h for details. 
> data1[0] = std::numeric_limits<Scalar>::quiet_NaN(); > packet_helper<internal::packet_traits<Scalar>::HasTanh,Packet> h; > h.store(data2, internal::ptanh(h.load(data1))); > VERIFY((numext::isnan)(data2[0])); > } > >+ { >+ internal::scalar_logistic_op<Scalar> logistic; >+ for (int i=0; i<size; ++i) >+ { >+ data1[i] = internal::random<Scalar>(-20,20); >+ } >+ internal::pstore(data2, logistic.packetOp(internal::pload<Packet>(data1))); >+ for (int i=0; i<PacketSize; ++i) { >+ VERIFY_IS_APPROX(data2[i],logistic(data1[i])); >+ #ifdef EIGEN_VECTORIZE // don't check for exactness when using the i387 FPU >+ VERIFY_IS_EQUAL(data2[i],logistic(data1[i])); >+ #endif >+ } >+ } >+ > #if EIGEN_HAS_C99_MATH > { > data1[0] = std::numeric_limits<Scalar>::quiet_NaN(); > packet_helper<internal::packet_traits<Scalar>::HasLGamma,Packet> h; > h.store(data2, internal::plgamma(h.load(data1))); > VERIFY((numext::isnan)(data2[0])); > } > if (internal::packet_traits<Scalar>::HasErf) { >@@ -960,17 +975,16 @@ struct runner<Scalar,PacketType,false,fa > runall<Scalar,PacketType>::run(); > } > }; > > EIGEN_DECLARE_TEST(packetmath) > { > g_first_pass = true; > for(int i = 0; i < g_repeat; i++) { >- > CALL_SUBTEST_1( runner<float>::run() ); > CALL_SUBTEST_2( runner<double>::run() ); > CALL_SUBTEST_3( runner<int>::run() ); > CALL_SUBTEST_4( runner<std::complex<float> >::run() ); > CALL_SUBTEST_5( runner<std::complex<double> >::run() ); > CALL_SUBTEST_6(( packetmath<half,internal::packet_traits<half>::type>() )); > g_first_pass = false; > }
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 1777
: 960