This bugzilla service is closed. All entries have been migrated to https://gitlab.com/libeigen/eigen
View | Details | Raw Unified | Return to bug 1699
Collapse All | Expand All

(-)a/Eigen/src/Core/GenericPacketMath.h (+11 lines)
Lines 500-515 Packet ptanh(const Packet& a) { using st Link Here
500
template <typename Packet>
500
template <typename Packet>
501
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet
501
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet
502
perf(const Packet& a) {
502
perf(const Packet& a) {
503
    EIGEN_STATIC_ASSERT((internal::is_same<Packet, Packet>::value == false),
503
    EIGEN_STATIC_ASSERT((internal::is_same<Packet, Packet>::value == false),
504
                        THIS_TYPE_IS_NOT_SUPPORTED);
504
                        THIS_TYPE_IS_NOT_SUPPORTED);
505
    return Packet(0);
505
    return Packet(0);
506
}
506
}
507
507
508
/** \internal \returns the error function of \a a (coeff-wise). There is no
509
 * default type-agnostic implementation outside of C++11, so this should just
510
 * fail for now. */
511
template <typename Packet>
512
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet
513
perf(const Packet& a) {
514
    EIGEN_STATIC_ASSERT((internal::is_same<Packet, Packet>::value == false),
515
                        THIS_TYPE_IS_NOT_SUPPORTED);
516
    return Packet(0);
517
}
518
508
/** \internal \returns the exp of \a a (coeff-wise) */
519
/** \internal \returns the exp of \a a (coeff-wise) */
509
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
520
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
510
Packet pexp(const Packet& a) { using std::exp; return exp(a); }
521
Packet pexp(const Packet& a) { using std::exp; return exp(a); }
511
522
512
/** \internal \returns the expm1 of \a a (coeff-wise) */
523
/** \internal \returns the expm1 of \a a (coeff-wise) */
513
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
524
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
514
Packet pexpm1(const Packet& a) { return numext::expm1(a); }
525
Packet pexpm1(const Packet& a) { return numext::expm1(a); }
515
526
(-)a/Eigen/src/Core/MathFunctionsImpl.h (+52 lines)
Lines 113-128 T generic_fast_erf_float(const T& a_x) { Link Here
113
  q = pmadd(x2, q, beta_4);
113
  q = pmadd(x2, q, beta_4);
114
  q = pmadd(x2, q, beta_2);
114
  q = pmadd(x2, q, beta_2);
115
  q = pmadd(x2, q, beta_0);
115
  q = pmadd(x2, q, beta_0);
116
116
117
  // Divide the numerator by the denominator.
117
  // Divide the numerator by the denominator.
118
  return pdiv(p, q);
118
  return pdiv(p, q);
119
}
119
}
120
120
121
/** \internal \returns the error function of \a a (coeff-wise)
122
    Doesn't do anything fancy, just a 13/8-degree rational interpolant which
123
    is accurate up to a couple of ulp in the range [-4, 4], outside of which
124
    fl(erf(x)) = +/-1.
125
126
    This implementation works on both scalars and packets.
127
*/
128
template <typename T>
129
T generic_fast_erf_float(const T& a_x) {
130
  // Clamp the inputs to the range [-4, 4] since anything outside
131
  // this range is +/-1.0f in single-precision.
132
  const T plus_4 = pset1<T>(4.f);
133
  const T minus_4 = pset1<T>(-4.f);
134
  const T x = pmax(pmin(a_x, plus_4), minus_4);
135
  // The monomial coefficients of the numerator polynomial (odd).
136
  const T alpha_1 = pset1<T>(-1.60960333262415e-02f);
137
  const T alpha_3 = pset1<T>(-2.95459980854025e-03f);
138
  const T alpha_5 = pset1<T>(-7.34990630326855e-04f);
139
  const T alpha_7 = pset1<T>(-5.69250639462346e-05f);
140
  const T alpha_9 = pset1<T>(-2.10102402082508e-06f);
141
  const T alpha_11 = pset1<T>(2.77068142495902e-08f);
142
  const T alpha_13 = pset1<T>(-2.72614225801306e-10f);
143
144
  // The monomial coefficients of the denominator polynomial (even).
145
  const T beta_0 = pset1<T>(-1.42647390514189e-02f);
146
  const T beta_2 = pset1<T>(-7.37332916720468e-03f);
147
  const T beta_4 = pset1<T>(-1.68282697438203e-03f);
148
  const T beta_6 = pset1<T>(-2.13374055278905e-04f);
149
  const T beta_8 = pset1<T>(-1.45660718464996e-05f);
150
151
  // Since the polynomials are odd/even, we need x^2.
152
  const T x2 = pmul(x, x);
153
154
  // Evaluate the numerator polynomial p.
155
  T p = pmadd(x2, alpha_13, alpha_11);
156
  p = pmadd(x2, p, alpha_9);
157
  p = pmadd(x2, p, alpha_7);
158
  p = pmadd(x2, p, alpha_5);
159
  p = pmadd(x2, p, alpha_3);
160
  p = pmadd(x2, p, alpha_1);
161
  p = pmul(x, p);
162
163
  // Evaluate the denominator polynomial q.
164
  T q = pmadd(x2, beta_8, beta_6);
165
  q = pmadd(x2, q, beta_4);
166
  q = pmadd(x2, q, beta_2);
167
  q = pmadd(x2, q, beta_0);
168
169
  // Divide the numerator by the denominator.
170
  return pdiv(p, q);
171
}
172
121
template<typename RealScalar>
173
template<typename RealScalar>
122
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
174
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
123
RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y)
175
RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y)
124
{
176
{
125
  EIGEN_USING_STD_MATH(sqrt);
177
  EIGEN_USING_STD_MATH(sqrt);
126
  RealScalar p, qp;
178
  RealScalar p, qp;
127
  p = numext::maxi(x,y);
179
  p = numext::maxi(x,y);
128
  if(p==RealScalar(0)) return RealScalar(0);
180
  if(p==RealScalar(0)) return RealScalar(0);
(-)a/Eigen/src/Core/arch/AVX/MathFunctions.h (+8 lines)
Lines 25-40 psin<Packet8f>(const Packet8f& _x) { Link Here
25
}
25
}
26
26
27
template <>
27
template <>
28
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
28
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
29
pcos<Packet8f>(const Packet8f& _x) {
29
pcos<Packet8f>(const Packet8f& _x) {
30
  return pcos_float(_x);
30
  return pcos_float(_x);
31
}
31
}
32
32
33
// Error function.
34
template <>
35
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
36
perf<Packet8f>(const Packet8f& x) {
37
  return internal::generic_fast_erf_float(x);
38
}
39
40
// Natural logarithm function for floats.
33
template <>
41
template <>
34
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
42
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
35
plog<Packet8f>(const Packet8f& _x) {
43
plog<Packet8f>(const Packet8f& _x) {
36
  return plog_float(_x);
44
  return plog_float(_x);
37
}
45
}
38
46
39
// Exponential function. Works by writing "x = m*log(2) + r" where
47
// Exponential function. Works by writing "x = m*log(2) + r" where
40
// "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then
48
// "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then
(-)a/Eigen/src/Core/arch/AVX512/MathFunctions.h (+12 lines)
Lines 400-412 ptanh<Packet16f>(const Packet16f& _x) { Link Here
400
}
400
}
401
401
402
template <>
402
template <>
403
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
403
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
404
perf<Packet16f>(const Packet16f& _x) {
404
perf<Packet16f>(const Packet16f& _x) {
405
  return internal::generic_fast_erf_float(_x);
405
  return internal::generic_fast_erf_float(_x);
406
}
406
}
407
407
408
template <>
409
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
410
ptanh<Packet16f>(const Packet16f& _x) {
411
  return internal::generic_fast_tanh_float(_x);
412
}
413
414
template <>
415
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
416
perf<Packet16f>(const Packet16f& _x) {
417
  return internal::generic_fast_erf_float(_x);
418
}
419
408
}  // end namespace internal
420
}  // end namespace internal
409
421
410
}  // end namespace Eigen
422
}  // end namespace Eigen
411
423
412
#endif  // THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
424
#endif  // THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
(-)a/Eigen/src/Core/arch/AVX512/PacketMath.h (+2 lines)
Lines 77-92 template<> struct packet_traits<double> Link Here
77
  enum {
77
  enum {
78
    Vectorizable = 1,
78
    Vectorizable = 1,
79
    AlignedOnScalar = 1,
79
    AlignedOnScalar = 1,
80
    size = 8,
80
    size = 8,
81
    HasHalfPacket = 1,
81
    HasHalfPacket = 1,
82
#if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT)
82
#if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT)
83
    HasSqrt = EIGEN_FAST_MATH,
83
    HasSqrt = EIGEN_FAST_MATH,
84
    HasRsqrt = EIGEN_FAST_MATH,
84
    HasRsqrt = EIGEN_FAST_MATH,
85
    HasTanh = EIGEN_FAST_MATH,
86
    HasErf = EIGEN_FAST_MATH,
85
#endif
87
#endif
86
    HasDiv = 1
88
    HasDiv = 1
87
  };
89
  };
88
};
90
};
89
91
90
/* TODO Implement AVX512 for integers
92
/* TODO Implement AVX512 for integers
91
template<> struct packet_traits<int>    : default_packet_traits
93
template<> struct packet_traits<int>    : default_packet_traits
92
{
94
{
(-)a/Eigen/src/Core/arch/AltiVec/MathFunctions.h (+14 lines)
Lines 87-99 ptanh<Packet4f>(const Packet4f& x) { Link Here
87
87
88
// Error function.
88
// Error function.
89
template <>
89
template <>
90
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
90
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
91
perf<Packet4f>(const Packet4f& x) {
91
perf<Packet4f>(const Packet4f& x) {
92
  return internal::generic_fast_erf_float(x);
92
  return internal::generic_fast_erf_float(x);
93
}
93
}
94
94
95
// Hyperbolic Tangent function.
96
template <>
97
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
98
ptanh<Packet4f>(const Packet4f& x) {
99
  return internal::generic_fast_tanh_float(x);
100
}
101
102
// Error function.
103
template <>
104
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
105
perf<Packet4f>(const Packet4f& x) {
106
  return internal::generic_fast_erf_float(x);
107
}
108
95
}  // end namespace internal
109
}  // end namespace internal
96
110
97
}  // end namespace Eigen
111
}  // end namespace Eigen
98
112
99
#endif  // EIGEN_MATH_FUNCTIONS_ALTIVEC_H
113
#endif  // EIGEN_MATH_FUNCTIONS_ALTIVEC_H
(-)a/Eigen/src/Core/arch/MSA/MathFunctions.h (+7 lines)
Lines 323-338 pcos<Packet4f>(const Packet4f& x) { Link Here
323
323
324
// Error function.
324
// Error function.
325
template <>
325
template <>
326
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
326
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
327
perf<Packet4f>(const Packet4f& x) {
327
perf<Packet4f>(const Packet4f& x) {
328
  return internal::generic_fast_erf_float(x);
328
  return internal::generic_fast_erf_float(x);
329
}
329
}
330
330
331
// Error function.
332
template <>
333
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
334
perf<Packet4f>(const Packet4f& x) {
335
  return internal::generic_fast_erf_float(x);
336
}
337
331
template <>
338
template <>
332
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d
339
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d
333
pexp<Packet2d>(const Packet2d& _x) {
340
pexp<Packet2d>(const Packet2d& _x) {
334
  // Limiting double-precision pexp's argument to [-1024, +1024] lets pexp
341
  // Limiting double-precision pexp's argument to [-1024, +1024] lets pexp
335
  // reach 0 and INFINITY naturally.
342
  // reach 0 and INFINITY naturally.
336
  static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -1024.0);
343
  static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -1024.0);
337
  static _EIGEN_DECLARE_CONST_Packet2d(exp_hi, +1024.0);
344
  static _EIGEN_DECLARE_CONST_Packet2d(exp_hi, +1024.0);
338
  static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
345
  static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
(-)a/Eigen/src/Core/arch/NEON/MathFunctions.h (+14 lines)
Lines 47-59 ptanh<Packet4f>(const Packet4f& x) { Link Here
47
47
48
// Error function.
48
// Error function.
49
template <>
49
template <>
50
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
50
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
51
perf<Packet4f>(const Packet4f& x) {
51
perf<Packet4f>(const Packet4f& x) {
52
  return internal::generic_fast_erf_float(x);
52
  return internal::generic_fast_erf_float(x);
53
}
53
}
54
54
55
// Hyperbolic Tangent function.
56
template <>
57
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
58
ptanh<Packet4f>(const Packet4f& x) {
59
  return internal::generic_fast_tanh_float(x);
60
}
61
62
// Error function.
63
template <>
64
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
65
perf<Packet4f>(const Packet4f& x) {
66
  return internal::generic_fast_erf_float(x);
67
}
68
55
} // end namespace internal
69
} // end namespace internal
56
70
57
} // end namespace Eigen
71
} // end namespace Eigen
58
72
59
#endif // EIGEN_MATH_FUNCTIONS_NEON_H
73
#endif // EIGEN_MATH_FUNCTIONS_NEON_H
(-)a/Eigen/src/Core/arch/SSE/MathFunctions.h (+7 lines)
Lines 142-157 ptanh<Packet4f>(const Packet4f& x) { Link Here
142
142
143
// Error function.
143
// Error function.
144
template <>
144
template <>
145
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
145
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
146
perf<Packet4f>(const Packet4f& x) {
146
perf<Packet4f>(const Packet4f& x) {
147
  return internal::generic_fast_erf_float(x);
147
  return internal::generic_fast_erf_float(x);
148
}
148
}
149
149
150
// Error function.
151
template <>
152
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
153
perf<Packet4f>(const Packet4f& x) {
154
  return internal::generic_fast_erf_float(x);
155
}
156
150
} // end namespace internal
157
} // end namespace internal
151
158
152
namespace numext {
159
namespace numext {
153
160
154
template<>
161
template<>
155
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
162
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
156
float sqrt(const float &x)
163
float sqrt(const float &x)
157
{
164
{
(-)a/Eigen/src/Core/arch/ZVector/MathFunctions.h (+14 lines)
Lines 234-246 ptanh<Packet4f>(const Packet4f& x) { Link Here
234
234
235
// Error function.
235
// Error function.
236
template <>
236
template <>
237
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
237
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
238
perf<Packet4f>(const Packet4f& x) {
238
perf<Packet4f>(const Packet4f& x) {
239
  return internal::generic_fast_erf_float(x);
239
  return internal::generic_fast_erf_float(x);
240
}
240
}
241
241
242
// Hyperbolic Tangent function.
243
template <>
244
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
245
ptanh<Packet4f>(const Packet4f& x) {
246
  return internal::generic_fast_tanh_float(x);
247
}
248
249
// Error function.
250
template <>
251
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
252
perf<Packet4f>(const Packet4f& x) {
253
  return internal::generic_fast_erf_float(x);
254
}
255
242
}  // end namespace internal
256
}  // end namespace internal
243
257
244
}  // end namespace Eigen
258
}  // end namespace Eigen
245
259
246
#endif  // EIGEN_MATH_FUNCTIONS_ALTIVEC_H
260
#endif  // EIGEN_MATH_FUNCTIONS_ALTIVEC_H

Return to bug 1699