Lines 113-128
T generic_fast_erf_float(const T& a_x) {
Link Here
|
113 |
q = pmadd(x2, q, beta_4); |
113 |
q = pmadd(x2, q, beta_4); |
114 |
q = pmadd(x2, q, beta_2); |
114 |
q = pmadd(x2, q, beta_2); |
115 |
q = pmadd(x2, q, beta_0); |
115 |
q = pmadd(x2, q, beta_0); |
116 |
|
116 |
|
117 |
// Divide the numerator by the denominator. |
117 |
// Divide the numerator by the denominator. |
118 |
return pdiv(p, q); |
118 |
return pdiv(p, q); |
119 |
} |
119 |
} |
120 |
|
120 |
|
|
|
121 |
/** \internal \returns the error function of \a a (coeff-wise) |
122 |
Doesn't do anything fancy, just a 13/8-degree rational interpolant which |
123 |
is accurate up to a couple of ulp in the range [-4, 4], outside of which |
124 |
fl(erf(x)) = +/-1. |
125 |
|
126 |
This implementation works on both scalars and Ts. |
127 |
*/ |
128 |
template <typename T> |
129 |
T generic_fast_erf_float(const T& a_x) { |
130 |
// Clamp the inputs to the range [-4, 4] since anything outside |
131 |
// this range is +/-1.0f in single-precision. |
132 |
const T plus_4 = pset1<T>(4.f); |
133 |
const T minus_4 = pset1<T>(-4.f); |
134 |
const T x = pmax(pmin(a_x, plus_4), minus_4); |
135 |
// The monomial coefficients of the numerator polynomial (odd). |
136 |
const T alpha_1 = pset1<T>(-1.60960333262415e-02f); |
137 |
const T alpha_3 = pset1<T>(-2.95459980854025e-03f); |
138 |
const T alpha_5 = pset1<T>(-7.34990630326855e-04f); |
139 |
const T alpha_7 = pset1<T>(-5.69250639462346e-05f); |
140 |
const T alpha_9 = pset1<T>(-2.10102402082508e-06f); |
141 |
const T alpha_11 = pset1<T>(2.77068142495902e-08f); |
142 |
const T alpha_13 = pset1<T>(-2.72614225801306e-10f); |
143 |
|
144 |
// The monomial coefficients of the denominator polynomial (even). |
145 |
const T beta_0 = pset1<T>(-1.42647390514189e-02f); |
146 |
const T beta_2 = pset1<T>(-7.37332916720468e-03f); |
147 |
const T beta_4 = pset1<T>(-1.68282697438203e-03f); |
148 |
const T beta_6 = pset1<T>(-2.13374055278905e-04f); |
149 |
const T beta_8 = pset1<T>(-1.45660718464996e-05f); |
150 |
|
151 |
// Since the polynomials are odd/even, we need x^2. |
152 |
const T x2 = pmul(x, x); |
153 |
|
154 |
// Evaluate the numerator polynomial p. |
155 |
T p = pmadd(x2, alpha_13, alpha_11); |
156 |
p = pmadd(x2, p, alpha_9); |
157 |
p = pmadd(x2, p, alpha_7); |
158 |
p = pmadd(x2, p, alpha_5); |
159 |
p = pmadd(x2, p, alpha_3); |
160 |
p = pmadd(x2, p, alpha_1); |
161 |
p = pmul(x, p); |
162 |
|
163 |
// Evaluate the denominator polynomial p. |
164 |
T q = pmadd(x2, beta_8, beta_6); |
165 |
q = pmadd(x2, q, beta_4); |
166 |
q = pmadd(x2, q, beta_2); |
167 |
q = pmadd(x2, q, beta_0); |
168 |
|
169 |
// Divide the numerator by the denominator. |
170 |
return pdiv(p, q); |
171 |
} |
172 |
|
121 |
template<typename RealScalar> |
173 |
template<typename RealScalar> |
122 |
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE |
174 |
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE |
123 |
RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y) |
175 |
RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y) |
124 |
{ |
176 |
{ |
125 |
EIGEN_USING_STD_MATH(sqrt); |
177 |
EIGEN_USING_STD_MATH(sqrt); |
126 |
RealScalar p, qp; |
178 |
RealScalar p, qp; |
127 |
p = numext::maxi(x,y); |
179 |
p = numext::maxi(x,y); |
128 |
if(p==RealScalar(0)) return RealScalar(0); |
180 |
if(p==RealScalar(0)) return RealScalar(0); |