Bugzilla – Attachment 911 Details for
Bug 1652
Corner cases in SIMD sin/cos
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Requests
|
Help
|
Log In
[x]
|
Forgot Password
Login:
[x]
This bugzilla service is closed. All entries have been migrated to
https://gitlab.com/libeigen/eigen
proof-of-concept
sse-test.cpp (text/x-c++src), 5.49 KB, created by
Christoph Hertzberg
on 2018-12-27 22:01:13 UTC
(
hide
)
Description:
proof-of-concept
Filename:
MIME Type:
Creator:
Christoph Hertzberg
Created:
2018-12-27 22:01:13 UTC
Size:
5.49 KB
patch
obsolete
>// For simpler intrinsics >#include <Eigen/Core> > >using namespace Eigen; >using namespace Eigen::internal; > >#include <iostream> > > >std::ostream & operator<<(std::ostream & out, Packet4f const & p) >{ > Vector4f v; > v.writePacket<Aligned>(0, p); > out.precision(9); > out.width(15); > for(int i=0; i<4; ++i) > out << "\t" << std::defaultfloat << v[i]; > out << '\n'; > out.precision(6); > out.width(15); > for(int i=0; i<4; ++i) > out << "\t" << std::hexfloat << v[i]; > out << '\n'; > out.precision(6); > out.width(15); > for(int i=0; i<4; ++i) > out << "\t" << std::hex << reinterpret_cast<int&>(v[i]); > out << '\n'; > return (out << std::defaultfloat); >} > >template<bool ComputeSine,typename Packet> >EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS >EIGEN_UNUSED >Packet psincos_float2(const Packet& _x) >{ > typedef typename unpacket_traits<Packet>::integer_packet PacketI; > const Packet cst_1 = pset1<Packet>(1.0f); > const Packet cst_half = pset1<Packet>(0.5f); > const Packet cst_2_23 = pset1<Packet>(static_cast<float>(1<<23)); // 2^23 > > const PacketI csti_1 = pset1<PacketI>(1); >// const PacketI csti_not1 = pset1<PacketI>(~1); >// const PacketI csti_2 = pset1<PacketI>(2); >// const PacketI csti_3 = pset1<PacketI>(3); > > const Packet cst_sign_mask = pset1frombits<Packet>(0x80000000u); > > const Packet cst_minus_cephes_DP1 = pset1<Packet>(-0.78515625f*2.f); > const Packet cst_minus_cephes_DP2 = pset1<Packet>(-2.4187564849853515625e-4f*2.f); > const Packet cst_minus_cephes_DP3 = pset1<Packet>(-3.77489497744594108e-8f*2.f); > const Packet cst_sincof_p0 = pset1<Packet>(-1.9515295891E-4f); > const Packet cst_sincof_p1 = pset1<Packet>( 8.3321608736E-3f); > const Packet cst_sincof_p2 = pset1<Packet>(-1.6666654611E-1f); > const Packet cst_coscof_p0 = pset1<Packet>( 2.443315711809948E-005f); > const Packet cst_coscof_p1 = pset1<Packet>(-1.388731625493765E-003f); > const Packet cst_coscof_p2 = pset1<Packet>( 4.166664568298827E-002f); > const Packet cst_cephes_TOPI = pset1<Packet>( 1.27323954473516f*0.5f); // 2 / M_PI > const Packet cst_cephes_PIOF = pset1<Packet>( 0.78539816339744f); // M_PI/4 > const Packet cst_cephes_MPIOF = pset1<Packet>(-0.78539816339744f); // -M_PI/4 > > Packet x = pabs(_x); > std::cout << "x: " << x << '\n'; > > // Scale x by 2/Pi to find x's octant. > Packet y = pmul(x, cst_cephes_TOPI); > std::cout << "y: " << y << '\n'; > > Packet y_round = padd(y, cst_2_23); > std::cout << "y_:" << y_round << '\n'; > PacketI y_int = preinterpret<PacketI>(y_round); // last 23 digits represent integer (if abs(x)<2^24) > // TODO protect next instruction against fast-math optimizations? > y = psub(y_round, cst_2_23); // nearest integer to x*4/pi > std::cout << "y:" << y << '\n'; > > // Get the octant. We'll reduce x by this number of octants or by one more than it. > //PacketI y_int = pcast<Packet,PacketI>(y); > // x's from even-numbered octants will translate to octant 0: [0, +Pi/4]. > // x's from odd-numbered octants will translate to octant -1: [-Pi/4, 0]. > // Adjustment for odd-numbered octants: octant = (octant + 1) & (~1). >// PacketI y_int1 = pand(padd(y_int, csti_1), csti_not1); // could be pbitclear<0>(...) > //y = pcast<PacketI,Packet>(y_int1); > > // Compute the sign to apply to the polynomial. > // sin: sign = second_bit(y_int) xor signbit(_x) > // cos: sign = second_bit(y_int+1) > Packet sign_bit = ComputeSine ? pxor(_x, preinterpret<Packet>(pshiftleft<30>(y_int))) > : preinterpret<Packet>(pshiftleft<30>(padd(y_int,csti_1))); > sign_bit = pand(sign_bit, cst_sign_mask); // clear all but left most bit > > // Get the polynomial selection mask from the second bit of y_int1 > // We'll calculate both (sin and cos) polynomials and then select from the two. > Packet poly_mask = preinterpret<Packet>(pcmp_eq(pand(y_int, csti_1), pzero(y_int))); > std::cout << "M: " << poly_mask; > > // Reduce x by y octants to get: -Pi/4 <= x <= +Pi/4. > // The magic pass: "Extended precision modular arithmetic" > // TODO if we have a true FMA, we could use full-precision DP1 > // x = ((x - y * DP1) - y * DP2) - y * DP3 > x = pmadd(y, cst_minus_cephes_DP1, x); > x = pmadd(y, cst_minus_cephes_DP2, x); > x = pmadd(y, cst_minus_cephes_DP3, x); > std::cout << "x: " << x << "\n"; > x = pmin(x,cst_cephes_PIOF); > x = pmax(x,cst_cephes_MPIOF); > std::cout << "x: " << x << "\n"; > > Packet x2 = pmul(x,x); > > // Evaluate the cos(x) polynomial. (-Pi/4 <= x <= Pi/4) > Packet y1 = cst_coscof_p0; > y1 = pmadd(y1, x2, cst_coscof_p1); > y1 = pmadd(y1, x2, cst_coscof_p2); > y1 = pmul(y1, x2); > y1 = pmul(y1, x2); > y1 = psub(y1, pmul(x2, cst_half)); > y1 = padd(y1, cst_1); > > > // Evaluate the sin(x) polynomial. (Pi/4 <= x <= Pi/4) > Packet y2 = cst_sincof_p0; > y2 = pmadd(y2, x2, cst_sincof_p1); > y2 = pmadd(y2, x2, cst_sincof_p2); > y2 = pmul(y2, x2); > y2 = pmadd(y2, x, x); > > std::cout << "y1: " << y1 << "y2: "<< y2 <<'\n'; > > // Select the correct result from the two polynomials. > y = ComputeSine ? pselect(poly_mask,y2,y1) > : pselect(poly_mask,y1,y2); > > // Update the sign > return pxor(y, sign_bit); >} > > > >int main() { > Eigen::Array4f vec(-0.1,2001*M_PI,1000.5*M_PI,1e34); > Packet4f a = vec.packet<Aligned>(0); >// std::cout << a << "\n"; >// a = psincos_float2<true>(a); > a = psincos_float2<false>(a); > std::cout << a << "\n"; > > std::cout.precision(9); > std::cout << sin(vec).transpose() << "\n"; > std::cout << cos(vec).transpose() << "\n"; > > >}
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Raw
Actions:
View
Attachments on
bug 1652
: 911 |
913