MathFunctions.h
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2009 Rohit Garg <rpg.314@gmail.com>
5 // Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
6 //
7 // Eigen is free software; you can redistribute it and/or
8 // modify it under the terms of the GNU Lesser General Public
9 // License as published by the Free Software Foundation; either
10 // version 3 of the License, or (at your option) any later version.
11 //
12 // Alternatively, you can redistribute it and/or
13 // modify it under the terms of the GNU General Public License as
14 // published by the Free Software Foundation; either version 2 of
15 // the License, or (at your option) any later version.
16 //
17 // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
18 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19 // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
20 // GNU General Public License for more details.
21 //
22 // You should have received a copy of the GNU Lesser General Public
23 // License and a copy of the GNU General Public License along with
24 // Eigen. If not, see <http://www.gnu.org/licenses/>.
25 
26 #ifndef EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H
27 #define EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H
28 
29 namespace Eigen {
30 
31 namespace internal {
32 
/** \internal \returns the arcsine of \a a (coeff-wise).
  *
  * Generic fallback that forwards to a scalar \c asin. Packet types that
  * cannot be passed to a scalar \c asin are expected to provide an explicit
  * specialization of this template.
  */
template<typename Packet> inline static Packet pasin(Packet a)
{
  // Make std::asin visible but call unqualified, so that argument-dependent
  // lookup can also pick an asin() overload living in the namespace of a
  // user-defined scalar type. Built-in arithmetic types still resolve to the
  // std::asin overloads exactly as before.
  using std::asin;
  return asin(a);
}
35 
36 #ifdef EIGEN_VECTORIZE_SSE
37 
38 template<> EIGEN_DONT_INLINE Packet4f pasin(Packet4f x)
39 {
40  _EIGEN_DECLARE_CONST_Packet4f(half, 0.5);
41  _EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5);
42  _EIGEN_DECLARE_CONST_Packet4f(3half, 1.5);
43 
44  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000);
45 
46  _EIGEN_DECLARE_CONST_Packet4f(pi, 3.141592654);
47  _EIGEN_DECLARE_CONST_Packet4f(pi_over_2, 3.141592654*0.5);
48 
49  _EIGEN_DECLARE_CONST_Packet4f(asin1, 4.2163199048E-2);
50  _EIGEN_DECLARE_CONST_Packet4f(asin2, 2.4181311049E-2);
51  _EIGEN_DECLARE_CONST_Packet4f(asin3, 4.5470025998E-2);
52  _EIGEN_DECLARE_CONST_Packet4f(asin4, 7.4953002686E-2);
53  _EIGEN_DECLARE_CONST_Packet4f(asin5, 1.6666752422E-1);
54 
55  Packet4f a = pabs(x);//got the absolute value
56 
57  Packet4f sign_bit= _mm_and_ps(x, p4f_sign_mask);//extracted the sign bit
58 
59  Packet4f z1,z2;//will need them during computation
60 
61 
62 //will compute the two branches for asin
63 //so first compare with half
64 
65  Packet4f branch_mask= _mm_cmpgt_ps(a, p4f_half);//this is to select which branch to take
66 //both will be taken, and finally results will be merged
67 //the branch for values >0.5
68 
69  {
70 //the core series expansion
71  z1=pmadd(p4f_minus_half,a,p4f_half);
72  Packet4f x1=psqrt(z1);
73  Packet4f s1=pmadd(p4f_asin1, z1, p4f_asin2);
74  Packet4f s2=pmadd(s1, z1, p4f_asin3);
75  Packet4f s3=pmadd(s2,z1, p4f_asin4);
76  Packet4f s4=pmadd(s3,z1, p4f_asin5);
77  Packet4f temp=pmul(s4,z1);//not really a madd but a mul by z so that the next term can be a madd
78  z1=pmadd(temp,x1,x1);
79  z1=padd(z1,z1);
80  z1=psub(p4f_pi_over_2,z1);
81  }
82 
83  {
84 //the core series expansion
85  Packet4f x2=a;
86  z2=pmul(x2,x2);
87  Packet4f s1=pmadd(p4f_asin1, z2, p4f_asin2);
88  Packet4f s2=pmadd(s1, z2, p4f_asin3);
89  Packet4f s3=pmadd(s2,z2, p4f_asin4);
90  Packet4f s4=pmadd(s3,z2, p4f_asin5);
91  Packet4f temp=pmul(s4,z2);//not really a madd but a mul by z so that the next term can be a madd
92  z2=pmadd(temp,x2,x2);
93  }
94 
95 /* select the correct result from the two branch evaluations */
96  z1 = _mm_and_ps(branch_mask, z1);
97  z2 = _mm_andnot_ps(branch_mask, z2);
98  Packet4f z = _mm_or_ps(z1,z2);
99 
100 /* update the sign */
101  return _mm_xor_ps(z, sign_bit);
102 }
103 
104 #endif // EIGEN_VECTORIZE_SSE
105 
106 } // end namespace internal
107 
108 } // end namespace Eigen
109 
110 #endif // EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H