HMSBEAGLE  1.0.0
BeagleCPU4StateSSEImpl.h
1 /*
2  * BeagleCPU4StateSSEImpl.h
3  * BEAGLE
4  *
5  * Copyright 2009 Phylogenetic Likelihood Working Group
6  *
7  * This file is part of BEAGLE.
8  *
9  * BEAGLE is free software: you can redistribute it and/or modify
10  * it under the terms of the GNU Lesser General Public License as
11  * published by the Free Software Foundation, either version 3 of
12  * the License, or (at your option) any later version.
13  *
14  * BEAGLE is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with BEAGLE. If not, see
21  * <http://www.gnu.org/licenses/>.
22  *
23  * @author Marc Suchard
24  */
25 
26 #ifndef __BeagleCPU4StateSSEImpl__
27 #define __BeagleCPU4StateSSEImpl__
28 
29 #ifdef HAVE_CONFIG_H
30 #include "libhmsbeagle/config.h"
31 #endif
32 
33 #include "libhmsbeagle/CPU/BeagleCPU4StateImpl.h"
34 
35 #include <vector>
36 
// Alias for the compiler-specific no-alias pointer qualifier.
// NOTE(review): the declarations visible in this listing spell `__restrict`
// directly rather than using this macro.
37 #define RESTRICT __restrict /* some compilers may need this defined as 'restrict' instead */
38 
// Default padding constants for the 4-state SSE implementation.
39 #define T_PAD_4_SSE_DEFAULT 2 // Pad transition matrix with 2 rows for SSE
40 #define P_PAD_4_SSE_DEFAULT 0 // Partials padding not needed for 4 states SSE
41 
// Template-argument lists (real type plus the T_PAD / P_PAD padding constants)
// used to name the float and double partial specializations declared below,
// and the template header that introduces T_PAD and P_PAD for them.
42 #define BEAGLE_CPU_4_SSE_FLOAT float, T_PAD, P_PAD
43 #define BEAGLE_CPU_4_SSE_DOUBLE double, T_PAD, P_PAD
44 #define BEAGLE_CPU_4_SSE_TEMPLATE template <int T_PAD, int P_PAD>
45 
46 namespace beagle {
47 namespace cpu {
48 
// Primary template of BeagleCPU4StateSSEImpl. Its body is empty, so it adds
// nothing to the BeagleCPU4StateImpl base it derives from; the SSE-specific
// declarations live in the float and double partial specializations that
// follow. BEAGLE_CPU_TEMPLATE and BEAGLE_CPU_GENERIC are macros that are not
// defined in this header (presumably they come from the included
// BeagleCPU4StateImpl.h -- TODO confirm).
49 BEAGLE_CPU_TEMPLATE
50 class BeagleCPU4StateSSEImpl : public BeagleCPU4StateImpl<BEAGLE_CPU_GENERIC> {};
51 
52 
// Partial specialization of BeagleCPU4StateSSEImpl for float values,
// parameterized on the T_PAD / P_PAD padding constants. It derives from the
// BeagleCPU4StateImpl instantiation with the same arguments and only declares
// its member functions here; the definitions are presumably provided by
// BeagleCPU4StateSSEImpl.hpp, which this header includes at its end.
53 BEAGLE_CPU_4_SSE_TEMPLATE
54 class BeagleCPU4StateSSEImpl<BEAGLE_CPU_4_SSE_FLOAT> : public BeagleCPU4StateImpl<BEAGLE_CPU_4_SSE_FLOAT> {
55 
56 protected:
  // NOTE(review): the embedded listing numbers jump from 56 to 73 here, so any
  // protected members declared on those lines are not visible in this view.
73 
74 public:
  // Returns a C string naming this implementation (value defined elsewhere).
75  virtual const char* getName();
76 
  // Returns flags as a long; presumably a bitmask describing this
  // implementation's capabilities -- TODO confirm against the definition.
77  virtual const long getFlags();
78 
79 protected:
  // Presumably the multiple to which the pattern count is padded -- TODO
  // confirm against the definition.
80  virtual int getPaddedPatternsModulus();
81 
82 private:
83 
  // Computation routines. The names follow the kind of each child input:
  // a "States" child is passed as an int array, a "Partials" child as a
  // float array, and each child is accompanied by a float matrices array.
  // destP is the only non-const float buffer and is presumably the output.

  // Both children given as int state arrays. Unlike the routines below, no
  // parameter of this one is __restrict-qualified.
84  virtual void calcStatesStates(float* destP,
85  const int* states1,
86  const float* matrices1,
87  const int* states2,
88  const float* matrices2);
89 
  // First child as int states, second child as float partials. The matrix
  // and partials inputs are __restrict-qualified; destP is not.
90  virtual void calcStatesPartials(float* destP,
91  const int* states1,
92  const float* __restrict matrices1,
93  const float* __restrict partials2,
94  const float* __restrict matrices2);
95 
  // Same inputs as calcStatesPartials plus a read-only scaleFactors array
  // (presumably previously computed scale factors applied to the result --
  // TODO confirm).
96  virtual void calcStatesPartialsFixedScaling(float* destP,
97  const int* states1,
98  const float* __restrict matrices1,
99  const float* __restrict partials2,
100  const float* __restrict matrices2,
101  const float* __restrict scaleFactors);
102 
  // Both children given as float partials. Every pointer, including destP,
  // is __restrict-qualified, so the buffers must not alias one another.
103  virtual void calcPartialsPartials(float* __restrict destP,
104  const float* __restrict partials1,
105  const float* __restrict matrices1,
106  const float* __restrict partials2,
107  const float* __restrict matrices2);
108 
  // Partials/partials variant that additionally takes a read-only
  // scaleFactors array; the children are named child0*/child1* here.
109  virtual void calcPartialsPartialsFixedScaling(float* __restrict destP,
110  const float* __restrict child0Partials,
111  const float* __restrict child0TransMat,
112  const float* __restrict child1Partials,
113  const float* __restrict child1TransMat,
114  const float* __restrict scaleFactors);
115 
  // Partials/partials variant with a writable int* activateScaling
  // (presumably an output flag telling the caller that scaling is needed --
  // TODO confirm).
116  virtual void calcPartialsPartialsAutoScaling(float* __restrict destP,
117  const float* __restrict partials1,
118  const float* __restrict matrices1,
119  const float* __restrict partials2,
120  const float* __restrict matrices2,
121  int* activateScaling);
122 
  // Takes buffer/index arguments and a double* outSumLogLikelihood; note the
  // output stays double even in this float specialization. Presumably the
  // int return value is a status code and the summed log likelihood is
  // written through outSumLogLikelihood -- TODO confirm.
123  virtual int calcEdgeLogLikelihoods(const int parentBufferIndex,
124  const int childBufferIndex,
125  const int probabilityIndex,
126  const int categoryWeightsIndex,
127  const int stateFrequenciesIndex,
128  const int scalingFactorsIndex,
129  double* outSumLogLikelihood);
130 
131 };
132 
133 
// Partial specialization of BeagleCPU4StateSSEImpl for double values,
// parameterized on the T_PAD / P_PAD padding constants. It derives from the
// BeagleCPU4StateImpl instantiation with the same arguments and declares the
// same set of member functions as the float specialization, with double
// buffers; the definitions are presumably provided by
// BeagleCPU4StateSSEImpl.hpp, which this header includes at its end.
134 BEAGLE_CPU_4_SSE_TEMPLATE
135 class BeagleCPU4StateSSEImpl<BEAGLE_CPU_4_SSE_DOUBLE> : public BeagleCPU4StateImpl<BEAGLE_CPU_4_SSE_DOUBLE> {
136 
137 protected:
  // NOTE(review): the embedded listing numbers jump from 137 to 154 here, so
  // any protected members declared on those lines are not visible in this view.
154 
155 public:
  // Returns a C string naming this implementation (value defined elsewhere).
156  virtual const char* getName();
157 
  // Returns flags as a long; presumably a bitmask describing this
  // implementation's capabilities -- TODO confirm against the definition.
158  virtual const long getFlags();
159 
160 protected:
  // Presumably the multiple to which the pattern count is padded -- TODO
  // confirm against the definition.
161  virtual int getPaddedPatternsModulus();
162 
163 private:
164 
  // Computation routines. The names follow the kind of each child input:
  // a "States" child is passed as an int array, a "Partials" child as a
  // double array, and each child is accompanied by a double matrices array.
  // destP is the only non-const double buffer and is presumably the output.

  // Both children given as int state arrays. Unlike the routines below, no
  // parameter of this one is __restrict-qualified.
165  virtual void calcStatesStates(double* destP,
166  const int* states1,
167  const double* matrices1,
168  const int* states2,
169  const double* matrices2);
170 
  // First child as int states, second child as double partials. The matrix
  // and partials inputs are __restrict-qualified; destP is not.
171  virtual void calcStatesPartials(double* destP,
172  const int* states1,
173  const double* __restrict matrices1,
174  const double* __restrict partials2,
175  const double* __restrict matrices2);
176 
  // Same inputs as calcStatesPartials plus a read-only scaleFactors array
  // (presumably previously computed scale factors applied to the result --
  // TODO confirm).
177  virtual void calcStatesPartialsFixedScaling(double* destP,
178  const int* states1,
179  const double* __restrict matrices1,
180  const double* __restrict partials2,
181  const double* __restrict matrices2,
182  const double* __restrict scaleFactors);
183 
  // Both children given as double partials. Every pointer, including destP,
  // is __restrict-qualified, so the buffers must not alias one another.
184  virtual void calcPartialsPartials(double* __restrict destP,
185  const double* __restrict partials1,
186  const double* __restrict matrices1,
187  const double* __restrict partials2,
188  const double* __restrict matrices2);
189 
  // Partials/partials variant that additionally takes a read-only
  // scaleFactors array; the children are named child0*/child1* here.
190  virtual void calcPartialsPartialsFixedScaling(double* __restrict destP,
191  const double* __restrict child0Partials,
192  const double* __restrict child0TransMat,
193  const double* __restrict child1Partials,
194  const double* __restrict child1TransMat,
195  const double* __restrict scaleFactors);
196 
  // Partials/partials variant with a writable int* activateScaling
  // (presumably an output flag telling the caller that scaling is needed --
  // TODO confirm).
197  virtual void calcPartialsPartialsAutoScaling(double* __restrict destP,
198  const double* __restrict partials1,
199  const double* __restrict matrices1,
200  const double* __restrict partials2,
201  const double* __restrict matrices2,
202  int* activateScaling);
203 
  // Takes buffer/index arguments and a double* outSumLogLikelihood.
  // Presumably the int return value is a status code and the summed log
  // likelihood is written through outSumLogLikelihood -- TODO confirm.
204  virtual int calcEdgeLogLikelihoods(const int parentBufferIndex,
205  const int childBufferIndex,
206  const int probabilityIndex,
207  const int categoryWeightsIndex,
208  const int stateFrequenciesIndex,
209  const int scalingFactorsIndex,
210  double* outSumLogLikelihood);
211 
212 };
213 
214 
// Factory class template for this implementation.
// NOTE(review): the embedded listing numbers jump from 215 to 217, so the line
// that opens the class (its name and base class) is not visible in this view.
215 BEAGLE_CPU_FACTORY_TEMPLATE
217 public:
  // Creates an implementation object from the requested instance dimensions
  // (buffer, state, pattern, category counts), the resource number and the
  // preference/requirement flags. Presumably returns the new BeagleImpl (or a
  // null pointer on failure) and reports a status through *errorCode -- TODO
  // confirm against the definition.
218  virtual BeagleImpl* createImpl(int tipCount,
219  int partialsBufferCount,
220  int compactBufferCount,
221  int stateCount,
222  int patternCount,
223  int eigenBufferCount,
224  int matrixBufferCount,
225  int categoryCount,
226  int scaleBufferCount,
227  int resourceNumber,
228  long preferenceFlags,
229  long requirementFlags,
230  int* errorCode);
231 
  // Name string and flags for this factory (values defined elsewhere).
232  virtual const char* getName();
233  virtual const long getFlags();
234 };
235 
236 } // namespace cpu
237 } // namespace beagle
238 
239 // now include the file containing template function implementations
240 #include "libhmsbeagle/CPU/BeagleCPU4StateSSEImpl.hpp"
241 
242 
243 #endif // __BeagleCPU4StateSSEImpl__