// HMSBEAGLE  1.0.0
// BeagleCPUSSEImpl.h
/*
 * BeagleCPUSSEImpl.h
 * BEAGLE
 *
 * Copyright 2010 Phylogenetic Likelihood Working Group
 *
 * This file is part of BEAGLE.
 *
 * BEAGLE is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 3 of
 * the License, or (at your option) any later version.
 *
 * BEAGLE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with BEAGLE. If not, see
 * <http://www.gnu.org/licenses/>.
 *
 * @author Marc Suchard
 */
25 
26 #ifndef __BeagleCPUSSEImpl__
27 #define __BeagleCPUSSEImpl__
28 
29 #ifdef HAVE_CONFIG_H
30 #include "libhmsbeagle/config.h"
31 #endif
32 
33 #include "libhmsbeagle/CPU/BeagleCPUImpl.h"
34 
35 #include <vector>
36 
// Spelling of the pointer "restrict" qualifier used by this translation unit.
#define RESTRICT __restrict /* may need to define this instead to 'restrict' */


// Pad transition matrix rows with an extra 1.0 for ambiguous characters
#define T_PAD_SSE_EVEN  2   // for even state counts
#define T_PAD_SSE_ODD   1   // for odd state counts

// Partials padding
#define P_PAD_SSE_EVEN  0   // for even state counts
#define P_PAD_SSE_ODD   1   // for odd state counts


// Template-argument lists for the float / double specializations, and the
// template header that introduces the T_PAD / P_PAD parameters they refer to.
#define BEAGLE_CPU_SSE_FLOAT    float, T_PAD, P_PAD
#define BEAGLE_CPU_SSE_DOUBLE   double, T_PAD, P_PAD
#define BEAGLE_CPU_SSE_TEMPLATE template <int T_PAD, int P_PAD>
52 
53 namespace beagle {
54 namespace cpu {
55 
56 BEAGLE_CPU_TEMPLATE
57 class BeagleCPUSSEImpl : public BeagleCPUImpl<BEAGLE_CPU_GENERIC> {
58 // void inline innerPartialsPartals(
59 // const double* __restrict partials1,
60 // const double* __restrict matrices1,
61 // const double* __restrict partials2,
62 // const double* __restrict matrices2,
63 // V_Real& sum1_vec,
64 // V_Real& sum2_vec,
65 // V_Real& out,
66 // int& v,
67 // int& w);
68 
69 };
70 
71 BEAGLE_CPU_SSE_TEMPLATE
72 class BeagleCPUSSEImpl<BEAGLE_CPU_SSE_FLOAT> : public BeagleCPUImpl<BEAGLE_CPU_SSE_FLOAT> {
73 
74 protected:
91 
92 public:
93  virtual const char* getName();
94 
95  virtual const long getFlags();
96 
97 protected:
98  virtual int getPaddedPatternsModulus();
99 
100 private:
101  virtual void calcStatesStates(float* destP,
102  const int* states1,
103  const float* matrices1,
104  const int* states2,
105  const float* matrices2);
106 
107  virtual void calcStatesPartials(float* destP,
108  const int* states1,
109  const float* matrices1,
110  const float* partials2,
111  const float* matrices2);
112 
113  virtual void calcPartialsPartials(float* __restrict destP,
114  const float* __restrict partials1,
115  const float* __restrict matrices1,
116  const float* __restrict partials2,
117  const float* __restrict matrices2);
118 
119  virtual void calcPartialsPartialsFixedScaling(float* __restrict destP,
120  const float* __restrict partials1,
121  const float* __restrict matrices1,
122  const float* __restrict partials2,
123  const float* __restrict matrices2,
124  const float* __restrict scaleFactors);
125 
126  virtual void calcPartialsPartialsAutoScaling(float* __restrict destP,
127  const float* __restrict partials1,
128  const float* __restrict matrices1,
129  const float* __restrict partials2,
130  const float* __restrict matrices2,
131  int* activateScaling);
132 
133  virtual int calcEdgeLogLikelihoods(const int parentBufferIndex,
134  const int childBufferIndex,
135  const int probabilityIndex,
136  const int categoryWeightsIndex,
137  const int stateFrequenciesIndex,
138  const int scalingFactorsIndex,
139  double* outSumLogLikelihood);
140 
141 
142 };
143 
144 
145 BEAGLE_CPU_SSE_TEMPLATE
146 class BeagleCPUSSEImpl<BEAGLE_CPU_SSE_DOUBLE> : public BeagleCPUImpl<BEAGLE_CPU_SSE_DOUBLE> {
147 
148 protected:
165 
166 public:
167  virtual const char* getName();
168 
169  virtual const long getFlags();
170 
171 protected:
172  virtual int getPaddedPatternsModulus();
173 
174 private:
175  virtual void calcStatesStates(double* destP,
176  const int* states1,
177  const double* matrices1,
178  const int* states2,
179  const double* matrices2);
180 
181  virtual void calcStatesPartials(double* destP,
182  const int* states1,
183  const double* matrices1,
184  const double* partials2,
185  const double* matrices2);
186 
187  virtual void calcPartialsPartials(double* __restrict destP,
188  const double* __restrict partials1,
189  const double* __restrict matrices1,
190  const double* __restrict partials2,
191  const double* __restrict matrices2);
192 
193  virtual void calcPartialsPartialsFixedScaling(double* __restrict destP,
194  const double* __restrict partials1,
195  const double* __restrict matrices1,
196  const double* __restrict partials2,
197  const double* __restrict matrices2,
198  const double* __restrict scaleFactors);
199 
200  virtual void calcPartialsPartialsAutoScaling(double* __restrict destP,
201  const double* __restrict partials1,
202  const double* __restrict matrices1,
203  const double* __restrict partials2,
204  const double* __restrict matrices2,
205  int* activateScaling);
206 
207  virtual int calcEdgeLogLikelihoods(const int parentBufferIndex,
208  const int childBufferIndex,
209  const int probabilityIndex,
210  const int categoryWeightsIndex,
211  const int stateFrequenciesIndex,
212  const int scalingFactorsIndex,
213  double* outSumLogLikelihood);
214 
215 };
216 
217 BEAGLE_CPU_FACTORY_TEMPLATE
219 public:
220  virtual BeagleImpl* createImpl(int tipCount,
221  int partialsBufferCount,
222  int compactBufferCount,
223  int stateCount,
224  int patternCount,
225  int eigenBufferCount,
226  int matrixBufferCount,
227  int categoryCount,
228  int scaleBufferCount,
229  int resourceNumber,
230  long preferenceFlags,
231  long requirementFlags,
232  int* errorCode);
233 
234  virtual const char* getName();
235  virtual const long getFlags();
236 };
237 
238 } // namespace cpu
239 } // namespace beagle
240 
241 // now include the file containing template function implementations
242 #include "libhmsbeagle/CPU/BeagleCPUSSEImpl.hpp"
243 
244 
245 #endif // __BeagleCPUSSEImpl__