HMSBEAGLE  1.0.0
BeagleGPUImpl.h
/*
 * @file BeagleGPUImpl.h
 *
 * Copyright 2009 Phylogenetic Likelihood Working Group
 *
 * This file is part of BEAGLE.
 *
 * BEAGLE is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 3 of
 * the License, or (at your option) any later version.
 *
 * BEAGLE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with BEAGLE. If not, see
 * <http://www.gnu.org/licenses/>.
 *
 *
 * @brief GPU implementation header
 *
 * @author Marc Suchard
 * @author Andrew Rambaut
 * @author Daniel Ayres
 */
29 
30 #ifndef __BeagleGPUImpl__
31 #define __BeagleGPUImpl__
32 
33 #ifdef HAVE_CONFIG_H
34 #include "libhmsbeagle/config.h"
35 #endif
36 
37 #include "libhmsbeagle/BeagleImpl.h"
38 #include "libhmsbeagle/GPU/GPUImplDefs.h"
39 #include "libhmsbeagle/GPU/GPUInterface.h"
40 #include "libhmsbeagle/GPU/KernelLauncher.h"
41 
/*
 * Helper macros for the precision-templated GPU classes.
 *
 * BEAGLE_GPU_GENERIC  expands to the name of the template type parameter (Real).
 * BEAGLE_GPU_TEMPLATE expands to the template head placed in front of each
 *                     class declared on that parameter in this header.
 *
 * NOTE(review): BEAGLE_GPU_GENERIC is not used in this header; presumably it
 * is consumed by the out-of-line definitions in BeagleGPUImpl.hpp -- confirm.
 */
#define BEAGLE_GPU_GENERIC Real
#define BEAGLE_GPU_TEMPLATE template <typename Real>
44 
45 namespace beagle {
46 namespace gpu {
47 
48 BEAGLE_GPU_TEMPLATE
49 class BeagleGPUImpl : public BeagleImpl {
50 private:
51  GPUInterface* gpu;
52  KernelLauncher* kernels;
53 
54  int kInitialized;
55 
56  long kFlags;
57 
58  int kTipCount;
59  int kPartialsBufferCount;
60  int kCompactBufferCount;
61  int kStateCount;
62  int kPatternCount;
63  int kEigenDecompCount;
64  int kMatrixCount;
65  int kCategoryCount;
66 
67  int kTipPartialsBufferCount;
68  int kInternalPartialsBufferCount;
69  int kBufferCount;
70  int kScaleBufferCount;
71 
72  int kPaddedStateCount;
73  int kPaddedPatternCount; // total # of patterns with padding so that kPaddedPatternCount
74  // * kPaddedStateCount is a multiple of 16
75  int kSumSitesBlockCount;
76 
77  int kPartialsSize;
78  int kMatrixSize;
79  int kEigenValuesSize;
80  int kScaleBufferSize;
81 
82  int kLastCompactBufferIndex;
83  int kLastTipPartialsBufferIndex;
84 
85  GPUPtr dIntegrationTmp;
86  GPUPtr dOutFirstDeriv;
87  GPUPtr dOutSecondDeriv;
88  GPUPtr dPartialsTmp;
89  GPUPtr dFirstDerivTmp;
90  GPUPtr dSecondDerivTmp;
91 
92  GPUPtr dSumLogLikelihood;
93  GPUPtr dSumFirstDeriv;
94  GPUPtr dSumSecondDeriv;
95 
96  GPUPtr dPatternWeights;
97 
98  GPUPtr dBranchLengths;
99 
100  GPUPtr dDistanceQueue;
101 
102  GPUPtr dPtrQueue;
103 
104  GPUPtr dMaxScalingFactors;
105  GPUPtr dIndexMaxScalingFactors;
106 
107  GPUPtr dAccumulatedScalingFactors;
108 
109  GPUPtr* dEigenValues;
110  GPUPtr* dEvec;
111  GPUPtr* dIevc;
112 
113  GPUPtr* dWeights;
114  GPUPtr* dFrequencies;
115 
116  GPUPtr* dScalingFactors;
117 
118  GPUPtr* dStates;
119 
120  GPUPtr* dPartials;
121  GPUPtr* dMatrices;
122 
123  GPUPtr* dCompactBuffers;
124  GPUPtr* dTipPartialsBuffers;
125 
126  unsigned int* hPtrQueue;
127 
128  double* hCategoryRates; // Can keep in double-precision
129 
130  Real* hPatternWeightsCache;
131 
132  Real* hDistanceQueue;
133 
134  Real* hWeightsCache;
135  Real* hFrequenciesCache;
136  Real* hLogLikelihoodsCache;
137  Real* hPartialsCache;
138  int* hStatesCache;
139  Real* hMatrixCache;
140 
141  int* hRescalingTrigger;
142  GPUPtr dRescalingTrigger;
143 
144  GPUPtr* dScalingFactorsMaster;
145 
146 public:
147  BeagleGPUImpl();
148 
149  virtual ~BeagleGPUImpl();
150 
151  int createInstance(int tipCount,
152  int partialsBufferCount,
153  int compactBufferCount,
154  int stateCount,
155  int patternCount,
156  int eigenDecompositionCount,
157  int matrixCount,
158  int categoryCount,
159  int scaleBufferCount,
160  int resourceNumber,
161  long preferenceFlags,
162  long requirementFlags);
163 
164  int getInstanceDetails(BeagleInstanceDetails* retunInfo);
165 
166  int setTipStates(int tipIndex,
167  const int* inStates);
168 
169  int setTipPartials(int tipIndex,
170  const double* inPartials);
171 
172  int setPartials(int bufferIndex,
173  const double* inPartials);
174 
175  int getPartials(int bufferIndex,
176  int scaleIndex,
177  double* outPartials);
178 
179  int setEigenDecomposition(int eigenIndex,
180  const double* inEigenVectors,
181  const double* inInverseEigenVectors,
182  const double* inEigenValues);
183 
184  int setStateFrequencies(int stateFrequenciesIndex,
185  const double* inStateFrequencies);
186 
187  int setCategoryWeights(int categoryWeightsIndex,
188  const double* inCategoryWeights);
189 
190  int setPatternWeights(const double* inPatternWeights);
191 
192 
193  int setCategoryRates(const double* inCategoryRates);
194 
195  int setTransitionMatrix(int matrixIndex,
196  const double* inMatrix,
197  double paddedValue);
198 
199  int setTransitionMatrices(const int* matrixIndices,
200  const double* inMatrices,
201  const double* paddedValues,
202  int count);
203 
204  int getTransitionMatrix(int matrixIndex,
205  double* outMatrix);
206 
207  int updateTransitionMatrices(int eigenIndex,
208  const int* probabilityIndices,
209  const int* firstDerivativeIndices,
210  const int* secondDerivativeIndices,
211  const double* edgeLengths,
212  int count);
213 
214  int updatePartials(const int* operations,
215  int operationCount,
216  int cumulativeScalingIndex);
217 
218  int waitForPartials(const int* destinationPartials,
219  int destinationPartialsCount);
220 
221  int accumulateScaleFactors(const int* scalingIndices,
222  int count,
223  int cumulativeScalingIndex);
224 
225  int removeScaleFactors(const int* scalingIndices,
226  int count,
227  int cumulativeScalingIndex);
228 
229  int resetScaleFactors(int cumulativeScalingIndex);
230 
231  int copyScaleFactors(int destScalingIndex,
232  int srcScalingIndex);
233 
234  int calculateRootLogLikelihoods(const int* bufferIndices,
235  const int* categoryWeightsIndices,
236  const int* stateFrequenciesIndices,
237  const int* cumulativeScaleIndices,
238  int count,
239  double* outSumLogLikelihood);
240 
241  int calculateEdgeLogLikelihoods(const int* parentBufferIndices,
242  const int* childBufferIndices,
243  const int* probabilityIndices,
244  const int* firstDerivativeIndices,
245  const int* secondDerivativeIndices,
246  const int* categoryWeightsIndices,
247  const int* stateFrequenciesIndices,
248  const int* cumulativeScaleIndices,
249  int count,
250  double* outSumLogLikelihood,
251  double* outSumFirstDerivative,
252  double* outSumSecondDerivative);
253 
254  int getSiteLogLikelihoods(double* outLogLikelihoods);
255 
256  int getSiteDerivatives(double* outFirstDerivatives,
257  double* outSecondDerivatives);
258 
259 private:
260  char* getInstanceName();
261 
262 };
263 
264 BEAGLE_GPU_TEMPLATE
266 public:
267  virtual BeagleImpl* createImpl(int tipCount,
268  int partialsBufferCount,
269  int compactBufferCount,
270  int stateCount,
271  int patternCount,
272  int eigenBufferCount,
273  int matrixBufferCount,
274  int categoryCount,
275  int scaleBufferCount,
276  int resourceNumber,
277  long preferenceFlags,
278  long requirementFlags,
279  int* errorCode);
280 
281  virtual const char* getName();
282  virtual const long getFlags();
283 };
284 
/**
 * @brief Adjust @p flags for the precision represented by the type Real.
 *
 * @param flags  flag word modified through the pointer
 * @param r      value of type Real (presumably used only to select the
 *               template instantiation -- confirm in BeagleGPUImpl.hpp)
 *
 * Declaration only; the definition is not part of this header.
 */
template <typename Real>
void modifyFlagsForPrecision(long* flags, Real r);
287 
288 } // namespace gpu
289 } // namespace beagle
290 
291 #include "libhmsbeagle/GPU/BeagleGPUImpl.hpp"
292 
293 #endif // __BeagleGPUImpl__