30 #ifndef __BeagleGPUImpl__
31 #define __BeagleGPUImpl__
34 #include "libhmsbeagle/config.h"
37 #include "libhmsbeagle/BeagleImpl.h"
38 #include "libhmsbeagle/GPU/GPUImplDefs.h"
39 #include "libhmsbeagle/GPU/GPUInterface.h"
40 #include "libhmsbeagle/GPU/KernelLauncher.h"
// BEAGLE_GPU_GENERIC expands to the identifier `Real`; BEAGLE_GPU_TEMPLATE
// expands to `template <typename Real>`, which declares a type parameter of
// that same name, so the two macros are meant to be used together.
// NOTE(review): presumably consumed by the out-of-class definitions in
// BeagleGPUImpl.hpp, which this header includes at its end -- confirm.
42 #define BEAGLE_GPU_GENERIC Real
43 #define BEAGLE_GPU_TEMPLATE template <typename Real>
// Plain `int` data members, all carrying a `k` prefix.
// NOTE(review): judging by the names only, these hold buffer counts, padded
// state/pattern sizes, a block count, and the last compact / tip-partials
// buffer indices. Where they are assigned is not visible in this header --
// confirm against the implementation before relying on their meaning.
59 int kPartialsBufferCount;
60 int kCompactBufferCount;
63 int kEigenDecompCount;
67 int kTipPartialsBufferCount;
68 int kInternalPartialsBufferCount;
70 int kScaleBufferCount;
72 int kPaddedStateCount;
73 int kPaddedPatternCount;
75 int kSumSitesBlockCount;
82 int kLastCompactBufferIndex;
83 int kLastTipPartialsBufferIndex;
// Data members of type GPUPtr, one handle each. GPUPtr is not defined in this
// header; it presumably comes from one of the GPU headers included above.
// NOTE(review): the `d` prefix presumably marks device-side buffers, and the
// names suggest temporaries, derivative outputs, reduction results, pattern
// weights, branch lengths, a distance queue and scaling-factor storage.
// Allocation, size and lifetime are not visible here -- confirm.
85 GPUPtr dIntegrationTmp;
86 GPUPtr dOutFirstDeriv;
87 GPUPtr dOutSecondDeriv;
89 GPUPtr dFirstDerivTmp;
90 GPUPtr dSecondDerivTmp;
92 GPUPtr dSumLogLikelihood;
93 GPUPtr dSumFirstDeriv;
94 GPUPtr dSumSecondDeriv;
96 GPUPtr dPatternWeights;
98 GPUPtr dBranchLengths;
100 GPUPtr dDistanceQueue;
104 GPUPtr dMaxScalingFactors;
105 GPUPtr dIndexMaxScalingFactors;
107 GPUPtr dAccumulatedScalingFactors;
// Data members of type `GPUPtr*` (pointer to GPUPtr).
// NOTE(review): presumably each points to a host array of GPUPtr handles, one
// per eigen decomposition / frequency set / scale buffer / compact buffer /
// tip-partials buffer. Array lengths and ownership are not visible here --
// confirm.
109 GPUPtr* dEigenValues;
114 GPUPtr* dFrequencies;
116 GPUPtr* dScalingFactors;
123 GPUPtr* dCompactBuffers;
124 GPUPtr* dTipPartialsBuffers;
// Raw-pointer data members, mostly with an `h` prefix. The element types
// differ: hPtrQueue is `unsigned int*`, hCategoryRates is `double*` regardless
// of the template parameter, the *Cache members and hDistanceQueue are
// `Real*`, and hRescalingTrigger is `int*`.
// dRescalingTrigger is a single GPUPtr and dScalingFactorsMaster is a
// `GPUPtr*`.
// NOTE(review): the `h` prefix presumably marks host-side staging memory, and
// hRescalingTrigger / dRescalingTrigger presumably refer to the same flag as
// seen from host and device. Sizes and ownership are not visible -- confirm.
126 unsigned int* hPtrQueue;
128 double* hCategoryRates;
130 Real* hPatternWeightsCache;
132 Real* hDistanceQueue;
135 Real* hFrequenciesCache;
136 Real* hLogLikelihoodsCache;
137 Real* hPartialsCache;
141 int* hRescalingTrigger;
142 GPUPtr dRescalingTrigger;
144 GPUPtr* dScalingFactorsMaster;
// Declaration of createInstance: takes integer counts (tips, partials
// buffers, compact buffers, eigen decompositions, scale buffers) followed by
// two `long` flag words, and returns an int.
// NOTE(review): the embedded line numbering jumps (153 -> 156 -> 159 -> 161),
// so some parameters of the real declaration may be absent from this view.
// The return value is presumably a status code, and the flag words presumably
// carry preference/requirement bit masks -- confirm against the definition.
151 int createInstance(
int tipCount,
152 int partialsBufferCount,
153 int compactBufferCount,
156 int eigenDecompositionCount,
159 int scaleBufferCount,
161 long preferenceFlags,
162 long requirementFlags);
// Declaration of setTipStates: takes a tip index and a read-only int array,
// and returns an int.
// NOTE(review): presumably stores compact state codes for one tip; the
// required length of inStates and the meaning of the return value are not
// visible here -- confirm.
166 int setTipStates(
int tipIndex,
167 const int* inStates);
// Declaration of setTipPartials: takes a tip index and a read-only double
// array, and returns an int.
// NOTE(review): input is `double` even though the class is templated on Real,
// so a precision conversion presumably happens in the implementation --
// confirm. The required array length is not visible here.
169 int setTipPartials(
int tipIndex,
170 const double* inPartials);
// Declaration of setPartials: takes a buffer index and a read-only double
// array, and returns an int.
// NOTE(review): presumably the counterpart of getPartials for an arbitrary
// partials buffer; the expected array length is not visible here -- confirm.
172 int setPartials(
int bufferIndex,
173 const double* inPartials);
// Declaration of getPartials: takes a buffer index and a writable double
// array (an out-parameter by its non-const type), and returns an int.
// NOTE(review): the embedded numbering jumps from 175 to 177, so a parameter
// may be missing from this view. The required capacity of outPartials is not
// visible -- confirm.
175 int getPartials(
int bufferIndex,
177 double* outPartials);
// Declaration of setEigenDecomposition: takes an eigen index and three
// read-only double arrays (eigenvectors, inverse eigenvectors, eigenvalues),
// and returns an int.
// NOTE(review): matrix layout (row- vs column-major) and array sizes are not
// visible in this header -- confirm against the implementation.
179 int setEigenDecomposition(
int eigenIndex,
180 const double* inEigenVectors,
181 const double* inInverseEigenVectors,
182 const double* inEigenValues);
// Declaration of setStateFrequencies: takes a frequencies-set index and a
// read-only double array, and returns an int.
// NOTE(review): presumably one value per state is expected; the array length
// is not visible here -- confirm.
184 int setStateFrequencies(
int stateFrequenciesIndex,
185 const double* inStateFrequencies);
// Declaration of setCategoryWeights: takes a weights-set index and a
// read-only double array, and returns an int.
// NOTE(review): presumably one weight per rate category is expected; the
// array length is not visible here -- confirm.
187 int setCategoryWeights(
int categoryWeightsIndex,
188 const double* inCategoryWeights);
// Declaration of setPatternWeights: takes a read-only double array and
// returns an int. Unlike the neighbouring setters it takes no index argument.
// NOTE(review): presumably one weight per site pattern is expected -- confirm.
190 int setPatternWeights(
const double* inPatternWeights);
// Declaration of setCategoryRates: takes a read-only double array and returns
// an int.
// NOTE(review): presumably fills the `double* hCategoryRates` member declared
// in this header; the array length is not visible here -- confirm.
193 int setCategoryRates(
const double* inCategoryRates);
195 int setTransitionMatrix(
int matrixIndex,
196 const double* inMatrix,
199 int setTransitionMatrices(
const int* matrixIndices,
200 const double* inMatrices,
201 const double* paddedValues,
204 int getTransitionMatrix(
int matrixIndex,
207 int updateTransitionMatrices(
int eigenIndex,
208 const int* probabilityIndices,
209 const int* firstDerivativeIndices,
210 const int* secondDerivativeIndices,
211 const double* edgeLengths,
// Declaration of updatePartials: takes a read-only int array of operations
// and a cumulative scaling index, and returns an int.
// NOTE(review): the embedded numbering jumps from 214 to 216, so a parameter
// (presumably the operation count) is missing from this view. The encoding of
// each operation inside `operations` is not visible here -- confirm.
214 int updatePartials(
const int* operations,
216 int cumulativeScalingIndex);
// Declaration of waitForPartials: takes a read-only int array of destination
// partials indices together with its element count, and returns an int.
// NOTE(review): whether this actually blocks or synchronizes with the device
// cannot be determined from the declaration -- confirm in the implementation.
218 int waitForPartials(
const int* destinationPartials,
219 int destinationPartialsCount);
// Declaration of accumulateScaleFactors: takes a read-only int array of
// scaling indices and a cumulative scaling index, and returns an int.
// NOTE(review): the embedded numbering jumps from 221 to 223, so a parameter
// (presumably the element count of scalingIndices) is missing from this view
// -- confirm.
221 int accumulateScaleFactors(
const int* scalingIndices,
223 int cumulativeScalingIndex);
// Declaration of removeScaleFactors: same visible parameter shape as
// accumulateScaleFactors (read-only int array of scaling indices plus a
// cumulative scaling index), returning an int.
// NOTE(review): the embedded numbering jumps from 225 to 227, so a parameter
// (presumably the element count) is missing from this view -- confirm.
225 int removeScaleFactors(
const int* scalingIndices,
227 int cumulativeScalingIndex);
// Declaration of resetScaleFactors: takes a single cumulative scaling index
// and returns an int.
// NOTE(review): the value the selected buffer is reset to is not visible from
// this header -- confirm in the implementation.
229 int resetScaleFactors(
int cumulativeScalingIndex);
// Declaration of copyScaleFactors: takes a destination and a source scaling
// index (destination first) and returns an int.
// NOTE(review): behaviour when both indices are equal is not visible here --
// confirm.
231 int copyScaleFactors(
int destScalingIndex,
232 int srcScalingIndex);
// Declaration of calculateRootLogLikelihoods: takes four read-only int index
// arrays (buffers, category weights, state frequencies, cumulative scale
// buffers) and a writable double out-parameter, and returns an int.
// NOTE(review): the embedded numbering jumps from 237 to 239, so a parameter
// (presumably the shared element count of the index arrays) is missing from
// this view. Whether outSumLogLikelihood is a scalar or an array is not
// visible -- confirm.
234 int calculateRootLogLikelihoods(
const int* bufferIndices,
235 const int* categoryWeightsIndices,
236 const int* stateFrequenciesIndices,
237 const int* cumulativeScaleIndices,
239 double* outSumLogLikelihood);
// Declaration of calculateEdgeLogLikelihoods: takes eight read-only int index
// arrays (parent/child buffers, probability and first/second derivative
// matrices, category weights, state frequencies, cumulative scale buffers)
// and three writable double out-parameters, and returns an int.
// NOTE(review): the embedded numbering jumps from 248 to 250, so a parameter
// (presumably the element count) is missing from this view. Whether the
// derivative index arrays and derivative outputs may be null when derivatives
// are not wanted cannot be told from the declaration -- confirm.
241 int calculateEdgeLogLikelihoods(
const int* parentBufferIndices,
242 const int* childBufferIndices,
243 const int* probabilityIndices,
244 const int* firstDerivativeIndices,
245 const int* secondDerivativeIndices,
246 const int* categoryWeightsIndices,
247 const int* stateFrequenciesIndices,
248 const int* cumulativeScaleIndices,
250 double* outSumLogLikelihood,
251 double* outSumFirstDerivative,
252 double* outSumSecondDerivative);
// Declaration of getSiteLogLikelihoods: takes a writable double array and
// returns an int.
// NOTE(review): presumably one value per site pattern is written, so the
// caller must size the array accordingly -- confirm the required capacity.
254 int getSiteLogLikelihoods(
double* outLogLikelihoods);
// Declaration of getSiteDerivatives: takes two writable double arrays (first
// and second derivatives) and returns an int.
// NOTE(review): whether outSecondDerivatives may be null, and the required
// capacity of both arrays, are not visible from this header -- confirm.
256 int getSiteDerivatives(
double* outFirstDerivatives,
257 double* outSecondDerivatives);
// Declaration of getInstanceName: takes no arguments and returns a non-const
// `char*`.
// NOTE(review): ownership of the returned string is not visible here --
// confirm whether callers may modify or must free it.
260 char* getInstanceName();
268 int partialsBufferCount,
269 int compactBufferCount,
272 int eigenBufferCount,
273 int matrixBufferCount,
275 int scaleBufferCount,
277 long preferenceFlags,
278 long requirementFlags,
// Virtual member taking no arguments and returning a pointer to const char.
// NOTE(review): the enclosing class is not visible in this view; presumably
// this belongs to a factory class separate from the implementation class
// declared earlier -- confirm. Lifetime of the returned string is not visible.
281 virtual const char* getName();
// Virtual member taking no arguments and returning a long by value. The
// `const` on a by-value `long` return does not restrict callers; it is kept
// here because it is part of the declared signature.
// NOTE(review): presumably returns a bit mask of capability flags; the
// enclosing class and any base declaration it overrides are not visible in
// this view -- confirm.
282 virtual const long getFlags();
// Function template declaration parameterised on `Real`: takes a non-const
// pointer to a long (so the function is able to write through it) and a value
// of type Real, and returns nothing.
// NOTE(review): presumably only the static type of `r` matters and is used to
// select single- vs double-precision flag bits in *flags -- confirm against
// the definition, which is not visible in this header.
285 template <
typename Real>
286 void modifyFlagsForPrecision(
long* flags, Real r);
291 #include "libhmsbeagle/GPU/BeagleGPUImpl.hpp"
293 #endif // __BeagleGPUImpl__