27 #ifndef __KernelLauncher__
28 #define __KernelLauncher__
31 #include "libhmsbeagle/config.h"
34 #include "libhmsbeagle/GPU/GPUImplDefs.h"
35 #include "libhmsbeagle/GPU/GPUInterface.h"
// GPUFunction handles for the "MatrixMulADB" family: the base routine plus
// its first- and second-derivative variants.
// NOTE(review): presumably launched by the GetTransitionProbabilitiesSquare*
// declarations further down — confirm in the implementation file.
41 GPUFunction fMatrixMulADB;
42 GPUFunction fMatrixMulADBFirstDeriv;
43 GPUFunction fMatrixMulADBSecondDeriv;
// GPUFunction handles for the "ByPatternBlock" routines, one per child-type
// combination (PartialsPartials, StatesPartials, StatesStates) and scaling
// mode named in the identifier (Coherent, AutoScaling, FixedScaling,
// CheckScaling, FixedCheckScaling).
45 GPUFunction fPartialsPartialsByPatternBlockCoherent;
46 GPUFunction fPartialsPartialsByPatternBlockAutoScaling;
47 GPUFunction fPartialsPartialsByPatternBlockFixedScaling;
48 GPUFunction fPartialsPartialsByPatternBlockCheckScaling;
49 GPUFunction fPartialsPartialsByPatternBlockFixedCheckScaling;
50 GPUFunction fStatesPartialsByPatternBlockCoherent;
51 GPUFunction fStatesPartialsByPatternBlockFixedScaling;
52 GPUFunction fStatesStatesByPatternBlockCoherent;
53 GPUFunction fStatesStatesByPatternBlockFixedScaling;
// Handles for the edge-likelihood routines and their second-derivative
// variants.
54 GPUFunction fPartialsPartialsEdgeLikelihoods;
55 GPUFunction fPartialsPartialsEdgeLikelihoodsSecondDeriv;
56 GPUFunction fStatesPartialsEdgeLikelihoods;
57 GPUFunction fStatesPartialsEdgeLikelihoodsSecondDeriv;
// GPUFunction handles for likelihood integration and scale-factor handling.
// The names mirror the IntegrateLikelihoods*, AccumulateFactors* and
// RemoveFactorsDynamicScaling declarations later in this header.
// NOTE(review): the fPartialsDynamicScaling* handles have no same-named
// method here; presumably they back RescalePartials — confirm in the .cpp.
59 GPUFunction fIntegrateLikelihoodsDynamicScaling;
60 GPUFunction fIntegrateLikelihoodsDynamicScalingSecondDeriv;
61 GPUFunction fAccumulateFactorsDynamicScaling;
62 GPUFunction fAccumulateFactorsAutoScaling;
63 GPUFunction fRemoveFactorsDynamicScaling;
64 GPUFunction fPartialsDynamicScaling;
65 GPUFunction fPartialsDynamicScalingAccumulate;
66 GPUFunction fPartialsDynamicScalingAccumulateDifference;
67 GPUFunction fPartialsDynamicScalingAccumulateReciprocal;
68 GPUFunction fPartialsDynamicScalingSlow;
69 GPUFunction fIntegrateLikelihoods;
70 GPUFunction fIntegrateLikelihoodsSecondDeriv;
71 GPUFunction fIntegrateLikelihoodsMulti;
72 GPUFunction fIntegrateLikelihoodsFixedScaleMulti;
73 GPUFunction fIntegrateLikelihoodsAutoScaling;
// GPUFunction handles matching the SumSites1/2/3 declarations below.
75 GPUFunction fSumSites1;
76 GPUFunction fSumSites2;
77 GPUFunction fSumSites3;
// Block and grid extents (Dim3Int) associated with the transition-
// probabilities routines.
// NOTE(review): presumably populated by SetupKernelBlocksAndGrids() —
// confirm in the implementation file.
79 Dim3Int bgTransitionProbabilitiesBlock;
80 Dim3Int bgTransitionProbabilitiesGrid;
// Unsigned configuration values: problem dimensions (padded state count,
// category count, pattern count) and per-routine block sizes.
// NOTE(review): where these are initialised is not shown here — confirm.
92 unsigned int kPaddedStateCount;
93 unsigned int kCategoryCount;
94 unsigned int kPatternCount;
95 unsigned int kPatternBlockSize;
96 unsigned int kMatrixBlockSize;
// NOTE(review): "Reweighing" looks like a misspelling of "Reweighting"; the
// identifier is left unchanged because other code may reference it.
97 unsigned int kSlowReweighing;
98 unsigned int kMultiplyBlockSize;
99 unsigned int kSumSitesBlockSize;
// Declares the transition-probabilities routine taking a matrices buffer
// (dMatrices), a distance queue and a matrix count (totalMatrix).
// Returns nothing.
// NOTE(review): presumably dispatches fMatrixMulADB — confirm in the .cpp.
109 void GetTransitionProbabilitiesSquare(GPUPtr dMatrices,
114 GPUPtr distanceQueue,
115 unsigned int totalMatrix);
// First-derivative counterpart of GetTransitionProbabilitiesSquare, with the
// same leading dMatrices argument and trailing distanceQueue/totalMatrix
// arguments. Returns nothing.
// NOTE(review): presumably dispatches fMatrixMulADBFirstDeriv — confirm.
117 void GetTransitionProbabilitiesSquareFirstDeriv(GPUPtr dMatrices,
122 GPUPtr distanceQueue,
123 unsigned int totalMatrix);
// Second-derivative counterpart of GetTransitionProbabilitiesSquare, with
// the same leading dMatrices argument and trailing distanceQueue/totalMatrix
// arguments. Returns nothing.
// NOTE(review): presumably dispatches fMatrixMulADBSecondDeriv — confirm.
125 void GetTransitionProbabilitiesSquareSecondDeriv(GPUPtr dMatrices,
130 GPUPtr distanceQueue,
131 unsigned int totalMatrix);
// Overload of GetTransitionProbabilitiesSquare whose first argument is named
// dPtr instead of dMatrices. Its parameter list continues after totalMatrix.
// NOTE(review): how this overload differs in behaviour from the dMatrices
// form is not evident from the declaration — confirm in the .cpp.
134 void GetTransitionProbabilitiesSquare(GPUPtr dPtr,
138 GPUPtr distanceQueue,
139 unsigned int totalMatrix,
// Declares the partials-partials pruning routine with check scaling.
// Its parameters include: write/read/cumulative scaling indices (int), two
// arrays of scaling-factor pointers (GPUPtr*), pattern and category counts,
// and a rescaling trigger passed both as int* (hRescalingTrigger) and as
// GPUPtr (dRescalingTrigger).
// NOTE(review): the h/d prefixes presumably mean host/device copies of the
// same flag — confirm against the implementation.
143 void PartialsPartialsPruningDynamicCheckScaling(GPUPtr partials1,
148 int writeScalingIndex,
149 int readScalingIndex,
150 int cumulativeScalingIndex,
151 GPUPtr* dScalingFactors,
152 GPUPtr* dScalingFactorsMaster,
153 unsigned int patternCount,
154 unsigned int categoryCount,
156 int* hRescalingTrigger,
157 GPUPtr dRescalingTrigger,
// Declares the partials-partials pruning routine with dynamic scaling.
// Its parameters include a scaling-factors buffer, a cumulative-scaling
// buffer, and the pattern and category counts; the list continues after
// categoryCount.
160 void PartialsPartialsPruningDynamicScaling(GPUPtr partials1,
165 GPUPtr scalingFactors,
166 GPUPtr cumulativeScaling,
167 unsigned int patternCount,
168 unsigned int categoryCount,
// Declares the states-partials pruning routine with dynamic scaling.
// Same scaling/count arguments as PartialsPartialsPruningDynamicScaling, but
// the first argument is a states buffer (states1); the list continues after
// categoryCount.
171 void StatesPartialsPruningDynamicScaling(GPUPtr states1,
176 GPUPtr scalingFactors,
177 GPUPtr cumulativeScaling,
178 unsigned int patternCount,
179 unsigned int categoryCount,
// Declares the states-states pruning routine with dynamic scaling.
// Takes a states buffer (states1) first, followed later by the scaling
// buffers and the pattern and category counts; the list continues after
// categoryCount.
182 void StatesStatesPruningDynamicScaling(GPUPtr states1,
187 GPUPtr scalingFactors,
188 GPUPtr cumulativeScaling,
189 unsigned int patternCount,
190 unsigned int categoryCount,
// Declares likelihood integration with dynamic scaling. Arguments include a
// result buffer (dResult), the root partials, the root scaling factors, and
// the pattern and category counts. Returns nothing.
// NOTE(review): presumably writes its output into dResult — confirm.
193 void IntegrateLikelihoodsDynamicScaling(GPUPtr dResult,
194 GPUPtr dRootPartials,
197 GPUPtr dRootScalingFactors,
198 unsigned int patternCount,
199 unsigned int categoryCount);
// Declares the auto-scaling variant of likelihood integration. Its visible
// argument names and types match IntegrateLikelihoodsDynamicScaling.
// Returns nothing.
201 void IntegrateLikelihoodsAutoScaling(GPUPtr dResult,
202 GPUPtr dRootPartials,
205 GPUPtr dRootScalingFactors,
206 unsigned int patternCount,
207 unsigned int categoryCount);
// Declares dynamic-scaling likelihood integration with derivative outputs.
// Three result buffers (value, first derivative, second derivative) are
// paired with three root buffers (partials, first derivative, second
// derivative), followed by root scaling factors and the pattern and category
// counts. Returns nothing.
209 void IntegrateLikelihoodsDynamicScalingSecondDeriv(GPUPtr dResult,
210 GPUPtr dFirstDerivResult,
211 GPUPtr dSecondDerivResult,
212 GPUPtr dRootPartials,
213 GPUPtr dRootFirstDeriv,
214 GPUPtr dRootSecondDeriv,
217 GPUPtr dRootScalingFactors,
218 unsigned int patternCount,
219 unsigned int categoryCount);
// Declares the edge-likelihood routine for a partials/partials pair.
// Arguments include a temporary partials buffer, the parent and child
// partials, and the pattern and category counts. Returns nothing.
// NOTE(review): `dChildParials` looks like a typo for `dChildPartials`; the
// name is kept as-is here.
221 void PartialsPartialsEdgeLikelihoods(GPUPtr dPartialsTmp,
222 GPUPtr dParentPartials,
223 GPUPtr dChildParials,
225 unsigned int patternCount,
226 unsigned int categoryCount);
// Declares the partials/partials edge-likelihood routine with derivative
// outputs. Adds first/second-derivative temporary buffers and first/second-
// derivative matrices to the arguments of PartialsPartialsEdgeLikelihoods.
// Returns nothing.
// NOTE(review): `dChildParials` looks like a typo for `dChildPartials`; the
// name is kept as-is here.
228 void PartialsPartialsEdgeLikelihoodsSecondDeriv(GPUPtr dPartialsTmp,
229 GPUPtr dFirstDerivTmp,
230 GPUPtr dSecondDerivTmp,
231 GPUPtr dParentPartials,
232 GPUPtr dChildParials,
234 GPUPtr dFirstDerivMatrix,
235 GPUPtr dSecondDerivMatrix,
236 unsigned int patternCount,
237 unsigned int categoryCount);
// Declares the edge-likelihood routine for the states/partials case.
// Arguments include a temporary partials buffer, the parent partials, and
// the pattern and category counts. Returns nothing.
240 void StatesPartialsEdgeLikelihoods(GPUPtr dPartialsTmp,
241 GPUPtr dParentPartials,
244 unsigned int patternCount,
245 unsigned int categoryCount);
// Declares the states/partials edge-likelihood routine with derivative
// outputs. Arguments include value/first/second-derivative temporary
// buffers, the parent partials, first/second-derivative matrices, and the
// pattern and category counts. Returns nothing.
247 void StatesPartialsEdgeLikelihoodsSecondDeriv(GPUPtr dPartialsTmp,
248 GPUPtr dFirstDerivTmp,
249 GPUPtr dSecondDerivTmp,
250 GPUPtr dParentPartials,
253 GPUPtr dFirstDerivMatrix,
254 GPUPtr dSecondDerivMatrix,
255 unsigned int patternCount,
256 unsigned int categoryCount);
// Declares scale-factor accumulation for dynamic scaling. Takes a scaling-
// factors buffer, a node-pointer queue, a root scaling-factors buffer, the
// number of nodes and the pattern count. Returns nothing.
// NOTE(review): presumably accumulates the queued nodes' factors into
// dRootScalingFactors — confirm in the implementation.
258 void AccumulateFactorsDynamicScaling(GPUPtr dScalingFactors,
259 GPUPtr dNodePtrQueue,
260 GPUPtr dRootScalingFactors,
261 unsigned int nodeCount,
262 unsigned int patternCount);
// Declares the auto-scaling variant of scale-factor accumulation. Same
// arguments as AccumulateFactorsDynamicScaling plus a trailing
// scaleBufferSize. Returns nothing.
264 void AccumulateFactorsAutoScaling(GPUPtr dScalingFactors,
265 GPUPtr dNodePtrQueue,
266 GPUPtr dRootScalingFactors,
267 unsigned int nodeCount,
268 unsigned int patternCount,
269 unsigned int scaleBufferSize);
// Declares scale-factor removal for dynamic scaling. The signature is
// identical to AccumulateFactorsDynamicScaling. Returns nothing.
// NOTE(review): presumably the inverse of the accumulate operation —
// confirm in the implementation.
271 void RemoveFactorsDynamicScaling(GPUPtr dScalingFactors,
272 GPUPtr dNodePtrQueue,
273 GPUPtr dRootScalingFactors,
274 unsigned int nodeCount,
275 unsigned int patternCount);
// Declares partials rescaling. Takes the partials buffer (partials3), a
// scaling-factors buffer, a cumulative-scaling buffer, the pattern and
// category counts, and an unsigned fillWithOnes value. Returns nothing.
// NOTE(review): fillWithOnes is presumably used as a boolean flag; its exact
// effect is not visible from the declaration — confirm.
277 void RescalePartials(GPUPtr partials3,
278 GPUPtr scalingFactors,
279 GPUPtr cumulativeScaling,
280 unsigned int patternCount,
281 unsigned int categoryCount,
282 unsigned int fillWithOnes);
// Declares likelihood integration without a scaling-factors argument.
// Arguments include a result buffer, the root partials, and the pattern and
// category counts. Returns nothing.
284 void IntegrateLikelihoods(GPUPtr dResult,
285 GPUPtr dRootPartials,
288 unsigned int patternCount,
289 unsigned int categoryCount);
// Declares likelihood integration with derivative outputs and no scaling-
// factors argument. Three result buffers (value, first derivative, second
// derivative) are paired with three root buffers, followed by the pattern
// and category counts. Returns nothing.
291 void IntegrateLikelihoodsSecondDeriv(GPUPtr dResult,
292 GPUPtr dFirstDerivResult,
293 GPUPtr dSecondDerivResult,
294 GPUPtr dRootPartials,
295 GPUPtr dRootFirstDeriv,
296 GPUPtr dRootSecondDeriv,
299 unsigned int patternCount,
300 unsigned int categoryCount);
// Declares the "Multi" variant of likelihood integration. Arguments include
// a result buffer, the root partials, the pattern and category counts, and
// an unsigned takeLog value. Returns nothing.
// NOTE(review): takeLog presumably selects whether the logarithm is applied
// to the result — confirm in the implementation.
302 void IntegrateLikelihoodsMulti(GPUPtr dResult,
303 GPUPtr dRootPartials,
306 unsigned int patternCount,
307 unsigned int categoryCount,
308 unsigned int takeLog);
// Declares the fixed-scale "Multi" variant of likelihood integration.
// Arguments include a result buffer, the root partials, a scaling-factors
// buffer, buffers for the maximum scaling factors and their indices, the
// pattern and category counts, and a subset count and subset index.
// Returns nothing.
310 void IntegrateLikelihoodsFixedScaleMulti(GPUPtr dResult,
311 GPUPtr dRootPartials,
314 GPUPtr dScalingFactors,
316 GPUPtr dMaxScalingFactors,
317 GPUPtr dIndexMaxScalingFactors,
318 unsigned int patternCount,
319 unsigned int categoryCount,
320 unsigned int subsetCount,
321 unsigned int subsetIndex);
// Declares the first of three SumSites routines. Arguments include an array
// buffer (dArray1), a pattern-weights buffer and the pattern count.
// Returns nothing.
// NOTE(review): the numeric suffix presumably reflects how many arrays are
// summed — confirm in the implementation.
323 void SumSites1(GPUPtr dArray1,
325 GPUPtr dPatternWeights,
326 unsigned int patternCount);
// Declares the second SumSites routine. Arguments include an array buffer
// (dArray1), a pattern-weights buffer and the pattern count.
// Returns nothing.
328 void SumSites2(GPUPtr dArray1,
332 GPUPtr dPatternWeights,
333 unsigned int patternCount);
// Declares the third SumSites routine. Arguments include an array buffer
// (dArray1), a pattern-weights buffer and the pattern count.
// Returns nothing.
335 void SumSites3(GPUPtr dArray1,
341 GPUPtr dPatternWeights,
342 unsigned int patternCount);
// Takes no arguments and returns nothing.
// NOTE(review): presumably computes launch dimensions such as
// bgTransitionProbabilitiesBlock / bgTransitionProbabilitiesGrid from the
// k* size values — confirm in the implementation file.
344 void SetupKernelBlocksAndGrids();
350 #endif // __KernelLauncher__