HMSBEAGLE  1.0.0
KernelLauncher.h
1 /*
2  *
3  * Copyright 2009 Phylogenetic Likelihood Working Group
4  *
5  * This file is part of BEAGLE.
6  *
7  * BEAGLE is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU Lesser General Public License as
9  * published by the Free Software Foundation, either version 3 of
10  * the License, or (at your option) any later version.
11  *
12  * BEAGLE is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with BEAGLE. If not, see
19  * <http://www.gnu.org/licenses/>.
20  *
21  * @brief GPU kernel launcher
22  *
23  * @author Marc Suchard
24  * @author Daniel Ayres
25  */
26 
27 #ifndef __KernelLauncher__
28 #define __KernelLauncher__
29 
30 #ifdef HAVE_CONFIG_H
31 #include "libhmsbeagle/config.h"
32 #endif
33 
34 #include "libhmsbeagle/GPU/GPUImplDefs.h"
35 #include "libhmsbeagle/GPU/GPUInterface.h"
36 
38 private:
// Pointer to a GPUInterface (type comes from the GPUInterface.h include above).
// NOTE(review): ownership and lifetime of the pointee are not visible in this
// header — confirm in the implementation before assuming who deletes it.
39  GPUInterface* gpu;
40 
// Three GPUFunction handles: a base entry plus FirstDeriv and SecondDeriv variants.
// NOTE(review): presumably these back the GetTransitionProbabilitiesSquare*
// launchers declared below — confirm in the implementation file.
41  GPUFunction fMatrixMulADB;
42  GPUFunction fMatrixMulADBFirstDeriv;
43  GPUFunction fMatrixMulADBSecondDeriv;
44 
// GPUFunction handles grouped by operand pairing (PartialsPartials,
// StatesPartials, StatesStates) and by variant suffix (Coherent, AutoScaling,
// FixedScaling, CheckScaling, FixedCheckScaling), followed by the
// EdgeLikelihoods handles and their SecondDeriv counterparts.
// NOTE(review): which launcher method uses which handle is not visible in this
// header — verify against the implementation.
45  GPUFunction fPartialsPartialsByPatternBlockCoherent;
46  GPUFunction fPartialsPartialsByPatternBlockAutoScaling;
47  GPUFunction fPartialsPartialsByPatternBlockFixedScaling;
48  GPUFunction fPartialsPartialsByPatternBlockCheckScaling;
49  GPUFunction fPartialsPartialsByPatternBlockFixedCheckScaling;
50  GPUFunction fStatesPartialsByPatternBlockCoherent;
51  GPUFunction fStatesPartialsByPatternBlockFixedScaling;
52  GPUFunction fStatesStatesByPatternBlockCoherent;
53  GPUFunction fStatesStatesByPatternBlockFixedScaling;
54  GPUFunction fPartialsPartialsEdgeLikelihoods;
55  GPUFunction fPartialsPartialsEdgeLikelihoodsSecondDeriv;
56  GPUFunction fStatesPartialsEdgeLikelihoods;
57  GPUFunction fStatesPartialsEdgeLikelihoodsSecondDeriv;
58 
// GPUFunction handles for likelihood integration, scale-factor accumulation and
// removal, and partials rescaling.
// Several names match public methods declared later in this class
// (e.g. IntegrateLikelihoods, AccumulateFactorsDynamicScaling).
// NOTE(review): the fPartialsDynamicScaling* handles have no same-named public
// method here; presumably RescalePartials selects among them — TODO confirm.
59  GPUFunction fIntegrateLikelihoodsDynamicScaling;
60  GPUFunction fIntegrateLikelihoodsDynamicScalingSecondDeriv;
61  GPUFunction fAccumulateFactorsDynamicScaling;
62  GPUFunction fAccumulateFactorsAutoScaling;
63  GPUFunction fRemoveFactorsDynamicScaling;
64  GPUFunction fPartialsDynamicScaling;
65  GPUFunction fPartialsDynamicScalingAccumulate;
66  GPUFunction fPartialsDynamicScalingAccumulateDifference;
67  GPUFunction fPartialsDynamicScalingAccumulateReciprocal;
68  GPUFunction fPartialsDynamicScalingSlow;
69  GPUFunction fIntegrateLikelihoods;
70  GPUFunction fIntegrateLikelihoodsSecondDeriv;
71  GPUFunction fIntegrateLikelihoodsMulti;
72  GPUFunction fIntegrateLikelihoodsFixedScaleMulti;
73  GPUFunction fIntegrateLikelihoodsAutoScaling;
74 
// GPUFunction handles whose names match the SumSites1/2/3 methods declared below.
// NOTE(review): the one-to-one pairing is inferred from the names — confirm.
75  GPUFunction fSumSites1;
76  GPUFunction fSumSites2;
77  GPUFunction fSumSites3;
78 
// Six Block/Grid pairs of Dim3Int members, one pair per kernel family
// (TransitionProbabilities, Peeling, Likelihood, Accumulate, Scale, SumSites).
// NOTE(review): presumably these are launch dimensions filled in by
// SetupKernelBlocksAndGrids() — the assignment is not visible in this header.
79  Dim3Int bgTransitionProbabilitiesBlock;
80  Dim3Int bgTransitionProbabilitiesGrid;
81  Dim3Int bgPeelingBlock;
82  Dim3Int bgPeelingGrid;
83  Dim3Int bgLikelihoodBlock;
84  Dim3Int bgLikelihoodGrid;
85  Dim3Int bgAccumulateBlock;
86  Dim3Int bgAccumulateGrid;
87  Dim3Int bgScaleBlock;
88  Dim3Int bgScaleGrid;
89  Dim3Int bgSumSitesBlock;
90  Dim3Int bgSumSitesGrid;
91 
// Scalar configuration members: eight unsigned int values and one long (kFlags).
// NOTE(review): where these are initialised and what bits kFlags carries are not
// visible in this header — verify in the implementation.
// NOTE(review): "kSlowReweighing" may be a misspelling of "Reweighting"; it is
// left as-is because the name is referenced outside this file.
92  unsigned int kPaddedStateCount;
93  unsigned int kCategoryCount;
94  unsigned int kPatternCount;
95  unsigned int kPatternBlockSize;
96  unsigned int kMatrixBlockSize;
97  unsigned int kSlowReweighing;
98  unsigned int kMultiplyBlockSize;
99  unsigned int kSumSitesBlockSize;
100  long kFlags;
101 
102 public:
104 
// Destructor declaration (non-virtual as written); the definition is not in this header.
// NOTE(review): whether it releases anything reachable through `gpu` cannot be
// determined from here — confirm in the implementation.
105  ~KernelLauncher();
106 
107 // Kernel links
108 #ifdef CUDA
// CUDA-only overload (compiled when CUDA is defined). Declaration only.
// Takes six GPUPtr arguments and an unsigned totalMatrix; returns void.
// NOTE(review): presumably dPtrQueue and distanceQueue hold per-matrix offsets
// and branch lengths for totalMatrix matrices — confirm against the kernel.
109  void GetTransitionProbabilitiesSquare(GPUPtr dMatrices,
110  GPUPtr dPtrQueue,
111  GPUPtr dEvec,
112  GPUPtr dIevc,
113  GPUPtr dEigenValues,
114  GPUPtr distanceQueue,
115  unsigned int totalMatrix);
116 
// CUDA-only. Same parameter list as GetTransitionProbabilitiesSquare in this
// CUDA branch; declaration only, returns void.
// NOTE(review): by name this also produces first-derivative matrices; where
// they are written is not visible here — confirm in the implementation.
117  void GetTransitionProbabilitiesSquareFirstDeriv(GPUPtr dMatrices,
118  GPUPtr dPtrQueue,
119  GPUPtr dEvec,
120  GPUPtr dIevc,
121  GPUPtr dEigenValues,
122  GPUPtr distanceQueue,
123  unsigned int totalMatrix);
124 
// CUDA-only. Same parameter list as GetTransitionProbabilitiesSquare in this
// CUDA branch; declaration only, returns void.
// NOTE(review): by name this also produces second-derivative matrices; the
// output layout is not visible here — confirm in the implementation.
125  void GetTransitionProbabilitiesSquareSecondDeriv(GPUPtr dMatrices,
126  GPUPtr dPtrQueue,
127  GPUPtr dEvec,
128  GPUPtr dIevc,
129  GPUPtr dEigenValues,
130  GPUPtr distanceQueue,
131  unsigned int totalMatrix);
132 
133 #else //OpenCL
// Non-CUDA overload (the #else branch, labelled OpenCL in the source comment).
// Differs from the CUDA signature: a single dPtr replaces dMatrices/dPtrQueue
// and an extra unsigned index is appended. Declaration only, returns void.
// NOTE(review): no FirstDeriv/SecondDeriv variants are declared in this branch.
134  void GetTransitionProbabilitiesSquare(GPUPtr dPtr,
135  GPUPtr dEvec,
136  GPUPtr dIevc,
137  GPUPtr dEigenValues,
138  GPUPtr distanceQueue,
139  unsigned int totalMatrix,
140  unsigned int index);
141 #endif
142 
// Declaration only; returns void.
// Unlike the other pruning methods, scaling buffers are passed as arrays
// (GPUPtr* dScalingFactors, GPUPtr* dScalingFactorsMaster) selected by three
// int indices, and a rescaling trigger is supplied both as a host pointer
// (int* hRescalingTrigger) and as a GPUPtr (dRescalingTrigger).
// NOTE(review): presumably sizeReal is the byte size of the floating-point
// type in use, and negative indices mean "none" — neither is visible here.
143  void PartialsPartialsPruningDynamicCheckScaling(GPUPtr partials1,
144  GPUPtr partials2,
145  GPUPtr partials3,
146  GPUPtr matrices1,
147  GPUPtr matrices2,
148  int writeScalingIndex,
149  int readScalingIndex,
150  int cumulativeScalingIndex,
151  GPUPtr* dScalingFactors,
152  GPUPtr* dScalingFactorsMaster,
153  unsigned int patternCount,
154  unsigned int categoryCount,
155  int doRescaling,
156  int* hRescalingTrigger,
157  GPUPtr dRescalingTrigger,
158  int sizeReal);
159 
// Declaration only; returns void.
// Takes three partials buffers, two matrix buffers, two scaling buffers,
// unsigned pattern/category counts and an int doRescaling.
// NOTE(review): presumably partials3 is the output and partials1/partials2 the
// child inputs; the meaning of each doRescaling value is not visible here.
160  void PartialsPartialsPruningDynamicScaling(GPUPtr partials1,
161  GPUPtr partials2,
162  GPUPtr partials3,
163  GPUPtr matrices1,
164  GPUPtr matrices2,
165  GPUPtr scalingFactors,
166  GPUPtr cumulativeScaling,
167  unsigned int patternCount,
168  unsigned int categoryCount,
169  int doRescaling);
170 
// Declaration only; returns void.
// Same shape as PartialsPartialsPruningDynamicScaling except the first
// argument is named states1 rather than partials1.
// NOTE(review): presumably states1 holds compact state codes for a tip node —
// confirm the expected encoding in the kernel source.
171  void StatesPartialsPruningDynamicScaling(GPUPtr states1,
172  GPUPtr partials2,
173  GPUPtr partials3,
174  GPUPtr matrices1,
175  GPUPtr matrices2,
176  GPUPtr scalingFactors,
177  GPUPtr cumulativeScaling,
178  unsigned int patternCount,
179  unsigned int categoryCount,
180  int doRescaling);
181 
// Declaration only; returns void.
// Same shape as PartialsPartialsPruningDynamicScaling except the first two
// arguments are named states1 and states2.
// NOTE(review): presumably both children are tips given as state codes and
// partials3 is the output — confirm in the implementation.
182  void StatesStatesPruningDynamicScaling(GPUPtr states1,
183  GPUPtr states2,
184  GPUPtr partials3,
185  GPUPtr matrices1,
186  GPUPtr matrices2,
187  GPUPtr scalingFactors,
188  GPUPtr cumulativeScaling,
189  unsigned int patternCount,
190  unsigned int categoryCount,
191  int doRescaling);
192 
// Declaration only; returns void.
// Same parameters as IntegrateLikelihoods plus a dRootScalingFactors buffer.
// NOTE(review): presumably dResult receives one value per pattern — the output
// layout is not visible in this header.
193  void IntegrateLikelihoodsDynamicScaling(GPUPtr dResult,
194  GPUPtr dRootPartials,
195  GPUPtr dWeights,
196  GPUPtr dFrequencies,
197  GPUPtr dRootScalingFactors,
198  unsigned int patternCount,
199  unsigned int categoryCount);
200 
// Declaration only; returns void.
// Parameter list is identical to IntegrateLikelihoodsDynamicScaling.
// NOTE(review): how the auto-scaling variant interprets dRootScalingFactors
// differently (if at all) is not visible here — confirm in the kernel.
201  void IntegrateLikelihoodsAutoScaling(GPUPtr dResult,
202  GPUPtr dRootPartials,
203  GPUPtr dWeights,
204  GPUPtr dFrequencies,
205  GPUPtr dRootScalingFactors,
206  unsigned int patternCount,
207  unsigned int categoryCount);
208 
// Declaration only; returns void.
// Same parameters as IntegrateLikelihoodsSecondDeriv plus a
// dRootScalingFactors buffer: three result buffers, three root input buffers,
// weights, frequencies, scaling factors and the two unsigned counts.
// NOTE(review): presumably the first/second derivative results are written
// alongside dResult — output layout to be confirmed.
209  void IntegrateLikelihoodsDynamicScalingSecondDeriv(GPUPtr dResult,
210  GPUPtr dFirstDerivResult,
211  GPUPtr dSecondDerivResult,
212  GPUPtr dRootPartials,
213  GPUPtr dRootFirstDeriv,
214  GPUPtr dRootSecondDeriv,
215  GPUPtr dWeights,
216  GPUPtr dFrequencies,
217  GPUPtr dRootScalingFactors,
218  unsigned int patternCount,
219  unsigned int categoryCount);
220 
// Declaration only; returns void.
// Takes four GPUPtr buffers and the two unsigned counts.
// NOTE(review): presumably dPartialsTmp is the output scratch buffer — confirm.
// NOTE(review): "dChildParials" looks like a typo for "dChildPartials"; it is
// left untouched here to stay in step with the definition.
221  void PartialsPartialsEdgeLikelihoods(GPUPtr dPartialsTmp,
222  GPUPtr dParentPartials,
223  GPUPtr dChildParials,
224  GPUPtr dTransMatrix,
225  unsigned int patternCount,
226  unsigned int categoryCount);
227 
// Declaration only; returns void.
// Extends PartialsPartialsEdgeLikelihoods with two extra Tmp buffers
// (dFirstDerivTmp, dSecondDerivTmp) and two extra matrix buffers
// (dFirstDerivMatrix, dSecondDerivMatrix).
// NOTE(review): presumably the three *Tmp buffers are outputs — confirm.
// NOTE(review): "dChildParials" looks like a typo for "dChildPartials"; left as-is.
228  void PartialsPartialsEdgeLikelihoodsSecondDeriv(GPUPtr dPartialsTmp,
229  GPUPtr dFirstDerivTmp,
230  GPUPtr dSecondDerivTmp,
231  GPUPtr dParentPartials,
232  GPUPtr dChildParials,
233  GPUPtr dTransMatrix,
234  GPUPtr dFirstDerivMatrix,
235  GPUPtr dSecondDerivMatrix,
236  unsigned int patternCount,
237  unsigned int categoryCount);
238 
239 
// Declaration only; returns void.
// Same shape as PartialsPartialsEdgeLikelihoods, with the child argument named
// dChildStates instead of a partials buffer.
// NOTE(review): presumably the child is a tip given as state codes — confirm
// the expected encoding in the kernel source.
240  void StatesPartialsEdgeLikelihoods(GPUPtr dPartialsTmp,
241  GPUPtr dParentPartials,
242  GPUPtr dChildStates,
243  GPUPtr dTransMatrix,
244  unsigned int patternCount,
245  unsigned int categoryCount);
246 
// Declaration only; returns void.
// Same shape as PartialsPartialsEdgeLikelihoodsSecondDeriv, with the child
// argument named dChildStates.
// NOTE(review): presumably the three *Tmp buffers are outputs and the child is
// a tip given as state codes — confirm in the implementation.
247  void StatesPartialsEdgeLikelihoodsSecondDeriv(GPUPtr dPartialsTmp,
248  GPUPtr dFirstDerivTmp,
249  GPUPtr dSecondDerivTmp,
250  GPUPtr dParentPartials,
251  GPUPtr dChildStates,
252  GPUPtr dTransMatrix,
253  GPUPtr dFirstDerivMatrix,
254  GPUPtr dSecondDerivMatrix,
255  unsigned int patternCount,
256  unsigned int categoryCount);
257 
// Declaration only; returns void.
// Takes three GPUPtr buffers plus unsigned nodeCount and patternCount.
// NOTE(review): presumably dNodePtrQueue lists nodeCount offsets into
// dScalingFactors and the result is added into dRootScalingFactors — confirm.
258  void AccumulateFactorsDynamicScaling(GPUPtr dScalingFactors,
259  GPUPtr dNodePtrQueue,
260  GPUPtr dRootScalingFactors,
261  unsigned int nodeCount,
262  unsigned int patternCount);
263 
// Declaration only; returns void.
// Same parameters as AccumulateFactorsDynamicScaling plus an unsigned
// scaleBufferSize.
// NOTE(review): the unit of scaleBufferSize (elements vs. bytes) is not
// visible in this header — verify against the caller.
264  void AccumulateFactorsAutoScaling(GPUPtr dScalingFactors,
265  GPUPtr dNodePtrQueue,
266  GPUPtr dRootScalingFactors,
267  unsigned int nodeCount,
268  unsigned int patternCount,
269  unsigned int scaleBufferSize);
270 
// Declaration only; returns void.
// Parameter list is identical to AccumulateFactorsDynamicScaling.
// NOTE(review): by name this is the inverse of the accumulate operation —
// confirm in the implementation.
271  void RemoveFactorsDynamicScaling(GPUPtr dScalingFactors,
272  GPUPtr dNodePtrQueue,
273  GPUPtr dRootScalingFactors,
274  unsigned int nodeCount,
275  unsigned int patternCount);
276 
// Declaration only; returns void.
// Takes a partials buffer, two scaling buffers, the two unsigned counts and an
// unsigned fillWithOnes.
// NOTE(review): fillWithOnes is an unsigned int, not a bool; what each value
// selects is not visible here — confirm in the implementation.
277  void RescalePartials(GPUPtr partials3,
278  GPUPtr scalingFactors,
279  GPUPtr cumulativeScaling,
280  unsigned int patternCount,
281  unsigned int categoryCount,
282  unsigned int fillWithOnes);
283 
// Declaration only; returns void.
// Takes four GPUPtr buffers (dResult, dRootPartials, dWeights, dFrequencies)
// and the two unsigned counts; this variant takes no scaling-factor buffer.
// NOTE(review): presumably dResult receives one value per pattern — confirm.
284  void IntegrateLikelihoods(GPUPtr dResult,
285  GPUPtr dRootPartials,
286  GPUPtr dWeights,
287  GPUPtr dFrequencies,
288  unsigned int patternCount,
289  unsigned int categoryCount);
290 
// Declaration only; returns void.
// Extends IntegrateLikelihoods with first/second-derivative result buffers and
// the matching root derivative input buffers; no scaling-factor buffer.
// NOTE(review): output layout of the three result buffers is not visible here.
291  void IntegrateLikelihoodsSecondDeriv(GPUPtr dResult,
292  GPUPtr dFirstDerivResult,
293  GPUPtr dSecondDerivResult,
294  GPUPtr dRootPartials,
295  GPUPtr dRootFirstDeriv,
296  GPUPtr dRootSecondDeriv,
297  GPUPtr dWeights,
298  GPUPtr dFrequencies,
299  unsigned int patternCount,
300  unsigned int categoryCount);
301 
// Declaration only; returns void.
// Same parameters as IntegrateLikelihoods plus an unsigned takeLog.
// NOTE(review): takeLog is an unsigned int used flag-style by name; presumably
// non-zero requests log-likelihood output — TODO confirm.
302  void IntegrateLikelihoodsMulti(GPUPtr dResult,
303  GPUPtr dRootPartials,
304  GPUPtr dWeights,
305  GPUPtr dFrequencies,
306  unsigned int patternCount,
307  unsigned int categoryCount,
308  unsigned int takeLog);
309 
// Declaration only; returns void.
// Takes eight GPUPtr buffers followed by four unsigned values (patternCount,
// categoryCount, subsetCount, subsetIndex).
// NOTE(review): presumably called once per subset with subsetIndex in
// [0, subsetCount), using the dMax*/dIndexMax* buffers to track the largest
// scaling factor across subsets — none of this is visible here; confirm.
310  void IntegrateLikelihoodsFixedScaleMulti(GPUPtr dResult,
311  GPUPtr dRootPartials,
312  GPUPtr dWeights,
313  GPUPtr dFrequencies,
314  GPUPtr dScalingFactors,
315  GPUPtr dPtrQueue,
316  GPUPtr dMaxScalingFactors,
317  GPUPtr dIndexMaxScalingFactors,
318  unsigned int patternCount,
319  unsigned int categoryCount,
320  unsigned int subsetCount,
321  unsigned int subsetIndex);
322 
// Declaration only; returns void.
// One array/sum buffer pair plus dPatternWeights and an unsigned patternCount.
// NOTE(review): presumably writes a pattern-weighted sum of dArray1 into dSum1
// — confirm in the kernel source.
323  void SumSites1(GPUPtr dArray1,
324  GPUPtr dSum1,
325  GPUPtr dPatternWeights,
326  unsigned int patternCount);
327 
// Declaration only; returns void.
// Two array/sum buffer pairs sharing one dPatternWeights and patternCount.
// NOTE(review): presumably the two-array counterpart of SumSites1 — confirm.
328  void SumSites2(GPUPtr dArray1,
329  GPUPtr dSum1,
330  GPUPtr dArray2,
331  GPUPtr dSum2,
332  GPUPtr dPatternWeights,
333  unsigned int patternCount);
334 
// Declaration only; returns void.
// Three array/sum buffer pairs sharing one dPatternWeights and patternCount.
// NOTE(review): presumably the three-array counterpart of SumSites1 — confirm.
335  void SumSites3(GPUPtr dArray1,
336  GPUPtr dSum1,
337  GPUPtr dArray2,
338  GPUPtr dSum2,
339  GPUPtr dArray3,
340  GPUPtr dSum3,
341  GPUPtr dPatternWeights,
342  unsigned int patternCount);
343 
// Public, takes no arguments, returns void; declaration only.
// NOTE(review): presumably (re)computes the bg*Block / bg*Grid members from the
// k* configuration values — confirm in the implementation.
344  void SetupKernelBlocksAndGrids();
345 
346 protected:
// Protected, takes no arguments, returns void; declaration only.
// NOTE(review): presumably resolves the GPUFunction handles (f* members)
// through `gpu` — the lookup itself is not visible in this header.
347  void LoadKernels();
348 
349 };
350 #endif // __KernelLauncher__