HMSBEAGLE  1.0.0
GPUInterface.h
1 /*
2  *
3  * Copyright 2009 Phylogenetic Likelihood Working Group
4  *
5  * This file is part of BEAGLE.
6  *
7  * BEAGLE is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU Lesser General Public License as
9  * published by the Free Software Foundation, either version 3 of
10  * the License, or (at your option) any later version.
11  *
12  * BEAGLE is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with BEAGLE. If not, see
19  * <http://www.gnu.org/licenses/>.
20  *
21  * @author Marc Suchard
22  * @author Dat Huynh
23  * @author Daniel Ayres
24  */
25 
26 #ifndef __GPUInterface__
27 #define __GPUInterface__
28 
#include <cstdio>
#include <cstdlib>

#ifdef HAVE_CONFIG_H
#include "libhmsbeagle/config.h"
#endif

#include <map>

#include "libhmsbeagle/GPU/GPUImplDefs.h"
#include "libhmsbeagle/GPU/KernelResource.h"
39 
// Backend selection: include the chosen backend's API header and its BEAGLE
// kernel header, then map the backend's native handle types onto the
// GPUPtr / GPUFunction aliases used by GPUInterface.  CUDA takes precedence
// when both CUDA and OPENCL are defined.
#ifdef CUDA
    #include <cuda.h>
    #ifdef BEAGLE_XCODE
        #include "libhmsbeagle/GPU/kernels/BeagleCUDA_kernels_xcode.h"
    #else
        #include "libhmsbeagle/GPU/kernels/BeagleCUDA_kernels.h"
    #endif
    typedef CUdeviceptr GPUPtr;
    typedef CUfunction GPUFunction;
#elif defined(OPENCL)
    // Apple ships the OpenCL headers inside the OpenCL framework; other
    // platforms install the Khronos headers under CL/.
    #ifdef __APPLE__
        #include <OpenCL/opencl.h>
    #else
        #include <CL/cl.h>
    #endif
    #include "libhmsbeagle/GPU/BeagleOpenCL_kernels.h"
    typedef cl_mem GPUPtr;
    typedef cl_kernel GPUFunction;
#endif
57 
58 class GPUInterface {
59 private:
60 #ifdef CUDA
61  CUdevice cudaDevice;
62  CUcontext cudaContext;
63  CUmodule cudaModule;
64  const char* GetCUDAErrorDescription(int errorCode);
65 #else
66 #ifdef OPENCL
67  cl_device_id openClDeviceId; // compute device id
68  cl_context openClContext; // compute context
69  cl_command_queue openClCommandQueue; // compute command queue
70  cl_program openClProgram; // compute program
71  cl_uint openClNumDevices;
72  const char* GetCLErrorDescription(int errorCode);
73 #endif
74 #endif
75 public:
76  GPUInterface();
77 
78  ~GPUInterface();
79 
80  int Initialize();
81 
82  int GetDeviceCount();
83 
84  void SetDevice(int deviceNumber,
85  int paddedStateCount,
86  int categoryCount,
87  int patternCount,
88  long flags);
89 
90  void Synchronize();
91 
92  GPUFunction GetFunction(const char* functionName);
93 
94  void LaunchKernel(GPUFunction deviceFunction,
95  Dim3Int block,
96  Dim3Int grid,
97  int parameterCountV,
98  int totalParameterCount,
99  ...); // parameters
100 
101  void* MallocHost(size_t memSize);
102 
103  void* CallocHost(size_t size, size_t length);
104 
105  void* AllocatePinnedHostMemory(size_t memSize,
106  bool writeCombined,
107  bool mapped);
108 
109  GPUPtr AllocateMemory(size_t memSize);
110 
111  GPUPtr AllocateRealMemory(size_t length);
112 
113  GPUPtr AllocateIntMemory(size_t length);
114 
115  void MemsetShort(GPUPtr dest,
116  unsigned short val,
117  size_t count);
118 
119  void MemcpyHostToDevice(GPUPtr dest,
120  const void* src,
121  size_t memSize);
122 
123  void MemcpyDeviceToHost(void* dest,
124  const GPUPtr src,
125  size_t memSize);
126 
127  void MemcpyDeviceToDevice(GPUPtr dest,
128  GPUPtr src,
129  size_t memSize);
130 
131  void FreeHostMemory(void* hPtr);
132 
133  void FreePinnedHostMemory(void* hPtr);
134 
135  void FreeMemory(GPUPtr dPtr);
136 
137  GPUPtr GetDevicePointer(void* hPtr);
138 
139  unsigned int GetAvailableMemory();
140 
141  void GetDeviceName(int deviceNumber,
142  char* deviceName,
143  int nameLength);
144 
145  void GetDeviceDescription(int deviceNumber,
146  char* deviceDescription);
147 
148  bool GetSupportsDoublePrecision(int deviceNumber);
149 
150  template<typename Real>
151  void PrintfDeviceVector(GPUPtr dPtr, int length, Real r) {
152  PrintfDeviceVector(dPtr,length,-1, 0, r);
153  }
154 
155  template<typename Real>
156  void PrintfDeviceVector(GPUPtr dPtr,
157  int length, double checkValue, Real r);
158 
159  template<typename Real>
160  void PrintfDeviceVector(GPUPtr dPtr,
161  int length,
162  double checkValue,
163  int *signal,
164  Real r) {
165  Real* hPtr = (Real*) malloc(sizeof(Real) * length);
166 
167  MemcpyDeviceToHost(hPtr, dPtr, sizeof(Real) * length);
168  printfVector(hPtr, length);
169 
170  if (checkValue != -1) {
171  double sum = 0;
172  for(int i=0; i<length; i++) {
173  sum += hPtr[i];
174  if( (hPtr[i] > checkValue) && (hPtr[i]-checkValue > 1.0E-4)) {
175  fprintf(stderr,"Check value exception! (%d) %2.5e > %2.5e (diff = %2.5e)\n",
176  i,hPtr[i],checkValue, (hPtr[i]-checkValue));
177  if( signal != 0 )
178  *signal = 1;
179  }
180  if (hPtr[i] != hPtr[i]) {
181  fprintf(stderr,"NaN found!\n");
182  if( signal != 0 )
183  *signal = 1;
184  }
185  }
186  if (sum == 0) {
187  fprintf(stderr,"Zero-sum vector!\n");
188  if( signal != 0 )
189  *signal = 1;
190  }
191  }
192  free(hPtr);
193  }
194 
195  void PrintfDeviceInt(GPUPtr dPtr,
196  int length);
197 
198  void DestroyKernelMap();
199 
200  KernelResource* kernelResource;
201 
202 protected:
203  void InitializeKernelMap();
204 
205  std::map<int, int>* resourceMap;
206 
207  bool supportDoublePrecision;
208 };
209 
210 #endif // __GPUInterface__