26 #ifndef __GPUInterface__
27 #define __GPUInterface__
32 #include "libhmsbeagle/config.h"
37 #include "libhmsbeagle/GPU/GPUImplDefs.h"
38 #include "libhmsbeagle/GPU/KernelResource.h"
43 #include "libhmsbeagle/GPU/kernels/BeagleCUDA_kernels_xcode.h"
45 #include "libhmsbeagle/GPU/kernels/BeagleCUDA_kernels.h"
// Backend-neutral aliases built on CUDA Driver API types: GPUPtr is a device
// memory handle (CUdeviceptr) and GPUFunction is a kernel handle (CUfunction).
// NOTE(review): the same two names are also aliased to OpenCL types further
// down, so these are presumably guarded by a preprocessor branch that is not
// visible in this view - confirm.
47 typedef CUdeviceptr GPUPtr;
48 typedef CUfunction GPUFunction;
51 #include <OpenCL/opencl.h>
52 #include "libhmsbeagle/GPU/BeagleOpenCL_kernels.h"
// Backend-neutral aliases built on OpenCL types: GPUPtr is a memory object
// (cl_mem) and GPUFunction is a kernel object (cl_kernel).
// NOTE(review): the same two names are also aliased to CUDA types above, so
// these are presumably guarded by a preprocessor branch that is not visible in
// this view - confirm.
53 typedef cl_mem GPUPtr;
54 typedef cl_kernel GPUFunction;
// CUDA Driver API context handle.
// NOTE(review): presumably the context all CUDA calls made through this
// interface run in; where it is created and destroyed is not visible here.
62 CUcontext cudaContext;
// Takes an integer error code and returns a C string.
// NOTE(review): presumably maps a CUDA result code to a human-readable
// description; ownership/lifetime of the returned string is not visible in
// this header - confirm against the definition.
64 const char* GetCUDAErrorDescription(
int errorCode);
// OpenCL state: device id, context, command queue, program object and a
// device count.
// NOTE(review): presumably populated during device selection/initialisation,
// which is not visible in this view - confirm.
67 cl_device_id openClDeviceId;
68 cl_context openClContext;
69 cl_command_queue openClCommandQueue;
70 cl_program openClProgram;
71 cl_uint openClNumDevices;
// Takes an integer error code and returns a C string.
// NOTE(review): presumably maps an OpenCL status code to a human-readable
// description; ownership/lifetime of the returned string is not visible in
// this header - confirm against the definition.
72 const char* GetCLErrorDescription(
int errorCode);
84 void SetDevice(
int deviceNumber,
// Takes a kernel/function name and returns a GPUFunction handle.
// NOTE(review): presumably looks the name up in the loaded module/program;
// behaviour for an unknown name is not visible here - confirm.
92 GPUFunction GetFunction(
const char* functionName);
94 void LaunchKernel(GPUFunction deviceFunction,
98 int totalParameterCount,
// Takes a size and returns an untyped pointer.
// NOTE(review): presumably allocates memSize bytes of host memory; failure
// behaviour (null return vs. abort) is not visible here - confirm.
101 void* MallocHost(
size_t memSize);
// Takes an element size and an element count and returns an untyped pointer.
// NOTE(review): presumably a calloc-style zero-initialised host allocation of
// size * length bytes; note the argument order is (size, length) - confirm
// against the definition.
103 void* CallocHost(
size_t size,
size_t length);
105 void* AllocatePinnedHostMemory(
size_t memSize,
// Takes a size and returns a device memory handle.
// NOTE(review): presumably allocates memSize bytes on the device; failure
// behaviour is not visible here - confirm.
109 GPUPtr AllocateMemory(
size_t memSize);
// Takes a length and returns a device memory handle.
// NOTE(review): presumably length is an element count of the configured
// floating-point type rather than a byte count - confirm against the
// definition.
111 GPUPtr AllocateRealMemory(
size_t length);
// Takes a length and returns a device memory handle.
// NOTE(review): presumably length is a count of int elements rather than a
// byte count - confirm against the definition.
113 GPUPtr AllocateIntMemory(
size_t length);
115 void MemsetShort(GPUPtr dest,
119 void MemcpyHostToDevice(GPUPtr dest,
123 void MemcpyDeviceToHost(
void* dest,
127 void MemcpyDeviceToDevice(GPUPtr dest,
// Takes a host pointer and returns nothing.
// NOTE(review): presumably releases memory obtained from MallocHost /
// CallocHost; whether a null pointer is accepted is not visible here.
131 void FreeHostMemory(
void* hPtr);
// Takes a host pointer and returns nothing.
// NOTE(review): presumably releases memory obtained from
// AllocatePinnedHostMemory; mixing it with FreeHostMemory is likely invalid -
// confirm against the definition.
133 void FreePinnedHostMemory(
void* hPtr);
// Takes a device memory handle and returns nothing.
// NOTE(review): presumably releases memory obtained from the Allocate*Memory
// functions - confirm.
135 void FreeMemory(GPUPtr dPtr);
// Takes a host pointer and returns a device memory handle.
// NOTE(review): presumably returns the device-side address of mapped pinned
// host memory; requirements on hPtr are not visible here - confirm.
137 GPUPtr GetDevicePointer(
void* hPtr);
// Returns an unsigned int; the units (bytes, KB, ...) are not visible in this
// header.
// NOTE(review): if the value is a byte count, unsigned int cannot represent
// 4 GiB or more on platforms where it is 32 bits wide - confirm the units and
// whether truncation is possible.
139 unsigned int GetAvailableMemory();
141 void GetDeviceName(
int deviceNumber,
// Takes a device index and a caller-supplied character buffer.
// NOTE(review): presumably writes a textual description of the device into
// deviceDescription. No buffer length is passed, so the required capacity
// must be agreed with the implementation - confirm to avoid overruns.
145 void GetDeviceDescription(
int deviceNumber,
146 char* deviceDescription);
// Takes a device index and returns a bool.
// NOTE(review): presumably reports whether that device supports
// double-precision arithmetic; behaviour for an invalid index is not visible
// here - confirm.
148 bool GetSupportsDoublePrecision(
int deviceNumber);
150 template<
typename Real>
151 void PrintfDeviceVector(GPUPtr dPtr,
int length, Real r) {
152 PrintfDeviceVector(dPtr,length,-1, 0, r);
// Declaration of the four-argument PrintfDeviceVector overload, templated on
// the element type Real.
//   dPtr       - device memory handle of the vector
//   length     - int length (presumably an element count - confirm)
//   checkValue - double comparison value (semantics defined by the
//                implementation, which is not visible for this overload)
//   r          - value of type Real; NOTE(review): appears to exist only so
//                that Real can be deduced at the call site - confirm.
155 template<
typename Real>
156 void PrintfDeviceVector(GPUPtr dPtr,
157 int length,
double checkValue, Real r);
159 template<
typename Real>
160 void PrintfDeviceVector(GPUPtr dPtr,
165 Real* hPtr = (Real*) malloc(
sizeof(Real) * length);
167 MemcpyDeviceToHost(hPtr, dPtr,
sizeof(Real) * length);
168 printfVector(hPtr, length);
170 if (checkValue != -1) {
172 for(
int i=0; i<length; i++) {
174 if( (hPtr[i] > checkValue) && (hPtr[i]-checkValue > 1.0E-4)) {
175 fprintf(stderr,
"Check value exception! (%d) %2.5e > %2.5e (diff = %2.5e)\n",
176 i,hPtr[i],checkValue, (hPtr[i]-checkValue));
180 if (hPtr[i] != hPtr[i]) {
181 fprintf(stderr,
"NaN found!\n");
187 fprintf(stderr,
"Zero-sum vector!\n");
195 void PrintfDeviceInt(GPUPtr dPtr,
// Takes no arguments and returns nothing.
// NOTE(review): presumably tears down the kernel lookup table built by
// InitializeKernelMap - confirm against the definition.
198 void DestroyKernelMap();
// Takes no arguments and returns nothing.
// NOTE(review): presumably builds the kernel lookup table later released by
// DestroyKernelMap - confirm against the definition.
203 void InitializeKernelMap();
// Pointer to an int -> int map.
// NOTE(review): the meaning of keys and values, and who owns/deletes the map,
// are not visible in this header - confirm before use.
205 std::map<int, int>* resourceMap;
// Boolean flag.
// NOTE(review): presumably records whether the selected device supports
// double precision; where it is set is not visible here - confirm.
207 bool supportDoublePrecision;
210 #endif // __GPUInterface__