Fixed CUDA mode for Clang/LLVM 9 upgrade
- fix bug, which was caused by executing a transaction in the device interpreter - fixed warning from the device compiler - update test cases
This commit is contained in:
parent
84bcd07963
commit
be5ea3a651
@ -1339,7 +1339,10 @@ static void stringifyPreprocSetting(PreprocessorOptions& PPOpts,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (!COpts.HasOutput || !HasInput) {
|
if (!COpts.HasOutput || !HasInput) {
|
||||||
argvCompile.push_back("-c");
|
// suppress the warning "argument unused during compilation: -c" of the
|
||||||
|
// device interpreter instance
|
||||||
|
if (!COpts.CUDADevice)
|
||||||
|
argvCompile.push_back("-c");
|
||||||
argvCompile.push_back("-");
|
argvCompile.push_back("-");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1670,9 +1670,12 @@ namespace cling {
|
|||||||
IncrementalExecutor::ExecutionResult ExeRes
|
IncrementalExecutor::ExecutionResult ExeRes
|
||||||
= IncrementalExecutor::kExeSuccess;
|
= IncrementalExecutor::kExeSuccess;
|
||||||
|
|
||||||
// Forward to IncrementalExecutor; should not be called by
|
// CUDA device code is not direct executable
|
||||||
// anyone except for IncrementalParser.
|
// the code is executed by a CUDA library function in the host code
|
||||||
ExeRes = m_Executor->runStaticInitializersOnce(T);
|
if (!m_Opts.CompilerOpts.CUDADevice)
|
||||||
|
// Forward to IncrementalExecutor; should not be called by
|
||||||
|
// anyone except for IncrementalParser.
|
||||||
|
ExeRes = m_Executor->runStaticInitializersOnce(T);
|
||||||
|
|
||||||
return ConvertExecutionResult(ExeRes);
|
return ConvertExecutionResult(ExeRes);
|
||||||
}
|
}
|
||||||
|
@ -17,12 +17,12 @@
|
|||||||
// Check if cuda driver is available
|
// Check if cuda driver is available
|
||||||
int version;
|
int version;
|
||||||
cudaDriverGetVersion(&version)
|
cudaDriverGetVersion(&version)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
// Check if a CUDA compatible device (GPU) is available.
|
// Check if a CUDA compatible device (GPU) is available.
|
||||||
int device_count = 0;
|
int device_count = 0;
|
||||||
cudaGetDeviceCount(&device_count)
|
cudaGetDeviceCount(&device_count)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
device_count > 0
|
device_count > 0
|
||||||
// CHECK: (bool) true
|
// CHECK: (bool) true
|
||||||
|
|
||||||
@ -42,7 +42,7 @@ __global__ void g(){
|
|||||||
// the host interpreter.
|
// the host interpreter.
|
||||||
g<<<1,1>>>();
|
g<<<1,1>>>();
|
||||||
cudaGetLastError()
|
cudaGetLastError()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
// expected-no-diagnostics
|
// expected-no-diagnostics
|
||||||
.q
|
.q
|
||||||
|
@ -28,15 +28,15 @@ sum(41,1)
|
|||||||
int hostOutput = 0;
|
int hostOutput = 0;
|
||||||
int * deviceOutput;
|
int * deviceOutput;
|
||||||
cudaMalloc( (void **) &deviceOutput, sizeof(int))
|
cudaMalloc( (void **) &deviceOutput, sizeof(int))
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
gKernel1<<<1,1>>>(deviceOutput);
|
gKernel1<<<1,1>>>(deviceOutput);
|
||||||
cudaGetLastError()
|
cudaGetLastError()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaDeviceSynchronize()
|
cudaDeviceSynchronize()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int), cudaMemcpyDeviceToHost)
|
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int), cudaMemcpyDeviceToHost)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
hostOutput
|
hostOutput
|
||||||
// CHECK: (int) 42
|
// CHECK: (int) 42
|
||||||
|
@ -17,12 +17,12 @@
|
|||||||
// Check if cuda driver is available
|
// Check if cuda driver is available
|
||||||
int version;
|
int version;
|
||||||
cudaDriverGetVersion(&version)
|
cudaDriverGetVersion(&version)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
// Check if a CUDA compatible device (GPU) is available.
|
// Check if a CUDA compatible device (GPU) is available.
|
||||||
int device_count = 0;
|
int device_count = 0;
|
||||||
cudaGetDeviceCount(&device_count)
|
cudaGetDeviceCount(&device_count)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
device_count > 0
|
device_count > 0
|
||||||
// CHECK: (bool) true
|
// CHECK: (bool) true
|
||||||
|
|
||||||
@ -39,7 +39,7 @@ foo()
|
|||||||
// the host interpreter.
|
// the host interpreter.
|
||||||
bar<<<1,1>>>();
|
bar<<<1,1>>>();
|
||||||
cudaGetLastError()
|
cudaGetLastError()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
|
|
||||||
// expected-no-diagnostics
|
// expected-no-diagnostics
|
||||||
|
@ -22,15 +22,15 @@ __global__ void gKernel1(int * out){
|
|||||||
int * deviceOutput;
|
int * deviceOutput;
|
||||||
int hostOutput = 0;
|
int hostOutput = 0;
|
||||||
cudaMalloc( (void **) &deviceOutput, sizeof(int))
|
cudaMalloc( (void **) &deviceOutput, sizeof(int))
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
gKernel1<<<1,1>>>(deviceOutput);
|
gKernel1<<<1,1>>>(deviceOutput);
|
||||||
cudaGetLastError()
|
cudaGetLastError()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaDeviceSynchronize()
|
cudaDeviceSynchronize()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int), cudaMemcpyDeviceToHost)
|
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int), cudaMemcpyDeviceToHost)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
hostOutput
|
hostOutput
|
||||||
// CHECK: (int) 42
|
// CHECK: (int) 42
|
||||||
|
|
||||||
@ -55,15 +55,15 @@ for(unsigned int i = 0; i < numberOfThreads; ++i){
|
|||||||
hostOutput2[i] = 0;
|
hostOutput2[i] = 0;
|
||||||
}
|
}
|
||||||
cudaMalloc( (void **) &deviceOutput2, sizeof(int)*numberOfThreads)
|
cudaMalloc( (void **) &deviceOutput2, sizeof(int)*numberOfThreads)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
gKernel2<<<1,numberOfThreads>>>(deviceOutput2);
|
gKernel2<<<1,numberOfThreads>>>(deviceOutput2);
|
||||||
cudaGetLastError()
|
cudaGetLastError()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaDeviceSynchronize()
|
cudaDeviceSynchronize()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaMemcpy(hostOutput2, deviceOutput2, sizeof(int)*numberOfThreads, cudaMemcpyDeviceToHost)
|
cudaMemcpy(hostOutput2, deviceOutput2, sizeof(int)*numberOfThreads, cudaMemcpyDeviceToHost)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
unsigned int expectedSum = 0;
|
unsigned int expectedSum = 0;
|
||||||
unsigned int cudaSum = 0;
|
unsigned int cudaSum = 0;
|
||||||
|
@ -23,15 +23,15 @@ __global__ void gKernel1(T * value){
|
|||||||
int * deviceOutput1;
|
int * deviceOutput1;
|
||||||
int hostOutput1 = 1;
|
int hostOutput1 = 1;
|
||||||
cudaMalloc( (void **) &deviceOutput1, sizeof(int))
|
cudaMalloc( (void **) &deviceOutput1, sizeof(int))
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
gKernel1<<<1,1>>>(deviceOutput1);
|
gKernel1<<<1,1>>>(deviceOutput1);
|
||||||
cudaGetLastError()
|
cudaGetLastError()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaDeviceSynchronize()
|
cudaDeviceSynchronize()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaMemcpy(&hostOutput1, deviceOutput1, sizeof(int), cudaMemcpyDeviceToHost)
|
cudaMemcpy(&hostOutput1, deviceOutput1, sizeof(int), cudaMemcpyDeviceToHost)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
hostOutput1
|
hostOutput1
|
||||||
// CHECK: (int) 42
|
// CHECK: (int) 42
|
||||||
|
|
||||||
@ -55,27 +55,27 @@ __global__ void gKernel2<float>(float * value){
|
|||||||
int * deviceOutput2;
|
int * deviceOutput2;
|
||||||
int hostOutput2 = 10;
|
int hostOutput2 = 10;
|
||||||
cudaMalloc( (void **) &deviceOutput2, sizeof(int))
|
cudaMalloc( (void **) &deviceOutput2, sizeof(int))
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
float * deviceOutput3;
|
float * deviceOutput3;
|
||||||
float hostOutput3= 10.0;
|
float hostOutput3= 10.0;
|
||||||
cudaMalloc( (void **) &deviceOutput3, sizeof(float))
|
cudaMalloc( (void **) &deviceOutput3, sizeof(float))
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
gKernel2<<<1,1>>>(deviceOutput2);
|
gKernel2<<<1,1>>>(deviceOutput2);
|
||||||
cudaGetLastError()
|
cudaGetLastError()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaDeviceSynchronize()
|
cudaDeviceSynchronize()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
gKernel2<<<1,1>>>(deviceOutput3);
|
gKernel2<<<1,1>>>(deviceOutput3);
|
||||||
cudaGetLastError()
|
cudaGetLastError()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaDeviceSynchronize()
|
cudaDeviceSynchronize()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
cudaMemcpy(&hostOutput2, deviceOutput2, sizeof(int), cudaMemcpyDeviceToHost)
|
cudaMemcpy(&hostOutput2, deviceOutput2, sizeof(int), cudaMemcpyDeviceToHost)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaMemcpy(&hostOutput3, deviceOutput3, sizeof(float), cudaMemcpyDeviceToHost)
|
cudaMemcpy(&hostOutput3, deviceOutput3, sizeof(float), cudaMemcpyDeviceToHost)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
hostOutput2
|
hostOutput2
|
||||||
// CHECK: (int) 1
|
// CHECK: (int) 1
|
||||||
@ -107,19 +107,19 @@ int func1(int * input){
|
|||||||
int * deviceOutput4;
|
int * deviceOutput4;
|
||||||
int hostOutput4 = 10;
|
int hostOutput4 = 10;
|
||||||
cudaMalloc( (void **) &deviceOutput4, sizeof(int))
|
cudaMalloc( (void **) &deviceOutput4, sizeof(int))
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
int * deviceOutput5;
|
int * deviceOutput5;
|
||||||
cudaMalloc( (void **) &deviceOutput5, sizeof(int))
|
cudaMalloc( (void **) &deviceOutput5, sizeof(int))
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
gKernel3<<<1,1>>>(deviceOutput4, func1(deviceOutput5));
|
gKernel3<<<1,1>>>(deviceOutput4, func1(deviceOutput5));
|
||||||
cudaGetLastError()
|
cudaGetLastError()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaDeviceSynchronize()
|
cudaDeviceSynchronize()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
cudaMemcpy(&hostOutput4, deviceOutput4, sizeof(int), cudaMemcpyDeviceToHost)
|
cudaMemcpy(&hostOutput4, deviceOutput4, sizeof(int), cudaMemcpyDeviceToHost)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
hostOutput4
|
hostOutput4
|
||||||
// CHECK: (int) 5
|
// CHECK: (int) 5
|
||||||
@ -155,27 +155,27 @@ __global__ void gKernel5(T * x, Functor const functor){
|
|||||||
int * deviceOutput6;
|
int * deviceOutput6;
|
||||||
int hostOutput6 = 10;
|
int hostOutput6 = 10;
|
||||||
cudaMalloc( (void **) &deviceOutput6, sizeof(int))
|
cudaMalloc( (void **) &deviceOutput6, sizeof(int))
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
double * deviceOutput7;
|
double * deviceOutput7;
|
||||||
double hostOutput7 = 10.0;
|
double hostOutput7 = 10.0;
|
||||||
cudaMalloc( (void **) &deviceOutput7, sizeof(double))
|
cudaMalloc( (void **) &deviceOutput7, sizeof(double))
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
gKernel5<<<1,1>>>(deviceOutput6, Struct1<int>{});
|
gKernel5<<<1,1>>>(deviceOutput6, Struct1<int>{});
|
||||||
cudaGetLastError()
|
cudaGetLastError()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaDeviceSynchronize()
|
cudaDeviceSynchronize()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
gKernel5<<<1,1>>>(deviceOutput7, Struct1<double>{});
|
gKernel5<<<1,1>>>(deviceOutput7, Struct1<double>{});
|
||||||
cudaGetLastError()
|
cudaGetLastError()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaDeviceSynchronize()
|
cudaDeviceSynchronize()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
cudaMemcpy(&hostOutput6, deviceOutput6, sizeof(int), cudaMemcpyDeviceToHost)
|
cudaMemcpy(&hostOutput6, deviceOutput6, sizeof(int), cudaMemcpyDeviceToHost)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaMemcpy(&hostOutput7, deviceOutput7, sizeof(double), cudaMemcpyDeviceToHost)
|
cudaMemcpy(&hostOutput7, deviceOutput7, sizeof(double), cudaMemcpyDeviceToHost)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
hostOutput6
|
hostOutput6
|
||||||
// CHECK: (int) 1
|
// CHECK: (int) 1
|
||||||
|
@ -26,15 +26,15 @@ __global__ void gKernel1(int * out){
|
|||||||
int * deviceOutput;
|
int * deviceOutput;
|
||||||
int hostOutput = 0;
|
int hostOutput = 0;
|
||||||
cudaMalloc( (void **) &deviceOutput, sizeof(int))
|
cudaMalloc( (void **) &deviceOutput, sizeof(int))
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
gKernel1<<<1,1>>>(deviceOutput);
|
gKernel1<<<1,1>>>(deviceOutput);
|
||||||
cudaGetLastError()
|
cudaGetLastError()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaDeviceSynchronize()
|
cudaDeviceSynchronize()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int), cudaMemcpyDeviceToHost)
|
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int), cudaMemcpyDeviceToHost)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
hostOutput
|
hostOutput
|
||||||
// CHECK: (int) 42
|
// CHECK: (int) 42
|
||||||
|
|
||||||
@ -50,11 +50,11 @@ __global__ void gKernel2(int * out){
|
|||||||
hostOutput = 0;
|
hostOutput = 0;
|
||||||
gKernel2<43><<<1,1>>>(deviceOutput);
|
gKernel2<43><<<1,1>>>(deviceOutput);
|
||||||
cudaGetLastError()
|
cudaGetLastError()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaDeviceSynchronize()
|
cudaDeviceSynchronize()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int), cudaMemcpyDeviceToHost)
|
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int), cudaMemcpyDeviceToHost)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
hostOutput
|
hostOutput
|
||||||
// CHECK: (int) 43
|
// CHECK: (int) 43
|
||||||
|
|
||||||
|
@ -16,7 +16,7 @@
|
|||||||
|
|
||||||
// if process() works, the general input also works
|
// if process() works, the general input also works
|
||||||
gCling->process("cudaGetLastError()");
|
gCling->process("cudaGetLastError()");
|
||||||
//CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
//CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
// declare a cuda kernel with with a define
|
// declare a cuda kernel with with a define
|
||||||
// do not this in real code ;-)
|
// do not this in real code ;-)
|
||||||
@ -27,16 +27,16 @@ gCling->declare("__global__ void g1(int * out){ *out = FOO;}");
|
|||||||
int *d1;
|
int *d1;
|
||||||
int h1 = 0;
|
int h1 = 0;
|
||||||
cudaMalloc((void**)&d1, sizeof(int))
|
cudaMalloc((void**)&d1, sizeof(int))
|
||||||
//CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
//CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
// run kernel
|
// run kernel
|
||||||
g1<<<1,1>>>(d1);
|
g1<<<1,1>>>(d1);
|
||||||
cudaGetLastError()
|
cudaGetLastError()
|
||||||
//CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
//CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
// check result
|
// check result
|
||||||
cudaMemcpy(&h1, d1, sizeof(int), cudaMemcpyDeviceToHost)
|
cudaMemcpy(&h1, d1, sizeof(int), cudaMemcpyDeviceToHost)
|
||||||
//CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
//CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
h1
|
h1
|
||||||
//CHECK: (int) 42
|
//CHECK: (int) 42
|
||||||
|
|
||||||
@ -49,10 +49,10 @@ gCling->parse("__global__ void g2(int * out){ *out = 52;}");
|
|||||||
|
|
||||||
g2<<<1,1>>>(d1);
|
g2<<<1,1>>>(d1);
|
||||||
cudaGetLastError()
|
cudaGetLastError()
|
||||||
//CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
//CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
cudaMemcpy(&h1, d1, sizeof(int), cudaMemcpyDeviceToHost)
|
cudaMemcpy(&h1, d1, sizeof(int), cudaMemcpyDeviceToHost)
|
||||||
//CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
//CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
h1
|
h1
|
||||||
//CHECK: (int) 52
|
//CHECK: (int) 52
|
||||||
|
|
||||||
|
@ -32,19 +32,19 @@ for(unsigned int i = 0; i < numberOfThreads; ++i){
|
|||||||
int * deviceInput;
|
int * deviceInput;
|
||||||
int * deviceOutput;
|
int * deviceOutput;
|
||||||
cudaMalloc( (void **) &deviceInput, sizeof(int)*numberOfThreads)
|
cudaMalloc( (void **) &deviceInput, sizeof(int)*numberOfThreads)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaMalloc( (void **) &deviceOutput, sizeof(int)*numberOfThreads)
|
cudaMalloc( (void **) &deviceOutput, sizeof(int)*numberOfThreads)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
cudaMemcpy(deviceInput, hostInput, sizeof(int)*numberOfThreads, cudaMemcpyHostToDevice)
|
cudaMemcpy(deviceInput, hostInput, sizeof(int)*numberOfThreads, cudaMemcpyHostToDevice)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
gKernel1<<<1,numberOfThreads, sizeof(int)*numberOfThreads>>>(deviceInput, deviceOutput);
|
gKernel1<<<1,numberOfThreads, sizeof(int)*numberOfThreads>>>(deviceInput, deviceOutput);
|
||||||
cudaGetLastError()
|
cudaGetLastError()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaDeviceSynchronize()
|
cudaDeviceSynchronize()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaMemcpy(hostOutput, deviceOutput, sizeof(int)*numberOfThreads, cudaMemcpyDeviceToHost)
|
cudaMemcpy(hostOutput, deviceOutput, sizeof(int)*numberOfThreads, cudaMemcpyDeviceToHost)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
int expectedSum = (numberOfThreads*(numberOfThreads+1))/2;
|
int expectedSum = (numberOfThreads*(numberOfThreads+1))/2;
|
||||||
int cudaSum = 0;
|
int cudaSum = 0;
|
||||||
|
@ -15,12 +15,12 @@
|
|||||||
// Check if cuda driver is available
|
// Check if cuda driver is available
|
||||||
int version;
|
int version;
|
||||||
cudaDriverGetVersion(&version)
|
cudaDriverGetVersion(&version)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
// Check if a CUDA compatible device (GPU) is available.
|
// Check if a CUDA compatible device (GPU) is available.
|
||||||
int device_count = 0;
|
int device_count = 0;
|
||||||
cudaGetDeviceCount(&device_count)
|
cudaGetDeviceCount(&device_count)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
device_count > 0
|
device_count > 0
|
||||||
// CHECK: (bool) true
|
// CHECK: (bool) true
|
||||||
|
|
||||||
@ -30,9 +30,9 @@ __global__ void gKernel1(){}
|
|||||||
.rawInput 0
|
.rawInput 0
|
||||||
gKernel1<<<1,1>>>();
|
gKernel1<<<1,1>>>();
|
||||||
cudaGetLastError()
|
cudaGetLastError()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaDeviceSynchronize()
|
cudaDeviceSynchronize()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
// Check, if a simple __device__ kernel is useable.
|
// Check, if a simple __device__ kernel is useable.
|
||||||
.rawInput 1
|
.rawInput 1
|
||||||
@ -41,9 +41,9 @@ __global__ void gKernel2(){int i = dKernel1();}
|
|||||||
.rawInput 0
|
.rawInput 0
|
||||||
gKernel2<<<1,1>>>();
|
gKernel2<<<1,1>>>();
|
||||||
cudaGetLastError()
|
cudaGetLastError()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaDeviceSynchronize()
|
cudaDeviceSynchronize()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
|
|
||||||
// expected-no-diagnostics
|
// expected-no-diagnostics
|
||||||
|
@ -22,9 +22,9 @@ __global__ void gKernel1(int * a, int b){
|
|||||||
|
|
||||||
cudaStream_t stream1, stream2;
|
cudaStream_t stream1, stream2;
|
||||||
cudaStreamCreate(&stream1)
|
cudaStreamCreate(&stream1)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaStreamCreate(&stream2)
|
cudaStreamCreate(&stream2)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
int hostInput1[numberOfThreads];
|
int hostInput1[numberOfThreads];
|
||||||
int hostInput2[numberOfThreads];
|
int hostInput2[numberOfThreads];
|
||||||
@ -37,30 +37,30 @@ for(unsigned int i = 0; i < numberOfThreads; ++i){
|
|||||||
int * device1;
|
int * device1;
|
||||||
int * device2;
|
int * device2;
|
||||||
cudaMalloc( (void **) &device1, sizeof(int)*numberOfThreads)
|
cudaMalloc( (void **) &device1, sizeof(int)*numberOfThreads)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaMalloc( (void **) &device2, sizeof(int)*numberOfThreads)
|
cudaMalloc( (void **) &device2, sizeof(int)*numberOfThreads)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
cudaMemcpyAsync(device1, hostInput1, sizeof(int)*numberOfThreads, cudaMemcpyHostToDevice, stream1)
|
cudaMemcpyAsync(device1, hostInput1, sizeof(int)*numberOfThreads, cudaMemcpyHostToDevice, stream1)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaMemcpyAsync(device2, hostInput2, sizeof(int)*numberOfThreads, cudaMemcpyHostToDevice, stream2)
|
cudaMemcpyAsync(device2, hostInput2, sizeof(int)*numberOfThreads, cudaMemcpyHostToDevice, stream2)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
gKernel1<<<1,numberOfThreads,0,stream2>>>(device2, 2);
|
gKernel1<<<1,numberOfThreads,0,stream2>>>(device2, 2);
|
||||||
cudaGetLastError()
|
cudaGetLastError()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaDeviceSynchronize()
|
cudaDeviceSynchronize()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
gKernel1<<<1,numberOfThreads,0,stream1>>>(device1, 1);
|
gKernel1<<<1,numberOfThreads,0,stream1>>>(device1, 1);
|
||||||
cudaGetLastError()
|
cudaGetLastError()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaDeviceSynchronize()
|
cudaDeviceSynchronize()
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
cudaMemcpyAsync(hostOutput2, device2, sizeof(int)*numberOfThreads, cudaMemcpyDeviceToHost, stream2)
|
cudaMemcpyAsync(hostOutput2, device2, sizeof(int)*numberOfThreads, cudaMemcpyDeviceToHost, stream2)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
cudaMemcpyAsync(hostOutput1, device1, sizeof(int)*numberOfThreads, cudaMemcpyDeviceToHost, stream1)
|
cudaMemcpyAsync(hostOutput1, device1, sizeof(int)*numberOfThreads, cudaMemcpyDeviceToHost, stream1)
|
||||||
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
unsigned int expectedSum1 = 0;
|
unsigned int expectedSum1 = 0;
|
||||||
unsigned int cudaSum1 = 0;
|
unsigned int cudaSum1 = 0;
|
||||||
|
@ -21,7 +21,10 @@
|
|||||||
// Compare the cuda module ctor and dtor of two random modules.
|
// Compare the cuda module ctor and dtor of two random modules.
|
||||||
std::string ctor1, ctor2, dtor1, dtor2;
|
std::string ctor1, ctor2, dtor1, dtor2;
|
||||||
|
|
||||||
auto M1 = gCling->getLatestTransaction()->getModule();
|
auto T1 = gCling->getLatestTransaction();
|
||||||
|
// only __global__ CUDA kernel definitions has the cuda module ctor and dtor
|
||||||
|
__global__ void g1() { int i = 1; }
|
||||||
|
auto M1 = T1->getNext()->getModule();
|
||||||
|
|
||||||
for(auto &I : *M1){
|
for(auto &I : *M1){
|
||||||
// The trailing '_' identify the function name as modified name.
|
// The trailing '_' identify the function name as modified name.
|
||||||
@ -34,7 +37,9 @@ for(auto &I : *M1){
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto M2 = gCling->getLatestTransaction()->getModule();
|
auto T2 = gCling->getLatestTransaction();
|
||||||
|
__global__ void g2() { int i = 2; }
|
||||||
|
auto M2 = T2->getNext()->getModule();
|
||||||
|
|
||||||
// The two modules should have different names, because of the for loop.
|
// The two modules should have different names, because of the for loop.
|
||||||
M1->getName().str() != M2->getName().str()
|
M1->getName().str() != M2->getName().str()
|
||||||
|
@ -19,7 +19,7 @@ __global__ void foo(){ int i = 3; }
|
|||||||
.rawInput 0
|
.rawInput 0
|
||||||
|
|
||||||
cudaError error
|
cudaError error
|
||||||
// CHECK: (cudaError) (cudaError::cudaSuccess) : (unsigned int) 0
|
// CHECK: (cudaError) (cudaSuccess) : (unsigned int) 0
|
||||||
|
|
||||||
// expected-no-diagnostics
|
// expected-no-diagnostics
|
||||||
.q
|
.q
|
||||||
|
Loading…
Reference in New Issue
Block a user