Fixed CUDA mode for Clang/LLVM 9 upgrade

- fix bug, which was caused by executing a transaction in the device
interpreter
- fixed warning from the device compiler
- update test cases
This commit is contained in:
Simeon Ehrig 2021-02-11 16:20:30 +00:00 committed by jenkins
parent 84bcd07963
commit be5ea3a651
14 changed files with 98 additions and 87 deletions

View File

@ -1339,6 +1339,9 @@ static void stringifyPreprocSetting(PreprocessorOptions& PPOpts,
#endif #endif
if (!COpts.HasOutput || !HasInput) { if (!COpts.HasOutput || !HasInput) {
// suppress the warning "argument unused during compilation: -c" of the
// device interpreter instance
if (!COpts.CUDADevice)
argvCompile.push_back("-c"); argvCompile.push_back("-c");
argvCompile.push_back("-"); argvCompile.push_back("-");
} }

View File

@ -1670,6 +1670,9 @@ namespace cling {
IncrementalExecutor::ExecutionResult ExeRes IncrementalExecutor::ExecutionResult ExeRes
= IncrementalExecutor::kExeSuccess; = IncrementalExecutor::kExeSuccess;
// CUDA device code is not direct executable
// the code is executed by a CUDA library function in the host code
if (!m_Opts.CompilerOpts.CUDADevice)
// Forward to IncrementalExecutor; should not be called by // Forward to IncrementalExecutor; should not be called by
// anyone except for IncrementalParser. // anyone except for IncrementalParser.
ExeRes = m_Executor->runStaticInitializersOnce(T); ExeRes = m_Executor->runStaticInitializersOnce(T);

View File

@ -17,12 +17,12 @@
// Check if cuda driver is available // Check if cuda driver is available
int version; int version;
cudaDriverGetVersion(&version) cudaDriverGetVersion(&version)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
// Check if a CUDA compatible device (GPU) is available. // Check if a CUDA compatible device (GPU) is available.
int device_count = 0; int device_count = 0;
cudaGetDeviceCount(&device_count) cudaGetDeviceCount(&device_count)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
device_count > 0 device_count > 0
// CHECK: (bool) true // CHECK: (bool) true
@ -42,7 +42,7 @@ __global__ void g(){
// the host interpreter. // the host interpreter.
g<<<1,1>>>(); g<<<1,1>>>();
cudaGetLastError() cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
// expected-no-diagnostics // expected-no-diagnostics
.q .q

View File

@ -28,15 +28,15 @@ sum(41,1)
int hostOutput = 0; int hostOutput = 0;
int * deviceOutput; int * deviceOutput;
cudaMalloc( (void **) &deviceOutput, sizeof(int)) cudaMalloc( (void **) &deviceOutput, sizeof(int))
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
gKernel1<<<1,1>>>(deviceOutput); gKernel1<<<1,1>>>(deviceOutput);
cudaGetLastError() cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaDeviceSynchronize() cudaDeviceSynchronize()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int), cudaMemcpyDeviceToHost) cudaMemcpy(&hostOutput, deviceOutput, sizeof(int), cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
hostOutput hostOutput
// CHECK: (int) 42 // CHECK: (int) 42

View File

@ -17,12 +17,12 @@
// Check if cuda driver is available // Check if cuda driver is available
int version; int version;
cudaDriverGetVersion(&version) cudaDriverGetVersion(&version)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
// Check if a CUDA compatible device (GPU) is available. // Check if a CUDA compatible device (GPU) is available.
int device_count = 0; int device_count = 0;
cudaGetDeviceCount(&device_count) cudaGetDeviceCount(&device_count)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
device_count > 0 device_count > 0
// CHECK: (bool) true // CHECK: (bool) true
@ -39,7 +39,7 @@ foo()
// the host interpreter. // the host interpreter.
bar<<<1,1>>>(); bar<<<1,1>>>();
cudaGetLastError() cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
// expected-no-diagnostics // expected-no-diagnostics

View File

@ -22,15 +22,15 @@ __global__ void gKernel1(int * out){
int * deviceOutput; int * deviceOutput;
int hostOutput = 0; int hostOutput = 0;
cudaMalloc( (void **) &deviceOutput, sizeof(int)) cudaMalloc( (void **) &deviceOutput, sizeof(int))
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
gKernel1<<<1,1>>>(deviceOutput); gKernel1<<<1,1>>>(deviceOutput);
cudaGetLastError() cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaDeviceSynchronize() cudaDeviceSynchronize()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int), cudaMemcpyDeviceToHost) cudaMemcpy(&hostOutput, deviceOutput, sizeof(int), cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
hostOutput hostOutput
// CHECK: (int) 42 // CHECK: (int) 42
@ -55,15 +55,15 @@ for(unsigned int i = 0; i < numberOfThreads; ++i){
hostOutput2[i] = 0; hostOutput2[i] = 0;
} }
cudaMalloc( (void **) &deviceOutput2, sizeof(int)*numberOfThreads) cudaMalloc( (void **) &deviceOutput2, sizeof(int)*numberOfThreads)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
gKernel2<<<1,numberOfThreads>>>(deviceOutput2); gKernel2<<<1,numberOfThreads>>>(deviceOutput2);
cudaGetLastError() cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaDeviceSynchronize() cudaDeviceSynchronize()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaMemcpy(hostOutput2, deviceOutput2, sizeof(int)*numberOfThreads, cudaMemcpyDeviceToHost) cudaMemcpy(hostOutput2, deviceOutput2, sizeof(int)*numberOfThreads, cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
unsigned int expectedSum = 0; unsigned int expectedSum = 0;
unsigned int cudaSum = 0; unsigned int cudaSum = 0;

View File

@ -23,15 +23,15 @@ __global__ void gKernel1(T * value){
int * deviceOutput1; int * deviceOutput1;
int hostOutput1 = 1; int hostOutput1 = 1;
cudaMalloc( (void **) &deviceOutput1, sizeof(int)) cudaMalloc( (void **) &deviceOutput1, sizeof(int))
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
gKernel1<<<1,1>>>(deviceOutput1); gKernel1<<<1,1>>>(deviceOutput1);
cudaGetLastError() cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaDeviceSynchronize() cudaDeviceSynchronize()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostOutput1, deviceOutput1, sizeof(int), cudaMemcpyDeviceToHost) cudaMemcpy(&hostOutput1, deviceOutput1, sizeof(int), cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
hostOutput1 hostOutput1
// CHECK: (int) 42 // CHECK: (int) 42
@ -55,27 +55,27 @@ __global__ void gKernel2<float>(float * value){
int * deviceOutput2; int * deviceOutput2;
int hostOutput2 = 10; int hostOutput2 = 10;
cudaMalloc( (void **) &deviceOutput2, sizeof(int)) cudaMalloc( (void **) &deviceOutput2, sizeof(int))
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
float * deviceOutput3; float * deviceOutput3;
float hostOutput3= 10.0; float hostOutput3= 10.0;
cudaMalloc( (void **) &deviceOutput3, sizeof(float)) cudaMalloc( (void **) &deviceOutput3, sizeof(float))
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
gKernel2<<<1,1>>>(deviceOutput2); gKernel2<<<1,1>>>(deviceOutput2);
cudaGetLastError() cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaDeviceSynchronize() cudaDeviceSynchronize()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
gKernel2<<<1,1>>>(deviceOutput3); gKernel2<<<1,1>>>(deviceOutput3);
cudaGetLastError() cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaDeviceSynchronize() cudaDeviceSynchronize()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostOutput2, deviceOutput2, sizeof(int), cudaMemcpyDeviceToHost) cudaMemcpy(&hostOutput2, deviceOutput2, sizeof(int), cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostOutput3, deviceOutput3, sizeof(float), cudaMemcpyDeviceToHost) cudaMemcpy(&hostOutput3, deviceOutput3, sizeof(float), cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
hostOutput2 hostOutput2
// CHECK: (int) 1 // CHECK: (int) 1
@ -107,19 +107,19 @@ int func1(int * input){
int * deviceOutput4; int * deviceOutput4;
int hostOutput4 = 10; int hostOutput4 = 10;
cudaMalloc( (void **) &deviceOutput4, sizeof(int)) cudaMalloc( (void **) &deviceOutput4, sizeof(int))
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
int * deviceOutput5; int * deviceOutput5;
cudaMalloc( (void **) &deviceOutput5, sizeof(int)) cudaMalloc( (void **) &deviceOutput5, sizeof(int))
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
gKernel3<<<1,1>>>(deviceOutput4, func1(deviceOutput5)); gKernel3<<<1,1>>>(deviceOutput4, func1(deviceOutput5));
cudaGetLastError() cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaDeviceSynchronize() cudaDeviceSynchronize()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostOutput4, deviceOutput4, sizeof(int), cudaMemcpyDeviceToHost) cudaMemcpy(&hostOutput4, deviceOutput4, sizeof(int), cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
hostOutput4 hostOutput4
// CHECK: (int) 5 // CHECK: (int) 5
@ -155,27 +155,27 @@ __global__ void gKernel5(T * x, Functor const functor){
int * deviceOutput6; int * deviceOutput6;
int hostOutput6 = 10; int hostOutput6 = 10;
cudaMalloc( (void **) &deviceOutput6, sizeof(int)) cudaMalloc( (void **) &deviceOutput6, sizeof(int))
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
double * deviceOutput7; double * deviceOutput7;
double hostOutput7 = 10.0; double hostOutput7 = 10.0;
cudaMalloc( (void **) &deviceOutput7, sizeof(double)) cudaMalloc( (void **) &deviceOutput7, sizeof(double))
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
gKernel5<<<1,1>>>(deviceOutput6, Struct1<int>{}); gKernel5<<<1,1>>>(deviceOutput6, Struct1<int>{});
cudaGetLastError() cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaDeviceSynchronize() cudaDeviceSynchronize()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
gKernel5<<<1,1>>>(deviceOutput7, Struct1<double>{}); gKernel5<<<1,1>>>(deviceOutput7, Struct1<double>{});
cudaGetLastError() cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaDeviceSynchronize() cudaDeviceSynchronize()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostOutput6, deviceOutput6, sizeof(int), cudaMemcpyDeviceToHost) cudaMemcpy(&hostOutput6, deviceOutput6, sizeof(int), cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostOutput7, deviceOutput7, sizeof(double), cudaMemcpyDeviceToHost) cudaMemcpy(&hostOutput7, deviceOutput7, sizeof(double), cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
hostOutput6 hostOutput6
// CHECK: (int) 1 // CHECK: (int) 1

View File

@ -26,15 +26,15 @@ __global__ void gKernel1(int * out){
int * deviceOutput; int * deviceOutput;
int hostOutput = 0; int hostOutput = 0;
cudaMalloc( (void **) &deviceOutput, sizeof(int)) cudaMalloc( (void **) &deviceOutput, sizeof(int))
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
gKernel1<<<1,1>>>(deviceOutput); gKernel1<<<1,1>>>(deviceOutput);
cudaGetLastError() cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaDeviceSynchronize() cudaDeviceSynchronize()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int), cudaMemcpyDeviceToHost) cudaMemcpy(&hostOutput, deviceOutput, sizeof(int), cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
hostOutput hostOutput
// CHECK: (int) 42 // CHECK: (int) 42
@ -50,11 +50,11 @@ __global__ void gKernel2(int * out){
hostOutput = 0; hostOutput = 0;
gKernel2<43><<<1,1>>>(deviceOutput); gKernel2<43><<<1,1>>>(deviceOutput);
cudaGetLastError() cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaDeviceSynchronize() cudaDeviceSynchronize()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int), cudaMemcpyDeviceToHost) cudaMemcpy(&hostOutput, deviceOutput, sizeof(int), cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
hostOutput hostOutput
// CHECK: (int) 43 // CHECK: (int) 43

View File

@ -16,7 +16,7 @@
// if process() works, the general input also works // if process() works, the general input also works
gCling->process("cudaGetLastError()"); gCling->process("cudaGetLastError()");
//CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 //CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
// declare a cuda kernel with with a define // declare a cuda kernel with with a define
// do not this in real code ;-) // do not this in real code ;-)
@ -27,16 +27,16 @@ gCling->declare("__global__ void g1(int * out){ *out = FOO;}");
int *d1; int *d1;
int h1 = 0; int h1 = 0;
cudaMalloc((void**)&d1, sizeof(int)) cudaMalloc((void**)&d1, sizeof(int))
//CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 //CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
// run kernel // run kernel
g1<<<1,1>>>(d1); g1<<<1,1>>>(d1);
cudaGetLastError() cudaGetLastError()
//CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 //CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
// check result // check result
cudaMemcpy(&h1, d1, sizeof(int), cudaMemcpyDeviceToHost) cudaMemcpy(&h1, d1, sizeof(int), cudaMemcpyDeviceToHost)
//CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 //CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
h1 h1
//CHECK: (int) 42 //CHECK: (int) 42
@ -49,10 +49,10 @@ gCling->parse("__global__ void g2(int * out){ *out = 52;}");
g2<<<1,1>>>(d1); g2<<<1,1>>>(d1);
cudaGetLastError() cudaGetLastError()
//CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 //CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaMemcpy(&h1, d1, sizeof(int), cudaMemcpyDeviceToHost) cudaMemcpy(&h1, d1, sizeof(int), cudaMemcpyDeviceToHost)
//CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 //CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
h1 h1
//CHECK: (int) 52 //CHECK: (int) 52

View File

@ -32,19 +32,19 @@ for(unsigned int i = 0; i < numberOfThreads; ++i){
int * deviceInput; int * deviceInput;
int * deviceOutput; int * deviceOutput;
cudaMalloc( (void **) &deviceInput, sizeof(int)*numberOfThreads) cudaMalloc( (void **) &deviceInput, sizeof(int)*numberOfThreads)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaMalloc( (void **) &deviceOutput, sizeof(int)*numberOfThreads) cudaMalloc( (void **) &deviceOutput, sizeof(int)*numberOfThreads)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaMemcpy(deviceInput, hostInput, sizeof(int)*numberOfThreads, cudaMemcpyHostToDevice) cudaMemcpy(deviceInput, hostInput, sizeof(int)*numberOfThreads, cudaMemcpyHostToDevice)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
gKernel1<<<1,numberOfThreads, sizeof(int)*numberOfThreads>>>(deviceInput, deviceOutput); gKernel1<<<1,numberOfThreads, sizeof(int)*numberOfThreads>>>(deviceInput, deviceOutput);
cudaGetLastError() cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaDeviceSynchronize() cudaDeviceSynchronize()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaMemcpy(hostOutput, deviceOutput, sizeof(int)*numberOfThreads, cudaMemcpyDeviceToHost) cudaMemcpy(hostOutput, deviceOutput, sizeof(int)*numberOfThreads, cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
int expectedSum = (numberOfThreads*(numberOfThreads+1))/2; int expectedSum = (numberOfThreads*(numberOfThreads+1))/2;
int cudaSum = 0; int cudaSum = 0;

View File

@ -15,12 +15,12 @@
// Check if cuda driver is available // Check if cuda driver is available
int version; int version;
cudaDriverGetVersion(&version) cudaDriverGetVersion(&version)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
// Check if a CUDA compatible device (GPU) is available. // Check if a CUDA compatible device (GPU) is available.
int device_count = 0; int device_count = 0;
cudaGetDeviceCount(&device_count) cudaGetDeviceCount(&device_count)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
device_count > 0 device_count > 0
// CHECK: (bool) true // CHECK: (bool) true
@ -30,9 +30,9 @@ __global__ void gKernel1(){}
.rawInput 0 .rawInput 0
gKernel1<<<1,1>>>(); gKernel1<<<1,1>>>();
cudaGetLastError() cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaDeviceSynchronize() cudaDeviceSynchronize()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
// Check, if a simple __device__ kernel is useable. // Check, if a simple __device__ kernel is useable.
.rawInput 1 .rawInput 1
@ -41,9 +41,9 @@ __global__ void gKernel2(){int i = dKernel1();}
.rawInput 0 .rawInput 0
gKernel2<<<1,1>>>(); gKernel2<<<1,1>>>();
cudaGetLastError() cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaDeviceSynchronize() cudaDeviceSynchronize()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
// expected-no-diagnostics // expected-no-diagnostics

View File

@ -22,9 +22,9 @@ __global__ void gKernel1(int * a, int b){
cudaStream_t stream1, stream2; cudaStream_t stream1, stream2;
cudaStreamCreate(&stream1) cudaStreamCreate(&stream1)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaStreamCreate(&stream2) cudaStreamCreate(&stream2)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
int hostInput1[numberOfThreads]; int hostInput1[numberOfThreads];
int hostInput2[numberOfThreads]; int hostInput2[numberOfThreads];
@ -37,30 +37,30 @@ for(unsigned int i = 0; i < numberOfThreads; ++i){
int * device1; int * device1;
int * device2; int * device2;
cudaMalloc( (void **) &device1, sizeof(int)*numberOfThreads) cudaMalloc( (void **) &device1, sizeof(int)*numberOfThreads)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaMalloc( (void **) &device2, sizeof(int)*numberOfThreads) cudaMalloc( (void **) &device2, sizeof(int)*numberOfThreads)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaMemcpyAsync(device1, hostInput1, sizeof(int)*numberOfThreads, cudaMemcpyHostToDevice, stream1) cudaMemcpyAsync(device1, hostInput1, sizeof(int)*numberOfThreads, cudaMemcpyHostToDevice, stream1)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaMemcpyAsync(device2, hostInput2, sizeof(int)*numberOfThreads, cudaMemcpyHostToDevice, stream2) cudaMemcpyAsync(device2, hostInput2, sizeof(int)*numberOfThreads, cudaMemcpyHostToDevice, stream2)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
gKernel1<<<1,numberOfThreads,0,stream2>>>(device2, 2); gKernel1<<<1,numberOfThreads,0,stream2>>>(device2, 2);
cudaGetLastError() cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaDeviceSynchronize() cudaDeviceSynchronize()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
gKernel1<<<1,numberOfThreads,0,stream1>>>(device1, 1); gKernel1<<<1,numberOfThreads,0,stream1>>>(device1, 1);
cudaGetLastError() cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaDeviceSynchronize() cudaDeviceSynchronize()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaMemcpyAsync(hostOutput2, device2, sizeof(int)*numberOfThreads, cudaMemcpyDeviceToHost, stream2) cudaMemcpyAsync(hostOutput2, device2, sizeof(int)*numberOfThreads, cudaMemcpyDeviceToHost, stream2)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaMemcpyAsync(hostOutput1, device1, sizeof(int)*numberOfThreads, cudaMemcpyDeviceToHost, stream1) cudaMemcpyAsync(hostOutput1, device1, sizeof(int)*numberOfThreads, cudaMemcpyDeviceToHost, stream1)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
unsigned int expectedSum1 = 0; unsigned int expectedSum1 = 0;
unsigned int cudaSum1 = 0; unsigned int cudaSum1 = 0;

View File

@ -21,7 +21,10 @@
// Compare the cuda module ctor and dtor of two random modules. // Compare the cuda module ctor and dtor of two random modules.
std::string ctor1, ctor2, dtor1, dtor2; std::string ctor1, ctor2, dtor1, dtor2;
auto M1 = gCling->getLatestTransaction()->getModule(); auto T1 = gCling->getLatestTransaction();
// only __global__ CUDA kernel definitions has the cuda module ctor and dtor
__global__ void g1() { int i = 1; }
auto M1 = T1->getNext()->getModule();
for(auto &I : *M1){ for(auto &I : *M1){
// The trailing '_' identify the function name as modified name. // The trailing '_' identify the function name as modified name.
@ -34,7 +37,9 @@ for(auto &I : *M1){
} }
} }
auto M2 = gCling->getLatestTransaction()->getModule(); auto T2 = gCling->getLatestTransaction();
__global__ void g2() { int i = 2; }
auto M2 = T2->getNext()->getModule();
// The two modules should have different names, because of the for loop. // The two modules should have different names, because of the for loop.
M1->getName().str() != M2->getName().str() M1->getName().str() != M2->getName().str()

View File

@ -19,7 +19,7 @@ __global__ void foo(){ int i = 3; }
.rawInput 0 .rawInput 0
cudaError error cudaError error
// CHECK: (cudaError) (cudaError::cudaSuccess) : (unsigned int) 0 // CHECK: (cudaError) (cudaSuccess) : (unsigned int) 0
// expected-no-diagnostics // expected-no-diagnostics
.q .q