Add test cases for CUDA features.

- CUDA __constant__ memory
- CUDA global __device__ memory
- CUDA __host__ prefix
- CUDA kernel launch with arguments
- CUDA templated kernels
- CUDA dynamic shared memory (size set at kernel launch)
- CUDA streams
- test whether a CUDA device is available
This commit is contained in:
parent d14ab2daec
commit e7b0e22ae8
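All of the new tests drive the same interactive pattern: device code is entered through cling's .rawInput mode, kernels are launched directly from the interpreter, and statements without a trailing semicolon echo their value, so FileCheck can match the returned cudaError_t. Below is a minimal sketch of that pattern; it is not taken from this commit, the kernel name and values are invented for illustration, and it assumes a cling build with CUDA mode (-x cuda) and an available CUDA runtime.

.rawInput 1
// device code is compiled for the GPU while raw input mode is active
__global__ void demoKernel(int * out){
  *out = 7;
}
.rawInput 0

int hostOut = 0;
int * deviceOut;
cudaMalloc( (void **) &deviceOut, sizeof(int))   // no ';' -> cling echoes the cudaError_t
// expected echo: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
demoKernel<<<1,1>>>(deviceOut);                  // kernel launch from the interpreter
cudaGetLastError()                               // echoed value verifies the launch
cudaMemcpy(&hostOut, deviceOut, sizeof(int), cudaMemcpyDeviceToHost)
hostOut                                          // echoes (int) 7
cudaFree(deviceOut)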
47 test/CUDADeviceCode/CUDAConstantMemory.C (new file)
@@ -0,0 +1,47 @@
//------------------------------------------------------------------------------
// CLING - the C++ LLVM-based InterpreterG :)
//
// This file is dual-licensed: you can choose to license it under the University
// of Illinois Open Source License or the GNU Lesser General Public License. See
// LICENSE.TXT for details.
//------------------------------------------------------------------------------

// The Test checks if constant memory works.
// RUN: cat %s | %cling -x cuda -Xclang -verify 2>&1 | FileCheck %s
// REQUIRES: cuda-runtime

__constant__ int constNum[4];

.rawInput 1
__global__ void gKernel1(int * output){
  int i = threadIdx.x;
  output[i] = constNum[i];
}

int hostInput[4] = {1,2,3,4};
int hostOutput[4] = {0,0,0,0};
int * deviceOutput;
cudaMalloc( (void **) &deviceOutput, sizeof(int)*4)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0

cudaMemcpyToSymbol(constNum, &hostInput, sizeof(int)*4)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
gKernel1<<<1,4>>>(deviceOutput);
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int)*4, cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0

// FIXME: output of the whole static array isn't working at the moment
hostOutput[0]
// CHECK: (int) 1
hostOutput[1]
// CHECK: (int) 2
hostOutput[2]
// CHECK: (int) 3
hostOutput[3]
// CHECK: (int) 4


// expected-no-diagnostics
.q
65 test/CUDADeviceCode/CUDAGlobalDeviceMemory.C (new file)
@@ -0,0 +1,65 @@
//------------------------------------------------------------------------------
// CLING - the C++ LLVM-based InterpreterG :)
//
// This file is dual-licensed: you can choose to license it under the University
// of Illinois Open Source License or the GNU Lesser General Public License. See
// LICENSE.TXT for details.
//------------------------------------------------------------------------------

// The Test checks if global __device__ memory works. There are two tests: one
// uses direct value assignment at declaration and the other uses reassignment.
// RUN: cat %s | %cling -x cuda -Xclang -verify 2>&1 | FileCheck %s
// REQUIRES: cuda-runtime

__device__ int dAnswer = 42;

.rawInput 1
__global__ void gKernel1(int * output){
  int i = threadIdx.x;
  output[i] = dAnswer;
}
.rawInput 0

int hostOutput[4] = {1,1,1,1};
int * deviceOutput;
cudaMalloc( (void **) &deviceOutput, sizeof(int)*4)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0

gKernel1<<<1,4>>>(deviceOutput);
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int)*4, cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0

// FIXME: output of the whole static array isn't working at the moment
hostOutput[0]
// CHECK: (int) 42
hostOutput[1]
// CHECK: (int) 42
hostOutput[2]
// CHECK: (int) 42
hostOutput[3]
// CHECK: (int) 42

// Test if value assignment also works.
dAnswer = 43;

gKernel1<<<1,4>>>(deviceOutput);
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int)*4, cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0

// FIXME: output of the whole static array isn't working at the moment
hostOutput[0]
// CHECK: (int) 43
hostOutput[1]
// CHECK: (int) 43
hostOutput[2]
// CHECK: (int) 43
hostOutput[3]
// CHECK: (int) 43


// expected-no-diagnostics
.q
43 test/CUDADeviceCode/CUDAHostPrefix.C (new file)
@@ -0,0 +1,43 @@
//------------------------------------------------------------------------------
// CLING - the C++ LLVM-based InterpreterG :)
//
// This file is dual-licensed: you can choose to license it under the University
// of Illinois Open Source License or the GNU Lesser General Public License. See
// LICENSE.TXT for details.
//------------------------------------------------------------------------------

// The Test checks if a function with the __host__ __device__ prefix is
// available on both the host and the device side.
// RUN: cat %s | %cling -x cuda -Xclang -verify 2>&1 | FileCheck %s
// REQUIRES: cuda-runtime

.rawInput 1
__host__ __device__ int sum(int a, int b){
  return a + b;
}

__global__ void gKernel1(int * output){
  *output = sum(40,2);
}
.rawInput 0

sum(41,1)
// CHECK: (int) 42

int hostOutput = 0;
int * deviceOutput;
cudaMalloc( (void **) &deviceOutput, sizeof(int))
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0

gKernel1<<<1,1>>>(deviceOutput);
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int), cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0

hostOutput
// CHECK: (int) 42


// expected-no-diagnostics
.q
61 test/CUDADeviceCode/CUDAKernelArgument.C (new file)
@@ -0,0 +1,61 @@
//------------------------------------------------------------------------------
// CLING - the C++ LLVM-based InterpreterG :)
//
// This file is dual-licensed: you can choose to license it under the University
// of Illinois Open Source License or the GNU Lesser General Public License. See
// LICENSE.TXT for details.
//------------------------------------------------------------------------------

// The Test checks if a CUDA kernel works with arguments and built-in
// functions.
// RUN: cat %s | %cling -x cuda -Xclang -verify 2>&1 | FileCheck %s
// REQUIRES: cuda-runtime

// Test if a simple kernel with arguments works.
.rawInput 1
__global__ void gKernel1(int * out){
  *out = 42;
}
.rawInput 0

int * deviceOutput;
int hostOutput = 0;
cudaMalloc( (void **) &deviceOutput, sizeof(int))
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0

gKernel1<<<1,1>>>(deviceOutput);
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int), cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
hostOutput
// CHECK: (int) 42


// Test if a parallel kernel that uses built-in functions works.
.rawInput 1
__device__ int mul7(int in){
  return 7*in;
}

__global__ void gKernel2(int * out){
  int i = threadIdx.x;
  out[i] = mul7(i);
}
.rawInput 0

int * deviceOutput2;
int hostOutput2[4] = {0,0,0,0};
cudaMalloc( (void **) &deviceOutput2, sizeof(int)*4)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
gKernel2<<<1,4>>>(deviceOutput2);
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostOutput2, deviceOutput2, sizeof(int)*4, cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
hostOutput2[0] + hostOutput2[1] + hostOutput2[2] + hostOutput2[3]
// CHECK: (int) 42


// expected-no-diagnostics
.q
58 test/CUDADeviceCode/CUDAKernelTemplate.C (new file)
@@ -0,0 +1,58 @@
//------------------------------------------------------------------------------
// CLING - the C++ LLVM-based InterpreterG :)
//
// This file is dual-licensed: you can choose to license it under the University
// of Illinois Open Source License or the GNU Lesser General Public License. See
// LICENSE.TXT for details.
//------------------------------------------------------------------------------

// The Test checks if templated CUDA kernels work.
// RUN: cat %s | %cling -x cuda -Xclang -verify 2>&1 | FileCheck %s
// REQUIRES: cuda-runtime

// Check if template resolution on the device side works.
.rawInput 1
template <int T>
__device__ int dKernel1(){
  return T;
}

__global__ void gKernel1(int * out){
  *out = dKernel1<42>();
}
.rawInput 0

int * deviceOutput;
int hostOutput = 0;
cudaMalloc( (void **) &deviceOutput, sizeof(int))
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0

gKernel1<<<1,1>>>(deviceOutput);
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int), cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
hostOutput
// CHECK: (int) 42


// Check if template resolution across the host-device boundary works.
.rawInput 1
template <int T>
__global__ void gKernel2(int * out){
  *out = dKernel1<T>();
}
.rawInput 0

hostOutput = 0;
gKernel2<43><<<1,1>>>(deviceOutput);
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int), cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
hostOutput
// CHECK: (int) 43


// expected-no-diagnostics
.q
51 test/CUDADeviceCode/CUDASharedMemory.C (new file)
@@ -0,0 +1,51 @@
//------------------------------------------------------------------------------
// CLING - the C++ LLVM-based InterpreterG :)
//
// This file is dual-licensed: you can choose to license it under the University
// of Illinois Open Source License or the GNU Lesser General Public License. See
// LICENSE.TXT for details.
//------------------------------------------------------------------------------

// The Test checks if runtime shared memory works.
// RUN: cat %s | %cling -x cuda -Xclang -verify 2>&1 | FileCheck %s
// REQUIRES: cuda-runtime

.rawInput 1
__global__ void gKernel1(int * input, int * output){
  extern __shared__ int s[];
  int i = threadIdx.x;
  s[i] = input[i];
  output[i] = s[i];
}
.rawInput 0

int hostInput[4] = {1,2,3,4};
int hostOutput[4] = {0,0,0,0};
int * deviceInput;
int * deviceOutput;
cudaMalloc( (void **) &deviceInput, sizeof(int)*4)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMalloc( (void **) &deviceOutput, sizeof(int)*4)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0

cudaMemcpy(deviceInput, &hostInput, sizeof(int)*4, cudaMemcpyHostToDevice)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
gKernel1<<<1,4, 4 * sizeof(int)>>>(deviceInput, deviceOutput);
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int)*4, cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0

// FIXME: output of the whole static array isn't working at the moment
hostOutput[0]
// CHECK: (int) 1
hostOutput[1]
// CHECK: (int) 2
hostOutput[2]
// CHECK: (int) 3
hostOutput[3]
// CHECK: (int) 4


// expected-no-diagnostics
.q
40 test/CUDADeviceCode/CUDASimpleKernel.C (new file)
@@ -0,0 +1,40 @@
//------------------------------------------------------------------------------
// CLING - the C++ LLVM-based InterpreterG :)
//
// This file is dual-licensed: you can choose to license it under the University
// of Illinois Open Source License or the GNU Lesser General Public License. See
// LICENSE.TXT for details.
//------------------------------------------------------------------------------

// The Test checks if a CUDA compatible device is available and whether simple
// __global__ and __device__ kernels run.
// RUN: cat %s | %cling -x cuda -Xclang -verify 2>&1 | FileCheck %s
// REQUIRES: cuda-runtime

// Check if a CUDA compatible device (GPU) is available.
int device_count = 0;
cudaGetDeviceCount(&device_count)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
device_count > 0
// CHECK: (bool) true

// Check if the smallest __global__ kernel is callable.
.rawInput 1
__global__ void gKernel1(){}
.rawInput 0
gKernel1<<<1,1>>>();
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0

// Check if a simple __device__ kernel is usable.
.rawInput 1
__device__ int dKernel1(){return 42;}
__global__ void gKernel2(){int i = dKernel1();}
.rawInput 0
gKernel2<<<1,1>>>();
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0


// expected-no-diagnostics
.q
58 test/CUDADeviceCode/CUDAStreams.C (new file)
@@ -0,0 +1,58 @@
//------------------------------------------------------------------------------
// CLING - the C++ LLVM-based InterpreterG :)
//
// This file is dual-licensed: you can choose to license it under the University
// of Illinois Open Source License or the GNU Lesser General Public License. See
// LICENSE.TXT for details.
//------------------------------------------------------------------------------

// The Test checks if CUDA streams work.
// RUN: cat %s | %cling -x cuda -Xclang -verify 2>&1 | FileCheck %s
// REQUIRES: cuda-runtime

.rawInput 1
__global__ void gKernel1(int * a, int b){
  int i = threadIdx.x;
  a[i] += b;
}
.rawInput 0

cudaStream_t stream1, stream2;
cudaStreamCreate(&stream1)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaStreamCreate(&stream2)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0

int host1[] = {1,2,3,4};
int host2[] = {11,12,13,14};
int * device1;
int * device2;
cudaMalloc( (void **) &device1, sizeof(int)*4)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMalloc( (void **) &device2, sizeof(int)*4)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0

cudaMemcpyAsync(device1, &host1, sizeof(int)*4, cudaMemcpyHostToDevice, stream1)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpyAsync(device2, &host2, sizeof(int)*4, cudaMemcpyHostToDevice, stream2)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0

gKernel1<<<1,4,0,stream2>>>(device2, 2);
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
gKernel1<<<1,4,0,stream1>>>(device1, 1);
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0

cudaMemcpyAsync(&host2, device2, sizeof(int)*4, cudaMemcpyDeviceToHost, stream2)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpyAsync(&host1, device1, sizeof(int)*4, cudaMemcpyDeviceToHost, stream1)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
host1[0] + host1[1] + host1[2] + host1[3]
// CHECK: (int) 14
host2[0] + host2[1] + host2[2] + host2[3]
// CHECK: (int) 58


// expected-no-diagnostics
.q