Add test cases for CUDA features.

- CUDA __constant__ memory
- CUDA global __device__ memory
- CUDA __host__ __device__ prefix
- CUDA kernel launch with arguments
- CUDA templated kernels
- CUDA dynamic (runtime-sized) shared memory
- CUDA streams
- test whether a CUDA device is available
Simeon Ehrig 2018-04-24 16:45:35 +02:00 committed by sftnight
parent d14ab2daec
commit e7b0e22ae8
8 changed files with 423 additions and 0 deletions
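All eight tests share the same interpreter-script pattern: a lit RUN line pipes the file into cling in CUDA mode, kernels are defined between .rawInput 1 and .rawInput 0, and CUDA runtime calls are entered without a trailing semicolon so that cling prints the returned cudaError_t, which FileCheck matches against the CHECK lines. Below is a minimal sketch of that pattern; it is not one of the committed files, and the kernel and variable names are illustrative only.

// RUN: cat %s | %cling -x cuda -Xclang -verify 2>&1 | FileCheck %s
// REQUIRES: cuda-runtime
.rawInput 1
// illustrative kernel, not part of the commit
__global__ void exampleKernel(int * out){
   *out = 42;
}
.rawInput 0
int hostResult = 0;
int * deviceResult;
// no trailing semicolon: cling echoes the returned cudaError_t
cudaMalloc( (void **) &deviceResult, sizeof(int))
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
exampleKernel<<<1,1>>>(deviceResult);
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostResult, deviceResult, sizeof(int), cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
hostResult
// CHECK: (int) 42
// expected-no-diagnostics
.q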


@@ -0,0 +1,47 @@
//------------------------------------------------------------------------------
// CLING - the C++ LLVM-based InterpreterG :)
//
// This file is dual-licensed: you can choose to license it under the University
// of Illinois Open Source License or the GNU Lesser General Public License. See
// LICENSE.TXT for details.
//------------------------------------------------------------------------------
// The test checks whether __constant__ memory works.
// RUN: cat %s | %cling -x cuda -Xclang -verify 2>&1 | FileCheck %s
// REQUIRES: cuda-runtime
__constant__ int constNum[4];
.rawInput 1
__global__ void gKernel1(int * output){
int i = threadIdx.x;
output[i] = constNum[i];
}
.rawInput 0
int hostInput[4] = {1,2,3,4};
int hostOutput[4] = {0,0,0,0};
int * deviceOutput;
cudaMalloc( (void **) &deviceOutput, sizeof(int)*4)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpyToSymbol(constNum, &hostInput, sizeof(int)*4)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
gKernel1<<<1,4>>>(deviceOutput);
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int)*4, cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
// FIXME: output of the whole static array isn't working at the moment
hostOutput[0]
// CHECK: (int) 1
hostOutput[1]
// CHECK: (int) 2
hostOutput[2]
// CHECK: (int) 3
hostOutput[3]
// CHECK: (int) 4
// expected-no-diagnostics
.q


@@ -0,0 +1,65 @@
//------------------------------------------------------------------------------
// CLING - the C++ LLVM-based InterpreterG :)
//
// This file is dual-licensed: you can choose to license it under the University
// of Illinois Open Source License or the GNU Lesser General Public License. See
// LICENSE.TXT for details.
//------------------------------------------------------------------------------
// The test checks whether global __device__ memory works. There are two tests: one
// uses direct value assignment at declaration and the other uses reassignment.
// RUN: cat %s | %cling -x cuda -Xclang -verify 2>&1 | FileCheck %s
// REQUIRES: cuda-runtime
__device__ int dAnswer = 42;
.rawInput 1
__global__ void gKernel1(int * output){
int i = threadIdx.x;
output[i] = dAnswer;
}
.rawInput 0
int hostOutput[4] = {1,1,1,1};
int * deviceOutput;
cudaMalloc( (void **) &deviceOutput, sizeof(int)*4)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
gKernel1<<<1,4>>>(deviceOutput);
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int)*4, cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
// FIXME: output of the whole static array isn't working at the moment
hostOutput[0]
// CHECK: (int) 42
hostOutput[1]
// CHECK: (int) 42
hostOutput[2]
// CHECK: (int) 42
hostOutput[3]
// CHECK: (int) 42
// Test whether reassignment from the host also works.
dAnswer = 43;
gKernel1<<<1,4>>>(deviceOutput);
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int)*4, cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
// FIXME: output of the whole static array isn't working at the moment
hostOutput[0]
// CHECK: (int) 43
hostOutput[1]
// CHECK: (int) 43
hostOutput[2]
// CHECK: (int) 43
hostOutput[3]
// CHECK: (int) 43
// expected-no-diagnostics
.q


@@ -0,0 +1,43 @@
//------------------------------------------------------------------------------
// CLING - the C++ LLVM-based InterpreterG :)
//
// This file is dual-licensed: you can choose to license it under the University
// of Illinois Open Source License or the GNU Lesser General Public License. See
// LICENSE.TXT for details.
//------------------------------------------------------------------------------
// The test checks whether a function declared with the __host__ and __device__
// prefixes is available on both the host and the device side.
// RUN: cat %s | %cling -x cuda -Xclang -verify 2>&1 | FileCheck %s
// REQUIRES: cuda-runtime
.rawInput 1
__host__ __device__ int sum(int a, int b){
return a + b;
}
__global__ void gKernel1(int * output){
*output = sum(40,2);
}
.rawInput 0
sum(41,1)
// CHECK: (int) 42
int hostOutput = 0;
int * deviceOutput;
cudaMalloc( (void **) &deviceOutput, sizeof(int))
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
gKernel1<<<1,1>>>(deviceOutput);
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int), cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
hostOutput
// CHECK: (int) 42
// expected-no-diagnostics
.q


@@ -0,0 +1,61 @@
//------------------------------------------------------------------------------
// CLING - the C++ LLVM-based InterpreterG :)
//
// This file is dual-licensed: you can choose to license it under the University
// of Illinois Open Source License or the GNU Lesser General Public License. See
// LICENSE.TXT for details.
//------------------------------------------------------------------------------
// The test checks whether a CUDA kernel works with arguments, built-in variables,
// and __device__ functions.
// RUN: cat %s | %cling -x cuda -Xclang -verify 2>&1 | FileCheck %s
// REQUIRES: cuda-runtime
// Test whether a simple kernel with arguments works.
.rawInput 1
__global__ void gKernel1(int * out){
*out = 42;
}
.rawInput 0
int * deviceOutput;
int hostOutput = 0;
cudaMalloc( (void **) &deviceOutput, sizeof(int))
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
gKernel1<<<1,1>>>(deviceOutput);
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int), cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
hostOutput
// CHECK: (int) 42
// Test whether a parallel kernel that uses a built-in variable and a __device__ function works.
.rawInput 1
__device__ int mul7(int in){
return 7*in;
}
__global__ void gKernel2(int * out){
int i = threadIdx.x;
out[i] = mul7(i);
}
.rawInput 0
int * deviceOutput2;
int hostOutput2[4] = {0,0,0,0};
cudaMalloc( (void **) &deviceOutput2, sizeof(int)*4)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
gKernel2<<<1,4>>>(deviceOutput2);
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostOutput2, deviceOutput2, sizeof(int)*4, cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
hostOutput2[0] + hostOutput2[1] + hostOutput2[2] + hostOutput2[3]
// CHECK: (int) 42
// expected-no-diagnostics
.q


@@ -0,0 +1,58 @@
//------------------------------------------------------------------------------
// CLING - the C++ LLVM-based InterpreterG :)
//
// This file is dual-licensed: you can choose to license it under the University
// of Illinois Open Source License or the GNU Lesser General Public License. See
// LICENSE.TXT for details.
//------------------------------------------------------------------------------
// The test checks whether templated CUDA kernels work.
// RUN: cat %s | %cling -x cuda -Xclang -verify 2>&1 | FileCheck %s
// REQUIRES: cuda-runtime
// Check whether device-side template resolution works.
.rawInput 1
template <int T>
__device__ int dKernel1(){
return T;
}
__global__ void gKernel1(int * out){
*out = dKernel1<42>();
}
.rawInput 0
int * deviceOutput;
int hostOutput = 0;
cudaMalloc( (void **) &deviceOutput, sizeof(int))
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
gKernel1<<<1,1>>>(deviceOutput);
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int), cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
hostOutput
// CHECK: (int) 42
// Check whether host-side template resolution of a __global__ kernel works.
.rawInput 1
template <int T>
__global__ void gKernel2(int * out){
*out = dKernel1<T>();
}
.rawInput 0
hostOutput = 0;
gKernel2<43><<<1,1>>>(deviceOutput);
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int), cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
hostOutput
// CHECK: (int) 43
// expected-no-diagnostics
.q


@@ -0,0 +1,51 @@
//------------------------------------------------------------------------------
// CLING - the C++ LLVM-based InterpreterG :)
//
// This file is dual-licensed: you can choose to license it under the University
// of Illinois Open Source License or the GNU Lesser General Public License. See
// LICENSE.TXT for details.
//------------------------------------------------------------------------------
// The test checks whether dynamically sized (runtime) shared memory works.
// RUN: cat %s | %cling -x cuda -Xclang -verify 2>&1 | FileCheck %s
// REQUIRES: cuda-runtime
.rawInput 1
__global__ void gKernel1(int * input, int * output){
extern __shared__ int s[];
int i = threadIdx.x;
s[i] = input[i];
output[i] = s[i];
}
.rawInput 0
int hostInput[4] = {1,2,3,4};
int hostOutput[4] = {0,0,0,0};
int * deviceInput;
int * deviceOutput;
cudaMalloc( (void **) &deviceInput, sizeof(int)*4)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMalloc( (void **) &deviceOutput, sizeof(int)*4)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpy(deviceInput, &hostInput, sizeof(int)*4, cudaMemcpyHostToDevice)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
gKernel1<<<1,4, 4 * sizeof(int)>>>(deviceInput, deviceOutput);
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int)*4, cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
// FIXME: output of the whole static array isn't working at the moment
hostOutput[0]
// CHECK: (int) 1
hostOutput[1]
// CHECK: (int) 2
hostOutput[2]
// CHECK: (int) 3
hostOutput[3]
// CHECK: (int) 4
// expected-no-diagnostics
.q


@@ -0,0 +1,40 @@
//------------------------------------------------------------------------------
// CLING - the C++ LLVM-based InterpreterG :)
//
// This file is dual-licensed: you can choose to license it under the University
// of Illinois Open Source License or the GNU Lesser General Public License. See
// LICENSE.TXT for details.
//------------------------------------------------------------------------------
// The test checks whether a CUDA-compatible device is available and whether simple
// __global__ kernels and __device__ functions run.
// RUN: cat %s | %cling -x cuda -Xclang -verify 2>&1 | FileCheck %s
// REQUIRES: cuda-runtime
// Check whether a CUDA-compatible device (GPU) is available.
int device_count = 0;
cudaGetDeviceCount(&device_count)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
device_count > 0
// CHECK: (bool) true
// Check whether a minimal __global__ kernel is callable.
.rawInput 1
__global__ void gKernel1(){}
.rawInput 0
gKernel1<<<1,1>>>();
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
// Check whether a simple __device__ function is usable.
.rawInput 1
__device__ int dKernel1(){return 42;}
__global__ void gKernel2(){int i = dKernel1();}
.rawInput 0
gKernel2<<<1,1>>>();
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
// expected-no-diagnostics
.q


@@ -0,0 +1,58 @@
//------------------------------------------------------------------------------
// CLING - the C++ LLVM-based InterpreterG :)
//
// This file is dual-licensed: you can choose to license it under the University
// of Illinois Open Source License or the GNU Lesser General Public License. See
// LICENSE.TXT for details.
//------------------------------------------------------------------------------
// The test checks whether CUDA streams work.
// RUN: cat %s | %cling -x cuda -Xclang -verify 2>&1 | FileCheck %s
// REQUIRES: cuda-runtime
.rawInput 1
__global__ void gKernel1(int * a, int b){
int i = threadIdx.x;
a[i] += b;
}
.rawInput 0
cudaStream_t stream1, stream2;
cudaStreamCreate(&stream1)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaStreamCreate(&stream2)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
int host1[] = {1,2,3,4};
int host2[] = {11,12,13,14};
int * device1;
int * device2;
cudaMalloc( (void **) &device1, sizeof(int)*4)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMalloc( (void **) &device2, sizeof(int)*4)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpyAsync(device1, &host1, sizeof(int)*4, cudaMemcpyHostToDevice, stream1)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpyAsync(device2, &host2, sizeof(int)*4, cudaMemcpyHostToDevice, stream2)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
gKernel1<<<1,4,0,stream2>>>(device2, 2);
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
gKernel1<<<1,4,0,stream1>>>(device1, 1);
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpyAsync(&host2, device2, sizeof(int)*4, cudaMemcpyDeviceToHost, stream2)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpyAsync(&host1, device1, sizeof(int)*4, cudaMemcpyDeviceToHost, stream1)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
// Wait for both streams to finish before reading the results on the host.
cudaStreamSynchronize(stream1)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaStreamSynchronize(stream2)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
host1[0] + host1[1] + host1[2] + host1[3]
// CHECK: (int) 14
host2[0] + host2[1] + host2[2] + host2[3]
// CHECK: (int) 58
// expected-no-diagnostics
.q