cling/test/CUDADeviceCode/CUDAHostPrefix.C
Simeon Ehrig 2011246c17 Overwork CUDA device tests.
- add cudaDeviceSynchronize() at every kernel launch
- remove small address bug at cudaMemcpy, if host array is used
- in parallel test cases, replace fixes thread number with variable
- overworked shared memory kernel
2018-06-25 08:29:07 +02:00

46 lines
1.3 KiB
C

//------------------------------------------------------------------------------
// CLING - the C++ LLVM-based InterpreterG :)
//
// This file is dual-licensed: you can choose to license it under the University
// of Illinois Open Source License or the GNU Lesser General Public License. See
// LICENSE.TXT for details.
//------------------------------------------------------------------------------
// The test checks whether a function declared with both the __host__ and
// __device__ qualifiers is available on the host side and on the device side.
// RUN: cat %s | %cling -x cuda -Xclang -verify 2>&1 | FileCheck %s
// REQUIRES: cuda-runtime
.rawInput 1
// Adds two integers and returns the result. The __host__ __device__
// qualifiers make the same function callable from host code and from kernels.
__host__ __device__ int sum(int a, int b){
  const int total = a + b;
  return total;
}
// Kernel that calls sum with the arguments 40 and 2 on the device and stores
// the result in the int that output points to.
__global__ void gKernel1(int * output){
  output[0] = sum(40, 2);
}
.rawInput 0
// Host side: call sum directly at the interpreter prompt. Statements without
// a trailing semicolon have their value printed by cling, and FileCheck
// matches that printed value against the pattern on the following line.
sum(41,1)
// CHECK: (int) 42
// Device side: run gKernel1 with one block of one thread and copy the single
// int result back into hostOutput.
int hostOutput = 0;
int * deviceOutput;
cudaMalloc( (void **) &deviceOutput, sizeof(int))
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
gKernel1<<<1,1>>>(deviceOutput);
// A kernel launch returns no status itself, so query the launch error here.
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
// Wait for the kernel to finish so that execution errors are reported here.
cudaDeviceSynchronize()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int), cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
hostOutput
// CHECK: (int) 42
// Release the device allocation made by cudaMalloc above.
cudaFree(deviceOutput)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
// expected-no-diagnostics
.q