2018-04-24 17:45:35 +03:00
//------------------------------------------------------------------------------
// CLING - the C++ LLVM-based InterpreterG :)
2019-10-04 17:30:01 +03:00
// author: Simeon Ehrig <s.ehrig@hzdr.de>
2018-04-24 17:45:35 +03:00
//
// This file is dual-licensed: you can choose to license it under the University
// of Illinois Open Source License or the GNU Lesser General Public License. See
// LICENSE.TXT for details.
//------------------------------------------------------------------------------
2018-05-17 17:28:12 +03:00
// The Test checks if a CUDA kernel works with arguments and built-in
// functions.
2021-02-12 13:13:04 +03:00
// RUN: cat %s | %cling -x cuda --cuda-path=%cudapath %cudasmlevel -Xclang -verify 2>&1 | FileCheck %s
2018-04-24 17:45:35 +03:00
// REQUIRES: cuda-runtime
// Test, if a simple kernel with arguments works.
. rawInput 1
__global__ void gKernel1(int* out) {
  // Every launched thread stores the same constant through `out`.
  out[0] = 42;
}
. rawInput 0
// Host-side driver for gKernel1. Expressions written without a trailing
// semicolon are intentional: their value is echoed by the interpreter and
// matched by the FileCheck directive on the line that follows them.
int* deviceOutput;
int hostOutput = 0;
cudaMalloc((void**)&deviceOutput, sizeof(int))
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
// Launch one block with one thread. A kernel launch returns no status of its
// own, so the launch result is read back through cudaGetLastError().
gKernel1<<<1, 1>>>(deviceOutput);
cudaGetLastError()
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
// Block until the kernel has finished so execution errors surface here.
cudaDeviceSynchronize()
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
// Copy the single int written by the kernel back into hostOutput.
cudaMemcpy(&hostOutput, deviceOutput, sizeof(int), cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
hostOutput
// CHECK: (int) 42
// Test, if a parallel kernel with built-in functions works.
2018-05-02 17:29:43 +03:00
// Thread count for the gKernel2 launch; also the element count of the host
// and device result arrays used with it.
const unsigned int numberOfThreads = 4 ;
2018-04-24 17:45:35 +03:00
. rawInput 1
__device__ int mul7(int in) {
  // Device-side helper: scales its argument by seven.
  const int scaled = in * 7;
  return scaled;
}
__global__ void gKernel2(int* out) {
  // One output slot per thread, selected by the thread's x index within its
  // block. There is no bounds guard, so `out` must hold at least blockDim.x
  // ints.
  const int slot = threadIdx.x;
  out[slot] = mul7(slot);
}
. rawInput 0
// Host-side driver for gKernel2. Expressions written without a trailing
// semicolon are intentional: their value is echoed by the interpreter and
// matched by the FileCheck directive on the line that follows them.
int* deviceOutput2;
int hostOutput2[numberOfThreads];
// Zero-initialise the host result array before the device copy fills it.
for (unsigned int i = 0; i < numberOfThreads; ++i) {
  hostOutput2[i] = 0;
}
cudaMalloc((void**)&deviceOutput2, sizeof(int) * numberOfThreads)
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
// Launch a single block of numberOfThreads threads; each thread fills one
// slot of deviceOutput2.
gKernel2<<<1, numberOfThreads>>>(deviceOutput2);
cudaGetLastError()
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
// Block until the kernel has finished so execution errors surface here.
cudaDeviceSynchronize()
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
cudaMemcpy(hostOutput2, deviceOutput2, sizeof(int) * numberOfThreads, cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaSuccess) : (unsigned int) 0
// Reduce the host reference values (7 * i) and the device results to one sum
// each, then compare the two sums.
unsigned int expectedSum = 0;
unsigned int cudaSum = 0;
for (unsigned int i = 0; i < numberOfThreads; ++i) {
  expectedSum += 7 * i;
  cudaSum += hostOutput2[i];
}
expectedSum == cudaSum // expected-note {{use '=' to turn this equality comparison into an assignment}}
// CHECK: (bool) true
2018-04-24 17:45:35 +03:00
. q