//------------------------------------------------------------------------------
// CLING - the C++ LLVM-based InterpreterG :)
//
// This file is dual-licensed: you can choose to license it under the University
// of Illinois Open Source License or the GNU Lesser General Public License. See
// LICENSE.TXT for details.
//------------------------------------------------------------------------------
// This test checks whether CUDA streams work.
// RUN: cat %s | %cling -x cuda -Xclang -verify 2>&1 | FileCheck %s
// REQUIRES: cuda-runtime
// Element count of every host/device buffer in this test; also used as the
// block size of each single-block kernel launch.
const unsigned int numberOfThreads = 4;
// gKernel1: each thread adds b to the element of a selected by its
// threadIdx.x. There is no blockIdx term and no bounds check, so it is meant
// to be launched as a single 1D block with at most as many threads as a has
// elements.
.rawInput 1
__global__ void gKernel1(int* a, int b) {
  int i = threadIdx.x;
  a[i] += b;
}
.rawInput 0
// Two separate streams; each one carries the copies and the kernel launch for
// one of the two data sets.
// The create calls deliberately have no trailing semicolon so that cling
// prints the returned cudaError_t, which the FileCheck directives match.
cudaStream_t stream1, stream2;
cudaStreamCreate(&stream1)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaStreamCreate(&stream2)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
// Host-side staging buffers: one input and one output array per stream.
int hostInput1[numberOfThreads];
int hostInput2[numberOfThreads];
int hostOutput1[numberOfThreads];
int hostOutput2[numberOfThreads];
// Fill the inputs with known values: set 1 holds i, set 2 holds i + 10.
for (unsigned int i = 0; i < numberOfThreads; ++i) {
  hostInput1[i] = i;
  hostInput2[i] = i + 10;
}
// Device buffers, one per stream, each sized for numberOfThreads ints.
int* device1;
int* device2;
cudaMalloc((void**)&device1, sizeof(int) * numberOfThreads)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMalloc((void**)&device2, sizeof(int) * numberOfThreads)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
// Upload each input array to its device buffer on its own stream.
cudaMemcpyAsync(device1, hostInput1, sizeof(int) * numberOfThreads, cudaMemcpyHostToDevice, stream1)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpyAsync(device2, hostInput2, sizeof(int) * numberOfThreads, cudaMemcpyHostToDevice, stream2)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
// Launch on stream2 first: one block of numberOfThreads threads adds 2 to
// every element of device2. The launch itself returns nothing, so launch
// errors are read back with cudaGetLastError() and execution errors with
// cudaDeviceSynchronize().
gKernel1<<<1, numberOfThreads, 0, stream2>>>(device2, 2);
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaDeviceSynchronize()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
// Same kernel on stream1: adds 1 to every element of device1.
gKernel1<<<1, numberOfThreads, 0, stream1>>>(device1, 1);
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaDeviceSynchronize()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
// Download the results, each on the stream that produced them.
cudaMemcpyAsync(hostOutput2, device2, sizeof(int) * numberOfThreads, cudaMemcpyDeviceToHost, stream2)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpyAsync(hostOutput1, device1, sizeof(int) * numberOfThreads, cudaMemcpyDeviceToHost, stream1)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
// The copies above are asynchronous calls. Wait for both streams explicitly
// before the host reads hostOutput1/hostOutput2, instead of relying on how
// the runtime treats async copies into ordinary (non-pinned) host arrays.
cudaStreamSynchronize(stream2)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaStreamSynchronize(stream1)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
// Compare the sum of each downloaded array with the sum computed on the host.
// Set 1 started as i and had 1 added by the kernel; set 2 started as i + 10
// and had 2 added, hence the expected per-element values i + 1 and i + 12.
unsigned int expectedSum1 = 0;
unsigned int cudaSum1 = 0;
unsigned int expectedSum2 = 0;
unsigned int cudaSum2 = 0;
for (unsigned int i = 0; i < numberOfThreads; ++i) {
  expectedSum1 += i + 1;
  cudaSum1 += hostOutput1[i];
  expectedSum2 += i + 12;
  cudaSum2 += hostOutput2[i];
}
// Small workaround: the comparison is stored in a bool rather than written as
// a bare expression, to avoid the compiler hint that suggests '=' (this run
// must stay free of diagnostics). The missing trailing semicolon is
// intentional so that cling prints the value.
bool result1 = expectedSum1 == cudaSum1
// CHECK: (bool) true
bool result2 = expectedSum2 == cudaSum2
// CHECK: (bool) true
// expected-no-diagnostics
.q