2018-04-24 17:45:35 +03:00
//------------------------------------------------------------------------------
// CLING - the C++ LLVM-based InterpreterG :)
2019-10-04 17:30:01 +03:00
// author: Simeon Ehrig <s.ehrig@hzdr.de>
2018-04-24 17:45:35 +03:00
//
// This file is dual-licensed: you can choose to license it under the University
// of Illinois Open Source License or the GNU Lesser General Public License. See
// LICENSE.TXT for details.
//------------------------------------------------------------------------------
// The test checks that dynamically sized (runtime) shared memory works.
2020-10-20 17:04:46 +03:00
// RUN: cat %s | %cling -x cuda --cuda-path=%cudapath -Xclang -verify 2>&1 | FileCheck %s
2018-04-24 17:45:35 +03:00
// REQUIRES: cuda-runtime
2018-05-02 17:29:43 +03:00
// Thread count of the single launched block; also sizes every host and device buffer below.
const unsigned int numberOfThreads = 4 ;
2018-04-24 17:45:35 +03:00
// gKernel1 rotates the block's input by one slot through dynamically sized
// shared memory: thread i stores input[i] into s[(i + 1) % blockDim.x] and,
// after a block-wide barrier, copies s[i] to output[i].
// Only threadIdx.x is used for indexing, so the launch must use a single 1D
// block, and the third launch argument must provide blockDim.x * sizeof(int)
// bytes for s. Both pointers must reference at least blockDim.x ints.
.rawInput 1
__global__ void gKernel1(int* input, int* output) {
  extern __shared__ int s[];
  int i = threadIdx.x;
  // Each thread fills the slot of its right-hand neighbour (wrapping around).
  s[(i + 1) % blockDim.x] = input[i];
  // Every slot has to be written before any thread reads its own slot.
  __syncthreads();
  output[i] = s[i];
}
.rawInput 0
2018-05-02 17:29:43 +03:00
// Host buffers: the input holds 1..numberOfThreads, the output starts zeroed
// so that leftover data cannot make the later comparisons pass by accident.
int hostInput[numberOfThreads];
int hostOutput[numberOfThreads];
for (unsigned int i = 0; i < numberOfThreads; ++i) {
  hostInput[i] = i + 1;
  hostOutput[i] = 0;
}
2018-04-24 17:45:35 +03:00
// Device buffers mirroring hostInput and hostOutput. Calls written without a
// trailing semicolon are value-printed by cling, and the printed cudaError_t
// is matched by the FileCheck line that follows each of them.
int* deviceInput;
int* deviceOutput;
cudaMalloc((void**)&deviceInput, sizeof(int) * numberOfThreads)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMalloc((void**)&deviceOutput, sizeof(int) * numberOfThreads)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpy(deviceInput, hostInput, sizeof(int) * numberOfThreads, cudaMemcpyHostToDevice)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
// One block of numberOfThreads threads; the third launch argument sizes the
// kernel's extern shared array to one int per thread.
gKernel1<<<1, numberOfThreads, sizeof(int) * numberOfThreads>>>(deviceInput, deviceOutput);
// Launch-configuration errors surface here, execution errors at the sync.
cudaGetLastError()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaDeviceSynchronize()
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaMemcpy(hostOutput, deviceOutput, sizeof(int) * numberOfThreads, cudaMemcpyDeviceToHost)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
// Release the device buffers; the results have already been copied to hostOutput.
cudaFree(deviceInput)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
cudaFree(deviceOutput)
// CHECK: (cudaError_t) (cudaError::cudaSuccess) : (unsigned int) 0
// The kernel only permutes the values 1..numberOfThreads, so the sum of the
// copied-back output must still be n * (n + 1) / 2.
int expectedSum = (numberOfThreads * (numberOfThreads + 1)) / 2;
int cudaSum = 0;
for (unsigned int i = 0; i < numberOfThreads; ++i) {
  cudaSum += hostOutput[i];
}
// check that the elements were shifted
2018-05-17 17:28:12 +03:00
// The last thread wrote its input (numberOfThreads) into slot 0.
hostOutput[0] == numberOfThreads // expected-note {{use '=' to turn this equality comparison into an assignment}}
// CHECK: (bool) true
// The last slot was written by the second-to-last thread (input numberOfThreads - 1).
hostOutput[numberOfThreads - 1] == numberOfThreads - 1 // expected-note {{use '=' to turn this equality comparison into an assignment}}
// CHECK: (bool) true
// The rotation must not lose or duplicate any value.
expectedSum == cudaSum // expected-note {{use '=' to turn this equality comparison into an assignment}}
// CHECK: (bool) true
.q