From 086f53870ade34c70f30c975650f2b77031e4864 Mon Sep 17 00:00:00 2001 From: Volker Hoffmann Date: Fri, 4 Jul 2014 14:06:30 +0200 Subject: use template --- functionTest.cu | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/functionTest.cu b/functionTest.cu index d0f066d..8848d48 100644 --- a/functionTest.cu +++ b/functionTest.cu @@ -2,7 +2,8 @@ #include #include -__device__ void f2(int idx, float x_s[4]) +template <int nthreads> +__device__ void f2(int idx, float x_s[nthreads]) { if(idx>0) { printf("%i %.2e %.2e\n", idx, x_s[idx], x_s[idx-1]); @@ -11,14 +12,15 @@ __device__ void f2(int idx, float x_s[4]) } } +template <int nthreads> __global__ void f1(float *x_d) { int idx = blockIdx.x * blockDim.x + threadIdx.x; - __shared__ float x_s[4]; + __shared__ float x_s[nthreads]; x_s[idx] = x_d[idx]; __syncthreads(); //printf("%i %.2e %.2e\n", idx, x_d[idx], x_s[idx]); - f2(idx, x_s); + f2 <nthreads> (idx, x_s); } int main() @@ -38,7 +40,7 @@ int main() cudaMemcpy(x_d, x_h, sizeof(float)*nx, cudaMemcpyHostToDevice); // run kernel - f1 <4> <<< 1, 4 >>> (x_d); + f1 <4> <<< 1, 4 >>> (x_d); // free cudaFree(x_d); -- cgit v1.1