diff options
author | Volker Hoffmann <volker@cheleb.net> | 2014-07-04 14:06:30 +0200 |
---|---|---|
committer | Volker Hoffmann <volker@cheleb.net> | 2014-07-04 14:06:30 +0200 |
commit | 086f53870ade34c70f30c975650f2b77031e4864 (patch) | |
tree | 1d492149691a40bdaf618bffc91b851c6e46580a | |
parent | fe66d798f59da25f826c547313b779cee0c9f443 (diff) |
use template
-rw-r--r-- | functionTest.cu | 10 |
1 files changed, 6 insertions, 4 deletions
diff --git a/functionTest.cu b/functionTest.cu
index d0f066d..8848d48 100644
--- a/functionTest.cu
+++ b/functionTest.cu
@@ -2,7 +2,8 @@
 #include <stdio.h>
 #include <stdlib.h>
 
-__device__ void f2(int idx, float x_s[4])
+template <int nthreads>
+__device__ void f2(int idx, float x_s[nthreads])
 {
     if(idx>0) {
         printf("%i %.2e %.2e\n", idx, x_s[idx], x_s[idx-1]);
@@ -11,14 +12,15 @@ __device__ void f2(int idx, float x_s[4])
     }
 }
 
+template <int nthreads>
 __global__ void f1(float *x_d)
 {
     int idx = blockIdx.x * blockDim.x + threadIdx.x;
-    __shared__ float x_s[4];
+    __shared__ float x_s[nthreads];
     x_s[idx] = x_d[idx];
     __syncthreads();
     //printf("%i %.2e %.2e\n", idx, x_d[idx], x_s[idx]);
-    f2(idx, x_s);
+    f2 <nthreads> (idx, x_s);
 }
 
 int main()
@@ -38,7 +40,7 @@ int main()
     cudaMemcpy(x_d, x_h, sizeof(float)*nx, cudaMemcpyHostToDevice);
 
     // run kernel
-    f1 <<< 1, 4 >>> (x_d);
+    f1 <4> <<< 1, 4 >>> (x_d);
 
     // free
     cudaFree(x_d);