diff options
-rw-r--r-- | Makefile | 10 | ||||
-rw-r--r-- | functionTest.cu | 89 |
2 files changed, 97 insertions, 2 deletions
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 SM=35
 ARCH= -arch sm_${SM}
 
-all: advection diff2d diff2d_old incrementArrays moveArrays
+all: advection diff2d diff2d_old functionTest incrementArrays hydro1d moveArrays
 
 advection:
 	nvcc $(ARCH) -o advection advection.cu
@@ -12,12 +12,18 @@ diff2d:
 diff2d_old:
 	nvcc $(ARCH) -o diff2d_old diff2d_old.cu
 
+functionTest:
+	nvcc $(ARCH) -o functionTest functionTest.cu
+
 incrementArrays:
 	nvcc $(ARCH) -o incrementArrays incrementArrays.cu
 
+hydro1d:
+	nvcc $(ARCH) -o hydro1d hydro1d.cu
+
 moveArrays:
 	nvcc $(ARCH) -o moveArrays moveArrays.cu
 
 clean:
-	rm -rf advection diff2d diff2d_old incrementArrays moveArrays
+	rm -rf advection diff2d diff2d_old functionTest incrementArrays hydro1d moveArrays
 
diff --git a/functionTest.cu b/functionTest.cu
new file mode 100644
index 0000000..d0f066d
--- /dev/null
+++ b/functionTest.cu
@@ -0,0 +1,89 @@
+#include <cuda.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+// Element count of the test array. It is also the extent of the shared tile
+// in f1, so f1 must be launched with at most NX threads per block.
+#define NX 4
+
+// Abort with file, line and the CUDA error string when a runtime call fails.
+#define CUDA_CHECK(call)                                               \
+    do {                                                               \
+        cudaError_t err_ = (call);                                     \
+        if (err_ != cudaSuccess) {                                     \
+            fprintf(stderr, "CUDA error %s:%d: %s\n",                  \
+                    __FILE__, __LINE__, cudaGetErrorString(err_));     \
+            exit(EXIT_FAILURE);                                        \
+        }                                                              \
+    } while (0)
+
+// Device helper: print element idx of the shared tile and, for idx > 0, its
+// left neighbour as well. idx must be a valid index into x_s (0 <= idx < 4).
+__device__ void f2(int idx, float x_s[4])
+{
+    if(idx>0) {
+        printf("%i %.2e %.2e\n", idx, x_s[idx], x_s[idx-1]);
+    } else {
+        printf("%i %.2e\n", idx, x_s[idx]);
+    }
+}
+
+// Kernel: stage x_d into a block-local shared tile, then let every thread
+// print its own tile entry and its left neighbour's through f2.
+// Expected launch: 1D grid with blockDim.x <= NX; x_d holds NX floats.
+__global__ void f1(float *x_d)
+{
+    const int tid = threadIdx.x;                    // index into the shared tile
+    const int gid = blockIdx.x * blockDim.x + tid;  // index into x_d
+    __shared__ float x_s[NX];
+
+    // The tile is per-block, so it is indexed with the thread-local id. The
+    // guard keeps surplus threads from touching memory out of bounds
+    // (gid < NX implies tid < NX because tid <= gid).
+    const bool active = (gid < NX);
+    if(active) {
+        x_s[tid] = x_d[gid];
+    }
+    // The barrier stays outside the branch so every thread of the block reaches it.
+    __syncthreads();
+    if(active) {
+        f2(tid, x_s);
+    }
+}
+
+// Host driver: fill an NX-element array with 0..NX-1, echo it, copy it to the
+// device and run f1 on a single block of NX threads.
+int main()
+{
+    float *x_h, *x_d;
+    int nx = NX;
+
+    // allocate, copy
+    CUDA_CHECK(cudaSetDevice(0));
+    x_h = (float *) malloc(sizeof(float)*nx);
+    if(x_h == NULL) {
+        fprintf(stderr, "host allocation failed\n");
+        return EXIT_FAILURE;
+    }
+    for(int ii=0; ii<nx; ii++) {
+        x_h[ii] = (float) ii;
+        printf("%i %.2e\n", ii, x_h[ii]);
+    }
+    printf("\n");
+    CUDA_CHECK(cudaMalloc((void**) &x_d, sizeof(float)*nx));
+    CUDA_CHECK(cudaMemcpy(x_d, x_h, sizeof(float)*nx, cudaMemcpyHostToDevice));
+
+    // run kernel
+    f1 <<< 1, nx >>> (x_d);
+    CUDA_CHECK(cudaGetLastError());   // reports launch-configuration errors
+    // Device printf output is only flushed at documented points such as a
+    // device synchronisation; cudaFree is not listed among them. The sync
+    // also surfaces any fault raised while the kernel was running.
+    CUDA_CHECK(cudaDeviceSynchronize());
+
+    // free
+    CUDA_CHECK(cudaFree(x_d));
+    free(x_h);
+    return 0;
+}
+