blob: 19f6680248d71158914b8eefbe353b0db0741475 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
|
#include <stdio.h>
// __device__ function
// Carves a raw buffer into a short-typed view followed by a float-typed view.
// Both views are local pointers only; nothing is read or written through them here.
//
// NOTE(review): `array` is not declared in the visible part of this file. It is
// presumably a dynamically sized shared buffer (extern __shared__) — confirm.
__device__ void func()
{
    // The short view starts at the beginning of the buffer.
    short* array0 = (short*)array;

    // The float view starts after 128 shorts (256 bytes). The previous offset of
    // 127 shorts (254 bytes) produced a float pointer that is not 4-byte aligned,
    // and pointers must be aligned to the type they point to. This assumes `array`
    // itself is at least 4-byte aligned — TODO confirm at its declaration.
    float* array1 = (float*)&array0[128];
}
/* __global__ function */
// Skeleton of a per-block reduction kernel.
//
// Parameters (none of them is dereferenced by the code below):
//   input  - read-only float pointer
//   output - float pointer
//   timer  - clock_t pointer
//
// Shared memory: declares a dynamically sized float buffer whose byte count is
// supplied by the third launch-configuration argument.
__global__ static void reduction(const float* __restrict__ input, float *output, clock_t *timer)
{
    // Static alternative that was sketched originally:
    // __shared__ float shared[2 * blockDim.x];
    extern __shared__ float shared[];

    const int threadInBlock = threadIdx.x;
    const int blockInGrid = blockIdx.x;

    // Only the first thread of each block issues the memory fence.
    if (threadInBlock == 0)
        __threadfence();

    // Perform reduction to find minimum.
    // The stride starts at the block size and is halved every pass; each pass
    // currently consists of a block-wide barrier only.
    int stride = blockDim.x;
    while (stride > 0) {
        __syncthreads();
        stride /= 2;
    }
}
// Host entry point: launches the reduction kernel once, waits for it to finish,
// and resets the device.
//
// Returns 0 on success and 1 if the launch, the kernel execution, or the device
// reset reports a CUDA error (the error string is printed to stderr).
int main(int argc, char **argv)
{
    // Command-line arguments are not used.
    (void)argc;
    (void)argv;

    // The first launch parameter is the grid dimension: an 8 x 8 x 1 grid of blocks.
    dim3 dimGrid(8, 8, 1);

    // 256 threads per block, 256 bytes of dynamic shared memory, stream 0.
    // The kernel defined in this file is `reduction`; the previous launch named
    // `timedReduction`, which is not defined here.
    // NOTE(review): dinput, doutput and dtimer are not declared in the visible
    // part of this file — they must be declared (and allocated if the kernel
    // starts dereferencing them) before this launch; confirm.
    reduction<<<dimGrid, 256, 256, 0>>>(dinput, doutput, dtimer);

    // A kernel launch does not return an error directly; bad launch
    // configurations are reported through cudaGetLastError().
    cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess) {
        fprintf(stderr, "reduction launch failed: %s\n", cudaGetErrorString(status));
        cudaDeviceReset();
        return 1;
    }

    // Wait for the kernel so that execution errors surface before teardown.
    status = cudaDeviceSynchronize();
    if (status != cudaSuccess) {
        fprintf(stderr, "reduction execution failed: %s\n", cudaGetErrorString(status));
        cudaDeviceReset();
        return 1;
    }

    status = cudaDeviceReset();
    if (status != cudaSuccess) {
        fprintf(stderr, "cudaDeviceReset failed: %s\n", cudaGetErrorString(status));
        return 1;
    }

    return 0;
}
|