|
|
@ -37,13 +37,16 @@ __global__ void reduce_kernel( int n, const int *in_buffer, int *out_buffer, con |
|
|
|
{ |
|
|
|
// Allocate shared memory inside the block. |
|
|
|
extern __shared__ int s_mem[]; |
|
|
|
float my_sum=0; |
|
|
|
int my_sum=0; |
|
|
|
// The range of data to work with. |
|
|
|
int2 range = block_ranges[blockIdx.x]; |
|
|
|
|
|
|
|
// Compute the sum of my elements. |
|
|
|
|
|
|
|
// TODO: fill-in that section of the code |
|
|
|
for(int i = range.x + threadIdx.x; i < range.y; i+=blockDim.x) |
|
|
|
{ |
|
|
|
my_sum += in_buffer[i]; |
|
|
|
} |
|
|
|
|
|
|
|
// Copy my sum into shared memory. |
|
|
|
s_mem[threadIdx.x] = my_sum; |
|
|
@ -54,6 +57,16 @@ float my_sum=0; |
|
|
|
// Compute the sum inside the block. |
|
|
|
|
|
|
|
// TODO: fill-in that section of the code |
|
|
|
|
|
|
|
for(int e = 1; 1<<e <= blockDim.x; e++) |
|
|
|
{ |
|
|
|
int a = threadIdx.x+(1<<(e-1)); |
|
|
|
if(threadIdx.x%(1<<e) == 0 && a < blockDim.x) |
|
|
|
{ |
|
|
|
s_mem[threadIdx.x] += s_mem[a]; |
|
|
|
} |
|
|
|
__syncthreads(); |
|
|
|
} |
|
|
|
|
|
|
|
// The first thread of the block stores its result. |
|
|
|
if( threadIdx.x == 0 ) |
|
|
@ -124,7 +137,7 @@ __global__ void reduce_kernel_optimized( int n, const int *in_buffer, int *out_b |
|
|
|
{ |
|
|
|
// The number of warps in the block. |
|
|
|
const int NUM_WARPS = BLOCK_DIM / WARP_SIZE; |
|
|
|
float my_sum=0; |
|
|
|
int my_sum=0; |
|
|
|
// Allocate shared memory inside the block. |
|
|
|
__shared__ volatile int s_mem[BLOCK_DIM]; |
|
|
|
|
|
|
@ -138,12 +151,16 @@ float my_sum=0; |
|
|
|
// Compute the sum of my elements. |
|
|
|
|
|
|
|
// TODO: fill-in that section of the code |
|
|
|
for(int i = range.x + threadIdx.x; i < range.y; i+=blockDim.x) |
|
|
|
{ |
|
|
|
my_sum += in_buffer[i]; |
|
|
|
} |
|
|
|
|
|
|
|
// Copy my sum into shared memory. |
|
|
|
s_mem[threadIdx.x] = my_sum; |
|
|
|
|
|
|
|
// Compute the sum inside each warp. |
|
|
|
|
|
|
|
|
|
|
|
// TODO: fill-in that section of the code |
|
|
|
|
|
|
|
// Each warp leader stores the result for the warp. |
|
|
|