This is from the CUDA_C_Programming_Guide, Chapter 2, Thread Hierarchy:
__global__ void MatAdd(float A[N][N], float B[N][N], float C[N][N])
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    int j = blockIdx.y * blockDim.y + threadIdx.y;
    if (i < N && j < N)
        C[i][j] = A[i][j] + B[i][j];
}

int main()
{
    ....
    dim3 threadsPerBlock(16, 16);
    dim3 numBlocks(N / threadsPerBlock.x, N / threadsPerBlock.y);
    MatAdd<<<numBlocks, threadsPerBlock>>>(A, B, C);
    ....
}
I'm new to CUDA and can't make sense of "int i = blockIdx.x * blockDim.x + threadIdx.x". Why is the index computed this way? Can anyone explain it to me? Thanks a lot. For example, how do I work out "i" and "j" for Thread(1,1) in Block(1,1)?
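Here is my own attempt at working through the arithmetic for that case, assuming the 16x16 block size from the example above; please correct me if I have misunderstood:

// My attempt (not sure this is right): Thread(1,1) in Block(1,1), blockDim = (16,16)
int i = blockIdx.x * blockDim.x + threadIdx.x;  // = 1 * 16 + 1 = 17
int j = blockIdx.y * blockDim.y + threadIdx.y;  // = 1 * 16 + 1 = 17
// So, if I understand correctly, this thread would compute C[17][17]?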