I'm testing an opencl kernel with four work items and a single work group. The kernel is:
__kernel void pgs(__global float l2_norm)
{
int gid_x=get_global_id(0);
int gid_y=get_global_id(1);
if (gid_x==0 && gid_y==0) printf("[INFO] local_size_x:%02d, local_size_y:%02d, global_size_x:%02d, global_size_y:%02d, group_size_x:%02d, group_size_y:%02d\n", get_local_size(0), get_local_size(1), get_global_size(0), get_global_size(1), get_group_size(0), get_group_size(1));
barrier(CLK_GLOBAL_MEM_FENCE);
printf("%d,%d before: %2.6f\n",gid_x,gid_y,l2_norm);
barrier(CLK_GLOBAL_MEM_FENCE);
l2_norm+=1;
barrier(CLK_GLOBAL_MEM_FENCE);
printf("%d,%d after: %2.6f\n",gid_x,gid_y,l2_norm);
printf("testing %d,%d\n",gid_x,gid_y);
}
The output is:
1,1 before: 0.000000
0,1 before: 0.000000
1,0 before: 0.000000
[INFO] local_size_x:01, local_size_y:01, global_size_x:02, global_size_y:02, group_size_x:01, group_size_y:01
1,1 after: 1.000000
0,1 after: 2.000000
1,0 after: 3.000000
testing 1,1
0,0 before: 3.000000
testing 0,1
testing 1,0
0,0 after: 4.000000
testing 0,0
My question is: Why is the line starting with [INFO]
not printed first? Shouldn't the global barrier stop all work items until work item 0 has printed the [INFO]
line?