I have started using OpenCL for the first time, and I'm trying to run the Sobel edge detection example from this site: http://www.karlosp.net/blog/2012/05/03/opencl-opencv-sobel-edge-detector/. However, when I run the kernel on the GPU, the frame rate is below 15 FPS and the GPU utilization is below 5%. How can I make the kernel run on all of the GPU's threads (similar to how OpenMP uses all CPU cores) so that the utilization reaches at least 95%?
The code:
Kernel code:
/*
 * Sobel edge detector: writes the gradient magnitude of image A into R.
 *
 * A      - input image, row-major, one float per pixel (width * height elements)
 * R      - output image, same layout as A
 * width  - image width in pixels
 * height - image height in pixels
 *
 * Intended to be launched over a 2D NDRange where dimension 0 is the column
 * and dimension 1 is the row. Each work-item produces exactly one output
 * pixel. Pixels on the one-pixel image border have no complete 3x3
 * neighbourhood and are written as 0.
 */
__kernel void sobel(__global const float *A, __global float *R, uint width, uint height) {
    const uint x = (uint)get_global_id(0);
    const uint y = (uint)get_global_id(1);

    /* The host may round the global size up to a multiple of the work-group
       size; work-items that fall outside the image must not touch memory. */
    if (x >= width || y >= height) {
        return;
    }

    const uint index = width * y + x;

    float sobelX = 0.0f;
    float sobelY = 0.0f;

    /* Test row and column directly. Written as "x + 1 < width" rather than
       "x < width - 1" so that width == 0 or height == 0 cannot underflow. */
    if (x > 0 && x + 1 < width && y > 0 && y + 1 < height) {
        /* Load each neighbour once; the centre pixel and the zero-weighted
           taps of each mask do not contribute and are skipped. */
        const float nw = A[index - width - 1];
        const float n  = A[index - width];
        const float ne = A[index - width + 1];
        const float w  = A[index - 1];
        const float e  = A[index + 1];
        const float sw = A[index + width - 1];
        const float s  = A[index + width];
        const float se = A[index + width + 1];

        /* Horizontal mask: [-1 0 +1; -2 0 +2; -1 0 +1] */
        sobelX = (ne - nw) + 2.0f * (e - w) + (se - sw);
        /* Vertical mask:   [-1 -2 -1; 0 0 0; +1 +2 +1] */
        sobelY = (sw - nw) + 2.0f * (s - n) + (se - ne);
    }

    /* x*x instead of pow(x, 2): pow is a general transcendental function. */
    R[index] = sqrt(sobelX * sobelX + sobelY * sobelY);
}