0
votes

I have written a code in OpenCL in which I am not using local (shared) memory. My code crashes during execution and gives error -5. The error goes away when I replace global memory access to cvt_img buffer (in the middle of the code) with some constant values.

I do not understand why this happens, becuase I prevent accessing to out-of-the-scope memory locations using an if statement.

This code is part of a 3D pipeline, but right now, I have seperated it from my main application, and have put it in a seperate project in which all of the buffers are initialized randomly.

The size of the grid (in terms of number of threads) is the same as size of the image (img_size.x, img_size.y) and size of the block is (16, 16). The application is running for 15 images.

void compute_cost_volume( global float3 *cvt_img, global float8 *spixl_map,
global float *disp_level, global int *view_subset, global int *subset_num, int array_width, int2 map_size, int2 img_size, float bl_ratio, int sp_size, int num_disp, float2 step, int x, int y, int z, int view_count ) { barrier(CLK_GLOBAL_MEM_FENCE);

int idx = map_size.x * map_size.y * z + map_size.x * y + x;

float8 spixl = spixl_map[idx];
float2 center = spixl.s12;
int2 camIdx  = (int2)(z % array_width, z / array_width);
float cost_est = 1000000.0, disp_est = 0.0;


for (int dl = 0 ; dl < num_disp ; dl++)
{
    float d = disp_level[dl];
    float min_val = 1000000.0;

    for (int n = 0 ; n < subset_num[z] ; n++)
    {
        int view = view_subset[n];
        int2 viewIdx = (int2)(view % array_width, view / array_width);
        float val = 0.0;

        for (int i = -2 ; i <= 2 ; i++) for (int j = -2 ; j <= 2 ; j++)
        {
            //int2 xy_ref = (int2)(center.x - 2*step.x + i*step.x, center.y - 2*step.y + j*step.y);
            int2 xy_ref = (int2)(center.x + i*step.x, center.y + j*step.y);
            int2 xy_proj = (int2)((int)(xy_ref.x - d*(viewIdx.x - camIdx.x)), (int)(xy_ref.y - bl_ratio*d*(viewIdx.y - camIdx.y) ) );                   

            if (xy_ref.x >= 0 && xy_ref.y >= 0 && xy_proj.x >= 0 && xy_proj.y >= 0 && xy_ref.x < img_size.x && xy_ref.y < img_size.y && xy_proj.x < img_size.x  && xy_proj.y < img_size.y)
            {
                float3 color_ref  = cvt_img[img_size.x*img_size.y*z     + img_size.x*xy_ref.y  + xy_ref.x];
                float3 color_proj = cvt_img[img_size.x*img_size.y*view  + img_size.x*xy_proj.y + xy_proj.x];
                val += fabs(color_ref.x - color_proj.x) + fabs(color_ref.y - color_proj.y) + fabs(color_ref.z - color_proj.z);
            }
            else 
                val += 30;
        }
        if (val < min_val)
            min_val = val;
    }
    if (min_val < cost_est)
    {
        cost_est = min_val;
        disp_est = d;
    }
}

spixl_map[idx].s7 = disp_est;

}

kernel void initial_depth_estimation( global float3 *cvt_img, global float8 *spixl_map, global float *disp_level, int array_width, int2 map_size, int2 img_size, float bl_ratio, int sp_size, int disp_num, global int *view_subset, global int *subset_num ) {

int x = get_global_id(0);
int y = get_global_id(1);

if (x >= map_size.x || y >= map_size.y)
    return;

//float2 step = (float2)(1, 1);
for (int z = 0 ; z < 15 ; z++){

    int idx = map_size.x*map_size.y*z + map_size.x*y + x;

    // Set The Bounding Box

    float2 step = (float2)(1.0, 1.0);

    compute_cost_volume(cvt_img, spixl_map, disp_level, view_subset, subset_num, 
                                    array_width, map_size, img_size, bl_ratio, sp_size, disp_num, step, x, y, z, 15);

    barrier(CLK_LOCAL_MEM_FENCE);
}

}

1

1 Answers

0
votes

From the documentation

https://www.khronos.org/registry/OpenCL/sdk/1.0/docs/man/xhtml/vectorDataTypes.html

" The vector data type is defined with the type name i.e. char, uchar, short, ushort, int, uint, float, long, and ulong followed by a literal value n that defines the number of elements in the vector. Supported values of n are 2, 4, 8, and 16. "

Therefore, there is no float3, maybe you can try to use float4 and make the last element zero?

Also, assuming that float3 existed, this line of code

float3 color_proj = cvt_img[img_size.x*img_size.y*view  + img_size.x*xy_proj.y + xy_proj.x];

does not do what you want, this will produce ONE value that cannot be assigned to vector, you should have used something like

float3 color_proj = (float3) cvt_img[img_size.x*img_size.y*view  + img_size.x*xy_proj.y + xy_proj.x];

this would copy the one value returned by the cvt_img[...] to 3 vector elements.