I have a simple kernel that finds the maximum in a list of doubles and stores its index. All work is done locally, with a work-group size of 64.
In short, I create a list of indexes, populate it with consecutive numbers (based on local_id), and then move them around based on the fitness values.
Currently my main problem is that reading this list of indexes returns really crazy values. I have one printf statement, and while it should print something like this
>> my_index 1 ; other_index 33 ; local_id 1 <<
>> my_index 2; other_index 34; local_id 2 <<
>> .......
The output I get is
>> my_index 1600085855 ; other_index 32 ; local_id 0 <<
>> my_index 1072652127 ; other_index 33 ; local_id 1 <<
>> my_index 942797699 ; other_index 34 ; local_id 2 <<
>> my_index 1072923423 ; other_index 35 ; local_id 3 <<
>> my_index -987348804 ; other_index 36 ; local_id 4 <<
>> my_index 1072849931 ; other_index 37 ; local_id 5 <<
>> my_index -833351863 ; other_index 38 ; local_id 6 <<
>> my_index 1073209710 ; other_index 39 ; local_id 7 <<
>> my_index -833351863 ; other_index 40 ; local_id 8 <<
>> my_index 1073209710 ; other_index 41 ; local_id 9 <<
>> my_index 1206451488 ; other_index 42 ; local_id 10 <<
>> my_index 1072822847 ; other_index 43 ; local_id 11 <<
>> my_index -1561806289 ; other_index 44 ; local_id 12 <<
>> my_index 1072836235 ; other_index 45 ; local_id 13 <<
>> my_index 1797893287 ; other_index 46 ; local_id 14 <<
>> my_index 1072863946 ; other_index 47 ; local_id 15 <<
>> my_index 1499829849 ; other_index 48 ; local_id 16 <<
>> my_index 1073309078 ; other_index 49 ; local_id 17 <<
>> my_index 1215556782 ; other_index 50 ; local_id 18 <<
>> my_index 1073623117 ; other_index 51 ; local_id 19 <<
>> my_index -1741202958 ; other_index 52 ; local_id 20 <<
>> my_index 1073061666 ; other_index 53 ; local_id 21 <<
>> my_index 1908874354 ; other_index 54 ; local_id 22 <<
>> my_index 1072809756 ; other_index 55 ; local_id 23 <<
>> my_index 1499829849 ; other_index 56 ; local_id 24 <<
>> my_index 1073309078 ; other_index 57 ; local_id 25 <<
>> my_index 1965493508 ; other_index 58 ; local_id 26 <<
>> my_index 1073421919 ; other_index 59 ; local_id 27 <<
>> my_index -1908874354 ; other_index 60 ; local_id 28 <<
>> my_index 1073101027 ; other_index 61 ; local_id 29 <<
>> my_index -1561806289 ; other_index 62 ; local_id 30 <<
>> my_index 31 ; other_index 63 ; local_id 31 <<
>> my_index 1600085855 ; other_index 1499829849 ; local_id 0 <<
>> my_index 1072652127 ; other_index 1073309078 ; local_id 1 <<
>> my_index 942797699 ; other_index 1215556782 ; local_id 2 <<
>> my_index 1072923423 ; other_index 1073623117 ; local_id 3 <<
>> my_index -987348804 ; other_index -1741202958 ; local_id 4 <<
>> my_index 1072849931 ; other_index 1073061666 ; local_id 5 <<
>> my_index -833351863 ; other_index 1908874354 ; local_id 6 <<
>> my_index 1073209710 ; other_index 1072809756 ; local_id 7 <<
>> my_index -833351863 ; other_index 1499829849 ; local_id 8 <<
>> my_index 1073209710 ; other_index 1073309078 ; local_id 9 <<
>> my_index 1206451488 ; other_index 1965493508 ; local_id 10 <<
>> my_index 1072822847 ; other_index 1073421919 ; local_id 11 <<
>> my_index -1561806289 ; other_index -1908874354 ; local_id 12 <<
>> my_index 1072836235 ; other_index 1073101027 ; local_id 13 <<
>> my_index 1797893287 ; other_index -1561806289 ; local_id 14 <<
>> my_index 1072863946 ; other_index 31 ; local_id 15 <<
>> my_index 1600085855 ; other_index -833351863 ; local_id 0 <<
>> my_index 1072652127 ; other_index 1073309078 ; local_id 1 <<
>> my_index 942797699 ; other_index 1965493508 ; local_id 2 <<
>> my_index 1073623117 ; other_index 1072822847 ; local_id 3 <<
>> my_index -1741202958 ; other_index -1908874354 ; local_id 4 <<
>> my_index 1072849931 ; other_index 1073101027 ; local_id 5 <<
>> my_index -833351863 ; other_index 1797893287 ; local_id 6 <<
>> my_index 1073209710 ; other_index 1072863946 ; local_id 7 <<
>> my_index -833351863 ; other_index -1908874354 ; local_id 0 <<
>> my_index 1073309078 ; other_index 1072849931 ; local_id 1 <<
>> my_index 942797699 ; other_index -833351863 ; local_id 2 <<
>> my_index 1073623117 ; other_index 1072863946 ; local_id 3 <<
>> my_index -833351863 ; other_index -833351863 ; local_id 0 <<
>> my_index 1073309078 ; other_index 1072863946 ; local_id 1 <<
>> my_index -833351863 ; other_index 1072863946 ; local_id 0 <<
How could it be?
Code:
/*
 * Work-group max-reduction over fitness values that also tracks the index
 * of the winning element.
 *
 * Each work-item copies one fitness value into local memory and records its
 * own local id in `indexes`. The loop then repeatedly halves the active
 * range: work-item i keeps the larger of slots i and i + offset, together
 * with the index that belongs to it. When the loop finishes, slot 0 holds
 * the maximum and its original index.
 *
 * Parameters:
 *   inputAgent   - byte array; the element at the winning index is replaced
 *                  by (value - 1) * -1, which maps 0 -> 1 and 1 -> 0.
 *   output       - output[0] receives the maximum fitness.
 *   bestFitness  - fitness values; entries [0, size) are read.
 *   localFitness - local scratch, indexed up to get_local_size(0) - 1, so
 *                  it must hold at least that many doubles.
 *   indexes      - local scratch, indexed up to get_local_size(0) - 1, so
 *                  it must hold at least that many ints.
 *   size         - number of valid entries in bestFitness / inputAgent.
 *
 * Only the local id is used for addressing, so every work-group would read
 * the same entries and write the same output[0]; the kernel is meant to be
 * run with a single work-group. The halving loop only covers every slot
 * when the work-group size is a power of two.
 *
 * NOTE(review): if `indexes` comes back holding values that look like
 * halves of doubles, check the sizes the host passes for the two __local
 * arguments - the kernel itself cannot detect undersized local buffers.
 */
__kernel void reduce( __global char* inputAgent,
                      __global double* output,
                      __global double* bestFitness,
                      __local double* localFitness,
                      __local int* indexes,
                      const unsigned int size)
{
    const int local_id = get_local_id(0);

    // Populate local memory. Valid source entries are [0, size); the guard
    // must be strict, otherwise work-item `size` reads bestFitness[size],
    // one element past the end of the data.
    if ((unsigned int)local_id < size) {
        localFitness[local_id] = bestFitness[local_id];
    } else {
        // Padding slots take part in the reduction with fitness 0.
        localFitness[local_id] = 0;
    }

    // Populate the index table with consecutive numbers.
    indexes[local_id] = local_id;

    // All slots must be written before any work-item reads a neighbour's.
    barrier(CLK_LOCAL_MEM_FENCE);

    for (int offset = get_local_size(0) / 2; offset > 0; offset >>= 1) {
        if (local_id < offset) {
            // Find the greater fitness.
            double mine = localFitness[local_id];
            double other = localFitness[local_id + offset];
            localFitness[local_id] = (mine > other) ? mine : other;

            // Store the index of this greater fitness (ties take `other`).
            int my_index = indexes[local_id];
            int other_index = indexes[local_id + offset];
            indexes[local_id] = (mine > other) ? my_index : other_index;

            printf(">> my_index %d ; other_index %d ; local_id %d <<",
                   my_index, other_index, local_id);
        }
        // Reached by every work-item on every iteration (it sits outside
        // the `if`), so the next round sees this round's results.
        barrier(CLK_LOCAL_MEM_FENCE);
    }

    if (local_id == 0) { // last, not mutated
        // Return the maximum.
        output[0] = localFitness[0];

        // Reuse the found index. A padding slot can win (index >= size),
        // in which case nothing is modified.
        int bit_to_change = indexes[0];
        if (bit_to_change >= 0 && (unsigned int)bit_to_change < size) {
            inputAgent[bit_to_change] = (inputAgent[bit_to_change] - 1) * -1;
        }
    }
}
Any suggestions welcome.
EDIT:
I was able to pinpoint the problem to the barrier. After adding two more printf's, one just before the first barrier and a second just after it:
[...]
indexes[localID] = localID;
printf("<<< Fit %f for %d\n", localFitness[localID], indexes[localID] );
barrier(CLK_LOCAL_MEM_FENCE);
printf("*** Fit %f for %d\n", localFitness[localID], indexes[localID] );
[...]
output I get :
<<< Fit 0.990099 for 0
<<< Fit 1.449275 for 1
<<< Fit 1.538462 for 2
<<< Fit 1.030928 for 3
[...]
******* Fit 0.990099 for -1636178018
******* Fit 1.449275 for 1072593383
******* Fit 1.538462 for -1184818564
******* Fit 1.030928 for 1072042407
******* Fit 2.222222 for -1688619621
******* Fit 2.222222 for 1072388533
[...]
which suggests that the barrier doesn't work, and I don't know what to do about that. Any ideas?
[…] printf the indexes[local_id] directly before and after the first barrier? Maybe someone else has an idea, but in any case, some information about your OpenCL version (and how you launch the kernel - i.e. the parameters) could be helpful. - Marco13
[…] mine or other inside the for loop (in the same run; after the first print) in some cases gives me 0.0. What's even more odd, this "sometimes" could be recreated (same index, with same data); but usually on the third or even a later run of the function calling this kernel. Maybe this will help with at least suggesting something. - mpm
[…] clEnqueueNDRangeKernel(...))? - jprice