I am trying to implement dynamic binding of functions in CUDA using the convenient unified memory model. Here, we have a struct Parameters containing a single member, a function pointer void (*p_func)().
#include <cstdio>
/// Parameter bundle holding the dynamically bound function.
struct Parameters {
    /// Signature of the bound function: takes no arguments and returns nothing.
    using Callback = void (*)();

    /// Address of the function to invoke. This is a single slot, so it holds
    /// exactly one address at a time regardless of which side reads it.
    Callback p_func;
};
The struct is allocated in unified (managed) memory, and we assign the actual function func_A to p_func.
/// Target of the dynamic binding. Compiled for both host and device; it only
/// prints a confirmation message so a successful call is visible in the output.
__host__ __device__ void func_A() {
    printf("func_A is correctly invoked!\n");
}
When we run the following code, a problem arises: if assignment 1 runs, i.e., para->p_func = func_A on the host, then the pointer seen by both the host and the device holds the host address of the function. In contrast, if assignment 2 runs, the pointer seen by both holds the device address.
/// Kernel that stores the address of func_A, as taken in device code, into
/// para->p_func. Every launched thread writes the same value; main launches
/// it as <<<1,1>>>.
__global__ void assign_func_pointer(Parameters* para) {
    void (*device_entry)() = func_A;  // address as seen from device code
    para->p_func = device_entry;
}
/// Kernel that prints the address stored in para->p_func and then calls it
/// from device code.
///
/// Precondition: the stored pointer must be a device-side function address
/// (for example one written by a kernel). A host address stored in the same
/// slot is not callable from device code.
/// Every launched thread prints and calls; main launches it as <<<1,1>>>.
__global__ void run_on_device(Parameters* para) {
    // Guard the indirect call: a null slot cannot be invoked.
    if (para == nullptr || para->p_func == nullptr) {
        printf("run on device: no function assigned\n");
        return;
    }
    // %p expects a void*, so convert the function pointer explicitly.
    printf("run on device with address %p\n",
           reinterpret_cast<void*>(para->p_func));
    para->p_func();
}
/// Prints the address stored in para->p_func and then calls it on the host.
///
/// Precondition: the stored pointer must be a host-side function address.
/// A device address stored in the same slot is not callable from host code.
void run_on_host(Parameters* para) {
    // Guard the indirect call: a null slot cannot be invoked.
    if (para == nullptr || para->p_func == nullptr) {
        printf("run on host: no function assigned\n");
        return;
    }
    // %p expects a void*, so convert the function pointer explicitly.
    printf("run on host with address %p\n",
           reinterpret_cast<void*>(para->p_func));
    para->p_func();
}
/// Reports a failed CUDA runtime call on stderr.
/// Returns true when `status` is cudaSuccess, false otherwise.
static bool cuda_succeeded(cudaError_t status, const char* what) {
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

/// Runs the host/device binding sequence on an already allocated `para`.
/// Returns 0 on success and 1 if any CUDA call or kernel launch fails.
static int run_binding_demo(Parameters* para) {
    // Assignment 1: store the address of func_A as taken in host code.
    para->p_func = func_A;
    printf("addr@host: %p\n", reinterpret_cast<void*>(para->p_func));

    // Assignment 2: overwrite the slot with the address taken in device code.
    assign_func_pointer<<<1, 1>>>(para);
    if (!cuda_succeeded(cudaGetLastError(), "assign_func_pointer launch") ||
        !cuda_succeeded(cudaDeviceSynchronize(), "assign_func_pointer")) {
        return 1;
    }
    printf("addr@device: %p\n", reinterpret_cast<void*>(para->p_func));

    // The slot currently holds the device address, so the device call is valid.
    run_on_device<<<1, 1>>>(para);
    if (!cuda_succeeded(cudaGetLastError(), "run_on_device launch") ||
        !cuda_succeeded(cudaDeviceSynchronize(), "run_on_device")) {
        return 1;
    }

    // The single slot still holds the device address here; re-store the host
    // address so the host-side call does not jump to a device address.
    para->p_func = func_A;
    run_on_host(para);
    return 0;
}

/// Entry point: allocates a managed Parameters object, binds func_A for the
/// device and for the host in turn, calls it on each side, and frees the
/// allocation. Returns 0 on success and 1 on any CUDA failure.
int main(int argc, char* argv[]) {
    (void)argc;
    (void)argv;

    Parameters* para = nullptr;
    if (!cuda_succeeded(cudaMallocManaged(&para, sizeof(Parameters)),
                        "cudaMallocManaged")) {
        return 1;
    }

    const int result = run_binding_demo(para);

    // Always release the managed allocation, even when the demo failed.
    if (!cuda_succeeded(cudaFree(para), "cudaFree")) {
        return 1;
    }
    return result;
}
The question now is: under the unified memory model, is it possible for the function pointer to refer to the correct function address on both the device and the host, respectively?