I have a GTX TITAN on which I would use the Dynamic Parallelism.
I did some tests by adding "-rdc = true" and the flag "-lcudadevrt" but I always get a linking error when compiling.
This is my Makefile:
DEP = main.o Optimizer.o cudaOptimizer.o
CUDA_INSTALL_PATH = /usr/local/cuda
INC_CUDA = -I$(CUDA_INSTALL_PATH)/include
LIB_CUDA = -L$(CUDA_INSTALL_PATH)/lib64 -lcuda -lcudart -lcublas -lcublas_device -lcudadevrt
optx: $(DEP)
g++ $(LIB_CUDA) -o optx $(DEP)
main.o:
g++ $(INC_CUDA) -c main.cpp
Optimizer.o:
g++ $(INC_CUDA) -c Optimizer.cpp
cudaOptimizer.o:
nvcc --compiler-options '-fPIC -O3' -arch sm_35 -c cudaOptimizer.cu -rdc=true
This is the out:
g++ -I/usr/local/cuda/include -c main.cpp
g++ -I/usr/local/cuda/include -c Optimizer.cpp
nvcc --compiler-options '-fPIC -O3' -arch sm_35 -c cudaOptimizer.cu -rdc=true
g++ -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas -lcublas_device -lcudadevrt -o optx main.o Optimizer.o cudaOptimizer.o
cudaOptimizer.o: In function`__sti____cudaRegisterAll_48_tmpxft_00003942_00000000_6_cudaOptimizer_cpp1_ii__Z4ciaov()':tmpxft_00003942_00000000-3_cudaOptimizer.cudafe1.cpp:(.text.startup+0x1d): undefined reference to `__cudaRegisterLinkedBinary_48_tmpxft_00003942_00000000_6_cudaOptimizer_cpp1_ii__Z4ciaov'
collect2: error: ld returned 1 exit status
make: *** [optx] Error 1
How can I solve this problem?
Thanks