I apologize if this problem has been addressed before, but I've done some searching and so far I've come up empty-handed. I'm trying to compile a CUDA version of Hello World, slightly modified from here. My code is:
// This is the REAL "hello world" for CUDA!
// It takes the string "Hello ", prints it, then passes it to CUDA with an array
// of offsets. Then the offsets are added in parallel to produce the string "World!"
// By Ingemar Ragnemalm 2010
#include <stdio.h>
#include <iostream>
// Number of elements in the host arrays `a` and `b` in main(); also used to
// size the device buffers and to compute the number of blocks at launch.
const int N = 16;
// Threads per block (x dimension of `dimBlock`) used for the kernel launch.
const int blocksize = 16;
// Kernel: adds the integer offset b[i] to the character a[i], in place.
//
// Parameters:
//   a - buffer of N chars, modified in place (main() passes a cudaMalloc'd pointer).
//   b - buffer of N int offsets, read only (main() passes a cudaMalloc'd pointer).
//
// Launch layout: 1-D grid of 1-D blocks, one thread per element. The index is
// the flat global thread index, so the result stays correct if the launch ever
// uses more than one block; threads past the end of the N-element arrays do
// nothing. No shared memory is used.
__global__
void hello(char *a, int *b)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < N)  // guard the grid tail when the grid does not divide N evenly
        a[i] += b[i];
}
// Host driver: prints "Hello ", copies the string and an array of per-character
// offsets to the device, launches `hello` to add the offsets in place, copies
// the string back and prints it. With a working driver/runtime the offsets turn
// "Hello " into "World!". It also prints the CUDA runtime and driver versions
// together with the status codes returned by the two version queries.
//
// NOTE(review): the return codes of cudaMalloc/cudaMemcpy/cudaFree and of the
// kernel launch are not checked here, so a failure in any of them is silent;
// only the two version queries have their status printed.
int main()
{
    // "Hello " followed by NUL padding; the six offsets below map
    // H->W, e->o, l->r, l->l, o->d, ' '->'!'; the remaining offsets are 0.
    char a[N] = "Hello \0\0\0\0\0\0";
    int b[N] = {15, 10, 6, 0, -11, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};

    char *ad;  // device copy of a
    int *bd;   // device copy of b
    const int csize = N*sizeof(char);
    const int isize = N*sizeof(int);

    printf("%s", a);

    // Allocate device buffers and upload both arrays.
    cudaMalloc( (void**)&ad, csize );
    cudaMalloc( (void**)&bd, isize );
    cudaMemcpy( ad, a, csize, cudaMemcpyHostToDevice );
    cudaMemcpy( bd, b, isize, cudaMemcpyHostToDevice );

    dim3 dimBlock( blocksize, 1 );
    dim3 dimGrid( 1, 1 );  // not used: the launch below computes the grid size inline

    // Query runtime and driver versions; both start at -1 so an untouched
    // value is distinguishable from a reported one.
    int runtime_version = -1;
    auto error_type_runtime = cudaRuntimeGetVersion(&runtime_version);
    int driver_version = -1;
    auto error_type_driver = cudaDriverGetVersion(&driver_version);

    std::cout << "Blocksize: " << blocksize << std::endl;
    std::cout << "NumBlocks: " << (N + blocksize - 1)/blocksize << std::endl;
    std::cout << "Runtime API: " << runtime_version << std::endl;
    std::cout << "cudaRuntimeGetVersion error type: " << error_type_runtime << std::endl;
    std::cout << "Driver API: " << driver_version << std::endl;
    // NOTE(review): this label says cudaRuntimeGetVersion, but the value printed
    // is the status returned by cudaDriverGetVersion. The text is left unchanged
    // so the program output matches the transcript shown below the code.
    std::cout << "cudaRuntimeGetVersion error type: " << error_type_driver << std::endl;

    // Ceil-div grid size: enough blocks of `blocksize` threads to cover N elements.
    hello<<<(N + blocksize - 1)/blocksize, dimBlock>>>(ad, bd);

    // Download the (possibly modified) string and release the device buffers.
    cudaMemcpy( a, ad, csize, cudaMemcpyDeviceToHost );
    cudaFree( ad );
    cudaFree( bd );

    printf("%s\n", a);
}
But I get:
$ nvcc cuda_hello_world.cu -arch=sm_20 --std=c++11
nvcc warning : The 'compute_20', 'sm_20', and 'sm_21' architectures are deprecated, and may be removed in a future release (Use -Wno-deprecated-gpu-targets to suppress warning).
$ ./a.out
Hello Blocksize: 16
NumBlocks: 1
Runtime API: -1
cudaRuntimeGetVersion error type: 35
Driver API: 0
cudaRuntimeGetVersion error type: 0
I looked up CUDA error 35, which "indicates that the installed NVIDIA CUDA driver is older than the CUDA runtime library," but after running nvidia-smi
I get NVIDIA-SMI 375.82 Driver Version: 375.82, which is from Jul 24, 2017, and
$nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2016 NVIDIA Corporation
Built on Tue_Jan_10_13:22:03_CST_2017
Cuda compilation tools, release 8.0, V8.0.61
so it looks like the correct libraries/drivers are installed, but nvcc can't find them. If I build with -v I get:
$ nvcc cuda_hello_world.cu -arch=sm_20 --std=c++11 -v
nvcc warning : The 'compute_20', 'sm_20', and 'sm_21' architectures are deprecated, and may be removed in a future release (Use -Wno-deprecated-gpu-targets to suppress warning).
#$ _SPACE_=
#$ _CUDART_=cudart
#$ _HERE_=/usr/local/cuda-8.0/bin
#$ _THERE_=/usr/local/cuda-8.0/bin
#$ _TARGET_DIR_=targets/x86_64-linux
#$ TOP=/usr/local/cuda-8.0/bin/..
#$ NVVMIR_LIBRARY_DIR=/usr/local/cuda-8.0/bin/../nvvm/libdevice
#$ LD_LIBRARY_PATH=/usr/local/cuda-8.0/bin/../lib:
#$ PATH=/usr/local/cuda-8.0/bin/../open64/bin:/usr/local/cuda-8.0/bin/../nvvm/bin:/usr/local/cuda-8.0/bin:/home/michael/bin:/home/michael/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games/usr/local/games:/snap/bin:/usr/local/cuda-8.0/bin/:/usr/local/MATLAB/R2016b/bin/
#$ INCLUDES="-I/usr/local/cuda-8.0/bin/../targets/x86_64-linux/include"
#$ LIBRARIES= "-L/usr/local/cuda-8.0/bin/../targets/x86_64-linux/lib/stubs" "-L/usr/local/cuda-8.0/bin/../targets/x86_64-linux/lib"
Am I making a stupid mistake by not including the correct libraries, or is something totally different going on here?
