I have a code in Ada that must use CUDA without using the Ada binding. So I made an interface that allows the Ada program to call C code. Now I want to compile it.
How can I tell gprbuild to not use gcc to compile .cu
files by nvcc? If it's not possible, maybe I have to generate the objects using nvcc and then link them with the ada code? How would you do it?
EDIT: Using the link given by Simon Wright, I made this gpr file:
project Cuda_Interface is
for Languages use ("Ada", "Cuda");
for Source_Dirs use ("src");
for Object_Dir use "obj";
for Exec_Dir use ".";
for Main use ("cuda_interface.adb");
for Create_Missing_Dirs use "True";
package Naming is
for Body_Suffix("Cuda") use ".cu";
for Spec_Suffix("Cuda") use ".cuh";
end Naming;
package Compiler is
for Driver("Cuda") use "nvcc";
for Leading_Required_Switches("Cuda") use ("-c");
end Compiler;
package Linker is
for Default_Switches("Ada") use ("-L/usr/local/cuda/lib64", "-lcuda", "-lcudart", "-lm");
end Linker;
end Cuda_Interface;
The compilation works well but the linker returns this error:
/usr/bin/ld : cuda_interface.o : in the function « _ada_cuda_interface » :
cuda_interface.adb:(.text+0x3a5) : undefined reference to « inter_add_two »
collect2: error: ld returned 1 exit status
gprbuild: link of cuda_interface.adb failed
cuda_interface.adb:
with Ada.Text_IO; use Ada.Text_IO;
procedure Cuda_Interface is
type Index is range 1 .. 5;
type Element_Type is new Natural;
type Array_Type is array (Index) of Element_Type;
procedure Inter_Add_Two(Arr : in out Array_Type; Length : Index)
with
Import => True,
Convention => C,
External_Name => "inter_add_two";
A : Array_Type := (1, 2, 3, 4, 5);
begin
for I in Index loop
Put_Line("Value at "
& Index'Image(I)
& " is "
& Element_Type'Image(A(I)));
end loop;
New_Line;
Inter_Add_Two(A, Index'Last);
for I in Index loop
Put_Line("Value at "
& Index'Image(I)
& " is "
& Element_Type'Image(A(I)));
end loop;
end Cuda_Interface;
kernel.cuh
#ifndef __KERNEL_CUH__
#define __KERNEL_CUH__
#include <cuda.h>
__global__ void kernel_add_two(unsigned int *a, unsigned int length);
void inter_add_two(unsigned int *a, unsigned int length);
#endif // __KERNEL_CUH__
kernel.cu
#include "kernel.cuh"
#include <math.h>
#define THREADS_PER_BLOCK (1024)
__global__ void kernel_add_two(unsigned int *a, unsigned int length)
{
unsigned int tid = threadIdx.x + blockIdx.x * blockDim.x;
if (tid < length) a[tid] += 2;
}
void inter_add_two(unsigned int *a, unsigned int length)
{
unsigned int block_number = ceil(((float)length) / THREADS_PER_BLOCK);
unsigned int *d_a;
cudaMalloc((void**)&d_a, sizeof(unsigned int) * length);
cudaMemcpy(d_a, a, sizeof(unsigned int) * length, cudaMemcpyHostToDevice);
kernel_add_two<<<block_number, THREADS_PER_BLOCK>>>(d_a, length);
cudaMemcpy(a, d_a, sizeof(unsigned int) * length, cudaMemcpyDeviceToHost);
cudaFree(d_a);
}
Link_Name
, might be useful instead of/as well asExternal_Name
– Simon Wright