You mention batches as well as 1D, so I will assume you want to do either row-wise 1D transforms, or column-wise 1D transforms.
In this case, the number of batches is equal to the number of rows for the row-wise case or the number of columns for the column-wise case.
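For 1D transforms, the values stored in `inembed` and `onembed` don't really matter, but the pointers must not be set to NULL: passing NULL for either one tells cuFFT to fall back to the basic (contiguous) data layout and ignore the stride and distance parameters.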
The `idist`, `istride`, `odist`, and `ostride` parameters are the key ones to change for this example (along with `batch`). See the "Advanced Data Layout" section of the cuFFT documentation for how these parameters are turned into memory addresses.
Here is a worked example, showing row-wise and column-wise transforms:
$ cat t1620.cu
#include <cufft.h>
#include <iostream>
// Demonstrates batched 1D complex-to-complex forward FFTs over a 4x4 matrix
// stored in row-major order, using the cuFFT advanced data layout.
//
// Build with -DROW_WISE to transform each row; build without it to transform
// each column. The transform is done in place and the resulting 16 values are
// printed one per line as "real,imag".
//
// Returns 0 on success and 1 if any CUDA runtime or cuFFT call fails.
int main(){
  // Every row of the matrix is {1, 2, 3, 4}, so every column is constant.
  cufftComplex data[] = {
    {1.0f, 0}, {2.0f, 0}, {3.0f, 0}, {4.0f, 0},
    {1.0f, 0}, {2.0f, 0}, {3.0f, 0}, {4.0f, 0},
    {1.0f, 0}, {2.0f, 0}, {3.0f, 0}, {4.0f, 0},
    {1.0f, 0}, {2.0f, 0}, {3.0f, 0}, {4.0f, 0}};
  const int ds = sizeof(data)/sizeof(data[0]);

  cufftComplex *d_data = 0;
  cudaError_t cuda_status = cudaMalloc(&d_data, ds*sizeof(data[0]));
  if (cuda_status != cudaSuccess) {
    std::cerr << "cudaMalloc failed: " << cudaGetErrorString(cuda_status) << std::endl;
    return 1;
  }
  cuda_status = cudaMemcpy(d_data, data, ds*sizeof(data[0]), cudaMemcpyHostToDevice);
  if (cuda_status != cudaSuccess) {
    std::cerr << "cudaMemcpy (host to device) failed: " << cudaGetErrorString(cuda_status) << std::endl;
    cudaFree(d_data);
    return 1;
  }

  const int dim = 4;
  // rank must be a compile-time constant: it is used as an array bound below.
  const int rank = 1;
  const int nx = dim;  // elements per row (row length)
  const int ny = dim;  // number of rows (column length)
#ifdef ROW_WISE
  // One transform per row: consecutive samples are adjacent in memory and
  // consecutive rows start nx elements apart.
  int batch = ny;
  int inembed[rank] = {nx};
  int onembed[rank] = {nx};
  int istride = 1;
  int idist = nx;
  int ostride = 1;
  int odist = nx;
  int n[] = {nx};
#else
  // One transform per column: consecutive samples are nx elements apart and
  // consecutive columns start one element apart.
  int batch = nx;
  int inembed[rank] = {ny};
  int onembed[rank] = {ny};
  int istride = nx;
  int idist = 1;
  int ostride = nx;
  int odist = 1;
  int n[] = {ny};
#endif

  // inembed/onembed are passed as non-NULL pointers so that the stride and
  // distance arguments are honored.
  cufftHandle plan;
  cufftResult err = cufftPlanMany(&plan, rank, n, inembed,
                                  istride, idist, onembed, ostride,
                                  odist, CUFFT_C2C, batch);
  std::cout << "plan :" << (int)err << std::endl;
  if (err != CUFFT_SUCCESS) {
    // No plan was created, so only the device buffer needs releasing.
    cudaFree(d_data);
    return 1;
  }

  int status = 0;
  err = cufftExecC2C(plan, d_data, d_data, CUFFT_FORWARD);
  std::cout << "exec :" << (int)err << std::endl;
  if (err != CUFFT_SUCCESS) {
    status = 1;
  } else {
    // Copy the result back and check the return code before printing.
    cuda_status = cudaMemcpy(data, d_data, ds*sizeof(data[0]), cudaMemcpyDeviceToHost);
    if (cuda_status != cudaSuccess) {
      std::cerr << "cudaMemcpy (device to host) failed: " << cudaGetErrorString(cuda_status) << std::endl;
      status = 1;
    } else {
      for (int i = 0; i < ds; i++) std::cout << data[i].x << "," << data[i].y << std::endl;
    }
  }

  // Release the plan and the device buffer on every path that created them.
  cufftDestroy(plan);
  cudaFree(d_data);
  return status;
}
$ nvcc -o t1620 t1620.cu -lcufft -DROW_WISE
$ ./t1620
plan :0
exec :0
10,0
-2,2
-2,0
-2,-2
10,0
-2,2
-2,0
-2,-2
10,0
-2,2
-2,0
-2,-2
10,0
-2,2
-2,0
-2,-2
$ nvcc -o t1620 t1620.cu -lcufft
$ ./t1620
plan :0
exec :0
4,0
8,0
12,0
16,0
0,0
0,0
0,0
0,0
0,0
0,0
0,0
0,0
0,0
0,0
0,0
0,0
$