As I mentioned in my comment, I believe a recursive approach to have two inherent flaws to this task.
The first flaw is the limit on open files. This limit imposes a limit on deep traversal. If there are enough sub-folders, the recursive approach will break. (See edit regarding stack overflow)
The second flaw is a bit more subtle. The recursive approach makes it very hard to test for hard links. If a folder tree is cyclic (due to hard links), the recursive approach will break (hopefully without a stack overflow). (See edit regarding hard links)
However, it is quite simple to avoid these issues by replacing recursion with a single file descriptor and linked lists.
I assume this isn't a school project and that recursion is optional.
Here's an example application.
Use a.out ./
to view folder tree.
I apologize for the macros and stuff... I usually use inline functions, but I thought it would be easier to follow the code if it was all in a single function.
#include <dirent.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
int main(int argc, char const *argv[]) {
/* print use instruction unless a folder name was given */
if (argc < 2)
fprintf(stderr,
"\nuse:\n"
" %s <directory>\n"
"for example:\n"
" %s ./\n\n",
argv[0], argv[0]),
exit(0);
/*************** a small linked list macro implementation ***************/
typedef struct list_s {
struct list_s *next;
struct list_s *prev;
} list_s;
#define LIST_INIT(name) \
{ .next = &name, .prev = &name }
#define LIST_PUSH(dest, node) \
do { \
(node)->next = (dest)->next; \
(node)->prev = (dest); \
(node)->next->prev = (node); \
(dest)->next = (node); \
} while (0);
#define LIST_POP(list, var) \
if ((list)->next == (list)) { \
var = NULL; \
} else { \
var = (list)->next; \
(list)->next = var->next; \
var->next->prev = var->prev; \
}
/*************** a record (file / folder) item type ***************/
typedef struct record_s {
/* this is a flat processing queue. */
list_s queue;
/* this will list all queued and processed folders (cyclic protection) */
list_s folders;
/* this will list all the completed items (siblings and such) */
list_s list;
/* unique ID */
ino_t ino;
/* name length */
size_t len;
/* name string */
char name[];
} record_s;
/* take a list_s pointer and convert it to the record_s pointer */
#define NODE2RECORD(node, list_name) \
((record_s *)(((uintptr_t)(node)) - \
((uintptr_t) & ((record_s *)0)->list_name)))
/* initializes a new record */
#define RECORD_INIT(name) \
(record_s){.queue = LIST_INIT((name).queue), \
.folders = LIST_INIT((name).folders), \
.list = LIST_INIT((name).list)}
/*************** the actual code ***************/
record_s records = RECORD_INIT(records);
record_s *pos, *item;
list_s *tmp;
DIR *dir;
struct dirent *entry;
/* initialize the root folder record and add it to the queue */
pos = malloc(sizeof(*pos) + strlen(argv[1]) + 2);
*pos = RECORD_INIT(*pos);
pos->len = strlen(argv[1]);
memcpy(pos->name, argv[1], pos->len);
if (pos->name[pos->len - 1] != '/')
pos->name[pos->len++] = '/';
pos->name[pos->len] = 0;
/* push to queue, but also push to list (first item processed) */
LIST_PUSH(&records.queue, &pos->queue);
LIST_PUSH(&records.list, &pos->list);
/* as long as the queue has items to be processed, do so */
while (records.queue.next != &records.queue) {
/* pop queued item */
LIST_POP(&records.queue, tmp);
/* collect record to process */
pos = NODE2RECORD(tmp, queue);
/* add record to the processed folder list */
LIST_PUSH(&records.folders, &pos->folders);
/* process the folder and add all folder data to current list */
dir = opendir(pos->name);
if (!dir)
continue;
while ((entry = readdir(dir)) != NULL) {
/* create new item, copying it's path data and unique ID */
item = malloc(sizeof(*item) + pos->len + entry->d_namlen + 2);
*item = RECORD_INIT(*item);
item->len = pos->len + entry->d_namlen;
memcpy(item->name, pos->name, pos->len);
memcpy(item->name + pos->len, entry->d_name, entry->d_namlen);
item->name[item->len] = 0;
item->ino = entry->d_ino;
/* add item to the list, right after the `pos` item */
LIST_PUSH(&pos->list, &item->list);
/* unless it's a folder, we're done. */
if (entry->d_type != DT_DIR)
continue;
/* test for '.' and '..' */
if (entry->d_name[0] == '.' &&
(entry->d_name[1] == 0 ||
(entry->d_name[1] == '.' && entry->d_name[2] == 0)))
continue;
/* add folder marker */
item->name[item->len++] = '/';
item->name[item->len] = 0;
/* test for cyclic processing */
list_s *t = records.folders.next;
while (t != &records.folders) {
if (NODE2RECORD(t, folders)->ino == item->ino) {
/* we already processed this folder! */
break; /* this breaks from the small loop... */
}
t = t->next;
}
if (t != &records.folders)
continue; /* if we broke from the small loop, entry is done */
/* item is a new folder, add to queue */
LIST_PUSH(&records.queue, &item->queue);
}
closedir(dir);
}
/*************** Printing the results and cleaning up ***************/
while (records.list.next != &records.list) {
/* pop list item */
LIST_POP(&records.list, tmp);
/* collect and process record */
pos = NODE2RECORD(tmp, list);
fwrite(pos->name, pos->len, 1, stderr);
fwrite("\n", 1, 1, stderr);
/* free node */
free(pos);
}
return 0;
}
EDIT
@Stargateur mentioned in the comments that the recursive code will probably overflow the stack before reaching the open file limit.
Although I don't see how a stack-overflow is any better, this assessment is probably correct as long as the process isn't close to the file limit when invoked.
Another point mentioned by @Stargateur in the comments was that the depth of the recursive code is limited by the maximum amount of sub-directories (64000 on the ext4 filesystem) and that hard links are extremely unlikely (since hard links to folders aren't allowed on Linux/Unix).
This is good news if the code is running on Linux (which it is, according to the question), so this issue isn't a real concern (unless running the code on macOS or, maybe, Windows)... although 64K subfolders in recursion might blow the stack wide open.
Having said that, the none recursive option still has advantages, such as being able to easily add a limit to the amount of items processed as well as being able to cache the result.
P.S.
According to the comments, here's a non-recursive version of the code that doesn't check for cyclic hierarchies. It's faster and should be safe enough to use on a Linux machine where hard links to folders aren't allowed.
#include <dirent.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
int main(int argc, char const *argv[]) {
/* print use instruction unless a folder name was given */
if (argc < 2)
fprintf(stderr,
"\nuse:\n"
" %s <directory>\n"
"for example:\n"
" %s ./\n\n",
argv[0], argv[0]),
exit(0);
/*************** a small linked list macro implementation ***************/
typedef struct list_s {
struct list_s *next;
struct list_s *prev;
} list_s;
#define LIST_INIT(name) \
{ .next = &name, .prev = &name }
#define LIST_PUSH(dest, node) \
do { \
(node)->next = (dest)->next; \
(node)->prev = (dest); \
(node)->next->prev = (node); \
(dest)->next = (node); \
} while (0);
#define LIST_POP(list, var) \
if ((list)->next == (list)) { \
var = NULL; \
} else { \
var = (list)->next; \
(list)->next = var->next; \
var->next->prev = var->prev; \
}
/*************** a record (file / folder) item type ***************/
typedef struct record_s {
/* this is a flat processing queue. */
list_s queue;
/* this will list all the completed items (siblings and such) */
list_s list;
/* unique ID */
ino_t ino;
/* name length */
size_t len;
/* name string */
char name[];
} record_s;
/* take a list_s pointer and convert it to the record_s pointer */
#define NODE2RECORD(node, list_name) \
((record_s *)(((uintptr_t)(node)) - \
((uintptr_t) & ((record_s *)0)->list_name)))
/* initializes a new record */
#define RECORD_INIT(name) \
(record_s){.queue = LIST_INIT((name).queue), .list = LIST_INIT((name).list)}
/*************** the actual code ***************/
record_s records = RECORD_INIT(records);
record_s *pos, *item;
list_s *tmp;
DIR *dir;
struct dirent *entry;
/* initialize the root folder record and add it to the queue */
pos = malloc(sizeof(*pos) + strlen(argv[1]) + 2);
*pos = RECORD_INIT(*pos);
pos->len = strlen(argv[1]);
memcpy(pos->name, argv[1], pos->len);
if (pos->name[pos->len - 1] != '/')
pos->name[pos->len++] = '/';
pos->name[pos->len] = 0;
/* push to queue, but also push to list (first item processed) */
LIST_PUSH(&records.queue, &pos->queue);
LIST_PUSH(&records.list, &pos->list);
/* as long as the queue has items to be processed, do so */
while (records.queue.next != &records.queue) {
/* pop queued item */
LIST_POP(&records.queue, tmp);
/* collect record to process */
pos = NODE2RECORD(tmp, queue);
/* process the folder and add all folder data to current list */
dir = opendir(pos->name);
if (!dir)
continue;
while ((entry = readdir(dir)) != NULL) {
/* create new item, copying it's path data and unique ID */
item = malloc(sizeof(*item) + pos->len + entry->d_namlen + 2);
*item = RECORD_INIT(*item);
item->len = pos->len + entry->d_namlen;
memcpy(item->name, pos->name, pos->len);
memcpy(item->name + pos->len, entry->d_name, entry->d_namlen);
item->name[item->len] = 0;
item->ino = entry->d_ino;
/* add item to the list, right after the `pos` item */
LIST_PUSH(&pos->list, &item->list);
/* unless it's a folder, we're done. */
if (entry->d_type != DT_DIR)
continue;
/* test for '.' and '..' */
if (entry->d_name[0] == '.' &&
(entry->d_name[1] == 0 ||
(entry->d_name[1] == '.' && entry->d_name[2] == 0)))
continue;
/* add folder marker */
item->name[item->len++] = '/';
item->name[item->len] = 0;
/* item is a new folder, add to queue */
LIST_PUSH(&records.queue, &item->queue);
}
closedir(dir);
}
/*************** Printing the results and cleaning up ***************/
while (records.list.next != &records.list) {
/* pop list item */
LIST_POP(&records.list, tmp);
/* collect and process record */
pos = NODE2RECORD(tmp, list);
fwrite(pos->name, pos->len, 1, stderr);
fwrite("\n", 1, 1, stderr);
/* free node */
free(pos);
}
return 0;
}