We can bind the data frame columns separately from the regular columns. Here are three similar solutions, each wrapping one of the three functions mentioned in the question:
base R
# Row-bind data frames that may contain data frame columns (base R version).
#
# Regular columns are bound with rbind(); columns that are themselves data
# frames are bound separately, by a recursive call, and then attached to the
# result under their original name.
#
# Arguments:
#   ...  the data frames to bind.
# Returns:
#   the result of rbind() on the regular columns, with one extra column per
#   data frame column.
# Errors:
#   stops if a column is a data frame in one argument but a non-NULL,
#   non-data-frame value in another.
rbind_fixed <- function(...) {
  dfs <- list(...)

  # Names of the columns that are data frames. vapply() always yields a
  # logical vector, even for a zero-column input, where sapply() would return
  # an empty list that cannot be used as a subscript.
  get_df_col_names <- function(df) {
    names(df)[vapply(df, is.data.frame, logical(1))]
  }
  df_col_names <- unique(unlist(lapply(dfs, get_df_col_names), use.names = FALSE))

  # Fail if these are not consistently data frames in all arguments.
  for (df_col_name in df_col_names) {
    for (df in dfs) {
      col <- df[[df_col_name]]
      if (!is.null(col) && !is.data.frame(col)) {
        stop(df_col_name, " is not consistently a data frame column")
      }
    }
  }

  # Bind the data frames, leaving the data frame columns out.
  dfs_regular <- lapply(dfs, function(df) df[setdiff(names(df), df_col_names)])
  res <- do.call(rbind, dfs_regular)

  # Bind each data frame column separately and add it to the result.
  for (df_col_name in df_col_names) {
    subdfs <- lapply(dfs, function(df) {
      if (df_col_name %in% names(df)) {
        df[[df_col_name]]
      } else {
        # Zero-column placeholder with the same number of rows; seq_len()
        # gives no rows for a 0-row input (seq.int(0) would give c(1, 0)).
        # NOTE(review): base rbind() is documented to drop zero-column
        # arguments, so a column missing from one input may still fail
        # further down -- confirm before relying on this branch.
        data.frame(row.names = seq_len(nrow(df)))
      }
    })
    # Recursive, to be robust to deeply nested data frames.
    res[[df_col_name]] <- do.call(rbind_fixed, subdfs)
  }
  res
}
# Example call; `x` and `y` are not defined in this snippet (presumably the
# data frames from the question). The `#>` lines are the printed result.
rbind_fixed(x, y)
#> a z
#> 1 1 2
#> 2 3 4
dplyr
# Row-bind data frames that may contain data frame columns (dplyr version).
#
# Regular columns are bound with dplyr::bind_rows(); columns that are
# themselves data frames are bound separately, by a recursive call, and then
# attached to the result under their original name.
#
# Arguments:
#   ...  the data frames to bind; a list can be spliced in with `!!!`.
# Returns:
#   the result of dplyr::bind_rows() on the regular columns, with one extra
#   column per data frame column.
# Errors:
#   stops if a column is a data frame in one argument but a non-NULL,
#   non-data-frame value in another.
bind_rows_fixed <- function(...) {
  # Use list2() so callers can use `!!!`, as we lose the "autosplice" feature
  # of bind_rows().
  dfs <- rlang::list2(...)

  # Names of the columns that are data frames. vapply() always yields a
  # logical vector, even for a zero-column input, where sapply() would return
  # an empty list that cannot be used as a subscript.
  get_df_col_names <- function(df) {
    names(df)[vapply(df, is.data.frame, logical(1))]
  }
  df_col_names <- unique(unlist(lapply(dfs, get_df_col_names), use.names = FALSE))

  # Fail if these are not consistently data frames in all arguments.
  for (df_col_name in df_col_names) {
    for (df in dfs) {
      col <- df[[df_col_name]]
      if (!is.null(col) && !is.data.frame(col)) {
        stop(df_col_name, " is not consistently a data frame column")
      }
    }
  }

  # Bind the data frames, leaving the data frame columns out.
  dfs_regular <- lapply(dfs, function(df) df[setdiff(names(df), df_col_names)])
  res <- dplyr::bind_rows(dfs_regular)

  # Bind each data frame column separately and add it to the result.
  for (df_col_name in df_col_names) {
    subdfs <- lapply(dfs, function(df) {
      if (df_col_name %in% names(df)) {
        df[[df_col_name]]
      } else {
        # Zero-column placeholder with the same number of rows. Namespaced
        # like the other calls so it works without attaching any package.
        dplyr::tibble(.rows = nrow(df))
      }
    })
    # Recursive, to be robust to deeply nested data frames.
    res[[df_col_name]] <- bind_rows_fixed(!!!subdfs)
  }
  res
}
# Example call; `x` and `y` are not defined in this snippet (presumably the
# data frames from the question). The `#>` lines are the printed result.
bind_rows_fixed(x,y)
#> a z
#> 1 1 2
#> 2 3 4
data.table
# Row-bind a list of data frames that may contain data frame columns
# (data.table version).
#
# Regular columns are bound with data.table::rbindlist(); columns that are
# themselves data frames are bound separately, by a recursive call, and then
# attached to the result under their original name.
#
# Arguments:
#   l  a list of data frames to bind.
# Returns:
#   the result of data.table::rbindlist() on the regular columns, with one
#   extra column per data frame column.
# Errors:
#   stops if a column is a data frame in one element but a non-NULL,
#   non-data-frame value in another.
rbindlist_fixed <- function(l) {
  dfs <- l

  # Names of the columns that are data frames. vapply() always yields a
  # logical vector, even for a zero-column input, where sapply() would return
  # an empty list that cannot be used as a subscript.
  get_df_col_names <- function(df) {
    names(df)[vapply(df, is.data.frame, logical(1))]
  }
  df_col_names <- unique(unlist(lapply(dfs, get_df_col_names), use.names = FALSE))

  # Fail if these are not consistently data frames in all elements.
  for (df_col_name in df_col_names) {
    for (df in dfs) {
      col <- df[[df_col_name]]
      if (!is.null(col) && !is.data.frame(col)) {
        stop(df_col_name, " is not consistently a data frame column")
      }
    }
  }

  # Bind the data frames, leaving the data frame columns out.
  dfs_regular <- lapply(dfs, function(df) df[setdiff(names(df), df_col_names)])
  res <- data.table::rbindlist(dfs_regular)

  # Bind each data frame column separately and add it to the result.
  for (df_col_name in df_col_names) {
    subdfs <- lapply(dfs, function(df) {
      if (df_col_name %in% names(df)) {
        df[[df_col_name]]
      } else {
        # Zero-column placeholder with the same number of rows; seq_len()
        # gives no rows for a 0-row input (seq.int(0) would give c(1, 0)).
        # NOTE(review): how rbindlist() treats a zero-column element is not
        # visible here -- confirm before relying on this branch.
        data.frame(row.names = seq_len(nrow(df)))
      }
    })
    # Recursive, to be robust to deeply nested data frames.
    res[[df_col_name]] <- rbindlist_fixed(subdfs)
  }
  res
}
# Example call; `x` and `y` are not defined in this snippet (presumably the
# data frames from the question). The `#>` lines are the printed output.
dt <- rbindlist_fixed(list(x,y))
dt
#> a b
#> 1: 1 <multi-column>
#> 2: 3 <multi-column>
# The str() output below shows that both the result and its nested column `b`
# carry the classes 'data.table' and 'data.frame'.
str(dt)
#> Classes 'data.table' and 'data.frame': 2 obs. of 2 variables:
#> $ a: num 1 3
#> $ b:Classes 'data.table' and 'data.frame': 2 obs. of 1 variable:
#> ..$ z: num 2 4
#> ..- attr(*, ".internal.selfref")=<externalptr>
#> - attr(*, ".internal.selfref")=<externalptr>
res$b <- data.frame(z = c(2,4))
? – Ali