0
votes

I have the following dataframe:

str(data_raw)
'data.frame': 20 obs. of 18 variables:

$ id : chr "2306173214621953247_176548637" "2304792897512042631_176548637" "2298783867773662543_176548637" "2249480367030200759_176548637" ...

$ user :'data.frame': 20 obs. of 4 variables:

..$ id : chr "176548637" "176548637" "176548637" "176548637" ...

..$ full_name : chr "Carlos Costa" "Carlos Costa" "Carlos Costa" "Carlos Costa" ...

But when I try to get the user$id variable, it shows the error:

data_raw<- data_raw %>% select(id,user.id)

Error: Can't subset columns that don't exist. x The column user.id doesn't exist.

I also tried this way:

data_raw<- data_raw %>%
+ select(id,user$id)
Error: object 'user' not found

I know that the user variable is a dataframe, but how do I get information from a dataframe within another dataframe?

2

2 Answers

1
votes

I simplified the structure to help. The data frame has only 1 row.

dput(head(data_raw, 2))

    structure(list(id = "2306173214621953247_176548637", user = structure(list(
    id = "176548637", full_name = "Carlos Costa", profile_picture = "link.com", 
    username = "carlosocosta"), row.names = 1L, class = "data.frame"), 
    images = structure(list(thumbnail = structure(list(width = 150L, height = 150L, url = "link.com"), row.names = 1L, class = "data.frame"), 
        low_resolution = structure(list(width = 320L, height = 320L, 
            url = "link.com"), row.names = 1L, class = "data.frame"), 
        standard_resolution = structure(list(width = 640L, height = 640L, 
            url = "link.com"), row.names = 1L, class = "data.frame")), row.names = 1L, class = "data.frame"), 
    created_time = "1589137292", caption = structure(list(id = "18105905566138276", 
        text = "Não basta ser mãe! Tem que aprender a jogar Fortnite! Feliz dia das mães! #maedemenino", 
        created_time = "1589137292", from = structure(list(id = "176548637", 
            full_name = "Carlos Costa", profile_picture = "link.com", 
            username = "carlosocosta"), row.names = 1L, class = "data.frame")), row.names = 1L, class = "data.frame"), 
    user_has_liked = FALSE, likes = structure(list(count = 69L), row.names = 1L, class = "data.frame"), 
    tags = list("maedemenino"), filter = "Normal", comments = structure(list(
        count = 3L), row.names = 1L, class = "data.frame"), type = "image", 
    link = "link.com", location = structure(list(
        latitude = NA_real_, longitude = NA_real_, name = NA_character_, 
        id = NA_integer_), row.names = 1L, class = "data.frame"), 
    attribution = NA, users_in_photo = list(structure(list(user = structure(list(
        username = "vivicosta_oficial"), class = "data.frame", row.names = 1L), 
        position = structure(list(x = 0.2210144928, y = 0.5857487923), class = "data.frame", row.names = 1L)), class = "data.frame", row.names = 1L)), 
    carousel_media = list(NULL), videos = structure(list(standard_resolution = structure(list(
        width = NA_integer_, height = NA_integer_, url = NA_character_, 
        id = NA_character_), row.names = 1L, class = "data.frame"), 
        low_resolution = structure(list(width = NA_integer_, 
            height = NA_integer_, url = NA_character_, id = NA_character_), row.names = 1L, class = "data.frame"), 
        low_bandwidth = structure(list(width = NA_integer_, height = NA_integer_, 
            url = NA_character_, id = NA_character_), row.names = 1L, class = "data.frame")), row.names = 1L, class = "data.frame"), 
    video_views = NA_integer_), row.names = 1L, class = "data.frame")
0
votes

Since we have a data frame inside a data frame, flatten it into a single data frame using do.call + cbind (the columns of user come out as user.id, user.full_name, ...) and then subset the columns needed.

# Flatten only the columns we need. do.call(cbind, ...) on a list that
# contains a data frame dispatches to cbind.data.frame, which expands the
# nested `user` data frame into columns named user.id, user.full_name, ...
# Passing just `id` and `user` keeps unrelated list columns (e.g. `tags`)
# and other nested frames out of the flattening step, where ragged list
# elements can fail with "arguments imply differing number of rows".
do.call(cbind, data_raw[c("id", "user")])[c("id", "user.id")]

#                             id   user.id
#1 2306173214621953247_176548637 176548637
#2 2304792897512042631_176548637 176548637

Or with dplyr::select

library(dplyr)

# Same flattening as the base R version, followed by dplyr::select().
# Only `id` and `user` are handed to cbind so that other list columns or
# nested data frames in data_raw never reach the flattening step.
do.call(cbind, data_raw[c("id", "user")]) %>%
  select(id, user.id)

data

Tested on this data:

# Minimal reproducible example: a two-row data frame whose `user` column
# is itself a data frame (a nested data-frame column).

# Inner frame that becomes the nested column.
user <- data.frame(
  id = rep("176548637", 2L),
  full_name = c("a", "b")
)

# Outer frame holding one id per row.
data_raw <- data.frame(
  id = c(
    "2306173214621953247_176548637",
    "2304792897512042631_176548637"
  )
)

# Attach the inner frame as a single column named `user`.
data_raw$user <- user

# Inspect the structure of the test data. The pasted output below shows
# that `user` is a data-frame column nested inside data_raw, with its own
# `id` and `full_name` columns.
str(data_raw)
#'data.frame':  2 obs. of  2 variables:
# $ id  : chr  "2306173214621953247_176548637" "2304792897512042631_176548637"
# $ user:'data.frame':  2 obs. of  2 variables:
#  ..$ id       : chr  "176548637" "176548637"
#  ..$ full_name: chr  "a" "b"