2
votes

I have a tibble from an SPSS file loaded with haven. Some columns have both data and labels:

# Reproducible example data (dput-style output): a 3-row grouped tibble.
# - CNT and SC156Q05HA are "haven_labelled" vectors; each carries a "label"
#   attribute (variable description) and a "labels" attribute (named vector
#   mapping label text -> stored code).
# - percentage and total_schools are plain numeric / integer columns.
# - The "groups" attribute records grouping by CNT and SC156Q05HA.
tmp <- structure(list(CNT = structure(c("ALB", "ALB", "ARE"), label = "Country code 3-character", labels = c(Moldova = "MDA", 
Thailand = "THA", Brazil = "BRA", France = "FRA", `United States` = "USA", 
Italy = "ITA", Latvia = "LVA", Albania = "ALB", Serbia = "SRB", 
Macao = "MAC", `Moscow City (RUS)` = "QMC", Greece = "GRC", `North Macedonia` = "MKD", 
Netherlands = "NLD", Switzerland = "CHE", Montenegro = "MNE", 
`United Arab Emirates` = "ARE", Sweden = "SWE", `Czech Republic` = "CZE", 
`Hong Kong` = "HKG", Argentina = "ARG", `Bosnia and Herzegovina` = "BIH", 
`B-S-J-Z (China)` = "QCI", `Costa Rica` = "CRI", Denmark = "DNK", 
`Slovak Republic` = "SVK", Belgium = "BEL", Chile = "CHL", Philippines = "PHL", 
Colombia = "COL", Poland = "POL", Ireland = "IRL", Iceland = "ISL", 
`New Zealand` = "NZL", Vietnam = "VNM", `Dominican Republic` = "DOM", 
Canada = "CAN", Panama = "PAN", Lebanon = "LBN", Indonesia = "IDN", 
Finland = "FIN", Japan = "JPN", `Brunei Darussalam` = "BRN", 
Hungary = "HUN", Slovenia = "SVN", Georgia = "GEO", `Chinese Taipei` = "TAP", 
Singapore = "SGP", Spain = "ESP", Morocco = "MAR", `United Kingdom` = "GBR", 
Peru = "PER", Bulgaria = "BGR", Ukraine = "UKR", Belarus = "BLR", 
`Moscow Region (RUS)` = "QMR", Jordan = "JOR", Korea = "KOR", 
Norway = "NOR", Israel = "ISR", Turkey = "TUR", Australia = "AUS", 
`Russian Federation` = "RUS", Malaysia = "MYS", Qatar = "QAT", 
Malta = "MLT", Portugal = "PRT", `Tatarstan (RUS)` = "QRT", Estonia = "EST", 
Austria = "AUT", `Saudi Arabia` = "SAU", Germany = "DEU", Romania = "ROU", 
Lithuania = "LTU", Croatia = "HRV", Kosovo = "KSV", Mexico = "MEX", 
Luxembourg = "LUX", Cyprus = "QCY", Uruguay = "URY", Kazakhstan = "KAZ", 
`Baku (Azerbaijan)` = "QAZ"), class = "haven_labelled"), SC156Q05HA = structure(c(1, 
2, 1), label = "At school: A specific programme to prepare students for responsible Internet behaviour", labels = c(Yes = 1, 
No = 2, `Valid Skip` = 5, `Not Applicable` = 7, Invalid = 8, 
`No Response` = 9), class = "haven_labelled"), percentage = c(71.1, 
28.9, 81.5), total_schools = c(325L, 325L, 692L)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -3L), groups = structure(list(
    CNT = structure(c("ALB", "ALB", "ARE"), label = "Country code 3-character", labels = c(Moldova = "MDA", 
    Thailand = "THA", Brazil = "BRA", France = "FRA", `United States` = "USA", 
    Italy = "ITA", Latvia = "LVA", Albania = "ALB", Serbia = "SRB", 
    Macao = "MAC", `Moscow City (RUS)` = "QMC", Greece = "GRC", 
    `North Macedonia` = "MKD", Netherlands = "NLD", Switzerland = "CHE", 
    Montenegro = "MNE", `United Arab Emirates` = "ARE", Sweden = "SWE", 
    `Czech Republic` = "CZE", `Hong Kong` = "HKG", Argentina = "ARG", 
    `Bosnia and Herzegovina` = "BIH", `B-S-J-Z (China)` = "QCI", 
    `Costa Rica` = "CRI", Denmark = "DNK", `Slovak Republic` = "SVK", 
    Belgium = "BEL", Chile = "CHL", Philippines = "PHL", Colombia = "COL", 
    Poland = "POL", Ireland = "IRL", Iceland = "ISL", `New Zealand` = "NZL", 
    Vietnam = "VNM", `Dominican Republic` = "DOM", Canada = "CAN", 
    Panama = "PAN", Lebanon = "LBN", Indonesia = "IDN", Finland = "FIN", 
    Japan = "JPN", `Brunei Darussalam` = "BRN", Hungary = "HUN", 
    Slovenia = "SVN", Georgia = "GEO", `Chinese Taipei` = "TAP", 
    Singapore = "SGP", Spain = "ESP", Morocco = "MAR", `United Kingdom` = "GBR", 
    Peru = "PER", Bulgaria = "BGR", Ukraine = "UKR", Belarus = "BLR", 
    `Moscow Region (RUS)` = "QMR", Jordan = "JOR", Korea = "KOR", 
    Norway = "NOR", Israel = "ISR", Turkey = "TUR", Australia = "AUS", 
    `Russian Federation` = "RUS", Malaysia = "MYS", Qatar = "QAT", 
    Malta = "MLT", Portugal = "PRT", `Tatarstan (RUS)` = "QRT", 
    Estonia = "EST", Austria = "AUT", `Saudi Arabia` = "SAU", 
    Germany = "DEU", Romania = "ROU", Lithuania = "LTU", Croatia = "HRV", 
    Kosovo = "KSV", Mexico = "MEX", Luxembourg = "LUX", Cyprus = "QCY", 
    Uruguay = "URY", Kazakhstan = "KAZ", `Baku (Azerbaijan)` = "QAZ"
    ), class = "haven_labelled"), SC156Q05HA = structure(c(1, 
    2, 1), label = "At school: A specific programme to prepare students for responsible Internet behaviour", labels = c(Yes = 1, 
    No = 2, `Valid Skip` = 5, `Not Applicable` = 7, Invalid = 8, 
    `No Response` = 9), class = "haven_labelled"), .rows = list(
        1L, 2L, 3L)), row.names = c(NA, -3L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE))

I want to use the label values in the tibble, e.g. Italy instead of ITA, so I tried to mutate them:

# Attempt: overwrite the column with its "lbl" attribute.
# NOTE: the labelled vectors in `tmp` only carry "label" and "labels"
# attributes, so attr(SC156Q05HA, "lbl") evaluates to NULL here.
tmp %>%
  ungroup() %>%
  mutate(SC156Q05HA = attr(SC156Q05HA, "lbl"))

But all it seems to do is remove the SC156Q05HA field:

# A tibble: 3 x 4
# Groups:   CNT, SC156Q05HA [3]
  CNT                        percentage total_schools
  <chr+lbl>                       <dbl>         <int>
1 ALB [Albania]                    71.1           325
2 ALB [Albania]                    28.9           325
3 ARE [United Arab Emirates]       81.5           692
2
If you look at the str, it is haven_labelled – akrun
For SC156Q05HA it reads: <dbl+lbl>. I tried mutate(SC156Q05HA = attr(SC156Q05HA, "haven_labelled")), but it gives the same result – pluke

2 Answers

2
votes

One option would be to convert the column to a factor with as_factor from haven:

library(dplyr)
library(haven)
# Drop the grouping, then replace the labelled column with a factor whose
# levels are taken from its value labels (haven::as_factor).
tmp %>%
  ungroup() %>%
  mutate(SC156Q05HA = as_factor(SC156Q05HA))
# A tibble: 3 x 4
#  CNT                        SC156Q05HA percentage total_schools
#  <chr+lbl>                  <fct>           <dbl>         <int>
#1 ALB [Albania]              Yes              71.1           325
#2 ALB [Albania]              No               28.9           325
#3 ARE [United Arab Emirates] Yes              81.5           692

Or, to convert all the labelled columns to factors, we can use mutate_if:

# Drop the grouping, then apply as_factor() to every column for which
# is.labelled() returns TRUE; other columns are left untouched.
tmp %>%
  ungroup() %>%
  mutate_if(is.labelled, as_factor)
# A tibble: 3 x 4
#  CNT                  SC156Q05HA percentage total_schools
#  <fct>                <fct>           <dbl>         <int>
#1 Albania              Yes              71.1           325
#2 Albania              No               28.9           325
#3 United Arab Emirates Yes              81.5           692

Inspired by @M--'s comments with mutate_all.

2
votes

In base R we can use factor() while stacking the "labels" attribute:

# Build the factor directly from the value labels.
# stack() turns the named "labels" vector into a data frame with the columns
# `values` (the stored codes) and `ind` (the label names). The column is
# referenced by its full name `values` rather than relying on `$` partial
# matching of `value`.
tmp %>%
  ungroup() %>%
  mutate(SC156Q05HA = factor(
    x = SC156Q05HA,
    levels = stack(attr(SC156Q05HA, "labels"))$values,
    labels = stack(attr(SC156Q05HA, "labels"))$ind
  ))
#> # A tibble: 3 x 4
#>   CNT                        SC156Q05HA percentage total_schools
#>   <chr+lbl>                  <fct>           <dbl>         <int>
#> 1 ALB [Albania]              Yes              71.1           325
#> 2 ALB [Albania]              No               28.9           325
#> 3 ARE [United Arab Emirates] Yes              81.5           692