0
votes

I have a dataset with two rows as columns, I would like to melt the dataset in order to have just 6 columns (country , date, var1, var2, var3, var4). Below a sample code to understand. Thanks for the help.

# what i have
date_or <- c("2001 q1", "2001 q2", "2001 q3","2001 q4")

AT <- c("var1","1","2","3")
AT1 <- c("var2","1","2","3") #in the real dataset is the same name as column ("AT")
AT2 <- c("var3","1","2","3") #in the real dataset is the same name as column ("AT")
AT3 <- c("var4","1","2","3") #in the real dataset is the same name as column ("AT")
BE <- c("var1","1","2","3")
BE1 <- c("var2","1","2","3") #in the real dataset is the same name as column ("BE")
BE2 <- c("var3","1","2","3") #in the real dataset is the same name as column ("BE")
BE3 <- c("var4","1","2","3") #in the real dataset is the same name as column ("BE")

dt_or <- data.frame(date_or, AT, AT1, AT2, AT3)
head(dt_or)

# what I would like to obtain
date <- c("2001 q1", "2001 q2", "2001 q3","2001 q4"
          ,"2002 q1","2002 q2","2002 q3","2002 q4")

country <- c("AT","AT","AT","AT",
             "BE","BE","BE","BE",
             "DE","DE","DE","DE",
             "ES","ES","ES","ES")

feature <- c("var1","var2", "var3", "var4"
             ,"var1","var2", "var3", "var4"
             ,"var1","var2", "var3", "var4"
             ,"var1","var2", "var3", "var4")

value <- c(seq(1,16,1)) # as for example

dt <- data.frame(date, country, feature, value)


2

2 Answers

0
votes

Get the columns in long format which starts with "AT", get the second-row values in a separate column (feature). We need to use some logic to get these values, in the example shared the values which we want in second column contain alphabets so we use that logic in replace to change them to NA.

Finally, we fill the missing values by the previous non-NA values and remove the rows with alphabets.

library(dplyr)
library(tidyr)

dt_or %>%
  pivot_longer(cols = starts_with('AT')) %>%
  mutate(feature = replace(value, !grepl('[A-Za-z]', value), NA)) %>%
  group_by(name) %>%
  fill(feature) %>%
  ungroup() %>%
  filter(!grepl('[A-Za-z]', value)) %>%
  type.convert(as.is = TRUE)


# A tibble: 12 x 4
#   date_or name  value feature
#   <chr>   <chr> <int> <chr>  
# 1 2001 q2 AT        1 var1   
# 2 2001 q2 AT1       1 var2   
# 3 2001 q2 AT2       1 var3   
# 4 2001 q2 AT3       1 var4   
# 5 2001 q3 AT        2 var1   
# 6 2001 q3 AT1       2 var2   
# 7 2001 q3 AT2       2 var3   
# 8 2001 q3 AT3       2 var4   
# 9 2001 q4 AT        3 var1   
#10 2001 q4 AT1       3 var2   
#11 2001 q4 AT2       3 var3   
#12 2001 q4 AT3       3 var4   
0
votes

We can use data.table. Convert the 'data.frame' to 'data.table' (setDT(dt_or)), melt the data into 'long' format, grouped by 'variable', create the 'feature' column as the first element in 'value', and filter the rows where the 'value' startsWith 'var'

library(data.table)
melt(setDT(dt_or), id.var = 'date_or')[,
   feature := first(value), .(variable)][!startsWith(value, 'var')][order(date_or)]
#    date_or variable value feature
# 1: 2001 q2       AT     1    var1
# 2: 2001 q2      AT1     1    var2
# 3: 2001 q2      AT2     1    var3
# 4: 2001 q2      AT3     1    var4
# 5: 2001 q3       AT     2    var1
# 6: 2001 q3      AT1     2    var2
# 7: 2001 q3      AT2     2    var3
# 8: 2001 q3      AT3     2    var4
# 9: 2001 q4       AT     3    var1
#10: 2001 q4      AT1     3    var2
#11: 2001 q4      AT2     3    var3
#12: 2001 q4      AT3     3    var4

Or with tidyverse, use pivot_longer to reshape into 'long' format, grouped by 'name', create the 'feature' as first of 'value' and filter out the rows that are not needed

library(dplyr)
library(tidyr)
dt_or %>%
    pivot_longer(cols = starts_with('AT')) %>% 
    group_by(name) %>% 
    mutate(feature = first(value)) %>% 
    ungroup %>%
    filter(!startsWith(as.character(value), 'var')) %>% 
    type.convert(as.is = TRUE)
# A tibble: 12 x 4
#   date_or name  value feature
#   <chr>   <chr> <int> <chr>  
# 1 2001 q2 AT        1 var1   
# 2 2001 q2 AT1       1 var2   
# 3 2001 q2 AT2       1 var3   
# 4 2001 q2 AT3       1 var4   
# 5 2001 q3 AT        2 var1   
# 6 2001 q3 AT1       2 var2   
# 7 2001 q3 AT2       2 var3   
# 8 2001 q3 AT3       2 var4   
# 9 2001 q4 AT        3 var1   
#10 2001 q4 AT1       3 var2   
#11 2001 q4 AT2       3 var3   
#12 2001 q4 AT3       3 var4