0
votes

I have multiple data-frames containing an unknown (and variable) number of columns (always a multiple of 2). Headers of the file look like that:

X0, Y0, X1, Y1, X2, Y2 ... Xn, Yn

Each of the paired columns contains the same number of row, but the number of row is different between non-paired column.

Using R, I would like to slit these data-frames in multiple small one that contain only the paired column:

X0, Y0, X1, Y1, X2, Y2,... Xn, Yn

into:

X0, Y0
X1, Y1
X2, Y2

and so on.

I tried to set up loops but without success so far.

Could anyone point me in the right direction / approach to solve this ?

Many thanks.

3
Are there even separators present for the columns whose rows have missing data? I don't even know how you are going to read these files into R. - Tim Biegeleisen
"Each of the paired columns contains the same number of row, but the number of row is different between non-paired column." How is that even possible? A data.frame literally can not have an uneven number of rows in columns. A complete, reproducible example showing what you are working with would make this a lot clearer. - thelatemail

3 Answers

2
votes
lapply(seq(1, ncol(df), by=2), function(i) 
     df[i: pmin((i+1), ncol(df))])

[[1]]
  X0 Y0
1  1  2
2 11 12
3 21 22
4 31 32
5 41 42

[[2]]
  X1 Y1
1  3  4
2 13 14
3 23 24
4 33 34
5 43 44

[[3]]
  X2 Y2
1  5  6
2 15 16
3 25 26
4 35 36
5 45 46

[[4]]
  X3 Y3
1  7  8
2 17 18
3 27 28
4 37 38
5 47 48

[[5]]
  X4 Y4
1  9 10
2 19 20
3 29 30
4 39 40
5 49 50

Data:

dput(df)
structure(list(X0 = c(1L, 11L, 21L, 31L, 41L), Y0 = c(2L, 12L, 
22L, 32L, 42L), X1 = c(3L, 13L, 23L, 33L, 43L), Y1 = c(4L, 14L, 
24L, 34L, 44L), X2 = c(5L, 15L, 25L, 35L, 45L), Y2 = c(6L, 16L, 
26L, 36L, 46L), X3 = c(7L, 17L, 27L, 37L, 47L), Y3 = c(8L, 18L, 
28L, 38L, 48L), X4 = c(9L, 19L, 29L, 39L, 49L), Y4 = c(10L, 20L, 
30L, 40L, 50L)), .Names = c("X0", "Y0", "X1", "Y1", "X2", "Y2", 
"X3", "Y3", "X4", "Y4"), class = "data.frame", row.names = c(NA, 
-5L))
1
votes

tidyverse solution:

library(tidyverse)

 set.seed(123)

 dt<-data.frame(
  X0=rnorm(5),
  Y0=rnorm(5),
  X1=c(rnorm(4),NA),
  Y1=c(rnorm(4),NA),
  X2=c(rnorm(3),NA,NA),
  Y2=c(rnorm(3),NA,NA)
 )

 dt
           X0         Y0        X1         Y1         X2         Y2
1 -0.56047565  1.7150650 1.2240818 -0.5558411  0.7013559 -0.2179749
2 -0.23017749  0.4609162 0.3598138  1.7869131 -0.4727914 -1.0260044
3  1.55870831 -1.2650612 0.4007715  0.4978505 -1.0678237 -0.7288912
4  0.07050839 -0.6868529 0.1106827 -1.9666172         NA         NA
5  0.12928774 -0.4456620        NA         NA         NA         NA

 seq(2,ncol(dt),2) %>%   map(~ select(dt,(.-1):.))
[[1]]
           X0         Y0
1 -0.56047565  1.7150650
2 -0.23017749  0.4609162
3  1.55870831 -1.2650612
4  0.07050839 -0.6868529
5  0.12928774 -0.4456620

[[2]]
         X1         Y1
1 1.2240818 -0.5558411
2 0.3598138  1.7869131
3 0.4007715  0.4978505
4 0.1106827 -1.9666172
5        NA         NA

[[3]]
          X2         Y2
1  0.7013559 -0.2179749
2 -0.4727914 -1.0260044
3 -1.0678237 -0.7288912
4         NA         NA
5         NA         NA
1
votes

Another lapply solution.

# Example data frame
dt <- data.frame(X0 = "a",
                 Y0 = "b",
                 X1 = "c",
                 Y1 = "d",
                 X2 = "e",
                 Y2 = "f",
                 stringsAsFactors = FALSE)

# Split the data frame
lapply(1:(ncol(dt)/2), function(x) dt[, c(2 * x - 1, 2 * x)])
[[1]]
  X0 Y0
1  a  b

[[2]]
  X1 Y1
1  c  d

[[3]]
  X2 Y2
1  e  f