4
votes

I have two data.table with the same structure. Two key columns followed by a number of data columns. The number of data columns may vary. I want to add the values from the second data.table to the corresponding rows/columns in the first data.table.

DT1 <- cbind(data.table(loc=c("L1","L2","L3"), product=c("P1","P2","P1")), matrix(10,nrow=3,ncol=12))
setkey(DT1, loc, product)
DT1
   loc product V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12
1:  L1      P1 10 10 10 10 10 10 10 10 10  10  10  10
2:  L2      P2 10 10 10 10 10 10 10 10 10  10  10  10
3:  L3      P1 10 10 10 10 10 10 10 10 10  10  10  10
DT2 <- cbind(data.table(loc=c("L2","L3"), product=c("P2","P1")), matrix(1:24,nrow=2,ncol=12))
setkey(DT2, loc, product)
   loc product V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12
1:  L2      P2  1  3  5  7  9 11 13 15 17  19  21  23
2:  L3      P1  2  4  6  8 10 12 14 16 18  20  22  24

My best bet so far is the following

DT1[DT2, 3:14 := as.data.table(DT1[DT2, 3:14, with=FALSE] + DT2[, 3:14, with=FALSE]), with=FALSE]
   loc product V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12
1:  L1      P1 10 10 10 10 10 10 10 10 10  10  10  10
2:  L2      P2 11 13 15 17 19 21 23 25 27  29  31  33
3:  L3      P1 12 14 16 18 20 22 24 26 28  30  32  34

Note that nrow and ncol and the loc and product entries are all variable depending on the source data.

This works if every row in DT2 matches one in DT1, but otherwise will have unexpected results. Is there a more rigorous/elegant way to express the RHS to do this variable number of column assignments referring to both DT1 and DT2?

2

2 Answers

7
votes

How about:

cols = paste0('V', 1:12)

DT1[DT2, (cols) := setDT(mget(cols)) + mget(paste0('i.', cols))]
DT1
#   loc product V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12
#1:  L1      P1 10 10 10 10 10 10 10 10 10  10  10  10
#2:  L2      P2 11 13 15 17 19 21 23 25 27  29  31  33
#3:  L3      P1 12 14 16 18 20 22 24 26 28  30  32  34
0
votes

One possibility is to do the join, and where you have duplicated columns, you can create an expression to use those names to perform the sum

DT3 <- DT2[ DT1 ]

dup <- names(DT3)[grep("[i.]", names(DT3))]
dup2 <- gsub("[i.]", "", dup)
expr <- paste0("`:=`(", paste0(dup2,  "=",  dup2, "+", dup, collapse = ","), ")")

## set NA to 0
for(j in names(DT3)) set(DT3, which(is.na(DT3[[j]])), j, 0)

DT3[, eval(parse(text = expr))][, c("loc", "product", dup2), with=F]

#    loc product V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12
# 1:  L1      P1 10 10 10 10 10 10 10 10 10  10  10  10
# 2:  L2      P2 11 13 15 17 19 21 23 25 27  29  31  33
# 3:  L3      P1 12 14 16 18 20 22 24 26 28  30  32  34