0
votes

I try to make some calculations with the following dataset:

# Example data: one row per measurement. Each row carries either a brakg
# value or a cebkg/rotkg pair; the other column(s) are 0.
dataset <- data.frame(
  specimen = rep(c("NIA", "MAT"), times = c(3, 2)),
  brakg = c(9.4, 0, 0, 7.8, 0),
  cebkg = c(0, 2.3, 3.1, 0, 2.4),
  rotkg = c(0, 1, 1.1, 0, 1.2),
  stringsAsFactors = FALSE
)

  specimen brakg cebkg rotkg
1      NIA   9.4   0.0   0.0
2      NIA   0.0   2.3   1.0
3      NIA   0.0   3.1   1.1
4      MAT   7.8   0.0   0.0
5      MAT   0.0   2.4   1.2

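I want a long-format result in which, for each specimen, the brakg value appears twice (as Init and Eval), the non-zero cebkg and rotkg values are listed (as Ceb and Rot), and two computed values are added: DiffA = brakg - sum(cebkg) and RateB = brakg / sum(rotkg). This should be the result: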

specimen    list    value
NIA         Init    9.4
NIA         Eval    9.4
NIA         Ceb     2.3
NIA         Ceb     3.1
NIA         Rot     1.0
NIA         Rot     1.1
NIA         DiffA   4.0
NIA         RateB   4.48
MAT         Init    7.8
MAT         Eval    7.8
MAT         Ceb     2.4
MAT         Rot     1.2
MAT         DiffA   5.4
MAT         RateB   6.5

I have tried (without success) this code:

spds <- split(dataset, dataset$specimen)
# Splitting the dataset to make an evaluation per specimen

# For each specimen: find the row positions with non-zero brakg, cebkg and
# rotkg, then stack the brakg row(s) twice, followed by the cebkg rows and the
# rotkg rows. The stacked rows still carry all original columns (no
# list/value columns and no DiffA/RateB rows are produced). The function's
# value is that of the final assignment to `tmp`.
res <- lapply(spds, function(DF){
  i <- which(DF[['brakg']] != 0)
  j <- which(DF[['cebkg']] != 0)
  k <- which(DF[['rotkg']] != 0)
  tmp <- rbind(DF[rep(i, 2), ], DF[j, ], DF[k, ])
  # So I can stack the repeated brakg value... after that I ran out of ideas
})

Please, any help will be useful, even in base R or Tidyverse (I don't know if tidy functions can output different number of vars). Thank you.

2
Can you explain your calculation? It is not clear to me how you get the values in list and value. - Ronak Shah
Hello @RonakShah, for NIA DiffA = 9.4 - (2.3+3.1) = 4.0 and RateB = 9.4/(1.0+1.1) = 4.48. For MAT DiffA = 7.8 - (2.4) = 5.4 and RateB = 7.8 / (1.2) = 6.5 - Manu
@RonakShah and list is a named vector; it could be list = c("Init", "Eval", rep("Ceb", <number of non-zero cebkg>), rep("Rot", <number of non-zero rotkg>), "DiffA", "RateB"), repeated for each specimen. - Manu
Can there be more than 1 non-zero value in brakg for a specimen ? If yes, can you include that in the example? - Ronak Shah
@RonakShah, no, only one non-zero brakg per specimen. By the way, if brakg is non-zero, then cebkg and rotkg are 0. And vice versa: if cebkg and rotkg are non-zero, then brakg is 0. - Manu

2 Answers

2
votes

After splitting, you can use lapply to perform all the calculations for each specimen:

# One data frame per specimen (split() orders the groups by specimen name).
spds <- split(dataset, dataset$specimen)

# Turn the rows of a single specimen into its long-format block:
# Init, Eval, every non-zero Ceb, every non-zero Rot, then DiffA and RateB.
specimen_block <- function(grp) {
  initial <- grp$brakg[grp$brakg != 0]
  has_ceb <- grp$cebkg != 0
  has_rot <- grp$rotkg != 0

  labels <- c(
    "Init", "Eval",
    rep("Ceb", sum(has_ceb)),
    rep("Rot", sum(has_rot)),
    "DiffA", "RateB"
  )
  values <- c(
    initial, initial,
    grp$cebkg[has_ceb],
    grp$rotkg[has_rot],
    initial - sum(grp$cebkg),   # DiffA
    initial / sum(grp$rotkg)    # RateB
  )

  data.frame(specimen = grp$specimen[1], list = labels, value = values)
}

# Stack the per-specimen blocks into a single data frame.
out <- do.call(rbind, lapply(spds, specimen_block))

# Drop the "MAT.1", "NIA.1", ... row names produced by rbind().
rownames(out) <- NULL
out


out
#   specimen  list   value
#1       MAT  Init 7.80000
#2       MAT  Eval 7.80000
#3       MAT   Ceb 2.40000
#4       MAT   Rot 1.20000
#5       MAT DiffA 5.40000
#6       MAT RateB 6.50000
#7       NIA  Init 9.40000
#8       NIA  Eval 9.40000
#9       NIA   Ceb 2.30000
#10      NIA   Ceb 3.10000
#11      NIA   Rot 1.00000
#12      NIA   Rot 1.10000
#13      NIA DiffA 4.00000
#14      NIA RateB 4.47619
2
votes

This code is a little long, but I believe it is clear and easy to debug/edit.

# Build the long-format result as six stacked pieces (Init, Eval, RateB,
# DiffA, Ceb, Rot), each with the columns specimen / list / val.

# Rows carrying each specimen's non-zero brakg value. which() drops
# comparisons that evaluate to NA instead of returning NA-filled rows.
initial <- dataset[which(dataset$brakg != 0), c("specimen", "brakg")]

# Rows carrying the non-zero cebkg / rotkg measurements.
ceb_rows <- dataset[which(dataset$cebkg != 0), ]
rot_rows <- dataset[which(dataset$rotkg != 0), ]

# Initial value joined with the per-specimen total of cebkg.
# merge() joins on the shared column `specimen` and sorts the result by it.
DiffA <- merge(
  initial,
  aggregate(cebkg ~ specimen, data = ceb_rows, FUN = sum)
)

# Initial value joined with the per-specimen total of rotkg.
RateB <- merge(
  initial,
  aggregate(rotkg ~ specimen, data = rot_rows, FUN = sum)
)

# One block of output rows that all share the same label.
# rep() keeps the label the same length as `value`, so an empty selection
# yields an empty block instead of a "differing number of rows" error.
make_rows <- function(specimen, label, value) {
  data.frame(
    specimen = specimen,
    list = rep(label, length(value)),
    val = value,
    stringsAsFactors = FALSE
  )
}

res <- rbind(
  make_rows(initial$specimen, "Init", initial$brakg),
  make_rows(initial$specimen, "Eval", initial$brakg),
  make_rows(RateB$specimen, "RateB", RateB$brakg / RateB$rotkg),
  make_rows(DiffA$specimen, "DiffA", DiffA$brakg - DiffA$cebkg),
  make_rows(ceb_rows$specimen, "Ceb", ceb_rows$cebkg),
  make_rows(rot_rows$specimen, "Rot", rot_rows$rotkg)
)


res
#    specimen  list     val
# 1       NIA  Init 9.40000
# 2       MAT  Init 7.80000
# 3       NIA  Eval 9.40000
# 4       MAT  Eval 7.80000
# 5       MAT RateB 6.50000
# 6       NIA RateB 4.47619
# 7       MAT DiffA 5.40000
# 8       NIA DiffA 4.00000
# 9       NIA   Ceb 2.30000
# 10      NIA   Ceb 3.10000
# 11      MAT   Ceb 2.40000
# 12      NIA   Rot 1.00000
# 13      NIA   Rot 1.10000
# 14      MAT   Rot 1.20000