1
votes

I have a data.frame that looks like this:

C1    C2   C3   C4   
 1    -1   -1   1     
 1     1   -1   1    
 1     1   -1   1   
 1    -1    1  -1      

I would like to count the occurrences of -1 and 1 in each column, so I used:

# Tabulate the values found in each column (MARGIN = 2) of DF.
tab <- apply(DF, MARGIN = 2, FUN = table)

After that, I used the following line:

# Bind the per-column tables side by side, then convert to a data frame.
final <- as.data.frame(do.call(what = "cbind", args = tab))

to get the result as a data.frame. Unfortunately, this gives me an error because of the first element:

tab[[1]]
1
4

tab[[2]]
-1 1
2 2

..........

So I would like to add a count of 0 for -1 to tab[[1]], so that the results can be written as a data.frame.

2

2 Answers

4
votes

And a third way:

 # Rebuild the example data from the question as a data frame.
 x <- read.table(header = TRUE, text = "C1    C2   C3   C4   
   1    -1   -1   1     
   1     1   -1   1    
   1     1   -1   1   
   1    -1    1  -1 ")

# Give every column the same two factor levels (1 and -1) before tabulating,
# so a value that never occurs in a column is reported with a count of 0.
sapply(lapply(x, factor, levels = c(1, -1)), table)

   C1 C2 C3 C4
1   4  2  1  3
-1  0  2  3  1

Some benchmarking:

# Benchmark input: 1e7 random draws from {-1, 1}, arranged as a data frame
# with 100 columns.
xx <- as.data.frame(
  matrix(sample(c(-1, 1), size = 1e7, replace = TRUE), ncol = 100)
)

# Count how often each value occurs in every column of DF.
#
# The columns are stacked into long format, cross-tabulated, and the counts
# are reshaped to wide form.
#
# DF: data frame whose columns are tabulated.
# Returns: a data frame with a `values` column and one `Freq.<column>` count
#   column per column of DF.
Roland <- function(DF) {
  long_counts <- as.data.frame(table(stack(DF)))
  reshape(long_counts, idvar = "values", timevar = "ind", direction = "wide")
}

# Count the 1s and -1s in every column of x.
#
# Each column is first converted to a factor with the fixed levels 1 and -1,
# so table() returns a count (possibly 0) for both values in every column.
#
# x: data frame (or list) of columns to tabulate.
# Returns: the per-column tables simplified by sapply(); for the 1/-1 data
#   here that is a matrix with rows "1" and "-1" and one column per input column.
Roman <- function(x) {
  as_factors <- lapply(x, factor, levels = c(1, -1))
  sapply(as_factors, table)
}

# Count the 1s and -1s in every column of x using vectorised comparisons.
#
# x: data frame or matrix to tabulate.
# Returns: a two-row matrix without row names; row 1 holds the per-column
#   count of 1s and row 2 the per-column count of -1s.
user20650 <- function(x) {
  n_pos <- colSums(x == 1)
  n_neg <- colSums(x == -1)
  # deparse.level = 0 stops rbind() from using the variable names as row names.
  rbind(n_pos, n_neg, deparse.level = 0)
}

# Attach the benchmarking package with library() so that a missing
# installation stops here with a clear error. require() would only return
# FALSE, and the microbenchmark() call would then fail with
# "could not find function".
library(microbenchmark)

# Time the three approaches on the same input, two runs each. The results are
# assigned inside the timed expressions so m1, m2 and m3 remain available.
microbenchmark(
  m1 <- Roland(xx),
  m2 <- Roman(xx),
  m3 <- user20650(xx),
  times = 2
)

Unit: milliseconds
                expr        min         lq     median         uq        max neval
    m1 <- Roland(xx) 17624.6297 17624.6297 18116.6595 18608.6893 18608.6893     2
     m2 <- Roman(xx) 13838.2030 13838.2030 14301.9159 14765.6288 14765.6288     2
 m3 <- user20650(xx)   786.3689   786.3689   788.7253   791.0818   791.0818     2
2
votes
# Example data from the question.
DF <- read.table(header = TRUE, text = "C1    C2   C3   C4   
 1    -1   -1   1     
 1     1   -1   1    
 1     1   -1   1   
 1    -1    1  -1  ")

# stack() puts all columns into long format (values, ind); cross-tabulating
# that keeps a zero cell for value/column pairs that never occur.
res <- table(stack(DF))
#       ind
# values C1 C2 C3 C4
#     -1  0  2  3  1
#     1   4  2  1  3

# Flatten the contingency table to one row per (value, column) pair.
res2 <- as.data.frame(res)
#   values ind Freq
# 1     -1  C1    0
# 2      1  C1    4
# 3     -1  C2    2
# 4      1  C2    2
# 5     -1  C3    3
# 6      1  C3    1
# 7     -1  C4    1
# 8      1  C4    3

# Spread back to wide form: one row per value, one Freq.* column per column
# of DF.
reshape(res2, idvar = "values", timevar = "ind", direction = "wide")
#   values Freq.C1 Freq.C2 Freq.C3 Freq.C4
# 1     -1       0       2       3       1
# 2      1       4       2       1       3

An alternative is res <- ftable(stack(DF)), which can be written to a file directly using write.ftable.