0
votes

I am trying to output the number of groups of 0s in each of several data frames stored in a list. I believe the package I need for this is the raster package for R. Here is my attempt:

# Build two reproducible 15 x 15 grids of 0s and 1s (each cell is drawn as 0
# with probability 0.8 and 1 with probability 0.2) and keep a data.frame copy
# of each grid.
set.seed(12345)
output_1 <- matrix(
  sample(c(0, 1), 225, prob = c(0.8, 0.2), replace = TRUE),
  nrow = 15
)
df_output_1 <- data.frame(output_1)

set.seed(99999)
output_2 <- matrix(
  sample(c(0, 1), 225, prob = c(0.8, 0.2), replace = TRUE),
  nrow = 15
)
df_output_2 <- data.frame(output_2)

# One data frame per grid, so both simulated grids are processed.
output_list <- list(df_output_1, df_output_2)

install.packages("raster")
library(raster)

lapply(output_list, function(onedf) {
  # Label the connected patches of cells, counting diagonal neighbours too
  Rastermat <- raster(onedf)
  Clumps <- as.matrix(clump(Rastermat, directions = 8))

  # Gather the (row, col) positions of every clump label, one element per label
  tot <- max(Clumps, na.rm = TRUE)
  lapply(1:tot, function(label) {
    which(Clumps == label, arr.ind = TRUE)
  })
})

But I get the following error:

Error in .local(x, ...) : list has no "x"

  1. stop("list has no \"x\"")

  2. .local(x, ...)

  3. raster(onedf)

  4. raster(onedf)

  5. FUN(X[[i]], ...)

1.lapply(df_list, function(onedf) {

Rastermat <- raster(onedf)

Clumps <- as.matrix(clump(Rastermat, directions = 8))

tot <- max(Clumps, na.rm = TRUE) ...

Can someone please help me? I am really stuck on what to do.

1

1 Answer

1
votes

The only problem is that `raster()` needs a matrix rather than a data.frame: a data.frame is a list, which is why the error says `list has no "x"`.

This should work:

library(raster)
# Build two reproducible 15 x 15 matrices of 0s and 1s (each cell is drawn as
# 0 with probability 0.8 and 1 with probability 0.2).
set.seed(12345)
output_1 <- matrix(
  sample(c(0, 1), 225, prob = c(0.8, 0.2), replace = TRUE),
  nrow = 15
)
set.seed(99999)
output_2 <- matrix(
  sample(c(0, 1), 225, prob = c(0.8, 0.2), replace = TRUE),
  nrow = 15
)

# Keep the inputs as plain matrices, one entry per simulated grid.
output_list <- list(output_1, output_2)

# For every matrix in `output_list`, label its clumps with clump() and return
# a list with one element per clump label; each element is the matrix of
# (row, col) indices of the cells carrying that label.
lapply(output_list, function(one_mat) {
  # directions = 8 also joins cells that only touch diagonally.
  raster_mat <- raster(one_mat)
  clumps <- as.matrix(clump(raster_mat, directions = 8))

  # Highest label in use. Guard the no-clump case: max() of an all-NA matrix
  # would give -Inf (with a warning) and break the list construction.
  labels <- clumps[!is.na(clumps)]
  tot <- if (length(labels) > 0) max(labels) else 0

  # seq_len() yields an empty sequence for tot == 0, unlike 1:tot.
  # NOTE(review): clump() is called without gaps = FALSE, so some labels in
  # 1..tot may be unused and produce zero-row index matrices - confirm whether
  # that is wanted.
  lapply(seq_len(tot), function(label) {
    which(clumps == label, arr.ind = TRUE)
  })
})

EDIT to answer your follow-up questions:

Provided your inputs are data.frames containing only 0s and 1s and you want to count the number of clumps of 0s, you could use the following code to return the number of clumps in each data.frame:

# Count the clumps of 0s in each data.frame of `list_of_dfs` (data.frames
# holding only 0s and 1s). Returns one number per data.frame.
vapply(list_of_dfs, function(one_df) {
  # -1 because the clump function counts non-zero values: the 0s become -1
  # (counted) and the 1s become 0 (ignored).
  shifted <- raster(as.matrix(one_df) - 1)
  # gaps = FALSE to prevent having missing numbers in the chunk numbers, so
  # the highest chunk number equals the number of clumps.
  zero_clumps <- clump(shifted, directions = 8, gaps = FALSE)
  zero_clumps@data@max # return the highest chunk number
}, numeric(1))