I have a loop going through each file in a directory. It works fine on one file, but as soon as there are 2 or more files in the directory, the second (or more) output is all NAs.
I've tried switching from read.csv to fread, I've tried converting .csv to .txt, I have tried different methods of selecting specific columns (e.g., keeps, select), but I always get NAs for the second time through the loop. It is not the second file, because if I remove the first file, the second is processed perfectly.
Not sure if it is something at the end of the .csv, or if it is adding rownames to the second file or what. Thanks!
filenames <- list.files()
n_filenames <- length(filenames)
SSRT_cb1_pre <- data.frame(matrix(ncol = 4, nrow = n_filenames))
cols <- c(13, 23, 24, 25, 28, 29, 31, 32)
for (i in 1:n_filenames) {
print(filenames)
dt_pre <- fread(filenames[i], header=T, sep=",", select=cols,
stringsAsFactors=F, na.strings=c("NA", "", "."))
dt_pre$RT <- as.numeric(dt_pre$rt)
data_real_pre <- subset(dt_pre, SSTBlocks.thisRepN>=0)
data_corr_pre <- subset(data_real_pre, corr == 1)
data_corr_pre_RTmean <- aggregate(RT ~ P, data = data_corr_pre,
FUN=mean, na.rm=TRUE)
data_corr_pre_SSDmean <- aggregate(SSD ~ P, data = data_corr_pre,
FUN = mean, na.rm = TRUE)
pre_sub <- data_corr_pre_RTmean[i,1]
preMeanRT <- data_corr_pre_RTmean[i,2]
preMeanSSD <- data_corr_pre_SSDmean[i,2]
SSRT_cb1_pre[i, 1] <- i
SSRT_cb1_pre[i, 2] <- pre_sub
SSRT_cb1_pre[i, 3] <- preMeanRT
SSRT_cb1_pre[i, 4] <- preMeanSSD
}
SSRT_cb1_pre
The following gives me this output:
Outputs:
SSRT_cb1_pre
i sub1 preRT preSSD
1 1 301 0.4877872 0.2580645
2 2 NA NA NA
NEWER CODE THAN ABO
filenames <- list.files()
n_filenames <- length(filenames)
n_rows <- n_filenames/2
SSRT_cb1_pre <- data.frame(matrix(ncol = 4, nrow = n_filenames)) # for output
colnames(SSRT_cb1_pre) <- c("i","sub1", "preRT", "preSSD")
cols <- c(13, 23, 24, 25, 28, 29, 31, 32)
colsnames <- c("SSTBlocks.thisRepN", "SSD", "corr", "rt", "sess", "CB", "P", "expName")
for (i in 1:n_filenames) {
print(filenames)
dt_pre <- fread(filenames[i], header=T, sep=",", select=colsnames, stringsAsFactors=F, na.strings=c("NA", "", "."))
dt_pre$RT <- as.numeric(dt_pre$rt)
data_real_pre <- subset(dt_pre, SSTBlocks.thisRepN>=0)
data_corr_pre <- subset(data_real_pre, corr == 1)
data_corr_pre_RTmean <- data_corr_pre[, mean(RT, na.rm=T), by = P] #suggested by Yannis Vassiliadis Stackoverflow as alt to aggregate
data_corr_pre_SSDmean <- data_corr_pre[, mean(SSD, na.rm=T), by = P]
# values to collect from each file
pre_sub <- data_corr_pre_RTmean[i, 1]
preMeanRT <- data_corr_pre_RTmean[i, 2]
preMeanSSD <- data_corr_pre_SSDmean[i, 2]
# output for values - should iterate through
SSRT_cb1_pre[i, 1] <- i
SSRT_cb1_pre[i, 2] <- pre_sub
SSRT_cb1_pre[i, 3] <- preMeanRT
SSRT_cb1_pre[i, 4] <- preMeanSSD
}
SSRT_cb1_pre
class(data_corr_pre_RTmean)
class(data_corr_pre_SSDmean)
This gives the out put of:
[1] "301_1_PsychoPy_SST_Pretest_2.csv" "303_1_PsychoPy_SST_Pretest.csv"
[1] "301_1_PsychoPy_SST_Pretest_2.csv" "303_1_PsychoPy_SST_Pretest.csv"
Warning messages:
1: In as.numeric(dt_pre$rt) : NAs introduced by coercion
2: In as.numeric(dt_pre$rt) : NAs introduced by coercion
>
> SSRT_cb1_pre
i sub1 preRT preSSD
1 1 301 0.4877872 0.2580645
2 2 NA NA NA
> class(data_corr_pre_RTmean)
[1] "data.table" "data.frame"
> class(data_corr_pre_SSDmean)
[1] "data.table" "data.frame"
selectargument offreadaccepts characters as well. As an aside, given thatdt_preanddata_real_prehave classdata.table, I suggest you usedt_corr_pre[, mean(RT, na.rm=T), by = P]as a faster alternative toaggregate. - Yannis VassiliadisPtakes on more than 1 values in each file, thendata_corr_pre_RTmeananddata_corr_pre_SSDmeanare data frames. Then for the 2nd file, you get the 2nd row ofdata_corr_pre_RTmeanand createpreMeanRT. But if the 2nd row doesn't exist, it means thatPonly takes one value, i.e. you shouldn't even be usingaggregate, justmean. - Yannis Vassiliadis