I have a data.frame with columns of varying length that I'm trying to align according to the last value in each column. The first 5 rows contain specific identifying information that I can't discard.
I've been using a code in excel that does exactly what I want, but was hoping I could use a similar code to do the same process in R.
Sample data.frame (actual data set much larger):
Series1 <- c("Lync", "23017323003", "2011", "sp1", "45.6", "2.4", "3.1", "1.9", "6.6", "1.4")
Series2 <- c("Lync", "23017323003", "2010", "sp2", "52.8", "3.8", "2.5", "4.3", "NA", "NA")
Series3 <- c("Faye", "23011195006", "2011", "sp1", "63.1", "1.3", "5.2", "0.7", "3.1", "NA")
df <- data.frame(Series1, Series2, Series3)
Intended output data.frame:
Row_Names <- c("Town", "SiteID", "EndYear", "Subplot", "PathLength", "2007", "2008","2009", "2010", "2011")
Series1fix <- c("Lync", "23017323003", "2011", "sp1", "45.6", "2.4", "3.1", "1.9", "6.6", "1.4")
Series2fix <- c("Lync", "23017323003", "2010", "sp2", "52.8", "NA", "3.8", "2.5", "4.3", "NA")
Series3fix <- c("Faye", "23011195006", "2011", "sp1", "63.1", "NA", "1.3", "5.2", "0.7", "3.1")
FixedDF <- data.frame(Row_Names, Series1fix, Series2fix, Series3fix)
The excel code that someone helped me with is as follows:
Sub shift_to_last_row()
Dim LastRowOnSheet As Long
Dim LastRowInColumn As Long
Dim LastColumn As Long
Dim col As Long
Dim arr As Variant
With Cells
LastRowOnSheet = .Find("*", .Cells(1, 1), xlFormulas, xlPart, xlByRows, xlPrevious, False, False).Row
LastColumn = .Find("*", .Cells(1, 1), xlFormulas, xlPart, xlByColumns, xlPrevious, False, False).Column
End With
For col = 1 To LastColumn
LastRowInColumn = Cells(Rows.Count, col).End(xlUp).Row
If LastRowInColumn <> LastRowOnSheet Then
arr = Range(Cells(6, col), Cells(LastRowInColumn, col))
Range(Cells(6, col), Cells(LastRowOnSheet, col)).ClearContents
Range(Cells(6 + LastRowOnSheet - LastRowInColumn, col), Cells(LastRowOnSheet, col)) = arr
End If
Next col
Any ideas on how to do this in R would be great. I have around 150 files to do this to, each containing around 50 columns and 150 rows.
EDIT A sample subset of a real data.frame I'm using.
structure(c("23017323003sp4", "2011", "40", "2/18/2014", "13:40:54", "67.9709", "2.516", "2.510", "1.095", "1.721", "0.574", "0.730", "0.924", "0.585", "1.565", "1.208", "1.104", "0.842", "0.671", "1.399", "1.136", "2.005", "0.946", "1.114", "1.191", "1.192", "2.217", "2.528", "3.706", "2.899", "2.646", "1.698", "1.815", "3.647", "2.141", "2.080", "1.022", "1.610", "2.25", "2.844", "2.651", "1.554", "1.538", "0.958", "1.290", "1.253", "23017323003sp4", "2011", "40", "2/18/2014", "13:40:54", "51.4189", "0.894", "0.977", "0.308", "0.670", "0.357", "0.151", "0.208", "0.256", "0.418", "0.591", "1.119", "0.758", "1.616", "1.698", "1.003", "1.774", "1.348", "1.088", "0.979", "0.992", "1.408", "1.312", "1.828", "1.429", "1.243", "1.093", "2.027", "2.205", "1.637", "1.456", "1.311", "1.531", "1.97", "2.182", "2.217", "2.128", "2.402", "1.471", "1.561", "1.449", "23017323003sp4", "2011", "19", "2/18/2014", "13:40:54", "36.6195", "1.631", "2.290", "1.652", "1.348", "1.335", "1.936", "3.442", "2.258", "1.883", "1.463", "1.282", "1.557", "2.282", "2.737", "2.736", "2.388", "1.346", "1.388", "1.240", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), .Dim = c(46L, 3L), .Dimnames = list(c("V2", "V3", "V9", "V13", "V14", "V112", "V113", "V114", "V115", "V116", "V117", "V118", "V119", "V120", "V121", "V122", "V123", "V124", "V125", "V126", "V127", "V128", "V129", "V130", "V131", "V132", "V133", "V134", "V135", "V136", "V137", "V138", "V139", "V140", "V141", "V142", "V143", "V144", "V145", "V146", "V147", "V148", "V149", "V150", "V151", "V152"), c("LY3A003B", "LY3A004A", "LY3A004B" )))
Using jlhoward's suggested code, I've tried the following (data.frame sample above is titled "Lync3rwlTrans":
series <- as.vector(Lync3rwlTrans[,3])
result <- do.call(cbind,lapply(series,function(s){
+ data <- s[7:46]
+ data <- data[data!="NA"]
+ end <- 40-(2011-as.numeric(s[2]))
+ start <- end-length(data)+1
+ ret <- rep("NA",40)
+ ret[start:end] <- data
+ return(c(s[1:6],ret))
+ }))
rownames(result) <- c("SiteID", "EndYear", "#Rings", "EditDate", "EditTime", "PathLength", 1972:2011)
result <- data.frame(result, stringsAsFactors=F)
result
However, I keep getting the following error: Error in start:end : NA/NaN argument
NA
(or value) moved up? – sgibb