The first thing you need to do is extract the numbers from your strings. Running:
# Break every cell of df apart at the dash. Applying over both margins stacks
# the pieces of each cell along a new leading dimension, so the result is a
# character array indexed as [piece, row, column].
# NOTE(review): assumes every cell splits into the same number of pieces;
# otherwise apply() cannot build a regular array - confirm against the data.
newdfstring <- apply(df, c(1, 2), function(cell) strsplit(cell, "-")[[1]])
# Turn each piece into a number while keeping the three-dimensional layout.
newdf <- apply(newdfstring, c(1, 2, 3), as.numeric)
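splits the strings in the first line, and converts the pieces to numeric values in the second. The result is a 3-dimensional array indexed as `[piece, row, column]`: `newdf[1,,]` holds the numbers before the dash and `newdf[2,,]` the numbers after it, which you can use to extract your values.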
Next, create a new data frame to hold the results:
# The output needs one row per distinct value found after the dash, counted
# across every column of the input.
dflength <- length(unique(as.vector(newdf[2, , ])))
# Allocate the result with zero-filled placeholder columns; they are
# overwritten row by row afterwards.
dfout <- data.frame(
  Value    = numeric(dflength),
  repeated = numeric(dflength),
  s1N      = numeric(dflength),
  s2N      = numeric(dflength),
  ss1      = numeric(dflength),
  ss2      = numeric(dflength)
)
The most obvious way (yet maybe not the most efficient) would then be to loop and match whatever it is you need:
# Fill dfout, one row per distinct value found after the dash:
#   Value    - the distinct value (in order of first appearance)
#   repeated - how many cells, over all columns, hold that value
#   s1N, s2N - how many cells of column 1 / column 2 hold that value
#   ss1, ss2 - comma-separated numbers that precede the dash in the matching
#              cells of column 1 / column 2, or 0 when nothing matches
dfout$Value <- unique(as.vector(newdf[2, , ]))
for (i in seq_len(dflength)) {
  value <- dfout$Value[i]
  # Rows of each column whose second piece equals the current value.
  in_s1 <- which(newdf[2, , 1] == value)
  in_s2 <- which(newdf[2, , 2] == value)

  # Count matches directly. A position looked up in the overall table() must
  # not be reused to index a per-column table(): the per-column tables only
  # contain the values present in that column, so the positions do not line up.
  dfout$repeated[i] <- sum(newdf[2, , ] == value, na.rm = TRUE)
  dfout$s1N[i] <- length(in_s1)
  dfout$s2N[i] <- length(in_s2)

  # Assigning a string turns the whole column into character, so the 0
  # placeholders end up as "0" once any row has a match.
  dfout$ss1[i] <- if (length(in_s1) > 0) toString(newdf[1, , 1][in_s1]) else 0
  dfout$ss2[i] <- if (length(in_s2) > 0) toString(newdf[1, , 2][in_s2]) else 0
}
dfout
#   Value repeated s1N s2N  ss1 ss2
# 1     4        3   2   1 3, 5   2
# 2     1        1   1   0    4   0
# 3    15        1   0   1    0   3
# 4    16        1   0   1    0   7
EDIT: to handle any number of s columns instead of exactly two, loop over the columns:
# Split every cell of df at the dash and convert the pieces to numbers.
# newdf is indexed as [piece, row, column].
newdfstring <- apply(df, 1:2, function(v) unlist(strsplit(v, "-")))
newdf <- apply(newdfstring, 1:3, as.numeric)

# Distinct values found after the dash, in order of first appearance; the
# output has one row per value.
values <- unique(as.vector(newdf[2, , ]))
dflength <- length(values)
# find the number of s variables
slength <- dim(newdf)[3]

# Collect the output columns in a list so each keeps its own type: counts stay
# numeric and only the "s" columns are character. (Filling one matrix with
# both numbers and strings would coerce every column to character.)
cols <- vector("list", 2 + 2 * slength)
# create a (near)-empty names array, we will fill it in later
col_names <- c("Value", "repeated", rep("", 2 * slength))

# fill in the Values column and the overall count; neither depends on the
# column being processed, so they are computed once, outside the loop
cols[[1]] <- values
cols[[2]] <- vapply(
  values,
  function(v) sum(newdf[2, , ] == v, na.rm = TRUE),
  numeric(1)
)

# loop for every s variable
for (j in seq_len(slength)) {
  # get their names, paste N or s and add them to the names array
  col_names[2 + j] <- paste0(names(df)[j], "N")
  col_names[2 + j + slength] <- paste0("s", names(df)[j])

  first_part <- newdf[1, , j]
  second_part <- newdf[2, , j]

  # Per-column count of each value. Counting directly avoids indexing a
  # per-column table() with a position taken from the overall table(), whose
  # levels generally differ.
  cols[[2 + j]] <- vapply(
    values,
    function(v) sum(second_part == v, na.rm = TRUE),
    numeric(1)
  )

  # Numbers before the dash in the matching rows, comma-separated; "0" when
  # the value does not occur in this column.
  cols[[2 + j + slength]] <- vapply(
    values,
    function(v) {
      hits <- which(second_part == v)
      if (length(hits) > 0) toString(first_part[hits]) else "0"
    },
    character(1)
  )
}

names(cols) <- col_names
dfout <- data.frame(cols, check.names = FALSE, stringsAsFactors = FALSE)
dfout
#   Value repeated s1N s2N  ss1 ss2
# 1     4        3   2   1 3, 5   2
# 2     1        1   1   0    4   0
# 3    15        1   0   1    0   3
# 4    16        1   0   1    0   7