0
votes

I'm having trouble getting expected results with an ifelse statement, used to parse University data into semesters and breaks within/between semester, creating a new column in the data frame, refstats$Semester.

Here's some sample data and my code for the section where I think/hope the problem is:

# Sample observation dates as ISO-8601 (YYYY-MM-DD) strings.
Date <- c(
  "2009-04-12", "2009-07-07", "2009-08-09", "2009-08-20", "2009-08-25",
  "2009-08-30", "2020-09-03", "2009-09-07", "2009-11-15", "2009-11-22",
  "2009-12-05", "2009-12-12", "2010-01-02", "2010-01-18", "2010-02-14",
  "2010-03-22", "2010-05-23", "2010-06-13", "2010-06-19", "2010-08-08",
  "2010-09-05", "2010-12-09", "2011-03-27", "2011-08-06", "2011-12-09",
  "2012-06-10", "2013-01-20", "2013-01-21", "2013-05-10", "2013-08-03",
  "2013-12-14", "2014-05-22", "2015-03-20", "2015-08-20", "2015-11-30",
  "2015-12-15", "2016-01-10", "2016-02-15"
)

# Build the data frame with its single Date column already parsed to class
# Date, so the column can be compared against as.Date() boundaries.
refstats <- data.frame(Date = as.Date(Date, format = "%Y-%m-%d"))

# ---- determine Semester, broken up into school-year ifelse statements
#  because R gets confused if the ifelse is too deeply nested.

# dummvar is just something to do in the final else condition, so it doesn't
# overwrite the correct Semester once it is found.

# 2009-2010
# Tests every refstats$Date against the 2009-2010 calendar periods, each given
# as an inclusive [start, end] pair of dates, from first period to last.
# NOTE(review): the value returned by this ifelse() chain is not assigned to
# anything. Each `refstats$Semester<-"..."` in a "yes" slot is an assignment
# expression that sets the WHOLE Semester column to that single string when
# the argument is evaluated; it does not label only the rows whose test is
# TRUE. `dummvar<-NA` in the last "no" slot just writes a throwaway variable.
ifelse(refstats$Date >= as.Date("2009-08-24") & refstats$Date <= as.Date("2009-09-06"), refstats$Semester<-"Fall" , 
  ifelse(refstats$Date >= as.Date("2009-09-07") & refstats$Date <= as.Date("2009-09-07"), refstats$Semester<-"Labor Day" , 
  ifelse(refstats$Date >= as.Date("2009-09-08") & refstats$Date <= as.Date("2009-11-20"), refstats$Semester<-"Fall" , 
  ifelse(refstats$Date >= as.Date("2009-11-21") & refstats$Date <= as.Date("2009-11-29"), refstats$Semester<-"Fall Break" , 
  ifelse(refstats$Date >= as.Date("2009-11-30") & refstats$Date <= as.Date("2009-12-10"), refstats$Semester<-"Fall" , 
  ifelse(refstats$Date >= as.Date("2009-12-11") & refstats$Date <= as.Date("2009-12-18"), refstats$Semester<-"Fall Finals" , 
  ifelse(refstats$Date >= as.Date("2009-12-19") & refstats$Date <= as.Date("2010-01-18"), refstats$Semester<-"Winter Break" , 
  ifelse(refstats$Date >= as.Date("2010-01-19") & refstats$Date <= as.Date("2010-03-19"), refstats$Semester<-"Spring" , 
  ifelse(refstats$Date >= as.Date("2010-03-20") & refstats$Date <= as.Date("2010-03-28"), refstats$Semester<-"Spring Break" , 
  ifelse(refstats$Date >= as.Date("2010-03-29") & refstats$Date <= as.Date("2010-05-06"), refstats$Semester<-"Spring" , 
  ifelse(refstats$Date >= as.Date("2010-05-07") & refstats$Date <= as.Date("2010-05-14"), refstats$Semester<-"Spring Finals" , 
  ifelse(refstats$Date >= as.Date("2010-05-15") & refstats$Date <= as.Date("2010-05-16"), refstats$Semester<-"Closed" , 
  ifelse(refstats$Date >= as.Date("2010-05-17") & refstats$Date <= as.Date("2010-06-12"), refstats$Semester<-"Summer I" , 
  ifelse(refstats$Date >= as.Date("2010-06-13") & refstats$Date <= as.Date("2010-06-13"), refstats$Semester<-"Closed" , 
  ifelse(refstats$Date >= as.Date("2010-06-14") & refstats$Date <= as.Date("2010-08-07"), refstats$Semester<-"Summer II" , 
  ifelse(refstats$Date >= as.Date("2010-08-08") & refstats$Date <= as.Date("2010-08-22"), refstats$Semester<-"Summer Break" , 
  dummvar<-NA ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )

# 2010-2011
# Same structure for the 2010-2011 calendar: one inclusive [start, end] date
# test per period, nested through the "no" slot of the previous test.
# NOTE(review): the ifelse() result is discarded, and every "yes" slot is a
# whole-column assignment (`refstats$Semester<-"..."`), not a per-row value.
ifelse(refstats$Date >= as.Date("2010-08-23") & refstats$Date <= as.Date("2010-09-05"), refstats$Semester<-"Fall" , 
  ifelse(refstats$Date >= as.Date("2010-09-06") & refstats$Date <= as.Date("2010-09-06"), refstats$Semester<-"Labor Day" , 
  ifelse(refstats$Date >= as.Date("2010-09-07") & refstats$Date <= as.Date("2010-11-19"), refstats$Semester<-"Fall" , 
  ifelse(refstats$Date >= as.Date("2010-11-20") & refstats$Date <= as.Date("2010-11-28"), refstats$Semester<-"Fall Break" , 
  ifelse(refstats$Date >= as.Date("2010-11-29") & refstats$Date <= as.Date("2010-12-09"), refstats$Semester<-"Fall" , 
  ifelse(refstats$Date >= as.Date("2010-12-10") & refstats$Date <= as.Date("2010-12-17"), refstats$Semester<-"Fall Finals" , 
  ifelse(refstats$Date >= as.Date("2010-12-18") & refstats$Date <= as.Date("2011-01-17"), refstats$Semester<-"Winter Break" , 
  ifelse(refstats$Date >= as.Date("2011-01-18") & refstats$Date <= as.Date("2011-03-18"), refstats$Semester<-"Spring" , 
  ifelse(refstats$Date >= as.Date("2011-03-19") & refstats$Date <= as.Date("2011-03-27"), refstats$Semester<-"Spring Break" , 
  ifelse(refstats$Date >= as.Date("2011-03-28") & refstats$Date <= as.Date("2011-05-05"), refstats$Semester<-"Spring" , 
  ifelse(refstats$Date >= as.Date("2011-05-06") & refstats$Date <= as.Date("2011-05-13"), refstats$Semester<-"Spring Finals" , 
  ifelse(refstats$Date >= as.Date("2011-05-14") & refstats$Date <= as.Date("2011-05-15"), refstats$Semester<-"Closed" , 
  ifelse(refstats$Date >= as.Date("2011-05-16") & refstats$Date <= as.Date("2011-06-11"), refstats$Semester<-"Summer I" , 
  ifelse(refstats$Date >= as.Date("2011-06-12") & refstats$Date <= as.Date("2011-06-12"), refstats$Semester<-"Closed" , 
  ifelse(refstats$Date >= as.Date("2011-06-13") & refstats$Date <= as.Date("2011-08-06"), refstats$Semester<-"Summer II" , 
  ifelse(refstats$Date >= as.Date("2011-08-07") & refstats$Date <= as.Date("2011-08-21"), refstats$Semester<-"Summer Break" , 
  dummvar<-NA ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )

# 2011-2012
# Inclusive [start, end] date tests for the 2011-2012 calendar periods.
# NOTE(review): the ifelse() result is discarded, and every "yes" slot is a
# whole-column assignment (`refstats$Semester<-"..."`), not a per-row value.
# NOTE(review): "Closed" ends 2012-05-13 and "Summer I" starts 2012-05-15, so
# 2012-05-14 matches no period here; confirm whether that gap is intended.
ifelse(refstats$Date >= as.Date("2011-08-22") & refstats$Date <= as.Date("2011-09-04"), refstats$Semester<-"Fall" , 
  ifelse(refstats$Date >= as.Date("2011-09-05") & refstats$Date <= as.Date("2011-09-05"), refstats$Semester<-"Labor Day" , 
  ifelse(refstats$Date >= as.Date("2011-09-06") & refstats$Date <= as.Date("2011-11-18"), refstats$Semester<-"Fall" , 
  ifelse(refstats$Date >= as.Date("2011-11-19") & refstats$Date <= as.Date("2011-11-27"), refstats$Semester<-"Fall Break" , 
  ifelse(refstats$Date >= as.Date("2011-11-28") & refstats$Date <= as.Date("2011-12-08"), refstats$Semester<-"Fall" , 
  ifelse(refstats$Date >= as.Date("2011-12-09") & refstats$Date <= as.Date("2011-12-16"), refstats$Semester<-"Fall Finals" , 
  ifelse(refstats$Date >= as.Date("2011-12-17") & refstats$Date <= as.Date("2012-01-16"), refstats$Semester<-"Winter Break" , 
  ifelse(refstats$Date >= as.Date("2012-01-17") & refstats$Date <= as.Date("2012-03-16"), refstats$Semester<-"Spring" , 
  ifelse(refstats$Date >= as.Date("2012-03-17") & refstats$Date <= as.Date("2012-03-25"), refstats$Semester<-"Spring Break" , 
  ifelse(refstats$Date >= as.Date("2012-03-26") & refstats$Date <= as.Date("2012-05-03"), refstats$Semester<-"Spring" , 
  ifelse(refstats$Date >= as.Date("2012-05-04") & refstats$Date <= as.Date("2012-05-11"), refstats$Semester<-"Spring Finals" , 
  ifelse(refstats$Date >= as.Date("2012-05-12") & refstats$Date <= as.Date("2012-05-13"), refstats$Semester<-"Closed" , 
  ifelse(refstats$Date >= as.Date("2012-05-15") & refstats$Date <= as.Date("2012-06-09"), refstats$Semester<-"Summer I" , 
  ifelse(refstats$Date >= as.Date("2012-06-10") & refstats$Date <= as.Date("2012-06-10"), refstats$Semester<-"Closed" , 
  ifelse(refstats$Date >= as.Date("2012-06-11") & refstats$Date <= as.Date("2012-08-04"), refstats$Semester<-"Summer II" , 
  ifelse(refstats$Date >= as.Date("2012-08-05") & refstats$Date <= as.Date("2012-08-26"), refstats$Semester<-"Summer Break" , 
  dummvar<-NA ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )

# 2012-2013 note this year has MLK day within Spring Semester
# Inclusive [start, end] date tests for the 2012-2013 calendar periods; this
# chain has 18 tests because Spring is split around the one-day "MLK Day".
# NOTE(review): the ifelse() result is discarded, and every "yes" slot is a
# whole-column assignment (`refstats$Semester<-"..."`), not a per-row value.
ifelse(refstats$Date >= as.Date("2012-08-27") & refstats$Date <= as.Date("2012-09-02"), refstats$Semester<-"Fall" , 
  ifelse(refstats$Date >= as.Date("2012-09-03") & refstats$Date <= as.Date("2012-09-03"), refstats$Semester<-"Labor Day" , 
  ifelse(refstats$Date >= as.Date("2012-09-04") & refstats$Date <= as.Date("2012-11-16"), refstats$Semester<-"Fall" , 
  ifelse(refstats$Date >= as.Date("2012-11-17") & refstats$Date <= as.Date("2012-11-25"), refstats$Semester<-"Fall Break" , 
  ifelse(refstats$Date >= as.Date("2012-11-26") & refstats$Date <= as.Date("2012-12-13"), refstats$Semester<-"Fall" , 
  ifelse(refstats$Date >= as.Date("2012-12-14") & refstats$Date <= as.Date("2012-12-20"), refstats$Semester<-"Fall Finals" , 
  ifelse(refstats$Date >= as.Date("2012-12-21") & refstats$Date <= as.Date("2013-01-13"), refstats$Semester<-"Winter Break" , 
  ifelse(refstats$Date >= as.Date("2013-01-14") & refstats$Date <= as.Date("2013-01-20"), refstats$Semester<-"Spring" , 
  ifelse(refstats$Date >= as.Date("2013-01-21") & refstats$Date <= as.Date("2013-01-21"), refstats$Semester<-"MLK Day" , 
  ifelse(refstats$Date >= as.Date("2013-01-22") & refstats$Date <= as.Date("2013-03-15"), refstats$Semester<-"Spring" , 
  ifelse(refstats$Date >= as.Date("2013-03-16") & refstats$Date <= as.Date("2013-03-24"), refstats$Semester<-"Spring Break" , 
  ifelse(refstats$Date >= as.Date("2013-03-25") & refstats$Date <= as.Date("2013-05-02"), refstats$Semester<-"Spring" , 
  ifelse(refstats$Date >= as.Date("2013-05-03") & refstats$Date <= as.Date("2013-05-10"), refstats$Semester<-"Spring Finals" , 
  ifelse(refstats$Date >= as.Date("2013-05-11") & refstats$Date <= as.Date("2013-05-12"), refstats$Semester<-"Closed" , 
  ifelse(refstats$Date >= as.Date("2013-05-13") & refstats$Date <= as.Date("2013-06-08"), refstats$Semester<-"Summer I" , 
  ifelse(refstats$Date >= as.Date("2013-06-09") & refstats$Date <= as.Date("2013-06-09"), refstats$Semester<-"Closed" , 
  ifelse(refstats$Date >= as.Date("2013-06-10") & refstats$Date <= as.Date("2013-08-03"), refstats$Semester<-"Summer II" , 
  ifelse(refstats$Date >= as.Date("2013-08-04") & refstats$Date <= as.Date("2013-08-25"), refstats$Semester<-"Summer Break" , 
  dummvar<-NA ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )

# 2013-2014
# Inclusive [start, end] date tests for the 2013-2014 calendar periods.
# NOTE(review): the ifelse() result is discarded, and every "yes" slot is a
# whole-column assignment (`refstats$Semester<-"..."`), not a per-row value.
ifelse(refstats$Date >= as.Date("2013-08-26") & refstats$Date <= as.Date("2013-09-01"), refstats$Semester<-"Fall" , 
  ifelse(refstats$Date >= as.Date("2013-09-02") & refstats$Date <= as.Date("2013-09-02"), refstats$Semester<-"Labor Day" , 
  ifelse(refstats$Date >= as.Date("2013-09-03") & refstats$Date <= as.Date("2013-11-22"), refstats$Semester<-"Fall" , 
  ifelse(refstats$Date >= as.Date("2013-11-23") & refstats$Date <= as.Date("2013-12-01"), refstats$Semester<-"Fall Break" , 
  ifelse(refstats$Date >= as.Date("2013-12-02") & refstats$Date <= as.Date("2013-12-12"), refstats$Semester<-"Fall" , 
  ifelse(refstats$Date >= as.Date("2013-12-13") & refstats$Date <= as.Date("2013-12-20"), refstats$Semester<-"Fall Finals" , 
  ifelse(refstats$Date >= as.Date("2013-12-21") & refstats$Date <= as.Date("2014-01-20"), refstats$Semester<-"Winter Break" , 
  ifelse(refstats$Date >= as.Date("2014-01-21") & refstats$Date <= as.Date("2014-03-21"), refstats$Semester<-"Spring" , 
  ifelse(refstats$Date >= as.Date("2014-03-22") & refstats$Date <= as.Date("2014-03-30"), refstats$Semester<-"Spring Break" , 
  ifelse(refstats$Date >= as.Date("2014-03-31") & refstats$Date <= as.Date("2014-05-08"), refstats$Semester<-"Spring" , 
  ifelse(refstats$Date >= as.Date("2014-05-09") & refstats$Date <= as.Date("2014-05-16"), refstats$Semester<-"Spring Finals" , 
  ifelse(refstats$Date >= as.Date("2014-05-17") & refstats$Date <= as.Date("2014-05-18"), refstats$Semester<-"Closed" , 
  ifelse(refstats$Date >= as.Date("2014-05-19") & refstats$Date <= as.Date("2014-06-14"), refstats$Semester<-"Summer I" , 
  ifelse(refstats$Date >= as.Date("2014-06-15") & refstats$Date <= as.Date("2014-06-15"), refstats$Semester<-"Closed" , 
  ifelse(refstats$Date >= as.Date("2014-06-16") & refstats$Date <= as.Date("2014-08-09"), refstats$Semester<-"Summer II" , 
  ifelse(refstats$Date >= as.Date("2014-08-10") & refstats$Date <= as.Date("2014-08-24"), refstats$Semester<-"Summer Break" , 
  dummvar<-NA ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )

# 2014-2015
# Inclusive [start, end] date tests for the 2014-2015 calendar periods.
# NOTE(review): the ifelse() result is discarded, and every "yes" slot is a
# whole-column assignment (`refstats$Semester<-"..."`), not a per-row value.
ifelse(refstats$Date >= as.Date("2014-08-25") & refstats$Date <= as.Date("2014-08-31"), refstats$Semester<-"Fall" , 
  ifelse(refstats$Date >= as.Date("2014-09-01") & refstats$Date <= as.Date("2014-09-01"), refstats$Semester<-"Labor Day" , 
  ifelse(refstats$Date >= as.Date("2014-09-02") & refstats$Date <= as.Date("2014-11-21"), refstats$Semester<-"Fall" , 
  ifelse(refstats$Date >= as.Date("2014-11-22") & refstats$Date <= as.Date("2014-11-30"), refstats$Semester<-"Fall Break" , 
  ifelse(refstats$Date >= as.Date("2014-12-01") & refstats$Date <= as.Date("2014-12-11"), refstats$Semester<-"Fall" , 
  ifelse(refstats$Date >= as.Date("2014-12-12") & refstats$Date <= as.Date("2014-12-19"), refstats$Semester<-"Fall Finals" , 
  ifelse(refstats$Date >= as.Date("2014-12-20") & refstats$Date <= as.Date("2015-01-19"), refstats$Semester<-"Winter Break" , 
  ifelse(refstats$Date >= as.Date("2015-01-20") & refstats$Date <= as.Date("2015-03-20"), refstats$Semester<-"Spring" , 
  ifelse(refstats$Date >= as.Date("2015-03-21") & refstats$Date <= as.Date("2015-03-29"), refstats$Semester<-"Spring Break" , 
  ifelse(refstats$Date >= as.Date("2015-03-30") & refstats$Date <= as.Date("2015-05-07"), refstats$Semester<-"Spring", 
  ifelse(refstats$Date >= as.Date("2015-05-08") & refstats$Date <= as.Date("2015-05-15"), refstats$Semester<-"Spring Finals" , 
  ifelse(refstats$Date >= as.Date("2015-05-16") & refstats$Date <= as.Date("2015-05-17"), refstats$Semester<-"Closed" , 
  ifelse(refstats$Date >= as.Date("2015-05-18") & refstats$Date <= as.Date("2015-06-13"), refstats$Semester<-"Summer I" , 
  ifelse(refstats$Date >= as.Date("2015-06-14") & refstats$Date <= as.Date("2015-06-14"), refstats$Semester<-"Closed" , 
  ifelse(refstats$Date >= as.Date("2015-06-15") & refstats$Date <= as.Date("2015-08-08"), refstats$Semester<-"Summer II" , 
  ifelse(refstats$Date >= as.Date("2015-08-09") & refstats$Date <= as.Date("2015-08-23"), refstats$Semester<-"Summer Break" , 
  dummvar<-NA ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )

# 2015-
# Inclusive [start, end] date tests for the part of 2015-2016 defined so far
# (Fall 2015 through the Winter Break ending 2016-01-18).
# NOTE(review): the ifelse() result is discarded, and every "yes" slot is a
# whole-column assignment (`refstats$Semester<-"..."`), not a per-row value.
# The "Winter Break" assignment below is the last one written in this script.
ifelse(refstats$Date >= as.Date("2015-08-24") & refstats$Date <= as.Date("2015-09-06"), refstats$Semester<-"Fall" , 
  ifelse(refstats$Date >= as.Date("2015-09-07") & refstats$Date <= as.Date("2015-09-07"), refstats$Semester<-"Labor Day" , 
  ifelse(refstats$Date >= as.Date("2015-09-08") & refstats$Date <= as.Date("2015-11-20"), refstats$Semester<-"Fall" , 
  ifelse(refstats$Date >= as.Date("2015-11-21") & refstats$Date <= as.Date("2015-11-29"), refstats$Semester<-"Fall Break" , 
  ifelse(refstats$Date >= as.Date("2015-11-30") & refstats$Date <= as.Date("2015-12-10"), refstats$Semester<-"Fall" , 
  ifelse(refstats$Date >= as.Date("2015-12-11") & refstats$Date <= as.Date("2015-12-18"), refstats$Semester<-"Fall Finals" , 
  ifelse(refstats$Date >= as.Date("2015-12-19") & refstats$Date <= as.Date("2016-01-18"), refstats$Semester<-"Winter Break" , 
  dummvar<-NA ) ) ) ) ) ) )

# Print the data frame to inspect the Date and Semester columns.
refstats

I would have expected the final statement to print something that looks like:

Date Semester
2009-04-12 NA
2009-07-07 NA
2009-08-09 NA
2009-08-20 NA
2009-08-25 Fall
2009-08-30 Fall
2020-09-03 NA
2009-09-07 Labor Day
2009-11-15 Fall
2009-11-22 Fall Break
2009-12-05 Fall
2009-12-12 Fall Finals
2010-01-02 Winter Break
2010-01-18 Winter Break
2010-02-14 Spring
2010-03-22 Spring Break
2010-05-23 Summer I
2010-06-13 Closed
2010-06-19 Summer II
2010-08-08 Summer Break
2010-09-05 Fall
2010-12-09 Fall
2011-03-27 Spring Break
2011-08-06 Summer II
2011-12-09 Fall Finals
2012-06-10 Closed
2013-01-20 Spring
2013-01-21 MLK Day
2013-05-10 Spring Finals
2013-08-03 Summer II
2013-12-14 Fall Finals
2014-05-22 Summer I
2015-03-20 Spring
2015-08-20 Summer Break
2015-11-30 Fall
2015-12-15 Fall Finals
2016-01-10 Winter Break
2016-02-15 NA

Instead, every value of refstats$Semester is "Winter Break".

Any thoughts or advice on how I might have gone astray, or perhaps some better approach to the problem? My last question was related to this - I'm still struggling to understand.


(I deleted some of my intermediate attempts to resolve the question, so that I stay within the 30,000-character limit.)


Third try, adding as.Date:

I had to replace my initial code (due to the 30,000-character limit on asking a question), so the new code is at the top, with as.Date in there.

Alas! Now I'm back to everything coming out as "Winter Break" again!!!


Fourth try, trying to use findInterval:

# Sample dates to classify, as ISO-8601 (YYYY-MM-DD) strings.
Date <- c(
  "2009-04-12", "2009-07-07", "2009-08-09", "2009-08-20", "2009-08-25",
  "2009-08-30", "2020-09-03", "2009-09-07", "2009-11-15", "2009-11-22",
  "2009-12-05", "2009-12-12", "2010-01-02", "2010-01-18", "2010-02-14",
  "2010-03-22", "2010-05-23", "2010-06-13", "2010-06-19", "2010-08-08",
  "2010-09-05", "2010-12-09", "2011-03-27", "2011-08-06", "2011-12-09",
  "2012-06-10", "2013-01-20", "2013-01-21", "2013-05-10", "2013-08-03",
  "2013-12-14", "2014-05-22", "2015-03-20", "2015-08-20", "2015-11-30",
  "2015-12-15", "2016-01-10", "2016-02-15"
)

# Period labels of a standard academic year, in calendar order (16 periods).
std_year <- c(
  "Fall", "Labor Day", "Fall", "Fall Break", "Fall", "Fall Finals",
  "Winter Break", "Spring", "Spring Break", "Spring", "Spring Finals",
  "Closed", "Summer I", "Closed", "Summer II", "Summer Break"
)
# 2012-2013 splits the first Spring period around MLK Day (18 periods).
mlk_year <- append(std_year, c("MLK Day", "Spring"), after = 8)

# One label per calendar row. The first and last rows are sentinels for dates
# before 2009-08-24 and after 2016-01-18; they carry a real missing value
# (not the string "NA") so out-of-range dates come out as NA.
Semester <- c(
  NA_character_,
  std_year,       # 2009-2010
  std_year,       # 2010-2011
  std_year,       # 2011-2012
  mlk_year,       # 2012-2013
  std_year,       # 2013-2014
  std_year,       # 2014-2015
  std_year[1:7],  # 2015- (through Winter Break)
  NA_character_
)

# First day of each period. These are the breakpoints used for classification.
StartDate <- c(
  "1000-01-01",  # sentinel: everything before the first defined period
  # 2009-2010
  "2009-08-24", "2009-09-07", "2009-09-08", "2009-11-21", "2009-11-30",
  "2009-12-11", "2009-12-19", "2010-01-19", "2010-03-20", "2010-03-29",
  "2010-05-07", "2010-05-15", "2010-05-17", "2010-06-13", "2010-06-14",
  "2010-08-08",
  # 2010-2011
  "2010-08-23", "2010-09-06", "2010-09-07", "2010-11-20", "2010-11-29",
  "2010-12-10", "2010-12-18", "2011-01-18", "2011-03-19", "2011-03-28",
  "2011-05-06", "2011-05-14", "2011-05-16", "2011-06-12", "2011-06-13",
  "2011-08-07",
  # 2011-2012
  # NOTE(review): "Closed" ends 2012-05-13 but "Summer I" starts 2012-05-15,
  # so 2012-05-14 is labelled "Closed" here; confirm the intended start date.
  "2011-08-22", "2011-09-05", "2011-09-06", "2011-11-19", "2011-11-28",
  "2011-12-09", "2011-12-17", "2012-01-17", "2012-03-17", "2012-03-26",
  "2012-05-04", "2012-05-12", "2012-05-15", "2012-06-10", "2012-06-11",
  "2012-08-05",
  # 2012-2013 (includes MLK Day)
  "2012-08-27", "2012-09-03", "2012-09-04", "2012-11-17", "2012-11-26",
  "2012-12-14", "2012-12-21", "2013-01-14", "2013-01-21", "2013-01-22",
  "2013-03-16", "2013-03-25", "2013-05-03", "2013-05-11", "2013-05-13",
  "2013-06-09", "2013-06-10", "2013-08-04",
  # 2013-2014
  "2013-08-26", "2013-09-02", "2013-09-03", "2013-11-23", "2013-12-02",
  "2013-12-13", "2013-12-21", "2014-01-21", "2014-03-22", "2014-03-31",
  "2014-05-09", "2014-05-17", "2014-05-19", "2014-06-15", "2014-06-16",
  "2014-08-10",
  # 2014-2015
  # Summer Break starts 2015-08-09, the day after Summer II ends (2015-08-08).
  # The earlier value "2015-08-19" left ten days mislabelled as "Summer II".
  "2014-08-25", "2014-09-01", "2014-09-02", "2014-11-22", "2014-12-01",
  "2014-12-12", "2014-12-20", "2015-01-20", "2015-03-21", "2015-03-30",
  "2015-05-08", "2015-05-16", "2015-05-18", "2015-06-14", "2015-06-15",
  "2015-08-09",
  # 2015-
  "2015-08-24", "2015-09-07", "2015-09-08", "2015-11-21", "2015-11-30",
  "2015-12-11", "2015-12-19",
  "2016-01-19"   # sentinel: everything after the last defined period
)

# Last day of each period. Kept for reference only; not used for lookup.
EndDate <- c(
  "2009-08-23",  # end of the leading sentinel interval
  # 2009-2010
  "2009-09-06", "2009-09-07", "2009-11-20", "2009-11-29", "2009-12-10",
  "2009-12-18", "2010-01-18", "2010-03-19", "2010-03-28", "2010-05-06",
  "2010-05-14", "2010-05-16", "2010-06-12", "2010-06-13", "2010-08-07",
  "2010-08-22",
  # 2010-2011
  "2010-09-05", "2010-09-06", "2010-11-19", "2010-11-28", "2010-12-09",
  "2010-12-17", "2011-01-17", "2011-03-18", "2011-03-27", "2011-05-05",
  "2011-05-13", "2011-05-15", "2011-06-11", "2011-06-12", "2011-08-06",
  "2011-08-21",
  # 2011-2012
  "2011-09-04", "2011-09-05", "2011-11-18", "2011-11-27", "2011-12-08",
  "2011-12-16", "2012-01-16", "2012-03-16", "2012-03-25", "2012-05-03",
  "2012-05-11", "2012-05-13", "2012-06-09", "2012-06-10", "2012-08-04",
  "2012-08-26",
  # 2012-2013 (includes MLK Day)
  "2012-09-02", "2012-09-03", "2012-11-16", "2012-11-25", "2012-12-13",
  "2012-12-20", "2013-01-13", "2013-01-20", "2013-01-21", "2013-03-15",
  "2013-03-24", "2013-05-02", "2013-05-10", "2013-05-12", "2013-06-08",
  "2013-06-09", "2013-08-03", "2013-08-25",
  # 2013-2014
  "2013-09-01", "2013-09-02", "2013-11-22", "2013-12-01", "2013-12-12",
  "2013-12-20", "2014-01-20", "2014-03-21", "2014-03-30", "2014-05-08",
  "2014-05-16", "2014-05-18", "2014-06-14", "2014-06-15", "2014-08-09",
  "2014-08-24",
  # 2014-2015
  "2014-08-31", "2014-09-01", "2014-11-21", "2014-11-30", "2014-12-11",
  "2014-12-19", "2015-01-19", "2015-03-20", "2015-03-29", "2015-05-07",
  "2015-05-15", "2015-05-17", "2015-06-13", "2015-06-14", "2015-08-08",
  "2015-08-23",
  # 2015-
  "2015-09-06", "2015-09-07", "2015-11-20", "2015-11-29", "2015-12-10",
  "2015-12-18", "2016-01-18",
  "3000-01-01"   # end of the trailing sentinel interval
)

# create data frames; stringsAsFactors = FALSE keeps Semester as character
# regardless of the R version's default.
refstats <- data.frame(Date = as.Date(Date, format = "%Y-%m-%d"))
calendar <- data.frame(Semester, StartDate, EndDate, stringsAsFactors = FALSE)

# convert the calendar boundaries to Date objects so they can be compared with
# refstats$Date (the argument is `format`, not `fromat`).
calendar$StartDate <- as.Date(calendar$StartDate, format = "%Y-%m-%d")
calendar$EndDate <- as.Date(calendar$EndDate, format = "%Y-%m-%d")

# findInterval() needs non-decreasing breakpoints with no missing values;
# fail loudly if a typo in the calendar breaks that.
stopifnot(
  !anyNA(calendar$StartDate),
  !is.unsorted(as.numeric(calendar$StartDate))
)

# determine semester of each refstats$Date, by reference dataframe containing
# calendar of school year dates.  calendar$EndDate is not really used, because
# the findInterval function defaults to left-closed intervals so only need to
# use StartDate. The year-1000 sentinel keeps every index >= 1.
refstats$Semester <- calendar$Semester[
  findInterval(refstats$Date, calendar$StartDate)
]

At first, this yielded an error message: `Error in $<-.data.frame(*tmp*, "Semester", value = c(2L, 2L, 13L, : replacement has 34 rows, data has 38`

My data has some values that are out of range on both ends for the intervals. So I added a start interval that goes back to the year 1000, and an end interval that goes up to year 3000. It works!

1
You are comparing numeric values (which is what Date-classed variables are) to character values. – IRTFM
I have added replacement code at the bottom of my question, with '' removed within the ifelse statement. The code runs, but finds no matching dates - am I still comparing apples and oranges? – Steve T
Type 2015-12-19 with no quotes at your console and see what you get. – IRTFM
Well, that produces 1984, but I am no closer to understanding why - obviously I have some beginner's problem. – Steve T
The number of dates is not a limiting factor. Once you have encoded all the "dividing-Dates" and assembled corresponding names, you could run a million Dates through that "machine". It's going to be quite a bit faster than the ifelse monstrosity you are building. If you want to continue with the ifelse strategy you should try it with just a few intervals to get your logic correct; then "scale it up". – IRTFM

1 Answer

2
votes

There is a logical problem you have not addressed: what about the dates prior to the first date in your ifelse?

> dt2009_10 <- scan(what="")
1: '2009-08-24'
2: '2009-09-07'
3: '2009-09-08'
4: '2009-11-21'
5: '2009-11-30'
6: '2009-12-11'
7: '2009-12-19'
8: '2010-01-19'
9: '2010-03-20'
10: '2010-03-29'
11: '2010-05-07'
12: '2010-05-15'
13: '2010-05-17'
14: '2010-06-13'
15: '2010-06-14'
16: '2010-08-08'
17: 
Read 16 items

> nam2009_10 <- c("Fall" , 
+ "Labor Day" , 
+ "Fall" , 
+ "Fall Break" , 
+ "Fall" , 
+ "Fall Finals" , 
+ "Winter Break" , 
+ "Spring" , 
+ "Spring Break" , 
+ "Spring" , 
+ "Spring Finals" , 
+ "Closed" , 
+ "Summer I" , 
+ "Closed" , 
+ "Summer II" , 
+ "Summer Break" )

I'm not going to do all the work to construct the multi-year set of values but rather will demonstrate how to avoid using nested ifelse statements. The findInterval function defaults to left-closed intervals so we can use your beginning set of values, which I extracted and put into dt2009_10. Then compare to your Date-classed Date variable and use the result as an index into the names of intervals that I put into nam2009_10:

nam2009_10[ findInterval(refstats$Date, as.Date(dt2009_10) ) ]
 [1] "Fall"         "Fall"         "Summer Break" "Labor Day"    "Fall"        
 [6] "Fall Break"   "Fall"         "Fall Finals"  "Winter Break" "Winter Break"
[11] "Spring"       "Spring Break" "Summer I"     "Closed"       "Summer II"   
[16] "Summer Break" "Summer Break" "Summer Break" "Summer Break" "Summer Break"
[21] "Summer Break" "Summer Break" "Summer Break" "Summer Break" "Summer Break"
[26] "Summer Break" "Summer Break" "Summer Break" "Summer Break" "Summer Break"
[31] "Summer Break" "Summer Break" "Summer Break" "Summer Break"

Since I didn't do 2010-11, or later years, all the later dates showed up as "Summer Break".

Then it