This is a sample dataset -
data.frame(ISIN = c("US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US9898171015", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US9898171015",
"US9898171015", "US9898171015", "US9898171015", "US9898171015",
"US9898171015", "US9898171015", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US5535301064", "US5535301064",
"US5535301064", "US5535301064", "US9898171015", "US9898171015",
"US9898171015", "US9898171015", "US9898171015", "US9898171015",
"US9898171015", "US9898171015", "US9898171015", "US9898171015",
"US9898171015", "US9898171015", "US9898171015", "US9898171015",
"US9898171015", "US9898171015", "US9898171015", "US9898171015",
"US9898171015", "US9898171015", "US9898171015", "US9898171015",
"US9898171015", "US9898171015", "US9898171015", "US9898171015",
"US9898171015", "US5535301064", "US5535301064", "US9898171015",
"US9898171015", "US9898171015"), year = c(2016, 2017, 2009, 2010,
2011, 2012, 2013, 2014, 2015, 2015, 2010, 2011, 2012, 2013, 2014,
2015, 2016, 2017, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2000,
2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011,
2012, 2013, 2014, 2015, 2016, 2017, 2000, 2001, 2002, 2003, 2004,
2005, 2006, 2007, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2000, 2001, 2002,
2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
2014, 2015, 2016, 2017, 2017, 2009, 2010, 2011, 2006, 2007, 2008,
2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2006, 2007,
2008, 2009, 2010, 2011, 2013, 2014, 2015, 2016, 2017, 2016, 2017,
2007, 2008, 2009), DirectorName = c("Steven Paladino", "Steven Paladino",
"Louise Koopman Goeser", "Louise Koopman Goeser", "Louise Koopman Goeser",
"Louise Koopman Goeser", "Louise Koopman Goeser", "Louise Koopman Goeser",
"Louise Koopman Goeser", "Kalen F Holmes", "Doctor Jonathan L Byrnes",
"Doctor Jonathan L Byrnes", "Doctor Jonathan L Byrnes", "Doctor Jonathan L Byrnes",
"Doctor Jonathan L Byrnes", "Doctor Jonathan L Byrnes", "Doctor Jonathan L Byrnes",
"Doctor Jonathan L Byrnes", "Sarah (Sally) Gaines McCoy", "Sarah (Sally) Gaines McCoy",
"Sarah (Sally) Gaines McCoy", "Sarah (Sally) Gaines McCoy", "Sarah (Sally) Gaines McCoy",
"Sarah (Sally) Gaines McCoy", "Sarah (Sally) Gaines McCoy", "Denis F Kelly",
"Denis F Kelly", "Denis F Kelly", "Denis F Kelly", "Denis F Kelly",
"Denis F Kelly", "Denis F Kelly", "Denis F Kelly", "Denis F Kelly",
"Denis F Kelly", "Denis F Kelly", "Denis F Kelly", "Denis F Kelly",
"Denis F Kelly", "Denis F Kelly", "Denis F Kelly", "Denis F Kelly",
"Denis F Kelly", "Raymond (Ray) B Langton", "Raymond (Ray) B Langton",
"Raymond (Ray) B Langton", "Raymond (Ray) B Langton", "Raymond (Ray) B Langton",
"Raymond (Ray) B Langton", "Raymond (Ray) B Langton", "Raymond (Ray) B Langton",
"Roger B Fradin", "Roger B Fradin", "Roger B Fradin", "Roger B Fradin",
"Roger B Fradin", "Roger B Fradin", "Roger B Fradin", "Roger B Fradin",
"Roger B Fradin", "Roger B Fradin", "Roger B Fradin", "Roger B Fradin",
"Roger B Fradin", "Roger B Fradin", "Roger B Fradin", "Roger B Fradin",
"Philip (Phil) R Peller", "Philip (Phil) R Peller", "Philip (Phil) R Peller",
"Philip (Phil) R Peller", "Philip (Phil) R Peller", "Philip (Phil) R Peller",
"Philip (Phil) R Peller", "Philip (Phil) R Peller", "Philip (Phil) R Peller",
"Philip (Phil) R Peller", "Philip (Phil) R Peller", "Philip (Phil) R Peller",
"Philip (Phil) R Peller", "Philip (Phil) R Peller", "Philip (Phil) R Peller",
"Philip (Phil) R Peller", "Philip (Phil) R Peller", "Philip (Phil) R Peller",
"Scott Andrew Bailey", "William (Bill) Milroy Barnum Jr", "William (Bill) Milroy Barnum Jr",
"William (Bill) Milroy Barnum Jr", "Thomas (Tom) E Davin", "James (Jim) M Weber",
"James (Jim) M Weber", "James (Jim) M Weber", "James (Jim) M Weber",
"James (Jim) M Weber", "Ernest R Johnson", "Ernest R Johnson",
"Ernest R Johnson", "Ernest R Johnson", "Ernest R Johnson", "Ernest R Johnson",
"Matthew (Matt) L Hyde", "Matthew (Matt) L Hyde", "Matthew (Matt) L Hyde",
"Matthew (Matt) L Hyde", "Matthew (Matt) L Hyde", "Matthew (Matt) L Hyde",
"Travis D Smith", "Travis D Smith", "Travis D Smith", "Travis D Smith",
"Travis D Smith", "Michael (Mike) C Kaufmann", "Michael (Mike) C Kaufmann",
"David (Dave) M DeMattei", "David (Dave) M DeMattei", "David (Dave) M DeMattei"
), DirectorID = c("1000169302", "1000169302", "1058973478", "1058973478",
"1058973478", "1058973478", "1058973478", "1058973478", "1058973478",
"11051172801", "11275933344", "11275933344", "11275933344", "11275933344",
"11275933344", "11275933344", "11275933344", "11275933344", "11434863691",
"11434863691", "11434863691", "11434863691", "11434863691", "11434863691",
"11434863691", "1223062984", "1223062984", "1223062984", "1223062984",
"1223062984", "1223062984", "1223062984", "1223062984", "1223062984",
"1223062984", "1223062984", "1223062984", "1223062984", "1223062984",
"1223062984", "1223062984", "1223062984", "1223062984", "1223122984",
"1223122984", "1223122984", "1223122984", "1223122984", "1223122984",
"1223122984", "1223122984", "1223392984", "1223392984", "1223392984",
"1223392984", "1223392984", "1223392984", "1223392984", "1223392984",
"1223392984", "1223392984", "1223392984", "1223392984", "1223392984",
"1223392984", "1223392984", "1223392984", "1223552984", "1223552984",
"1223552984", "1223552984", "1223552984", "1223552984", "1223552984",
"1223552984", "1223552984", "1223552984", "1223552984", "1223552984",
"1223552984", "1223552984", "1223552984", "1223552984", "1223552984",
"1223552984", "174488610522", "20462211719", "20462211719", "20462211719",
"2247441792", "3581636766", "3581636766", "3581636766", "3581636766",
"3581636766", "40425210975", "40425210975", "40425210975", "40425210975",
"40425210975", "40425210975", "4842568996", "4842568996", "4842568996",
"4842568996", "4842568996", "4842568996", "53006212569", "53006212569",
"53006212569", "53006212569", "53006212569", "5532705122", "5532705122",
"759047198", "759047198", "759047198"))
Actually I want to filter out those DIRECTOR_ID
that are same from previous year t-1
to current year t
I run the following code to create group -
ceo1 %>%
group_by(ISIN, YEAR) %>%
mutate(
GROUP_ID = cur_group_id()
)
# A tibble: 38 x 6
# Groups: ISIN, YEAR [12]
ROW ISIN YEAR DIRECTOR_NAME DIRECTOR_ID GROUP_ID
<dbl> <chr> <dbl> <chr> <dbl> <int>
1 1 US98981710~ 2006 Thomas (Tom) E Davin 2247441792 1
2 2 US98981710~ 2006 Matthew (Matt) L Hyde 4842568996 1
3 3 US98981710~ 2007 James (Jim) M Weber 3581636766 2
4 4 US98981710~ 2007 Matthew (Matt) L Hyde 4842568996 2
5 5 US98981710~ 2007 David (Dave) M DeMattei 759047198 2
6 6 US98981710~ 2008 James (Jim) M Weber 3581636766 3
7 7 US98981710~ 2008 Matthew (Matt) L Hyde 4842568996 3
8 8 US98981710~ 2008 David (Dave) M DeMattei 759047198 3
9 9 US98981710~ 2009 William (Bill) Milroy Ba~ 20462211719 4
10 10 US98981710~ 2009 James (Jim) M Weber 3581636766 4
Then I do not know how to filter out those DIRECTOR_ID that are same from year to year. For example, for above outupt - DIRECTOR_ID - 4842568996 is in 2006, 2007, 2008 and DIRECTOR_ID - 3581636766 is in 2007, 2008, 2009. I want to filter them out who are present previous year (t-1
) and current year (t
). Note that I have to keep all variables in the new data set. Thanks
The expected output until year 2012 of sample data will look like this -
A tibble: 16 x 5
ROW ISIN YEAR DIRECTOR_NAME DIRECTOR_ID
<dbl> <chr> <dbl> <chr> <dbl>
1 3 US9898171015 2007 James (Jim) M Weber 3581636766
2 4 US9898171015 2007 Matthew (Matt) L Hyde 4842568996
3 5 US9898171015 2007 David (Dave) M DeMattei 759047198
4 6 US9898171015 2008 James (Jim) M Weber 3581636766
5 7 US9898171015 2008 Matthew (Matt) L Hyde 4842568996
6 8 US9898171015 2008 David (Dave) M DeMattei 759047198
7 9 US9898171015 2009 William (Bill) Milroy Barnum Jr 20462211719
8 10 US9898171015 2009 James (Jim) M Weber 3581636766
9 11 US9898171015 2009 Matthew (Matt) L Hyde 4842568996
10 13 US9898171015 2010 William (Bill) Milroy Barnum Jr 20462211719
11 14 US9898171015 2010 James (Jim) M Weber 3581636766
12 15 US9898171015 2010 Matthew (Matt) L Hyde 4842568996
13 16 US9898171015 2011 Sarah (Sally) Gaines McCoy 11434863691
14 17 US9898171015 2011 William (Bill) Milroy Barnum Jr 20462211719
15 19 US9898171015 2011 Matthew (Matt) L Hyde 4842568996
16 20 US9898171015 2012 Sarah (Sally) Gaines McCoy 11434863691