0
votes

I have a dataframe of children with their month and year of birth as well as air quality data their location from 1999 to 2013. I want to replicate a method used in a paper (https://www.nature.com/articles/s41586-018-0263-3#Sec2) where the authors compute the in utero exposure. Their calculation covers both air quality exposure for two years if the pregnancy to birth period occurred in two-year period. Using a direct quote from the paper "For instance, a child born in the third month of year t would be assigned an in utero pollution exposure of 2/9(exposure in year t) + 7/9(exposure in year t−1)". exposure in year t−1 means the year prior to the birth year.So for a child born in se month 9 to month 12 in year t, the child's exposure is the mean value for just year t.

Is there a way to capture this computing formula in R such that if the birth month is less than 9, then the formula above is applied. Else, the value for the birth year is returned.

The structure of a sample data is

structure(list(CaseID = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 
28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 
44, 45, 46, 47, 48, 49, 50), Birth_month = c(1, 5, 7, 3, 12, 
12, 11, 8, 8, 6, 9, 2, 10, 4, 8, 12, 9, 9, 1, 5, 7, 3, 10, 9, 
11, 8, 8, 3, 9, 2, 8, 9, 8, 5, 6, 8, 9, 1, 5, 7, 3, 10, 9, 11, 
8, 8, 3, 9, 2, 11), Birth_Year = c(2000, 2003, 2010, 2008, 2006, 
2001, 2012, 2013, 2007, 2007, 2008, 2000, 2013, 2004, 2001, 2010, 
2008, 2006, 2001, 2012, 2001, 2012, 2013, 2007, 2007, 2008, 2000, 
2013, 2013, 2004, 2001, 2010, 2008, 2006, 2001, 2012, 2001, 2012, 
2013, 2006, 2001, 2012, 2013, 2007, 2007, 2008, 2000, 2013, 2004, 
2001), AIR1999 = c(8.96798959699599, 6.80355783305597, 5.89652403522655, 
6.08351130989008, 0.421893172909738, 9.04891222893749, 7.32566080174991, 
11.9538120562027, 5.83821052537696, 3.05275316117657, 7.56280655869865, 
14.8201468275825, 7.64593596096383, 13.4841890986795, 14.0470039087897, 
7.70797096865252, 1.61059988426883, 3.80031918734312, 14.9501510635975, 
10.0847597081028, 10.1585740911858, 14.173089028311, 6.63212439636118, 
5.61902280925075, 0.986908540996723, 3.13157703826507, 4.89343957488122, 
5.31442170692771, 12.6929693986033, 1.26105029549589, 5.4180377367679, 
4.25665508354944, 14.3792099452973, 5.24041748730466, 6.07755938634626, 
4.95787754927552, 9.61953763780231, 3.75130929742288, 7.53080678060791, 
12.3820129775514, 13.6887069176028, 4.13298569958261, 14.1752688808057, 
3.52867313173367, 11.8386358425156, 1.79493586691422, 11.4111429566422, 
14.4466513980518, 10.9929119145086, 0.526445570948068), AIR2000 = c(11.5400193935689, 
4.51400380016048, 13.0405851599751, 9.26140852396632, 8.20474216026161, 
13.7864063032873, 2.26955144127086, 2.73370701770671, 1.31484532203898, 
5.95023887719191, 1.35904698184086, 6.38629071258241, 10.6814684475625, 
11.7245391233198, 6.32023995462316, 14.6061184533106, 13.1787409855174, 
2.6639327040026, 9.77708519725292, 14.2537217202925, 0.251322827515658, 
11.6949597093156, 0.901117715214845, 8.8839327113477, 7.87421084115212, 
11.7988941842713, 11.5616693068156, 13.6192478532903, 10.2098129178141, 
2.6167536478648, 1.54635387455602, 12.0428954083114, 4.64425452833646, 
15.1004807784159, 8.74940761103621, 6.54845786664169, 3.74066828807676, 
13.8062808303186, 3.14344133563456, 4.87711996067478, 1.83335648309905, 
0.564922403780743, 4.75278873524722, 14.1680012285325, 10.9841650458858, 
4.35061422900041, 9.69220729148341, 12.3134085446505, 4.58245493312832, 
6.09497011553915), AIR2001 = c(2.34047661832464, 10.1636542402387, 
5.63355084510311, 4.85862207704014, 9.23495329037285, 14.7018283497058, 
3.34896555680828, 10.7035308826005, 9.70681187241594, 5.94983366167778, 
10.6982700353058, 8.20110325993877, 5.47815369644901, 2.29662297346932, 
5.81311151243444, 5.86180122796237, 3.29303425198002, 12.6395572883144, 
9.19196868231753, 6.9857222145889, 11.3394421438701, 11.047885820206, 
10.3115117570383, 6.52485884934803, 4.32370917450194, 4.76700344993779, 
2.66265303340764, 9.34482253280724, 8.82734048097441, 12.4377567085049, 
13.4038470998239, 6.85719530528947, 13.0154083911972, 13.9397814449291, 
6.42288194378652, 11.5360288507433, 4.05413083475339, 3.52317486527632, 
5.33977231099782, 2.62500006449549, 6.07684988212213, 9.66904514889885, 
4.74849246345088, 4.30533116448019, 7.45883561741118, 6.83800770166353, 
13.7860890292185, 8.95913024599384, 4.22882908951701, 11.9366229237847
), AIR2002 = c(5.5421209915732, 8.82941941950074, 13.9415065737988, 
4.05088145747734, 0.634888762937393, 8.94379655820853, 14.6699177745578, 
2.92273129951465, 3.32000594967115, 9.6626486005201, 7.67113231661101, 
5.73763535333215, 6.66896355409967, 4.76775433524861, 11.7307171144853, 
11.9204512965262, 4.30270969435317, 6.07337335666409, 0.895004365468398, 
7.92298369263555, 0.373079233546509, 11.6968179701357, 2.86150118232053, 
2.24216720518936, 11.7806082410973, 9.40187198882131, 12.0526066867323, 
11.9956986808735, 3.58023497230932, 8.58664103748742, 7.57416980166524, 
4.31572127419198, 4.87601018757676, 1.42685255046957, 0.610385331556434, 
4.55194799008779, 14.0502073411413, 9.80004817027273, 8.61341614048323, 
0.900034241871675, 1.41049400813086, 1.55246704871324, 4.72272099354747,
0.316862174819224, 2.93053907830617, 8.05078771834052, 5.28952837263816, 
10.1443750119205, 9.46636938612279, 7.2068673761792), AIR2003 = c(11.6987492112564, 
8.03123494261876, 14.6267426478136, 10.1608755875826, 12.2925055704629, 
4.41903535030968, 1.32192490517627, 14.9171937333997, 5.989500815548, 
1.33018757369556, 11.5268041675435, 10.0354422837207, 12.2193238575859, 
8.34460034461785, 3.19971360721812, 4.72411370138545, 8.99170322034135, 
8.84431456421316, 9.19228873900324, 11.6073826548178, 5.04809568638727, 
2.44113702619448, 1.3020562816998, 6.58069895481877, 12.9026523987483, 
7.16368647065572, 8.66070749361161, 6.00576582145039, 2.10032442033477, 
6.07895397265628, 9.84188887039386, 8.72011480981018, 8.3973909978196, 
2.93135377582256, 2.07479393241927, 4.33787543469854, 11.5524645352829, 
6.32281940148678, 13.4578647589665, 8.25475389447622, 6.92853975148313, 
13.9737096681725, 3.20975688825082, 5.83681199098192, 14.7013523747697, 
12.8483105113003, 14.7442567509897, 11.3872388804331, 11.9289726106934, 
11.3432943127351), AIR2004 = c(12.3298210280007, 13.1959769331908, 
12.370661520134, 15.0868251372818, 8.94963014865853, 12.2191067473528, 
5.22362188581377, 0.720411225163843, 12.4820522407312, 0.351069664391922, 
1.30037323312717, 4.37076402375568, 3.62288978581387, 8.48409274117183, 
0.679597671930213, 7.34746075248183, 13.1647057766567, 11.1547359531655, 
4.96984202732425, 8.96129352980759, 9.53370854761545, 5.8134242443291, 
8.71016911426559, 11.8883539470122, 12.2352502209791, 4.68252076998213, 
5.95696850650571, 13.8056899585128, 7.50516538352217, 5.02250328123337, 
13.487732587669, 14.5949376791599, 14.9951364343211, 6.59220350365457, 
9.48408624811447, 1.41794018899766, 14.1069514028479, 9.36621385829034, 
13.7439532769257, 10.4824247457942, 14.7214399003615, 3.88337102833367, 
6.97264864603989, 8.02159295712691, 12.6082874644615, 13.1953726709504, 
13.6941493970903, 8.22842252415093, 2.50750512640784, 3.58666553804674
), AIR2005 = c(14.1543983449384, 0.207154301401461, 10.3945003528385, 
1.55163006050745, 11.7088354445258, 13.8307717176049, 1.60947553726472, 
12.4807727501071, 11.4753113418682, 12.1466166828242, 4.28278952114563, 
15.0142466179228, 10.775123869299, 8.21507695954968, 10.2687020091594, 
1.73548990486166, 5.87357538677612, 0.838787299717544, 2.09888134363526, 
1.10793127552466, 12.8584094551371, 5.82341710817441, 7.35206087073311, 
2.75513321271562, 2.28206509921863, 14.7161662780128, 1.92734397216351, 
8.52569639326329, 0.435341671429342, 14.8357928042437, 11.0187966747552, 
6.7161015153341, 0.729511158074718, 3.54536445078091, 0.25745916891587, 
13.7526868569555, 12.4909417931621, 2.44651768055302, 4.29353768908698, 
4.3296417530498, 0.332072943038773, 3.9897366906961, 7.34801499286247, 
9.05279436282767, 7.81229433161113, 14.6314884596826, 13.0453833885309, 
13.3968611858496, 7.20212454602541, 13.6024225526371), AIR2006 = c(14.3602117700654, 
9.67784631192451, 3.55228421162465, 9.17853459700709, 3.97472074926249, 
0.453478662259644, 13.5944658882257, 5.97668834996084, 5.33786357147782, 
7.47689095921, 2.81935510349181, 3.96459207065962, 13.239876144056, 
8.31305088601867, 4.70401683545741, 3.55927178151719, 0.198072984460741,
12.4664548987749, 3.95155436665518, 1.47646058510151, 1.15288747034175, 
10.5564422610337, 11.799767498377, 9.69579476557509, 13.4778237678527, 
0.578315307238372, 7.05817080976465, 12.4450234571598, 3.42532579906844, 
8.00017632821924, 8.99522336013103, 10.3307407885047, 7.55966268972564, 
2.25758463139716, 0.838791949320352, 14.5796161984191, 0.842886504037306, 
12.3815860850157, 7.64115155479219, 8.93122855890589, 0.309247846194077, 
9.11649027255387, 14.334240223086, 11.4479621604206, 0.662708246090217, 
11.4913576022733, 7.95924178698263, 2.67515668715094, 0.879071030008839, 
13.6501796660786), AIR2007 = c(9.97003772936854, 5.66991338106431, 
13.1535815664255, 6.65941205831734, 12.9431154270126, 7.54975869239541, 
2.68487146483106, 8.1954225972998, 0.989637708235998, 15.0157662873652, 
14.2492329591238, 8.72830057256878, 10.6656589124098, 14.407073575431, 
5.56657257661223, 12.0258530469052, 14.3806802332073, 6.06250679987832, 
12.6181324996231, 13.7419910761765, 7.96911312058102, 4.95833557256381, 
4.3284153526728, 4.37892194461683, 8.58931198042166, 15.086752575278, 
6.84238949001231, 3.06172467645654, 13.959445785162, 4.88097198925563, 
8.14034017345565, 14.5361199092346, 14.4077074720624, 8.95354569882294, 
2.42978998625372, 3.71779510277393, 8.94922842727159, 14.5382596874342, 
11.9046104310593, 3.08895008495753, 12.1694276854037, 12.2801624915064, 
3.20018387928163, 1.09386935686017, 2.04407673706114, 5.70477642147499, 
15.0334163082221, 6.06478327849694, 1.98094456569944, 1.69397633470688
), AIR2008 = c(9.30397066498711, 14.7486538500078, 5.58379651584826, 
8.80245459234202, 13.487498242771, 4.53696579690417, 14.4067800776965, 
6.79924817802059, 13.2960046531346, 14.6843470196975, 1.25236799454479, 
5.97676394814788, 8.07777813938539, 8.63516177444276, 8.9589754464475, 
15.0031446587539, 14.8031015943952, 2.49399273799104, 10.7569668015961, 
4.28002286700346, 10.1122482336515, 5.05124714686349, 11.0521815955096, 
0.343769731794717, 13.0853167199786, 12.1844864826184, 7.30493155200058, 
8.15045584558858, 7.54400607235706, 2.49430188038922, 0.753580058788415, 
13.3526555226133, 6.2869325775844, 8.3021644556059, 10.9850292504223, 
2.18342414438305, 7.04020942923496, 11.017099474912, 8.27777373495162, 
2.20133028454101, 8.49576657339185, 1.58340844516177, 2.63671668942692, 
12.1220992218638, 1.75073969954345, 1.62113385793217, 14.099990950956, 
6.91643273566617, 4.30560507998336, 3.77403995886794), AIR2009 = c(1.67957183947507, 
7.36369897405664, 6.58358088548272, 2.38956139467959, 7.1751103291281, 
6.10804192248383, 14.9931828636669, 1.0084126098291, 12.1872471913092, 
5.01698368997965, 13.3619180300804, 12.8454065388709, 13.4218508905321, 
14.4554111795065, 13.0900773408331, 12.3451018773937, 11.6958484717617, 
7.24591008744668, 8.89023789232783, 11.9545650390091, 10.6377289210549, 
1.51871868283534, 12.5126682352189, 5.65189430709695, 13.4598202915611, 
14.3695526043761, 7.51904321175604, 6.24126743773976, 14.8900968077758, 
8.72267893032101, 12.6833998839031, 2.2834426388687, 8.63599658885389, 
9.25064845107519, 2.92453739979817, 10.8969492889207, 8.18858585808612, 
1.62620084687509, 7.34673358480399, 12.0605738303089, 14.7037668900241, 
12.6395342878401, 3.43311224376853, 3.07568217523932, 12.7473398134939, 
12.403539704418, 11.9232473302584, 10.1782277375013, 4.20707396323467, 
0.415611640287098), AIR2010 = c(3.43646944312239, 1.35975490884157, 
0.983912079614354, 0.778802652686601, 12.3345025972561, 1.00613869713736, 
6.00807040606486, 10.3557605237481, 0.571828635439742, 3.23014963916549, 
1.50621501078014, 5.46978982399334, 10.487337793163, 2.86241729479888, 
1.20100536868256, 9.26454035470425, 0.789336679036962, 1.52654933029041, 
8.34533523020451, 11.7671594076965, 8.67667731308495, 11.9934830903618, 
3.69538885993906, 9.75540565486182, 9.09214047452505, 1.95581501926063, 
14.9897193048559, 14.5229993574421, 9.34056145941513, 1.41195704022213, 
9.19928426172957, 11.7689157741664, 6.50435117529007, 3.27914538083738, 
1.54789654616197, 10.6469015548979, 12.8618307883253, 8.4587018141062, 
11.5180247743961, 2.29130000634049, 2.89561315982137, 2.7726630775854, 
6.8413915040032, 13.9176417988634, 13.7705030146381, 0.52759597543464, 
1.77941870173556, 5.53348983248253, 12.8605902424706, 3.84327436685492
), AIR2011 = c(7.77058110130485, 6.32489416138083, 9.22097524081613, 
12.5675709558835, 8.91335506780934, 0.296842172110919, 14.3338035923715, 
4.95561812325357, 1.608350189402, 9.59421682450897, 0.325252260173904, 
4.3207606499726, 3.37387573508476, 7.52029055199213, 13.879214577225, 
0.226798813565169, 14.1581023259959, 4.70900534654991, 14.7834656228917, 
4.33595546064293, 9.77514042361919, 0.503774107655976, 7.74240870135883, 
4.71506228298857, 14.9198252162652, 7.62993979265424, 10.4375572762149, 
11.1040101385666, 3.69114950193861, 4.54456000300357, 8.70021347533702, 
6.00627810265892, 0.358524227888556, 5.78310163486213, 6.06518945520674, 
0.372255713034654, 1.26057865931257, 12.8861343483718, 13.7745616754233, 
11.8614557243602, 5.64862275314773, 6.31275415731431, 8.65507386001036, 
13.9926604995227, 0.355387645868585, 8.69261395941931, 13.1685344892796, 
6.71539060721896, 6.13620514613669, 11.5668249191546), AIR2012 = c(6.27081028353097, 
7.86677249507676, 0.58618860890693, 12.388112353907, 2.85958583392995, 
7.95952853473043, 2.56079553408665, 10.8089659682196, 5.11167549905134, 
11.0882686589232, 5.7707566303697, 11.3495220269873, 2.28377063603839, 
0.545485086908331, 3.47129222193733, 6.00050472526765, 9.87287770145945, 
4.24923844732647, 4.15209205533913, 14.5653932706246, 9.64327895222511, 
5.11627035313752, 13.1698418143084, 0.588214227333898, 5.74336776575609, 
3.65524465449248, 6.91148347930191, 9.29653071753797, 14.6478107533806, 
9.5541726365569, 3.80763447697437, 2.96324152937718, 2.91963071328681, 
6.08352127633477, 13.1309727081126, 10.8486344935496, 2.23741842607246, 
3.65041686724569, 1.81148035473563, 0.905375592498109, 13.6041364369688, 
10.7900264257432, 6.13362577195535, 6.43496705320152, 10.7790867917938, 
0.281412984936498, 5.93390544060129, 4.08022078832751, 14.0153999755471, 
5.40949646908627), AIR2013 = c(8.27712373652565, 9.04319920654944, 
6.74612877325271, 13.8873930297748, 11.9153593588832, 4.61324733532476, 
8.4878295564407, 5.77498647962371, 4.68663368575624, 9.53798435499915, 
13.8239222256639, 12.4329902363573, 11.6120311678429, 10.0747855773047, 
3.92196284314175, 5.33750072185858, 3.57540085327323, 9.85522755583306,
11.9822010349946, 3.45481815994019, 10.892755780485, 6.48081650614436, 
3.70137969797198, 12.7409640677895, 10.6433459473609, 2.62831938764267, 
14.5458863536206, 4.11003716498474, 3.62549759677309, 13.7243097189411, 
14.4954679398946, 6.75792811234971, 4.24549357121275, 7.19747523671994, 
10.0870223054055, 12.3274281337378, 12.2477434444681, 10.0755658776555, 
3.12831819514418, 6.52412743031723, 8.59926303841453, 10.9694159899252, 
6.06990504534845, 2.95378935882496, 1.91351455361326, 13.3501023647671, 
6.91474291581591, 8.28943165152357, 10.4379813756945, 7.7918838394885
)), row.names = c(NA, -50L), class = c("tbl_df", "tbl", "data.frame"
))

Using the sample data, my formula in simple terms would be

*exp = x/9(t) + y/9(t-1) if Birth_month < 9 #else exp = t

where x= Birth_month - 1; y = 9 - x; t = air quality value for the year of birth.*

Thank you

1
What have you tried so far?TarJae
Can you show your desired output? or maybe one sample calculation based on your data set.Anoushiravan R
In order to verify the answers can you do the calculation by hand and provide us the expected output for the example shared?Ronak Shah
Hi, Folks. Sorry for the late reply and also for not providing the desired output. The output from Nicolás Velásquez's solution is the same as the manual computations. In future, I will add the desired output. Thank you.Ekow_ababio

1 Answers

1
votes

There could be a more elegant way, but I would break it in 3 parts:

  1. Getting a table of cases with the years and months (numeric) of conception and birth.

  2. Getting a tidy table of caseID-year pairs of pollution values.

  3. Join 1 and 2 to get pollution values for the conception and birth years, and then run the arithmetic.

    library(tidyverse)
    
    df_cases <- df %>% 
      group_by(CaseID) %>% 
      select(Birth_month, Birth_Year) %>% 
      mutate(conception_year = if_else(Birth_month <= 9, true = Birth_Year - 1, false = Birth_Year), 
             months_in_birth_year = min(9, Birth_month), 
             months_in_conception_year = 9 - months_in_birth_year ) %>% ungroup()
    
    df_airquality <- 
      df %>% select(CaseID, AIR1999:AIR2013) %>% 
      pivot_longer(names_to = "year", names_prefix = "AIR", values_to = "airquality", cols = !CaseID) %>% mutate(year = as.numeric(year))
    
    df_exposure <- 
    df_cases %>% 
      left_join(df_airquality, by = c( "CaseID", "Birth_Year" = "year")) %>% 
      left_join(df_airquality, by = c( "CaseID", "conception_year" = "year")) %>% 
      mutate(utero_exposure = ((months_in_birth_year - 1)/9*airquality.x) + 
                              ((months_in_conception_year + 1)/9*airquality.y) ) %>% 
      select(CaseID, Birth_month,Birth_Year, airquality.x, airquality.y, utero_exposure)
    
    df_exposure
    # A tibble: 50 x 6
      CaseID Birth_month Birth_Year airquality.x airquality.y utero_exposure
      <dbl>       <dbl>      <dbl>        <dbl>        <dbl>          <dbl>
    1      1           1       2000       11.5           8.97           8.97
    2      2           5       2003        8.03          8.83           8.47
    3      3           7       2010        0.984         6.58           2.85
    4      4           3       2008        8.80          6.66           7.14
    5      5          12       2006        3.97          3.97           3.97
    6      6          12       2001       14.7          14.7           14.7 
    7      7          11       2012        2.56          2.56           2.56
    8      8           8       2013        5.77         10.8            6.89
    # ... with 42 more rows