Plotting confidence intervals with NA values

Question

I would like to plot confidence intervals to a data with NAs, using Gviz package. I modified manual example to expose my problem. First as the manual expose:

library(Gviz)

## Loading GRanges object
data(twoGroups)

## Plot data without NAs
dTrack <- DataTrack(twoGroups, name = "uniform")
tiff("Gviz_original.tiff", units="in", width=11, height=8.5, res=200, compress="lzw")
plotTracks(dTrack, groups = rep(c("control", "treated"),
                                each = 3), type = c("a", "p", "confint"))
graphics.off()

Now, using data with NA values and na.rm=TRUE statment:

  ## Transforming in data frame
  df <- as.data.frame(twoGroups)

## Input NAs to look like my real data
df[ df <= 0 ] = NA
df <- df[,-4]
df <- df[,-4]
names(df) <- c("chr", "start", "end", "control", "control.1", "control.2", "treated", "treated.1", "treated.2")

## Plot with NA
library(GenomicRanges)
df <- makeGRangesFromDataFrame(df, TRUE)
dftrack <- DataTrack(df, name = "uniform")
tiff("Gviz_NA.tiff", units="in", width=11, height=8.5, res=200, compress="lzw")
plotTracks(dftrack, groups = rep(c("control", "treated"),
                                 each = 3), type = c("a", "p", "confint"), na.rm=TRUE)
graphics.off()

Please note that I included the na.rm=TRUE statment in plotTracks function, which allowed the computation of the line following the mean. However, the shaded area which represents the confidence interval, can´t be estimated where I have NA values, even with the na.rm=TRUE.
Any ideas to deal with this issue? Thank you!

UPDATE to @rbatt:

> dput(twoGroups)
new("GRanges"
    , seqnames = new("Rle"
    , values = structure(1L, .Label = "chrX", class = "factor")
    , lengths = 25L
    , elementMetadata = NULL
    , metadata = list()
)
    , ranges = new("IRanges"
    , start = c(1L, 42L, 84L, 125L, 167L, 209L, 250L, 292L, 334L, 375L, 417L, 
458L, 500L, 542L, 583L, 625L, 667L, 708L, 750L, 791L, 833L, 875L, 
916L, 958L, 1000L)
    , width = c(30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L
)
    , NAMES = NULL
    , elementType = "integer"
    , elementMetadata = NULL
    , metadata = list()
)
    , strand = new("Rle"
    , values = structure(3L, .Label = c("+", "-", "*"), class = "factor")
    , lengths = 25L
    , elementMetadata = NULL
    , metadata = list()
)
    , elementMetadata = new("DataFrame"
    , rownames = NULL
    , nrows = 25L
    , listData = structure(list(control = c(-8.96125989500433, -4.2114706709981, 
2.28711236733943, 9.20983788557351, 0.406841854564846, 5.90989288408309, 
5.20958516281098, 2.78549935668707, -8.57040509115905, -8.43395926523954, 
-8.77848833333701, -2.30348631739616, 0.988166537135839, -0.557612692937255, 
-7.67730884253979, -5.16523499507457, -3.01896842662245, -3.11802179086953, 
-7.91133752092719, 3.95565569866449, 2.71242363378406, 0.727043347433209, 
7.3868807638064, -5.54162500426173, -1.13912807777524), control.1 = c(-7.65790161676705, 
4.6882571419701, 8.01326935179532, -6.23242623638362, -7.05442394595593, 
-5.10347711388022, -9.60906079504639, -4.69888434745371, -5.72342518251389, 
5.06623945198953, -2.53558184020221, 5.75232566334307, -7.08328293636441, 
-5.78988547902554, 1.57217930071056, -6.07197678647935, -7.39777445793152, 
5.28266688808799, -0.175534035079181, 5.19415136426687, 7.53853759262711, 
-0.950022372417152, 4.8170017497614, -2.23117967601866, 2.86112546455115
), control.2 = c(9.87956526689231, -1.0533055011183, -7.1219984581694, 
8.59682233538479, -0.551973707042634, 1.56467542983592, -0.415736702270806, 
1.69801083859056, 3.67223800625652, -1.30616669543087, -5.99444826599211, 
-0.745276440866292, -4.42522280383855, -9.33690558653325, 3.56628117151558, 
8.04066675715148, 5.54990579374135, 7.0927129406482, -2.37754446454346, 
-5.13221249915659, 6.56280730385333, -7.63786241877824, 3.64003846421838, 
-4.65625441167504, 8.1775445304811), treated = c(-5.84375557024032, 
1.03083667811006, -4.46718293242157, -6.32041404955089, 9.36362744309008, 
-0.488725560717285, -9.12991860881448, 6.98352626990527, 3.66103118285537, 
6.59625696251169, 26.3747013662942, 4.21735171694309, 23.1465750234202, 
5.14831536915153, 16.2545943120494, -2.77631865814328, 8.87154446449131, 
4.34142326004803, 0.0693343719467521, -5.7483538496308, -3.42396105173975, 
-28.9633466186933, -7.59088161867112, 7.04729768447578, -5.34924863371998
), treated.1 = c(9.71352839842439, -6.77430204115808, -4.05887754634023, 
-1.56806231010705, -4.88056596834213, 6.99816173873842, 4.07760242931545, 
-9.04069183394313, 23.9087636698969, 20.8488084585406, 24.4913479057141, 
9.37918818555772, 21.6068591410294, 0.408056953456253, 20.2703413087875, 
-3.44990291167051, -9.94784070644528, 5.36248424556106, 5.6652726046741, 
-20.9520940342918, -25.0159116648138, -15.0660670618527, 5.14691891148686, 
-7.55597376730293, 0.874496018514037), treated.2 = c(9.99328563921154, 
0.593712376430631, 8.05319488979876, 3.5114610241726, 1.55288028530777, 
-2.03484911937267, 3.07067603804171, -2.71020049229264, 21.1088214861229, 
11.0598625196144, 10.9187916945666, 7.2046619025059, 29.7064534015954, 
1.79014495806769, 7.76732922066003, 8.54645798448473, 5.30277661513537, 
-4.55057015176862, 8.73211439698935, -20.1880806474946, -14.8638874059543, 
-26.3618095312268, -5.80431585200131, -8.46893921960145, -6.32030902896076
)), .Names = c("control", "control.1", "control.2", "treated", 
"treated.1", "treated.2"))
    , elementType = "ANY"
    , elementMetadata = NULL
    , metadata = list()
)
    , seqinfo = new("Seqinfo"
    , seqnames = "chrX"
    , seqlengths = NA_integer_
    , is_circular = NA
    , genome = "hg19"
)
    , metadata = list()
)

I am not sure how to remove NA´s in 'df' object producing a suitable matrix to my target plot. Have you some code idea? — user2120870
@rbatt, I lost all rows: ## Transforming in data frame df <- as.data.frame(twoGroups) ## Input NAs to look like my real data df[ df <= 0 ] = NA df <- df[,-4] df <- df[,-4] names(df) <- c("chr", "start", "end", "control", "control.1", "control.2", "treated", "treated.1", "treated.2") ## Plot with NA df <- df[complete.cases(df),] > nrow(df) [1] 0 — user2120870
Any chance you could share some of the output of dput(twoGroups)? I'm not terribly familiar with the package you're using, and may have to experiment. — rbatt

nate nate · Accepted Answer · 2015-08-14T20:12:38

You could just drop the NAs from the dataframe before you go to plot or impute the values if you're willing to modify the data's structure. You might have to remove the NA's column by column.

Like this:

First, make a pretty dataframe:

df<- data.frame(userid=seq(1,100,1), numVarA=rnorm(100, mean=0, sd=1), numVarB=rnorm(100, mean=2, sd=1),  wholeNumVar=seq(from=1, to=300, by=3), Sex=rep(c("Male", "Female"), 50), Age=floor(runif(100, min=30, max=55)))

Next, punch some holes in it.

df$numVarA[c(1, 10, 15, 20, 25, 27, 29, 44, 69, 96, 45)]<- NA
df$numVarB[c(12, 80, 17, 19, 77, 71, 74, 76)]<- NA

Third, drop the NA's

df<- df[!is.na(df$numVarA), ]
df<- df[!is.na(df$numVarB), ]

Then try to plot everything again. Hopefully this helps. Best, NF

Plotting confidence intervals with NA values

1 Answers