1
votes

Could someone try to debug the script below? ggplot does not find my x and y values it seems. What is wrong?

data:

  data <- read.table(file="./MatureMiRNA_1M_2M_Trim18.csv", sep=",", header=T, check.names=F, row.names=1)
    head(data)
                      1M   2M
    hsa-let-7a-2-3p    0    0
    hsa-let-7a-3p      0   12
    hsa-let-7a-5p   2770 1344
    hsa-let-7b-3p     26   27
    hsa-let-7b-5p    627  492
    hsa-let-7c-3p      0    0

Script

    keep <- rowSums(cpm(data)>3) >=2
    data <- data[keep, ]
    table(keep)
    keep

    FALSE  TRUE 
     2381   200 

    data <- log(data,2)
    head(data)
                         1M        2M
    hsa-let-7a-5p 11.435670 10.392317
    hsa-let-7b-3p  4.700440  4.754888
    hsa-let-7b-5p  9.292322  8.942515
    hsa-let-7c-5p  5.906891  4.754888
    hsa-let-7d-3p  9.552669  7.882643
    hsa-let-7d-5p  8.573647  7.851749

    library(ggplot2) 
    ggplot(data, aes(x=1M,y=2M)) + geom_point()
    Error: unexpected symbol in "ggplot(data, aes(x=1M"
1

1 Answer

1
votes

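R does not allow variable names that begin with a digit (see `?make.names` for the rules on syntactically valid names), so `aes(x=1M, y=2M)` cannot even be parsed; that is the "unexpected symbol" error. Setting check.names=F in the read.table command stops R from checking for valid names, which is how the columns ended up being called "1M" and "2M". If you leave check.names at its default (TRUE), R makes the names legal by prepending an "X" to each of them (X1M, X2M). If you want the original names as the axis labels in the ggplot, set the axis labels with labs.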

# Load the plotting package up front.
library(ggplot2)

# Rebuild the example table from inline text. The header line has one field
# fewer than the data lines; the first column (the miRNA identifiers) is used
# as row names. With check.names = TRUE the headers "1M" and "2M" are turned
# into the syntactically valid column names "X1M" and "X2M".
data <- read.table(
  header = TRUE,
  row.names = 1,
  check.names = TRUE,
  text = " 1M        2M
    hsa-let-7a-5p 11.435670 10.392317
    hsa-let-7b-3p  4.700440  4.754888
    hsa-let-7b-5p  9.292322  8.942515
    hsa-let-7c-5p  5.906891  4.754888
    hsa-let-7d-3p  9.552669  7.882643
    hsa-let-7d-5p  8.573647  7.851749"
)

# Inspect the column names that read.table produced.
names(data)

# Scatter plot of the two samples. The sanitized column names are used in the
# aesthetic mapping, while labs() restores the original sample names on the
# axes.
ggplot(data = data, mapping = aes(x = X1M, y = X2M)) +
  geom_point() +
  labs(x = "1M", y = "2M")