2
votes

I have a dataframe "A" with samples as rows and Genes as columns with gene expression values (RPKM).

        Gene1   Gene2           Gene3        Gene4        Gene5    Gene6
Sample1 0.02   0.038798682  0.1423662   2.778587067 0.471403939 18.93687655
Sample2 0      0.059227225  0.208765213 0.818810739 0.353671882 1.379027685
Sample3 0      0.052116384  0.230437735 2.535040249 0.504061015 9.773089223
Sample4 0.06   0.199264618  0.261100548 2.516963635 0.63659138  11.01441624
Sample5 0      0.123521916  0.273330986 2.751309388 0.623572499 34.0563519
Sample6 0      0.128767634  0.263491811 2.882878373 0.359322715 13.02402045
Sample7 0      0.080097356  0.234511372 3.568192768 0.386217698 9.068928569
Sample8 0      0.017421323  0.247775683 5.109428797 0.068760572 15.7490551
Sample9 0      2.10281137   0.401582013 8.202902242 0.140596724 60.25989178

To make a scatter plot showing the correlation of Gene1 with Gene5 and Gene6, I used the following code:

library(tidyr)
library(ggplot2)

# Scatter plots of Gene1 (x) against Gene5 and Gene6 (y), one facet per
# response gene, each with a linear-model fit line and no confidence band.
# Gene5/Gene6 are stacked into long form: `variable` holds the gene name and
# `values` its expression, which is what facet_grid() splits on.
gene1_plot <- A %>%
  pivot_longer(
    cols = Gene5:Gene6,
    names_to = "variable",
    values_to = "values"
  ) %>%
  ggplot(aes(Gene1, values)) +
  geom_point() +
  facet_grid(. ~ variable, scales = "free_x") +
  geom_smooth(method = "lm", se = FALSE) +
  # Both axes are log2-transformed; tick marks and tick labels are suppressed.
  scale_y_continuous(trans = "log2", labels = NULL, breaks = NULL) +
  scale_x_continuous(trans = "log2", labels = NULL, breaks = NULL)

# print() explicitly: a ggplot object is only drawn when printed, and
# auto-printing happens only at the interactive top level. Without this the
# PDF comes out empty under source() or inside a loop/function.
pdf("Gene1.pdf")
print(gene1_plot)
dev.off()

I also want to plot for

Gene2 vs Gene5 and Gene6
Gene3 vs Gene5 and Gene6
Gene4 vs Gene5 and Gene6

Of course, I could replace Gene1 in the code with each of the other genes by hand. Instead of replacing it manually, I would like to write a loop that plots each of Gene1 to Gene4 against Gene5 and Gene6, with each plot saved to its own PDF named after the respective gene.

Update: as requested, here is the output of dput(A):

# Output of dput(A): a data.frame with 9 rows (Sample1..Sample9) and 6 numeric
# columns (Gene1..Gene6). Assign the expression below to `A` to reproduce the
# data used in the question.
structure(list(Gene1 = c(0.02, 0, 0, 0.06, 0, 0, 0, 0, 0), Gene2 = c(0.038798682, 
0.059227225, 0.052116384, 0.199264618, 0.123521916, 0.128767634, 
0.080097356, 0.017421323, 2.10281137), Gene3 = c(0.1423662, 0.208765213, 
0.230437735, 0.261100548, 0.273330986, 0.263491811, 0.234511372, 
0.247775683, 0.401582013), Gene4 = c(2.778587067, 0.818810739, 
2.535040249, 2.516963635, 2.751309388, 2.882878373, 3.568192768, 
5.109428797, 8.202902242), Gene5 = c(0.471403939, 0.353671882, 
0.504061015, 0.63659138, 0.623572499, 0.359322715, 0.386217698, 
0.068760572, 0.140596724), Gene6 = c(18.93687655, 1.379027685, 
9.773089223, 11.01441624, 34.0563519, 13.02402045, 9.068928569, 
15.7490551, 60.25989178)), class = "data.frame", row.names = c("Sample1", 
"Sample2", "Sample3", "Sample4", "Sample5", "Sample6", "Sample7", 
"Sample8", "Sample9"))
2

2 Answers

1
votes

Would a loop work for you? I haven't tested this (the question did not include the data as dput() output when I wrote it), so it may need some cleanup.

# Write one PDF per predictor gene, each showing that gene (x) against Gene5
# and Gene6 (y) in side-by-side facets with a linear-model fit line.
# Needs only tidyr and ggplot2, and leaves `A` unmodified.

# Response genes go on the y axis; every other column of A is a predictor.
response_genes <- c("Gene5", "Gene6")
cols <- setdiff(colnames(A), response_genes)

for (i in cols) {
  # Output file is named after the predictor gene, e.g. "Gene1.pdf".
  name <- paste0(i, ".pdf")

  p <- A %>%
    # Stack the response genes into long form: `variable` holds the gene name
    # and `values` its expression, which is what facet_grid() splits on.
    pivot_longer(
      cols = all_of(response_genes),
      names_to = "variable",
      values_to = "values"
    ) %>%
    # .data[[i]] looks the column up by its string name, so no scratch column
    # has to be added to A and no select() call is required.
    ggplot(aes(.data[[i]], values)) +
    geom_point() +
    facet_grid(. ~ variable, scales = "free_x") +
    geom_smooth(method = "lm", se = FALSE) +
    scale_y_continuous(trans = "log2", labels = NULL, breaks = NULL) +
    # The first argument is the axis title: label the x axis with the gene.
    scale_x_continuous(i, trans = "log2", labels = NULL, breaks = NULL)

  # Inside a loop a ggplot object is not auto-printed, so print() explicitly
  # while the PDF device is open.
  pdf(name)
  print(p)
  dev.off()
}
1
votes

If you need an explicit for loop:

# For each of Gene1..Gene4, plot that gene (x) against Gene5 and Gene6 (y) in
# side-by-side facets with a linear-model fit line, and save the plot to a PDF
# named after the gene ("Gene1.pdf", ..., "Gene4.pdf"), as the question asks.
for (gene in paste0("Gene", 1:4)) {
  gene_plot <- A %>%
    # Stack Gene5/Gene6 into long form: `variable` holds the gene name and
    # `values` its expression, which is what facet_grid() splits on.
    pivot_longer(
      cols = Gene5:Gene6,
      names_to = "variable",
      values_to = "values"
    ) %>%
    # .data[[gene]] selects the column by its string name; unlike get(gene)
    # it cannot pick up a same-named object from outside the data.
    ggplot(aes(.data[[gene]], values)) +
    geom_point() +
    facet_grid(. ~ variable, scales = "free_x") +
    geom_smooth(method = "lm", se = FALSE) +
    scale_y_continuous(trans = "log2", labels = NULL, breaks = NULL) +
    # Title the x axis with the gene name rather than the mapping expression.
    scale_x_continuous(
      name = gene, trans = "log2", labels = NULL, breaks = NULL
    )

  # ggplot objects are not auto-printed inside a loop; print() explicitly
  # while the PDF device is open so the file is not empty.
  pdf(paste0(gene, ".pdf"))
  print(gene_plot)
  dev.off()
}