0
votes

I have a list of lists which holds about 38 lists, of which only a few should be selected (the rest have no values, i.e. they are NULL). I want to make a nice data frame out of those lists.

my list of lists:

structure(list(NULL, AFT = NULL, `AP-2` = NULL, `AT_hook, ETS` = NULL, 
    `BASIC, HLH` = NULL, BRIGHT = NULL, BRLZ = NULL, `BRLZ, BZIP_1, BZIP_2` = NULL, 
    bZIP = NULL, DWA = NULL, E2F_TDP = NULL, ETS = structure(list(
        MASHvstRap = 8.34818462488622e-05, MASHvsBEEML = 0.000250015234002341, 
        tRapvsBEEML = 8.80480124829088e-06, frequency = 10, stringsAsFactors = 0), .Names = c("MASHvstRap", 
    "MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
    ), row.names = c(NA, -1L), class = "data.frame"), FH = structure(list(
        MASHvstRap = 1.72864219357795e-05, MASHvsBEEML = 0.000840376826415137, 
        tRapvsBEEML = 2.54589884424594e-07, frequency = 10, stringsAsFactors = 0), .Names = c("MASHvstRap", 
    "MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
    ), row.names = c(NA, -1L), class = "data.frame"), GCM = NULL, 
    HLH = structure(list(MASHvstRap = 1.22573775496788e-08, MASHvsBEEML = 0.00119919900578073, 
        tRapvsBEEML = 3.60117573203279e-07, frequency = 13, stringsAsFactors = 0), .Names = c("MASHvstRap", 
    "MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
    ), row.names = c(NA, -1L), class = "data.frame"), HMG = structure(list(
        MASHvstRap = 6.07022175358029e-30, MASHvsBEEML = 0.0994358268075855, 
        tRapvsBEEML = 5.3728011843321e-09, frequency = 44, stringsAsFactors = 0), .Names = c("MASHvstRap", 
    "MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
    ), row.names = c(NA, -1L), class = "data.frame"), Homeo = structure(list(
        MASHvstRap = 4.33277656523673e-123, MASHvsBEEML = 0.442020719677047, 
        tRapvsBEEML = 8.44025048683083e-74, frequency = 158, 
        stringsAsFactors = 0), .Names = c("MASHvstRap", "MASHvsBEEML", 
    "tRapvsBEEML", "frequency", "stringsAsFactors"), row.names = c(NA, 
    -1L), class = "data.frame"), `Homeo ` = structure(list(MASHvstRap = 3.36388469632471e-14, 
        MASHvsBEEML = 0.763756578209722, tRapvsBEEML = 3.75944533892572e-07, 
        frequency = 19, stringsAsFactors = 0), .Names = c("MASHvstRap", 
    "MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
    ), row.names = c(NA, -1L), class = "data.frame"), `Homeo, PAX` = NULL, 
    `Homeo, POU` = structure(list(MASHvstRap = 3.06769943976602e-08, 
        MASHvsBEEML = 0.423594358667165, tRapvsBEEML = 7.51004008659922e-09, 
        frequency = 11, stringsAsFactors = 0), .Names = c("MASHvstRap", 
    "MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
    ), row.names = c(NA, -1L), class = "data.frame"), `HSF_DNA-bind` = NULL, 
    `HTH APSES-type` = NULL, IRF = structure(list(MASHvstRap = 1.25502843779857e-05, 
        MASHvsBEEML = 0.00094114146973297, tRapvsBEEML = 1.17030570144044e-06, 
        frequency = 10, stringsAsFactors = 0), .Names = c("MASHvstRap", 
    "MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
    ), row.names = c(NA, -1L), class = "data.frame"), MADS = NULL, 
    Myb = NULL, RFX = NULL, SAND = NULL, SANT = NULL, TBOX = NULL, 
    TBP = NULL, TEA = NULL, unknown = structure(list(MASHvstRap = 4.82890837154273e-32, 
        MASHvsBEEML = 0.0736357072352032, tRapvsBEEML = 7.20783906680568e-26, 
        frequency = 121, stringsAsFactors = 0), .Names = c("MASHvstRap", 
    "MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
    ), row.names = c(NA, -1L), class = "data.frame"), Zf_C2H2 = NULL, 
    Zf_GATA = NULL, Zn2Cys6 = structure(list(MASHvstRap = 4.71138538453502e-05, 
        MASHvsBEEML = 0.000623286035357452, tRapvsBEEML = 3.93333369828925e-07, 
        frequency = 17, stringsAsFactors = 0), .Names = c("MASHvstRap", 
    "MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
    ), row.names = c(NA, -1L), class = "data.frame"), ZnF_C2H2 = structure(list(
        MASHvstRap = 1.62205005760679e-17, MASHvsBEEML = 1.46483433509648e-08, 
        tRapvsBEEML = 2.89656372293867e-25, frequency = 54, stringsAsFactors = 0), .Names = c("MASHvstRap", 
    "MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
    ), row.names = c(NA, -1L), class = "data.frame"), ZnF_C4 = structure(list(
        MASHvstRap = 4.93181852868703e-06, MASHvsBEEML = 0.0467257430288347, 
        tRapvsBEEML = 6.69189512726035e-07, frequency = 10, stringsAsFactors = 0), .Names = c("MASHvstRap", 
    "MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
    ), row.names = c(NA, -1L), class = "data.frame"), ZnF_GATA = NULL), .Names = c("", 
"AFT", "AP-2", "AT_hook, ETS", "BASIC, HLH", "BRIGHT", "BRLZ", 
"BRLZ, BZIP_1, BZIP_2", "bZIP", "DWA", "E2F_TDP", "ETS", "FH", 
"GCM", "HLH", "HMG", "Homeo", "Homeo ", "Homeo, PAX", "Homeo, POU", 
"HSF_DNA-bind", "HTH APSES-type", "IRF", "MADS", "Myb", "RFX", 
"SAND", "SANT", "TBOX", "TBP", "TEA", "unknown", "Zf_C2H2", "Zf_GATA", 
"Zn2Cys6", "ZnF_C2H2", "ZnF_C4", "ZnF_GATA"))

As you can see, a few of the lists contain no values and thus should be removed. What I want is a data frame with 4 columns: family, method, p.value, frequency. Within the list, the values are provided per family like this: $Zn2Cys6

    MASHvstRap MASHvsBEEML  tRapvsBEEML frequency
1 4.711385e-05 0.000623286 3.933334e-07        17

So $Zn2Cys6 is the family name and should be added in front of MASHvstRap. Therefore the list should be unlisted and reshaped into a data frame with the following columns: family, method (e.g. 'MASHvstRap'), p.value, frequency. I tried to use lapply(rbind), but that gives me a strange structure. cbind and as.data.frame didn't help me either.

3

3 Answers

7
votes

If the structure you posted is named ll you can do this:

> do.call("rbind",ll)
              MASHvstRap  MASHvsBEEML  tRapvsBEEML frequency
ETS         8.348185e-05 2.500152e-04 8.804801e-06        10
FH          1.728642e-05 8.403768e-04 2.545899e-07        10
HLH         1.225738e-08 1.199199e-03 3.601176e-07        13
HMG         6.070222e-30 9.943583e-02 5.372801e-09        44
Homeo      4.332777e-123 4.420207e-01 8.440250e-74       158
Homeo       3.363885e-14 7.637566e-01 3.759445e-07        19
Homeo, POU  3.067699e-08 4.235944e-01 7.510040e-09        11
IRF         1.255028e-05 9.411415e-04 1.170306e-06        10
unknown     4.828908e-32 7.363571e-02 7.207839e-26       121
Zn2Cys6     4.711385e-05 6.232860e-04 3.933334e-07        17
ZnF_C2H2    1.622050e-17 1.464834e-08 2.896564e-25        54
ZnF_C4      4.931819e-06 4.672574e-02 6.691895e-07        10

The first column holds the row names (which were previously the names of the list items).

2
votes

If you call your list of lists "pino", then just type:

# Flatten the list into one numeric vector: unlist() drops the NULL entries
# and concatenates the values of the remaining one-row data frames.
# Refill row-wise, five values per row (assumes every non-NULL element has
# exactly five values, as in the posted structure), then keep the first four
# columns; the fifth is `stringsAsFactors`.
# drop = FALSE keeps the result a matrix even when only one element is
# non-NULL; without it the single row collapses to a vector and the names<-
# call below fails.
# Note: the family names (the list names) are not carried over into `prova`.
prova <- data.frame(
  matrix(unlist(pino), ncol = 5, byrow = TRUE)[, 1:4, drop = FALSE]
)
names(prova) <- c("MASHvstRap", "MASHvsBEEML", "tRapvsBEEML", "frequency")

There might be a more general solution, though...

2
votes

Here's a solution with reshape2:

library(reshape2)

# Row-bind every element of `dat` into a single data frame; the list names
# become the row names.
tmp <- do.call("rbind", dat)
# Discard the fifth column.
tmp <- tmp[-5]
# Expose the row names as an ordinary 'family' column.
tmp[["family"]] <- rownames(tmp)

# Go from wide to long: the first three columns are stacked into a
# 'method' / 'p.value' pair, the remaining columns are kept as identifiers.
melt(
  tmp,
  measure.vars = names(tmp)[seq_len(3)],
  variable.name = "method",
  value.name = "p.value"
)

The result:

   frequency     family      method       p.value
1         10        ETS  MASHvstRap  8.348185e-05
2         10         FH  MASHvstRap  1.728642e-05
3         13        HLH  MASHvstRap  1.225738e-08
4         44        HMG  MASHvstRap  6.070222e-30
5        158      Homeo  MASHvstRap 4.332777e-123
6         19     Homeo   MASHvstRap  3.363885e-14
7         11 Homeo, POU  MASHvstRap  3.067699e-08
8         10        IRF  MASHvstRap  1.255028e-05
9        121    unknown  MASHvstRap  4.828908e-32
10        17    Zn2Cys6  MASHvstRap  4.711385e-05
11        54   ZnF_C2H2  MASHvstRap  1.622050e-17
12        10     ZnF_C4  MASHvstRap  4.931819e-06
13        10        ETS MASHvsBEEML  2.500152e-04
14        10         FH MASHvsBEEML  8.403768e-04
15        13        HLH MASHvsBEEML  1.199199e-03
16        44        HMG MASHvsBEEML  9.943583e-02
17       158      Homeo MASHvsBEEML  4.420207e-01
18        19     Homeo  MASHvsBEEML  7.637566e-01
19        11 Homeo, POU MASHvsBEEML  4.235944e-01
20        10        IRF MASHvsBEEML  9.411415e-04
21       121    unknown MASHvsBEEML  7.363571e-02
22        17    Zn2Cys6 MASHvsBEEML  6.232860e-04
23        54   ZnF_C2H2 MASHvsBEEML  1.464834e-08
24        10     ZnF_C4 MASHvsBEEML  4.672574e-02
25        10        ETS tRapvsBEEML  8.804801e-06
26        10         FH tRapvsBEEML  2.545899e-07
27        13        HLH tRapvsBEEML  3.601176e-07
28        44        HMG tRapvsBEEML  5.372801e-09
29       158      Homeo tRapvsBEEML  8.440250e-74
30        19     Homeo  tRapvsBEEML  3.759445e-07
31        11 Homeo, POU tRapvsBEEML  7.510040e-09
32        10        IRF tRapvsBEEML  1.170306e-06
33       121    unknown tRapvsBEEML  7.207839e-26
34        17    Zn2Cys6 tRapvsBEEML  3.933334e-07
35        54   ZnF_C2H2 tRapvsBEEML  2.896564e-25
36        10     ZnF_C4 tRapvsBEEML  6.691895e-07