I would very much like to recreate this graph using a pure ggplot2 approach, since I would like to do all my plotting with that package.
The code for generating this particular plot is given below.
library(PerformanceAnalytics)
library(quantmod)

# Download price history for the three tickers; the objects SPY, LQD and GLD
# used below are created in the workspace by getSymbols().
getSymbols(c("SPY", "LQD", "GLD"))

# One column of closing prices per ticker, combined into a single object.
Data <- cbind(Cl(SPY), Cl(LQD), Cl(GLD))

# Calendar year of each observation; this drives the fill colour of the points.
Year <- as.factor(format(index(Data), "%Y"))

# One colour index per year actually present in the data. Indexing a fixed
# 1:5 vector by the factor yields NA (no fill) from the sixth year onwards and
# leaves the legend out of step with its labels.
year_cols <- seq_len(nlevels(Year))

chart.Correlation(Data, bg = year_cols[Year], pch = 21)

# Allow the legend to be drawn outside the plot region, then put the previous
# graphics setting back so later plots are not affected.
old_par <- par(xpd = TRUE)
legend(0, 1, levels(Year), fill = year_cols)
par(old_par)
You can read more about it on this site.
I have created a small dataset to play around with. The following code defines a data.frame and melts it into a format that ggplot usually likes.
library(ggplot2)
library(reshape2)
# Small hand-built sample data set: 60 rows (20 per year for 2007, 2008 and
# 2009), three numeric columns (SPY, LQD, GLD) and a Year factor. The row names
# are the observation dates as "YYYY-MM-DD" strings.
mydf<-structure(list(SPY = c(141.37, 141.67, 140.54, 141.19, 141.07,
141.54, 142.16, 143.24, 142.96, 143.02, 142.54, 142.82, 142.38,
142.8, 143.95, 142.26, 142.13, 142.05, 142.79, 143.75, 144.86,
141.31, 141.19, 138.91, 140.37, 141.29, 140.15, 141.28, 138.17,
136.98, 133.43, 132.06, 130.72, 133.86, 134.99, 133.04, 135.24,
135.91, 134.91, 137.37, 92.96, 92.85, 93.47, 90.67, 91.04, 89.09,
86.95, 87.11, 84.37, 84.4, 85.06, 80.57, 84.05, 82.75, 83.11,
83.68, 84.53, 87.39, 84.55, 82.83),
LQD = c(106.45, 107.25, 107.19, 107.15, 107.17, 107.03, 106.7, 106.59,
106.71, 106.56, 106.87,
106.81, 106.88, 106.59, 106.64, 106.3, 106.23, 106.18, 106.33,
106.78, 105.73, 106.6, 106.47, 106.27, 106.05, 105.74, 106.51,
106.07, 106.64, 106.48, 106.98, 106.8, 107.03, 107.17, 106.26,
107.41, 107.38, 107.2, 107.18, 107.53, 100.61, 101.3, 101.47,
100.62, 101.34, 102.6, 102.07, 101.85, 101.4, 101.6, 101.6, 100.1,
99.6, 99.23, 98.57, 97.69, 99.53, 100.32, 99.12, 99.82),
GLD = c(62.28, 61.65, 60.17, 60.48, 60.85, 60.59, 60.63, 62.17, 61.97, 62.64,
62.26, 63, 62.72, 64.23, 64.33, 64.06, 64.17, 63.77, 64.22, 64.83,
85.57, 85.13, 84.77, 86.78, 86.55, 88.25, 88.58, 89.54, 87.99,
86.7, 86.5, 87.42, 88.17, 87.89, 90.08, 90.3, 91.75, 91.15, 92.06,
91.4, 86.23, 84.48, 85.13, 82.75, 84.46, 83.92, 80.76, 80.88,
79.79, 80.39, 82.71, 84.52, 84.15, 84.58, 88.53, 88.95, 88.38,
87.42, 89.5, 91.31),
# Year is stored as a factor: integer codes 1-3 mapped to the labels below.
Year = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L),
.Label = c("2007", "2008", "2009"),
class = "factor")),
.Names = c("SPY", "LQD", "GLD", "Year"),
# Row names carry the date of each observation.
row.names = c("2007-01-02", "2007-01-03",
"2007-01-04", "2007-01-07", "2007-01-08", "2007-01-09", "2007-01-10",
"2007-01-11", "2007-01-15", "2007-01-16", "2007-01-17", "2007-01-18",
"2007-01-21", "2007-01-22", "2007-01-23", "2007-01-24", "2007-01-25",
"2007-01-28", "2007-01-29", "2007-01-30", "2008-01-02", "2008-01-03",
"2008-01-06", "2008-01-07", "2008-01-08", "2008-01-09", "2008-01-10",
"2008-01-13", "2008-01-14", "2008-01-15", "2008-01-16", "2008-01-17",
"2008-01-21", "2008-01-22", "2008-01-23", "2008-01-24", "2008-01-27",
"2008-01-28", "2008-01-29", "2008-01-30", "2009-01-01", "2009-01-04",
"2009-01-05", "2009-01-06", "2009-01-07", "2009-01-08", "2009-01-11",
"2009-01-12", "2009-01-13", "2009-01-14", "2009-01-15", "2009-01-19",
"2009-01-20", "2009-01-21", "2009-01-22", "2009-01-25", "2009-01-26",
"2009-01-27", "2009-01-28", "2009-01-29"),
class = "data.frame")
# Copy the row names (the observation dates) into a regular ID column so they
# are carried along when the data is reshaped.
mydf <- data.frame(ID = rownames(mydf), mydf)

# Reshape to long format: ID and Year identify each row, and the SPY, LQD and
# GLD columns are stacked into `variable` / `value`. Naming the id columns
# explicitly avoids relying on melt() guessing them from the column types
# (and the "Using ... as id variables" message that goes with the guess).
moltdf <- melt(mydf, id.vars = c("ID", "Year"))
Now I know that this moltdf can be passed to ggplot, but I have no idea how to generate the different panels of the rather information-dense plot above.
`ggplot2`. Just for the sake of using `ggplot2`? – Roland
The `chart.Correlation` function is just a slightly embellished version of the code from `example(pairs)`, which is also available here. – GSee