I am trying to chart a probability density plot using ggplot. My problem is that the area under the curve is not equal to one. Advice appreciated.
Sample chart (the code that produced it follows). The Y axis looks like a count for small bins, rather than the probability of falling into each bin. The example code here is one of the sources I drew on when preparing this chart.
Sample code: most of it is data; the key part of the code is at the bottom.
library(ggplot2)
library(reshape)
library(plyr)
library(scales)
# Observation dates: one mid-month date per month, January 1976 through
# December 1985 (120 values). Each line below holds one calendar quarter.
Date <- as.Date(
  c(
    "1976-01-16", "1976-02-15", "1976-03-16",
    "1976-04-15", "1976-05-16", "1976-06-15",
    "1976-07-16", "1976-08-16", "1976-09-15",
    "1976-10-16", "1976-11-15", "1976-12-16",
    "1977-01-16", "1977-02-14", "1977-03-16",
    "1977-04-15", "1977-05-16", "1977-06-15",
    "1977-07-16", "1977-08-16", "1977-09-15",
    "1977-10-16", "1977-11-15", "1977-12-16",
    "1978-01-16", "1978-02-14", "1978-03-16",
    "1978-04-15", "1978-05-16", "1978-06-15",
    "1978-07-16", "1978-08-16", "1978-09-15",
    "1978-10-16", "1978-11-15", "1978-12-16",
    "1979-01-16", "1979-02-14", "1979-03-16",
    "1979-04-15", "1979-05-16", "1979-06-15",
    "1979-07-16", "1979-08-16", "1979-09-15",
    "1979-10-16", "1979-11-15", "1979-12-16",
    "1980-01-16", "1980-02-15", "1980-03-16",
    "1980-04-15", "1980-05-16", "1980-06-15",
    "1980-07-16", "1980-08-16", "1980-09-15",
    "1980-10-16", "1980-11-15", "1980-12-16",
    "1981-01-16", "1981-02-14", "1981-03-16",
    "1981-04-15", "1981-05-16", "1981-06-15",
    "1981-07-16", "1981-08-16", "1981-09-15",
    "1981-10-16", "1981-11-15", "1981-12-16",
    "1982-01-16", "1982-02-14", "1982-03-16",
    "1982-04-15", "1982-05-16", "1982-06-15",
    "1982-07-16", "1982-08-16", "1982-09-15",
    "1982-10-16", "1982-11-15", "1982-12-16",
    "1983-01-16", "1983-02-14", "1983-03-16",
    "1983-04-15", "1983-05-16", "1983-06-15",
    "1983-07-16", "1983-08-16", "1983-09-15",
    "1983-10-16", "1983-11-15", "1983-12-16",
    "1984-01-16", "1984-02-15", "1984-03-16",
    "1984-04-15", "1984-05-16", "1984-06-15",
    "1984-07-16", "1984-08-16", "1984-09-15",
    "1984-10-16", "1984-11-15", "1984-12-16",
    "1985-01-16", "1985-02-14", "1985-03-16",
    "1985-04-15", "1985-05-16", "1985-06-15",
    "1985-07-16", "1985-08-16", "1985-09-15",
    "1985-10-16", "1985-11-15", "1985-12-16"
  ),
  format = "%Y-%m-%d"
)
# Change in the gold price relative to the previous month, one value per
# entry of Date (120 values). Six values per line, so every pair of lines
# covers one calendar year once the vector is lined up with Date.
GOLD <- c(
  # 1976
  -0.104,  0.051,  0.011, -0.035, -0.008, -0.010,
  -0.065, -0.067,  0.041,  0.017,  0.126,  0.023,
  # 1977
  -0.011,  0.029,  0.087,  0.007, -0.016, -0.044,
   0.048, -0.013,  0.030,  0.062, -0.029,  0.042,
  # 1978
   0.078,  0.028,  0.031, -0.045,  0.005,  0.043,
   0.028,  0.090,  0.030,  0.072, -0.094,  0.009,
  # 1979
   0.093,  0.080, -0.014, -0.013,  0.077,  0.084,
   0.058,  0.021,  0.184,  0.097,  0.002,  0.169,
  # 1980
   0.474, -0.014, -0.168, -0.067, -0.007,  0.169,
   0.071, -0.025,  0.077, -0.022, -0.059, -0.044,
  # 1981
  -0.063, -0.103, -0.003, -0.008, -0.031, -0.040,
  -0.113,  0.005,  0.081, -0.014, -0.057, -0.009,
  # 1982
  -0.062, -0.026, -0.117,  0.061, -0.046, -0.058,
   0.080,  0.076,  0.190, -0.031, -0.019,  0.074,
  # 1983
   0.079,  0.022, -0.144,  0.030,  0.013, -0.057,
   0.026, -0.017, -0.012, -0.042, -0.030,  0.015,
  # 1984
  -0.043,  0.041,  0.022, -0.032, -0.011,  0.001,
  -0.083,  0.004, -0.019, -0.002,  0.003, -0.065,
  # 1985
  -0.063,  0.017, -0.044,  0.134, -0.022, -0.014,
  -0.008,  0.033, -0.014,  0.017, -0.004, -0.023
)
# Pair each observation date with the corresponding monthly change.
df <- data.frame(Date = Date, GOLD = GOLD)

# Kernel density estimate of the monthly changes.
#
# stat_density() maps y to its computed density by default, so no explicit
# y aesthetic is needed (the old `..density..` notation is deprecated).
#
# A density integrates to one over x *in the units of x*. GOLD only spans
# roughly -0.17 to 0.47, so the curve has to rise well above 1 for the area
# under it to equal 1. The y axis is neither a count nor a per-bin
# probability; the probability of an interval is the area above it.
#
# NOTE(review): GOLD looks like fractional changes (0.05 = 5%) although the
# x label says "Percent" -- confirm, and plot GOLD * 100 if percent is meant.
p <- ggplot(data = df, aes(x = GOLD)) +
  stat_density(fill = "grey50") +
  xlab("Percent change on previous month") +
  ylab("Density") +
  # opts() is defunct in current ggplot2; ggtitle() sets the plot title.
  ggtitle("Change in Gold Price in the US")

# Name the plot argument explicitly instead of relying on positional matching.
ggsave(filename = "plot.png", plot = p, width = 8, height = 4, dpi = 125)