0
votes

I am trying to recreate a graph I made in excel into R. Here is the excel version of the graph...

http://i.imgur.com/FJXbLy8.png

My data set that is split into 5 segments. For simplicity, I am going to refer to my x and y labels as time and distance. I have 5 "cars" that I want to plot on the shape chart.

The CSV file with the data looks like this (not the real numbers!):

A       B       C      D      E      F     ...
1       4       1      8      7      15
3       5       5      10     12     20
5       6       7      14     20     40

where (A, B) gives you time/distance of car 1, (C, D) gives you time/distance of car 2 and (E, F) gives you time/distance of car 3 and so forth.

My code looks like this:

speed = read.csv (file = "c:/users/XXX/desktop/speed", header = TRUE, sep = ',')
plot (A ~ B, data = speed, xlim = range (1e-5:100), ylim = range (0:200), log = "x")     
par(new = TRUE) 
plot (C ~ D, data = speed, xlim = range (1e-5:100), ylim = range (0:200), log = "x") 
par (new = TRUE)
plot (E ~ F, data = speed, xlim = range (1e-5:100), ylim = range (0:200), log = "x") 
par(new = TRUE )
plot(G ~ H, data = speed, xlim = range (1e-5:100), ylim = range (0:200), log = "x") 
par (new = TRUE) 
plot(I ~ J, data = speed, xlim = range (1e-5:100), ylim = range (0:200), log = "x")

I get an error saying that I have an invalid ylim value... I tried changing the numbers and no luck.

Any tips would be appreciated!

Here is dput(speed) if it is helpful to anyone... - I noticed the NA values in the dput, I think it comes from the fact that not every data set for each "car" is not equal? Not sure how to fix this...I could put each data set for each "car" into a different file, would that be better?

structure(list(A = c(3.59e-05, 3.75e-05, 9.67e-05, 3.92e-05, 
2.14e-05, 9.8e-05, 0.000228481, 0.000228481, 0.000415583, 0.000859052, 
0.002014948, 0.004079371, 0.00406138, 0.004353728, 0.008455587, 
0.007780939, 0.018260469, NA, NA, NA, NA, NA, NA), B = c(4.76, 
6.28, 10.5, 10.6, 12.3, 12.8, 16, 16.8, 20.7, 25.2, 34.4, 36.4, 
37.7, 43.6, 48.7, 49.2, 66.5, NA, NA, NA, NA, NA, NA), C = c(1.734691244, 
2.016976959, 1.707373272, 1.461511521, 0.805880184, 0.417509677, 
0.427070968, 0.220364977, 0.21763318, 0.170282028, 0.169826728, 
0.095612903, 0.094247005, 0.048717051, 0.044072995, 0.034921475, 
0.023721106, 0.022901567, 0.018485161, 0.015252535, 0.008240922, 
0.003942894, 0.002868387), D = c(176, 175, 169, 169, 169, 162, 
161, 146, 146, 143, 143, 121, 117, 90, 90, 77.7, 70.3, 69.2, 
67, 59.6, 50.4, 36.1, 33.7), E = c(0.0235, 0.044636324, 0.075155479, 
0.072909589, 0.09736484, 0.0988621, 0.199428082, 0.202422603, 
0.362878995, 0.370365297, 0.355392694, 1.438410959, 0.727212329, 
0.722221461, 1.40597032, NA, NA, NA, NA, NA, NA, NA, NA), F = c(69L, 
90L, 111L, 114L, 114L, 116L, 143L, 146L, 161L, 163L, 164L, 170L, 
172L, 175L, 180L, NA, NA, NA, NA, NA, NA, NA, NA), G = c(35.29300714, 
17.47300714, 4.351007143, 3.182292857, 3.182292857, 1.411864286, 
1.435007143, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA), H = c(180L, 180L, 180L, 180L, 177L, 175L, 171L, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), I = c(0.021, 0.0258, 0.029929032, 0.034574194, 0.064612903, 
0.088870968, 0.17816129, 0.163967742, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA), J = c(67, 68.7, 75.2, 84.6, 
115, 121, 152, 155, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA)), .Names = c("A", "B", "C", "D", "E", "F", "G", 
"H", "I", "J"), class = "data.frame", row.names = c(NA, -23L))
1
Why don't just try to convert the Na numbers to 0? Data$A[is.na(Data$A)] = 0 - Abdocia
Not reproducible for me. I can only think that you are running something different to the code you show above. I.e. did you compute ylim from the data or use log = "xy" at some point? - Gavin Simpson
The idea is very simple. Assuming a long format, each line should have x "coordinate", y "value" and from which group it comes from. Once you have that, it's trivial to draw. In a wide format, one lines represents one x value and values for each group. Perhaps try to construct your data in one of those ways? - Roman Luštrik
I selected the ylim value by hand from the excel version of the graph. Should the numbers be selected in a different way? I did not use log = "xy" for this graph. Can you clarify what you mean by "not reproducible" - Thanks! - Casey
@Casey running your code with the dput() output you provided I don't get any errors. - Gavin Simpson

1 Answers

5
votes

I am not sure what you are looking at. I think this might help.

speed <- structure(list(A = c(3.59e-05, 3.75e-05, 9.67e-05, 3.92e-05,
  2.14e-05, 9.8e-05, 0.000228481, 0.000228481, 0.000415583, 0.000859052,  
0.002014948, 0.004079371, 0.00406138, 0.004353728, 0.008455587,     
0.007780939, 0.018260469, NA, NA, NA, NA, NA, NA), B = c(4.76, 6.28, 10.5,
 10.6, 12.3, 12.8, 16, 16.8, 20.7, 25.2, 34.4, 36.4, 37.7, 43.6, 48.7, 49.2,
 66.5, NA, NA, NA, NA, NA, NA), C = c(1.734691244, 2.016976959, 1.707373272, 
1.461511521, 0.805880184, 0.417509677, 0.427070968, 0.220364977, 0.21763318, 
0.170282028, 0.169826728, 0.095612903, 0.094247005, 0.048717051, 0.044072995, 
0.034921475,  0.023721106, 0.022901567, 0.018485161, 0.015252535, 0.008240922, 
 0.003942894, 0.002868387), D = c(176, 175, 169, 169, 169, 162, 161, 146, 146,
 143, 143, 121, 117, 90, 90, 77.7, 70.3, 69.2, 67, 59.6, 50.4, 36.1, 33.7), 
E = c(0.0235, 0.044636324, 0.075155479, 0.072909589, 0.09736484, 0.0988621, 
0.199428082, 0.202422603, 0.362878995, 0.370365297, 0.355392694, 1.438410959, 
0.727212329,  0.722221461, 1.40597032, NA, NA, NA, NA, NA, NA, NA, NA), 
F = c(69L, 90L, 111L, 114L, 114L, 116L, 143L, 146L, 161L, 163L, 164L, 170L, 
172L, 175L, 180L, NA, NA, NA, NA, NA, NA, NA, NA), G = c(35.29300714, 
17.47300714, 4.351007143, 3.182292857, 3.182292857, 1.411864286, 1.435007143,
 NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,  NA, NA, NA, NA), H = c(180L, 180L, 180L, 180L, 177L, 175L, 171L,  NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
 NA, NA, NA, NA, NA, NA ), I = c(0.021, 0.0258, 0.029929032, 0.034574194, 
0.064612903,   0.088870968, 0.17816129, 0.163967742, NA, NA, NA, NA, NA, 
NA,NA, NA, NA, NA, NA, NA, NA, NA, NA), J = c(67, 68.7, 75.2, 84.6,  115,
 121, 152, 155, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), 
.Names = c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J"),
 class = "data.frame", row.names = c(NA, -23L))

Using Base R

Plotting

plot(speed$A,speed$B,xlim=range(1e-5:100),ylim=range(0:200),xlab="Time",ylab="Distance",pch=19,log="x")
points(speed$C,speed$D,col="red",pch=21)
points(speed$E,speed$F,col="blue",pch=18)
points(speed$G,speed$H,col="yellow",pch=19)
points(speed$I,speed$J,col="green",pch=20)
legend("top", legend = c("Car 1","Car 2","Car 3","Car 4","Car 5"), col = c("black","red","blue","yellow","green"),
       ncol = 2,pch=c(19,21,18,19,20))

The output graph is as follows: enter image description here

Option using ggplot2 ggplot2 automatically removes the points which has missing values.

library(ggplot2)
ggplot(speed,aes(A,B))+geom_point()+xlim(1e-5,100)+ylim(0,200)+scale_x_log10()+
  geom_point(data=speed,aes(C,D),col="red")+geom_point(data=speed,aes(E,F),col="blue")+
  geom_point(data=speed,aes(G,H),col="green")+geom_point(data=speed,aes(I,J),col="orange",shape=2)+
  xlab("Distance")+ylab("Distance")

enter image description here

Yet another option using the long format data and ggplot2

## Separate the data by cars
car1 <- speed[,c(1,2)]
car1$id <- "Car1"
car2 <- speed[,c(3,4)]
car2$id <- "Car2"
car3 <- speed[,c(5,6)]
car3$id <- "Car3"
car4 <- speed[,c(7,8)]
car4$id <- "Car4"
car5 <- speed[,c(9,10)]
car5$id <- "Car5"
names(car1)<- names(car2) <- names(car3) <- names(car4) <- names(car5)<- c("distance","speed","id")

Combine datasets

myspeed <- rbind(car1,car2,car3,car4,car5)

Plot ggplot with legends

ggplot(myspeed,aes(distance,speed))+geom_point(aes(color=id))+xlim(1e-5,100)+ylim(0,200)+scale_x_log10()

The output is as follows: enter image description here