0
votes

I have created a PCA for measurements collected on individual from four locations placed on four substrates with three replicates. I have the sex (male or female)and "karyotype" (factor with three possible categories) and the calculated the first two PC scores for each individual.

I would like to make a plot where male and female have different symbols and the colour of the symbols is dependent on the karotype. I have created a plot with the code below that gives me one symbol colour coded for the three karyotypes and put 95% confidence elispses around the males and females.

How can I change the symbol for each sex and keeping the colouring dependent on the karytype? I would also like to have this reflected in the legend.

One last question. Is it possible to add an arrow for each PC (not each individual) from the origin similar to those found in ordination plots?

Plot of PC1 x PC2 for all individuals in experiment

Sample Data:

test <- structure(list(Location = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("Kampinge", "Kaseberga", "Molle", "Steninge"
), class = "factor"), Substrate = structure(c(1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 2L, 2L), .Label = c("Kampinge", "Kaseberga", "Molle", 
"Steninge"), class = "factor"), Replicate = structure(c(1L, 1L, 
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 1L, 1L), .Label = c("1", "2", "3"), class = "factor"), 
    Sex = structure(c(2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 
    2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L
    ), .Label = c("Female", "Male"), class = "factor"), Karyotype = structure(c(3L, 
    4L, 3L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 3L, 3L, 3L, 3L, 2L, 4L, 
    3L, 3L, 4L, 4L, 3L, 4L, 3L, 4L, 3L), .Label = c("", "BB", 
    "BD", "DD"), class = "factor"), Wing_Length = c(1439L, 1224L, 
    1558L, 1508L, 1286L, 1560L, 1377L, 1486L, 1638L, 1475L, 1703L, 
    1726L, 1668L, 1405L, 1737L, 1419L, 1530L, 1508L, 1525L, 1326L, 
    1609L, 1357L, 1830L, 1476L, 1661L), Leg_Length = c(465L, 
    357L, 610L, 415L, 343L, 560L, 435L, 390L, 425L, 514L, 693L, 
    695L, 657L, 454L, 661L, 382L, 431L, 531L, 435L, 387L, 407L, 
    414L, 752L, 524L, 650L), Development_Time = c(15, 15, 12, 
    12, 12, 12, 12, 12, 12, 15, 15, 15, 15, 15, 15, 15, 11, 12, 
    14, 12, 14, 14, 14, 11, 11), PC1 = c(-281.031806232855, -515.247908786317, 
    -96.7283446465637, -260.171340782501, -476.664849753781, 
    -127.267190895631, -347.839240839062, -293.08530374415, -154.026702195308, 
    -221.98257463847, 67.7504074590983, 86.6778734586525, 17.8073498265326, 
    -314.171132928964, 73.3068216627556, -349.616320093329, -233.030545551831, 
    -185.761623361004, -234.30046275676, -417.754317941649, -187.820500930148, 
    -376.653043663908, 203.025275308178, -214.80078992031, 7.94703091626344
    ), PC2 = c(-78.3082792875783, -133.370219905995, -113.211488986839, 
    4.31036861466361, -82.8593541869054, -73.5708675263244, -95.0643731443612, 
    9.37702847686542, 80.0290301136235, -92.8061497557789, -83.8731164047719, 
    -70.6537733486393, -78.706783632851, -91.6793310834752, -37.5144466525303, 
    -27.4637667171696, 6.14809390611532, -84.6794844768708, -0.127837123829732, 
    -90.9556028004192, 75.2353710655562, -91.7834027435658, -47.669385541585, 
    -99.8362257341741, -77.8269478596591)), .Names = c("Location", 
"Substrate", "Replicate", "Sex", "Karyotype", "Wing_Length", 
"Leg_Length", "Development_Time", "PC1", "PC2"), row.names = c(1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 11L, 12L, 13L, 16L, 17L, 18L, 
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 30L, 31L), class = "data.frame")

## Plot
par(mfrow=c(1,1), mar=c(4,4,2,1), pty = "s")
plot(test$PC1, test$PC2, xlab="PC1", ylab="PC2", pch=16, col=as.numeric(test[,"Karyotype"]), 
     xlim = c(-1000, 1000), ylim = c(-250, 250), las=1, cex.lab = 1.5, cex.axis = 1.25, main = NULL)
ordiellipse(test[,9:10], test$Sex, conf=0.95, col="black", cex=1.75, label=TRUE)
legend("bottomright", pch=16, col=unique(as.numeric(test[,"Karyotype"])), legend=unique(test[,"Karyotype"]), cex = 1.75)
2

2 Answers

5
votes

Replace your pch plot argument by something like :

pch=ifelse(test$Sex=='Male',15,19)
1
votes

Try with ggplot:

library(ggplot2)
ggplot(test, aes(x=PC1, y=PC2, color=Karyotype, shape=Sex, group=Sex))+geom_point(size=5)+stat_ellipse()

enter image description here