0
votes

A compound question:

  • I cannot get log10 y-axis labels to usefully display on the ggalluvial plot after many variations of scale_y_log10. I am having particular difficulty specifying and formatting breaks. scale_y_continuous produces the following:
Error in seq.default(min, max, by = by) : 'to' must be a finite number
  • Strata labels obscure the strata boundaries. The geom_label nudge_y parameter has no apparent effect. How should one center these labels in the strata? The default settings do not accomplish this.

Please see plot, code and data below.

Appreciate any advice.

Update: Adding reverse = FALSE and vjust = "center" to the label layer, i.e. geom_label(stat = "stratum", label.strata = TRUE, fill = "white", vjust = "center", reverse = FALSE), appears to fix the issue with centering the strata labels: ggalluvial plot with centered labels

originally posted ggalluvial plot: originally posted ggalluvial plot of U.S. campaign expenditures

library(tidyverse)
library(ggplot2)
library(ggalluvial)
library(scales)
#library(ggthemes) # for theme_economist

# Alluvial plot of campaign spending (aggSpend) across three axes: election
# cycle, activity group and pro/anti stance. Flows are filled by cycle.
# `a` is the plot data object (columns cycle, proAnti, activityGroup, aggSpend).
proAntiByActivity <- ggplot(
  as.data.frame(a),
  aes(y = aggSpend,
      axis1 = cycle, axis3 = proAnti, axis2 = activityGroup)
) +
  #geom_alluvium(aes(fill = cycle)) + #, width = 0, knot.pos = 0, reverse = FALSE) +
  geom_alluvium(aes(fill = cycle), width = 1/12, knot.pos = 1/6,
                reverse = FALSE, show.legend = TRUE) +
  # "none" replaces the deprecated logical form guides(fill = FALSE).
  guides(fill = "none") +
  geom_stratum(width = 1/5, reverse = FALSE) +
  #geom_text(stat = "stratum", label.strata = TRUE, reverse = FALSE) +
  # reverse = FALSE must match geom_alluvium()/geom_stratum() above; with the
  # default the labels are positioned for the opposite stacking order and do
  # not sit inside their own strata.
  geom_label(stat = "stratum", label.strata = TRUE, reverse = FALSE,
             nudge_y = 0) +
  #scale_x_discrete(breaks = 1:3, labels = c("Election Cycle","Means",  "Pro/Anti")) +
  scale_x_continuous(breaks = 1:3,
                     labels = c("Election Cycle", "Means", "Pro/Anti")) +
  # Variations tried for the y axis, kept for reference:
  #scale_y_continuous(name="total spent")+
  #scale_y_continuous(trans = "log10",name="total spent",limits=NULL) +
  #coord_trans(y="log10")+
  #scale_y_log10(name="log total spent",breaks = 1e+100*c(2e+03,2e+04,2e+05,2e+06,2e+07,2e+08,2e+09), labels = c(2e+03,2e+04,2e+05,2e+06,2e+07,2e+08,2e+09)) +
  # NOTE(review): the strata are stacked starting from 0 and log10(0) is -Inf,
  # so a log-transformed y scale is ill-defined for this plot. This is the
  # likely cause of the unusable axis labels; the call is left as posted.
  scale_y_log10(name = "log total spent",
                breaks = trans_breaks("log10", function(x) 10^x),
                labels = trans_format("log10", scientific_format())) +
  #scale_y_log10(name="log total spent",breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(.x))) +
  #scale_y_log10(name="log total spent",breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x))) +
  #theme_economist()+
  ggtitle("Money Flows for U.S. Elections By Activity 2010-2020")

# Print the plot.
proAntiByActivity

Plot data

# Plot data: the object `a` used by the plotting code. It is a grouped tibble
# (class "grouped_df") with 68 rows and four columns: cycle, proAnti,
# activityGroup and aggSpend. The `groups` attribute records grouping by
# cycle and proAnti (12 groups).
a<-structure(list(cycle = c("2010", "2010", "2010", "2010", "2010", 
"2010", "2010", "2010", "2010", "2010", "2010", "2010", "2010", 
"2010", "2010", "2010", "2010", "2010", "2010", "2010", "2010", 
"2010", "2010", "2010", "2010", "2010", "2010", "2010", "2010", 
"2010", "2010", "2010", "2010", "2018", "2018", "2018", "2018", 
"2018", "2018", "2018", "2018", "2018", "2018", "2018", "2018", 
"2018", "2018", "2018", "2018", "2018", "2018", "2018", "2018", 
"2018", "2018", "2018", "2018", "2018", "2018", "2018", "2018", 
"2018", "2018", "2018", "2018", "2018", "2018", "2018"), proAnti = c("antiDem", 
"antiDem", "antiDem", "antiDem", "antiDem", "antiDem", "antiGOP", 
"antiGOP", "antiGOP", "antiGOP", "antiGOP", "antiGOP", "antiOther", 
"antiOther", "antiOther", "antiOther", "antiOther", "proDem", 
"proDem", "proDem", "proDem", "proDem", "proDem", "proGOP", "proGOP", 
"proGOP", "proGOP", "proGOP", "proGOP", "proOther", "proOther", 
"proOther", "proOther", "antiDem", "antiDem", "antiDem", "antiDem", 
"antiDem", "antiDem", "antiGOP", "antiGOP", "antiGOP", "antiGOP", 
"antiGOP", "antiGOP", "antiOther", "antiOther", "antiOther", 
"antiOther", "antiOther", "proDem", "proDem", "proDem", "proDem", 
"proDem", "proDem", "proGOP", "proGOP", "proGOP", "proGOP", "proGOP", 
"proGOP", "proOther", "proOther", "proOther", "proOther", "proOther", 
"proOther"), activityGroup = c("Advertizing", "AdvertizingV2", 
"Outreach", "OutreachV2", "Overhead", "Strategy", "Advertizing", 
"AdvertizingV2", "Outreach", "OutreachV2", "Overhead", "Strategy", 
"Advertizing", "AdvertizingV2", "Outreach", "Overhead", "Strategy", 
"Advertizing", "AdvertizingV2", "Outreach", "OutreachV2", "Overhead", 
"Strategy", "Advertizing", "AdvertizingV2", "Outreach", "OutreachV2", 
"Overhead", "Strategy", "Advertizing", "Outreach", "Overhead", 
"Strategy", "Advertizing", "AdvertizingV2", "Outreach", "OutreachV2", 
"Overhead", "Strategy", "Advertizing", "AdvertizingV2", "Outreach", 
"OutreachV2", "Overhead", "Strategy", "Advertizing", "AdvertizingV2", 
"Outreach", "Overhead", "Strategy", "Advertizing", "AdvertizingV2", 
"Outreach", "OutreachV2", "Overhead", "Strategy", "Advertizing", 
"AdvertizingV2", "Outreach", "OutreachV2", "Overhead", "Strategy", 
"Advertizing", "AdvertizingV2", "Outreach", "OutreachV2", "Overhead", 
"Strategy"), aggSpend = c(159948962.660721, 40399.6402031971, 
9158355.18213395, 283702.113259935, 187198.211058457, 4563675.63724594, 
152982928.325557, 216256.874555558, 7993712.18735522, 823.580861836789, 
106873.732663197, 1778558.91739064, 1438915.09226517, 146.251315353075, 
1028161.44634096, 25685.7644535242, 196881.375812725, 23407657.0206254, 
133629.176224183, 14368371.0220397, 26389.4225797726, 1539957.71484811, 
1538921.52699396, 21854214.1121801, 470029.494152242, 11779580.9624892, 
522106.091640242, 791874.639134986, 2898174.32990864, 8.14755986834644, 
11010.7374694267, 5596.20969242995, 3061.26236033165, 311475306.900278, 
4535009.26475469, 14866419.8988148, 46627.7825986496, 139697.610043845, 
5685839.23231515, 442793202.646583, 9185627.87610291, 25475752.6745331, 
17801.571227516, 854976.211853977, 4386843.04877027, 16497196.406344, 
164121.870068956, 831667.837824925, 3.90373318422706, 129470.611578985, 
118114661.837676, 5903094.13420614, 43261891.7968911, 369371.87531218, 
2141258.36101987, 4361747.5737002, 69866481.4457689, 4214523.71305267, 
25400537.1038862, 242901.276117105, 92174.6149800344, 14024469.650107, 
6396957.39009888, 200539.496487483, 736056.134792953, 833.390308236151, 
2446.50392841341, 285246.096898001)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -68L), groups = structure(list(
    cycle = c("2010", "2010", "2010", "2010", "2010", "2010", 
    "2018", "2018", "2018", "2018", "2018", "2018"), proAnti = c("antiDem", 
    "antiGOP", "antiOther", "proDem", "proGOP", "proOther", "antiDem", 
    "antiGOP", "antiOther", "proDem", "proGOP", "proOther"), 
    .rows = list(1:6, 7:12, 13:17, 18:23, 24:29, 30:33, 34:39, 
        40:45, 46:50, 51:56, 57:62, 63:68)), row.names = c(NA, 
-12L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE))

1
When you say you want a log10 axis, what would you expect to happen near the bottom of the axis, where the strata begin at 0? Since log10 of 0 is -Inf, I can see why the axis behaves weirdly. Also, I don't think log transformed axes are the best way to visualise stacked values, such as those in strata. – teunbrand

1 Answer

0
votes

The following is an alternative to ggalluvial using ggforce. I find that the spacing ggforce places between strata helps the understanding of the plot by resolving categories. Assume a is the dput output you've posted.

library(tidyverse)
library(ggforce)

# Prepare `a` for the parallel-sets geoms. Columns 1:3 (cycle, proAnti,
# activityGroup) are the set variables; the result supplies the x, id and y
# columns mapped in aes() below.
df <- gather_set_data(a, 1:3)

# Parallel-sets diagram. Band sizes use log10(aggSpend) computed on the data
# itself, so the y axis stays an ordinary continuous scale.
ggplot(
  df,
  aes(x = x, id = id, split = y, value = log10(aggSpend))
) +
  geom_parallel_sets(
    aes(fill = as.factor(cycle)),
    alpha = 0.5,
    axis.width = 0.5
  ) +
  geom_parallel_sets_axes(
    axis.width = 0.5,
    fill = "white",
    colour = "black"
  ) +
  geom_parallel_sets_labels(angle = 0) +
  # Hide the fill legend.
  guides(fill = "none") +
  scale_x_discrete(
    name = "",
    limits = c("cycle", "activityGroup", "proAnti"),
    labels = c("Election Cycle", "Means", "Pro/Anti")
  ) +
  scale_y_continuous(name = "Log10 Total Spending")

enter image description here

With regard to the log10 axes, my recommendation is to title the y-axis appropriately and do a log10 transform on the data itself rather than the axis. See my comment underneath your question too for my objections.