4
votes

I have made a function that can plot the loadings from many factor analyses at once, also when their variables do not overlap perfectly (or at all). It works fine, except that it generates a number of "duplicated levels in factors are deprecated" warning, and I don't understand why.

The code allow should be reproducible.

library(devtools)
source_url("https://raw.githubusercontent.com/Deleetdk/psych2/master/psych2.R")

loadings.plot2 = function(fa.objects, fa.names="") {
  fa.num = length(fa.objects) #number of fas

  if (fa.names=="") {
    fa.names = str_c("fa.", 1:fa.num)
  }
  if (length(fa.names) != fa.num) {
    stop("Names vector does not match the number of factor analyses.")
  }

  #merge into df
  d = data.frame() #to merge into
  for (fa.idx in 1:fa.num) { #loop over fa objects
    loads = fa.objects[[fa.idx]]$loadings
    rnames = rownames(loads)
    loads = as.data.frame(as.vector(loads))
    rownames(loads) = rnames
    colnames(loads) = fa.names[fa.idx]

    d = merge.datasets(d, loads, 1)
  }


  #reshape to long form
  d2 = reshape(d,
               varying = 1:fa.num,
               direction="long",
               ids = rownames(d))
  d2$time = as.factor(d2$time)
  d2$id = as.factor(d2$id)
  print(d2)
  print(levels(d2$time))
  print(levels(d2$id))

  #plot
  g = ggplot(reorder_by(id, ~ fa, d2), aes(x=fa, y=id, color=time)) +
      geom_point() +
      xlab("Loading") + ylab("Indicator") +
      scale_color_discrete(name="Analysis",
                           labels=fa.names)

  return(g)
}

fa1 = fa(iris[-5])
fa2 = fa(iris[-c(1:50),-5])
fa3 = fa(ability)
fa4 = fa(ability[1:50,])

loadings.plot2(list(fa1))
loadings.plot2(list(fa1,fa2))
loadings.plot2(list(fa1,fa2,fa3))
loadings.plot2(list(fa1,fa2,fa3,fa4))

The example above, with 4 factor analyses

Plotting different numbers of factors give different numbers of errors.

I have tried setting the variables as.factor before giving them to ggplot, but it didn't change anything.

Any ideas? Perhaps related to reorder_by()? This function is needed to sort the data.frame, otherwise ggplot sorts them alphabetically, which is useless.

1
That's a lot of code. Where does the warning happen?Aaron left Stack Overflow
at the ggplot() at the end.CoderGuy123
In the ggplot? Or in the reorder_by? What does the data set look like when the warning happens? Can you make a small data set that looks the same way that recreates the warning?Aaron left Stack Overflow

1 Answers

2
votes

As mentioned in the comments, this warning is caused by using the reorder_by() function but only in conjunction with ggplot2. Specifically, the levels are for some reason duplicated:

#> levels(d2$id)
[1] "Sepal.Width"  "Sepal.Width"  "Sepal.Length" "Sepal.Length" "Petal.Width"  "Petal.Width"  "Petal.Length"
[8] "Petal.Length"

ggplot2 does not like duplicate levels, and so gives the warning.


In case anyone is interested, I wrote new code to do the re-leveling myself to avoid this problem and to avoid the dependency on the plotflow package.

The new function is this:

#' Plot multiple factor loadings in one plot.
#'
#' Returns a ggplot2 plot with sorted loadings colored by the analysis they belong to. Supports reversing óf any factors that are reversed. Dodges to avoid overplotting. Only works for factor analyses with 1 factor solutions!
#' @param fa_objects (list of fa-class objects) Factor analyses objects from the fa() function from the \code{\link{psych}} package.
#' @param fa_labels (chr vector) Names of the analyses. Defaults to fa.1, fa.2, etc..
#' @param reverse_vector (num vector) Vector of numbers to use for reversing factors. Use e.g. c(1, -1) to reverse the second factor. Defaults not reversing.
#' @param reorder (chr scalar or NA) Which factor analysis to order the loadings by. Can be integers that reprensent each factor analysis. Can also be "mean", "median" to use the means and medians of the loadings. Use "all" for the old method. Default = "mean".
#' @export
#' @examples
#' library(psych)
#' plot_loadings_multi(fa(iris[-5])) #extract a factor and reverse
plot_loadings_multi = function (fa_objects, fa_labels, reverse_vector = NA, reorder = "mean") {
  library("stringr")
  library("ggplot2")
  library("plyr")

  fa_num = length(fa_objects)
  fa_names = str_c("fa.", 1:fa_num)
  if (!is.list(fa_objects)) {
    stop("fa_objects parameter is not a list.")
  }
  if (class(fa_objects) %in% c("psych", "fa")) {
    fa_objects = list(fa_objects)
    fa_num = length(fa_objects)
    fa_names = str_c("fa.", 1:fa_num)
  }
  if (missing("fa_labels")) {
    if (!is.null(names(fa_objects))) {
      fa_labels = names(fa_objects)
    }
    else {
      fa_labels = fa_names
    }
  }
  if (length(fa_labels) != fa_num) {
    stop("Factor analysis labels length is not identical to number of analyses.")
  }
  if (all(is.na(reverse_vector))) {
    reverse_vector = rep(1, fa_num)
  }
  else if (length(reverse_vector) != fa_num) {
    stop("Length of reversing vector does not match number of factor analyses.")
  }
  d = data.frame()
  for (fa.idx in 1:fa_num) {
    loads = fa_objects[[fa.idx]]$loadings * reverse_vector[fa.idx]
    rnames = rownames(loads)
    loads = as.data.frame(as.vector(loads))
    rownames(loads) = rnames
    colnames(loads) = fa_names[fa.idx]
    suppressor({
      d = merge_datasets(d, loads, 1)
    })
  }
  d2 = reshape(d, varying = 1:fa_num, direction = "long", ids = rownames(d))
  d2$time = as.factor(d2$time)
  d2$id = as.factor(d2$id)
  colnames(d2)[2] = "fa"

  #reorder factor?
  if (!is.na(reorder)) {
    if (reorder == "all") {
      library("plotflow")

      silence({
        d2 = reorder_by(id, ~fa, d2)
      })
    } else if (reorder == "mean") {
      v_aggregate_values = daply(d2, .(id), function(x) {
        mean(x$fa)
      })

      #re-level
      d2$id = factor(d2$id, levels = names(sort(v_aggregate_values, decreasing = F)))

    } else if (reorder == "median") {
      v_aggregate_values = daply(d2, .(id), function(x) {
        median(x$fa)
      })

      #re-level
      d2$id = factor(d2$id, levels = names(sort(v_aggregate_values, decreasing = F)))

    } else {
      d2_sub = d2[d2$time == reorder, ] #subset the analysis whose loading is to be used for the reorder
      silence({
        d2_sub = reorder_by(id, ~fa, d2_sub)
      })

      library(gdata)
      d2$id = reorder.factor(d2$id, new.order = levels(d2_sub$id))
    }
  }

  #plot
  g = ggplot(d2, aes(x = id, y = fa, color = time, group = time)) +
    geom_point(position = position_dodge(width = 0.5)) +
    ylab("Loading") + xlab("Indicator") + scale_color_discrete(name = "Analysis",
                                                               labels = fa_labels) + coord_flip()
  return(g)
}

library(psych)
fa_1 = fa(iris[-5])
fa_2 = fa(iris[1:125, -5])
plot_loadings_multi(list(fa_1, fa_2), reorder = "mean")

Which produces the following plot without warnings:

enter image description here

The code is from my personal package.