1
votes

I am using igraph version 1.2.4.2 in R 3.5.2 to analyse network data. The vertices (nodes) have categorical attributes like “Sex” and “Age_class”, while the edges are undirected and weighted. I imported the adjacency matrix and attached the vertex attributes using the “set_vertex_attr” command I would like to calculate network metrics such as betweenness and strength not only of the global network, but also between and within the attribute classes, i.e. betweenness of the weighted connection between female-female or male-female.

I am able to calculate the within-class network statistics by removing vertices of other attribute class, e.g.

gMM <- delete.vertices(g, V(g)[Sex != 'M'])    # making a network of only males
betweenness(gMM, direction = F)    # calculating male-male only betweenness

However, this method does not work on between-class statistics, I wonder if anyone knows how to calculate between-class statistics in igraph, thank you.

2

2 Answers

0
votes

I haven't found a satisfying way (that I can ever remember) to do this kind of thing in igraph, so I always end up doing something likes the following.

First, here's some example data...

library(igraph, warn.conflicts = FALSE); set.seed(831); n_nodes <- 12

g <- random.graph.game(n_nodes, 0.2)
vertex_attr(g) <- list(name = letters[seq_len(n_nodes)],
                       sex = sample(c("male", "female"), n_nodes, replace = TRUE))
edge_attr(g) <- list(weight = sample(1:50, size = ecount(g)))
g
#> IGRAPH 8ef5eee UNW- 12 10 -- Erdos renyi (gnp) graph
#> + attr: name (g/c), type (g/c), loops (g/l), p (g/n), name (v/c), sex
#> | (v/c), weight (e/n)
#> + edges from 8ef5eee (vertex names):
#>  [1] b--c f--g c--h f--h a--i b--i f--j e--k i--k c--l

... and here's a function that extracts networks containing only homophilous or heterophilous edges...

subgraph_edges_homophily <- function(graph, vattr_name, heterophily = FALSE,
                                     drop_isolates = FALSE) {
  stopifnot( # arg checks
    igraph::is.igraph(graph) || is.character(vattr_name) || 
      length(vattr_name) == 1L || !is.na(vattr_name) || 
      vattr %in% igraph::vertex_attr_names(vattr_name)
  )

  vattrs <- igraph::vertex_attr(graph, name = vattr_name)
  total_el <- igraph::as_edgelist(graph, names = FALSE)

  # rows from total_el where the attribute of the edge source == attribute of edge target
  edges_to_keep <- vattrs[total_el[, 1L]] == vattrs[total_el[, 2L]]

  # for heterophilous ties, just negate the "in_group" version
  if (heterophily) edges_to_keep <- !edges_to_keep

  igraph::subgraph.edges(graph, 
                         eids = which(edges_to_keep), 
                         delete.vertices = drop_isolates)
}

subgraph_edges_homophily() will let you extract the networks you're looking for like so...

# homophily
subgraph_edges_homophily(g, vattr_name = "sex")
#> IGRAPH 1bc4a38 UNW- 12 3 -- Erdos renyi (gnp) graph
#> + attr: name (g/c), type (g/c), loops (g/l), p (g/n), name (v/c), sex
#> | (v/c), weight (e/n)
#> + edges from 1bc4a38 (vertex names):
#> [1] e--k i--k c--l

# heterophily
subgraph_edges_homophily(g, vattr_name = "sex", heterophily = TRUE)
#> IGRAPH e79e82d UNW- 12 7 -- Erdos renyi (gnp) graph
#> + attr: name (g/c), type (g/c), loops (g/l), p (g/n), name (v/c), sex
#> | (v/c), weight (e/n)
#> + edges from e79e82d (vertex names):
#> [1] b--c f--g c--h f--h a--i b--i f--j

# no isolates
subgraph_edges_homophily(g, vattr_name = "sex", drop_isolates = TRUE)
#> IGRAPH 8ce3efe UNW- 5 3 -- Erdos renyi (gnp) graph
#> + attr: name (g/c), type (g/c), loops (g/l), p (g/n), name (v/c), sex
#> | (v/c), weight (e/n)
#> + edges from 8ce3efe (vertex names):
#> [1] e--k i--k c--l

... and you can then run metrics on those networks as desired. Here's an example calculating between-class metrics like you were asking...

g %>% 
  subgraph_edges_homophily(vattr_name = "sex", heterophily = TRUE) %>% 
  betweenness(directed = FALSE)
#>  a  b  c  d  e  f  g  h  i  j  k  l 
#>  0 10 12  0  0 11  0 12  6  0  0  0

-

sessionInfo()
#> R version 3.6.2 (2019-12-12)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Ubuntu 18.04.4 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
#> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] igraph_1.2.4.2
#> 
#> loaded via a namespace (and not attached):
#>  [1] compiler_3.6.2  magrittr_1.5    tools_3.6.2     htmltools_0.4.0
#>  [5] yaml_2.2.1      Rcpp_1.0.3      stringi_1.4.6   rmarkdown_2.1.1
#>  [9] highr_0.8       knitr_1.28      stringr_1.4.0   xfun_0.12      
#> [13] digest_0.6.24   pkgconfig_2.0.3 rlang_0.4.4     evaluate_0.14
0
votes

I made some modification to the solution provided by @knapply so the function will provide 1) within-class network (e.g. male-male); 2) between-class network (male-female); and 3) to-other-classes network when the attribute has more than 2 classes (e.g. age class). Here are the modified functions:


## Function - part1 ##

subclass_edges <- function(graph, vattr_name){
  stopifnot( # arg checks
    igraph::is.igraph(graph) || is.character(vattr_name) || 
      length(vattr_name) == 1L || !is.na(vattr_name) || 
      vattr %in% igraph::vertex_attr_names(vattr_name)
  )

  vattrs <- igraph::vertex_attr(graph, name = vattr_name)
  vattrs_class <- unique(vattrs)
  total_el <- igraph::as_edgelist(graph, names = FALSE)

  # Attribute class to single attribute class
  list_name <- paste0("to_", vattrs_class)
  map(vattrs_class, function(x){
    map(1:length(vattrs_class), function(y){
      (vattrs[total_el[, 1L]] == x) & (vattrs[total_el[, 2L]] == vattrs_class[y])
    }) -> to_class
    names(to_class) <- list_name
    return(to_class)
  }) -> attr_class
  names(attr_class) <- vattrs_class

  if(length(vattrs_class) > 2){
    # Attribute class to all other attribute classes
    map(vattrs_class, function(x){
      (vattrs[total_el[, 1L]] == x) & (vattrs[total_el[, 2L]] != x)
    }) -> to_others
    names(to_others) <- vattrs_class

    # Combine
    map(1:length(vattrs_class), function(c){
      fin <- c(attr_class[[c]], to_others[c])
      names(fin) <- c(list_name, "to_others")
      return(fin)
    }) -> combind_edges
    names(combind_edges) <- vattrs_class

    edges_to_keep <- combind_edges
  } else {
    edges_to_keep <- attr_class
  }

  return(edges_to_keep)
}


## Function - part2 ##

subclass <- function(graph, vattr_name, drop_isolates = FALSE){
  subclass_edges(graph, vattr_name) -> input
  map(input, function(form){
    map(form, function(to){
      igraph::subgraph.edges(graph, 
                             eids = which(to), 
                             delete.vertices = drop_isolates)
    })
  })
}

And here is an example modified from @knapply's answer with the new attribute "age_class" and more nodes (vertices):


## Example ##

set.seed(100)
n_nodes <- 20
g <- random.graph.game(n_nodes, 0.2)
vertex_attr(g) <- list(name = letters[seq_len(n_nodes)],
                       sex = sample(c("male", "female"), n_nodes, replace = TRUE), 
                       age_class = sample(c("15-20", "21-25", "26-30"), n_nodes, replace = TRUE))
edge_attr(g) <- list(weight = sample(1:50, size = ecount(g)))
g
#> IGRAPH ce7c899 UNW- 20 44 -- Erdos renyi (gnp) graph
#> + attr: name (g/c), type (g/c), loops (g/l), p (g/n), name (v/c), sex (v/c), age_class (v/c), weight (e/n)
#> + edges from ce7c899 (vertex names):
#> [1] b--c a--d b--e c--e b--f a--g e--g g--h f--i g--i a--j e--j a--k b--k h--k b--l h--l k--l c--m f--m l--m i--n m--n b--o g--o
#> [26] k--o b--p f--p h--p c--q p--q f--r k--r n--r p--r b--s h--s m--s n--s p--s q--s i--t k--t n--t


g %>% subclass(vattr_name = "age_class") -> g_a

g_a$`15-20`$`to_26-30` %>% igraph::betweenness(directed = F) 
# betweenness of indviduals in '15-20' age class with individuals in '26-30' age class
#> a  b  c  d  e  f  g  h  i  j  k  l  m  n  o  p  q  r  s  t 
#> 0  9  0  0  0 15 10  0 11  0  9  0  0  0 18  9  0 18  0  0 

g_a$`15-20`$to_others %>% igraph::betweenness(directed = F) 
# betweenness of indviduals in '15-20' age class with individuals in all age classes except '15-20'
#> a  b  c  d  e  f  g  h  i  j  k  l  m  n  o  p  q  r  s  t 
#> 0 45  0  0  0 16 32  0 16  0 21 21  0  0 34 18  0 16 10  0 


Hope this will be helpful to people having similar questions.