0
votes

I've been trying to run a parallel process on Windows. doSNOW states that it creates "SOCK"-type clusters, also referred to as "socket clusters". I have a single CPU with 8 physical and 8 logical cores, which I often use on Linux. I've been trying to run the code below, but whenever I use the %dopar% operator it gives me a NULL result, so I'm wondering whether parallel processing on Windows only works on multi-socket machines (which I don't have).

library(jsonlite)
library(doSNOW)
library(foreach)
library(dplyr)

# Keys substituted into the API URL below; one request is made per key.
claves <- c(
  "I11D67", "I11D66", "I11D65", "I11D64", "I11D63",
  "I11D62", "I11D61", "I11C69", "I11D79", "I11D78",
  "I11D77", "I11D76", "I11D75", "I11D74", "I11D73",
  "I11D72", "I11D71", "I11C79", "I12C82", "I12C81",
  "I11D89", "I11D88", "I11D87", "I11D86", "I11D85"
)

# Start 8 workers and register them as the foreach backend.
cl <- makeCluster(8)
registerDoSNOW(cl)

# Fetch the JSON for each key, keep three columns, and stack the pieces
# row-wise. Iterations that raise an error are dropped from the result
# (.errorhandling = "remove").
# NOTE: %do% evaluates the iterations sequentially in this session; the
# registered cluster is only used by %dopar%.
results <- foreach(i = claves, .combine = rbind, .errorhandling = "remove") %do% {
  url <- sprintf("http://www.beta.inegi.org.mx/app/api/buscador/busquedaTodos/%s_A/RANKING/es", i)
  ea <- fromJSON(url)
  select(ea, TITULO, CVE_UNICA, EDICION)
}

# Shut the workers down once the loop has finished.
stopCluster(cl)
1
It gives no error; it just "stores" the result as NULL. – Elio Diaz

1 Answer

2
votes

It's not a problem with your hardware. It's one or both of these 2 things:

  1. SNOW has problems running on Windows. You'd want to substitute in doParallel or something similar.
  2. Use `::` to make sure your code calls dplyr::select and not, for instance, MASS::select.

This works (with some unrelated warnings):

# Install doParallel only when it is missing. requireNamespace() checks for
# availability without attaching the package; library() below does the attach.
if (!requireNamespace("doParallel", quietly = TRUE)) {
  install.packages("doParallel")
}
library(foreach)
library(doParallel)

workers <- makeCluster(2) # start small to test
registerDoParallel(workers)

# %dopar% dispatches the iterations to the registered workers (%do% would run
# them sequentially in this session and ignore the cluster).
# Each worker is a separate R process that does not have the packages attached
# in this session, so every package used inside the loop body must be listed in
# .packages. Without it each iteration fails on the worker, and because
# .errorhandling = "remove" silently drops failed iterations, the combined
# result is NULL. Switch to .errorhandling = "stop" while debugging to see the
# underlying error.
results <- foreach(
  i = claves,
  .combine = rbind,
  .packages = c("jsonlite", "dplyr"),
  .errorhandling = "remove"
) %dopar% {
  url <- sprintf("http://www.beta.inegi.org.mx/app/api/buscador/busquedaTodos/%s_A/RANKING/es", i)
  ea <- fromJSON(url)
  # Namespace-qualify select() so another attached package cannot mask it.
  ea %>% dplyr::select(TITULO, CVE_UNICA, EDICION)
}

# Stop the cluster created above (it is named `workers`, not `cl`).
stopCluster(workers)



dim(results)
[1] 50 39


results$CVE_UNICA
 [1] "889463489610" "702825634490" "702825271480" "702825637149" "702825267483" "889463489603" "702825213497" "702825709792" "702825277420" "702825711115"
[11] "702825267476" "702825637101" "702825272128" "889463489597" "702825266967" "702825709327" "702825215576" "702825634582" "702825214098" "702825709754"
[21] "702825271510" "702825003959" "702825271503" "702825208332" "702825213510" "702825709747" "702825213503" "702825709815" "702825267506" "889463489627"
[31] "702825267490" "702825637170" "702825271497" "702825711269" "702825268114" "889463489580" "702825219352" "702825708375" "702825215590" "702825708382"
[41] "702825215583" "702825637330" "702825214104" "702825649418" "702825203894" "702825003962" "702825203887" "702825712617" "702825212421" "702825637293"

Please see this source for several good examples.