0
votes

In SQL you can make a statement like this:

SELECT *
FROM table
WHERE (var1, var2, var3, var4) IN (("var1-1", "var2-1", "var3-1", "var4-1"),
                                   ("var1-2", "var2-2", "var3-2", "var4-2"))

This means to grab all rows where (var1 == "var1-1" and var2 == "var2-1" and var3 == "var3-1" and var4 == "var4-1") or (var1 == "var1-2" and var2 == "var2-2" and var3 == "var3-2" and var4 == "var4-2")

Is there a way to make a similar query in dbplyr in a programmatic way?

So for example suppose I had a tibble:

tribble(
    ~var1,     ~var2,    ~var3,    ~var4,
    "var1-1",  "var2-1", "var3-1", "var4-1",
    "var1-2",  "var2-2", "var3-2", "var4-2"
  )

Is there some sort of function I could use to have dbplyr build a SQL statement like the one above?

2
Is var1 in var1-1, var1-2 or var1-1, var2-1, etc?CPak

2 Answers

3
votes

1) Use inner_join:

library(dplyr)

# test data
v <- paste0("var", 1:4)
DF1 <- as.data.frame(t(outer(v, 1:3, paste, sep = "-")), stringsAsFactors = FALSE)
DF2 <- as.data.frame(t(outer(v, 2:4, paste, sep = "-")), stringsAsFactors = FALSE)

DF1 %>% inner_join(DF2)

giving:

Joining, by = c("V1", "V2", "V3", "V4")
# A tibble: 2 x 4
  V1     V2     V3     V4    
  <chr>  <chr>  <chr>  <chr> 
1 var1-2 var2-2 var3-2 var4-2
2 var1-3 var2-3 var3-3 var4-3

2) In base R we could use merge:

merge(DF1, DF2)

or intersect

intersect(DF1, DF2)

3) In dbplyr:

library(dbplyr)

# set up backend using DF1 and DF2 from (1)
con <- DBI::dbConnect(RSQLite::SQLite(), path = ":memory:")
copy_to(con, DF1, "DF1")
copy_to(con, DF2, "DF2")

DF1_db <- tbl(con, "DF1")
DF2_db <- tbl(con, "DF2")
DF1_db %>% inner_join(DF2_db)

giving:

Joining, by = c("V1", "V2", "V3", "V4")
# Source:   lazy query [?? x 4]
# Database: sqlite 3.19.3 []
  V1     V2     V3     V4    
  <chr>  <chr>  <chr>  <chr> 
1 var1-2 var2-2 var3-2 var4-2
2 var1-3 var2-3 var3-3 var4-3

If you have a tibble and a database table you will need to either copy the tibble to the database using copy_to or grab the database table into R. inner_join can't mix sources.

2
votes

Using some advanced R programming with purrr and rlang, you should be able to create a single expression for the filter() that, I think, does the exact thing that you're asking, here's an example using mtcars. I recreated your tribble with data that would apply to the example. I also added inline comments to help explain what each step is doing:

library(dbplyr, warn.conflicts = FALSE)
library(dplyr, warn.conflicts = FALSE)
library(purrr, warn.conflicts = FALSE)
library(DBI, warn.conflicts = FALSE)
library(rlang, warn.conflicts = FALSE)

con <- DBI::dbConnect(RSQLite::SQLite(), path = ":dbname:")
db_mtcars <- copy_to(con, mtcars)

filters <- tribble(
  ~gear, ~carb,    
  4,     4,
  3,     1
)
# transpose() converts rows into lists entries
filter_expr <- transpose(filters) %>%
  # iterate through each row
  map(~{
    # iterate through each pair ie: gear == 4
    imap(.x, ~expr(!!sym(.y) == !!.x)) %>%
      # creates one call in the row by 
      # "collapsing" all pairs, separating them with &
      reduce(function(x, y) expr((!!x & !!y)))
    }) %>%
  # creates one call collapsing all row calls 
  # into a single one, separating them with |
  reduce(function(x, y ) expr(!!x | !! y))
# the resulting call
filter_expr
#> (gear == 4 & carb == 4) | (gear == 3 & carb == 1)

filtered_mtcars <- db_mtcars %>%
  # Use bang-bang (!!) to execute the new call
  filter(!! filter_expr)

filtered_mtcars
#> # Source:   lazy query [?? x 11]
#> # Database: sqlite 3.22.0 []
#>     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
#>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1  21       6  160    110  3.9   2.62  16.5     0     1     4     4
#> 2  21       6  160    110  3.9   2.88  17.0     0     1     4     4
#> 3  21.4     6  258    110  3.08  3.22  19.4     1     0     3     1
#> 4  18.1     6  225    105  2.76  3.46  20.2     1     0     3     1
#> 5  19.2     6  168.   123  3.92  3.44  18.3     1     0     4     4
#> 6  17.8     6  168.   123  3.92  3.44  18.9     1     0     4     4
#> 7  21.5     4  120.    97  3.7   2.46  20.0     1     0     3     1

show_query(filtered_mtcars)
#> <SQL>
#> SELECT *
#> FROM `mtcars`
#> WHERE ((`gear` = 4.0 AND `carb` = 4.0) OR (`gear` = 3.0 AND `carb` = 1.0))

dbDisconnect(con)