I need to create a `goal` variable that indicates whether the number of cases
where `dummy$ciiu_comparado == 1` is greater than 50% of the total cases:
it should be 1 if so, and 0 otherwise.
Example: for id 178036 in 2019, 17 of the 26 cases have the dummy equal to 1, so 17/26 = 0.65 > 0.5 -> goal = 1.
The target output is the `goal` variable.
Note: Consider grouping by year and id.
Data
# Example data: a 29-row grouped tibble (groups: year, id), rebuilt from its
# dput() representation. Everything is assembled inside local() so that only
# `db` is created in the calling environment.
# NOTE(review): the "year" label text looks mis-encoded (possibly "AÑO");
# it is reproduced exactly as given.
db <- local({
  # Rows per (year, id) group, in row order: 3 rows for 2020, 26 for 2019.
  group_sizes <- c(3L, 26L)
  tibble_class <- c("tbl_df", "tbl", "data.frame")

  year_col <- structure(
    rep(c("2020", "2019"), times = group_sizes),
    label = "AÃ<U+0091>O",
    format.stata = "%9s"
  )

  id_col <- structure(
    rep(c(732437, 178036), times = group_sizes),
    label = "EXPEDIENTE",
    format.stata = "%12.0g"
  )

  # `dummy` is itself a one-column tibble nested inside the outer tibble.
  dummy_col <- structure(
    list(
      ciiu_comparado = c(
        0, 0, 0, 1, 1, 0, 1, 0, 0, 0,
        1, 1, 1, 0, 1, 1, 0, 1, 1, 0,
        1, 0, 1, 1, 1, 1, 1, 0, 1
      )
    ),
    class = tibble_class,
    row.names = c(NA, -29L)
  )

  n_dummy_col <- c(
    3L, 3L, 3L, 17L, 17L, 9L, 17L, 9L, 9L, 9L,
    17L, 17L, 17L, 9L, 17L, 17L, 9L, 17L, 17L, 9L,
    17L, 9L, 17L, 17L, 17L, 17L, 17L, 9L, 17L
  )

  # Grouping metadata: one row per (year, id) key, with the row indices of
  # each group stored in `.rows`.
  group_keys <- structure(
    list(
      year = structure(
        c("2019", "2020"),
        label = "AÃ<U+0091>O",
        format.stata = "%9s"
      ),
      id = structure(
        c(178036, 732437),
        label = "EXPEDIENTE",
        format.stata = "%12.0g"
      ),
      .rows = structure(
        list(4:29, 1:3),
        ptype = integer(0),
        class = c("vctrs_list_of", "vctrs_vctr", "list")
      )
    ),
    row.names = c(NA, -2L),
    class = tibble_class,
    .drop = TRUE
  )

  structure(
    list(
      year = year_col,
      id = id_col,
      n_shareholder = rep(c(3L, 26L), times = group_sizes),
      dummy = dummy_col,
      n_dummy = n_dummy_col,
      goal = rep(c(0, 1), times = group_sizes)
    ),
    row.names = c(NA, -29L),
    groups = group_keys,
    class = c("grouped_df", tibble_class)
  )
})
# A tibble: 29 x 6
# Groups: year, id [2]
year id n_shareholder dummy$ciiu_comparado n_dummy goal
<chr> <dbl> <int> <dbl> <int> <dbl>
1 2020 732437 3 0 3 0
2 2020 732437 3 0 3 0
3 2020 732437 3 0 3 0
4 2019 178036 26 1 17 1
5 2019 178036 26 1 17 1
6 2019 178036 26 0 9 1
7 2019 178036 26 1 17 1
8 2019 178036 26 0 9 1
9 2019 178036 26 0 9 1
10 2019 178036 26 0 9 1
# ... with 19 more rows