|
2 | 2 | #' users.
|
3 | 3 | #'
|
4 | 4 | #' @param user_data Result of `lapply(logins, repometrics_data_user)`.
|
| 5 | +#' Contains the following fields: |
| 6 | +#' \enumerate{ |
| 7 | +#' \item general (not considered here) |
| 8 | +#' \item commit_cmt Comments on commits |
| 9 | +#' \item commits Commits to different repositories |
| 10 | +#' \item followers GitHub followers |
| 11 | +#' \item following Logins of people/orgs followed by user on GitHub |
| 12 | +#' \item issue_cmts Comments on issues |
| 13 | +#' \item issues Issues opened by user. |
| 14 | +#' } |
5 | 15 | #' @noRd
|
user_relation_matrices <- function (user_data) {

    # Record the user names before `user_data` is re-assigned below.
    user_names <- names (user_data)
    # Add 'login' columns to each element so that the per-user data can then be
    # combined.
    user_data <- add_user_login_cols (user_data) |>
        combine_user_data ()

    # Only relations based on commits are computed at present. The result is
    # returned directly: ending the function on an assignment returned the
    # value invisibly and left an unused local variable.
    user_relate_commits (user_data, user_names)
}
|
27 | 24 |
|
28 | 25 | #' Add 'login' columns to all user data, so each element can be combined.
|
@@ -70,10 +67,31 @@ combine_user_data <- function (user_data) {
|
70 | 67 | return (data)
|
71 | 68 | }
|
72 | 69 |
|
#' Pairwise overlap in commit activity between users.
#'
#' For every unordered pair of users, finds the repositories to which both
#' users have committed, and expresses the commits made by the pair to those
#' shared repositories as a proportion of all commits made by the pair.
#'
#' @param user_data List of combined user data. Only the `commits` element is
#' used here, which is accessed via the columns `login`, `repo`, and
#' `num_commits`.
#' @param user_names Character vector of user logins to be compared.
#' @return A `data.frame` with one row for each pair of users, and columns
#' `login1`, `login2`, `overlap` (0 where a pair shares no repositories), and
#' `what` (always "commits"). Has no rows if fewer than two users are given.
#' @noRd
user_relate_commits <- function (user_data, user_names) {

    # `combn()` errors when asked for pairs from fewer than two items, so
    # return an empty result with the same columns instead.
    if (length (user_names) < 2L) {
        return (data.frame (
            login1 = character (0L),
            login2 = character (0L),
            overlap = numeric (0L),
            what = character (0L)
        ))
    }

    # Pairs are formed from indices rather than names, so that the summaries
    # below can be looked up by position.
    index_combs <- t (combn (seq_along (user_names), m = 2L))

    # Summarise commits per repository once for each user, rather than once
    # for every pair in which that user appears.
    commits_by_user <- lapply (user_names, function (this_user) {
        dplyr::filter (user_data$commits, login == this_user) |>
            dplyr::group_by (repo) |>
            dplyr::summarise (n = sum (num_commits))
    })

    res <- vapply (seq_len (nrow (index_combs)), function (i) {

        cmt1 <- commits_by_user [[index_combs [i, 1L]]]
        cmt2 <- commits_by_user [[index_combs [i, 2L]]]
        # The suffixes turn the shared `n` column into `n1` and `n2`.
        overlap <- dplyr::inner_join (
            cmt1,
            cmt2,
            by = "repo",
            suffix = c ("1", "2")
        )

        if (nrow (overlap) == 0L) {
            return (0)
        }

        # Commits to shared repositories as a proportion of all commits by
        # both users.
        (sum (overlap$n1) + sum (overlap$n2)) /
            (sum (cmt1$n) + sum (cmt2$n))
    }, numeric (1L))

    data.frame (
        login1 = user_names [index_combs [, 1L]],
        login2 = user_names [index_combs [, 2L]],
        overlap = res,
        what = "commits"
    )
}
|
0 commit comments