Skip to content

Commit a234d33

Browse files
authored
Merge pull request #58 from ropensci-review-tools/user-connections
start 'R/analyse-users.R'
2 parents bcd75d8 + 61e651d commit a234d33

10 files changed

+237
-4
lines changed

DESCRIPTION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: repometrics
22
Title: Metrics for Your Code Repository
3-
Version: 0.1.3.020
3+
Version: 0.1.3.031
44
Authors@R:
55
person("Mark", "Padgham", , "mark.padgham@email.com", role = c("aut", "cre"),
66
comment = c(ORCID = "0000-0003-2172-5265"))

R/analyse-users.R

+135
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
#' Construct user-by-user square matrices of strengths of relation between
2+
#' users.
3+
#'
4+
#' @param user_data Result of `lapply(logins, repometrics_data_user)`.
5+
#' Contains the following fields:
6+
#' \enumerate{
7+
#' \item general (not considered here)
8+
#' \item commit_cmt Comments on commits
9+
#' \item commits Commits to different repositories
10+
#' \item followers GitHub followers
11+
#' \item following Logins of people/orgs followed by user on GitHub
12+
#' \item issue_cmts Comments on issues
13+
#' \item issues Issues opened by user.
14+
#' }
15+
#' @return A `data.frame` of pairwise user logins, and proportions of overlap
16+
#' between repositories in the six variables described above.
17+
#' @noRd
18+
user_relation_matrices <- function (user_data) {

    # Logins are taken from the names of the input list before the per-user
    # data are collapsed into one table per field.
    user_names <- names (user_data)
    user_data <- add_user_login_cols (user_data) |>
        combine_user_data ()

    # Pre-processing to name grouping column "repo" and count column "n":
    user_data$commit_cmt$repo <-
        paste0 (user_data$commit_cmt$org, user_data$commit_cmt$repo)

    # Followers/following have no repository; the follower/followed login is
    # used as the grouping value, with each entry counted once.
    user_data$followers <-
        dplyr::rename (user_data$followers, repo = followers) |>
        dplyr::mutate (n = 1L)
    user_data$following <-
        dplyr::rename (user_data$following, repo = following) |>
        dplyr::mutate (n = 1L)

    user_data$issue_cmts <-
        dplyr::rename (user_data$issue_cmts, repo = org_repo) |>
        dplyr::group_by (repo, login) |>
        dplyr::summarise (n = sum (num_comments), .groups = "keep")
    user_data$issues <- dplyr::rename (user_data$issues, repo = org_repo) |>
        dplyr::group_by (repo, login) |>
        dplyr::summarise (n = dplyr::n (), .groups = "keep")

    # One data.frame of pairwise overlaps per field, each with columns
    # "login1", "login2", and a column named after the field.
    overlap <- lapply (names (user_data), function (n) {
        user_relate_fields (user_data, user_names, what = n)
    })

    # Fold all per-field tables together on the login pair. Using `Reduce`
    # rather than indexing `overlap [[1]]` to `overlap [[6]]` means this works
    # for however many fields are present.
    res <- Reduce (
        function (x, y) dplyr::left_join (x, y, by = c ("login1", "login2")),
        overlap
    )

    return (res)
}
55+
56+
#' Add 'login' columns to all user data, so each element can be combined.
57+
#' @noRd
58+
add_user_login_cols <- function (user_data) {

    # Names of the outer list are the user logins.
    logins <- names (user_data)

    res <- lapply (seq_along (user_data), function (u) {

        login_u <- logins [u]
        fields_u <- names (user_data [[u]])

        res_u <- lapply (seq_along (user_data [[u]]), function (i) {
            ud <- user_data [[u]] [[i]]
            if (is.data.frame (ud) && nrow (ud) > 0L) {
                # Zero-row data frames are left untouched, because assigning
                # a length-one column to them would fail.
                ud$login <- login_u
            } else if (is.character (ud)) {
                # Character vectors (such as follower logins) become
                # two-column data frames. The login is repeated to the length
                # of the vector so that an empty vector gives a zero-row data
                # frame instead of an error.
                ud <- data.frame (ud, login = rep (login_u, length (ud)))
                names (ud) [1] <- fields_u [i]
            }
            # Anything else (such as the "general" list) is returned as is.
            return (ud)
        })
        names (res_u) <- fields_u

        return (res_u)
    })
    names (res) <- logins

    return (res)
}
81+
82+
#' Combine all individual elements of 'user_data' for all users.
83+
#'
84+
#' The `add_user_login_cols` enables all data to be `rbind`-ed here.
85+
#' @noRd
86+
combine_user_data <- function (user_data) {

    # The fields of the first user define which elements are combined.
    fields <- names (user_data [[1]])
    names (fields) <- fields

    combined <- lapply (fields, function (f) {
        # Pull this field out of every user, then stack the pieces.
        stacked <- do.call (rbind, lapply (user_data, `[[`, f))
        rownames (stacked) <- NULL
        stacked
    })

    # The "general" element is not part of the combined result.
    combined [["general"]] <- NULL

    return (combined)
}
100+
101+
#' Calculate pairwise overlap between users for one field of user data.
#'
#' @param user_data List of combined user data. The element named by `what`
#' is filtered on a `login` column and grouped on a `repo` column. Counts are
#' taken from `num_commits` for "commits", set to 1 per row for "commit_cmt",
#' and otherwise read from an existing `n` column.
#' @param user_names Character vector of user logins; every pair is compared.
#' @param what Name of the element of `user_data` to compare.
#' @return A `data.frame` with columns `login1`, `login2`, and a third column
#' named after `what`. The third column holds, for each pair, the summed
#' counts of both users in the `repo` values they share, divided by the summed
#' counts of both users over all `repo` values (0 if nothing is shared).
#' @noRd
user_relate_fields <- function (user_data, user_names, what = "commits") {

    # One row per unordered pair of users.
    pairs <- t (combn (user_names, m = 2L))

    # Standardise the count column of the selected field to "n".
    field <- user_data [[what]]
    if (what == "commits") {
        field <- dplyr::rename (field, n = num_commits)
    } else if (what == "commit_cmt") {
        field$n <- 1L
    }

    # Total count per "repo" value for a single user.
    repo_counts <- function (this_login) {
        dplyr::filter (field, login == this_login) |>
            dplyr::group_by (repo) |>
            dplyr::summarise (n = sum (n))
    }

    overlap_prop <- vapply (seq_len (nrow (pairs)), function (p) {

        counts1 <- repo_counts (pairs [p, 1])
        counts2 <- repo_counts (pairs [p, 2])
        # Shared "repo" values only; the count columns become "n1" and "n2".
        shared <- dplyr::inner_join (
            counts1,
            counts2,
            by = "repo",
            suffix = c ("1", "2")
        )

        if (nrow (shared) == 0L) {
            return (0)
        }

        (sum (shared$n1) + sum (shared$n2)) /
            (sum (counts1$n) + sum (counts2$n))
    }, numeric (1L))

    out <- data.frame (
        login1 = pairs [, 1],
        login2 = pairs [, 2],
        value = overlap_prop
    )
    names (out) [3] <- what

    return (out)
}

R/data-gh-user.R

+2-2
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@ gh_user_general_internal <- function (login = "",
8787
name = org_name,
8888
gh_org = org_gh_org,
8989
url = org_url,
90-
web_url = org_web_url,
91-
location = org_location,
90+
web_url = null2na_char (org_web_url),
91+
location = null2na_char (org_location),
9292
num_members = org_num_members
9393
)
9494

codemeta.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
"codeRepository": "https://github.com/ropensci-review-tools/repometrics",
99
"issueTracker": "https://github.com/ropensci-review-tools/repometrics/issues",
1010
"license": "https://spdx.org/licenses/GPL-3.0",
11-
"version": "0.1.3.020",
11+
"version": "0.1.3.031",
1212
"programmingLanguage": {
1313
"@type": "ComputerLanguage",
1414
"name": "R",
+82
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# Mock version of data constructed in data-gh-user.R
2+
mock_user_rel_data <- function () {

    # Values shared by several of the tables below.
    ts_end <- as.POSIXct ("2024-01-01T00:00:01")
    ts_start <- as.POSIXct ("2023-01-01T00:00:01")
    repo_names <- paste0 ("org", c ("one", "two"))

    # "general" data: one user row and one organisation row.
    user <- data.frame (
        login = "me",
        name = "me too",
        email = "me@here.com",
        location = "somewhere",
        company = "noway",
        bio = NA_character_,
        avatarUrl = NA_character_,
        num_repositories = 1L,
        repos_contributed_to = 2L,
        num_starred_repos = 3L
    )
    orgs <- data.frame (
        name = "org",
        gh_org = "org",
        url = "https://github.com/org",
        web_url = NA_character_,
        location = NA_character_,
        num_members = 0L
    )
    general <- list (user = user, orgs = orgs)

    # Random alphabetic strings stand in for follower/following logins.
    # Followers are drawn before following.
    rand_login <- function (len = 6L) {
        chars <- sample (c (letters, LETTERS), size = len, replace = TRUE)
        paste0 (chars, collapse = "")
    }
    followers <-
        vapply (seq_len (10L), function (i) rand_login (), character (1L))
    following <-
        vapply (seq_len (5L), function (i) rand_login (), character (1L))

    commits <- data.frame (
        repo = repo_names,
        num_commits = 1:2,
        date = rep (ts_end, 2L)
    )

    commit_cmt <- data.frame (
        repo = repo_names,
        num_commits = 1:2,
        date = rep (ts_end, 2L)
    )
    attr (commit_cmt, "started_at") <- ts_start
    attr (commit_cmt, "ended_at") <- ts_end

    issues <- data.frame (
        opened_at = rep (ts_end, 2L),
        closed_at = rep (ts_end, 2L),
        org_repo = repo_names,
        issue_num = 1:2,
        num_issue_comments = 3:4,
        num_issue_participants = 5:6,
        num_repo_languages = 7:8,
        repo_languages = I (c ("R", "C"))
    )
    attr (issues, "started_at") <- ts_start
    attr (issues, "ended_at") <- ts_end

    issue_cmts <- data.frame (
        org_repo = repo_names,
        issue_num = 1:2,
        created_at = rep (ts_end, 2L),
        num_comments = 1:2,
        num_participants = 3:4
    )

    # Assemble in the same order as the original mock data:
    list (
        general = general,
        commit_cmt = commit_cmt,
        commits = commits,
        followers = followers,
        following = following,
        issue_cmts = issue_cmts,
        issues = issues
    )
}
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

tests/testthat/test-data-user.R

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
test_that ("user data matrices", {

    # Two users built from the same mock constructor, told apart only by the
    # names given to the list (plus the randomly generated follower and
    # following names).
    user_data <- lapply (1:2, function (i) mock_user_rel_data ())
    names (user_data) <- c ("a", "b")

    mats <- user_relation_matrices (user_data)

    expect_s3_class (mats, "data.frame")
    expect_equal (ncol (mats), 8L)
    nms <- c (
        "login1", "login2", "commit_cmt", "commits", "followers", "following",
        "issue_cmts", "issues"
    )
    expect_equal (names (mats), nms)
    # Two users form exactly one pair, so exactly one row is expected:
    expect_equal (nrow (mats), 1L)
})

0 commit comments

Comments
 (0)