Skip to content

Commit c0d8d3e

Browse files
authored
Merge pull request #119 from ropensci-review-tools/user-data
Fix bug in dashboard pre-process of user data
2 parents 057350e + 3ab5bc1 commit c0d8d3e

6 files changed

+208
-49
lines changed

DESCRIPTION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: repometrics
22
Title: Metrics for Your Code Repository
3-
Version: 0.1.6.064
3+
Version: 0.1.6.075
44
Authors@R:
55
person("Mark", "Padgham", , "mark.padgham@email.com", role = c("aut", "cre"),
66
comment = c(ORCID = "0000-0003-2172-5265"))

R/analyse-users.R

+36-34
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#' Construct user-by-user square matrices of strengths of relation between
22
#' users.
33
#'
4-
#' @param user_data Result of `lapply(logins, repometrics_data_user)`.
4+
#' @param data_users Result of `lapply(logins, repometrics_data_user)`.
55
#' Contains the following fields:
66
#' \enumerate{
77
#' \item general (not considered here)
@@ -15,36 +15,37 @@
1515
#' @return A `data.frame` of pairwise user logins, and proportions of overlap
1616
#' betwen repositories in the six variables described above.
1717
#' @noRd
18-
user_relation_matrices <- function (user_data) {
18+
user_relation_matrices <- function (data_users) {
1919

2020
# Suppress no visible binding notes:
2121
followers <- following <- org_repo <- repo <- login <- num_comments <- NULL
2222

23-
user_names <- names (user_data)
24-
user_data <- add_user_login_cols (user_data) |>
23+
user_names <- names (data_users)
24+
data_users <- add_user_login_cols (data_users) |>
2525
combine_user_data ()
2626

2727
# Pre-processing to name grouping column "repo" and count column "n":
28-
user_data$commit_cmt$repo <-
29-
paste0 (user_data$commit_cmt$org, user_data$commit_cmt$repo)
28+
data_users$commit_cmt$repo <-
29+
paste0 (data_users$commit_cmt$org, data_users$commit_cmt$repo)
3030

31-
user_data$followers <-
32-
dplyr::rename (user_data$followers, repo = followers) |>
31+
data_users$followers <-
32+
dplyr::rename (data_users$followers, repo = followers) |>
3333
dplyr::mutate (n = 1L)
34-
user_data$following <-
35-
dplyr::rename (user_data$following, repo = following) |>
34+
data_users$following <-
35+
dplyr::rename (data_users$following, repo = following) |>
3636
dplyr::mutate (n = 1L)
3737

38-
user_data$issue_cmts <-
39-
dplyr::rename (user_data$issue_cmts, repo = org_repo) |>
38+
data_users$issue_cmts <-
39+
dplyr::rename (data_users$issue_cmts, repo = org_repo) |>
4040
dplyr::group_by (repo, login) |>
4141
dplyr::summarise (n = sum (num_comments), .groups = "keep")
42-
user_data$issues <- dplyr::rename (user_data$issues, repo = org_repo) |>
42+
data_users$issues <- dplyr::rename (data_users$issues, repo = org_repo) |>
4343
dplyr::group_by (repo, login) |>
4444
dplyr::summarise (n = dplyr::n (), .groups = "keep")
4545

46-
overlap <- lapply (names (user_data), function (n) {
47-
user_data [[n]] <- user_relate_fields (user_data, user_names, what = n)
46+
overlap <- lapply (names (data_users), function (n) {
47+
data_users [[n]] <-
48+
user_relate_fields (data_users, user_names, what = n)
4849
})
4950

5051
res <- dplyr::left_join (
@@ -62,18 +63,19 @@ user_relation_matrices <- function (user_data) {
6263

6364
#' Add 'login' columns to all user data, so each element can be combined.
6465
#' @noRd
65-
add_user_login_cols <- function (user_data) {
66+
add_user_login_cols <- function (data_users) {
6667

67-
nms <- names (user_data)
68-
res <- lapply (seq_along (user_data), function (u) {
69-
nms_u <- names (user_data [[u]])
70-
res_u <- lapply (seq_along (user_data [[u]]), function (i) {
71-
ud <- user_data [[u]] [[i]]
68+
nms <- names (data_users)
69+
res <- lapply (seq_along (data_users), function (u) {
70+
nms_u <- names (data_users [[u]])
71+
res_u <- lapply (seq_along (data_users [[u]]), function (i) {
72+
ud <- data_users [[u]] [[i]]
7273
if (is.data.frame (ud) && nrow (ud) > 0L) {
73-
ud$login <- names (user_data) [u]
74+
ud$login <- names (data_users) [u]
7475
} else if (is.character (ud)) {
75-
ud <- data.frame (ud, login = names (user_data) [u])
76-
names (ud) [1] <- names (user_data [[u]]) [i]
76+
login <- names (data_users) [i]
77+
ud <- data.frame (ud, login = rep (login, length (ud)))
78+
names (ud) [1] <- names (data_users [[u]]) [i]
7779
}
7880
return (ud)
7981
})
@@ -90,39 +92,39 @@ add_user_login_cols <- function (user_data) {
9092
#'
9193
#' The `add_user_login_cols` enables all data to be `rbind`-ed here.
9294
#' @noRd
93-
combine_user_data <- function (user_data) {
95+
combine_user_data <- function (data_users) {
9496

95-
data <- lapply (names (user_data [[1]]), function (n) {
96-
these <- lapply (user_data, function (i) i [[n]])
97+
data <- lapply (names (data_users [[1]]), function (n) {
98+
these <- lapply (data_users, function (i) i [[n]])
9799
res <- do.call (rbind, these)
98100
rownames (res) <- NULL
99101
return (res)
100102
})
101103

102-
names (data) <- names (user_data [[1]])
104+
names (data) <- names (data_users [[1]])
103105
data$general <- NULL
104106

105107
return (data)
106108
}
107109

108-
user_relate_fields <- function (user_data, user_names, what = "commits") {
110+
user_relate_fields <- function (data_users, user_names, what = "commits") {
109111

110112
# Suppress no visible binding notes:
111113
num_commits <- login <- repo <- n <- NULL
112114

113115
user_combs <- t (utils::combn (user_names, m = 2L))
114116
if (what == "commits") {
115-
user_data [[what]] <-
116-
dplyr::rename (user_data [[what]], n = num_commits)
117+
data_users [[what]] <-
118+
dplyr::rename (data_users [[what]], n = num_commits)
117119
} else if (what == "commit_cmt") {
118-
user_data$commit_cmt$n <- 1L
120+
data_users$commit_cmt$n <- 1L
119121
}
120122

121123
res <- apply (user_combs, 1, function (i) {
122-
cmt1 <- dplyr::filter (user_data [[what]], login == i [1]) |>
124+
cmt1 <- dplyr::filter (data_users [[what]], login == i [1]) |>
123125
dplyr::group_by (repo) |>
124126
dplyr::summarise (n1 = sum (n))
125-
cmt2 <- dplyr::filter (user_data [[what]], login == i [2]) |>
127+
cmt2 <- dplyr::filter (data_users [[what]], login == i [2]) |>
126128
dplyr::group_by (repo) |>
127129
dplyr::summarise (n2 = sum (n))
128130
overlap <- dplyr::inner_join (cmt1, cmt2, by = "repo")

R/quarto-dashboard.R

+50-1
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,37 @@
88
#' @param action One of "preview", to start and open a live preview of the
99
#' dashboard website, or "render" to render a static version without previewing
1010
#' or opening.
11+
#' @param ctb_threshold An optional single numeric value between 0 and 1. If
12+
#' specified, contributions are arranged in cumulative order, and the
13+
#' contributor data reduced to only those who contribute to this proportion of
14+
#' all contributions.
15+
#' @param max_ctbs Optional maximum number of contributors to be included. This
16+
#' is an alternative way to reduce number of contributors presented in
17+
#' dashboard, and may only be specified if `ctb_threshold` is left at default
18+
#' value of `NULL`.
19+
#'
1120
#' @return (Invisibly) Path to main "index.html" document of quarto site. Note
1221
#' that the site must be served with `action = "preview"`, and will not work by
1322
#' simply opening this "index.html" file.
1423
#'
1524
#' @family dashboard
1625
#' @export
17-
repometrics_dashboard <- function (data_repo, data_users, action = "preview") {
26+
repometrics_dashboard <- function (data_repo, data_users, action = "preview",
27+
ctb_threshold = NULL, max_ctbs = NULL) {
28+
29+
if (!is.null (ctb_threshold)) {
30+
checkmate::assert_numeric (ctb_threshold, len = 1L, lower = 0, upper = 1)
31+
if (!is.null (max_ctbs)) {
32+
cli::cli_abort ("Only one of 'ctb_threshold' or 'max_ctbs' may be specified.")
33+
}
34+
}
35+
if (!is.null (max_ctbs)) {
36+
checkmate::assert_integerish (max_ctbs, len = 1L, lower = 1, upper = length (data_users))
37+
}
38+
39+
if (!is.null (ctb_threshold) || !is.null (max_ctbs)) {
40+
data_users <- reduce_data_users (data_users, ctb_threshold, max_ctbs)
41+
}
1842

1943
check_dashboard_arg (data_repo)
2044
data_repo$pkgstats <- timestamps_to_dates (data_repo$pkgstats)
@@ -47,6 +71,31 @@ repometrics_dashboard <- function (data_repo, data_users, action = "preview") {
4771
})
4872
}
4973

74+
reduce_data_users <- function (data_users,
75+
ctb_threshold = NULL,
76+
max_ctbs = NULL) {
77+
78+
classes <- vapply (data_users [[1]], class, character (1L))
79+
index <- which (classes == "data.frame")
80+
# Those are "commit_cmt", "commits", "issue_cmts", "issues"
81+
rowcounts <- t (vapply (data_users, function (u) {
82+
vapply (u [index], nrow, integer (1L))
83+
}, integer (length (index))))
84+
n <- sort (rowSums (rowcounts), decreasing = TRUE)
85+
86+
if (!is.null (max_ctbs)) {
87+
these_ctbs <- names (n) [seq_len (max_ctbs)]
88+
index <- sort (match (these_ctbs, names (data_users)))
89+
} else {
90+
ncum <- cumsum (n) / sum (n)
91+
ctbs_trimmed <- names (ncum) [which (ncum <= ctb_threshold)]
92+
index <- sort (match (ctbs_trimmed, names (data_users)))
93+
}
94+
data_users <- data_users [index]
95+
96+
return (data_users)
97+
}
98+
5099
# `range` is used to scale values, and restrict to sufficiently large values.
51100
# Total range is first re-scaled to maximum of `range[2]`, then values below
52101
# `range[1]` are removed.

codemeta.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
"codeRepository": "https://github.com/ropensci-review-tools/repometrics",
99
"issueTracker": "https://github.com/ropensci-review-tools/repometrics/issues",
1010
"license": "https://spdx.org/licenses/GPL-3.0",
11-
"version": "0.1.6.064",
11+
"version": "0.1.6.075",
1212
"programmingLanguage": {
1313
"@type": "ComputerLanguage",
1414
"name": "R",

man/repometrics_dashboard.Rd

+17-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)