Skip to content

Commit 7d49a45

Browse files
authored
Merge pull request #35 from ropensci-review-tools/pkgstats
Pkgstats data
2 parents 29215a7 + 72e7e53 commit 7d49a45

14 files changed

+249
-205
lines changed

DESCRIPTION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: repometrics
22
Title: Metrics for Your Code Repository
3-
Version: 0.1.1.100
3+
Version: 0.1.1.104
44
Authors@R:
55
person("Mark", "Padgham", , "mark.padgham@email.com", role = c("aut", "cre"),
66
comment = c(ORCID = "0000-0003-2172-5265"))

NAMESPACE

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# Generated by roxygen2: do not edit by hand
22

3+
export(cm_data)
34
export(ghist_dashboard)
4-
export(githist)
5+
export(repo_pkgstats_history)
56
importFrom(memoise,memoise)

R/cm-data.R

+26
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,29 @@
1+
#' Calculate all repository data used in CHAOSS metrics
2+
#' \url{https://chaoss.community/kb-metrics-and-metrics-models/}.
3+
#'
4+
#' @param path Path to local source repository.
5+
#' @return A list of the following `data.frame` objects:
6+
#' \enumerate{
7+
#' \item `contribs_from_gh_api` with details of all code contributors from GitHub
8+
#' \item `contribs_from_log` with details of all code contributors from the local git log
9+
#' \item `dependencies` A simple `data.frame` of all package dependencies
10+
#' \item `gh_repo_workflow` with details of all workflows run on GitHub,
11+
#' including status of most recent runs
12+
#' \item `gitlog` with one row for each git commit, and associated statistics
13+
#' \item `issue_comments_from_gh_api` with details of all comments from all
14+
#' repository issues on GitHub
15+
#' \item `issues_from_gh_api` with details of all issues on GitHub
16+
#' \item `libyears` The CHAOSS metric described at
17+
#' \url{https://chaoss.community/kb/metric-libyears/}, measuring the relative
18+
#' age of a project's dependencies, with lower values indicating more
19+
#' up-to-date projects. This is the only item which is not a `data.frame`,
20+
#' rather a named numerical vector of mean and median "libyears"
21+
#' \item `prs_from_gh_api` with details of all pull requests on GitHub
22+
#' \item `releases_from_gh_api` with details of all repository releases on GitHub
23+
#' \item `repo_from_gh_api` A `data.frame` of a single line, with several key
24+
#' attributes of the repository on GitHub.
25+
#' }
26+
#' @export
127
cm_data <- function (path) {
228

329
checkmate::assert_directory_exists (path)

R/githist-fn.R

-133
This file was deleted.

R/pkgstats-data.R

+150
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,118 @@
1+
#' Apply \pkg{pkgstats} across the git history of a package
#'
#' @param path Path to local repository containing an R package.
#' @param step_days Analyse package at intervals of this number of days. The
#' last commit for each day is chosen. For example, `step_days = 7L` will
#' return weekly statistics. A value of zero will analyse all commits,
#' including potentially multiple daily commits. Negative values are not
#' accepted.
#' @param num_cores Number of cores to use in multi-core processing. Has no
#' effect on Windows operating systems, on which calculations are always
#' single-core only. Negative values are subtracted from number of available
#' cores, determined as `parallel::detectCores()`, so default of `num_cores =
#' -1L` uses `detectCores() - 1L`. Positive values use precisely that number,
#' restricted to maximum available cores, and a value of zero will use all
#' available cores.
#'
#' @return A list of three items:
#' \itemize{
#' \item desc_data Containing data from `DESCRIPTION` files, along with data on
#' numbers of functions.
#' \item loc Containing data on "lines-of-code" for all languages and
#' sub-directories within package.
#' \item stats Containing statistics on (mean, median, and sum) of various
#' properties of each function in package.
#' }
#'
#' @export
repo_pkgstats_history <- function (path,
                                   step_days = 1L,
                                   num_cores = -1L) {

    checkmate::assert_character (path, len = 1L)
    # Same assertion as used in `cm_data()`; the un-suffixed
    # `assert_directory()` alias is deprecated in checkmate.
    checkmate::assert_directory_exists (path)
    checkmate::assert_int (step_days, lower = 0L)
    checkmate::assert_int (num_cores)

    num_cores <- set_num_cores (num_cores)

    # Full git log, thinned to the requested sampling interval.
    log <- cm_data_gitlog (path)
    log <- filter_git_log (log, step_days)

    if (num_cores == 1L) {

        res <- extract_pkgstats_data_single (log, path)

    } else {

        res <- extract_pkgstats_data_multi (log, path, num_cores)

    }

    collate_pkgstats (res)
}
53+
54+
# Thin a git log to the sampling interval given by `step_days`.
#
# `log` must have a `timestamp` column that `as.Date ()` can convert.
# NOTE(review): rows are assumed to be ordered by time, newest first (the
# ordering the previous `-diff ()` logic relied on) - confirm against
# `cm_data_gitlog ()`.
#
# - `step_days < 1`: `log` is returned unchanged.
# - `step_days >= 1`: a `date` column is added and only the first row for each
#   calendar date is kept.
# - `step_days > 1`: additionally, a row is kept only if it lies at least
#   `step_days` days from the most recently *kept* row. Comparing only
#   consecutive rows would discard every commit in a run of closely-spaced
#   commits instead of sampling one per interval.
filter_git_log <- function (log, step_days) {

    if (step_days < 1L) {
        return (log)
    }

    log$date <- as.Date (log$timestamp)
    log <- log [!duplicated (log$date), , drop = FALSE]

    if (step_days > 1L && nrow (log) > 1L) {

        day_num <- as.numeric (log$date)
        keep <- logical (length (day_num))
        keep [1L] <- TRUE
        last_kept <- day_num [1L]

        for (i in seq_along (day_num) [-1L]) {
            gap <- abs (day_num [i] - last_kept)
            # Rows with an undetermined gap (NA date) are retained rather
            # than silently dropped.
            if (is.na (gap) || gap >= step_days) {
                keep [i] <- TRUE
                if (!is.na (day_num [i])) {
                    last_kept <- day_num [i]
                }
            }
        }

        log <- log [keep, , drop = FALSE]
    }

    return (log)
}
70+
71+
72+
# Run `run_one_pkgstats ()` sequentially for every row of `log`.
#
# Work is done on a copy of the repository located at
# `<temp dir>/<basename (path)>`, which is hard-reset to each `log$hash` in
# turn. The copy is created (and removed again on exit) only when `path` is
# not itself directly inside the temporary directory and no directory of that
# name already exists there; otherwise the existing directory is used as-is
# and left in place.
#
# Returns a list with one `run_one_pkgstats ()` result per row of `log`.
extract_pkgstats_data_single <- function (log, path) {

    path_cp <- fs::path (fs::path_temp (), basename (path))
    needs_copy <- fs::path (fs::path_dir (path)) != fs::path_temp () &&
        !fs::dir_exists (path_cp)

    if (needs_copy) {
        path_cp <- fs::dir_copy (path, fs::path_temp ())
        # Registered via `on.exit` so the copy is also removed when any
        # commit fails part-way through.
        on.exit (fs::dir_delete (path_cp), add = TRUE)
    }

    res <- pbapply::pblapply (seq_len (nrow (log)), function (i) {
        gert::git_reset_hard (ref = log$hash [i], repo = path_cp)
        run_one_pkgstats (path = path_cp, pkg_date = log$timestamp [i])
    })

    return (res)
}
93+
94+
# Run `run_one_pkgstats ()` for every row of `log` on a cluster of `num_cores`
# workers.
#
# Each task copies the repository into `fs::path_temp ()` (as evaluated inside
# the task), hard-resets that copy to `log$hash [i]`, computes the statistics,
# and removes the copy again.
#
# Returns a list with one `run_one_pkgstats ()` result per row of `log`.
extract_pkgstats_data_multi <- function (log, path, num_cores) {

    cl <- parallel::makeCluster (num_cores)
    # Always shut the cluster down, including when a task errors.
    on.exit (parallel::stopCluster (cl), add = TRUE)

    parallel::clusterExport (
        cl,
        c ("log", "path", "run_one_pkgstats"),
        envir = environment ()
    )

    res <- pbapply::pblapply (seq_len (nrow (log)), function (i) {
        path_cp <- fs::dir_copy (path, fs::path_temp ())
        # Remove the per-task copy even if the reset or analysis fails.
        on.exit (fs::dir_delete (path_cp), add = TRUE)
        gert::git_reset_hard (ref = log$hash [i], repo = path_cp)
        run_one_pkgstats (path = path_cp, pkg_date = log$timestamp [i])
    }, cl = cl)

    return (res)
}
115+
1116
run_one_pkgstats <- function (path, pkg_date) {
2117

3118
s <- pkgstats::pkgstats (path)
@@ -62,3 +177,38 @@ run_one_pkgstats <- function (path, pkg_date) {
62177
)
63178
)
64179
}
180+
181+
# Combine the per-commit results in `x` into three tables.
#
# `x` is a list in which every element is itself a named list containing, in
# order, a set of scalar entries (including one named "date"), followed by
# "loc" and "stats" tables. The names are taken from the first element.
#
# Returns a list of:
# - `desc_data`: one row per element of `x`, built from all entries which
#   precede "loc". Entries after "date" are converted to integer, and "date"
#   itself is rebuilt as a date-time.
# - `loc`: all `loc` tables row-bound together.
# - `stats`: all `stats` tables row-bound together, with the original row
#   names (minus any trailing digits) moved into a `measure` column.
collate_pkgstats <- function (x) {

    all_names <- names (x [[1]])
    desc_names <- all_names [seq_len (which (all_names == "loc") - 1L)]

    # One vector per scalar entry, bound column-wise into a data.frame.
    desc_cols <- lapply (desc_names, function (nm) {
        unlist (lapply (x, function (xi) xi [[nm]]))
    })
    desc_data <- data.frame (do.call (cbind, desc_cols))
    names (desc_data) <- desc_names

    # Binding the columns above loses the date-time class, so dates are
    # re-derived from the original objects via a character round-trip.
    date_chr <- vapply (
        x,
        function (xi) strftime (xi$date, "%y-%m-%d %H:%M:%S"),
        "character"
    )
    desc_data$date <- strptime (date_chr, format = "%y-%m-%d %H:%M:%S")

    # Everything after "date" is a count.
    int_names <- desc_names [-seq_len (which (desc_names == "date"))]
    for (nm in int_names) {
        desc_data [[nm]] <- as.integer (desc_data [[nm]])
    }

    loc <- do.call (rbind, lapply (x, function (xi) xi$loc))

    stats <- do.call (rbind, lapply (x, function (xi) xi$stats))
    stats$measure <- gsub ("[0-9]+$", "", rownames (stats))
    rownames (stats) <- NULL

    # Give all three tables the class of `loc` (a tibble when produced via
    # `dplyr`).
    tbl_class <- class (loc)
    class (stats) <- tbl_class
    class (desc_data) <- tbl_class

    list (
        desc_data = desc_data,
        loc = loc,
        stats = stats
    )
}

R/quarto-dashboard.R

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
#' Start quarto dashboard with results of main \link{githist} function.
1+
#' Start quarto dashboard with results of main \link{repo_pkgstats_history}
2+
#' function.
23
#'
3-
#' @param results Results of main \link{githist} function applied to one
4-
#' package.
4+
#' @param results Results of main \link{repo_pkgstats_history} function applied
5+
#' to one package.
56
#' @param action One of "preview", to start and open a live preview of the
67
#' dashboard website, or "render" to render a static version without previewing
78
#' or opening.

R/utils.R

-19
Original file line numberDiff line numberDiff line change
@@ -38,25 +38,6 @@ to_posix <- function (x) {
3838
as.POSIXct (x, format = "%Y-%m-%dT%H:%M:%S", tz = "UTC")
3939
}
4040

41-
filter_git_hist <- function (h, n, step_days) {
42-
if (!is.null (n)) {
43-
h <- h [seq_len (n), ]
44-
}
45-
if (step_days >= 1L) {
46-
h$date <- as.Date (h$time)
47-
h <- dplyr::group_by (h, date) |>
48-
dplyr::filter (dplyr::row_number () == 1L)
49-
if (step_days > 1L) {
50-
index <- which (-diff (h$date) < step_days)
51-
if (length (index) > 0L) {
52-
h <- h [-(index), ]
53-
}
54-
}
55-
}
56-
57-
return (h)
58-
}
59-
6041
n_per_page_in_tests <- function (n_per_page) {
6142
is_test_env <- Sys.getenv ("REPOMETRICS_TESTS") == "true"
6243
ifelse (is_test_env, 2L, n_per_page)

codemeta.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
"codeRepository": "https://github.com/ropensci-review-tools/repometrics",
99
"issueTracker": "https://github.com/ropensci-review-tools/repometrics/issues",
1010
"license": "https://spdx.org/licenses/GPL-3.0",
11-
"version": "0.1.1.100",
11+
"version": "0.1.1.104",
1212
"programmingLanguage": {
1313
"@type": "ComputerLanguage",
1414
"name": "R",
@@ -249,7 +249,7 @@
249249
},
250250
"SystemRequirements": {}
251251
},
252-
"fileSize": "179.388KB",
252+
"fileSize": "196.637KB",
253253
"readme": "https://github.com/ropensci-review-tools/repometrics/blob/main/README.md",
254254
"contIntegration": [
255255
"https://github.com/ropensci-review-tools/repometrics/actions?query=workflow%3AR-CMD-check",

0 commit comments

Comments
 (0)