-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathspecials_metadata.R
131 lines (117 loc) · 5.62 KB
/
specials_metadata.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# given a string like
# Found %d arguments in %s. Average %02.3f%%
# get data necessary to build a second line
# highlighting the string format templates:
# Found %d arguments in %s. \n Average %02.3f%%\n
# ^^ ^^ ^----^^^^^
# TODO: maybe erroring here is too harsh? maybe let these cases pass &
# (1) error if it's being run on msgid
# (2) throw the warning in all.equal() if it's an issue with msgstr so user can retry?
get_specials_metadata = function(x) {
# tested with xgettext: warning against using \a | \b | \f | \v in internationalized messages;
# and msgfmt doesn't warn about whether \t is matched, so no need to bother matching that either
special_matches = gregexpr(sprintf("%s|^[\\\\]n|[\\\\]n$", SPRINTF_TEMPLATE_REGEX), x, perl = TRUE)[[1L]]
group_starts = attr(special_matches, "capture.start")
group_lengths = attr(special_matches, "capture.length")
if (special_matches[1L] < 0L) {
meta = data.table(special=character(), id = character(), sequence = integer(), start=integer(), stop=integer())
setattr(meta, "class", c("specials_metadata", class(meta)))
return(meta)
}
# strip attributes with as.integer()
meta = data.table(
start = as.integer(special_matches),
end = as.integer(special_matches + attr(special_matches, 'match.length') - 1L),
redirect_start = group_starts[ , "redirect"],
redirect_length = group_lengths[ , "redirect"],
width_precision_start = group_starts[ , "width_precision"],
width_precision_length = group_lengths[ , "width_precision"],
id_start = group_starts[ , "id"],
id_length = group_lengths[ , "id"]
)
meta[ , "special" := substring(x, start, end)]
# % escapes don't matter for validation
meta = meta[special != "%%"]
template_idx = meta$special != "\\n"
redirect_idx = meta$redirect_start > 0L
if (any(template_idx & redirect_idx)) {
if (any(template_idx & !redirect_idx)) stopf(
# nolint next: line_length_linter.
"Invalid templated message. If any %%N$ redirects are used, all templates must be redirected.\n\tRedirected tempates: %s\n\t Un-redirected templates: %s",
meta$special[template_idx & redirect_idx], meta$special[template_idx & !redirect_idx]
)
# make sure newlines are retained in the right order, if present
meta[ , "redirect_id" := fcase(
template_idx,
as.integer(substring(x, redirect_start, redirect_start + redirect_length - 2L)),
start == 1L, 0L,
default = .N + 1L
)]
setorderv(meta, "redirect_id")
}
meta[(template_idx), "id" := safe_substring(x, id_start, id_start + id_length - 1L)]
# check if variable-width/precision formatting is used (e.g. %.*f or %*.0f)
meta[
template_idx & width_precision_start > 0L,
"width_precision" := safe_substring(x, width_precision_start, width_precision_start + width_precision_length - 1L)
]
meta[template_idx & grepl("*", width_precision, fixed = TRUE), "id" := paste0("*", id)]
# either it's an initial newline or it's a terminal newline; tag so as to distinguish
meta[(!template_idx), "id" := fifelse(start == 1L, "^\\n", "\\n$")]
if (any(template_idx & redirect_idx)) {
if (nrow(fail <- meta[ , .N, by = c("redirect_id", "id")][ , .N, by = "redirect_id"][N > 1L])) {
stopf(
# nolint next: line_length_linter.
"Invalid templated message string with redirects -- all messages pointing to the same input must have identical formats, but received %s",
meta[
fail,
on = "redirect_id"
][ , .(by_id = sprintf("[%s]", toString(special))), by = "redirect_id"][ , paste(by_id, collapse = " / ")]
)
}
meta = unique(meta, by = "redirect_id")
}
# need 'sequence' to ensure repeated %d are uniquely identified
meta = meta[ , .(special, id, sequence = seq_len(.N), start, end)]
setattr(meta, "class", c("specials_metadata", class(meta)))
meta[]
}
# convert to a tag as described above
#' @export
format.specials_metadata = function(x, ...) {
if (!nrow(x)) return('')
out = rep(" ", max(x$end))
out[c(x$start, x$end)] = "^"
# formats wider than %s get padded out with '-' between the "^" anchors
out[x[start < end - 1L, unlist(lapply(seq_len(.N), function(ii) (start[ii] + 1L):(end[ii] - 1L)))]] = "-"
paste(out, collapse="")
}
# target: msgid [template translation]
# current: msgstr [received translation]
#' @export
all.equal.specials_metadata = function(target, current, ...) {
if (nrow(target) != nrow(current)) return(gettextf(
"received %d unique templated arguments + bordering newlines but there are %d in the original",
nrow(current), nrow(target)
))
matched = merge(target, current, by = c('id', 'sequence'), all = TRUE)
# found in y, not x (i.e., in msgstr, not msgid)
if (anyNA(matched$start.x)) {
if (isTRUE(all.equal(table(target$id), table(current$id)))) {
return(gettextf(
# nolint next: line_length_linter.
"received the same set of templates + bordering newlines, but in incorrect order (%s vs %s). Recall that you can use %%$N to do redirect, e.g. to swap the order of '%%d %%s' to be translated more naturally, your translation can use '%%1$s %%2$d'",
sprintf("[%s]", toString(target$special)), sprintf("[%s]", toString(current$special))
))
}
return(gettextf(
"received templates + bordering newlines not present in the original: %s",
toString(matched[is.na(start.x)]$special.y)
))
}
# NB: since nrow(target) == nrow(current) and !anyNA(matched$start.x), we are done --
# anyNA(matched$start.y is already impossible)
# found in x, not y (i.e., in msgid, not msgstr)
# else target/current have the same number of rows
TRUE
}