Skip to content

Commit 558e240

Browse files
committed
separate two sub-fns in src/bm25.cpp
1 parent 4af3f1d commit 558e240

File tree

4 files changed

+33
-14
lines changed

4 files changed

+33
-14
lines changed

DESCRIPTION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: pkgmatch
22
Title: Find R Packages Matching Either Descriptions or Other R Packages
3-
Version: 0.4.1.043
3+
Version: 0.4.1.044
44
Authors@R: c(
55
person("Mark", "Padgham", , "mark.padgham@email.com", role = c("aut", "cre"),
66
comment = c(ORCID = "0000-0003-2172-5265")),

codemeta.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
"codeRepository": "https://github.com/ropensci-review-tools/pkgmatch",
99
"issueTracker": "https://github.com/ropensci-review-tools/pkgmatch/issues",
1010
"license": "https://spdx.org/licenses/MIT",
11-
"version": "0.4.1.043",
11+
"version": "0.4.1.044",
1212
"programmingLanguage": {
1313
"@type": "ComputerLanguage",
1414
"name": "R",

src/bm25.cpp

+24-12
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,46 @@
11
#include "bm25.h"
22

3-
// [[Rcpp::export]]
4-
Rcpp::NumericVector rcpp_bm25 (const Rcpp::DataFrame &idfs, const Rcpp::List &tokensList, Rcpp::DataFrame &these_tokens, const double ntoks_avg) {
5-
6-
// Fixed parameters used in the BM25 function. See wikipedia reference for
7-
// these values.
8-
const double k = 1.2;
9-
const double b = 0.75;
3+
void make_idf_map (
4+
const Rcpp::DataFrame &idfs,
5+
std::unordered_map <std::string, double> &idf_map) {
106

11-
// Set up primary 'idf_map' to map all tokens to their IDFs over whole corpus:
12-
std::unordered_map <std::string, double> idf_map;
137
const Rcpp::CharacterVector idf_tokens = idfs ["token"];
148
const Rcpp::NumericVector idf_idf = idfs ["idf"];
159
for (int i = 0; i < idfs.nrow (); i++) {
1610
std::string this_tok = static_cast<std::string> (idf_tokens [i]);
1711
idf_map.emplace (this_tok, idf_idf [i]);
1812
}
13+
}
1914

20-
const int ndocs = tokensList.size();
15+
void make_these_tokens_map (
16+
const Rcpp::DataFrame &these_tokens,
17+
std::unordered_map <std::string, int> &these_tokens_map) {
2118

22-
// Then make a map of the input tokens and counts:
23-
std::unordered_map <std::string, int> these_tokens_map;
2419
const Rcpp::CharacterVector these_tokens_str = these_tokens ["token"];
2520
const Rcpp::IntegerVector these_tokens_n = these_tokens ["np"];
2621

2722
for (int i = 0; i < these_tokens.nrow (); i++) {
2823
const std::string this_string = static_cast <std::string> (these_tokens_str [i]);
2924
these_tokens_map.emplace (this_string, these_tokens_n [i]);
3025
}
26+
}
3127

28+
// [[Rcpp::export]]
29+
Rcpp::NumericVector rcpp_bm25 (const Rcpp::DataFrame &idfs, const Rcpp::List &tokensList, Rcpp::DataFrame &these_tokens, const double ntoks_avg) {
30+
31+
// Fixed parameters used in the BM25 function. See wikipedia reference for
32+
// these values.
33+
const double k = 1.2;
34+
const double b = 0.75;
35+
36+
// Set up primary 'idf_map' to map all tokens to their IDFs over whole corpus:
37+
std::unordered_map <std::string, double> idf_map;
38+
make_idf_map (idfs, idf_map);
39+
40+
std::unordered_map <std::string, int> these_tokens_map;
41+
make_these_tokens_map (these_tokens, these_tokens_map);
42+
43+
const int ndocs = tokensList.size();
3244
Rcpp::NumericVector bm25 (ndocs, 0.0);
3345

3446
for (int i = 0; i < ndocs; i++) {

src/bm25.h

+7
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,11 @@
55

66
#include <Rcpp.h>
77

8+
void make_idf_map (
9+
const Rcpp::DataFrame &idfs,
10+
std::unordered_map <std::string, double> &idf_map);
11+
void make_these_tokens_map (
12+
const Rcpp::DataFrame &these_tokens,
13+
std::unordered_map <std::string, int> &these_tokens_map);
14+
815
Rcpp::NumericVector rcpp_bm25 (const Rcpp::DataFrame &idfs, const Rcpp::List &tokensList, Rcpp::DataFrame &these_tokens, const double ntoks_avg);

0 commit comments

Comments
 (0)