Skip to content

Commit 1368045

Browse files
committed
Merge #530: Improve tracker statistics importation
af7150b feat: [#469] import torrent stats using multiple torrents tracker API endpoint (Jose Celano) 16cbea8 feat: [#469] import torrent statistics in batches (Jose Celano) feffd09 feat: [#469] add update datetime for tracker stasts importation (Jose Celano) Pull request description: Currently, the Index imports statistics for all torrents every hour (1 hour is the default value in the configuration). We need to import stats for all torrents because we allow users to sort torrents by torrent stats (number of seeders and leechers). This PR improves the process a little bit. - [x] Add a new field (`updated_at`) to the table `torrust_torrent_tracker_stats` with the datetime when the stats were imported from the tracker. This is for logging purposes but it also helps to import torrents in batches. Regarding logging, it could help to check that the cronjob is running correctly. - [x] We get all torrents (`get_all_torrents_compact`) from the database. That could be a big array of infohashes. We could instead obtain the 50 records that have not been updated for the longest time and run the importation every 100 milliseconds. We request the tracker API every 100 milliseconds, getting 50 torrents per request. Those values can be adjusted in the future. - [x] A [new filter was added to the tracker API to get statistics for a list of torrents with one request](torrust/torrust-tracker#728). We can use it instead of getting one torrent at a time. **Pros:** - With millions of torrents we don't need to load all of them into memory. - The new field `updated_at` helps to monitor the importation process. - We get torrent stats for 50 torrents in one request instead of one request per torrent. **Cons:** - Every 100 milliseconds we run a query to check which torrent stats are pending to update. ACKs for top commit: josecelano: ACK af7150b Tree-SHA512: af1632282419457e20cc86e447b65d36c8e52dbff47e5c79cc1802fc6f67c759d572568f2846f65d4d5540049240ea82246df21d773ed1e6a285bde681fb423b
2 parents 1769bf1 + af7150b commit 1368045

10 files changed

+281
-22
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
-- New field to track when stats were updated from the tracker
2+
ALTER TABLE torrust_torrent_tracker_stats ADD COLUMN updated_at DATETIME DEFAULT NULL;
3+
UPDATE torrust_torrent_tracker_stats SET updated_at = '1000-01-01 00:00:00';
4+
ALTER TABLE torrust_torrent_tracker_stats MODIFY COLUMN updated_at DATETIME NOT NULL;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
-- New field to track when stats were updated from the tracker
2+
ALTER TABLE torrust_torrent_tracker_stats ADD COLUMN updated_at TEXT DEFAULT "1000-01-01 00:00:00";

src/console/cronjobs/tracker_statistics_importer.rs

+44-11
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,14 @@ use axum::extract::State;
1717
use axum::routing::{get, post};
1818
use axum::{Json, Router};
1919
use chrono::{DateTime, Utc};
20-
use log::{error, info};
20+
use log::{debug, error, info};
2121
use serde_json::{json, Value};
22+
use text_colorizer::Colorize;
2223
use tokio::net::TcpListener;
2324
use tokio::task::JoinHandle;
2425

2526
use crate::tracker::statistics_importer::StatisticsImporter;
27+
use crate::utils::clock::seconds_ago_utc;
2628

2729
const IMPORTER_API_IP: &str = "127.0.0.1";
2830

@@ -41,7 +43,7 @@ struct ImporterState {
4143
#[must_use]
4244
pub fn start(
4345
importer_port: u16,
44-
torrent_info_update_interval: u64,
46+
torrent_stats_update_interval: u64,
4547
tracker_statistics_importer: &Arc<StatisticsImporter>,
4648
) -> JoinHandle<()> {
4749
let weak_tracker_statistics_importer = Arc::downgrade(tracker_statistics_importer);
@@ -54,7 +56,7 @@ pub fn start(
5456
let _importer_api_handle = tokio::spawn(async move {
5557
let import_state = Arc::new(ImporterState {
5658
last_heartbeat: Arc::new(Mutex::new(Utc::now())),
57-
torrent_info_update_interval,
59+
torrent_info_update_interval: torrent_stats_update_interval,
5860
});
5961

6062
let app = Router::new()
@@ -81,25 +83,56 @@ pub fn start(
8183

8284
info!("Tracker statistics importer cronjob starting ...");
8385

84-
let interval = std::time::Duration::from_secs(torrent_info_update_interval);
85-
let mut interval = tokio::time::interval(interval);
86+
// code-review: we set an execution interval to avoid intense polling to
87+
// the database. If we remove the interval we would be constantly
88+
// querying if there are torrent stats pending to update, unless there
89+
// are torrents to update. Maybe we should only sleep for 100 milliseconds
90+
// if we did not update any torrents in the latest execution.
91+
// With this current limit we can only import 50 torrent stats every 100
92+
// milliseconds which is 500 torrents per second (1800000 torrents per hour).
93+
// This assumes the tracker can handle a request in 100 milliseconds.
8694

87-
interval.tick().await; // first tick is immediate...
95+
let execution_interval_in_milliseconds = 100;
96+
let execution_interval_duration = std::time::Duration::from_millis(execution_interval_in_milliseconds);
97+
let mut execution_interval = tokio::time::interval(execution_interval_duration);
8898

89-
loop {
90-
interval.tick().await;
99+
execution_interval.tick().await; // first tick is immediate...
91100

92-
info!("Running tracker statistics importer ...");
101+
info!("Running tracker statistics importer every {execution_interval_in_milliseconds} milliseconds ...");
93102

103+
loop {
94104
if let Err(e) = send_heartbeat(importer_port).await {
95105
error!("Failed to send heartbeat from importer cronjob: {}", e);
96106
}
97107

98-
if let Some(tracker) = weak_tracker_statistics_importer.upgrade() {
99-
drop(tracker.import_all_torrents_statistics().await);
108+
if let Some(statistics_importer) = weak_tracker_statistics_importer.upgrade() {
109+
let one_interval_ago = seconds_ago_utc(
110+
torrent_stats_update_interval
111+
.try_into()
112+
.expect("update interval should be a positive integer"),
113+
);
114+
let limit = 50;
115+
116+
debug!(
117+
"Importing torrents statistics not updated since {} limited to a maximum of {} torrents ...",
118+
one_interval_ago.to_string().yellow(),
119+
limit.to_string().yellow()
120+
);
121+
122+
match statistics_importer
123+
.import_torrents_statistics_not_updated_since(one_interval_ago, limit)
124+
.await
125+
{
126+
Ok(()) => {}
127+
Err(e) => error!("Failed to import statistics: {:?}", e),
128+
}
129+
130+
drop(statistics_importer);
100131
} else {
101132
break;
102133
}
134+
135+
execution_interval.tick().await;
103136
}
104137
})
105138
}

src/databases/database.rs

+8-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use async_trait::async_trait;
2-
use chrono::NaiveDateTime;
2+
use chrono::{DateTime, NaiveDateTime, Utc};
33
use serde::{Deserialize, Serialize};
44

55
use crate::databases::mysql::Mysql;
@@ -292,6 +292,13 @@ pub trait Database: Sync + Send {
292292
/// Get all torrents as `Vec<TorrentCompact>`.
293293
async fn get_all_torrents_compact(&self) -> Result<Vec<TorrentCompact>, Error>;
294294

295+
/// Get torrents whose stats have not been imported from the tracker at least since a given datetime.
296+
async fn get_torrents_with_stats_not_updated_since(
297+
&self,
298+
datetime: DateTime<Utc>,
299+
limit: i64,
300+
) -> Result<Vec<TorrentCompact>, Error>;
301+
295302
/// Update a torrent's title with `torrent_id` and `title`.
296303
async fn update_torrent_title(&self, torrent_id: i64, title: &str) -> Result<(), Error>;
297304

src/databases/mysql.rs

+25-3
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use std::str::FromStr;
22
use std::time::Duration;
33

44
use async_trait::async_trait;
5-
use chrono::NaiveDateTime;
5+
use chrono::{DateTime, NaiveDateTime, Utc};
66
use sqlx::mysql::{MySqlConnectOptions, MySqlPoolOptions};
77
use sqlx::{query, query_as, Acquire, ConnectOptions, MySqlPool};
88

@@ -20,7 +20,7 @@ use crate::models::torrent_tag::{TagId, TorrentTag};
2020
use crate::models::tracker_key::TrackerKey;
2121
use crate::models::user::{User, UserAuthentication, UserCompact, UserId, UserProfile};
2222
use crate::services::torrent::{CanonicalInfoHashGroup, DbTorrentInfoHash};
23-
use crate::utils::clock;
23+
use crate::utils::clock::{self, datetime_now, DATETIME_FORMAT};
2424
use crate::utils::hex::from_bytes;
2525

2626
pub struct Mysql {
@@ -884,6 +884,27 @@ impl Database for Mysql {
884884
.map_err(|_| database::Error::Error)
885885
}
886886

887+
async fn get_torrents_with_stats_not_updated_since(
888+
&self,
889+
datetime: DateTime<Utc>,
890+
limit: i64,
891+
) -> Result<Vec<TorrentCompact>, database::Error> {
892+
query_as::<_, TorrentCompact>(
893+
"SELECT tt.torrent_id, tt.info_hash
894+
FROM torrust_torrents tt
895+
LEFT JOIN torrust_torrent_tracker_stats tts ON tt.torrent_id = tts.torrent_id
896+
WHERE tts.updated_at < ? OR tts.updated_at IS NULL
897+
ORDER BY tts.updated_at ASC
898+
LIMIT ?
899+
",
900+
)
901+
.bind(datetime.format(DATETIME_FORMAT).to_string())
902+
.bind(limit)
903+
.fetch_all(&self.pool)
904+
.await
905+
.map_err(|_| database::Error::Error)
906+
}
907+
887908
async fn update_torrent_title(&self, torrent_id: i64, title: &str) -> Result<(), database::Error> {
888909
query("UPDATE torrust_torrent_info SET title = ? WHERE torrent_id = ?")
889910
.bind(title)
@@ -1055,11 +1076,12 @@ impl Database for Mysql {
10551076
seeders: i64,
10561077
leechers: i64,
10571078
) -> Result<(), database::Error> {
1058-
query("REPLACE INTO torrust_torrent_tracker_stats (torrent_id, tracker_url, seeders, leechers) VALUES (?, ?, ?, ?)")
1079+
query("REPLACE INTO torrust_torrent_tracker_stats (torrent_id, tracker_url, seeders, leechers, updated_at) VALUES (?, ?, ?, ?, ?)")
10591080
.bind(torrent_id)
10601081
.bind(tracker_url)
10611082
.bind(seeders)
10621083
.bind(leechers)
1084+
.bind(datetime_now())
10631085
.execute(&self.pool)
10641086
.await
10651087
.map(|_| ())

src/databases/sqlite.rs

+25-3
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use std::str::FromStr;
22
use std::time::Duration;
33

44
use async_trait::async_trait;
5-
use chrono::NaiveDateTime;
5+
use chrono::{DateTime, NaiveDateTime, Utc};
66
use sqlx::sqlite::{SqliteConnectOptions, SqlitePoolOptions};
77
use sqlx::{query, query_as, Acquire, ConnectOptions, SqlitePool};
88

@@ -20,7 +20,7 @@ use crate::models::torrent_tag::{TagId, TorrentTag};
2020
use crate::models::tracker_key::TrackerKey;
2121
use crate::models::user::{User, UserAuthentication, UserCompact, UserId, UserProfile};
2222
use crate::services::torrent::{CanonicalInfoHashGroup, DbTorrentInfoHash};
23-
use crate::utils::clock;
23+
use crate::utils::clock::{self, datetime_now, DATETIME_FORMAT};
2424
use crate::utils::hex::from_bytes;
2525

2626
pub struct Sqlite {
@@ -876,6 +876,27 @@ impl Database for Sqlite {
876876
.map_err(|_| database::Error::Error)
877877
}
878878

879+
async fn get_torrents_with_stats_not_updated_since(
880+
&self,
881+
datetime: DateTime<Utc>,
882+
limit: i64,
883+
) -> Result<Vec<TorrentCompact>, database::Error> {
884+
query_as::<_, TorrentCompact>(
885+
"SELECT tt.torrent_id, tt.info_hash
886+
FROM torrust_torrents tt
887+
LEFT JOIN torrust_torrent_tracker_stats tts ON tt.torrent_id = tts.torrent_id
888+
WHERE tts.updated_at < ? OR tts.updated_at IS NULL
889+
ORDER BY tts.updated_at ASC
890+
LIMIT ?
891+
",
892+
)
893+
.bind(datetime.format(DATETIME_FORMAT).to_string())
894+
.bind(limit)
895+
.fetch_all(&self.pool)
896+
.await
897+
.map_err(|_| database::Error::Error)
898+
}
899+
879900
async fn update_torrent_title(&self, torrent_id: i64, title: &str) -> Result<(), database::Error> {
880901
query("UPDATE torrust_torrent_info SET title = $1 WHERE torrent_id = $2")
881902
.bind(title)
@@ -1047,11 +1068,12 @@ impl Database for Sqlite {
10471068
seeders: i64,
10481069
leechers: i64,
10491070
) -> Result<(), database::Error> {
1050-
query("REPLACE INTO torrust_torrent_tracker_stats (torrent_id, tracker_url, seeders, leechers) VALUES ($1, $2, $3, $4)")
1071+
query("REPLACE INTO torrust_torrent_tracker_stats (torrent_id, tracker_url, seeders, leechers, updated_at) VALUES ($1, $2, $3, $4, $5)")
10511072
.bind(torrent_id)
10521073
.bind(tracker_url)
10531074
.bind(seeders)
10541075
.bind(leechers)
1076+
.bind(datetime_now())
10551077
.execute(&self.pool)
10561078
.await
10571079
.map(|_| ())

src/tracker/api.rs

+23-2
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ impl ConnectionInfo {
1515
}
1616
}
1717

18+
const TOKEN_PARAM_NAME: &str = "token";
19+
1820
pub struct Client {
1921
pub connection_info: ConnectionInfo,
2022
api_base_url: String,
@@ -29,7 +31,7 @@ impl Client {
2931
pub fn new(connection_info: ConnectionInfo) -> Result<Self, Error> {
3032
let base_url = format!("{}/api/v1", connection_info.url);
3133
let client = reqwest::Client::builder().timeout(Duration::from_secs(5)).build()?;
32-
let token_param = [("token".to_string(), connection_info.token.to_string())];
34+
let token_param = [(TOKEN_PARAM_NAME.to_string(), connection_info.token.to_string())];
3335

3436
Ok(Self {
3537
connection_info,
@@ -72,7 +74,7 @@ impl Client {
7274
self.client.post(request_url).query(&self.token_param).send().await
7375
}
7476

75-
/// Retrieve the info for a torrent.
77+
/// Retrieve the info for one torrent.
7678
///
7779
/// # Errors
7880
///
@@ -82,4 +84,23 @@ impl Client {
8284

8385
self.client.get(request_url).query(&self.token_param).send().await
8486
}
87+
88+
/// Retrieve the info for multiple torrents at the same time.
89+
///
90+
/// # Errors
91+
///
92+
/// Will return an error if the HTTP request fails.
93+
pub async fn get_torrents_info(&self, info_hashes: &[String]) -> Result<Response, Error> {
94+
let request_url = format!("{}/torrents", self.api_base_url);
95+
96+
let mut query_params: Vec<(String, String)> = Vec::with_capacity(info_hashes.len() + 1);
97+
98+
query_params.push((TOKEN_PARAM_NAME.to_string(), self.connection_info.token.clone()));
99+
100+
for info_hash in info_hashes {
101+
query_params.push(("info_hash".to_string(), info_hash.clone()));
102+
}
103+
104+
self.client.get(request_url).query(&query_params).send().await
105+
}
85106
}

src/tracker/service.rs

+56
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,14 @@ pub struct TorrentInfo {
4848
pub peers: Vec<Peer>,
4949
}
5050

51+
#[derive(Debug, Serialize, Deserialize, PartialEq)]
52+
pub struct TorrentBasicInfo {
53+
pub info_hash: String,
54+
pub seeders: i64,
55+
pub completed: i64,
56+
pub leechers: i64,
57+
}
58+
5159
#[derive(Debug, Serialize, Deserialize, PartialEq)]
5260
pub struct Peer {
5361
pub peer_id: Option<PeerId>,
@@ -259,6 +267,54 @@ impl Service {
259267
}
260268
}
261269

270+
/// Get torrent info from tracker in batches.
271+
///
272+
/// # Errors
273+
///
274+
/// Will return an error if the HTTP request to get torrent info fails or
275+
/// if the response cannot be parsed.
276+
pub async fn get_torrents_info(&self, info_hashes: &[String]) -> Result<Vec<TorrentBasicInfo>, TrackerAPIError> {
277+
debug!(target: "tracker-service", "get torrents info");
278+
279+
let maybe_response = self.api_client.get_torrents_info(info_hashes).await;
280+
281+
debug!(target: "tracker-service", "get torrents info response result: {:?}", maybe_response);
282+
283+
match maybe_response {
284+
Ok(response) => {
285+
let status: StatusCode = map_status_code(response.status());
286+
287+
let body = response.text().await.map_err(|_| {
288+
error!(target: "tracker-service", "response without body");
289+
TrackerAPIError::MissingResponseBody
290+
})?;
291+
292+
match status {
293+
StatusCode::OK => serde_json::from_str(&body).map_err(|e| {
294+
error!(
295+
target: "tracker-service", "Failed to parse torrents info from tracker response. Body: {}, Error: {}",
296+
body, e
297+
);
298+
TrackerAPIError::FailedToParseTrackerResponse { body }
299+
}),
300+
StatusCode::INTERNAL_SERVER_ERROR => {
301+
if body == Self::invalid_token_body() {
302+
Err(TrackerAPIError::InvalidToken)
303+
} else {
304+
error!(target: "tracker-service", "get torrents info 500 response: status {status}, body: {body}");
305+
Err(TrackerAPIError::InternalServerError)
306+
}
307+
}
308+
_ => {
309+
error!(target: "tracker-service", "get torrents info unhandled response: status {status}, body: {body}");
310+
Err(TrackerAPIError::UnexpectedResponseStatus)
311+
}
312+
}
313+
}
314+
Err(_) => Err(TrackerAPIError::TrackerOffline),
315+
}
316+
}
317+
262318
/// Issue a new tracker key from tracker.
263319
async fn retrieve_new_tracker_key(&self, user_id: i64) -> Result<TrackerKey, TrackerAPIError> {
264320
debug!(target: "tracker-service", "retrieve key: {user_id}");

0 commit comments

Comments
 (0)