diff --git a/lookyloo/indexing.py b/lookyloo/indexing.py
index baa8bc5d..ab440fbb 100644
--- a/lookyloo/indexing.py
+++ b/lookyloo/indexing.py
@@ -6,7 +6,7 @@
 import logging
 import re
 
-from datetime import datetime, timedelta
+from datetime import datetime
 
 from pathlib import Path
 
@@ -958,16 +958,29 @@ def index_categories_capture(self, crawled_tree: CrawledTree, capture_dir: Path)
         pipeline.execute()
 
     def get_captures_category(self, category: str, most_recent_capture: datetime | None=None,
-                              oldest_capture: datetime | None = None) -> list[tuple[str, float]]:
+                              oldest_capture: datetime | None = None,
+                              offset: int | None=None, limit: int | None=None) -> tuple[int, list[tuple[str, float]]]:
         """Get all the captures for a specific category, on a time interval starting from the most recent one.
 
         :param category: The category
         :param most_recent_capture: The capture time of the most recent capture to consider
-        :param oldest_capture: The capture time of the oldest capture to consider, defaults to 30 days ago.
+        :param oldest_capture: The capture time of the oldest capture to consider
+        :param offset: The number of captures to skip in the interval, defaults to 0 if only limit is set
+        :param limit: The maximum number of captures to return, defaults to no limit
+        :return: The total number of captures in the category (regardless of the interval, offset and limit)
+                 and the selected captures as (uuid, timestamp) tuples, most recent first
         """
         max_score: str | float = most_recent_capture.timestamp() if most_recent_capture else '+Inf'
-        min_score: str | float = oldest_capture.timestamp() if oldest_capture else (datetime.now() - timedelta(days=30)).timestamp()
-        return self.redis.zrevrangebyscore(f'categories|{category}|captures', max_score, min_score, withscores=True)
+        min_score: str | float = oldest_capture.timestamp() if oldest_capture else "-Inf"
+        key = f'categories|{category}|captures'
+        # redis-py raises DataError unless start and num are passed together,
+        # so fill in the missing half when only one of offset/limit is set.
+        if limit is not None and offset is None:
+            offset = 0
+        elif offset is not None and limit is None:
+            limit = -1  # negative count: everything from the offset onwards
+        total = self.redis.zcard(key)
+        return total, self.redis.zrevrangebyscore(key, max_score, min_score, withscores=True, start=offset, num=limit)
 
     def get_capture_categories(self, capture_uuid: str) -> set[str]:
         return self.redis.smembers(f'capture_indexes|{capture_uuid}|categories')
diff --git a/website/web/genericapi.py b/website/web/genericapi.py
index c7e83230..dce9d052 100644
--- a/website/web/genericapi.py
+++ b/website/web/genericapi.py
@@ -838,9 +838,14 @@ def get(self, timestamp: str | float | None=None) -> list[str]:
 class CategoriesCaptures(Resource):  # type: ignore[misc]
     def get(self, category: str | None=None) -> list[str] | dict[str, list[str]]:
         if category:
-            return [uuid for uuid, _ in get_indexing(flask_login.current_user).get_captures_category(category)]
-        return {c: [uuid for uuid, _ in get_indexing(flask_login.current_user).get_captures_category(c)]
-                for c in get_indexing(flask_login.current_user).categories}
+            _, entries = get_indexing(flask_login.current_user).get_captures_category(category)
+            return [uuid for uuid, _ in entries]
+        indexing = get_indexing(flask_login.current_user)
+        to_return: dict[str, list[str]] = {}
+        for c in indexing.categories:
+            _, entries = indexing.get_captures_category(c)
+            to_return[c] = [uuid for uuid, _ in entries]
+        return to_return
 
 
 # NOTE: there are a few extra paramaters we may want to add in the future: most recent/oldest capture