Skip to content

Commit

Permalink
chg: Allow to iterate over the categories
Browse files Browse the repository at this point in the history
  • Loading branch information
Rafiot committed Dec 2, 2024
1 parent cce3000 commit deadc95
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 8 deletions.
12 changes: 7 additions & 5 deletions lookyloo/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import logging
import re

from datetime import datetime, timedelta
from datetime import datetime

from pathlib import Path

Expand Down Expand Up @@ -958,16 +958,18 @@ def index_categories_capture(self, crawled_tree: CrawledTree, capture_dir: Path)
pipeline.execute()

def get_captures_category(self, category: str, most_recent_capture: datetime | None=None,
oldest_capture: datetime | None = None) -> list[tuple[str, float]]:
oldest_capture: datetime | None = None,
offset: int | None=None, limit: int | None=None) -> tuple[int, list[tuple[str, float]]]:
"""Get all the captures for a specific category, on a time interval starting from the most recent one.
:param category: The category
:param most_recent_capture: The capture time of the most recent capture to consider
:param oldest_capture: The capture time of the oldest capture to consider, defaults to 30 days ago.
:param oldest_capture: The capture time of the oldest capture to consider
"""
max_score: str | float = most_recent_capture.timestamp() if most_recent_capture else '+Inf'
min_score: str | float = oldest_capture.timestamp() if oldest_capture else (datetime.now() - timedelta(days=30)).timestamp()
return self.redis.zrevrangebyscore(f'categories|{category}|captures', max_score, min_score, withscores=True)
min_score: str | float = oldest_capture.timestamp() if oldest_capture else "-Inf"
total = self.redis.zcard(f'categories|{category}|captures')
return total, self.redis.zrevrangebyscore(f'categories|{category}|captures', max_score, min_score, withscores=True, start=offset, num=limit)

def get_capture_categories(self, capture_uuid: str) -> set[str]:
return self.redis.smembers(f'capture_indexes|{capture_uuid}|categories')
Expand Down
10 changes: 7 additions & 3 deletions website/web/genericapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -838,9 +838,13 @@ def get(self, timestamp: str | float | None=None) -> list[str]:
class CategoriesCaptures(Resource): # type: ignore[misc]
def get(self, category: str | None=None) -> list[str] | dict[str, list[str]]:
if category:
return [uuid for uuid, _ in get_indexing(flask_login.current_user).get_captures_category(category)]
return {c: [uuid for uuid, _ in get_indexing(flask_login.current_user).get_captures_category(c)]
for c in get_indexing(flask_login.current_user).categories}
_, entries = get_indexing(flask_login.current_user).get_captures_category(category)
return [uuid for uuid, _ in entries]
to_return: dict[str, list[str]] = {}
for c in get_indexing(flask_login.current_user).categories:
_, entries = get_indexing(flask_login.current_user).get_captures_category(c)
to_return[c] = [uuid for uuid, _ in entries]
return to_return


# NOTE: there are a few extra parameters we may want to add in the future: most recent/oldest capture
Expand Down

0 comments on commit deadc95

Please sign in to comment.