Skip to content

Commit

Permalink
chg: Disable index cache for background processes
Browse files Browse the repository at this point in the history
  • Loading branch information
Rafiot committed Mar 12, 2024
1 parent df1e322 commit 926c0da
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 6 deletions.
2 changes: 1 addition & 1 deletion bin/background_build_captures.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class BackgroundBuildCaptures(AbstractManager):

def __init__(self, loglevel: int | None=None):
super().__init__(loglevel)
self.lookyloo = Lookyloo()
self.lookyloo = Lookyloo(cache_max_size=1)
self.script_name = 'background_build_captures'
# make sure discarded captures dir exists
self.captures_dir = get_captures_dir()
Expand Down
2 changes: 1 addition & 1 deletion bin/background_indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class BackgroundIndexer(AbstractManager):

def __init__(self, full: bool=False, loglevel: int | None=None):
super().__init__(loglevel)
self.lookyloo = Lookyloo()
self.lookyloo = Lookyloo(cache_max_size=1)
self.is_public_instance = get_config('generic', 'public_instance')
self.full_indexer = full
self.indexing = Indexing(full_index=self.full_indexer)
Expand Down
11 changes: 9 additions & 2 deletions lookyloo/capturecache.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import sys
import time

from collections import OrderedDict
from collections.abc import Mapping
from datetime import datetime
from functools import lru_cache, _CacheInfo as CacheInfo
Expand Down Expand Up @@ -172,13 +173,14 @@ def serialize_sets(obj: Any) -> Any:

class CapturesIndex(Mapping): # type: ignore[type-arg]

def __init__(self, redis: Redis, contextualizer: Context | None=None) -> None: # type: ignore[type-arg]
def __init__(self, redis: Redis, contextualizer: Context | None=None, maxsize: int | None=None) -> None: # type: ignore[type-arg]
self.logger = logging.getLogger(f'{self.__class__.__name__}')
self.logger.setLevel(get_config('generic', 'loglevel'))
self.redis = redis
self.indexing = Indexing()
self.contextualizer = contextualizer
self.__cache: dict[str, CaptureCache] = {}
self.__cache_max_size = maxsize
self.__cache: dict[str, CaptureCache] = OrderedDict()
self._quick_init()
self.timeout = get_config('generic', 'max_tree_create_time')
try:
Expand All @@ -203,6 +205,8 @@ def cached_captures(self) -> set[str]:
return set(self.__cache.keys())

def __getitem__(self, uuid: str) -> CaptureCache:
if self.__cache_max_size is not None and len(self.__cache) > self.__cache_max_size:
self.__cache.popitem()
if uuid in self.__cache:
if self.__cache[uuid].capture_dir.exists():
return self.__cache[uuid]
Expand Down Expand Up @@ -251,6 +255,9 @@ def lru_cache_clear(self) -> None:
def _quick_init(self) -> None:
'''Initialize the cache with a list of UUIDs, with less back and forth with redis.
Only get recent captures.'''
if self.__cache_max_size is not None:
self.logger.info('Cache max size set, skip quick init.')
return None
p = self.redis.pipeline()
has_new_cached_captures = False
for uuid, directory in self.redis.hscan_iter('lookup_dirs'):
Expand Down
9 changes: 7 additions & 2 deletions lookyloo/lookyloo.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,12 @@ class CaptureSettings(CaptureSettingsCore, total=False):

class Lookyloo():

def __init__(self) -> None:
def __init__(self, cache_max_size: int | None=None) -> None:
'''Initialize lookyloo.
:param cache_max_size: The maximum size of the cache. Allows displaying captures metadata without getting it from redis
This cache is *not* useful for background indexing or pickle building, only for the front end.
So it should always be None *unless* we're running the background processes.
'''
self.logger = logging.getLogger(f'{self.__class__.__name__}')
self.logger.setLevel(get_config('generic', 'loglevel'))
self.user_agents = UserAgents()
Expand Down Expand Up @@ -149,7 +154,7 @@ def __init__(self) -> None:
self.context = Context()
self.logger.info('Context initialized.')
self.logger.info('Initializing index...')
self._captures_index = CapturesIndex(self.redis, self.context)
self._captures_index = CapturesIndex(self.redis, self.context, maxsize=cache_max_size)
self.logger.info('Index initialized.')

# init lacus
Expand Down

0 comments on commit 926c0da

Please sign in to comment.