Skip to content

Commit

Permalink
Improve scrape_all.py
Browse files Browse the repository at this point in the history
  • Loading branch information
akariv committed Feb 19, 2025
1 parent 6fba0ed commit 27216f0
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 8 deletions.
7 changes: 5 additions & 2 deletions odds/backend/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,16 @@ def scan_all(self) -> None:
def scan_new(self) -> None:
    """Scan every catalog, limiting work to datasets that are new."""
    # Match all catalogs, but filter datasets down to new ones only.
    catalog_filter = CatalogFilter()
    dataset_filter = DatasetFilterNew()
    asyncio.run(self.scan(catalog_filter, dataset_filter))

def scan_specific(self, catalogId: str = None, datasetId: str = None, force: bool = True) -> None:
    """Scan a specific catalog and/or dataset.

    Args:
        catalogId: id of the catalog to scan; all catalogs when None.
        datasetId: id of the dataset to scan; when None, which datasets
            are scanned depends on ``force``.
        force: when True, rescan every matched dataset; when False,
            rescan only incomplete ones. Ignored when ``datasetId``
            is given (a specific dataset is always scanned).
    """
    if catalogId:
        catalogFilter = CatalogFilterById(catalogId)
    else:
        catalogFilter = CatalogFilter()
    # Flattened else/if chain into elif; precedence: explicit dataset id
    # wins, then the force flag decides between full and incomplete rescans.
    if datasetId:
        datasetFilter = DatasetFilterById(datasetId)
    elif force:
        datasetFilter = DatasetFilterForce()
    else:
        datasetFilter = DatasetFilterIncomplete()
    asyncio.run(self.scan(catalogFilter, datasetFilter))
13 changes: 7 additions & 6 deletions utils/scrape_all.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from odds.backend import backend
import argparse

# CLI: optionally restrict the scrape to one catalog, and choose between
# a full rescan (--force) or only incomplete datasets (the default).
parser = argparse.ArgumentParser(description='Scrape data from all catalogs')
parser.add_argument('--catalog-id', type=str, help='ID of the catalog to scrape')
# BUG FIX: `type=bool` is broken in argparse — any non-empty string
# (including "False") is truthy, so `--force False` silently enabled
# forcing. `store_true` gives a proper flag: absent -> False, present -> True.
parser.add_argument('--force', action='store_true',
                    help='Scrape all and not just missing datasets')
args = parser.parse_args()

b = backend.ODDSBackend()
# catalog_id is None when the flag is omitted, which scans all catalogs.
b.scan_specific(catalogId=args.catalog_id, force=args.force)
del b

0 comments on commit 27216f0

Please sign in to comment.