Skip to content

Commit

Permalink
bugfix
Browse files Browse the repository at this point in the history
Signed-off-by: Maximilian Inckmann <maximilian.inckmann@kit.edu>
  • Loading branch information
maximiliani committed Jan 14, 2025
1 parent 87ad7f5 commit 1ce8753
Show file tree
Hide file tree
Showing 8 changed files with 198 additions and 425 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -369,4 +369,4 @@ legacyRecords/
FAIR-DO-Lab/
tpm-test/
elastic-test/
src/legacy
tmp/
121 changes: 0 additions & 121 deletions LICENSES/CC0-1.0.txt

This file was deleted.

22 changes: 7 additions & 15 deletions src/nmr_FAIR_DOs/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,29 +42,21 @@
app.add_typer(say, name="say")


# @app.command()
# def createAllAvailable(repo: str):
# """
# Create PID records for all available resources.
# """
# repository: AbstractRepository = getRepository(repo)
# resources = asyncio.run(create_pidRecords_from_scratch(repository))
#
# typer.echo(f"Created PID records for {len(resources)} resources in {repo}.")
# typer.echo(f"If errors occurred, please see error_{repository.repositoryID}.json for details. You can retry the creation of PID records for the failed resources by using the 'retryErrors' command.")


@app.command()
def createAllAvailable(repo: str, start: datetime = None, end: datetime = None):
def createAllAvailable(
repo: str, start: datetime = None, end: datetime = None, dryrun: bool = False
):
"""
Create PID records for all available resources.
"""
logger.info(
f"Creating PID records for all available resources in {repo} in timerange {start}-{end}."
f"Creating PID records for all available resources in {repo} in timerange {start}-{end}. Dryrun: {dryrun}"
)

repository: AbstractRepository = getRepository(repo)
resources = asyncio.run(create_pidRecords_from_scratch(repository, start, end))
resources = asyncio.run(
create_pidRecords_from_scratch(repository, start, end, dryrun)
)

typer.echo(f"Created PID records for {len(resources)} resources in {repo}.")
typer.echo(
Expand Down
46 changes: 46 additions & 0 deletions src/nmr_FAIR_DOs/env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# SPDX-FileCopyrightText: 2025 Karlsruhe Institute of Technology <maximilian.inckmann@kit.edu>
# SPDX-License-Identifier: Apache-2.0
#
# Copyright (c) 2025. Karlsruhe Institute of Technology
#
# Licensed under the Apache License, Version 2.0 (the "License");
# You may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
from dotenv import load_dotenv

load_dotenv()

TPM_URL = os.getenv("TPM_URL")
CHEMOTION_BASE_URL = os.getenv("CHEMOTION_BASE_URL")
ELASTICSEARCH_URL = os.getenv("ELASTICSEARCH_URL")
ELASTICSEARCH_INDEX = os.getenv("ELASTICSEARCH_INDEX")
ELASTICSEARCH_APIKEY = os.getenv("ELASTICSEARCH_APIKEY")
CACHE_DIR = os.getenv("CACHE_DIR")

# Check if the environment variables are set
if not TPM_URL:
raise Exception("TPM_URL is not set")
if not CHEMOTION_BASE_URL:
raise Exception("CHEMOTION_BASE_URL is not set")
if not ELASTICSEARCH_URL:
raise Exception("ELASTICSEARCH_URL is not set")
if not ELASTICSEARCH_INDEX:
raise Exception("ELASTICSEARCH_INDEX is not set")
if not ELASTICSEARCH_APIKEY:
raise Exception("ELASTICSEARCH_APIKEY is not set")
if not CACHE_DIR:
raise Exception("CACHE_DIR is not set")

# Check if the cache directory exists
if not os.path.isdir(CACHE_DIR):
os.makedirs(CACHE_DIR)
86 changes: 52 additions & 34 deletions src/nmr_FAIR_DOs/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,27 +17,24 @@

import json
import logging
import os
from datetime import datetime

from dotenv import load_dotenv
from nmr_FAIR_DOs.connectors.elasticsearch import ElasticsearchConnector
from nmr_FAIR_DOs.connectors.tpm_connector import TPMConnector
from nmr_FAIR_DOs.domain.pid_record import PIDRecord
from nmr_FAIR_DOs.domain.pid_record_entry import PIDRecordEntry
from nmr_FAIR_DOs.env import (
TPM_URL,
CHEMOTION_BASE_URL,
ELASTICSEARCH_URL,
ELASTICSEARCH_APIKEY,
ELASTICSEARCH_INDEX,
)
from nmr_FAIR_DOs.repositories.AbstractRepository import AbstractRepository
from nmr_FAIR_DOs.repositories.chemotion import ChemotionRepository

logger = logging.getLogger(__name__)

load_dotenv()

TPM_URL = os.getenv("TPM_URL")
CHEMOTION_BASE_URL = os.getenv("CHEMOTION_BASE_URL")
ELASTICSEARCH_URL = os.getenv("ELASTICSEARCH_URL")
ELASTICSEARCH_INDEX = os.getenv("ELASTICSEARCH_INDEX")
ELASTICSEARCH_APIKEY = os.getenv("ELASTICSEARCH_APIKEY")

tpm = TPMConnector(TPM_URL)
chemotion_repo = ChemotionRepository(CHEMOTION_BASE_URL)
elasticsearch = ElasticsearchConnector(
Expand Down Expand Up @@ -134,14 +131,15 @@ def addEntries(presumed_pid: str, entries: list[PIDRecordEntry]) -> str:


async def create_pidRecords_from_urls(
repo: AbstractRepository, urls: list[str]
repo: AbstractRepository, urls: list[str], dryrun: bool = False
) -> list[PIDRecord]:
"""
Create PID records for the given URLs.
Args:
repo (AbstractRepository): The repository to get the resources from
urls (list[str]): The URLs to create PID records for
dryrun (bool): If true, the PID records will not be created in TPM or Elasticsearch
Returns:
list[PIDRecord]: A list of PID records created from the URLs
Expand Down Expand Up @@ -220,29 +218,41 @@ async def create_pidRecords_from_urls(
)
# raise Exception("Error adding entries to PID record with PID from future entries")

# Create PID records in TPM
real_pid_records = []
try:
logger.info("Creating PID records in TPM", local_pid_records)
real_pid_records = tpm.createMultipleFAIRDOs(
local_pid_records
) # create PID records in TPM
except Exception as e:
logger.error("Error creating PID records in TPM", local_pid_records, e)
errors.append({"error": e.__repr__(), "timestamp": datetime.now().isoformat()})
if not dryrun:
# Create PID records in TPM
real_pid_records = []
try:
logger.info("Creating PID records in TPM", local_pid_records)
real_pid_records = tpm.createMultipleFAIRDOs(
local_pid_records
) # create PID records in TPM
except Exception as e:
logger.error("Error creating PID records in TPM", local_pid_records, e)
errors.append(
{"error": e.__repr__(), "timestamp": datetime.now().isoformat()}
)

# Add PID records to Elasticsearch
try:
logger.info("Adding PID records to Elasticsearch", real_pid_records)
await elasticsearch.addPIDRecords(
real_pid_records
) # add PID records to Elasticsearch
except Exception as e:
logger.error(f"Error adding PID records to Elasticsearch: {str(e)}")
errors.append({"error": e.__repr__(), "timestamp": datetime.now().isoformat()})
# Add PID records to Elasticsearch
try:
logger.info("Adding PID records to Elasticsearch", real_pid_records)
await elasticsearch.addPIDRecords(
real_pid_records
) # add PID records to Elasticsearch
except Exception as e:
logger.error(f"Error adding PID records to Elasticsearch: {str(e)}")
errors.append(
{"error": e.__repr__(), "timestamp": datetime.now().isoformat()}
)

logger.debug("PID records created:", real_pid_records)
pid_records.extend(real_pid_records) # add PID records to the list of PID records
logger.debug("PID records created:", real_pid_records)
pid_records.extend(
real_pid_records
) # add PID records to the list of PID records
else:
logger.warning("Dryrun: Not creating PID records in TPM or Elasticsearch")
pid_records.extend(
local_pid_records
) # add PID records to the list of PID records

# write errors to file
with open("errors_" + repo.repositoryID.replace("/", "_") + ".json", "w") as f:
Expand All @@ -258,7 +268,10 @@ async def create_pidRecords_from_urls(


async def create_pidRecords_from_scratch(
repo: AbstractRepository, start: datetime = None, end: datetime = None
repo: AbstractRepository,
start: datetime = None,
end: datetime = None,
dryrun: bool = False,
) -> list[PIDRecord]:
"""
Create PID records from scratch for the given time frame.
Expand All @@ -267,6 +280,7 @@ async def create_pidRecords_from_scratch(
repo (AbstractRepository): The repository to get the resources from
start (datetime): The start of the time frame
end (datetime): The end of the time frame
dryrun (bool): If true, the PID records will not be created in TPM or Elasticsearch
Returns:
list[PIDRecord]: A list of PID records created from scratch
Expand All @@ -285,11 +299,15 @@ async def create_pidRecords_from_scratch(
):
urls = await repo.listURLsForTimeFrame(start, end)
else:
with open(
"last_run_" + repo.repositoryID.replace("/", "_") + ".json", "w"
) as f:
f.write(datetime.now().isoformat())
urls = await repo.listAvailableURLs()

logger.info("Creating PID records from scratch for the following URLs:", urls)

return await create_pidRecords_from_urls(repo, urls)
return await create_pidRecords_from_urls(repo, urls, dryrun)


async def recreate_pidRecords_with_errors(repo: AbstractRepository) -> list[PIDRecord]:
Expand Down
25 changes: 0 additions & 25 deletions src/nmr_FAIR_DOs/repositories/AbstractRepository.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,31 +118,6 @@ async def extractAll(
pid_records = []
errors = []

# def localAddEntries(presumed_pid: str, entries: list[PIDRecordEntry]) -> str:
# """
# Adds entries to an existing record with the specified PID.
# This only applies to records that have already been extracted from the resources.
#
# Args:
# presumed_pid (str): The presumed PID of the target record
# entries (list[PIDRecordEntry]): A list of entries to add to the target record
#
# Returns:
# str: The PID of the target record
# """
# for record in pid_records:
# if record.getPID() == presumed_pid: # PID of the record matches the presumed PID
# logger.debug(f"Adding entries to existing record with PID {presumed_pid}. Identified by PID.", entries)
# record.addListOfEntries(entries)
# return presumed_pid
# elif record.entryExists("21.T11148/b8457812905b83046284", presumed_pid): # Value of digitalObjectLocation matches the presumed PID
# logger.debug(f"Adding entries to existing record with PID {presumed_pid}. Identified by digitalObjectLocation.", entries)
# record.addListOfEntries(entries)
# return record.getPID()
#
# logger.info("Couldn't find a record to add entries to. Calling addEntries function.")
# return addEntries(presumed_pid, entries)

for url in urls:
try:
# pid_record = await self.extractPIDRecordFromResource(url, localAddEntries)
Expand Down
Loading

0 comments on commit 1ce8753

Please sign in to comment.