Skip to content

Commit

Permalink
Merge branch 'main' into SCRUM-4332
Browse files Browse the repository at this point in the history
  • Loading branch information
sweng66 authored Mar 8, 2025
2 parents 1acec46 + 7060441 commit ff3821a
Show file tree
Hide file tree
Showing 9 changed files with 149 additions and 39 deletions.
89 changes: 75 additions & 14 deletions agr_literature_service/api/crud/ateam_db_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,25 +207,42 @@ def search_for_entity_curies(db: Session, entity_type, entity_curie_list):
return rows


def search_topic(topic):
def search_topic(topic, mod_abbr=None):
"""Search ATP ontology for topics that match the given string."""
db = create_ateam_db_session()
search_query = f"%{topic.upper()}%"
sql_query = text("""
SELECT ot.curie, ot.name
FROM ontologyterm ot
JOIN ontologyterm_isa_ancestor_descendant oad ON ot.id = oad.isadescendants_id
JOIN ontologyterm ancestor ON ancestor.id = oad.isaancestors_id
WHERE ot.ontologytermtype = 'ATPTerm'
AND UPPER(ot.name) LIKE :search_query
AND ot.obsolete = false
AND ancestor.curie = :topic_category_atp
ORDER BY LENGTH(ot.name)
LIMIT 10
""")
if mod_abbr is not None:
sql_query = text("""
SELECT ot.curie, ot.name
FROM ontologyterm ot
JOIN ontologyterm_isa_ancestor_descendant oad ON ot.id = oad.isadescendants_id
JOIN ontologyterm ancestor ON ancestor.id = oad.isaancestors_id
JOIN ontologyterm_subsets s ON ot.id = s.ontologyterm_id
WHERE ot.ontologytermtype = 'ATPTerm'
AND UPPER(ot.name) LIKE :search_query
AND ot.obsolete = false
AND ancestor.curie = :topic_category_atp
AND s.subsets = :mod_abbr
ORDER BY LENGTH(ot.name)
LIMIT 10
""")
else:
sql_query = text("""
SELECT ot.curie, ot.name
FROM ontologyterm ot
JOIN ontologyterm_isa_ancestor_descendant oad ON ot.id = oad.isadescendants_id
JOIN ontologyterm ancestor ON ancestor.id = oad.isaancestors_id
WHERE ot.ontologytermtype = 'ATPTerm'
AND UPPER(ot.name) LIKE :search_query
AND ot.obsolete = false
AND ancestor.curie = :topic_category_atp
ORDER BY LENGTH(ot.name)
LIMIT 10
""")
rows = db.execute(sql_query, {
'search_query': search_query,
'topic_category_atp': topic_category_atp
'topic_category_atp': topic_category_atp,
'mod_abbr': f'{mod_abbr}_tag'
}).fetchall()

data = [
Expand Down Expand Up @@ -424,6 +441,50 @@ def set_globals(atp_to_name_init, name_to_atp_init, atp_to_children_init, atp_to
atp_to_parent = atp_to_parent_init.copy()


def get_jobs_to_run(name: str, mod_abbreviation: str) -> list[str]:
    """
    Use the subsets in ontologyterm_subsets table to find the jobs to run.

    :param name: either an ATP curie (starts with 'ATP:') or a workflow name;
                 for a workflow name, "<name> needed" is looked up in name_to_atp
                 and its children become the candidate jobs.
    :param mod_abbreviation: mod abbreviation; a candidate is kept when it has
                             a subset entry equal to "<mod_abbreviation>_tag".
    :return: the parent curie first, followed by the candidate curies found in
             the mod's subset, each curie listed once.
    :raises HTTPException: 404 when "<name> needed" is not a known ATP name.
    """
    if not atp_to_parent:
        load_name_to_atp_and_relationships()

    if name.startswith('ATP:'):
        # An explicit curie is its own (single) candidate job.
        atp_parent_id = name
        jobs_list = [atp_parent_id]
    else:
        needed_string = f"{name} needed"
        if needed_string not in name_to_atp:
            raise HTTPException(status_code=status.HTTP_404_NOT_FOUND,
                                detail=f"Exception: Could not find '{needed_string}' in ATP ontology names")
        atp_parent_id = name_to_atp[needed_string]
        # A term without children has no candidates; do not fail with a KeyError.
        jobs_list = atp_to_children.get(atp_parent_id, [])

    rows = []
    # Skip the query when there are no candidates: an empty tuple would render
    # as "IN ()", which is not valid SQL.
    if jobs_list:
        # refine the candidates to the ones that are in the mod's subset
        sql_query_str = """
            SELECT o.curie
            FROM ontologyterm o, ontologyterm_subsets s
            WHERE
              o.id = s.ontologyterm_id AND
              s.subsets = :mod_tag AND
              o.curie in :jobs_list
        """
        db = create_ateam_db_session()
        try:
            rows = db.execute(
                text(sql_query_str).bindparams(mod_tag=f'{mod_abbreviation}_tag',
                                               jobs_list=tuple(jobs_list))
            ).fetchall()
        finally:
            # Always release the session, also when the query fails.
            db.close()

    # Parent first, then the matches. dict.fromkeys keeps the order and drops
    # repeats (e.g. an 'ATP:' name that is itself in the subset), so callers do
    # not create the same workflow tag twice.
    return list(dict.fromkeys([atp_parent_id, *(row[0] for row in rows)]))


def load_name_to_atp_and_relationships(start_term='ATP:0000177'):
"""
Add data to atp_to_name and name_to_atp dictionaries.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
get_current_workflow_status, delete_workflow_tags
from agr_literature_service.api.crud.topic_entity_tag_utils import delete_non_manual_tets, \
delete_manual_tets, has_manual_tet
from agr_literature_service.api.crud.ateam_db_helpers import name_to_atp

file_needed_tag_atp_id = "ATP:0000141" # file needed
manual_indexing_needed_tag_atp_id = "ATP:0000274"
Expand Down Expand Up @@ -145,6 +146,11 @@ def patch(db: Session, mod_corpus_association_id: int, mod_corpus_association_up
"ATP:0000140",
mod_abbreviation=mod_abbreviation) is None:
transition_to_workflow_status(db, reference_obj.curie, mod_abbreviation, file_needed_tag_atp_id)
if mod_abbreviation == 'ZFIN':
wft_obj = WorkflowTagModel(reference_id=mod_corpus_association_db_obj.reference_id,
mod_id=mod_corpus_association_db_obj.mod_id,
workflow_tag_id=name_to_atp["pre-indexing prioritization needed"])
db.add(wft_obj)
if mod_abbreviation == 'SGD' and mod_corpus_association_data.get('index_wft_id'):
wft_id = mod_corpus_association_data['index_wft_id']
wft_obj = WorkflowTagModel(reference_id=mod_corpus_association_db_obj.reference_id,
Expand Down
1 change: 0 additions & 1 deletion agr_literature_service/api/crud/reference_crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,6 @@ def show(db: Session, curie_or_reference_id: str): # noqa
:param db:
:param curie_or_reference_id:
:param http_request:
:return:
"""
logger.info("Show reference called")
Expand Down
22 changes: 21 additions & 1 deletion agr_literature_service/api/crud/workflow_tag_crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,11 @@
get_name_to_atp_for_all_children,
atp_get_all_descendents,
atp_get_all_ancestors,
atp_get_parent
atp_get_parent,
get_jobs_to_run,
atp_to_name
)

process_atp_multiple_allowed = [
'ATP:ont1', # used in testing
'ATP:0000165', 'ATP:0000169', 'ATP:0000189', 'ATP:0000178', 'ATP:0000166' # classifications and subtasks
Expand Down Expand Up @@ -1104,3 +1107,20 @@ def report_workflow_tags(db: Session, workflow_parent: str, mod_abbreviation: st
out_rec[f"{field}_perc"] = "0.00"
out_records.append(out_rec)
return out_records, headers


def workflow_subset_list(workflow_name, mod_abbreviation, db):
    """
    Return a mapping of ATP term name -> ATP curie for a workflow and mod.

    Mostly for tests and to let users see the curies in a workflow: given a
    workflow name, e.g. "reference classification", the curies come from
    get_jobs_to_run and the names from atp_to_name.

    :param workflow_name: workflow name (or ATP curie) passed to get_jobs_to_run
    :param mod_abbreviation: abbreviation that must exist in the mod table
    :param db: database session used to look up the mod
    :raises HTTPException: 404 when the mod abbreviation is unknown
    """
    # Only an abbreviation read back from the mod table is passed further down.
    mod_row = db.query(ModModel).filter(ModModel.abbreviation == mod_abbreviation).first()
    if not mod_row:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Unknown mod abbreviation '{mod_abbreviation}'")

    return {
        atp_to_name[job_curie]: job_curie
        for job_curie in get_jobs_to_run(workflow_name, mod_row.abbreviation)
    }
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from sqlalchemy.orm import Session
from sqlalchemy import text
from fastapi import HTTPException, status
from agr_literature_service.api.crud.ateam_db_helpers import get_jobs_to_run


def proceed_on_value(db: Session, current_workflow_tag_db_obj: WorkflowTagModel, args: list):
Expand Down Expand Up @@ -48,13 +49,10 @@ def proceed_on_value(db: Session, current_workflow_tag_db_obj: WorkflowTagModel,
detail=f"Method {checktype} not supported")

if call_process:
# sanity check, should start with ATP
if not new_atp.startswith("ATP:"):
raise HTTPException(status_code=status.HTTP_405_METHOD_NOT_ALLOWED,
detail=f"Method proceed_on_value with second arg {new_atp} must start with ATP:")
# Add new wft for this ref and mod
wtm = WorkflowTagModel(reference=current_workflow_tag_db_obj.reference,
mod=current_workflow_tag_db_obj.mod,
workflow_tag_id=new_atp)
db.add(wtm)
for atp in get_jobs_to_run(new_atp, current_workflow_tag_db_obj.mod.abbreviation):
# Add new wft for this ref and mod
wtm = WorkflowTagModel(reference=current_workflow_tag_db_obj.reference,
mod=current_workflow_tag_db_obj.mod,
workflow_tag_id=atp)
db.add(wtm)
db.commit()
5 changes: 3 additions & 2 deletions agr_literature_service/api/routers/topic_entity_tag_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,8 +214,9 @@ def entity_validation(taxon: str,

@router.get('/search_topic/{topic}',
status_code=200)
def search_topic(topic: str):
return ateam_db_helpers.search_topic(topic)
def search_topic(topic: str,
mod_abbr: str = None):
return ateam_db_helpers.search_topic(topic, mod_abbr)


@router.get('/search_descendants/{ancestor_curie}',
Expand Down
12 changes: 11 additions & 1 deletion agr_literature_service/api/routers/workflow_tag_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
ResponseMessageSchema)
from agr_literature_service.api.schemas.workflow_tag_schemas import WorkflowTransitionSchemaPost
from agr_literature_service.api.user import set_global_user_from_okta
from agr_literature_service.api.crud.ateam_db_helpers import atp_get_name
from agr_literature_service.api.crud.ateam_db_helpers import (
atp_get_name
)

router = APIRouter(
prefix="/workflow_tag",
Expand Down Expand Up @@ -161,3 +163,11 @@ def get_report_workflow_tags(mod_abbreviation: str,
@router.get('/get_name/{workflow_tag_id}', status_code=200)
def get_name(workflow_tag_id: str):
return atp_get_name(workflow_tag_id)


@router.get('/subsets/{workflow_name}/{mod_abbreviation}',
            status_code=200)
def get_workflow_tags_subset(mod_abbreviation: str,
                             workflow_name: str,
                             db: Session = db_session):
    # Thin endpoint: the mod check and the name -> curie lookup live in the crud layer.
    subset = workflow_tag_crud.workflow_subset_list(workflow_name=workflow_name,
                                                    mod_abbreviation=mod_abbreviation,
                                                    db=db)
    return subset
24 changes: 17 additions & 7 deletions tests/api/test_workflow_automation.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,18 +67,28 @@ def mock_load_name_to_atp_and_relationships():
}
workflow_parent = {}
atp_to_name = {}
name_to_atp = {}
name_to_atp = {"reference classification needed": "ATP:0000166"}
for atp in workflow_children.keys():
atp_to_name[atp] = atp
name_to_atp[atp] = atp
for atp2 in workflow_children[atp]:
workflow_parent[atp2] = atp
name_to_atp[atp2] = atp2
atp_to_name[atp2] = atp2

atp_to_name["ATP:0000166"] = "reference classification needed"
set_globals(atp_to_name, name_to_atp, workflow_children, workflow_parent)


def mock_get_jobs_to_run(name: str, mod_abbreviation: str):
    """Stand-in for get_jobs_to_run: return canned curies for *name*; the mod is ignored."""
    canned_jobs = {
        'reference classification': ['ATP:0000166',
                                     'ATP:task1_needed',
                                     'ATP:task2_needed'],
        'ATP:task3_needed': ['ATP:task3_needed'],
        'ATP:NEW': ['ATP:NEW'],
    }
    # Plain indexing on purpose: an unexpected name fails with KeyError.
    return canned_jobs[name]


def workflow_automation_init(db): # noqa
print("workflow_automation_init")
test_data = [
Expand All @@ -88,11 +98,9 @@ def workflow_automation_init(db): # noqa
["ATP:0000141", "ATP:fileuploadinprogress", [], 'on_start'],
["ATP:fileuploadinprogress",
"ATP:fileuploadcomplete",
["proceed_on_value::category::thesis::ATP:task1_needed",
"proceed_on_value::category::thesis::ATP:task2_needed",
"proceed_on_value::category::thesis::ATP:0000166",
"proceed_on_value::reference_type::Experimental::ATP:NEW",
"proceed_on_value::category::failure::ATP:task3_needed"],
["proceed_on_value::category::thesis::reference classification",
"proceed_on_value::category::failure::ATP:task3_needed",
"proceed_on_value::reference_type::Experimental::ATP:NEW"],
'on_success'],
["ATP:fileuploadinprogress", "ATP:fileuploadfailed", [], 'on_failed'],
["ATP:needed", "ATP:task1_needed", None, "task1_job"],
Expand Down Expand Up @@ -123,6 +131,7 @@ def workflow_automation_init(db): # noqa
class TestWorkflowTagAutomation:
@patch("agr_literature_service.api.crud.ateam_db_helpers.load_name_to_atp_and_relationships",
mock_load_name_to_atp_and_relationships)
@patch("agr_literature_service.api.crud.workflow_transition_actions.proceed_on_value.get_jobs_to_run", mock_get_jobs_to_run)
def test_transition_actions(self, db, auth_headers, test_mod, test_reference): # noqa
print("test_transition_actions")
mod = db.query(ModModel).filter(ModModel.abbreviation == test_mod.new_mod_abbreviation).one()
Expand Down Expand Up @@ -293,6 +302,7 @@ def test_transition_work_failed(self, db, auth_headers, test_mod, test_reference

@patch("agr_literature_service.api.crud.ateam_db_helpers.load_name_to_atp_and_relationships",
mock_load_name_to_atp_and_relationships)

def test_bad_transitions(self, db, auth_headers, test_mod, test_reference): # noqa
with TestClient(app) as client:
mock_load_name_to_atp_and_relationships()
Expand Down
13 changes: 9 additions & 4 deletions tests/lit_processing/pdf2tei/test_pdf2tei.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,15 @@ def convert_pdf_with_grobid_mock(file_content):
return mock_response


def mock_get_jobs_to_run(name: str, mod_abbreviation: str):
    """Stand-in for get_jobs_to_run: only 'ATP:0000162' is known; the mod is ignored."""
    canned_jobs = {'ATP:0000162': ['ATP:0000162']}
    # Plain indexing on purpose: any other name fails with KeyError.
    return canned_jobs[name]


class TestPdf2TEI:

@staticmethod
@patch("agr_literature_service.api.crud.workflow_transition_actions.proceed_on_value.get_jobs_to_run", mock_get_jobs_to_run)
def upload_initial_main_reference_file(db, client, test_mod, test_reference, auth_headers): # noqa
mod_response = client.get(url=f"/mod/{test_mod.new_mod_abbreviation}")
mod_abbreviation = mod_response.json()["abbreviation"]
Expand Down Expand Up @@ -178,8 +184,7 @@ def upload_initial_main_reference_file(db, client, test_mod, test_reference, aut

@patch("agr_literature_service.lit_processing.pdf2tei.pdf2tei.convert_pdf_with_grobid",
convert_pdf_with_grobid_mock)
# @patch("agr_literature_service.api.crud.ateam_db_helpers.search_ancestors_or_descendants",
# search_ancestors_or_descendants_mock)
@patch("agr_literature_service.api.crud.workflow_transition_actions.proceed_on_value.get_jobs_to_run", mock_get_jobs_to_run)
def test_pdf2tei(self, db, auth_headers, test_reference, test_mod): # noqa
with TestClient(app) as client:
load_name_to_atp_and_relationships_mock()
Expand All @@ -196,6 +201,7 @@ def test_pdf2tei(self, db, auth_headers, test_reference, test_mod): # noqa
assert response.json() == "ATP:0000163"

@patch("agr_literature_service.lit_processing.pdf2tei.pdf2tei.convert_pdf_with_grobid")
@patch("agr_literature_service.api.crud.workflow_transition_actions.proceed_on_value.get_jobs_to_run", mock_get_jobs_to_run)
def test_pdf2tei_failed_conversion(self, mock_convert_pdf_with_grobid,
db, auth_headers, test_reference, test_mod): # noqa
with TestClient(app) as client:
Expand All @@ -221,8 +227,7 @@ def test_pdf2tei_failed_conversion(self, mock_convert_pdf_with_grobid,
@patch("agr_literature_service.api.crud.ateam_db_helpers.load_name_to_atp_and_relationships",
load_name_to_atp_and_relationships_mock)
@patch("agr_literature_service.lit_processing.pdf2tei.pdf2tei.convert_pdf_with_grobid")
# @patch("agr_literature_service.api.crud.workflow_tag_crud.search_ancestors_or_descendants",
# search_ancestors_or_descendants_mock)
@patch("agr_literature_service.api.crud.workflow_transition_actions.proceed_on_value.get_jobs_to_run", mock_get_jobs_to_run)
def test_pdf2tei_failed_conversion_500(self, mock_convert_pdf_with_grobid,
db, auth_headers, test_reference, test_mod): # noqa
with TestClient(app) as client:
Expand Down

0 comments on commit ff3821a

Please sign in to comment.