diff --git a/examples/gene_workflow.ipynb b/examples/gene_workflow.ipynb
index cbedb6ff..f7cba1ea 100644
--- a/examples/gene_workflow.ipynb
+++ b/examples/gene_workflow.ipynb
@@ -262,97 +262,24 @@
"metadata": {},
"outputs": [
{
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " identifier | \n",
- " identifier.source | \n",
- " target | \n",
- " target.source | \n",
- " DisGeNET | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " AGRN | \n",
- " HGNC | \n",
- " 375790 | \n",
- " NCBI Gene | \n",
- " [{'gene_dsi': 0.626, 'gene_dpi': 0.538, 'gene_... | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " ALG14 | \n",
- " HGNC | \n",
- " 199857 | \n",
- " NCBI Gene | \n",
- " [{'gene_dsi': 0.722, 'gene_dpi': 0.308, 'gene_... | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " ALG2 | \n",
- " HGNC | \n",
- " 85365 | \n",
- " NCBI Gene | \n",
- " [{'gene_dsi': 0.67, 'gene_dpi': 0.423, 'gene_p... | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " CHAT | \n",
- " HGNC | \n",
- " 1103 | \n",
- " NCBI Gene | \n",
- " [{'gene_dsi': 0.52, 'gene_dpi': 0.808, 'gene_p... | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " CHD8 | \n",
- " HGNC | \n",
- " 57680 | \n",
- " NCBI Gene | \n",
- " [{'gene_dsi': 0.656, 'gene_dpi': 0.577, 'gene_... | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " identifier identifier.source target target.source \\\n",
- "0 AGRN HGNC 375790 NCBI Gene \n",
- "1 ALG14 HGNC 199857 NCBI Gene \n",
- "2 ALG2 HGNC 85365 NCBI Gene \n",
- "3 CHAT HGNC 1103 NCBI Gene \n",
- "4 CHD8 HGNC 57680 NCBI Gene \n",
- "\n",
- " DisGeNET \n",
- "0 [{'gene_dsi': 0.626, 'gene_dpi': 0.538, 'gene_... \n",
- "1 [{'gene_dsi': 0.722, 'gene_dpi': 0.308, 'gene_... \n",
- "2 [{'gene_dsi': 0.67, 'gene_dpi': 0.423, 'gene_p... \n",
- "3 [{'gene_dsi': 0.52, 'gene_dpi': 0.808, 'gene_p... \n",
- "4 [{'gene_dsi': 0.656, 'gene_dpi': 0.577, 'gene_... "
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
+ "ename": "JSONDecodeError",
+ "evalue": "Expecting value: line 1 column 1 (char 0)",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mJSONDecodeError\u001b[0m Traceback (most recent call last)",
+ "File \u001b[1;32m~\\anaconda3\\envs\\pyBiodatafuse_dev\\Lib\\site-packages\\requests\\models.py:971\u001b[0m, in \u001b[0;36mResponse.json\u001b[1;34m(self, **kwargs)\u001b[0m\n\u001b[0;32m 970\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 971\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m complexjson\u001b[38;5;241m.\u001b[39mloads(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtext, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 972\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m JSONDecodeError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m 973\u001b[0m \u001b[38;5;66;03m# Catch JSON-related errors and raise as requests.JSONDecodeError\u001b[39;00m\n\u001b[0;32m 974\u001b[0m \u001b[38;5;66;03m# This aliases json.JSONDecodeError and simplejson.JSONDecodeError\u001b[39;00m\n",
+ "File \u001b[1;32m~\\anaconda3\\envs\\pyBiodatafuse_dev\\Lib\\json\\__init__.py:346\u001b[0m, in \u001b[0;36mloads\u001b[1;34m(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)\u001b[0m\n\u001b[0;32m 343\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m object_hook \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m\n\u001b[0;32m 344\u001b[0m parse_int \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m parse_float \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m\n\u001b[0;32m 345\u001b[0m parse_constant \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m object_pairs_hook \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m kw):\n\u001b[1;32m--> 346\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _default_decoder\u001b[38;5;241m.\u001b[39mdecode(s)\n\u001b[0;32m 347\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
+ "File \u001b[1;32m~\\anaconda3\\envs\\pyBiodatafuse_dev\\Lib\\json\\decoder.py:337\u001b[0m, in \u001b[0;36mJSONDecoder.decode\u001b[1;34m(self, s, _w)\u001b[0m\n\u001b[0;32m 333\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Return the Python representation of ``s`` (a ``str`` instance\u001b[39;00m\n\u001b[0;32m 334\u001b[0m \u001b[38;5;124;03mcontaining a JSON document).\u001b[39;00m\n\u001b[0;32m 335\u001b[0m \n\u001b[0;32m 336\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m--> 337\u001b[0m obj, end \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw_decode(s, idx\u001b[38;5;241m=\u001b[39m_w(s, \u001b[38;5;241m0\u001b[39m)\u001b[38;5;241m.\u001b[39mend())\n\u001b[0;32m 338\u001b[0m end \u001b[38;5;241m=\u001b[39m _w(s, end)\u001b[38;5;241m.\u001b[39mend()\n",
+ "File \u001b[1;32m~\\anaconda3\\envs\\pyBiodatafuse_dev\\Lib\\json\\decoder.py:355\u001b[0m, in \u001b[0;36mJSONDecoder.raw_decode\u001b[1;34m(self, s, idx)\u001b[0m\n\u001b[0;32m 354\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[1;32m--> 355\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m JSONDecodeError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExpecting value\u001b[39m\u001b[38;5;124m\"\u001b[39m, s, err\u001b[38;5;241m.\u001b[39mvalue) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 356\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj, end\n",
+ "\u001b[1;31mJSONDecodeError\u001b[0m: Expecting value: line 1 column 1 (char 0)",
+ "\nDuring handling of the above exception, another exception occurred:\n",
+ "\u001b[1;31mJSONDecodeError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[1;32mIn[5], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m api_key \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m0209751bfa7b6a981a8f5fb5f062313067ecd36c\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;66;03m# TODO: add your key\u001b[39;00m\n\u001b[0;32m 2\u001b[0m params \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msource\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCURATED\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mformat\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mjson\u001b[39m\u001b[38;5;124m\"\u001b[39m} \u001b[38;5;66;03m# only curated data\u001b[39;00m\n\u001b[1;32m----> 3\u001b[0m disgenet_result, disgenet_metadata \u001b[38;5;241m=\u001b[39m disgenet\u001b[38;5;241m.\u001b[39mget_gene_disease(\n\u001b[0;32m 4\u001b[0m bridgedb_df\u001b[38;5;241m=\u001b[39mbridgdb_df, api_key\u001b[38;5;241m=\u001b[39mapi_key, params\u001b[38;5;241m=\u001b[39mparams\n\u001b[0;32m 5\u001b[0m )\n\u001b[0;32m 6\u001b[0m disgenet_result\u001b[38;5;241m.\u001b[39mhead()\n",
+ "File \u001b[1;32m~\\Desktop\\pyBiodatafuse\\src\\pyBiodatafuse\\annotators\\disgenet.py:77\u001b[0m, in \u001b[0;36mget_gene_disease\u001b[1;34m(bridgedb_df, api_key, params)\u001b[0m\n\u001b[0;32m 75\u001b[0m \u001b[38;5;66;03m# Get all the diseases associated with genes for the current chunk\u001b[39;00m\n\u001b[0;32m 76\u001b[0m gda_response \u001b[38;5;241m=\u001b[39m s\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mapi_host\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/gda/gene/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mchunked_input\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, params\u001b[38;5;241m=\u001b[39mparams)\n\u001b[1;32m---> 77\u001b[0m chunk_output \u001b[38;5;241m=\u001b[39m gda_response\u001b[38;5;241m.\u001b[39mjson()\n\u001b[0;32m 78\u001b[0m disgenet_output\u001b[38;5;241m.\u001b[39mextend(chunk_output)\n\u001b[0;32m 80\u001b[0m \u001b[38;5;66;03m# Record the end time\u001b[39;00m\n",
+ "File \u001b[1;32m~\\anaconda3\\envs\\pyBiodatafuse_dev\\Lib\\site-packages\\requests\\models.py:975\u001b[0m, in \u001b[0;36mResponse.json\u001b[1;34m(self, **kwargs)\u001b[0m\n\u001b[0;32m 971\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m complexjson\u001b[38;5;241m.\u001b[39mloads(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtext, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 972\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m JSONDecodeError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m 973\u001b[0m \u001b[38;5;66;03m# Catch JSON-related errors and raise as requests.JSONDecodeError\u001b[39;00m\n\u001b[0;32m 974\u001b[0m \u001b[38;5;66;03m# This aliases json.JSONDecodeError and simplejson.JSONDecodeError\u001b[39;00m\n\u001b[1;32m--> 975\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m RequestsJSONDecodeError(e\u001b[38;5;241m.\u001b[39mmsg, e\u001b[38;5;241m.\u001b[39mdoc, e\u001b[38;5;241m.\u001b[39mpos)\n",
+ "\u001b[1;31mJSONDecodeError\u001b[0m: Expecting value: line 1 column 1 (char 0)"
+ ]
}
],
"source": [
@@ -370,166 +297,15 @@
"metadata": {},
"outputs": [
{
- "data": {
- "text/plain": [
- "[{'gene_dsi': 0.626,\n",
- " 'gene_dpi': 0.538,\n",
- " 'gene_pli': 5.4727e-07,\n",
- " 'protein_class': None,\n",
- " 'protein_class_name': None,\n",
- " 'diseaseid': 'C3808739',\n",
- " 'disease_name': 'MYASTHENIC SYNDROME, CONGENITAL, 8',\n",
- " 'disease_class': None,\n",
- " 'disease_class_name': None,\n",
- " 'disease_type': 'disease',\n",
- " 'disease_semantic_type': 'Disease or Syndrome',\n",
- " 'score': 0.8,\n",
- " 'ei': 1.0,\n",
- " 'el': None,\n",
- " 'year_initial': 2009.0,\n",
- " 'year_final': 2014.0,\n",
- " 'source': 'CURATED'},\n",
- " {'gene_dsi': 0.626,\n",
- " 'gene_dpi': 0.538,\n",
- " 'gene_pli': 5.4727e-07,\n",
- " 'protein_class': None,\n",
- " 'protein_class_name': None,\n",
- " 'diseaseid': 'C0751882',\n",
- " 'disease_name': 'Myasthenic Syndromes, Congenital',\n",
- " 'disease_class': 'C16;C10',\n",
- " 'disease_class_name': ' Congenital, Hereditary, and Neonatal Diseases and Abnormalities; Nervous System Diseases',\n",
- " 'disease_type': 'disease',\n",
- " 'disease_semantic_type': 'Disease or Syndrome',\n",
- " 'score': 0.65,\n",
- " 'ei': 1.0,\n",
- " 'el': 'strong',\n",
- " 'year_initial': 2009.0,\n",
- " 'year_final': 2020.0,\n",
- " 'source': 'CURATED'},\n",
- " {'gene_dsi': 0.626,\n",
- " 'gene_dpi': 0.538,\n",
- " 'gene_pli': 5.4727e-07,\n",
- " 'protein_class': None,\n",
- " 'protein_class_name': None,\n",
- " 'diseaseid': 'C0751883',\n",
- " 'disease_name': 'Congenital Myasthenic Syndromes, Postsynaptic',\n",
- " 'disease_class': 'C16;C10',\n",
- " 'disease_class_name': ' Congenital, Hereditary, and Neonatal Diseases and Abnormalities; Nervous System Diseases',\n",
- " 'disease_type': 'disease',\n",
- " 'disease_semantic_type': 'Disease or Syndrome',\n",
- " 'score': 0.5,\n",
- " 'ei': 1.0,\n",
- " 'el': None,\n",
- " 'year_initial': 2009.0,\n",
- " 'year_final': 2012.0,\n",
- " 'source': 'CURATED'},\n",
- " {'gene_dsi': 0.626,\n",
- " 'gene_dpi': 0.538,\n",
- " 'gene_pli': 5.4727e-07,\n",
- " 'protein_class': None,\n",
- " 'protein_class_name': None,\n",
- " 'diseaseid': 'C0751884',\n",
- " 'disease_name': 'Congenital Myasthenic Syndromes, Presynaptic',\n",
- " 'disease_class': 'C16;C10',\n",
- " 'disease_class_name': ' Congenital, Hereditary, and Neonatal Diseases and Abnormalities; Nervous System Diseases',\n",
- " 'disease_type': 'disease',\n",
- " 'disease_semantic_type': 'Disease or Syndrome',\n",
- " 'score': 0.5,\n",
- " 'ei': 1.0,\n",
- " 'el': None,\n",
- " 'year_initial': 2009.0,\n",
- " 'year_final': 2012.0,\n",
- " 'source': 'CURATED'},\n",
- " {'gene_dsi': 0.626,\n",
- " 'gene_dpi': 0.538,\n",
- " 'gene_pli': 5.4727e-07,\n",
- " 'protein_class': None,\n",
- " 'protein_class_name': None,\n",
- " 'diseaseid': 'C0023467',\n",
- " 'disease_name': 'Leukemia, Myelocytic, Acute',\n",
- " 'disease_class': 'C04',\n",
- " 'disease_class_name': ' Neoplasms',\n",
- " 'disease_type': 'disease',\n",
- " 'disease_semantic_type': 'Neoplastic Process',\n",
- " 'score': 0.3,\n",
- " 'ei': 1.0,\n",
- " 'el': None,\n",
- " 'year_initial': 2007.0,\n",
- " 'year_final': 2007.0,\n",
- " 'source': 'CURATED'},\n",
- " {'gene_dsi': 0.626,\n",
- " 'gene_dpi': 0.538,\n",
- " 'gene_pli': 5.4727e-07,\n",
- " 'protein_class': None,\n",
- " 'protein_class_name': None,\n",
- " 'diseaseid': 'C0026998',\n",
- " 'disease_name': 'Acute Myeloid Leukemia, M1',\n",
- " 'disease_class': 'C04',\n",
- " 'disease_class_name': ' Neoplasms',\n",
- " 'disease_type': 'disease',\n",
- " 'disease_semantic_type': 'Neoplastic Process',\n",
- " 'score': 0.3,\n",
- " 'ei': 1.0,\n",
- " 'el': None,\n",
- " 'year_initial': 2007.0,\n",
- " 'year_final': 2007.0,\n",
- " 'source': 'CURATED'},\n",
- " {'gene_dsi': 0.626,\n",
- " 'gene_dpi': 0.538,\n",
- " 'gene_pli': 5.4727e-07,\n",
- " 'protein_class': None,\n",
- " 'protein_class_name': None,\n",
- " 'diseaseid': 'C0751885',\n",
- " 'disease_name': 'Myasthenic Syndromes, Congenital, Slow Channel',\n",
- " 'disease_class': 'C16;C10',\n",
- " 'disease_class_name': ' Congenital, Hereditary, and Neonatal Diseases and Abnormalities; Nervous System Diseases',\n",
- " 'disease_type': 'disease',\n",
- " 'disease_semantic_type': 'Disease or Syndrome',\n",
- " 'score': 0.3,\n",
- " 'ei': nan,\n",
- " 'el': None,\n",
- " 'year_initial': nan,\n",
- " 'year_final': nan,\n",
- " 'source': 'CURATED'},\n",
- " {'gene_dsi': 0.626,\n",
- " 'gene_dpi': 0.538,\n",
- " 'gene_pli': 5.4727e-07,\n",
- " 'protein_class': None,\n",
- " 'protein_class_name': None,\n",
- " 'diseaseid': 'C1850792',\n",
- " 'disease_name': 'Congenital myasthenic syndrome ib',\n",
- " 'disease_class': 'C16;C10',\n",
- " 'disease_class_name': ' Congenital, Hereditary, and Neonatal Diseases and Abnormalities; Nervous System Diseases',\n",
- " 'disease_type': 'disease',\n",
- " 'disease_semantic_type': 'Disease or Syndrome',\n",
- " 'score': 0.3,\n",
- " 'ei': nan,\n",
- " 'el': 'limited',\n",
- " 'year_initial': nan,\n",
- " 'year_final': nan,\n",
- " 'source': 'CURATED'},\n",
- " {'gene_dsi': 0.626,\n",
- " 'gene_dpi': 0.538,\n",
- " 'gene_pli': 5.4727e-07,\n",
- " 'protein_class': None,\n",
- " 'protein_class_name': None,\n",
- " 'diseaseid': 'C1879321',\n",
- " 'disease_name': 'Acute Myeloid Leukemia (AML-M2)',\n",
- " 'disease_class': 'C04',\n",
- " 'disease_class_name': ' Neoplasms',\n",
- " 'disease_type': 'disease',\n",
- " 'disease_semantic_type': 'Neoplastic Process',\n",
- " 'score': 0.3,\n",
- " 'ei': 1.0,\n",
- " 'el': None,\n",
- " 'year_initial': 2007.0,\n",
- " 'year_final': 2007.0,\n",
- " 'source': 'CURATED'}]"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
+ "ename": "NameError",
+ "evalue": "name 'disgenet_result' is not defined",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[1;32mIn[6], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m disgenet_result[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDisGeNET\u001b[39m\u001b[38;5;124m\"\u001b[39m][\u001b[38;5;241m0\u001b[39m]\n",
+ "\u001b[1;31mNameError\u001b[0m: name 'disgenet_result' is not defined"
+ ]
}
],
"source": [
@@ -589,33 +365,33 @@
" \n",
" \n",
" 1 | \n",
- " AGRN | \n",
+ " ALG14 | \n",
" HGNC | \n",
- " A0A494C0G5 | \n",
+ " Q96F25 | \n",
" Uniprot-TrEMBL | \n",
" [{'label': nan, 'InChIKey': nan, 'SMILES': nan... | \n",
"
\n",
" \n",
" 2 | \n",
- " AGRN | \n",
+ " ALG2 | \n",
" HGNC | \n",
- " A0A494C1I6 | \n",
+ " A0A024R184 | \n",
" Uniprot-TrEMBL | \n",
" [{'label': nan, 'InChIKey': nan, 'SMILES': nan... | \n",
"
\n",
" \n",
" 3 | \n",
- " AGRN | \n",
+ " CHAT | \n",
" HGNC | \n",
- " O00468 | \n",
+ " A0A1W2PP46 | \n",
" Uniprot-TrEMBL | \n",
" [{'label': nan, 'InChIKey': nan, 'SMILES': nan... | \n",
"
\n",
" \n",
" 4 | \n",
- " ALG14 | \n",
+ " CHD8 | \n",
" HGNC | \n",
- " Q96F25 | \n",
+ " A0A2R8Y4P3 | \n",
" Uniprot-TrEMBL | \n",
" [{'label': nan, 'InChIKey': nan, 'SMILES': nan... | \n",
"
\n",
@@ -626,10 +402,10 @@
"text/plain": [
" identifier identifier.source target target.source \\\n",
"0 AGRN HGNC A0A087X208 Uniprot-TrEMBL \n",
- "1 AGRN HGNC A0A494C0G5 Uniprot-TrEMBL \n",
- "2 AGRN HGNC A0A494C1I6 Uniprot-TrEMBL \n",
- "3 AGRN HGNC O00468 Uniprot-TrEMBL \n",
- "4 ALG14 HGNC Q96F25 Uniprot-TrEMBL \n",
+ "1 ALG14 HGNC Q96F25 Uniprot-TrEMBL \n",
+ "2 ALG2 HGNC A0A024R184 Uniprot-TrEMBL \n",
+ "3 CHAT HGNC A0A1W2PP46 Uniprot-TrEMBL \n",
+ "4 CHD8 HGNC A0A2R8Y4P3 Uniprot-TrEMBL \n",
"\n",
" transporter_inhibitor \n",
"0 [{'label': nan, 'InChIKey': nan, 'SMILES': nan... \n",
@@ -660,19 +436,19 @@
"[{'label': 'Cefepime',\n",
" 'InChIKey': 'HVFLCNVBZFFHBT-UHFFFAOYSA-O',\n",
" 'SMILES': 'CON=C(C(=O)NC1C(=O)N2C(C(=O)O)=C(C[N+]3(C)CCCC3)CSC12)c1csc(N)n1',\n",
- " 'pubchem_compound_id': '2623',\n",
+ " 'pubchem_compound_id': 2623,\n",
" 'molmedb_id': 'MM16967',\n",
" 'source_doi': 'doi:10.1074/jbc.275.3.1699',\n",
- " 'source_pmid': '10636865',\n",
+ " 'source_pmid': 10636865,\n",
" 'chebi_id': nan,\n",
" 'drugbank_id': nan},\n",
" {'label': 'Cephaloridine',\n",
" 'InChIKey': 'CZTQZXZIADLWOZ-UHFFFAOYSA-O',\n",
" 'SMILES': 'O=C(Cc1cccs1)NC1C(=O)N2C(C(=O)O)=C(C[n+]3ccccc3)CSC12',\n",
- " 'pubchem_compound_id': '5773',\n",
+ " 'pubchem_compound_id': 5773,\n",
" 'molmedb_id': 'MM00638',\n",
" 'source_doi': 'doi:10.1074/jbc.275.3.1699',\n",
- " 'source_pmid': '10636865',\n",
+ " 'source_pmid': 10636865,\n",
" 'chebi_id': '3537',\n",
" 'drugbank_id': 'DB09008'}]"
]
@@ -683,7 +459,7 @@
}
],
"source": [
- "inhibitor_df[\"transporter_inhibitor\"][72]"
+ "inhibitor_df[\"transporter_inhibitor\"][13]"
]
},
{
@@ -697,7 +473,19 @@
"cell_type": "code",
"execution_count": 9,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "ename": "NameError",
+ "evalue": "name 'disgenet_result' is not defined",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[1;32mIn[9], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m combined_df \u001b[38;5;241m=\u001b[39m combine_sources([disgenet_result, inhibitor_df])\n",
+ "\u001b[1;31mNameError\u001b[0m: name 'disgenet_result' is not defined"
+ ]
+ }
+ ],
"source": [
"combined_df = combine_sources([disgenet_result, inhibitor_df])"
]
@@ -708,103 +496,27 @@
"metadata": {},
"outputs": [
{
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " identifier | \n",
- " identifier.source | \n",
- " target | \n",
- " target.source | \n",
- " DisGeNET | \n",
- " transporter_inhibitor | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " AGRN | \n",
- " HGNC | \n",
- " 375790 | \n",
- " NCBI Gene | \n",
- " [{'gene_dsi': 0.626, 'gene_dpi': 0.538, 'gene_... | \n",
- " [{'label': nan, 'InChIKey': nan, 'SMILES': nan... | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " ALG14 | \n",
- " HGNC | \n",
- " 199857 | \n",
- " NCBI Gene | \n",
- " [{'gene_dsi': 0.722, 'gene_dpi': 0.308, 'gene_... | \n",
- " [{'label': nan, 'InChIKey': nan, 'SMILES': nan... | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " ALG2 | \n",
- " HGNC | \n",
- " 85365 | \n",
- " NCBI Gene | \n",
- " [{'gene_dsi': 0.67, 'gene_dpi': 0.423, 'gene_p... | \n",
- " [{'label': nan, 'InChIKey': nan, 'SMILES': nan... | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " CHAT | \n",
- " HGNC | \n",
- " 1103 | \n",
- " NCBI Gene | \n",
- " [{'gene_dsi': 0.52, 'gene_dpi': 0.808, 'gene_p... | \n",
- " [{'label': nan, 'InChIKey': nan, 'SMILES': nan... | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " identifier identifier.source target target.source \\\n",
- "0 AGRN HGNC 375790 NCBI Gene \n",
- "1 ALG14 HGNC 199857 NCBI Gene \n",
- "2 ALG2 HGNC 85365 NCBI Gene \n",
- "3 CHAT HGNC 1103 NCBI Gene \n",
- "\n",
- " DisGeNET \\\n",
- "0 [{'gene_dsi': 0.626, 'gene_dpi': 0.538, 'gene_... \n",
- "1 [{'gene_dsi': 0.722, 'gene_dpi': 0.308, 'gene_... \n",
- "2 [{'gene_dsi': 0.67, 'gene_dpi': 0.423, 'gene_p... \n",
- "3 [{'gene_dsi': 0.52, 'gene_dpi': 0.808, 'gene_p... \n",
- "\n",
- " transporter_inhibitor \n",
- "0 [{'label': nan, 'InChIKey': nan, 'SMILES': nan... \n",
- "1 [{'label': nan, 'InChIKey': nan, 'SMILES': nan... \n",
- "2 [{'label': nan, 'InChIKey': nan, 'SMILES': nan... \n",
- "3 [{'label': nan, 'InChIKey': nan, 'SMILES': nan... "
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
+ "ename": "NameError",
+ "evalue": "name 'combined_df' is not defined",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[1;32mIn[10], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m combined_df\u001b[38;5;241m.\u001b[39mhead(\u001b[38;5;241m4\u001b[39m)\n",
+ "\u001b[1;31mNameError\u001b[0m: name 'combined_df' is not defined"
+ ]
}
],
"source": [
"combined_df.head(4)"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
}
],
"metadata": {
@@ -823,9 +535,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.13"
+ "version": "3.11.8"
}
},
"nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
}
diff --git a/src/pyBiodatafuse/annotators/molmedb.py b/src/pyBiodatafuse/annotators/molmedb.py
index 088ca952..1d7ead7f 100644
--- a/src/pyBiodatafuse/annotators/molmedb.py
+++ b/src/pyBiodatafuse/annotators/molmedb.py
@@ -8,6 +8,7 @@
from string import Template
from typing import Tuple
+import numpy as np
import pandas as pd
from SPARQLWrapper import JSON, SPARQLWrapper
@@ -93,6 +94,7 @@ def get_gene_mol_inhibitor(bridgedb_df: pd.DataFrame):
col_name="transporter_inhibitor",
)
+ # if mappings exist but SPARQL returns empty response
if (not merged_df.empty) and merged_df["transporter_inhibitor"][0] is None:
merged_df.drop_duplicates(subset=["identifier", "transporter_inhibitor"], inplace=True)
elif not merged_df.empty:
@@ -111,14 +113,16 @@ def get_gene_mol_inhibitor(bridgedb_df: pd.DataFrame):
identifiers = merged_df["identifier"].unique()
for identifier in identifiers:
if merged_df.loc[merged_df["identifier"] == identifier].shape[0] > 1:
- mask = merged_df.apply(
- lambda x, id=identifier: (
- all(pd.isna(v) for v in d.values()) and x["identifier"] == id
- for d in x["transporter_inhibitor"]
- ),
- axis=1,
+ mask = merged_df["transporter_inhibitor"].apply(
+ lambda lst: all(
+ [
+ all([isinstance(val, float) and np.isnan(val) for val in dct.values()])
+ for dct in lst
+ ]
+ )
)
- merged_df.drop(merged_df[mask].index, inplace=True)
+ mask2 = merged_df["identifier"].apply(lambda x, id=identifier : x == id)
+ merged_df.drop(merged_df[mask & mask2].index, inplace=True)
# set default order to response dictionaries to keep output consistency
merged_df["transporter_inhibitor"] = merged_df["transporter_inhibitor"].apply(
@@ -248,7 +252,7 @@ def get_mol_gene_inhibitor(bridgedb_df: pd.DataFrame) -> Tuple[pd.DataFrame, dic
def int_response_value_types(resp_list: list, key_list: list):
- """Change values in response dictionaries to int to stay consistent woth other Annotators.
+ """Change values in response dictionaries to int to stay consistent with other Annotators.
:param: resp_list: list of response dictionaries.
:param: key_list: list of keys to change to int.