Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated tools w/new fields #157

Merged
merged 1 commit into from
Feb 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions scripts/build_tools/interfaces.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ export interface Metadata {
query: string;
}[];
oauth: Record<string, string>[] | null;
runner: string;
operating_system: string[];
tool_set: string;
}

export interface StoreMetadata {
Expand Down
7 changes: 6 additions & 1 deletion scripts/build_tools/save_tools.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,10 @@ async function buildToolJson(
sql_tables: metadata.sqlTables,
tools: metadata.tools,
version: metadata.version,
[toolType === "Python" ? "py_code" : "js_code"]: toolContent
[toolType === "Python" ? "py_code" : "js_code"]: toolContent,
runner: metadata.runner,
operating_system: metadata.operating_system,
tool_set: metadata.tool_set,
},
false
],
Expand Down Expand Up @@ -273,6 +276,8 @@ export async function saveToolsInNode(toolsOriginal: DirectoryEntry[]): Promise<
const tools: DirectoryEntry[] = JSON.parse(JSON.stringify(toolsOriginal));
const toolsSaved: DirectoryEntry[] = [];
for (const tool of tools) {
// Wait 250ms between tool uploads
await new Promise(resolve => setTimeout(resolve, 250));

// Read files
const metadata: Metadata = JSON.parse(await Deno.readTextFile(join(tool.dir, "metadata.json")));
Expand Down
1 change: 1 addition & 0 deletions tools/article-scraper/.tool-dump.test.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"type":"Python","content":[{"version":"1.0.0","name":"Article Scraper","homepage":null,"author":"@@official.shinkai","py_code":"# /// script\n# dependencies = [\n# \"requests\",\n# \"beautifulsoup4\",\n# \"lxml\"\n# ]\n# ///\n\nimport requests\nfrom bs4 import BeautifulSoup\nfrom typing import List, Optional, Dict, Any\nimport datetime\n\nclass CONFIG:\n \"\"\"\n This class holds the tool's configuration, such as\n default language or advanced flags.\n \"\"\"\n default_language: str = \"en\"\n\nclass INPUTS:\n \"\"\"\n This class holds the user-provided inputs.\n \"\"\"\n url: str\n html: Optional[str] = None\n language: Optional[str] = None\n\nclass OUTPUT:\n \"\"\"\n This class represents the result structure to be returned.\n \"\"\"\n title: str\n authors: List[str]\n publish_date: str\n summary: str\n keywords: List[str]\n top_image: str\n text: str\n\ndef extract_text_content(soup: BeautifulSoup) -> str:\n \"\"\"Extract main text content from the article.\"\"\"\n # Remove script and style elements\n for script in soup([\"script\", \"style\"]):\n script.decompose()\n \n # Get text\n text = soup.get_text(separator='\\n', strip=True)\n return text\n\ndef extract_metadata(soup: BeautifulSoup) -> Dict[str, Any]:\n \"\"\"Extract metadata from meta tags.\"\"\"\n metadata = {\n \"title\": \"\",\n \"authors\": [],\n \"publish_date\": \"\",\n \"keywords\": [],\n \"top_image\": \"\"\n }\n \n # Try to get title\n title_tag = soup.find('title')\n if title_tag:\n metadata[\"title\"] = title_tag.string.strip()\n \n # Try meta tags\n meta_mappings = {\n \"author\": [\"author\", \"article:author\", \"og:article:author\"],\n \"publish_date\": [\"article:published_time\", \"publishdate\", \"date\", \"published_time\"],\n \"image\": [\"og:image\", \"twitter:image\"],\n \"keywords\": [\"keywords\", \"news_keywords\"]\n }\n \n for meta in soup.find_all('meta'):\n name = meta.get('name', '').lower()\n property = meta.get('property', '').lower()\n content = meta.get('content', '')\n \n if not content:\n continue\n \n # Authors\n if name in meta_mappings[\"author\"] or property in meta_mappings[\"author\"]:\n if content not in metadata[\"authors\"]:\n metadata[\"authors\"].append(content)\n \n # Publish date\n elif name in meta_mappings[\"publish_date\"] or property in meta_mappings[\"publish_date\"]:\n metadata[\"publish_date\"] = content\n \n # Image\n elif name in meta_mappings[\"image\"] or property in meta_mappings[\"image\"]:\n if not metadata[\"top_image\"]:\n metadata[\"top_image\"] = content\n \n # Keywords\n elif name in meta_mappings[\"keywords\"] or property in meta_mappings[\"keywords\"]:\n keywords = [k.strip() for k in content.split(',')]\n metadata[\"keywords\"].extend(keywords)\n \n return metadata\n\nasync def run(c: CONFIG, p: INPUTS) -> Dict[str, Any]:\n \"\"\"\n The main run function that processes the article.\n \"\"\"\n if p.html:\n html_content = p.html\n else:\n # Fetch the URL\n headers = {\n 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'\n }\n response = requests.get(p.url, headers=headers)\n response.raise_for_status()\n html_content = response.text\n\n # Parse HTML\n soup = BeautifulSoup(html_content, 'lxml')\n \n # Extract metadata\n metadata = extract_metadata(soup)\n \n # Extract text content\n text_content = extract_text_content(soup)\n \n # Create summary (first 500 characters of text)\n summary = text_content[:500].strip()\n \n result = {\n \"title\": metadata[\"title\"],\n \"authors\": metadata[\"authors\"],\n \"publish_date\": metadata[\"publish_date\"],\n \"summary\": summary,\n \"keywords\": metadata[\"keywords\"],\n \"top_image\": metadata[\"top_image\"],\n \"text\": text_content\n }\n \n return result ","tools":[],"config":[{"BasicConfig":{"key_name":"default_language","description":"The default language to assume for articles","required":false,"type":null,"key_value":null}}],"description":"Extracts article text, authors, images, and metadata from a given URL or raw HTML using newspaper3k","keywords":["article","news","newspaper3k","scraper","metadata","text extraction"],"input_args":{"type":"object","properties":{"html":{"type":"string","description":"Optional raw HTML content (if you already have it), will override fetching from 'url'"},"language":{"type":"string","description":"Override default language, e.g. 'en', 'zh'"},"url":{"type":"string","description":"Article URL to scrape"}},"required":["url"]},"output_arg":{"json":""},"activated":false,"embedding":[0.69941324,0.64064425,-0.8242118,-0.20212075,-0.0026108846,-0.015203901,-0.9524106,-0.015597768,0.080349006,0.15950476,0.07163128,0.5253768,0.5516496,0.117237054,0.14839432,-0.07454805,-0.15771993,-0.32681906,-1.7062681,0.079335906,0.7563415,0.93879145,0.5255999,-0.050526172,0.31186634,-0.31516442,-0.317042,-0.8968821,-0.7061774,-1.2805489,0.34653315,-0.06946293,-0.38671303,-0.38466397,0.03807852,-0.4254118,0.26365077,-0.2770071,-0.65233713,-0.6415964,0.07296899,0.37815177,-0.18328999,-0.25453016,0.62554437,0.023577802,-0.024233885,-0.78328097,0.5548543,0.84306854,-0.24449272,-0.6769733,-0.36387423,-0.47791514,-0.10911339,-0.25945944,-0.41551962,0.3341912,-0.0977878,0.16487683,0.3199462,0.28385603,-3.8530324,0.082349226,0.6133101,0.04000172,0.009939375,-0.34541604,-0.2724772,-0.069931336,0.04572821,0.11202561,-0.14612137,0.21732494,0.13858303,-1.0522708,0.26680806,0.226521,0.02490455,-0.6104649,0.021241015,0.33769563,-0.6432464,0.4505488,-0.6576282,0.8213175,-0.51968646,-0.5023192,0.27919054,0.16295187,0.7402312,-0.65924335,-0.4153273,-0.17680705,-0.39726406,0.035670493,0.0712568,0.68875307,0.3967821,3.2755167,0.91599333,-0.04625012,-0.09378637,-1.0914956,0.70594496,-0.71022606,-0.08095429,0.38602623,0.34430817,0.17422038,0.22477517,-0.2959589,-0.42709592,-0.0005936455,-0.1604625,0.1939446,-0.64268064,-0.42358655,0.6000193,0.7521763,0.0050160587,0.40755403,-0.48660865,-0.61343753,0.27787632,0.39944577,0.06094875,0.6259906,0.3867572,-0.122751,0.2874025,-0.36750785,-1.2879946,-0.27463716,-0.41514495,-0.12326933,0.6739438,-0.8465257,0.5324717,-0.8915728,0.09410974,-0.9958719,0.17186475,0.4575055,0.31466278,0.5138172,-0.07581605,0.26031354,-0.65164775,0.06830618,0.10675892,0.7442521,0.04799307,0.35385343,0.8834854,-0.20027967,-0.07487042,-0.39091894,-0.6426438,0.068887234,-0.36406964,-0.4486674,0.74584216,0.20281845,0.038113885,-0.41550457,0.4658897,0.27080587,0.5783553,0.3564225,0.6557421,-0.25712466,0.31269276,0.2567668,-0.09374846,-0.18509637,-0.30487815,-0.18620446,0.30211902,-0.2980997,1.1075373,0.53284484,0.016027093,-0.40134674,-0.5831429,0.18777403,0.19905213,0.41588104,0.97698915,0.7943896,-0.09064364,1.7852771,-0.52919954,-0.96481776,0.4668316,-0.12777343,-0.16401082,0.108757004,0.4017435,0.11989166,0.15786746,-0.035420753,-0.298809,-0.15346563,0.08553788,-0.09210476,0.6943215,0.069616936,0.071044974,-0.8762087,-0.09279807,-0.9468175,0.4682254,-0.29124272,0.58419573,0.049899533,-0.4787472,0.36137012,0.31148908,0.69186544,-0.043289892,0.2936322,-0.50015646,-0.6425251,-0.8887293,0.53056914,0.111264,0.37473297,-0.3324417,-0.1119518,0.8927302,1.0051562,0.468221,1.2657965,1.0531231,-0.2599583,-0.11277374,0.58682287,1.1168257,-0.13469021,0.75258553,0.20202796,-0.23202433,0.18664481,-0.11527233,-0.8335793,-0.4779065,-0.112425655,0.61546636,1.3688636,0.43593183,0.03427375,0.4447992,0.39139223,0.2445595,-0.14925617,-1.784064,0.11501313,-0.03271967,1.0284023,0.13603842,-0.47397316,0.725747,0.58368397,0.50861865,-0.81741315,-0.46407428,-0.5428107,-0.08870637,-0.03713212,-0.6832232,0.34509057,-0.15277311,-0.23207927,0.18668383,0.15238805,1.0182273,0.16069826,-0.7515116,-0.4797083,0.53332186,0.05925063,0.062034234,0.67584586,-0.28007317,-0.7663089,-0.6134092,0.6118777,-0.55895805,0.6261777,-0.051148105,-0.68185204,-0.048983205,0.22133046,1.3931886,-0.122785434,0.33762306,-0.4109923,-0.2381622,-0.28884274,-0.49689972,0.5387871,-0.8388947,-0.27300158,-0.044576533,-0.79690516,0.86298144,-0.5045956,-0.17869407,0.21723235,-0.9806924,-0.23222165,0.21176931,0.10187938,0.14710796,-0.39029527,-0.17337228,-0.062158257,0.19290414,-1.9061534,-0.4343448,-0.16389084,0.21875419,-0.20962098,-0.43167254,0.9444105,-0.6146229,-0.43349522,-0.67292005,1.2295314,0.38771087,-0.046566505,0.1834136,0.27592087,0.3318329,-0.02733611,-0.5154438,-0.14502653,-0.9736136,-0.053445745,-0.09253003,1.1189029,0.22760381,0.23968303,0.056988988,0.70392644,-1.0532606,-0.8967281,0.55461484,-0.021515287,-0.42339802,0.46432325,-0.50498694,-0.79036516,0.47943944,1.2220789,-0.43718585,-0.6154099,-0.76879495,1.6305152,-0.607415,0.055705592,-0.6232083,-0.100991845,0.19413708,0.41611272,0.4850095,-0.29752842,0.10240604,-0.6654748,0.25095123,-0.20786086,0.5775305,-0.28614354,0.61468095,0.22875509,0.5750578,0.70075417,0.5889914,0.5124304,0.91121185,0.08331992,-0.6880028,0.36062735],"result":{"type":"object","properties":{"authors":{"items":{"type":"string"},"type":"array"},"keywords":{"items":{"type":"string"},"type":"array"},"publish_date":{"type":"string"},"summary":{"type":"string"},"text":{"type":"string"},"title":{"type":"string"},"top_image":{"type":"string"}},"required":["title","authors","publish_date","summary","keywords","top_image","text"]},"sql_tables":null,"sql_queries":null,"file_inbox":null,"oauth":null,"assets":null,"runner":"any","operating_system":["linux","macos","windows"],"tool_set":""},false]}
3 changes: 3 additions & 0 deletions tools/article-scraper/metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
"metadata",
"text extraction"
],
"runner": "any",
"operating_system": ["linux", "macos", "windows"],
"tool_set": "",
"configurations": {
"type": "object",
"properties": {
Expand Down
1 change: 1 addition & 0 deletions tools/arxiv-download/.tool-dump.test.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"type":"Python","content":[{"version":"1.0.0","name":"arxiv-download","homepage":null,"author":"@@official.shinkai","py_code":"# /// script\n# dependencies = [\n# \"requests\",\n# \"arxiv>=1.4.7\",\n# \"pymupdf4llm\",\n# \"pathlib\"\n# ]\n# ///\n\nimport arxiv\nimport requests\nimport json\nimport pymupdf4llm\nfrom pathlib import Path\nfrom typing import Dict, Any\n\nclass CONFIG:\n storage_folder: str = \"arxiv_papers\"\n\nclass INPUTS:\n paper_id: str # e.g. \"2101.00001\"\n convert_to_md: bool = True\n\nclass OUTPUT:\n status: str\n message: str\n md_file: str\n\nasync def run(c: CONFIG, p: INPUTS) -> OUTPUT:\n \"\"\"\n Download a paper from arXiv by ID, store as PDF in the storage folder, optionally convert to .md\n \"\"\"\n folder = Path(c.storage_folder)\n folder.mkdir(parents=True, exist_ok=True)\n\n # if we already have .md for that paper, skip\n md_path = folder / f\"{p.paper_id}.md\"\n if md_path.exists():\n out = OUTPUT()\n out.status = \"exists\"\n out.message = f\"Paper {p.paper_id} already downloaded/converted.\"\n out.md_file = str(md_path)\n return out\n\n # otherwise, we do the download\n search = arxiv.Search(id_list=[p.paper_id])\n client = arxiv.Client()\n try:\n paper = next(client.results(search))\n except StopIteration:\n out = OUTPUT()\n out.status = \"error\"\n out.message = f\"Paper not found: {p.paper_id}\"\n out.md_file = \"\"\n return out\n\n # Download PDF\n pdf_path = folder / f\"{p.paper_id}.pdf\"\n if not pdf_path.exists():\n paper.download_pdf(dirpath=str(folder), filename=pdf_path.name)\n\n # Optionally convert\n if p.convert_to_md:\n # Convert using pymupdf4llm\n try:\n markdown_text = pymupdf4llm.to_markdown(str(pdf_path), show_progress=False)\n md_path.write_text(markdown_text, encoding='utf-8')\n # remove pdf if you want\n # pdf_path.unlink()\n except Exception as e:\n out = OUTPUT()\n out.status = \"error\"\n out.message = f\"Conversion failed: {str(e)}\"\n out.md_file = \"\"\n return out\n\n out = OUTPUT()\n out.status = \"success\"\n out.message = f\"Paper {p.paper_id} downloaded successfully.\"\n out.md_file = str(md_path) if p.convert_to_md else \"\"\n return out ","tools":[],"config":[{"BasicConfig":{"key_name":"storage_folder","description":"Where to store PDFs/MD outputs","required":false,"type":null,"key_value":null}}],"description":"Download an arXiv paper PDF and optionally convert it to Markdown","keywords":["arxiv","pdf","download","markdown","research","paper"],"input_args":{"type":"object","properties":{"convert_to_md":{"type":"boolean","description":"Whether to convert the downloaded PDF to .md"},"paper_id":{"type":"string","description":"ArXiv paper ID to download"}},"required":["paper_id"]},"output_arg":{"json":""},"activated":false,"embedding":[-0.06772009,0.33074895,-0.12433264,-0.055927545,0.09624629,-0.12388899,-0.71083736,-0.3618617,0.10271849,-0.8440084,0.5556332,0.55765307,0.3625208,0.35818374,0.42076993,-0.68175757,-0.115849614,-0.74241465,-0.6022908,0.18148008,-0.049271327,1.1281248,0.14197966,0.14113511,0.48362833,-0.19575952,0.23364326,-0.437007,-1.2430946,-2.2457132,0.3783815,0.16859254,-0.5481903,-0.090724036,0.30787995,-0.86067104,-0.11666481,0.22927448,0.12952995,-0.16315632,0.14190437,-0.1897714,0.42679283,-0.2632755,0.9500502,-0.25022757,0.0298271,0.039278306,0.6473044,0.070573956,-0.5418029,-0.12828816,0.09704277,-0.22622198,-0.3254506,0.019293204,0.4580627,0.033716645,0.04554349,0.33572453,0.28695026,0.59874475,-4.281266,0.43372402,0.5776784,0.59592384,0.3208925,-0.06429759,-0.12958783,-0.15205355,0.24863602,-0.104167335,-0.27816,0.0016052015,-0.50114995,-0.54733145,-0.18057136,0.29124182,-0.19310378,-0.23886481,-0.437409,0.27079862,0.42258355,0.22533771,-0.57201606,0.50246143,-0.3832505,-0.5628334,0.7944239,-0.035965882,-0.046965446,-0.28912395,-0.21649215,0.3692323,-0.26799268,0.35802346,0.17145126,0.06583945,-0.41364226,3.3612483,0.41108125,-0.13293199,1.170197,-0.56670916,-0.40315762,-0.54664934,-0.30528918,-0.26310351,-0.15721942,0.2900381,0.5113967,-0.9446422,-0.4114693,-0.108598545,-0.21903737,-0.091914244,-0.41363305,-0.14384334,0.05311441,0.25706697,-1.1712787,0.5694138,-0.44011515,-0.43747523,0.3661413,0.08051382,-0.25502595,0.3159434,0.42088762,-0.16219096,-0.16146418,-0.18206616,-0.25271896,0.19235066,0.065700546,0.04299907,0.95749205,-0.36959967,0.17073585,-0.1140331,0.13210365,-0.8657535,0.7602873,0.2573825,0.6513539,0.14940584,-0.19946343,0.68404925,-0.53199345,-0.10830405,-0.067399174,0.007347025,-0.099279076,-0.27195817,0.9186075,-0.14061287,-0.119402714,0.066054106,-0.017029323,0.47632283,0.11573829,-0.6110352,0.30140185,0.9487582,0.0013287663,0.018905332,0.16431758,-0.07871211,0.77877986,0.313329,0.36769348,-0.35382083,0.36676982,-0.14401965,0.303029,0.444213,-0.08598235,-0.6011994,0.17401975,-0.633597,0.53721094,0.3046455,-0.04384352,-0.30939847,0.22511028,0.47177783,0.32312715,-0.020245794,0.63887334,0.5815079,-0.4591998,1.8242875,-1.6826007,-0.63111514,-0.46525353,0.049148753,0.24241935,0.27208704,0.5969246,-0.5733497,-0.67277235,-0.12470095,0.54242986,0.44272786,0.2653585,-0.5881211,0.43370715,0.17412254,-0.092196986,-0.38453522,-0.13758329,-0.3040964,0.9313191,0.7411672,0.08223746,0.0749369,-0.11820951,0.08241235,0.20786884,0.30508548,0.007237643,0.1384644,-0.52654344,-0.50662273,-0.483865,0.15936537,0.10304559,0.003170129,-0.20125444,-0.24802543,0.296281,0.16812082,0.6309372,1.0705671,0.3381907,-0.2609965,-0.43840575,0.26710033,0.011027411,-0.71286404,0.6916328,-0.04688865,-0.105023734,-0.5047826,-0.07493542,-0.84578335,-0.13567328,0.47130805,-0.1308145,1.6273332,0.39326164,-0.5048349,0.7160495,0.06862165,-0.109315954,0.27021915,-1.8295172,-0.04794021,-0.024705894,0.4504594,-0.07260375,0.44587356,0.36690506,0.3875241,-0.1434882,-0.41276056,-0.76968247,-0.33724308,0.0024902895,-0.15620635,-0.11287923,0.48406887,0.40559518,0.24793443,-0.02829522,0.46180364,0.981828,-0.30582544,-0.46393213,-0.7084347,-0.1316689,0.052845255,0.24334338,0.037883535,-0.49104986,0.07941584,0.35554042,0.007393047,0.056695815,-0.32162595,-0.09060878,0.042034656,0.13000086,0.18264228,1.926402,0.04121549,0.03346231,0.19649933,-0.7543767,0.6176141,-0.011706315,-0.10913884,-0.20472036,-0.054269843,-1.0272176,0.11338819,0.30252743,-0.5999712,0.074522674,0.408844,-0.29486153,-0.5803827,-0.035178546,-0.17542776,0.47810194,-0.6029544,-0.24997126,0.29132655,-0.35446957,-1.5702441,-0.6800717,0.015752247,0.50049007,-0.46064997,-0.59031856,0.865307,0.28391632,0.21349448,-0.23197974,1.715747,0.4794465,0.1617654,-0.51702493,-0.050501943,0.7516259,-0.24805821,0.77039766,-0.44998118,-0.8785668,0.012401135,0.581763,1.636462,-0.14743578,-0.33823237,-0.22904876,-0.028672434,-1.53263,-0.871168,0.074743755,-0.06351489,-0.15739936,0.40022105,-0.42450187,-0.11895325,1.098501,1.005978,-0.23876546,0.104689986,-0.17724875,2.3204203,-0.43574765,-0.31134295,0.09102308,-0.0021279864,-0.2569486,0.17781231,-0.3419927,-0.8080248,-0.24101713,-0.017459534,0.25798306,-0.5684905,0.7051594,0.20335832,0.25227988,0.07262878,-0.27239957,0.8793398,0.77965456,0.06566863,0.26809794,-0.5473861,-0.65845394,-0.06537734],"result":{"type":"object","properties":{"md_file":{"type":"string"},"message":{"type":"string"},"status":{"type":"string"}},"required":["status","message"]},"sql_tables":null,"sql_queries":null,"file_inbox":null,"oauth":null,"assets":null,"runner":"any","operating_system":["linux","macos","windows"],"tool_set":""},false]}
3 changes: 3 additions & 0 deletions tools/arxiv-download/metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
"research",
"paper"
],
"runner": "any",
"operating_system": ["linux", "macos", "windows"],
"tool_set": "",
"configurations": {
"type": "object",
"properties": {
Expand Down
Loading
Loading