Skip to content

Commit

Permalink
Merge pull request #140 from dcSpark/feature/add-wikimedia-page-conte…
Browse files Browse the repository at this point in the history
…nt-tool

feat: add wikimedia-page-content tool
  • Loading branch information
guillevalin authored Feb 4, 2025
2 parents 3edae50 + dbd72fd commit 3d86114
Show file tree
Hide file tree
Showing 6 changed files with 178 additions and 0 deletions.
Binary file added tools/wikimedia-page-content/banner.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tools/wikimedia-page-content/icon.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
49 changes: 49 additions & 0 deletions tools/wikimedia-page-content/index.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import { expect } from '@jest/globals';
import { getToolTestClient } from '../../src/test/utils';
import * as path from 'path';

/**
 * Integration tests for the Wikimedia Page Content tool.
 *
 * Each case executes `tool.ts` through the shared tool test client and checks
 * the shape and key values of the returned `content` object.
 * NOTE(review): the tool performs real HTTP requests, so these tests presumably
 * need network access — hence the generous per-test timeout.
 */
describe('Wikimedia Page Content Tool', () => {
  const toolPath = path.join(__dirname, 'tool.ts');
  const client = getToolTestClient();
  // Per-test timeout (ms); shared so all cases stay in sync.
  const TEST_TIMEOUT_MS = 30000;

  it('fetches page content with default parameters', async () => {
    const response = await client.executeToolFromFile(toolPath, {
      title: 'Artificial intelligence'
    });

    // Shape of the result object.
    expect(response).toHaveProperty('content');
    expect(response.content).toHaveProperty('title');
    expect(response.content).toHaveProperty('html');
    expect(response.content).toHaveProperty('url');
    expect(response.content).toHaveProperty('lastModified');
    expect(response.content).toHaveProperty('language');

    // Values derived from the defaults (project "wikipedia", language "en").
    expect(response.content.title).toBe('Artificial intelligence');
    expect(response.content.html).toBeTruthy();
    expect(response.content.url).toBe('https://en.wikipedia.org/wiki/Artificial_intelligence');
    expect(response.content.language).toBe('en');
  }, TEST_TIMEOUT_MS);

  it('handles custom project and language', async () => {
    // The third argument carries the tool configuration.
    const response = await client.executeToolFromFile(toolPath, {
      title: 'Intelligence artificielle'
    }, {
      project: 'wikipedia',
      language: 'fr'
    });

    expect(response.content.url).toBe('https://fr.wikipedia.org/wiki/Intelligence_artificielle');
    expect(response.content.language).toBe('fr');
    expect(response.content.html).toBeTruthy();
  }, TEST_TIMEOUT_MS);

  it('handles titles with spaces', async () => {
    const response = await client.executeToolFromFile(toolPath, {
      title: 'Machine learning'
    });

    // Spaces become underscores in the public URL; the echoed title is unchanged.
    expect(response.content.url).toBe('https://en.wikipedia.org/wiki/Machine_learning');
    expect(response.content.title).toBe('Machine learning');
    expect(response.content.html).toBeTruthy();
  }, TEST_TIMEOUT_MS);
});
58 changes: 58 additions & 0 deletions tools/wikimedia-page-content/metadata.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
{
"id": "wikimedia-page-content",
"version": "1.0.0",
"name": "Wikimedia Page Content",
"description": "Fetch the full HTML content of a specific Wikimedia page",
"author": "Shinkai",
"keywords": [
"wikimedia",
"content",
"wikipedia",
"page",
"html",
"article"
],
"configurations": {
"type": "object",
"properties": {
"project": {
"type": "string",
"description": "Wikimedia project (e.g., wikipedia)",
"default": "wikipedia"
},
"language": {
"type": "string",
"description": "Language code (e.g., en)",
"default": "en"
}
},
"required": []
},
"parameters": {
"type": "object",
"properties": {
"title": {
"type": "string",
"description": "Title of the page to fetch"
}
},
"required": ["title"]
},
"result": {
"type": "object",
"properties": {
"content": {
"type": "object",
"properties": {
"title": {"type": "string"},
"html": {"type": "string"},
"url": {"type": "string"},
"lastModified": {"type": "string"},
"language": {"type": "string"}
},
"required": ["title", "html", "url", "lastModified", "language"]
}
},
"required": ["content"]
}
}
3 changes: 3 additions & 0 deletions tools/wikimedia-page-content/store.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"categoryId": "cc6ba888-3987-4e2a-af7e-3b137d997262"
}
68 changes: 68 additions & 0 deletions tools/wikimedia-page-content/tool.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import axios from 'npm:axios';

/**
 * Optional tool configuration. `run` substitutes a default for any field that
 * is missing or empty.
 */
type Configurations = {
// Wikimedia project, used as the second host label; `run` defaults to 'wikipedia'.
project?: string;
// Language code, used as the first host label; `run` defaults to 'en'.
language?: string;
};

/** Per-call inputs for the tool. */
type Parameters = {
// Title of the page to fetch.
title: string;
};

/** Value resolved by `run` on success. */
type Result = {
content: {
// The requested title, echoed back as given.
title: string;
// Response body returned by the page-HTML endpoint.
html: string;
// Public wiki URL of the page (spaces in the title replaced by underscores).
url: string;
// Value of the `last-modified` response header, or '' when absent.
lastModified: string;
// Language code that was used for the request.
language: string;
};
};

/**
 * Generic signature of a tool entry point: receives the configuration object
 * and the call inputs, and resolves to the result object.
 */
export type Run<C extends Record<string, any>, I extends Record<string, any>, R extends Record<string, any>> = (
config: C,
inputs: I
) => Promise<R>;

/**
 * A single DNS label: letters and digits, optionally separated by single
 * hyphens (e.g. "en", "zh-min-nan", "wikipedia").
 */
const HOST_LABEL_PATTERN = /^[a-z0-9]+(?:-[a-z0-9]+)*$/i;

/**
 * Returns `value` unchanged when it is a single DNS label, otherwise throws.
 *
 * The project and language settings are interpolated into the request
 * hostname, so characters such as '.', '/', '#' or '@' must be rejected to
 * keep the request on `<language>.<project>.org`.
 */
const requireHostLabel = (value: string, settingName: string): string => {
  if (typeof value !== 'string' || !HOST_LABEL_PATTERN.test(value)) {
    throw new Error(
      `Invalid ${settingName} '${String(value)}': only letters, digits and hyphens are allowed`
    );
  }
  return value;
};

/**
 * Fetches the rendered HTML of a Wikimedia page through the REST v1
 * `page/html` endpoint.
 *
 * @param configurations Optional `project` (default 'wikipedia') and
 *   `language` (default 'en'); together they select the host
 *   `https://<language>.<project>.org`.
 * @param params Call inputs; `title` is the page title to fetch.
 * @returns The page HTML together with the echoed title, the public wiki URL,
 *   the `last-modified` header value ('' when absent) and the language used.
 * @throws Error when `title` is empty or blank, when `project`/`language` is
 *   not a plain host label, when the page does not exist (HTTP 404), when the
 *   response body is empty, or when the request fails for another reason.
 */
export const run: Run<Configurations, Parameters, Result> = async (
  configurations: Configurations,
  params: Parameters
): Promise<Result> => {
  // Validate inputs before any network I/O so callers get a clear local error.
  const title = params?.title;
  if (typeof title !== 'string' || title.trim() === '') {
    throw new Error('Parameter "title" must be a non-empty string');
  }

  // `||` (not `??`) is deliberate: an empty string also falls back to the default.
  const project = requireHostLabel(configurations?.project || 'wikipedia', 'project');
  const language = requireHostLabel(configurations?.language || 'en', 'language');
  const origin = `https://${language}.${project}.org`;

  try {
    // Using the REST v1 API endpoint for page content
    const apiUrl = `${origin}/api/rest_v1/page/html/${encodeURIComponent(title)}`;

    const response = await axios.get(apiUrl, {
      headers: {
        'User-Agent': 'ShinkaiWikimediaPageContent/1.0',
        'Accept': 'text/html; charset=utf-8',
        'Api-User-Agent': 'ShinkaiWikimediaPageContent/1.0 (https://github.com/dcSpark/shinkai-tools)'
      }
    });

    if (!response.data) {
      throw new Error('No data received from Wikimedia API');
    }

    return {
      content: {
        title,
        html: response.data,
        // Wiki page URLs use underscores in place of spaces.
        url: `${origin}/wiki/${encodeURIComponent(title.replace(/ /g, '_'))}`,
        lastModified: response.headers['last-modified'] || '',
        language
      }
    };
  } catch (error) {
    if (axios.isAxiosError(error)) {
      if (error.response?.status === 404) {
        throw new Error(`Page '${title}' not found`);
      }
      // Prefer the API's own error description when the body carries one.
      throw new Error(`Failed to fetch page content: ${error.response?.data?.detail || error.response?.data?.message || error.message}`);
    }
    // Non-HTTP errors (including the empty-body error above) propagate unchanged.
    throw error;
  }
};

0 comments on commit 3d86114

Please sign in to comment.