Merge pull request #115 from dcSpark/feature/add-webcam-capture-tool

feat: add webcam-capture tool
dcSpark · Feb 4, 2025 · cc089ae · cc089ae
2 parents 24bfcfb + 14cd047
commit cc089ae
Show file tree

Hide file tree

Showing 6 changed files with 252 additions and 0 deletions.
diff --git a/tools/webcam-capture/banner.png b/tools/webcam-capture/banner.png
diff --git a/tools/webcam-capture/icon.png b/tools/webcam-capture/icon.png
diff --git a/tools/webcam-capture/index.test.ts b/tools/webcam-capture/index.test.ts
@@ -0,0 +1,72 @@
+import { expect } from '@jest/globals';
+import { getToolTestClient } from '../../src/test/utils';
+import * as path from 'path';
+import * as fs from 'fs';
+
+/**
+ * Integration tests for the webcam-capture tool.
+ *
+ * These tests execute `tool.py` through the shared tool test client and assert
+ * on the returned payload (`__created_files__`, `imagePath`, `width`, `height`).
+ * NOTE(review): the capture tests presumably need a physical camera at index 0
+ * that honours the requested resolution — confirm before running in CI.
+ */
+describe('Webcam Capture Tool', () => {
+  // Python entry point of the tool under test, located next to this spec.
+  const toolPath = path.join(__dirname, 'tool.py');
+  const client = getToolTestClient();
+
+  it('captures a frame with default config', async () => {
+    // Attempt to run the tool with default configuration
+    const response = await client.executeToolFromFile(
+      toolPath,
+      {},       // No input parameters => uses default
+      {}        // No special config => uses default "cameraIndex=0" & "format=png"
+    );
+
+    // Validate shape
+    expect(response).toHaveProperty('__created_files__');
+    expect(Array.isArray(response.__created_files__)).toBe(true);
+    expect(response.__created_files__[0]).toMatch(/webcam_capture_\d+\.png$/);
+
+    expect(response).toHaveProperty('imagePath');
+    expect(typeof response.imagePath).toBe('string');
+    expect(response.imagePath).toMatch(/webcam_capture_\d+\.png$/);
+
+    // Check if file exists
+    expect(fs.existsSync(response.imagePath)).toBe(true);
+
+    // Check dimensions
+    expect(response).toHaveProperty('width', 640);  // Default width
+    expect(response).toHaveProperty('height', 480); // Default height
+  }, 120000);
+
+  it('captures a frame as JPEG with custom config', async () => {
+    const response = await client.executeToolFromFile(
+      toolPath,
+      { width: 800, height: 600 }, // Input parameters
+      { cameraIndex: 0, format: 'jpeg' } // Config
+    );
+
+    expect(response).toHaveProperty('__created_files__');
+    expect(Array.isArray(response.__created_files__)).toBe(true);
+    expect(response.__created_files__[0]).toMatch(/webcam_capture_\d+\.jpeg$/);
+
+    expect(response).toHaveProperty('imagePath');
+    expect(typeof response.imagePath).toBe('string');
+    expect(response.imagePath).toMatch(/webcam_capture_\d+\.jpeg$/);
+
+    // Check if file exists
+    expect(fs.existsSync(response.imagePath)).toBe(true);
+
+    // Check dimensions
+    expect(response).toHaveProperty('width', 800);
+    expect(response).toHaveProperty('height', 600);
+  }, 120000);
+
+  it('handles invalid camera device gracefully', async () => {
+    // Assert on the rejection directly instead of try / fail() / catch:
+    // a fail() call inside the try block is caught by the same catch, and
+    // `fail` is not guaranteed to exist as a global in current Jest runners.
+    // Matching on `message` (rather than toThrow) keeps the original check,
+    // which only required the rejection value to carry a `message` string.
+    await expect(
+      client.executeToolFromFile(
+        toolPath,
+        {},
+        { cameraIndex: 999999, format: 'png' }
+      )
+    ).rejects.toHaveProperty(
+      'message',
+      expect.stringMatching(/Failed to open webcam/i)
+    );
+  }, 20000);
+});
diff --git a/tools/webcam-capture/metadata.json b/tools/webcam-capture/metadata.json
@@ -0,0 +1,67 @@
+{
+  "id": "webcam-capture",
+  "name": "Webcam Capture Tool",
+  "description": "Captures a single frame from a local webcam, saves it to disk as a PNG or JPEG file, and returns the file path together with the frame dimensions. Implemented in Python with OpenCV.",
+  "author": "Shinkai",
+  "version": "1.0.0",
+  "keywords": [
+    "webcam",
+    "capture",
+    "camera",
+    "image",
+    "tools"
+  ],
+  "configurations": {
+    "type": "object",
+    "properties": {
+      "cameraIndex": {
+        "type": "number",
+        "description": "Which camera index to capture from. 0 is the default. If you only have one camera, use 0."
+      },
+      "format": {
+        "type": "string",
+        "description": "Image format of the saved file (png or jpeg)",
+        "default": "png"
+      }
+    },
+    "required": []
+  },
+  "parameters": {
+    "type": "object",
+    "properties": {
+      "width": {
+        "type": "number",
+        "description": "Requested width of the capture in pixels",
+        "default": 640
+      },
+      "height": {
+        "type": "number",
+        "description": "Requested height of the capture in pixels",
+        "default": 480
+      }
+    },
+    "required": []
+  },
+  "result": {
+    "type": "object",
+    "properties": {
+      "imagePath": {
+        "type": "string",
+        "description": "Path of the saved image file on disk"
+      },
+      "width": {
+        "type": "number",
+        "description": "Actual width of the returned frame"
+      },
+      "height": {
+        "type": "number",
+        "description": "Actual height of the returned frame"
+      }
+    },
+    "required": [
+      "imagePath",
+      "width",
+      "height"
+    ]
+  }
+}
diff --git a/tools/webcam-capture/store.json b/tools/webcam-capture/store.json
@@ -0,0 +1,3 @@
+{
+  "categoryId": "b04aabe6-4fce-46f1-b6f2-7a96d742b9d1"
+}
diff --git a/tools/webcam-capture/tool.py b/tools/webcam-capture/tool.py
@@ -0,0 +1,110 @@
+# /// script
+# dependencies = [
+#     "requests",
+#     "numpy==1.26.4",
+#     "opencv-python==4.8.0.76"
+# ]
+# ///
+
+import cv2
+import time
+import base64
+import numpy as np
+import os
+import platform
+from typing import Dict, Any, Optional, List
+from shinkai_local_support import get_home_path
+
+class CONFIG:
+    """Tool-level configuration consumed by run()."""
+    # Index of the camera device to open; run() uses 0 when the attribute is absent.
+    cameraIndex: Optional[int]
+    # Output image format; run() accepts 'png', 'jpeg' or 'jpg' (case-insensitive)
+    # and uses 'png' for anything else or when the attribute is absent.
+    format: Optional[str]
+
+class INPUTS:
+    """Per-call input parameters consumed by run()."""
+    # Requested capture width in pixels; run() uses 640 when the attribute is absent.
+    width: Optional[int]
+    # Requested capture height in pixels; run() uses 480 when the attribute is absent.
+    height: Optional[int]
+
+class OUTPUT:
+    """Result object populated and returned by run()."""
+    # Path of the image file written by run().
+    imagePath: str
+    # Width in pixels of the captured frame, taken from frame.shape.
+    width: int
+    # Height in pixels of the captured frame, taken from frame.shape.
+    height: int
+
+async def run(config: CONFIG, inputs: INPUTS) -> OUTPUT:
+    """
+    Captures a single frame from a local webcam and saves it to disk.
+    
+    Args:
+        config: Configuration with camera index and output format
+        inputs: Input parameters with width and height
+    Returns:
+        OUTPUT object with image path and dimensions
+    """
+    # Set defaults
+    camera_index = getattr(config, 'cameraIndex', 0)
+    img_format = getattr(config, 'format', 'png').lower()
+    if img_format not in ('png', 'jpeg', 'jpg'):
+        img_format = 'png'
+
+    width = getattr(inputs, 'width', 640)
+    height = getattr(inputs, 'height', 480)
+
+    # Determine camera source based on platform
+    if platform.system() == 'Darwin':  # macOS
+        camera_source = camera_index
+    else:  # Linux, Windows
+        camera_source = camera_index
+
+    # Open the camera
+    cap = cv2.VideoCapture(camera_source)
+    if not cap.isOpened():
+        raise RuntimeError(f"Failed to open webcam (index={camera_index}). Please check if the camera is connected and accessible.")
+
+    try:
+        # Set resolution
+        cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
+        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
+
+        # Let the camera warm up and auto-adjust: grab/discard extra frames
+        for _ in range(15):
+            _, _ = cap.read()
+
+        # Wait a moment so the auto-exposure has time to adapt
+        time.sleep(0.5)
+
+        # Try to capture the final frame
+        ret, frame = cap.read()
+        if not ret or frame is None:
+            raise RuntimeError("Failed to capture frame from webcam. Please check camera permissions and settings.")
+
+        # Optional gamma correction for better brightness
+        gamma = 1.2  # Adjust this value if needed (>1 brightens, <1 darkens)
+        look_up_table = np.array([((i / 255.0) ** (1.0/gamma)) * 255 for i in range(256)]).astype("uint8")
+        frame = cv2.LUT(frame, look_up_table)
+
+        # Get final dimensions
+        final_height, final_width, _ = frame.shape
+
+        # Get home path for writing file
+        home_path = await get_home_path()
+
+        # Create filename with timestamp
+        timestamp = int(time.time())
+        filename = f"webcam_capture_{timestamp}.{img_format}"
+        file_path = os.path.join(home_path, filename)
+
+        # Encode and write to file
+        encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 95] if img_format.startswith('jp') else []
+        result = cv2.imwrite(file_path, frame, encode_param)
+        if not result:
+            raise RuntimeError("Failed to write image to disk. Please check disk permissions and space.")
+
+        # Create output
+        output = OUTPUT()
+        output.imagePath = file_path
+        output.width = final_width
+        output.height = final_height
+
+        return output
+
+    finally:
+        # Always release the camera
+        cap.release()