Skip to content

Commit

Permalink
Merge pull request #115 from dcSpark/feature/add-webcam-capture-tool
Browse files Browse the repository at this point in the history
feat: add webcam-capture tool
  • Loading branch information
guillevalin authored Feb 4, 2025
2 parents 24bfcfb + 14cd047 commit cc089ae
Show file tree
Hide file tree
Showing 6 changed files with 252 additions and 0 deletions.
Binary file added tools/webcam-capture/banner.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tools/webcam-capture/icon.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
72 changes: 72 additions & 0 deletions tools/webcam-capture/index.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import { expect } from '@jest/globals';
import { getToolTestClient } from '../../src/test/utils';
import * as path from 'path';
import * as fs from 'fs';

/**
 * Integration tests for the webcam-capture tool.
 *
 * These tests execute `tool.py` through the shared tool test client; the two
 * capture tests assert that a real image file was written, so they need a
 * camera that can be opened at index 0.
 */
describe('Webcam Capture Tool', () => {
  const toolPath = path.join(__dirname, 'tool.py');
  const client = getToolTestClient();

  it('captures a frame with default config', async () => {
    // Attempt to run the tool with default configuration
    const response = await client.executeToolFromFile(
      toolPath,
      {}, // No input parameters => uses default
      {}, // No special config => uses default "cameraIndex=0" & "format=png"
    );

    // Validate shape
    expect(response).toHaveProperty('__created_files__');
    expect(Array.isArray(response.__created_files__)).toBe(true);
    expect(response.__created_files__[0]).toMatch(/webcam_capture_\d+\.png$/);

    expect(response).toHaveProperty('imagePath');
    expect(typeof response.imagePath).toBe('string');
    expect(response.imagePath).toMatch(/webcam_capture_\d+\.png$/);

    // Check if file exists
    expect(fs.existsSync(response.imagePath)).toBe(true);

    // Check dimensions
    expect(response).toHaveProperty('width', 640); // Default width
    expect(response).toHaveProperty('height', 480); // Default height
  }, 120000);

  it('captures a frame as JPEG with custom config', async () => {
    const response = await client.executeToolFromFile(
      toolPath,
      { width: 800, height: 600 }, // Input parameters
      { cameraIndex: 0, format: 'jpeg' }, // Config
    );

    expect(response).toHaveProperty('__created_files__');
    expect(Array.isArray(response.__created_files__)).toBe(true);
    expect(response.__created_files__[0]).toMatch(/webcam_capture_\d+\.jpeg$/);

    expect(response).toHaveProperty('imagePath');
    expect(typeof response.imagePath).toBe('string');
    expect(response.imagePath).toMatch(/webcam_capture_\d+\.jpeg$/);

    // Check if file exists
    expect(fs.existsSync(response.imagePath)).toBe(true);

    // Check dimensions
    expect(response).toHaveProperty('width', 800);
    expect(response).toHaveProperty('height', 600);
  }, 120000);

  it('handles invalid camera device gracefully', async () => {
    // `.rejects` fails the test by itself when the call unexpectedly resolves,
    // so no `fail()` global is needed. Matching on the `message` property keeps
    // the original check and does not require the rejection to be an Error.
    await expect(
      client.executeToolFromFile(
        toolPath,
        {},
        { cameraIndex: 999999, format: 'png' },
      ),
    ).rejects.toMatchObject({
      message: expect.stringMatching(/Failed to open webcam/i),
    });
  }, 20000);
});
67 changes: 67 additions & 0 deletions tools/webcam-capture/metadata.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
{
"id": "webcam-capture",
"name": "Webcam Capture Tool",
"description": "Captures a single frame from a local webcam, saves it to disk as a PNG or JPEG image, and returns the path of the saved file together with the frame dimensions. Example usage with Python + opencv.",
"author": "Shinkai",
"version": "1.0.0",
"keywords": [
"webcam",
"capture",
"camera",
"image",
"tools"
],
"configurations": {
"type": "object",
"properties": {
"cameraIndex": {
"type": "number",
"description": "Which camera index to capture from. 0 is the default. If you only have one camera, use 0."
},
"format": {
"type": "string",
"description": "Image format to return (png or jpeg)",
"default": "png"
}
},
"required": []
},
"parameters": {
"type": "object",
"properties": {
"width": {
"type": "number",
"description": "Requested width of the capture in pixels",
"default": 640
},
"height": {
"type": "number",
"description": "Requested height of the capture in pixels",
"default": 480
}
},
"required": []
},
"result": {
"type": "object",
"properties": {
"imageBase64": {
"type": "string",
"description": "The captured image as a Base64-encoded string"
},
"width": {
"type": "number",
"description": "Actual width of the returned frame"
},
"height": {
"type": "number",
"description": "Actual height of the returned frame"
}
},
"required": [
"imageBase64",
"width",
"height"
]
}
}
3 changes: 3 additions & 0 deletions tools/webcam-capture/store.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"categoryId": "b04aabe6-4fce-46f1-b6f2-7a96d742b9d1"
}
110 changes: 110 additions & 0 deletions tools/webcam-capture/tool.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# /// script
# dependencies = [
# "requests",
# "numpy==1.26.4",
# "opencv-python==4.8.0.76"
# ]
# ///

import cv2
import time
import base64
import numpy as np
import os
import platform
from typing import Dict, Any, Optional, List
from shinkai_local_support import get_home_path

class CONFIG:
    """Tool configuration consumed by ``run``; both fields are optional."""

    # Index of the capture device handed to the video capture backend.
    cameraIndex: Optional[int]
    # File format of the saved image, e.g. 'png' or 'jpeg'.
    format: Optional[str]

class INPUTS:
    """Per-call parameters consumed by ``run``; both fields are optional."""

    # Requested capture width in pixels.
    width: Optional[int]
    # Requested capture height in pixels.
    height: Optional[int]

class OUTPUT:
    """Result object populated and returned by ``run``."""

    # Path of the image file written to disk.
    imagePath: str
    # Width in pixels of the captured frame.
    width: int
    # Height in pixels of the captured frame.
    height: int

async def run(config: CONFIG, inputs: INPUTS) -> OUTPUT:
    """
    Capture a single frame from a local webcam and save it to disk.

    Args:
        config: Tool configuration. ``cameraIndex`` selects the capture device
            (0 when missing or None) and ``format`` selects the file type
            ('png', 'jpeg' or 'jpg'; anything else falls back to 'png').
        inputs: Requested capture size. ``width`` and ``height`` fall back to
            640 and 480 when missing or None.

    Returns:
        OUTPUT whose ``imagePath`` is the written file and whose ``width`` /
        ``height`` are taken from the shape of the captured frame.

    Raises:
        RuntimeError: If the camera cannot be opened, no frame can be read, or
            the image cannot be written to disk.
    """
    def _setting(source: Any, name: str, default: Any) -> Any:
        # Optional fields may be absent *or* explicitly None; both mean "default".
        value = getattr(source, name, None)
        return default if value is None else value

    # The camera index is declared as a JSON "number", so it may arrive as a
    # float; coerce it to an int before using it as a device index.
    camera_index = int(_setting(config, 'cameraIndex', 0))
    img_format = str(_setting(config, 'format', 'png')).lower()
    if img_format not in ('png', 'jpeg', 'jpg'):
        img_format = 'png'

    width = _setting(inputs, 'width', 640)
    height = _setting(inputs, 'height', 480)

    # Open the camera. The isOpened() check lives inside the try block so the
    # capture handle is released on every exit path, including this failure.
    cap = cv2.VideoCapture(camera_index)
    try:
        if not cap.isOpened():
            raise RuntimeError(f"Failed to open webcam (index={camera_index}). Please check if the camera is connected and accessible.")

        # Set resolution
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)

        # Let the camera warm up and auto-adjust: grab/discard extra frames
        for _ in range(15):
            cap.read()

        # Wait a moment so the auto-exposure has time to adapt
        time.sleep(0.5)

        # Try to capture the final frame
        ret, frame = cap.read()
        if not ret or frame is None:
            raise RuntimeError("Failed to capture frame from webcam. Please check camera permissions and settings.")

        # Optional gamma correction for better brightness
        gamma = 1.2  # Adjust this value if needed (>1 brightens, <1 darkens)
        look_up_table = np.array([((i / 255.0) ** (1.0 / gamma)) * 255 for i in range(256)]).astype("uint8")
        frame = cv2.LUT(frame, look_up_table)

        # Get final dimensions. Slicing the shape works for both 3-D (colour)
        # and 2-D (single-channel) frames.
        final_height, final_width = frame.shape[:2]

        # Get home path for writing file
        home_path = await get_home_path()

        # Create filename with timestamp
        timestamp = int(time.time())
        filename = f"webcam_capture_{timestamp}.{img_format}"
        file_path = os.path.join(home_path, filename)

        # Encode and write to file; the quality flag is only passed for JPEG output
        encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 95] if img_format.startswith('jp') else []
        if not cv2.imwrite(file_path, frame, encode_param):
            raise RuntimeError("Failed to write image to disk. Please check disk permissions and space.")

        # Create output
        output = OUTPUT()
        output.imagePath = file_path
        output.width = final_width
        output.height = final_height

        return output

    finally:
        # Always release the camera
        cap.release()

0 comments on commit cc089ae

Please sign in to comment.