-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #115 from dcSpark/feature/add-webcam-capture-tool
feat: add webcam-capture tool
- Loading branch information
Showing
6 changed files
with
252 additions
and
0 deletions.
There are no files selected for viewing
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
import { expect } from '@jest/globals'; | ||
import { getToolTestClient } from '../../src/test/utils'; | ||
import * as path from 'path'; | ||
import * as fs from 'fs'; | ||
|
||
/**
 * Integration tests for the webcam-capture tool (`tool.py` next to this file).
 *
 * The two capture tests expect the tool to open camera index 0 successfully
 * and to write an image file whose path is reported back in `imagePath`.
 * The last test expects the tool to reject when the camera cannot be opened.
 */
describe('Webcam Capture Tool', () => {
  const toolPath = path.join(__dirname, 'tool.py');
  const client = getToolTestClient();

  it('captures a frame with default config', async () => {
    const response = await client.executeToolFromFile(
      toolPath,
      {}, // No input parameters => tool defaults (640x480)
      {}, // No config => tool defaults (cameraIndex 0, png)
    );

    // Validate shape
    expect(response).toHaveProperty('__created_files__');
    expect(Array.isArray(response.__created_files__)).toBe(true);
    expect(response.__created_files__[0]).toMatch(/webcam_capture_\d+\.png$/);

    expect(response).toHaveProperty('imagePath');
    expect(typeof response.imagePath).toBe('string');
    expect(response.imagePath).toMatch(/webcam_capture_\d+\.png$/);

    // The reported path must point at a file that was really written
    expect(fs.existsSync(response.imagePath)).toBe(true);

    // Dimensions are read back from the captured frame by the tool
    expect(response).toHaveProperty('width', 640); // Default width
    expect(response).toHaveProperty('height', 480); // Default height
  }, 120000);

  it('captures a frame as JPEG with custom config', async () => {
    const response = await client.executeToolFromFile(
      toolPath,
      { width: 800, height: 600 }, // Input parameters
      { cameraIndex: 0, format: 'jpeg' }, // Config
    );

    expect(response).toHaveProperty('__created_files__');
    expect(Array.isArray(response.__created_files__)).toBe(true);
    expect(response.__created_files__[0]).toMatch(/webcam_capture_\d+\.jpeg$/);

    expect(response).toHaveProperty('imagePath');
    expect(typeof response.imagePath).toBe('string');
    expect(response.imagePath).toMatch(/webcam_capture_\d+\.jpeg$/);

    // The reported path must point at a file that was really written
    expect(fs.existsSync(response.imagePath)).toBe(true);

    // Dimensions are read back from the captured frame by the tool
    expect(response).toHaveProperty('width', 800);
    expect(response).toHaveProperty('height', 600);
  }, 120000);

  it('handles invalid camera device gracefully', async () => {
    // Assert on the rejection directly instead of try/fail/catch: a call that
    // unexpectedly resolves now fails with a clear "promise resolved" error
    // rather than being swallowed by the catch block and reported as a
    // message mismatch.
    await expect(
      client.executeToolFromFile(
        toolPath,
        {},
        { cameraIndex: 999999, format: 'png' },
      ),
    ).rejects.toMatchObject({
      message: expect.stringMatching(/Failed to open webcam/i),
    });
  }, 20000);
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
{ | ||
"id": "webcam-capture", | ||
"name": "Webcam Capture Tool", | ||
"description": "Captures a single frame from a local webcam and returns it as a Base64-encoded image (PNG or JPEG). Example usage with Python + opencv.", | ||
"author": "Shinkai", | ||
"version": "1.0.0", | ||
"keywords": [ | ||
"webcam", | ||
"capture", | ||
"camera", | ||
"image", | ||
"tools" | ||
], | ||
"configurations": { | ||
"type": "object", | ||
"properties": { | ||
"cameraIndex": { | ||
"type": "number", | ||
"description": "Which camera index to capture from. 0 is the default. If you only have one camera, use 0." | ||
}, | ||
"format": { | ||
"type": "string", | ||
"description": "Image format to return (png or jpeg)", | ||
"default": "png" | ||
} | ||
}, | ||
"required": [] | ||
}, | ||
"parameters": { | ||
"type": "object", | ||
"properties": { | ||
"width": { | ||
"type": "number", | ||
"description": "Requested width of the capture in pixels", | ||
"default": 640 | ||
}, | ||
"height": { | ||
"type": "number", | ||
"description": "Requested height of the capture in pixels", | ||
"default": 480 | ||
} | ||
}, | ||
"required": [] | ||
}, | ||
"result": { | ||
"type": "object", | ||
"properties": { | ||
"imageBase64": { | ||
"type": "string", | ||
"description": "The captured image as a Base64-encoded string" | ||
}, | ||
"width": { | ||
"type": "number", | ||
"description": "Actual width of the returned frame" | ||
}, | ||
"height": { | ||
"type": "number", | ||
"description": "Actual height of the returned frame" | ||
} | ||
}, | ||
"required": [ | ||
"imageBase64", | ||
"width", | ||
"height" | ||
] | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
{ | ||
"categoryId": "b04aabe6-4fce-46f1-b6f2-7a96d742b9d1" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
# /// script | ||
# dependencies = [ | ||
# "requests", | ||
# "numpy==1.26.4", | ||
# "opencv-python==4.8.0.76" | ||
# ] | ||
# /// | ||
|
||
import cv2 | ||
import time | ||
import base64 | ||
import numpy as np | ||
import os | ||
import platform | ||
from typing import Dict, Any, Optional, List | ||
from shinkai_local_support import get_home_path | ||
|
||
class CONFIG:
    """Tool configuration read by `run`; both fields may be omitted."""
    # Camera index handed to cv2.VideoCapture; `run` falls back to 0 when absent.
    cameraIndex: Optional[int]
    # Output image format; `run` accepts 'png', 'jpeg' or 'jpg' and otherwise uses 'png'.
    format: Optional[str]
|
||
class INPUTS:
    """Per-call input parameters read by `run`; both fields may be omitted."""
    # Requested capture width in pixels; `run` falls back to 640 when absent.
    width: Optional[int]
    # Requested capture height in pixels; `run` falls back to 480 when absent.
    height: Optional[int]
|
||
class OUTPUT:
    """Result object populated and returned by `run`."""
    # Path of the image file written to disk.
    imagePath: str
    # Width of the captured frame, taken from the frame's shape.
    width: int
    # Height of the captured frame, taken from the frame's shape.
    height: int
|
||
async def run(config: CONFIG, inputs: INPUTS) -> OUTPUT:
    """
    Capture a single frame from a local webcam and save it to disk.

    Args:
        config: Tool configuration. `cameraIndex` selects the capture device
            (default 0). `format` selects the image type (default 'png');
            anything other than 'png', 'jpeg' or 'jpg' falls back to 'png'.
        inputs: Requested capture `width` and `height` in pixels
            (defaults 640 and 480).

    Returns:
        OUTPUT whose `imagePath` is the written file and whose `width` /
        `height` are read from the captured frame, not from the request.

    Raises:
        RuntimeError: If the camera cannot be opened, no frame can be read,
            or the image cannot be written to disk.
    """
    # The fields are Optional, so they may be missing *or* explicitly None;
    # both cases must fall back to the defaults. Values are cast to int
    # because the tool manifest types them as JSON "number".
    raw_index = getattr(config, 'cameraIndex', None)
    camera_index = 0 if raw_index is None else int(raw_index)

    img_format = str(getattr(config, 'format', None) or 'png').strip().lower()
    if img_format not in ('png', 'jpeg', 'jpg'):
        img_format = 'png'

    raw_width = getattr(inputs, 'width', None)
    raw_height = getattr(inputs, 'height', None)
    width = 640 if raw_width is None else int(raw_width)
    height = 480 if raw_height is None else int(raw_height)

    # Open the camera
    cap = cv2.VideoCapture(camera_index)
    try:
        if not cap.isOpened():
            raise RuntimeError(f"Failed to open webcam (index={camera_index}). Please check if the camera is connected and accessible.")

        # Set resolution
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)

        # Let the camera warm up and auto-adjust: grab/discard extra frames
        for _ in range(15):
            cap.read()

        # Wait a moment so the auto-exposure has time to adapt
        time.sleep(0.5)

        # Try to capture the final frame
        ret, frame = cap.read()
        if not ret or frame is None:
            raise RuntimeError("Failed to capture frame from webcam. Please check camera permissions and settings.")

        # Gamma correction for better brightness (>1 brightens, <1 darkens)
        gamma = 1.2
        look_up_table = np.array([((i / 255.0) ** (1.0 / gamma)) * 255 for i in range(256)]).astype("uint8")
        frame = cv2.LUT(frame, look_up_table)

        # Slice instead of unpacking three values so a 2-D (single-channel)
        # frame does not raise on unpacking.
        final_height, final_width = frame.shape[:2]

        # Get home path for writing file
        home_path = await get_home_path()

        # Millisecond timestamp so two captures within the same second do not
        # overwrite each other.
        timestamp = time.time_ns() // 1_000_000
        filename = f"webcam_capture_{timestamp}.{img_format}"
        file_path = os.path.join(home_path, filename)

        # Encode and write to file; the JPEG quality flag applies to jpeg/jpg only
        encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 95] if img_format.startswith('jp') else []
        if not cv2.imwrite(file_path, frame, encode_param):
            raise RuntimeError("Failed to write image to disk. Please check disk permissions and space.")

        # Create output
        output = OUTPUT()
        output.imagePath = file_path
        output.width = final_width
        output.height = final_height
        return output
    finally:
        # Always release the camera, including when it failed to open
        cap.release()