-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathconfig.json
60 lines (60 loc) · 8.57 KB
/
config.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
{
"system_prompt": "<system_prompt><description>You are an expert AI framework advisor. Based on the following use case description: '{use_case_description}', evaluate and select the most suitable AI framework from the following options: vLLM, FastChat, Mistral.rs, Ollama, SGLangChain, Transformers/Pipeline, Transformers/Tokenizer, llama.cpp, ONNX Runtime, PyTorch, TensorFlow Serving, DeepSpeed-Inference, NVIDIA Triton, NVIDIA TensorRT, NVIDIA Inference Microservice (NIM), OpenVINO, DJL (Deep Java Library), Ray Serve, KServe, TorchServe, Hugging Face Inference API, AWS SageMaker, Google Vertex AI, Apache TVM, TinyML, LiteRT, DeepSparse, ONNX.js, TFLite, Core ML, SNPE (Snapdragon Neural Processing Engine), MACE (Mobile AI Compute Engine), NCNN, LiteML, Banana, Gradient Inference, H2O AI Cloud, Inferentia, RunPod, Deci AI, RedisAI, MLflow, ONNX Runtime Web, Raspberry Pi Compute, Colossal-AI, Enflame AI, Azure Machine Learning Endpoint, BigDL, Amazon SageMaker Neo, ModelPlace.AI, Hugging Face Text Generation Inference, LiteX, Deploy.ai, Snorkel Flow, Azure Functions for ML, AWS Lambda for ML, Dask-ML.</description><criteria><throughput>Number of tokens processed per second. (Low: < 100, Medium: 100-300, High: > 300)</throughput><latency>Time to First Token (TTFT) in milliseconds. (Low: < 50 ms, Medium: 50-100 ms, High: > 100 ms)</latency><scalability>Ability to handle increased load. (Low, Medium, High)</scalability><security>Level of security features available. (Low, Medium, High)</security><ease_of_use>Complexity of setup and integration. (Low, Medium, High)</ease_of_use><model_support>Variety of models supported. (Low, Medium, High)</model_support><cost_efficiency>Resource utilization and cost-effectiveness. (Low, Medium, High)</cost_efficiency></criteria><evaluation_logic><real_time_applications>Prioritize high throughput and low latency (e.g., chatbots, virtual assistants).</real_time_applications><enterprise_applications>Prioritize high security and scalability (e.g., fraud detection, customer service).</enterprise_applications><resource_constrained_environments>Prioritize cost efficiency and ease of use.</resource_constrained_environments><diverse_model_requirements>Prioritize high model support.</diverse_model_requirements></evaluation_logic><examples><example><use_case>Interactive AI Assistant</use_case><selected_framework>SGLang</selected_framework><reasoning>High throughput and low latency make it ideal for real-time interactions.</reasoning></example><example><use_case>Enterprise-Level Customer Support Chatbot</use_case><selected_framework>NVIDIA NIM</selected_framework><reasoning>Offers robust security features and is optimized for enterprise-level deployments.</reasoning></example><example><use_case>Real-Time Programming Assistance Tool</use_case><selected_framework>vLLM</selected_framework><reasoning>Provides low latency and efficient memory management suitable for coding assistance.</reasoning></example></examples><output_format>{ \"framework_name\": \"<Selected Framework>\", \"reasoning\": \"<Reason for selection based on criteria>\", \"criteria_values\": { \"throughput\": \"<Value>\", \"latency\": \"<Value>\", \"scalability\": \"<Value>\", \"security\": \"<Value>\", \"ease_of_use\": \"<Value>\", \"model_support\": \"<Value>\", \"cost_efficiency\": \"<Value>\" } }</output_format><rules><rule>Always evaluate all frameworks based on the criteria.</rule><rule>Provide clear reasoning for the selected framework.</rule><rule>Return a valid JSON object without the word 'json' or single quotes in the response.</rule><rule>Return only the requested content as per the specified format.</rule></rules></system_prompt>",
"use_case_list": [
"Interactive AI Assistant requiring high throughput and low latency for real-time interactions.",
"Enterprise-Level Customer Support Chatbot requiring high security and scalability.",
"Real-Time Programming Assistance Tool requiring low latency and efficient memory management.",
"Multi-modal Content Generation System requiring high throughput and diverse model support.",
"Large-Scale Chatbot Deployment for Customer Engagement requiring high throughput and scalability.",
"Fraud Detection System in Finance requiring high security and low latency.",
"Personalized Product Recommendation Engine requiring high throughput and cost efficiency.",
"Predictive Maintenance System in Manufacturing requiring high scalability and model support.",
"Medical Image Analysis System requiring high security and ease of use.",
"Sentiment Analysis on Social Media requiring high throughput and cost efficiency.",
"Speech Recognition for Virtual Assistants requiring low latency and high throughput.",
"Autonomous Vehicle Navigation System requiring high security and low latency.",
"Supply Chain Optimization System requiring high scalability and cost efficiency.",
"AI-Driven Content Generation Platform requiring high throughput and diverse model support.",
"Predictive Analytics for Sales Forecasting requiring high scalability and ease of use.",
"Real-Time Language Translation System requiring low latency and high throughput.",
"Automated Document Processing System requiring high ease of use and cost efficiency.",
"AI-Powered Tutoring System requiring high model support and ease of use.",
"Smart Home Automation System requiring low latency and high ease of use.",
"Image Recognition for Security Systems requiring high security and low latency.",
"Customer Churn Prediction System requiring high scalability and cost efficiency.",
"Dynamic Pricing Strategy in Retail requiring high throughput and cost efficiency.",
"AI-Enhanced Recruitment Process requiring high security and ease of use.",
"Energy Consumption Optimization System requiring high scalability and cost efficiency.",
"Virtual Health Assistant requiring low latency and high ease of use.",
"Traffic Prediction and Management System requiring high scalability and low latency.",
"Personalized Marketing Campaign requiring high throughput and cost efficiency.",
"Predictive Analytics for Healthcare Outcomes requiring high security and model support.",
"Robotic Process Automation (RPA) requiring high ease of use and cost efficiency.",
"AI-Based Financial Advisory Service requiring high security and model support.",
"Automated Video Editing Tool requiring high throughput and ease of use.",
"Smart Farming Solution using AI requiring high scalability and cost efficiency.",
"AI-Driven Market Research Analysis requiring high throughput and model support.",
"Recommendation Engine for Streaming Services requiring high throughput and cost efficiency.",
"Natural Language Processing for Chatbots requiring low latency and high throughput.",
"Automated News Summarization Tool requiring high throughput and ease of use.",
"AI-Enhanced Cybersecurity Measures requiring high security and low latency.",
"Virtual Reality Experience powered by AI requiring low latency and high throughput.",
"AI-Assisted Legal Document Review requiring high security and ease of use.",
"Image and Video Content Moderation requiring high throughput and security.",
"AI-Driven Customer Feedback Analysis requiring high scalability and cost efficiency.",
"Smart Inventory Management System requiring high scalability and ease of use.",
"Automated Quality Assurance in Manufacturing requiring high security and cost efficiency.",
"Personalized Learning Experience in Education requiring high ease of use and model support.",
"AI for Wildlife Monitoring and Conservation requiring high scalability and cost efficiency.",
"Medical Diagnosis Support System using AI requiring high security and model support.",
"Predictive Modeling for Climate Change Impact Assessment requiring high scalability and model support.",
"AI-Powered Competitive Analysis Tool requiring high throughput and ease of use.",
"Social Media Monitoring and Engagement Tool requiring high throughput and cost efficiency.",
"Automated Financial Reporting System requiring high security and ease of use.",
"AI-Assisted Personal Finance Management App requiring high ease of use and cost efficiency.",
"Content Curation and Discovery Platform using AI requiring high throughput and model support.",
"Smart City Solution powered by AI Analytics requiring high scalability and security.",
"Automated Risk Assessment in Insurance Underwriting requiring high security and model support.",
"Chatbot for Mental Health Support Services requiring high security and ease of use."
]
}