Commit

chore(dashboard): add new models in chat
louis030195 committed Jun 24, 2023
1 parent 6178a64 commit cc4d525
Showing 3 changed files with 125 additions and 13 deletions.
4 changes: 1 addition & 3 deletions dashboard/lib/apiMiddleware.ts
@@ -37,9 +37,7 @@ export const apiMiddleware = async ({ req, res }) => {
     'chat' :
     req.nextUrl.pathname === '/api/search' ?
     'search' :
-    req.nextUrl.pathname === '/api/add' ?
-    'add' :
-    'unknown'
+    'unknown'
 
   // api key auth
   if (apiKey) {
2 changes: 1 addition & 1 deletion dashboard/lib/utils.ts
@@ -157,4 +157,4 @@ async function huggingFaceStream(modelUrl: string, payload: HuggingFacePayload):
   });
 }
 
-export { generateText, huggingFaceStream };
+export { generateText, huggingFaceStream };
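
Note: the body of huggingFaceStream is not part of this commit; only its closing lines and export appear in the hunk above. As a rough sketch only, here is what such a helper might look like if the model endpoints are HuggingFace text-generation-inference servers emitting per-token `data: {...}` server-sent events. The HuggingFacePayload shape is inferred from the call sites in chat.ts below; nothing here beyond the two exported names is confirmed by the diff.

// Sketch only — NOT the implementation in dashboard/lib/utils.ts.
// Assumes a text-generation-inference server streaming SSE lines like:
//   data:{"token":{"id":1,"text":"Hi","logprob":-0.1,"special":false},"generated_text":null}
interface HuggingFacePayload {
  inputs: string
  stream: boolean
  parameters: { max_new_tokens: number; return_full_text?: boolean }
}

async function huggingFaceStream(
  modelUrl: string,
  payload: HuggingFacePayload
): Promise<ReadableStream<Uint8Array>> {
  const res = await fetch(modelUrl, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
  })
  const encoder = new TextEncoder()
  const decoder = new TextDecoder()

  return new ReadableStream({
    async start(controller) {
      const reader = res.body!.getReader()
      let buffered = ''
      while (true) {
        const { done, value } = await reader.read()
        if (done) break
        buffered += decoder.decode(value, { stream: true })
        // one SSE event per line; keep any trailing partial line in the buffer
        const lines = buffered.split('\n')
        buffered = lines.pop() ?? ''
        for (const line of lines) {
          if (!line.startsWith('data:')) continue
          try {
            const event = JSON.parse(line.slice('data:'.length))
            // forward each generated token's text to the consumer
            if (event?.token?.text) controller.enqueue(encoder.encode(event.token.text))
          } catch {
            // ignore malformed or partial JSON chunks
          }
        }
      }
      controller.close()
    },
  })
}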
132 changes: 123 additions & 9 deletions dashboard/pages/api/chat.ts
@@ -11,13 +11,14 @@ export const config = {
   runtime: 'edge',
 }
 
-type Model = 'chatgpt' | 'gpt4' | 'falcon' | 'google' | 'anthropic'
+type LLM = 'openai/gpt-4' | 'openai/gpt-3.5-turbo-16k' | 'tiiuae/falcon-7b' | 'google/bison' | 'bigscience/bloomz-7b1'
 
 interface RequestPayload {
   prompt: string
   history: Chat[]
   system?: string
-  model: Model
+  model: LLM
+  stream: boolean
 }
 
 export type Role = 'user' | 'system' | 'assistant'
@@ -26,8 +27,7 @@ type Chat = {
   content: string
 }
 const handler = async (req: Request, res: Response): Promise<Response> => {
-  const { prompt, history, system, model } = (await req.json()) as RequestPayload
-  console.log('starting')
+  const { prompt, history, system, model, stream } = (await req.json()) as RequestPayload
   if (!prompt) {
     return new Response(JSON.stringify({ error: 'No prompt in the request' }), {
       status: 400,
@@ -51,22 +51,136 @@ const handler = async (req: Request, res: Response): Promise<Response> => {
   }
 
   try {
-    let stream: ReadableStream
-    if (model === 'falcon') {
-      stream = await huggingFaceStream('http://34.127.99.191:9090', {
+    let readableStream: ReadableStream
+
+
+    if (model === 'tiiuae/falcon-7b') {
+      const url = 'http://34.127.99.191:9090'
+      if (!stream) {
+        const res = await fetch(url, {
+          method: 'POST',
+          headers: {
+            'Content-Type': 'application/json',
+          },
+          body: JSON.stringify({
+            inputs: JSON.stringify(messages),
+            stream: false,
+            parameters: {
+              max_new_tokens: 1000,
+              return_full_text: false,
+            },
+          }),
+        }).then((res) => res.json())
+        console.log('res', res)
+        return new Response(JSON.stringify({
+          generated_text: res?.[0]?.generated_text || ''
+        }), {
+          status: 200,
+        })
+      }
+      readableStream = await huggingFaceStream(url, {
         inputs: JSON.stringify(messages),
         stream: true,
         parameters: {
           // { model_id: "tiiuae/falcon-7b", revision: None, sharded: None, num_shard: Some(1), quantize: None, trust_remote_code: false, max_concurrent_requests: 128, max_best_of: 2, max_stop_sequences: 4, max_input_length: 1000, max_total_tokens: 1512, max_batch_size: None, waiting_served_ratio: 1.2, max_batch_total_tokens: 32000, max_waiting_tokens: 20, port: 80, shard_uds_path: "/tmp/text-generation-server", master_addr: "localhost", master_port: 29500, huggingface_hub_cache: Some("/data"), weights_cache_override: None, disable_custom_kernels: false, json_output: false, otlp_endpoint: None, cors_allow_origin: [], watermark_gamma: None, watermark_delta: None, env: false }
           max_new_tokens: 1000
         }
       })
+    } else if (model === 'bigscience/bloomz-7b1') {
+      const url = 'http://34.70.171.197:9090'
+      if (!stream) {
+        const res = await fetch(url, {
+          method: 'POST',
+          headers: {
+            'Content-Type': 'application/json',
+          },
+          body: JSON.stringify({
+            inputs: JSON.stringify(messages),
+            stream: false,
+            parameters: {
+              max_new_tokens: 1000,
+              return_full_text: false,
+            },
+          }),
+        }).then((res) => res.json())
+        return new Response(JSON.stringify({
+          generated_text: res?.[0]?.generated_text || ''
+        }), {
+          status: 200,
+        })
+      }
+      // { model_id: "bigscience/bloomz-7b1", revision: None, sharded: None, num_shard: Some(1), quantize: None, trust_remote_code: false, max_concurrent_requests: 128, max_best_of: 2, max_stop_sequences: 4, max_input_length: 1000, max_total_tokens: 1512, max_batch_size: None, waiting_served_ratio: 1.2, max_batch_total_tokens: 32000, max_waiting_tokens: 20, port: 80, shard_uds_path: "/tmp/text-generation-server", master_addr: "localhost", master_port: 29500, huggingface_hub_cache: Some("/data"), weights_cache_override: None, disable_custom_kernels: false, json_output: false, otlp_endpoint: None, cors_allow_origin: [], watermark_gamma: None, watermark_delta: None, env: false
+      readableStream = await huggingFaceStream(url, {
+        inputs: JSON.stringify(messages),
+        stream: true,
+        parameters: {
+          max_new_tokens: 1000
+        }
+      })
+    } else if (model === 'google/bison') {
+      const url = 'https://llm-usx5gpslaq-uc.a.run.app'
+
+      const res = await fetch(url, {
+        method: 'POST',
+        body: JSON.stringify({
+          prompt: prompt,
+        }),
+        headers: {
+          'Content-Type': 'application/json',
+        },
+      })
+
+      const data: { answer: string } = await res.json()
+      return cors(
+        req,
+        new Response(JSON.stringify({
+          generated_text: data.answer
+        }), {
+          status: 200,
+        })
+      )
+    } else if (model === 'openai/gpt-4') {
+      payload.model = 'gpt-4'
+      if (!stream) {
+        payload.stream = stream
+        const res = await fetch('https://api.openai.com/v1/chat/completions', {
+          headers: {
+            'Content-Type': 'application/json',
+            Authorization: `Bearer ${process.env.OPENAI_API_KEY ?? ''}`,
+          },
+          method: 'POST',
+          body: JSON.stringify(payload),
+        }).then((res) => res.json())
+        return new Response(JSON.stringify({
+          generated_text: res?.choices?.[0]?.message.content || ''
+        }), {
+          status: 200,
+        })
+      }
+      readableStream = await OpenAIStream(payload)
     } else {
-      stream = await OpenAIStream(payload)
+      if (!stream) {
+        payload.stream = stream
+        const res = await fetch('https://api.openai.com/v1/chat/completions', {
+          headers: {
+            'Content-Type': 'application/json',
+            Authorization: `Bearer ${process.env.OPENAI_API_KEY ?? ''}`,
+          },
+          method: 'POST',
+          body: JSON.stringify(payload),
+        }).then((res) => res.json())
+        return new Response(JSON.stringify({
+          generated_text: res?.choices?.[0]?.message.content || ''
+        }), {
+          status: 200,
+        })
+      }
+      readableStream = await OpenAIStream(payload)
     }
+    console.log('readableStream', readableStream)
     return cors(
       req,
-      new Response(stream, {
+      new Response(readableStream, {
         status: 200,
       })
     )
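
As a usage illustration only (not part of the commit), a client could call the updated endpoint like this. The request body follows RequestPayload above, the `/api/chat` path follows the middleware mapping, and the two response shapes ({ generated_text } JSON vs. a raw text stream) follow the branches in this diff; the chat helper name and error handling are hypothetical.

type LLM = 'openai/gpt-4' | 'openai/gpt-3.5-turbo-16k' | 'tiiuae/falcon-7b' | 'google/bison' | 'bigscience/bloomz-7b1'

// Hypothetical client helper — not part of this commit.
async function chat(prompt: string, model: LLM, stream: boolean): Promise<string> {
  const res = await fetch('/api/chat', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ prompt, history: [], model, stream }),
  })
  if (!res.ok) throw new Error(`chat failed: ${res.status}`)

  // Non-streaming requests (and google/bison, which ignores `stream`)
  // return { generated_text } JSON per the diff.
  if (!stream || model === 'google/bison') {
    const data = await res.json()
    return data.generated_text
  }

  // Streaming requests return plain text chunks; concatenate them.
  const reader = res.body!.getReader()
  const decoder = new TextDecoder()
  let text = ''
  while (true) {
    const { done, value } = await reader.read()
    if (done) break
    text += decoder.decode(value, { stream: true })
  }
  return text
}

// e.g.: const answer = await chat('Hello!', 'tiiuae/falcon-7b', true)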

2 comments on commit cc4d525


@vercel vercel bot commented on cc4d525 Jun 24, 2023


Successfully deployed to the following URLs:

embedbase – ./dashboard

embedbase.vercel.app
embedbase-git-main-prologe.vercel.app
app.embedbase.xyz
embedbase-prologe.vercel.app


@vercel vercel bot commented on cc4d525 Jun 24, 2023
