Commit

chore(dashboard): add new models in chat
louis030195 committed Jun 24, 2023
1 parent 6178a64 commit cc4d525
Showing 3 changed files with 125 additions and 13 deletions.
4 changes: 1 addition & 3 deletions dashboard/lib/apiMiddleware.ts
@@ -37,9 +37,7 @@ export const apiMiddleware = async ({ req, res }) => {
     'chat' :
     req.nextUrl.pathname === '/api/search' ?
     'search' :
-    req.nextUrl.pathname === '/api/add' ?
-    'add' :
-    'unknown'
+    'unknown'
 
   // api key auth
   if (apiKey) {
2 changes: 1 addition & 1 deletion dashboard/lib/utils.ts
@@ -157,4 +157,4 @@ async function huggingFaceStream(modelUrl: string, payload: HuggingFacePayload):
   });
 }
 
-export { generateText, huggingFaceStream };
+export { generateText, huggingFaceStream };
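
Note: the body of huggingFaceStream is not part of this commit; only its closing lines and export appear in the hunk above. As a rough sketch only, here is what such a helper might look like if the model endpoints are HuggingFace text-generation-inference servers emitting per-token `data: {...}` server-sent events. The HuggingFacePayload shape is inferred from the call sites in chat.ts below; nothing here beyond the two exported names is confirmed by the diff.

// Sketch only — NOT the implementation in dashboard/lib/utils.ts.
// Assumes a text-generation-inference server streaming SSE lines like:
//   data:{"token":{"id":1,"text":"Hi","logprob":-0.1,"special":false},"generated_text":null}
interface HuggingFacePayload {
  inputs: string
  stream: boolean
  parameters: { max_new_tokens: number; return_full_text?: boolean }
}

async function huggingFaceStream(
  modelUrl: string,
  payload: HuggingFacePayload
): Promise<ReadableStream<Uint8Array>> {
  const res = await fetch(modelUrl, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
  })
  const encoder = new TextEncoder()
  const decoder = new TextDecoder()

  return new ReadableStream({
    async start(controller) {
      const reader = res.body!.getReader()
      let buffered = ''
      while (true) {
        const { done, value } = await reader.read()
        if (done) break
        buffered += decoder.decode(value, { stream: true })
        // one SSE event per line; keep any trailing partial line in the buffer
        const lines = buffered.split('\n')
        buffered = lines.pop() ?? ''
        for (const line of lines) {
          if (!line.startsWith('data:')) continue
          try {
            const event = JSON.parse(line.slice('data:'.length))
            // forward each generated token's text to the consumer
            if (event?.token?.text) controller.enqueue(encoder.encode(event.token.text))
          } catch {
            // ignore malformed or partial JSON chunks
          }
        }
      }
      controller.close()
    },
  })
}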
132 changes: 123 additions & 9 deletions dashboard/pages/api/chat.ts
@@ -11,13 +11,14 @@ export const config = {
   runtime: 'edge',
 }
 
-type Model = 'chatgpt' | 'gpt4' | 'falcon' | 'google' | 'anthropic'
+type LLM = 'openai/gpt-4' | 'openai/gpt-3.5-turbo-16k' | 'tiiuae/falcon-7b' | 'google/bison' | 'bigscience/bloomz-7b1'
 
 interface RequestPayload {
   prompt: string
   history: Chat[]
   system?: string
-  model: Model
+  model: LLM
+  stream: boolean
 }
 
 export type Role = 'user' | 'system' | 'assistant'
@@ -26,8 +27,7 @@ type Chat = {
   content: string
 }
 const handler = async (req: Request, res: Response): Promise<Response> => {
-  const { prompt, history, system, model } = (await req.json()) as RequestPayload
-  console.log('starting')
+  const { prompt, history, system, model, stream } = (await req.json()) as RequestPayload
   if (!prompt) {
     return new Response(JSON.stringify({ error: 'No prompt in the request' }), {
       status: 400,
@@ -51,22 +51,136 @@ const handler = async (req: Request, res: Response): Promise<Response> => {
   }
 
   try {
-    let stream: ReadableStream
-    if (model === 'falcon') {
-      stream = await huggingFaceStream('http://34.127.99.191:9090', {
+    let readableStream: ReadableStream
+
+
+    if (model === 'tiiuae/falcon-7b') {
+      const url = 'http://34.127.99.191:9090'
+      if (!stream) {
+        const res = await fetch(url, {
+          method: 'POST',
+          headers: {
+            'Content-Type': 'application/json',
+          },
+          body: JSON.stringify({
+            inputs: JSON.stringify(messages),
+            stream: false,
+            parameters: {
+              max_new_tokens: 1000,
+              return_full_text: false,
+            },
+          }),
+        }).then((res) => res.json())
+        console.log('res', res)
+        return new Response(JSON.stringify({
+          generated_text: res?.[0]?.generated_text || ''
+        }), {
+          status: 200,
+        })
+      }
+      readableStream = await huggingFaceStream(url, {
         inputs: JSON.stringify(messages),
         stream: true,
         parameters: {
           // { model_id: "tiiuae/falcon-7b", revision: None, sharded: None, num_shard: Some(1), quantize: None, trust_remote_code: false, max_concurrent_requests: 128, max_best_of: 2, max_stop_sequences: 4, max_input_length: 1000, max_total_tokens: 1512, max_batch_size: None, waiting_served_ratio: 1.2, max_batch_total_tokens: 32000, max_waiting_tokens: 20, port: 80, shard_uds_path: "/tmp/text-generation-server", master_addr: "localhost", master_port: 29500, huggingface_hub_cache: Some("/data"), weights_cache_override: None, disable_custom_kernels: false, json_output: false, otlp_endpoint: None, cors_allow_origin: [], watermark_gamma: None, watermark_delta: None, env: false }
           max_new_tokens: 1000
         }
       })
+    } else if (model === 'bigscience/bloomz-7b1') {
+      const url = 'http://34.70.171.197:9090'
+      if (!stream) {
+        const res = await fetch(url, {
+          method: 'POST',
+          headers: {
+            'Content-Type': 'application/json',
+          },
+          body: JSON.stringify({
+            inputs: JSON.stringify(messages),
+            stream: false,
+            parameters: {
+              max_new_tokens: 1000,
+              return_full_text: false,
+            },
+          }),
+        }).then((res) => res.json())
+        return new Response(JSON.stringify({
+          generated_text: res?.[0]?.generated_text || ''
+        }), {
+          status: 200,
+        })
+      }
+      // { model_id: "bigscience/bloomz-7b1", revision: None, sharded: None, num_shard: Some(1), quantize: None, trust_remote_code: false, max_concurrent_requests: 128, max_best_of: 2, max_stop_sequences: 4, max_input_length: 1000, max_total_tokens: 1512, max_batch_size: None, waiting_served_ratio: 1.2, max_batch_total_tokens: 32000, max_waiting_tokens: 20, port: 80, shard_uds_path: "/tmp/text-generation-server", master_addr: "localhost", master_port: 29500, huggingface_hub_cache: Some("/data"), weights_cache_override: None, disable_custom_kernels: false, json_output: false, otlp_endpoint: None, cors_allow_origin: [], watermark_gamma: None, watermark_delta: None, env: false
+      readableStream = await huggingFaceStream(url, {
+        inputs: JSON.stringify(messages),
+        stream: true,
+        parameters: {
+          max_new_tokens: 1000
+        }
+      })
+    } else if (model === 'google/bison') {
+      const url = 'https://llm-usx5gpslaq-uc.a.run.app'
+
+      const res = await fetch(url, {
+        method: 'POST',
+        body: JSON.stringify({
+          prompt: prompt,
+        }),
+        headers: {
+          'Content-Type': 'application/json',
+        },
+      })
+
+      const data: { answer: string } = await res.json()
+      return cors(
+        req,
+        new Response(JSON.stringify({
+          generated_text: data.answer
+        }), {
+          status: 200,
+        })
+      )
+    } else if (model === 'openai/gpt-4') {
+      payload.model = 'gpt-4'
+      if (!stream) {
+        payload.stream = stream
+        const res = await fetch('https://api.openai.com/v1/chat/completions', {
+          headers: {
+            'Content-Type': 'application/json',
+            Authorization: `Bearer ${process.env.OPENAI_API_KEY ?? ''}`,
+          },
+          method: 'POST',
+          body: JSON.stringify(payload),
+        }).then((res) => res.json())
+        return new Response(JSON.stringify({
+          generated_text: res?.choices?.[0]?.message.content || ''
+        }), {
+          status: 200,
+        })
+      }
+      readableStream = await OpenAIStream(payload)
     } else {
-      stream = await OpenAIStream(payload)
+      if (!stream) {
+        payload.stream = stream
+        const res = await fetch('https://api.openai.com/v1/chat/completions', {
+          headers: {
+            'Content-Type': 'application/json',
+            Authorization: `Bearer ${process.env.OPENAI_API_KEY ?? ''}`,
+          },
+          method: 'POST',
+          body: JSON.stringify(payload),
+        }).then((res) => res.json())
+        return new Response(JSON.stringify({
+          generated_text: res?.choices?.[0]?.message.content || ''
+        }), {
+          status: 200,
+        })
+      }
+      readableStream = await OpenAIStream(payload)
     }
+    console.log('readableStream', readableStream)
     return cors(
       req,
-      new Response(stream, {
+      new Response(readableStream, {
         status: 200,
       })
     )
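
As a usage illustration only (not part of the commit), a client could call the updated endpoint like this. The request body follows RequestPayload above, the `/api/chat` path follows the middleware mapping, and the two response shapes ({ generated_text } JSON vs. a raw text stream) follow the branches in this diff; the chat helper name and error handling are hypothetical.

type LLM = 'openai/gpt-4' | 'openai/gpt-3.5-turbo-16k' | 'tiiuae/falcon-7b' | 'google/bison' | 'bigscience/bloomz-7b1'

// Hypothetical client helper — not part of this commit.
async function chat(prompt: string, model: LLM, stream: boolean): Promise<string> {
  const res = await fetch('/api/chat', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ prompt, history: [], model, stream }),
  })
  if (!res.ok) throw new Error(`chat failed: ${res.status}`)

  // Non-streaming requests (and google/bison, which ignores `stream`)
  // return { generated_text } JSON per the diff.
  if (!stream || model === 'google/bison') {
    const data = await res.json()
    return data.generated_text
  }

  // Streaming requests return plain text chunks; concatenate them.
  const reader = res.body!.getReader()
  const decoder = new TextDecoder()
  let text = ''
  while (true) {
    const { done, value } = await reader.read()
    if (done) break
    text += decoder.decode(value, { stream: true })
  }
  return text
}

// e.g.: const answer = await chat('Hello!', 'tiiuae/falcon-7b', true)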

2 comments on commit cc4d525


@vercel vercel bot commented on cc4d525 Jun 24, 2023


Successfully deployed to the following URLs:

embedbase – ./dashboard

embedbase.vercel.app
embedbase-git-main-prologe.vercel.app
app.embedbase.xyz
embedbase-prologe.vercel.app


@vercel vercel bot commented on cc4d525 Jun 24, 2023
