- Add killport.js script for terminating processes on specific ports (a rough sketch follows below)

- Introduce `supportedModels` in `ClientChatStore` and update model validation logic
- Enhance OpenAI inferencing with local setup adaptations and improved streaming options
- Modify ChatService to handle local and remote model fetching
- Update input menu to dynamically fetch and display supported models
- Add start_inference_server.sh for initiating local inference server
- Upgrade OpenAI SDK to v5.0.1 and adjust dependencies accordingly
geoffsee
2025-05-29 19:28:54 -04:00
committed by Geoff Seemueller
parent c9ee7c7690
commit cc0da17b5f
11 changed files with 204 additions and 23 deletions
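The contents of killport.js are not included in the hunks below. As a rough, hypothetical sketch only (assuming a Unix-like environment with `lsof` on the PATH and an ESM-enabled Node project; the actual script may differ):

    #!/usr/bin/env node
    // Hypothetical sketch of a killport-style script; not taken from this commit.
    import { execSync } from "node:child_process";

    const port = process.argv[2];
    if (!port) {
      console.error("Usage: node killport.js <port>");
      process.exit(1);
    }

    try {
      // List PIDs listening on the port, then terminate them.
      const pids = execSync(`lsof -ti tcp:${port}`).toString().trim().split("\n").filter(Boolean);
      for (const pid of pids) {
        process.kill(Number(pid), "SIGTERM");
        console.log(`Killed process ${pid} on port ${port}`);
      }
    } catch {
      console.log(`No process found listening on port ${port}`);
    }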

View File

@@ -28,6 +28,13 @@ export function createRouter() {
     },
   )
+  .get("/api/models",
+    async (req, env, ctx) => {
+      const { chatService } = createRequestContext(env, ctx);
+      return chatService.getSupportedModels();
+    },
+  )
   .post("/api/feedback", async (r, e, c) => {
     const { feedbackService } = createRequestContext(e, c);
     return feedbackService.handleFeedback(r);

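The new route simply proxies chatService.getSupportedModels(), which responds with JSON. A minimal client-side consumer might look like this (hypothetical helper; the exact element shape depends on SUPPORTED_MODELS and the local server, neither shown in full here):

    // Hypothetical sketch: fetch the model list from the new /api/models route.
    // Against a local inference server this is expected to be an array of model
    // id strings; the remote branch returns SUPPORTED_MODELS as-is.
    export async function fetchSupportedModels(baseUrl: string = ""): Promise<string[]> {
      const res = await fetch(`${baseUrl}/api/models`);
      if (!res.ok) {
        throw new Error(`GET /api/models failed with status ${res.status}`);
      }
      return (await res.json()) as string[];
    }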
View File

@@ -59,4 +59,38 @@ export class Utils {
     return result;
   }
+
+  static normalizeWithBlanks<T extends Normalize.ChatMessage>(msgs: T[]): T[] {
+    const out: T[] = [];
+
+    // In local mode first turn expected to be user.
+    let expected: Normalize.Role = "user";
+
+    for (const m of msgs) {
+      while (m.role !== expected) {
+        // Insert blanks to match expected sequence user/assistant/user...
+        out.push(Normalize.makeBlank(expected) as T);
+        expected = expected === "user" ? "assistant" : "user";
+      }
+
+      out.push(m);
+      expected = expected === "user" ? "assistant" : "user";
+    }
+
+    return out;
+  }
 }
+
+module Normalize {
+  export type Role = "user" | "assistant";
+
+  export interface ChatMessage extends Record<any, any> {
+    role: Role;
+  }
+
+  export const makeBlank = (role: Role): ChatMessage => ({
+    role,
+    content: ""
+  });
+}

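normalizeWithBlanks pads a conversation so roles strictly alternate user/assistant starting with a user turn, which local models typically require. A small worked example with hypothetical messages:

    import { Utils } from "../lib/utils";

    // Hypothetical input: starts with an assistant turn and has two consecutive user turns.
    const input = [
      { role: "assistant" as const, content: "Hi, how can I help?" },
      { role: "user" as const, content: "Summarize this article." },
      { role: "user" as const, content: "Keep it short." },
    ];

    const normalized = Utils.normalizeWithBlanks(input);
    // normalized is now:
    // [
    //   { role: "user", content: "" },          // blank inserted before the leading assistant turn
    //   { role: "assistant", content: "Hi, how can I help?" },
    //   { role: "user", content: "Summarize this article." },
    //   { role: "assistant", content: "" },     // blank inserted between consecutive user turns
    //   { role: "user", content: "Keep it short." },
    // ]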
View File

@@ -1,5 +1,7 @@
 import { OpenAI } from "openai";
 import ChatSdk from "../lib/chat-sdk";
+import { Utils } from "../lib/utils";
+import { ChatCompletionCreateParamsStreaming } from "openai/resources/chat/completions/completions";

 export class OpenAiChatSdk {
   static async handleOpenAiStream(
@@ -81,14 +83,42 @@ export class OpenAiChatSdk {
      return gpt4oTuningParams;
    };

-   const openAIStream = await opts.openai.chat.completions.create({
+   let completionRequest: ChatCompletionCreateParamsStreaming = {
      model: opts.model,
-     messages: messages,
      stream: true,
-     ...getTuningParams(),
-   });
+     messages: messages
+   };
+
+   const isLocal = opts.openai.baseURL.includes("localhost");
+
+   if (isLocal) {
+     completionRequest["messages"] = Utils.normalizeWithBlanks(messages);
+     completionRequest["stream_options"] = {
+       include_usage: true
+     };
+   } else {
+     completionRequest = { ...completionRequest, ...getTuningParams() };
+   }
+
+   const openAIStream = await opts.openai.chat.completions.create(completionRequest);

    for await (const chunk of openAIStream) {
+     if (isLocal && chunk.usage) {
+       dataCallback({
+         type: "chat",
+         data: {
+           choices: [
+             {
+               delta: { content: "" },
+               logprobs: null,
+               finish_reason: "stop",
+             },
+           ],
+         },
+       });
+       break;
+     }
      dataCallback({ type: "chat", data: chunk });
    }
  }

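With `stream_options.include_usage` enabled, an OpenAI-compatible server emits a final chunk whose `choices` array is empty and which carries a `usage` object; the handler above treats that chunk as its end-of-stream signal and forwards a synthetic stop delta instead of the raw chunk. Roughly (illustrative shapes and token counts only):

    // Illustration only: approximate final chunk from a local OpenAI-compatible
    // server when include_usage is set (field values are made up).
    const finalUsageChunk = {
      object: "chat.completion.chunk",
      choices: [],
      usage: { prompt_tokens: 42, completion_tokens: 128, total_tokens: 170 },
    };

    // What handleOpenAiStream forwards to dataCallback for that chunk: a synthetic
    // stop delta, so downstream consumers see a normal end-of-stream event.
    const syntheticStop = {
      type: "chat",
      data: {
        choices: [{ delta: { content: "" }, logprobs: null, finish_reason: "stop" }],
      },
    };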
View File

@@ -3,7 +3,7 @@ import OpenAI from 'openai';
 import ChatSdk from '../lib/chat-sdk';
 import Message from "../models/Message";
 import O1Message from "../models/O1Message";
-import { getModelFamily, ModelFamily } from "../../../src/components/chat/lib/SupportedModels";
+import { getModelFamily, ModelFamily, SUPPORTED_MODELS } from "../../../src/components/chat/lib/SupportedModels";
 import { OpenAiChatSdk } from "../providers/openai";
 import { GroqChatSdk } from "../providers/groq";
 import { ClaudeChatSdk } from "../providers/claude";
@@ -73,11 +73,21 @@ const ChatService = types
      throw new Error('Unsupported message format');
    };

+   const getSupportedModels = async () => {
+     if (self.env.OPENAI_API_ENDPOINT.includes("localhost")) {
+       const openaiClient = new OpenAI({ baseURL: self.env.OPENAI_API_ENDPOINT });
+       const models = await openaiClient.models.list();
+       return Response.json(models.data.map(model => model.id));
+     }
+     return Response.json(SUPPORTED_MODELS);
+   };
+
    const createStreamParams = async (
      streamConfig: any,
      dynamicContext: any,
      durableObject: any
    ): Promise<StreamParams> => {
      return {
        env: self.env,
        openai: self.openai,
@@ -112,6 +122,7 @@ const ChatService = types
    };

    return {
+     getSupportedModels,
      setActiveStream(streamId: string, stream: any) {
        const validStream = {
          name: stream?.name || "Unnamed Stream",
@@ -129,10 +140,18 @@ const ChatService = types
      },
      setEnv(env: Env) {
        self.env = env;
-       self.openai = new OpenAI({
-         apiKey: self.openAIApiKey,
-         baseURL: self.openAIBaseURL,
-       });
+       if (env.OPENAI_API_ENDPOINT.includes("localhost")) {
+         self.openai = new OpenAI({
+           apiKey: self.env.OPENAI_API_KEY,
+           baseURL: self.env.OPENAI_API_ENDPOINT,
+         });
+       } else {
+         self.openai = new OpenAI({
+           apiKey: self.openAIApiKey,
+           baseURL: self.openAIBaseURL,
+         });
+       }
      },

      handleChatRequest: async (request: Request) => {
@@ -154,12 +173,12 @@ const ChatService = types
      }) {
        const { streamConfig, streamParams, controller, encoder, streamId } = params;

-       const modelFamily = getModelFamily(streamConfig.model);
+       const modelFamily = !self.env.OPENAI_API_ENDPOINT.includes("localhost") ? getModelFamily(streamConfig.model) : "openai";
+       const handler = !self.env.OPENAI_API_ENDPOINT.includes("localhost") ? modelHandlers[modelFamily as ModelFamily] : modelHandlers.openai;
-       const handler = modelHandlers[modelFamily as ModelFamily];

        if (handler) {
          try {
            await handler(streamParams, handleStreamData(controller, encoder));
          } catch (error) {