- Add killport.js script for terminating processes on specific ports (a rough sketch follows below)

- Introduce `supportedModels` in `ClientChatStore` and update model validation logic
- Enhance OpenAI inferencing with local setup adaptations and improved streaming options
- Modify ChatService to handle local and remote model fetching
- Update input menu to dynamically fetch and display supported models
- Add start_inference_server.sh for initiating local inference server
- Upgrade OpenAI SDK to v5.0.1 and adjust dependencies accordingly
geoffsee
2025-05-29 19:28:54 -04:00
committed by Geoff Seemueller
parent c9ee7c7690
commit cc0da17b5f
11 changed files with 204 additions and 23 deletions
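The contents of killport.js are not included in the hunks below. As a rough, hypothetical sketch only (assuming a Unix-like environment with `lsof` on the PATH and an ESM-enabled Node project; the actual script may differ):

    #!/usr/bin/env node
    // Hypothetical sketch of a killport-style script; not taken from this commit.
    import { execSync } from "node:child_process";

    const port = process.argv[2];
    if (!port) {
      console.error("Usage: node killport.js <port>");
      process.exit(1);
    }

    try {
      // List PIDs listening on the port, then terminate them.
      const pids = execSync(`lsof -ti tcp:${port}`).toString().trim().split("\n").filter(Boolean);
      for (const pid of pids) {
        process.kill(Number(pid), "SIGTERM");
        console.log(`Killed process ${pid} on port ${port}`);
      }
    } catch {
      console.log(`No process found listening on port ${port}`);
    }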

View File

@@ -28,6 +28,13 @@ export function createRouter() {
     },
   )
+  .get("/api/models",
+    async (req, env, ctx) => {
+      const { chatService } = createRequestContext(env, ctx);
+      return chatService.getSupportedModels();
+    },
+  )
   .post("/api/feedback", async (r, e, c) => {
     const { feedbackService } = createRequestContext(e, c);
     return feedbackService.handleFeedback(r);

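The new route simply proxies chatService.getSupportedModels(), which responds with JSON. A minimal client-side consumer might look like this (hypothetical helper; the exact element shape depends on SUPPORTED_MODELS and the local server, neither shown in full here):

    // Hypothetical sketch: fetch the model list from the new /api/models route.
    // Against a local inference server this is expected to be an array of model
    // id strings; the remote branch returns SUPPORTED_MODELS as-is.
    export async function fetchSupportedModels(baseUrl: string = ""): Promise<string[]> {
      const res = await fetch(`${baseUrl}/api/models`);
      if (!res.ok) {
        throw new Error(`GET /api/models failed with status ${res.status}`);
      }
      return (await res.json()) as string[];
    }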
View File

@@ -59,4 +59,38 @@ export class Utils {
     return result;
   }
+
+  static normalizeWithBlanks<T extends Normalize.ChatMessage>(msgs: T[]): T[] {
+    const out: T[] = [];
+
+    // In local mode first turn expected to be user.
+    let expected: Normalize.Role = "user";
+
+    for (const m of msgs) {
+      while (m.role !== expected) {
+        // Insert blanks to match expected sequence user/assistant/user...
+        out.push(Normalize.makeBlank(expected) as T);
+        expected = expected === "user" ? "assistant" : "user";
+      }
+
+      out.push(m);
+      expected = expected === "user" ? "assistant" : "user";
+    }
+
+    return out;
+  }
 }
+
+module Normalize {
+  export type Role = "user" | "assistant";
+
+  export interface ChatMessage extends Record<any, any> {
+    role: Role;
+  }
+
+  export const makeBlank = (role: Role): ChatMessage => ({
+    role,
+    content: ""
+  });
+}

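normalizeWithBlanks pads a conversation so roles strictly alternate user/assistant starting with a user turn, which local models typically require. A small worked example with hypothetical messages:

    import { Utils } from "../lib/utils";

    // Hypothetical input: starts with an assistant turn and has two consecutive user turns.
    const input = [
      { role: "assistant" as const, content: "Hi, how can I help?" },
      { role: "user" as const, content: "Summarize this article." },
      { role: "user" as const, content: "Keep it short." },
    ];

    const normalized = Utils.normalizeWithBlanks(input);
    // normalized is now:
    // [
    //   { role: "user", content: "" },          // blank inserted before the leading assistant turn
    //   { role: "assistant", content: "Hi, how can I help?" },
    //   { role: "user", content: "Summarize this article." },
    //   { role: "assistant", content: "" },     // blank inserted between consecutive user turns
    //   { role: "user", content: "Keep it short." },
    // ]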
View File

@@ -1,5 +1,7 @@
 import { OpenAI } from "openai";
 import ChatSdk from "../lib/chat-sdk";
+import { Utils } from "../lib/utils";
+import { ChatCompletionCreateParamsStreaming } from "openai/resources/chat/completions/completions";

 export class OpenAiChatSdk {
   static async handleOpenAiStream(
@@ -81,14 +83,42 @@ export class OpenAiChatSdk {
      return gpt4oTuningParams;
    };

-   const openAIStream = await opts.openai.chat.completions.create({
+   let completionRequest: ChatCompletionCreateParamsStreaming = {
      model: opts.model,
-     messages: messages,
      stream: true,
-     ...getTuningParams(),
-   });
+     messages: messages
+   };
+
+   const isLocal = opts.openai.baseURL.includes("localhost");
+
+   if (isLocal) {
+     completionRequest["messages"] = Utils.normalizeWithBlanks(messages);
+     completionRequest["stream_options"] = {
+       include_usage: true
+     };
+   } else {
+     completionRequest = { ...completionRequest, ...getTuningParams() };
+   }
+
+   const openAIStream = await opts.openai.chat.completions.create(completionRequest);

    for await (const chunk of openAIStream) {
+     if (isLocal && chunk.usage) {
+       dataCallback({
+         type: "chat",
+         data: {
+           choices: [
+             {
+               delta: { content: "" },
+               logprobs: null,
+               finish_reason: "stop",
+             },
+           ],
+         },
+       });
+       break;
+     }
      dataCallback({ type: "chat", data: chunk });
    }
  }

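With `stream_options.include_usage` enabled, an OpenAI-compatible server emits a final chunk whose `choices` array is empty and which carries a `usage` object; the handler above treats that chunk as its end-of-stream signal and forwards a synthetic stop delta instead of the raw chunk. Roughly (illustrative shapes and token counts only):

    // Illustration only: approximate final chunk from a local OpenAI-compatible
    // server when include_usage is set (field values are made up).
    const finalUsageChunk = {
      object: "chat.completion.chunk",
      choices: [],
      usage: { prompt_tokens: 42, completion_tokens: 128, total_tokens: 170 },
    };

    // What handleOpenAiStream forwards to dataCallback for that chunk: a synthetic
    // stop delta, so downstream consumers see a normal end-of-stream event.
    const syntheticStop = {
      type: "chat",
      data: {
        choices: [{ delta: { content: "" }, logprobs: null, finish_reason: "stop" }],
      },
    };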
View File

@@ -3,7 +3,7 @@ import OpenAI from 'openai';
 import ChatSdk from '../lib/chat-sdk';
 import Message from "../models/Message";
 import O1Message from "../models/O1Message";
-import { getModelFamily, ModelFamily } from "../../../src/components/chat/lib/SupportedModels";
+import { getModelFamily, ModelFamily, SUPPORTED_MODELS } from "../../../src/components/chat/lib/SupportedModels";
 import { OpenAiChatSdk } from "../providers/openai";
 import { GroqChatSdk } from "../providers/groq";
 import { ClaudeChatSdk } from "../providers/claude";
@@ -73,11 +73,21 @@ const ChatService = types
      throw new Error('Unsupported message format');
    };

+   const getSupportedModels = async () => {
+     if (self.env.OPENAI_API_ENDPOINT.includes("localhost")) {
+       const openaiClient = new OpenAI({ baseURL: self.env.OPENAI_API_ENDPOINT });
+       const models = await openaiClient.models.list();
+       return Response.json(models.data.map(model => model.id));
+     }
+     return Response.json(SUPPORTED_MODELS);
+   };
+
    const createStreamParams = async (
      streamConfig: any,
      dynamicContext: any,
      durableObject: any
    ): Promise<StreamParams> => {
      return {
        env: self.env,
        openai: self.openai,
@@ -112,6 +122,7 @@ const ChatService = types
    };

    return {
+     getSupportedModels,
      setActiveStream(streamId: string, stream: any) {
        const validStream = {
          name: stream?.name || "Unnamed Stream",
@@ -129,10 +140,18 @@ const ChatService = types
      },
      setEnv(env: Env) {
        self.env = env;
-       self.openai = new OpenAI({
-         apiKey: self.openAIApiKey,
-         baseURL: self.openAIBaseURL,
-       });
+       if (env.OPENAI_API_ENDPOINT.includes("localhost")) {
+         self.openai = new OpenAI({
+           apiKey: self.env.OPENAI_API_KEY,
+           baseURL: self.env.OPENAI_API_ENDPOINT,
+         });
+       } else {
+         self.openai = new OpenAI({
+           apiKey: self.openAIApiKey,
+           baseURL: self.openAIBaseURL,
+         });
+       }
      },

      handleChatRequest: async (request: Request) => {
@@ -154,12 +173,12 @@ const ChatService = types
      }) {
        const { streamConfig, streamParams, controller, encoder, streamId } = params;

-       const modelFamily = getModelFamily(streamConfig.model);
+       const modelFamily = !self.env.OPENAI_API_ENDPOINT.includes("localhost") ? getModelFamily(streamConfig.model) : "openai";
+       const handler = !self.env.OPENAI_API_ENDPOINT.includes("localhost") ? modelHandlers[modelFamily as ModelFamily] : modelHandlers.openai;
-       const handler = modelHandlers[modelFamily as ModelFamily];

        if (handler) {
          try {
            await handler(streamParams, handleStreamData(controller, encoder));
          } catch (error) {