- Add `killport.js` script for terminating processes on specific ports (a sketch of the idea follows this list)

- Introduce `supportedModels` in `ClientChatStore` and update model validation logic
- Enhance OpenAI inference with local-setup adaptations and improved streaming options
- Modify `ChatService` to handle local and remote model fetching
- Update input menu to dynamically fetch and display supported models
- Add `start_inference_server.sh` for starting the local inference server
- Upgrade OpenAI SDK to v5.0.1 and adjust dependencies accordingly
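
The `killport.js` script itself is not part of the diff excerpted below; a minimal sketch of the idea, assuming a POSIX environment where `lsof` is available (only the script's name and purpose come from the commit message, everything else here is hypothetical):

```ts
// killport.js — hypothetical sketch; the committed script is not shown in this excerpt.
// Terminates whichever processes are listening on the port passed as argv[2].
import { execSync } from "node:child_process";

const port = process.argv[2];
if (!port) {
  console.error("usage: node killport.js <port>");
  process.exit(1);
}

try {
  // `lsof -ti :<port>` prints one PID per line for sockets bound to the port.
  const pids = execSync(`lsof -ti :${port}`).toString().trim().split("\n");
  for (const pid of pids) {
    process.kill(Number(pid), "SIGTERM");
    console.log(`killed ${pid} listening on port ${port}`);
  }
} catch {
  // lsof exits non-zero when nothing matches; treat that as "port is free".
  console.log(`no process found on port ${port}`);
}
```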
Author: geoffsee, 2025-05-29 19:28:54 -04:00 · Committed by: Geoff Seemueller
Parent: c9ee7c7690 · Commit: cc0da17b5f
11 changed files with 204 additions and 23 deletions

Diff of the `OpenAiChatSdk` module, one of the 11 changed files:

```diff
@@ -1,5 +1,7 @@
 import { OpenAI } from "openai";
 import ChatSdk from "../lib/chat-sdk";
+import { Utils } from "../lib/utils";
+import { ChatCompletionCreateParamsStreaming } from "openai/resources/chat/completions/completions";
 
 export class OpenAiChatSdk {
   static async handleOpenAiStream(
@@ -81,14 +83,42 @@ export class OpenAiChatSdk {
       return gpt4oTuningParams;
     };
 
-    const openAIStream = await opts.openai.chat.completions.create({
+    let completionRequest: ChatCompletionCreateParamsStreaming = {
       model: opts.model,
-      messages: messages,
       stream: true,
-      ...getTuningParams(),
-    });
+      messages: messages,
+    };
+
+    const isLocal = opts.openai.baseURL.includes("localhost");
+
+    if (isLocal) {
+      completionRequest["messages"] = Utils.normalizeWithBlanks(messages);
+      completionRequest["stream_options"] = {
+        include_usage: true,
+      };
+    } else {
+      completionRequest = { ...completionRequest, ...getTuningParams() };
+    }
+
+    const openAIStream = await opts.openai.chat.completions.create(completionRequest);
 
     for await (const chunk of openAIStream) {
+      if (isLocal && chunk.usage) {
+        dataCallback({
+          type: "chat",
+          data: {
+            choices: [
+              {
+                delta: { content: "" },
+                logprobs: null,
+                finish_reason: "stop",
+              },
+            ],
+          },
+        });
+        break;
+      }
+
       dataCallback({ type: "chat", data: chunk });
     }
   }
```
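
`Utils.normalizeWithBlanks` is referenced but not defined in this diff. A plausible sketch, assuming the local inference server requires strictly alternating user/assistant turns (a common constraint of llama.cpp-style chat endpoints); the function name and module path come from the diff, everything else is an assumption:

```ts
import type { ChatCompletionMessageParam } from "openai/resources/chat/completions";

// Hypothetical sketch of Utils.normalizeWithBlanks (../lib/utils); the real
// implementation is not part of this diff. Assumption: the local server
// rejects consecutive same-role messages, so a blank turn of the opposite
// role is inserted to restore user/assistant alternation.
export function normalizeWithBlanks(
  messages: ChatCompletionMessageParam[],
): ChatCompletionMessageParam[] {
  const normalized: ChatCompletionMessageParam[] = [];
  for (const message of messages) {
    const previous = normalized[normalized.length - 1];
    if (previous && previous.role === message.role) {
      // Insert a blank message of the opposite role between same-role neighbors.
      normalized.push(
        message.role === "user"
          ? { role: "assistant", content: "" }
          : { role: "user", content: "" },
      );
    }
    normalized.push(message);
  }
  return normalized;
}
```

The `stream_options: { include_usage: true }` flag is standard OpenAI streaming behavior: the server sends one final chunk with an empty `choices` array and a populated `usage` field. The `isLocal && chunk.usage` check in the loop uses that terminal chunk as a sentinel, emitting a synthetic `finish_reason: "stop"` event to the client before breaking out of the stream.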