change semantics

Update README deployment steps and add deploy:secrets script to package.json

update local inference script and README

update lockfile

reconfigure package scripts for development

update test execution

pass server tests

Update README with revised Bun commands and workspace details

remove pnpm package manager designator

create bun server
This commit is contained in:
geoffsee
2025-06-02 18:41:16 -04:00
committed by Geoff Seemueller
parent 1055cda2f1
commit 497eb22ad8
218 changed files with 1273 additions and 4987 deletions

View File

@@ -0,0 +1,70 @@
import { describe, it, expect, vi } from 'vitest';
import { BaseChatProvider, CommonProviderParams, ChatStreamProvider } from '../chat-stream-provider.ts';
import { OpenAI } from 'openai';
// Minimal concrete subclass used to exercise the BaseChatProvider contract in tests.
class TestChatProvider extends BaseChatProvider {
  // Tests inject a mock client through the params object; just hand it back.
  getOpenAIClient(param: CommonProviderParams): OpenAI {
    return param.openai as OpenAI;
  }

  // Build the simplest possible streaming request from the supplied params.
  getStreamParams(param: CommonProviderParams, safeMessages: any[]): any {
    const request = {
      model: param.model,
      messages: safeMessages,
      stream: true,
      max_tokens: param.maxTokens as number,
    };
    return request;
  }

  // Forward every chunk unchanged and never terminate the stream early.
  async processChunk(chunk: any, dataCallback: (data: any) => void): Promise<boolean> {
    dataCallback({ type: 'chat', data: chunk });
    return false;
  }
}
// Mock dependencies
// vi.mock is hoisted by vitest, so this factory replaces '../../lib/chat-sdk'
// before the module under test imports it.
vi.mock('../../lib/chat-sdk', () => ({
default: {
// Prompt builders return canned values so tests never run real prompt logic.
buildAssistantPrompt: vi.fn().mockReturnValue('Assistant prompt'),
buildMessageChain: vi.fn().mockReturnValue([
{ role: 'system', content: 'System prompt' },
{ role: 'user', content: 'User message' }
])
}
}));
describe('ChatStreamProvider', () => {
  it('should define the required interface', () => {
    // A bare object literal that satisfies the interface proves the shape;
    // the runtime assertion confirms the method slot is populated.
    const stub: ChatStreamProvider = {
      handleStream: vi.fn(),
    };
    expect(stub.handleStream).toBeDefined();
  });
});
describe('BaseChatProvider', () => {
  it('should implement the ChatStreamProvider interface', () => {
    // Instantiate through the concrete test subclass; the base class is abstract.
    const provider = new TestChatProvider();
    // Every member of the provider contract must be callable.
    const members = [
      provider.handleStream,
      provider.getOpenAIClient,
      provider.getStreamParams,
      provider.processChunk,
    ];
    for (const member of members) {
      expect(member).toBeInstanceOf(Function);
    }
  });

  it('should have abstract methods that need to be implemented', () => {
    // The concrete subclass must supply all three abstract methods.
    const provider = new TestChatProvider();
    expect(provider.getOpenAIClient).toBeDefined();
    expect(provider.getStreamParams).toBeDefined();
    expect(provider.processChunk).toBeDefined();
  });
});

View File

@@ -0,0 +1,71 @@
import {OpenAI} from "openai";
import {BaseChatProvider, CommonProviderParams} from "./chat-stream-provider.ts";
/**
 * Streams chat completions from the Cerebras OpenAI-compatible endpoint.
 */
export class CerebrasChatProvider extends BaseChatProvider {
  getOpenAIClient(param: CommonProviderParams): OpenAI {
    // Cerebras exposes an OpenAI-compatible API surface.
    return new OpenAI({
      baseURL: "https://api.cerebras.ai/v1",
      apiKey: param.env.CEREBRAS_API_KEY,
    });
  }

  getStreamParams(param: CommonProviderParams, safeMessages: any[]): any {
    // models provided by cerebras do not follow standard tune params
    // they must be individually configured
    // const tuningParams = {
    //   temperature: 0.86,
    //   top_p: 0.98,
    //   presence_penalty: 0.1,
    //   frequency_penalty: 0.3,
    //   max_tokens: param.maxTokens as number,
    // };
    return {
      model: param.model,
      messages: safeMessages,
      stream: true
      // ...tuningParams
    };
  }

  async processChunk(chunk: any, dataCallback: (data: any) => void): Promise<boolean> {
    // Forward every chunk; signal completion once a stop reason arrives.
    const finished = chunk.choices?.[0]?.finish_reason === "stop";
    dataCallback({ type: "chat", data: chunk });
    return finished;
  }
}
/**
 * Static facade kept for existing call sites; delegates to CerebrasChatProvider.
 */
export class CerebrasSdk {
  private static provider = new CerebrasChatProvider();

  static async handleCerebrasStream(
    param: {
      openai: OpenAI;
      systemPrompt: any;
      disableWebhookGeneration: boolean;
      preprocessedContext: any;
      maxTokens: unknown | number | undefined;
      messages: any;
      model: string;
      env: Env;
    },
    dataCallback: (data) => void,
  ) {
    // Forward only the fields the provider consumes; the `openai` client is
    // unused because the provider constructs its own Cerebras client.
    const {
      systemPrompt,
      preprocessedContext,
      maxTokens,
      messages,
      model,
      env,
      disableWebhookGeneration,
    } = param;
    return this.provider.handleStream(
      {
        systemPrompt,
        preprocessedContext,
        maxTokens,
        messages,
        model,
        env,
        disableWebhookGeneration,
      },
      dataCallback,
    );
  }
}

View File

@@ -0,0 +1,49 @@
import { OpenAI } from "openai";
import ChatSdk from "../lib/chat-sdk.ts";
/**
 * Parameter bag shared by every chat stream provider implementation.
 */
export interface CommonProviderParams {
openai?: OpenAI; // Optional for providers that use a custom client.
systemPrompt: any;
preprocessedContext: any; // Forwarded to ChatSdk.buildMessageChain as toolResults.
maxTokens: number | unknown | undefined;
messages: any;
model: string;
env: Env; // Worker environment; providers read their API keys from here.
disableWebhookGeneration?: boolean;
// Additional fields can be added as needed
}
/**
 * Contract implemented by all chat streaming providers: pump chunks from the
 * upstream model into dataCallback until the stream ends.
 */
export interface ChatStreamProvider {
handleStream(
param: CommonProviderParams,
dataCallback: (data: any) => void,
): Promise<any>;
}
/**
 * Template-method base class for chat stream providers.
 *
 * Subclasses supply the client, the request parameters, and per-chunk
 * handling; handleStream drives the shared message-building and streaming loop.
 */
export abstract class BaseChatProvider implements ChatStreamProvider {
  /** Return the (possibly provider-specific) OpenAI-compatible client. */
  abstract getOpenAIClient(param: CommonProviderParams): OpenAI;

  /** Build the completion request sent to the provider. */
  abstract getStreamParams(param: CommonProviderParams, safeMessages: any[]): any;

  /**
   * Handle one streamed chunk; resolve to true to stop consuming the stream.
   *
   * Note: the original declared `abstract async`, which is a compile error
   * (TS1243: 'async' cannot be used with 'abstract'). Declaring the
   * Promise<boolean> return type keeps the contract identical for subclasses.
   */
  abstract processChunk(chunk: any, dataCallback: (data: any) => void): Promise<boolean>;

  async handleStream(
    param: CommonProviderParams,
    dataCallback: (data: any) => void,
  ) {
    const assistantPrompt = ChatSdk.buildAssistantPrompt({ maxTokens: param.maxTokens });
    // Normalize raw history + system prompt + tool results into a provider-safe chain.
    const safeMessages = ChatSdk.buildMessageChain(param.messages, {
      systemPrompt: param.systemPrompt,
      model: param.model,
      assistantPrompt,
      toolResults: param.preprocessedContext,
    });

    const client = this.getOpenAIClient(param);
    const streamParams = this.getStreamParams(param, safeMessages);
    const stream = await client.chat.completions.create(streamParams);

    // Forward chunks until the subclass signals completion.
    for await (const chunk of stream) {
      const shouldBreak = await this.processChunk(chunk, dataCallback);
      if (shouldBreak) break;
    }
  }
}

View File

@@ -0,0 +1,124 @@
import Anthropic from "@anthropic-ai/sdk";
import {OpenAI} from "openai";
import {
_NotCustomized,
ISimpleType,
ModelPropertiesDeclarationToProperties,
ModelSnapshotType2,
UnionStringArray,
} from "mobx-state-tree";
import ChatSdk from "../lib/chat-sdk.ts";
import {BaseChatProvider, CommonProviderParams} from "./chat-stream-provider.ts";
/**
 * Streams chat completions from Anthropic's Messages API while conforming to
 * the BaseChatProvider interface.
 */
export class ClaudeChatProvider extends BaseChatProvider {
  private anthropic: Anthropic | null = null;

  getOpenAIClient(param: CommonProviderParams): OpenAI {
    // Claude doesn't use the OpenAI client; return the caller-supplied one
    // purely to satisfy the interface. The Anthropic client is built separately.
    return param.openai as OpenAI;
  }

  getStreamParams(param: CommonProviderParams, safeMessages: any[]): any {
    // Keep initializing the instance client here so existing callers that
    // invoke getStreamParams directly observe the same behavior as before.
    this.anthropic = this.createAnthropicClient(param);
    const claudeTuningParams = {
      temperature: 0.7,
      max_tokens: param.maxTokens as number,
    };
    return {
      stream: true,
      model: param.model,
      messages: safeMessages,
      ...claudeTuningParams
    };
  }

  /** Build an Anthropic client from the API key carried in the environment. */
  private createAnthropicClient(param: CommonProviderParams): Anthropic {
    return new Anthropic({
      apiKey: param.env.ANTHROPIC_API_KEY,
    });
  }

  async processChunk(chunk: any, dataCallback: (data: any) => void): Promise<boolean> {
    // Anthropic signals end-of-stream with a message_stop event; translate it
    // into an OpenAI-shaped "stop" chunk so downstream consumers see one format.
    if (chunk.type === "message_stop") {
      dataCallback({
        type: "chat",
        data: {
          choices: [
            {
              delta: { content: "" },
              logprobs: null,
              finish_reason: "stop",
            },
          ],
        },
      });
      return true;
    }
    dataCallback({ type: "chat", data: chunk });
    return false;
  }

  // Override the base handleStream method to use the Anthropic client
  // instead of the OpenAI chat.completions client.
  async handleStream(
    param: CommonProviderParams,
    dataCallback: (data: any) => void,
  ) {
    const assistantPrompt = ChatSdk.buildAssistantPrompt({ maxTokens: param.maxTokens });
    const safeMessages = ChatSdk.buildMessageChain(param.messages, {
      systemPrompt: param.systemPrompt,
      model: param.model,
      assistantPrompt,
      toolResults: param.preprocessedContext,
    });
    const streamParams = this.getStreamParams(param, safeMessages);
    // Previously this relied on getStreamParams's hidden side effect of
    // assigning this.anthropic and threw if the ordering ever changed.
    // Fall back to building a client explicitly so that failure mode is gone.
    const client = this.anthropic ?? this.createAnthropicClient(param);
    const stream = await client.messages.create(streamParams);
    for await (const chunk of stream) {
      const shouldBreak = await this.processChunk(chunk, dataCallback);
      if (shouldBreak) break;
    }
  }
}
// Legacy class for backward compatibility
/** Static facade that delegates to a shared ClaudeChatProvider instance. */
export class ClaudeChatSdk {
  private static provider = new ClaudeChatProvider();

  static async handleClaudeStream(
    param: {
      openai: OpenAI;
      systemPrompt: any;
      preprocessedContext: ModelSnapshotType2<
        ModelPropertiesDeclarationToProperties<{
          role: ISimpleType<UnionStringArray<string[]>>;
          content: ISimpleType<unknown>;
        }>,
        _NotCustomized
      >;
      maxTokens: unknown | number | undefined;
      messages: any;
      model: string;
      env: Env;
    },
    dataCallback: (data) => void,
  ) {
    // Repackage the legacy parameter shape into CommonProviderParams.
    const { openai, systemPrompt, preprocessedContext, maxTokens, messages, model, env } = param;
    return this.provider.handleStream(
      {
        openai,
        systemPrompt,
        preprocessedContext,
        maxTokens,
        messages,
        model,
        env,
      },
      dataCallback,
    );
  }
}

View File

@@ -0,0 +1,141 @@
import {OpenAI} from "openai";
import {BaseChatProvider, CommonProviderParams} from "./chat-stream-provider.ts";
/**
 * Streams chat completions from Cloudflare Workers AI via its
 * OpenAI-compatible endpoint. Models must be addressed with a vendor
 * prefix (e.g. "@cf/meta/..."), which this provider derives from the
 * bare model name.
 */
export class CloudflareAiChatProvider extends BaseChatProvider {
  getOpenAIClient(param: CommonProviderParams): OpenAI {
    const cfAiURL = `https://api.cloudflare.com/client/v4/accounts/${param.env.CLOUDFLARE_ACCOUNT_ID}/ai/v1`;
    return new OpenAI({
      apiKey: param.env.CLOUDFLARE_API_KEY,
      baseURL: cfAiURL,
    });
  }

  getStreamParams(param: CommonProviderParams, safeMessages: any[]): any {
    const generationParams: Record<string, any> = {
      model: this.getModelWithPrefix(param.model),
      messages: safeMessages,
      stream: true,
    };

    // Prefix families that default to the largest token budget.
    const prefix = this.getModelPrefix(param.model);
    if (prefix === "@cf/meta" || prefix === "@hf/mistral") {
      generationParams["max_tokens"] = 4096;
    }

    // Model-specific budget overrides. Iterated in order so a later match
    // wins, matching the original cascade of if-statements.
    const overrides: Array<[string, number]> = [
      ["hermes-2-pro-mistral-7b", 1000],
      ["openhermes-2.5-mistral-7b-awq", 1000],
      ["deepseek-coder-6.7b-instruct-awq", 590],
      ["deepseek-math-7b-instruct", 512],
      ["neural-chat-7b-v3-1-awq", 590],
      ["openchat-3.5-0106", 2000],
    ];
    const model = param.model.toLowerCase();
    for (const [needle, maxTokens] of overrides) {
      if (model.includes(needle)) {
        generationParams["max_tokens"] = maxTokens;
      }
    }

    return generationParams;
  }

  /**
   * Map a bare model name to its Cloudflare vendor prefix.
   * The checks run in order and later matches override earlier ones
   * (e.g. "deepseek-coder-...-awq" matches "deepseek" first, then the
   * TheBloke AWQ family last).
   */
  private getModelPrefix(model: string): string {
    const m = model.toLowerCase();
    let modelPrefix = `@cf/meta`;
    if (m.includes("llama")) {
      modelPrefix = `@cf/meta`;
    }
    if (m.includes("hermes-2-pro-mistral-7b")) {
      modelPrefix = `@hf/nousresearch`;
    }
    if (m.includes("mistral-7b-instruct")) {
      modelPrefix = `@hf/mistral`;
    }
    if (m.includes("gemma")) {
      modelPrefix = `@cf/google`;
    }
    if (m.includes("deepseek")) {
      modelPrefix = `@cf/deepseek-ai`;
    }
    if (m.includes("openchat-3.5-0106")) {
      modelPrefix = `@cf/openchat`;
    }
    // Fixed typo: was `isNueralChat`.
    const isNeuralChat = m.includes("neural-chat-7b-v3-1-awq");
    if (
      isNeuralChat ||
      m.includes("openhermes-2.5-mistral-7b-awq") ||
      m.includes("zephyr-7b-beta-awq") ||
      m.includes("deepseek-coder-6.7b-instruct-awq")
    ) {
      modelPrefix = `@hf/thebloke`;
    }
    return modelPrefix;
  }

  /** Return the fully-qualified model identifier, e.g. "@cf/meta/<model>". */
  private getModelWithPrefix(model: string): string {
    return `${this.getModelPrefix(model)}/${model}`;
  }

  async processChunk(chunk: any, dataCallback: (data: any) => void): Promise<boolean> {
    if (chunk.choices && chunk.choices[0]?.finish_reason === "stop") {
      dataCallback({ type: "chat", data: chunk });
      return true;
    }
    dataCallback({ type: "chat", data: chunk });
    return false;
  }
}
/** Static facade that delegates to a shared CloudflareAiChatProvider. */
export class CloudflareAISdk {
  private static provider = new CloudflareAiChatProvider();

  static async handleCloudflareAIStream(
    param: {
      openai: OpenAI;
      systemPrompt: any;
      preprocessedContext: any;
      maxTokens: unknown | number | undefined;
      messages: any;
      model: string;
      env: Env;
    },
    dataCallback: (data) => void,
  ) {
    // The `openai` client is intentionally dropped: the provider builds its
    // own client against the Cloudflare account endpoint.
    const { systemPrompt, preprocessedContext, maxTokens, messages, model, env } = param;
    return this.provider.handleStream(
      { systemPrompt, preprocessedContext, maxTokens, messages, model, env },
      dataCallback,
    );
  }
}

View File

@@ -0,0 +1,74 @@
import { OpenAI } from "openai";
import {
_NotCustomized,
castToSnapshot,
getSnapshot,
ISimpleType,
ModelPropertiesDeclarationToProperties,
ModelSnapshotType2,
UnionStringArray,
} from "mobx-state-tree";
import Message from "../models/Message.ts";
import ChatSdk from "../lib/chat-sdk.ts";
import { BaseChatProvider, CommonProviderParams } from "./chat-stream-provider.ts";
/**
 * Streams chat completions from Fireworks AI's OpenAI-compatible endpoint.
 */
export class FireworksAiChatProvider extends BaseChatProvider {
  getOpenAIClient(param: CommonProviderParams): OpenAI {
    return new OpenAI({
      apiKey: param.env.FIREWORKS_API_KEY,
      baseURL: "https://api.fireworks.ai/inference/v1",
    });
  }

  getStreamParams(param: CommonProviderParams, safeMessages: any[]): any {
    // Yi models live under a different Fireworks account namespace.
    const isYiModel = param.model.toLowerCase().includes("yi-");
    const modelPrefix = isYiModel
      ? "accounts/yi-01-ai/models/"
      : "accounts/fireworks/models/";
    return {
      model: `${modelPrefix}${param.model}`,
      messages: safeMessages,
      stream: true,
    };
  }

  async processChunk(chunk: any, dataCallback: (data: any) => void): Promise<boolean> {
    // Forward every chunk; stop once the provider reports a stop reason.
    const finished = chunk.choices?.[0]?.finish_reason === "stop";
    dataCallback({ type: "chat", data: chunk });
    return finished;
  }
}
/** Static facade that delegates to a shared FireworksAiChatProvider. */
export class FireworksAiChatSdk {
  private static provider = new FireworksAiChatProvider();

  static async handleFireworksStream(
    param: {
      openai: OpenAI;
      systemPrompt: any;
      preprocessedContext: any;
      maxTokens: number;
      messages: any;
      model: any;
      env: Env;
    },
    dataCallback: (data) => void,
  ) {
    // The `openai` client is unused; the provider builds its own Fireworks client.
    const { systemPrompt, preprocessedContext, maxTokens, messages, model, env } = param;
    return this.provider.handleStream(
      { systemPrompt, preprocessedContext, maxTokens, messages, model, env },
      dataCallback,
    );
  }
}

View File

@@ -0,0 +1,74 @@
import { OpenAI } from "openai";
import ChatSdk from "../lib/chat-sdk.ts";
import { StreamParams } from "../services/ChatService.ts";
import { BaseChatProvider, CommonProviderParams } from "./chat-stream-provider.ts";
/**
 * Streams chat completions from Google's Gemini OpenAI-compatibility endpoint.
 */
export class GoogleChatProvider extends BaseChatProvider {
  getOpenAIClient(param: CommonProviderParams): OpenAI {
    return new OpenAI({
      baseURL: "https://generativelanguage.googleapis.com/v1beta/openai",
      apiKey: param.env.GEMINI_API_KEY,
    });
  }

  getStreamParams(param: CommonProviderParams, safeMessages: any[]): any {
    return {
      model: param.model,
      messages: safeMessages,
      stream: true,
    };
  }

  async processChunk(chunk: any, dataCallback: (data: any) => void): Promise<boolean> {
    const choice = chunk.choices?.[0];

    // Final chunk: re-emit with an explicit "stop" finish reason and end the stream.
    if (choice?.finish_reason === "stop") {
      dataCallback({
        type: "chat",
        data: {
          choices: [
            {
              delta: { content: choice.delta.content || "" },
              finish_reason: "stop",
              index: choice.index,
            },
          ],
        },
      });
      return true;
    }

    // Interim chunk: normalize into the same OpenAI-style shape and continue.
    dataCallback({
      type: "chat",
      data: {
        choices: [
          {
            delta: { content: choice?.delta?.content || "" },
            finish_reason: null,
            index: choice?.index || 0,
          },
        ],
      },
    });
    return false;
  }
}
/** Static facade that delegates to a shared GoogleChatProvider. */
export class GoogleChatSdk {
  private static provider = new GoogleChatProvider();

  static async handleGoogleStream(
    param: StreamParams,
    dataCallback: (data) => void,
  ) {
    // Forward only the fields the provider consumes.
    const { systemPrompt, preprocessedContext, maxTokens, messages, model, env } = param;
    return this.provider.handleStream(
      { systemPrompt, preprocessedContext, maxTokens, messages, model, env },
      dataCallback,
    );
  }
}

View File

@@ -0,0 +1,80 @@
import { OpenAI } from "openai";
import {
_NotCustomized,
ISimpleType,
ModelPropertiesDeclarationToProperties,
ModelSnapshotType2,
UnionStringArray,
} from "mobx-state-tree";
import { BaseChatProvider, CommonProviderParams } from "./chat-stream-provider.ts";
/**
 * Streams chat completions from Groq's OpenAI-compatible endpoint.
 */
export class GroqChatProvider extends BaseChatProvider {
  getOpenAIClient(param: CommonProviderParams): OpenAI {
    return new OpenAI({
      baseURL: "https://api.groq.com/openai/v1",
      apiKey: param.env.GROQ_API_KEY,
    });
  }

  getStreamParams(param: CommonProviderParams, safeMessages: any[]): any {
    // Fixed sampling configuration plus the caller-supplied token budget.
    return {
      model: param.model,
      messages: safeMessages,
      stream: true,
      temperature: 0.86,
      top_p: 0.98,
      presence_penalty: 0.1,
      frequency_penalty: 0.3,
      max_tokens: param.maxTokens as number,
    };
  }

  async processChunk(chunk: any, dataCallback: (data: any) => void): Promise<boolean> {
    // Forward every chunk; stop once the provider reports a stop reason.
    const finished = chunk.choices?.[0]?.finish_reason === "stop";
    dataCallback({ type: "chat", data: chunk });
    return finished;
  }
}
/** Static facade that delegates to a shared GroqChatProvider. */
export class GroqChatSdk {
  private static provider = new GroqChatProvider();

  static async handleGroqStream(
    param: {
      openai: OpenAI;
      systemPrompt: any;
      preprocessedContext: ModelSnapshotType2<
        ModelPropertiesDeclarationToProperties<{
          role: ISimpleType<UnionStringArray<string[]>>;
          content: ISimpleType<unknown>;
        }>,
        _NotCustomized
      >;
      maxTokens: unknown | number | undefined;
      messages: any;
      model: string;
      env: Env;
    },
    dataCallback: (data) => void,
  ) {
    // The `openai` client is unused; the provider builds its own Groq client.
    const { systemPrompt, preprocessedContext, maxTokens, messages, model, env } = param;
    return this.provider.handleStream(
      { systemPrompt, preprocessedContext, maxTokens, messages, model, env },
      dataCallback,
    );
  }
}

View File

@@ -0,0 +1,114 @@
import { OpenAI } from "openai";
import { Utils } from "../lib/utils.ts";
import { ChatCompletionCreateParamsStreaming } from "openai/resources/chat/completions/completions";
import { BaseChatProvider, CommonProviderParams } from "./chat-stream-provider.ts";
/**
 * Streams chat completions from OpenAI (or a localhost OpenAI-compatible
 * server) using the client supplied by the caller.
 */
export class OpenAiChatProvider extends BaseChatProvider {
  getOpenAIClient(param: CommonProviderParams): OpenAI {
    // OpenAI uses the shared client passed in by the caller.
    return param.openai as OpenAI;
  }

  getStreamParams(param: CommonProviderParams, safeMessages: any[]): ChatCompletionCreateParamsStreaming {
    // o1 models reject the usual sampling params and budget completion tokens
    // separately. Fixed: previously returned `true | undefined` instead of boolean.
    const isO1 = (): boolean =>
      param.model === "o1-preview" || param.model === "o1-mini";

    const gpt4oTuningParams = {
      temperature: 0.86,
      top_p: 0.98,
      presence_penalty: 0.1,
      frequency_penalty: 0.3,
      max_tokens: param.maxTokens as number,
    };

    const getTuningParams = (): Record<string, any> => {
      if (isO1()) {
        return {
          temperature: 1,
          // o1 uses max_completion_tokens; pad the caller's budget to leave
          // room for (unbilled-to-output) reasoning tokens.
          max_completion_tokens: (param.maxTokens as number) + 10000,
        };
      }
      return gpt4oTuningParams;
    };

    let completionRequest: ChatCompletionCreateParamsStreaming = {
      model: param.model,
      stream: true,
      messages: safeMessages
    };

    const client = this.getOpenAIClient(param);
    const isLocal = client.baseURL.includes("localhost");
    if (isLocal) {
      // Local inference servers need blank-filled message chains and a usage
      // record in the final chunk (processChunk uses it to detect the end).
      completionRequest["messages"] = Utils.normalizeWithBlanks(safeMessages);
      completionRequest["stream_options"] = {
        include_usage: true
      };
    } else {
      completionRequest = { ...completionRequest, ...getTuningParams() };
    }
    return completionRequest;
  }

  async processChunk(chunk: any, dataCallback: (data: any) => void): Promise<boolean> {
    // Local streams (stream_options.include_usage) end with a chunk carrying a
    // populated `usage` object; translate it into a standard "stop" chunk.
    // (Was `isLocal && chunk.usage`, where isLocal = usage !== undefined —
    // a redundant double-check equivalent to truthiness of chunk.usage.)
    if (chunk.usage) {
      dataCallback({
        type: "chat",
        data: {
          choices: [
            {
              delta: { content: "" },
              logprobs: null,
              finish_reason: "stop",
            },
          ],
        },
      });
      return true; // Break the stream
    }
    dataCallback({ type: "chat", data: chunk });
    return false; // Continue the stream
  }
}
// Legacy class for backward compatibility
/** Static facade that delegates to a shared OpenAiChatProvider. */
export class OpenAiChatSdk {
  private static provider = new OpenAiChatProvider();

  static async handleOpenAiStream(
    ctx: {
      openai: OpenAI;
      systemPrompt: any;
      preprocessedContext: any;
      maxTokens: unknown | number | undefined;
      messages: any;
      model: any;
    },
    dataCallback: (data: any) => any,
  ) {
    // Reject empty requests before touching the provider.
    if (!ctx.messages?.length) {
      return new Response("No messages provided", { status: 400 });
    }
    const { openai, systemPrompt, preprocessedContext, maxTokens, messages, model } = ctx;
    return this.provider.handleStream(
      {
        openai,
        systemPrompt,
        preprocessedContext,
        maxTokens,
        messages,
        model,
        env: {} as Env, // This is not used in OpenAI provider
      },
      dataCallback,
    );
  }
}

View File

@@ -0,0 +1,73 @@
import { OpenAI } from "openai";
import { BaseChatProvider, CommonProviderParams } from "./chat-stream-provider.ts";
/**
 * Streams chat completions from xAI's OpenAI-compatible endpoint.
 */
export class XaiChatProvider extends BaseChatProvider {
  getOpenAIClient(param: CommonProviderParams): OpenAI {
    return new OpenAI({
      baseURL: "https://api.x.ai/v1",
      apiKey: param.env.XAI_API_KEY,
    });
  }

  getStreamParams(param: CommonProviderParams, safeMessages: any[]): any {
    // Only temperature is tuned for xAI models.
    return {
      model: param.model,
      messages: safeMessages,
      stream: true,
      temperature: 0.75,
    };
  }

  async processChunk(chunk: any, dataCallback: (data: any) => void): Promise<boolean> {
    // Forward every chunk; stop once the provider reports a stop reason.
    const finished = chunk.choices?.[0]?.finish_reason === "stop";
    dataCallback({ type: "chat", data: chunk });
    return finished;
  }
}
/** Static facade that delegates to a shared XaiChatProvider. */
export class XaiChatSdk {
  private static provider = new XaiChatProvider();

  static async handleXaiStream(
    ctx: {
      openai: OpenAI;
      systemPrompt: any;
      preprocessedContext: any;
      maxTokens: unknown | number | undefined;
      messages: any;
      disableWebhookGeneration: boolean;
      model: any;
      env: Env;
    },
    dataCallback: (data: any) => any,
  ) {
    // Reject empty requests before touching the provider.
    if (!ctx.messages?.length) {
      return new Response("No messages provided", { status: 400 });
    }
    // The `openai` client is unused; the provider builds its own xAI client.
    const {
      systemPrompt,
      preprocessedContext,
      maxTokens,
      messages,
      model,
      env,
      disableWebhookGeneration,
    } = ctx;
    return this.provider.handleStream(
      {
        systemPrompt,
        preprocessedContext,
        maxTokens,
        messages,
        model,
        env,
        disableWebhookGeneration,
      },
      dataCallback,
    );
  }
}