creates a common abstraction for providers

This commit is contained in:
geoffsee
2025-05-31 20:21:22 -04:00
committed by Geoff Seemueller
parent 5a7691a9af
commit 87e083682c
9 changed files with 569 additions and 618 deletions

View File

@@ -7,8 +7,47 @@ import {
UnionStringArray, UnionStringArray,
} from "mobx-state-tree"; } from "mobx-state-tree";
import ChatSdk from "../lib/chat-sdk"; import ChatSdk from "../lib/chat-sdk";
import { BaseChatProvider, CommonProviderParams } from "./chat-stream-provider";
export class CerebrasChatProvider extends BaseChatProvider {
getOpenAIClient(param: CommonProviderParams): OpenAI {
return new OpenAI({
baseURL: "https://api.cerebras.ai/v1",
apiKey: param.env.CEREBRAS_API_KEY,
});
}
getStreamParams(param: CommonProviderParams, safeMessages: any[]): any {
const llamaTuningParams = {
temperature: 0.86,
top_p: 0.98,
presence_penalty: 0.1,
frequency_penalty: 0.3,
max_tokens: param.maxTokens as number,
};
return {
model: param.model,
messages: safeMessages,
stream: true,
};
}
async processChunk(chunk: any, dataCallback: (data: any) => void): Promise<boolean> {
// Check if this is the final chunk
if (chunk.choices && chunk.choices[0]?.finish_reason === "stop") {
dataCallback({ type: "chat", data: chunk });
return true; // Break the stream
}
dataCallback({ type: "chat", data: chunk });
return false; // Continue the stream
}
}
export class CerebrasSdk { export class CerebrasSdk {
private static provider = new CerebrasChatProvider();
static async handleCerebrasStream( static async handleCerebrasStream(
param: { param: {
openai: OpenAI; openai: OpenAI;
@@ -28,73 +67,17 @@ export class CerebrasSdk {
}, },
dataCallback: (data) => void, dataCallback: (data) => void,
) { ) {
const { return this.provider.handleStream(
preprocessedContext,
messages,
env,
maxTokens,
systemPrompt,
model,
} = param;
const assistantPrompt = ChatSdk.buildAssistantPrompt({
maxTokens: maxTokens,
});
const safeMessages = ChatSdk.buildMessageChain(messages, {
systemPrompt: systemPrompt,
model,
assistantPrompt,
toolResults: preprocessedContext,
});
const openai = new OpenAI({
baseURL: "https://api.cerebras.ai/v1",
apiKey: param.env.CEREBRAS_API_KEY,
});
return CerebrasSdk.streamCerebrasResponse(
safeMessages,
{ {
model: param.model, systemPrompt: param.systemPrompt,
preprocessedContext: param.preprocessedContext,
maxTokens: param.maxTokens, maxTokens: param.maxTokens,
openai: openai, messages: param.messages,
model: param.model,
env: param.env,
disableWebhookGeneration: param.disableWebhookGeneration,
}, },
dataCallback, dataCallback,
); );
} }
private static async streamCerebrasResponse(
messages: any[],
opts: {
model: string;
maxTokens: number | unknown | undefined;
openai: OpenAI;
},
dataCallback: (data: any) => void,
) {
const tuningParams: Record<string, any> = {};
const llamaTuningParams = {
temperature: 0.86,
top_p: 0.98,
presence_penalty: 0.1,
frequency_penalty: 0.3,
max_tokens: opts.maxTokens,
};
const getLlamaTuningParams = () => {
return llamaTuningParams;
};
const groqStream = await opts.openai.chat.completions.create({
model: opts.model,
messages: messages,
stream: true,
});
for await (const chunk of groqStream) {
dataCallback({ type: "chat", data: chunk });
}
}
} }

View File

@@ -0,0 +1,49 @@
import { OpenAI } from "openai";
import ChatSdk from "../lib/chat-sdk";
export interface CommonProviderParams {
openai?: OpenAI; // Optional for providers that use a custom client.
systemPrompt: any;
preprocessedContext: any;
maxTokens: number | unknown | undefined;
messages: any;
model: string;
env: Env;
disableWebhookGeneration?: boolean;
// Additional fields can be added as needed
}
export interface ChatStreamProvider {
handleStream(
param: CommonProviderParams,
dataCallback: (data: any) => void,
): Promise<any>;
}
export abstract class BaseChatProvider implements ChatStreamProvider {
abstract getOpenAIClient(param: CommonProviderParams): OpenAI;
abstract getStreamParams(param: CommonProviderParams, safeMessages: any[]): any;
abstract async processChunk(chunk: any, dataCallback: (data: any) => void): Promise<boolean>;
async handleStream(
param: CommonProviderParams,
dataCallback: (data: any) => void,
) {
const assistantPrompt = ChatSdk.buildAssistantPrompt({ maxTokens: param.maxTokens });
const safeMessages = ChatSdk.buildMessageChain(param.messages, {
systemPrompt: param.systemPrompt,
model: param.model,
assistantPrompt,
toolResults: param.preprocessedContext,
});
const client = this.getOpenAIClient(param);
const streamParams = this.getStreamParams(param, safeMessages);
const stream = await client.chat.completions.create(streamParams);
for await (const chunk of stream) {
const shouldBreak = await this.processChunk(chunk, dataCallback);
if (shouldBreak) break;
}
}
}

View File

@@ -8,43 +8,88 @@ import {
UnionStringArray, UnionStringArray,
} from "mobx-state-tree"; } from "mobx-state-tree";
import ChatSdk from "../lib/chat-sdk"; import ChatSdk from "../lib/chat-sdk";
import { BaseChatProvider, CommonProviderParams } from "./chat-stream-provider";
export class ClaudeChatSdk { export class ClaudeChatProvider extends BaseChatProvider {
private static async streamClaudeResponse( private anthropic: Anthropic | null = null;
messages: any[],
param: { getOpenAIClient(param: CommonProviderParams): OpenAI {
model: string; // Claude doesn't use OpenAI client directly, but we need to return something
maxTokens: number | unknown | undefined; // to satisfy the interface. The actual Anthropic client is created in getStreamParams.
anthropic: Anthropic; return param.openai as OpenAI;
}, }
dataCallback: (data: any) => void,
) { getStreamParams(param: CommonProviderParams, safeMessages: any[]): any {
const claudeStream = await param.anthropic.messages.create({ this.anthropic = new Anthropic({
stream: true, apiKey: param.env.ANTHROPIC_API_KEY,
model: param.model,
max_tokens: param.maxTokens,
messages: messages,
}); });
for await (const chunk of claudeStream) { const claudeTuningParams = {
if (chunk.type === "message_stop") { temperature: 0.7,
dataCallback({ max_tokens: param.maxTokens as number,
type: "chat", };
data: {
choices: [ return {
{ stream: true,
delta: { content: "" }, model: param.model,
logprobs: null, messages: safeMessages,
finish_reason: "stop", ...claudeTuningParams
}, };
], }
},
}); async processChunk(chunk: any, dataCallback: (data: any) => void): Promise<boolean> {
break; if (chunk.type === "message_stop") {
} dataCallback({
dataCallback({ type: "chat", data: chunk }); type: "chat",
data: {
choices: [
{
delta: { content: "" },
logprobs: null,
finish_reason: "stop",
},
],
},
});
return true; // Break the stream
}
dataCallback({ type: "chat", data: chunk });
return false; // Continue the stream
}
// Override the base handleStream method to use Anthropic client instead of OpenAI
async handleStream(
param: CommonProviderParams,
dataCallback: (data: any) => void,
) {
const assistantPrompt = ChatSdk.buildAssistantPrompt({ maxTokens: param.maxTokens });
const safeMessages = ChatSdk.buildMessageChain(param.messages, {
systemPrompt: param.systemPrompt,
model: param.model,
assistantPrompt,
toolResults: param.preprocessedContext,
});
const streamParams = this.getStreamParams(param, safeMessages);
if (!this.anthropic) {
throw new Error("Anthropic client not initialized");
}
const stream = await this.anthropic.messages.create(streamParams);
for await (const chunk of stream) {
const shouldBreak = await this.processChunk(chunk, dataCallback);
if (shouldBreak) break;
} }
} }
}
// Legacy class for backward compatibility
export class ClaudeChatSdk {
private static provider = new ClaudeChatProvider();
static async handleClaudeStream( static async handleClaudeStream(
param: { param: {
openai: OpenAI; openai: OpenAI;
@@ -63,36 +108,15 @@ export class ClaudeChatSdk {
}, },
dataCallback: (data) => void, dataCallback: (data) => void,
) { ) {
const { return this.provider.handleStream(
preprocessedContext,
messages,
env,
maxTokens,
systemPrompt,
model,
} = param;
const assistantPrompt = ChatSdk.buildAssistantPrompt({
maxTokens: maxTokens,
});
const safeMessages = ChatSdk.buildMessageChain(messages, {
systemPrompt: systemPrompt,
model,
assistantPrompt,
toolResults: preprocessedContext,
});
const anthropic = new Anthropic({
apiKey: env.ANTHROPIC_API_KEY,
});
return ClaudeChatSdk.streamClaudeResponse(
safeMessages,
{ {
model: param.model, openai: param.openai,
systemPrompt: param.systemPrompt,
preprocessedContext: param.preprocessedContext,
maxTokens: param.maxTokens, maxTokens: param.maxTokens,
anthropic: anthropic, messages: param.messages,
model: param.model,
env: param.env,
}, },
dataCallback, dataCallback,
); );

View File

@@ -7,8 +7,122 @@ import {
UnionStringArray, UnionStringArray,
} from "mobx-state-tree"; } from "mobx-state-tree";
import ChatSdk from "../lib/chat-sdk"; import ChatSdk from "../lib/chat-sdk";
import { BaseChatProvider, CommonProviderParams } from "./chat-stream-provider";
export class CloudflareAiChatProvider extends BaseChatProvider {
getOpenAIClient(param: CommonProviderParams): OpenAI {
const cfAiURL = `https://api.cloudflare.com/client/v4/accounts/${param.env.CLOUDFLARE_ACCOUNT_ID}/ai/v1`;
return new OpenAI({
apiKey: param.env.CLOUDFLARE_API_KEY,
baseURL: cfAiURL,
});
}
getStreamParams(param: CommonProviderParams, safeMessages: any[]): any {
const generationParams: Record<string, any> = {
model: this.getModelWithPrefix(param.model),
messages: safeMessages,
stream: true,
};
// Set max_tokens based on model
if (this.getModelPrefix(param.model) === "@cf/meta") {
generationParams["max_tokens"] = 4096;
}
if (this.getModelPrefix(param.model) === "@hf/mistral") {
generationParams["max_tokens"] = 4096;
}
if (param.model.toLowerCase().includes("hermes-2-pro-mistral-7b")) {
generationParams["max_tokens"] = 1000;
}
if (param.model.toLowerCase().includes("openhermes-2.5-mistral-7b-awq")) {
generationParams["max_tokens"] = 1000;
}
if (param.model.toLowerCase().includes("deepseek-coder-6.7b-instruct-awq")) {
generationParams["max_tokens"] = 590;
}
if (param.model.toLowerCase().includes("deepseek-math-7b-instruct")) {
generationParams["max_tokens"] = 512;
}
if (param.model.toLowerCase().includes("neural-chat-7b-v3-1-awq")) {
generationParams["max_tokens"] = 590;
}
if (param.model.toLowerCase().includes("openchat-3.5-0106")) {
generationParams["max_tokens"] = 2000;
}
return generationParams;
}
private getModelPrefix(model: string): string {
let modelPrefix = `@cf/meta`;
if (model.toLowerCase().includes("llama")) {
modelPrefix = `@cf/meta`;
}
if (model.toLowerCase().includes("hermes-2-pro-mistral-7b")) {
modelPrefix = `@hf/nousresearch`;
}
if (model.toLowerCase().includes("mistral-7b-instruct")) {
modelPrefix = `@hf/mistral`;
}
if (model.toLowerCase().includes("gemma")) {
modelPrefix = `@cf/google`;
}
if (model.toLowerCase().includes("deepseek")) {
modelPrefix = `@cf/deepseek-ai`;
}
if (model.toLowerCase().includes("openchat-3.5-0106")) {
modelPrefix = `@cf/openchat`;
}
const isNueralChat = model
.toLowerCase()
.includes("neural-chat-7b-v3-1-awq");
if (
isNueralChat ||
model.toLowerCase().includes("openhermes-2.5-mistral-7b-awq") ||
model.toLowerCase().includes("zephyr-7b-beta-awq") ||
model.toLowerCase().includes("deepseek-coder-6.7b-instruct-awq")
) {
modelPrefix = `@hf/thebloke`;
}
return modelPrefix;
}
private getModelWithPrefix(model: string): string {
return `${this.getModelPrefix(model)}/${model}`;
}
async processChunk(chunk: any, dataCallback: (data: any) => void): Promise<boolean> {
// Check if this is the final chunk
if (chunk.choices && chunk.choices[0]?.finish_reason === "stop") {
dataCallback({ type: "chat", data: chunk });
return true; // Break the stream
}
dataCallback({ type: "chat", data: chunk });
return false; // Continue the stream
}
}
export class CloudflareAISdk { export class CloudflareAISdk {
private static provider = new CloudflareAiChatProvider();
static async handleCloudflareAIStream( static async handleCloudflareAIStream(
param: { param: {
openai: OpenAI; openai: OpenAI;
@@ -27,148 +141,16 @@ export class CloudflareAISdk {
}, },
dataCallback: (data) => void, dataCallback: (data) => void,
) { ) {
const { return this.provider.handleStream(
preprocessedContext,
messages,
env,
maxTokens,
systemPrompt,
model,
} = param;
const assistantPrompt = ChatSdk.buildAssistantPrompt({
maxTokens: maxTokens,
});
const safeMessages = ChatSdk.buildMessageChain(messages, {
systemPrompt: systemPrompt,
model,
assistantPrompt,
toolResults: preprocessedContext,
});
const cfAiURL = `https://api.cloudflare.com/client/v4/accounts/${env.CLOUDFLARE_ACCOUNT_ID}/ai/v1`;
console.log({ cfAiURL });
const openai = new OpenAI({
apiKey: env.CLOUDFLARE_API_KEY,
baseURL: cfAiURL,
});
return CloudflareAISdk.streamCloudflareAIResponse(
safeMessages,
{ {
model: param.model, systemPrompt: param.systemPrompt,
preprocessedContext: param.preprocessedContext,
maxTokens: param.maxTokens, maxTokens: param.maxTokens,
openai: openai, messages: param.messages,
model: param.model,
env: param.env,
}, },
dataCallback, dataCallback,
); );
} }
private static async streamCloudflareAIResponse(
messages: any[],
opts: {
model: string;
maxTokens: number | unknown | undefined;
openai: OpenAI;
},
dataCallback: (data: any) => void,
) {
const tuningParams: Record<string, any> = {};
const llamaTuningParams = {
temperature: 0.86,
top_p: 0.98,
presence_penalty: 0.1,
frequency_penalty: 0.3,
max_tokens: opts.maxTokens,
};
const getLlamaTuningParams = () => {
return llamaTuningParams;
};
let modelPrefix = `@cf/meta`;
if (opts.model.toLowerCase().includes("llama")) {
modelPrefix = `@cf/meta`;
}
if (opts.model.toLowerCase().includes("hermes-2-pro-mistral-7b")) {
modelPrefix = `@hf/nousresearch`;
}
if (opts.model.toLowerCase().includes("mistral-7b-instruct")) {
modelPrefix = `@hf/mistral`;
}
if (opts.model.toLowerCase().includes("gemma")) {
modelPrefix = `@cf/google`;
}
if (opts.model.toLowerCase().includes("deepseek")) {
modelPrefix = `@cf/deepseek-ai`;
}
if (opts.model.toLowerCase().includes("openchat-3.5-0106")) {
modelPrefix = `@cf/openchat`;
}
const isNueralChat = opts.model
.toLowerCase()
.includes("neural-chat-7b-v3-1-awq");
if (
isNueralChat ||
opts.model.toLowerCase().includes("openhermes-2.5-mistral-7b-awq") ||
opts.model.toLowerCase().includes("zephyr-7b-beta-awq") ||
opts.model.toLowerCase().includes("deepseek-coder-6.7b-instruct-awq")
) {
modelPrefix = `@hf/thebloke`;
}
const generationParams: Record<string, any> = {
model: `${modelPrefix}/${opts.model}`,
messages: messages,
stream: true,
};
if (modelPrefix === "@cf/meta") {
generationParams["max_tokens"] = 4096;
}
if (modelPrefix === "@hf/mistral") {
generationParams["max_tokens"] = 4096;
}
if (opts.model.toLowerCase().includes("hermes-2-pro-mistral-7b")) {
generationParams["max_tokens"] = 1000;
}
if (opts.model.toLowerCase().includes("openhermes-2.5-mistral-7b-awq")) {
generationParams["max_tokens"] = 1000;
}
if (opts.model.toLowerCase().includes("deepseek-coder-6.7b-instruct-awq")) {
generationParams["max_tokens"] = 590;
}
if (opts.model.toLowerCase().includes("deepseek-math-7b-instruct")) {
generationParams["max_tokens"] = 512;
}
if (opts.model.toLowerCase().includes("neural-chat-7b-v3-1-awq")) {
generationParams["max_tokens"] = 590;
}
if (opts.model.toLowerCase().includes("openchat-3.5-0106")) {
generationParams["max_tokens"] = 2000;
}
const cloudflareAiStream = await opts.openai.chat.completions.create({
...generationParams,
});
for await (const chunk of cloudflareAiStream) {
dataCallback({ type: "chat", data: chunk });
}
}
} }

View File

@@ -10,33 +10,44 @@ import {
} from "mobx-state-tree"; } from "mobx-state-tree";
import Message from "../models/Message"; import Message from "../models/Message";
import ChatSdk from "../lib/chat-sdk"; import ChatSdk from "../lib/chat-sdk";
import { BaseChatProvider, CommonProviderParams } from "./chat-stream-provider";
export class FireworksAiChatSdk { export class FireworksAiChatProvider extends BaseChatProvider {
private static async streamFireworksResponse( getOpenAIClient(param: CommonProviderParams): OpenAI {
messages: any[], return new OpenAI({
opts: { apiKey: param.env.FIREWORKS_API_KEY,
model: string; baseURL: "https://api.fireworks.ai/inference/v1",
maxTokens: number | unknown | undefined; });
openai: OpenAI; }
},
dataCallback: (data: any) => void, getStreamParams(param: CommonProviderParams, safeMessages: any[]): any {
) {
let modelPrefix = "accounts/fireworks/models/"; let modelPrefix = "accounts/fireworks/models/";
if (opts.model.toLowerCase().includes("yi-")) { if (param.model.toLowerCase().includes("yi-")) {
modelPrefix = "accounts/yi-01-ai/models/"; modelPrefix = "accounts/yi-01-ai/models/";
} }
const fireworksStream = await opts.openai.chat.completions.create({ return {
model: `${modelPrefix}${opts.model}`, model: `${modelPrefix}${param.model}`,
messages: messages, messages: safeMessages,
stream: true, stream: true,
}); };
for await (const chunk of fireworksStream) {
dataCallback({ type: "chat", data: chunk });
}
} }
async processChunk(chunk: any, dataCallback: (data: any) => void): Promise<boolean> {
// Check if this is the final chunk
if (chunk.choices && chunk.choices[0]?.finish_reason === "stop") {
dataCallback({ type: "chat", data: chunk });
return true; // Break the stream
}
dataCallback({ type: "chat", data: chunk });
return false; // Continue the stream
}
}
export class FireworksAiChatSdk {
private static provider = new FireworksAiChatProvider();
static async handleFireworksStream( static async handleFireworksStream(
param: { param: {
openai: OpenAI; openai: OpenAI;
@@ -55,36 +66,14 @@ export class FireworksAiChatSdk {
}, },
dataCallback: (data) => void, dataCallback: (data) => void,
) { ) {
const { return this.provider.handleStream(
preprocessedContext,
messages,
env,
maxTokens,
systemPrompt,
model,
} = param;
const assistantPrompt = ChatSdk.buildAssistantPrompt({
maxTokens: maxTokens,
});
const safeMessages = ChatSdk.buildMessageChain(messages, {
systemPrompt: systemPrompt,
model,
assistantPrompt,
toolResults: preprocessedContext,
});
const fireworksOpenAIClient = new OpenAI({
apiKey: param.env.FIREWORKS_API_KEY,
baseURL: "https://api.fireworks.ai/inference/v1",
});
return FireworksAiChatSdk.streamFireworksResponse(
safeMessages,
{ {
model: param.model, systemPrompt: param.systemPrompt,
preprocessedContext: param.preprocessedContext,
maxTokens: param.maxTokens, maxTokens: param.maxTokens,
openai: fireworksOpenAIClient, messages: param.messages,
model: param.model,
env: param.env,
}, },
dataCallback, dataCallback,
); );

View File

@@ -1,97 +1,75 @@
import { OpenAI } from "openai"; import { OpenAI } from "openai";
import ChatSdk from "../lib/chat-sdk"; import ChatSdk from "../lib/chat-sdk";
import { StreamParams } from "../services/ChatService"; import { StreamParams } from "../services/ChatService";
import { BaseChatProvider, CommonProviderParams } from "./chat-stream-provider";
export class GoogleChatProvider extends BaseChatProvider {
getOpenAIClient(param: CommonProviderParams): OpenAI {
return new OpenAI({
baseURL: "https://generativelanguage.googleapis.com/v1beta/openai",
apiKey: param.env.GEMINI_API_KEY,
});
}
getStreamParams(param: CommonProviderParams, safeMessages: any[]): any {
return {
model: param.model,
messages: safeMessages,
stream: true,
};
}
async processChunk(chunk: any, dataCallback: (data: any) => void): Promise<boolean> {
if (chunk.choices?.[0]?.finish_reason === "stop") {
dataCallback({
type: "chat",
data: {
choices: [
{
delta: { content: chunk.choices[0].delta.content || "" },
finish_reason: "stop",
index: chunk.choices[0].index,
},
],
},
});
return true; // Break the stream
} else {
dataCallback({
type: "chat",
data: {
choices: [
{
delta: { content: chunk.choices?.[0]?.delta?.content || "" },
finish_reason: null,
index: chunk.choices?.[0]?.index || 0,
},
],
},
});
return false; // Continue the stream
}
}
}
export class GoogleChatSdk { export class GoogleChatSdk {
private static provider = new GoogleChatProvider();
static async handleGoogleStream( static async handleGoogleStream(
param: StreamParams, param: StreamParams,
dataCallback: (data) => void, dataCallback: (data) => void,
) { ) {
const { return this.provider.handleStream(
preprocessedContext,
messages,
env,
maxTokens,
systemPrompt,
model,
} = param;
const assistantPrompt = ChatSdk.buildAssistantPrompt({
maxTokens: maxTokens,
});
const safeMessages = ChatSdk.buildMessageChain(messages, {
systemPrompt: systemPrompt,
model,
assistantPrompt,
toolResults: preprocessedContext,
});
const openai = new OpenAI({
baseURL: "https://generativelanguage.googleapis.com/v1beta/openai",
apiKey: param.env.GEMINI_API_KEY,
});
return GoogleChatSdk.streamGoogleResponse(
safeMessages,
{ {
model: param.model, systemPrompt: param.systemPrompt,
preprocessedContext: param.preprocessedContext,
maxTokens: param.maxTokens, maxTokens: param.maxTokens,
openai: openai, messages: param.messages,
model: param.model,
env: param.env,
disableWebhookGeneration: param.disableWebhookGeneration,
}, },
dataCallback, dataCallback,
); );
} }
private static async streamGoogleResponse(
messages: any[],
opts: {
model: string;
maxTokens: number | unknown | undefined;
openai: OpenAI;
},
dataCallback: (data: any) => void,
) {
const chatReq = JSON.stringify({
model: opts.model,
messages: messages,
stream: true,
});
const googleStream = await opts.openai.chat.completions.create(
JSON.parse(chatReq),
);
for await (const chunk of googleStream) {
console.log(JSON.stringify(chunk));
if (chunk.choices?.[0]?.finishReason === "stop") {
dataCallback({
type: "chat",
data: {
choices: [
{
delta: { content: chunk.choices[0].delta.content || "" },
finish_reason: "stop",
index: chunk.choices[0].index,
},
],
},
});
break;
} else {
dataCallback({
type: "chat",
data: {
choices: [
{
delta: { content: chunk.choices?.[0]?.delta?.content || "" },
finish_reason: null,
index: chunk.choices?.[0]?.index || 0,
},
],
},
});
}
}
}
} }

View File

@@ -6,9 +6,49 @@ import {
ModelSnapshotType2, ModelSnapshotType2,
UnionStringArray, UnionStringArray,
} from "mobx-state-tree"; } from "mobx-state-tree";
import ChatSdk from "../lib/chat-sdk"; import { BaseChatProvider, CommonProviderParams } from "./chat-stream-provider";
export class GroqChatProvider extends BaseChatProvider {
getOpenAIClient(param: CommonProviderParams): OpenAI {
return new OpenAI({
baseURL: "https://api.groq.com/openai/v1",
apiKey: param.env.GROQ_API_KEY,
});
}
getStreamParams(param: CommonProviderParams, safeMessages: any[]): any {
const llamaTuningParams = {
temperature: 0.86,
top_p: 0.98,
presence_penalty: 0.1,
frequency_penalty: 0.3,
max_tokens: param.maxTokens as number,
};
return {
model: param.model,
messages: safeMessages,
stream: true,
...llamaTuningParams
};
}
async processChunk(chunk: any, dataCallback: (data: any) => void): Promise<boolean> {
// Check if this is the final chunk
if (chunk.choices && chunk.choices[0]?.finish_reason === "stop") {
dataCallback({ type: "chat", data: chunk });
return true; // Break the stream
}
dataCallback({ type: "chat", data: chunk });
return false; // Continue the stream
}
}
// Legacy class for backward compatibility
export class GroqChatSdk { export class GroqChatSdk {
private static provider = new GroqChatProvider();
static async handleGroqStream( static async handleGroqStream(
param: { param: {
openai: OpenAI; openai: OpenAI;
@@ -27,73 +67,16 @@ export class GroqChatSdk {
}, },
dataCallback: (data) => void, dataCallback: (data) => void,
) { ) {
const { return this.provider.handleStream(
preprocessedContext,
messages,
env,
maxTokens,
systemPrompt,
model,
} = param;
const assistantPrompt = ChatSdk.buildAssistantPrompt({
maxTokens: maxTokens,
});
const safeMessages = ChatSdk.buildMessageChain(messages, {
systemPrompt: systemPrompt,
model,
assistantPrompt,
toolResults: preprocessedContext,
});
const openai = new OpenAI({
baseURL: "https://api.groq.com/openai/v1",
apiKey: param.env.GROQ_API_KEY,
});
return GroqChatSdk.streamGroqResponse(
safeMessages,
{ {
model: param.model, systemPrompt: param.systemPrompt,
preprocessedContext: param.preprocessedContext,
maxTokens: param.maxTokens, maxTokens: param.maxTokens,
openai: openai, messages: param.messages,
model: param.model,
env: param.env,
}, },
dataCallback, dataCallback,
); );
} }
private static async streamGroqResponse(
messages: any[],
opts: {
model: string;
maxTokens: number | unknown | undefined;
openai: OpenAI;
},
dataCallback: (data: any) => void,
) {
const tuningParams: Record<string, any> = {};
const llamaTuningParams = {
temperature: 0.86,
top_p: 0.98,
presence_penalty: 0.1,
frequency_penalty: 0.3,
max_tokens: opts.maxTokens,
};
const getLlamaTuningParams = () => {
return llamaTuningParams;
};
const groqStream = await opts.openai.chat.completions.create({
model: opts.model,
messages: messages,
frequency_penalty: 2,
stream: true,
temperature: 0.78,
});
for await (const chunk of groqStream) {
dataCallback({ type: "chat", data: chunk });
}
}
} }

View File

@@ -1,65 +1,16 @@
import { OpenAI } from "openai"; import { OpenAI } from "openai";
import ChatSdk from "../lib/chat-sdk";
import { Utils } from "../lib/utils"; import { Utils } from "../lib/utils";
import {ChatCompletionCreateParamsStreaming} from "openai/resources/chat/completions/completions"; import { ChatCompletionCreateParamsStreaming } from "openai/resources/chat/completions/completions";
import { BaseChatProvider, CommonProviderParams } from "./chat-stream-provider";
export class OpenAiChatSdk { export class OpenAiChatProvider extends BaseChatProvider {
static async handleOpenAiStream( getOpenAIClient(param: CommonProviderParams): OpenAI {
ctx: { return param.openai as OpenAI;
openai: OpenAI;
systemPrompt: any;
preprocessedContext: any;
maxTokens: unknown | number | undefined;
messages: any;
model: any;
},
dataCallback: (data: any) => any,
) {
const {
openai,
systemPrompt,
maxTokens,
messages,
model,
preprocessedContext,
} = ctx;
if (!messages?.length) {
return new Response("No messages provided", { status: 400 });
}
const assistantPrompt = ChatSdk.buildAssistantPrompt({
maxTokens: maxTokens,
});
const safeMessages = ChatSdk.buildMessageChain(messages, {
systemPrompt: systemPrompt,
model,
assistantPrompt,
toolResults: preprocessedContext,
});
return OpenAiChatSdk.streamOpenAiResponse(
safeMessages,
{
model,
maxTokens: maxTokens as number,
openai: openai,
},
dataCallback,
);
} }
private static async streamOpenAiResponse( getStreamParams(param: CommonProviderParams, safeMessages: any[]): ChatCompletionCreateParamsStreaming {
messages: any[],
opts: {
model: string;
maxTokens: number | undefined;
openai: OpenAI;
},
dataCallback: (data: any) => any,
) {
const isO1 = () => { const isO1 = () => {
if (opts.model === "o1-preview" || opts.model === "o1-mini") { if (param.model === "o1-preview" || param.model === "o1-mini") {
return true; return true;
} }
}; };
@@ -71,55 +22,93 @@ export class OpenAiChatSdk {
top_p: 0.98, top_p: 0.98,
presence_penalty: 0.1, presence_penalty: 0.1,
frequency_penalty: 0.3, frequency_penalty: 0.3,
max_tokens: opts.maxTokens, max_tokens: param.maxTokens as number,
}; };
const getTuningParams = () => { const getTuningParams = () => {
if (isO1()) { if (isO1()) {
tuningParams["temperature"] = 1; tuningParams["temperature"] = 1;
tuningParams["max_completion_tokens"] = opts.maxTokens + 10000; tuningParams["max_completion_tokens"] = (param.maxTokens as number) + 10000;
return tuningParams; return tuningParams;
} }
return gpt4oTuningParams; return gpt4oTuningParams;
}; };
let completionRequest: ChatCompletionCreateParamsStreaming = { let completionRequest: ChatCompletionCreateParamsStreaming = {
model: opts.model, model: param.model,
stream: true, stream: true,
messages: messages messages: safeMessages
}; };
const isLocal = opts.openai.baseURL.includes("localhost"); const client = this.getOpenAIClient(param);
const isLocal = client.baseURL.includes("localhost");
if(isLocal) { if(isLocal) {
completionRequest["messages"] = Utils.normalizeWithBlanks(messages) completionRequest["messages"] = Utils.normalizeWithBlanks(safeMessages);
completionRequest["stream_options"] = { completionRequest["stream_options"] = {
include_usage: true include_usage: true
} };
} else { } else {
completionRequest = {...completionRequest, ...getTuningParams()} completionRequest = {...completionRequest, ...getTuningParams()};
} }
const openAIStream = await opts.openai.chat.completions.create(completionRequest); return completionRequest;
}
for await (const chunk of openAIStream) { async processChunk(chunk: any, dataCallback: (data: any) => void): Promise<boolean> {
if (isLocal && chunk.usage) { const isLocal = chunk.usage !== undefined;
dataCallback({
type: "chat", if (isLocal && chunk.usage) {
data: { dataCallback({
choices: [ type: "chat",
{ data: {
delta: { content: "" }, choices: [
logprobs: null, {
finish_reason: "stop", delta: { content: "" },
}, logprobs: null,
], finish_reason: "stop",
}, },
}); ],
break; },
} });
dataCallback({ type: "chat", data: chunk }); return true; // Break the stream
} }
dataCallback({ type: "chat", data: chunk });
return false; // Continue the stream
}
}
// Legacy class for backward compatibility
export class OpenAiChatSdk {
private static provider = new OpenAiChatProvider();
static async handleOpenAiStream(
ctx: {
openai: OpenAI;
systemPrompt: any;
preprocessedContext: any;
maxTokens: unknown | number | undefined;
messages: any;
model: any;
},
dataCallback: (data: any) => any,
) {
if (!ctx.messages?.length) {
return new Response("No messages provided", { status: 400 });
}
return this.provider.handleStream(
{
openai: ctx.openai,
systemPrompt: ctx.systemPrompt,
preprocessedContext: ctx.preprocessedContext,
maxTokens: ctx.maxTokens,
messages: ctx.messages,
model: ctx.model,
env: {} as Env, // This is not used in OpenAI provider
},
dataCallback,
);
} }
} }

View File

@@ -1,86 +1,18 @@
import { OpenAI } from "openai"; import { OpenAI } from "openai";
import ChatSdk from "../lib/chat-sdk"; import ChatSdk from "../lib/chat-sdk";
import { BaseChatProvider, CommonProviderParams } from "./chat-stream-provider";
export class XaiChatSdk { export class XaiChatProvider extends BaseChatProvider {
static async handleXaiStream( getOpenAIClient(param: CommonProviderParams): OpenAI {
ctx: { return new OpenAI({
openai: OpenAI;
systemPrompt: any;
preprocessedContext: any;
maxTokens: unknown | number | undefined;
messages: any;
disableWebhookGeneration: boolean;
model: any;
env: Env;
},
dataCallback: (data: any) => any,
) {
const {
openai,
systemPrompt,
maxTokens,
messages,
env,
model,
preprocessedContext,
} = ctx;
if (!messages?.length) {
return new Response("No messages provided", { status: 400 });
}
const getMaxTokens = async (mt) => {
if (mt) {
return await ChatSdk.calculateMaxTokens(
JSON.parse(JSON.stringify(messages)),
{
env,
maxTokens: mt,
},
);
} else {
return undefined;
}
};
const assistantPrompt = ChatSdk.buildAssistantPrompt({
maxTokens: maxTokens,
});
const safeMessages = ChatSdk.buildMessageChain(messages, {
systemPrompt: systemPrompt,
model,
assistantPrompt,
toolResults: preprocessedContext,
});
const xAiClient = new OpenAI({
baseURL: "https://api.x.ai/v1", baseURL: "https://api.x.ai/v1",
apiKey: env.XAI_API_KEY, apiKey: param.env.XAI_API_KEY,
}); });
return XaiChatSdk.streamOpenAiResponse(
safeMessages,
{
model,
maxTokens: maxTokens as number,
openai: xAiClient,
},
dataCallback,
);
} }
private static async streamOpenAiResponse( getStreamParams(param: CommonProviderParams, safeMessages: any[]): any {
messages: any[],
opts: {
model: string;
maxTokens: number | undefined;
openai: OpenAI;
},
dataCallback: (data: any) => any,
) {
const isO1 = () => { const isO1 = () => {
if (opts.model === "o1-preview" || opts.model === "o1-mini") { if (param.model === "o1-preview" || param.model === "o1-mini") {
return true; return true;
} }
}; };
@@ -94,21 +26,63 @@ export class XaiChatSdk {
const getTuningParams = () => { const getTuningParams = () => {
if (isO1()) { if (isO1()) {
tuningParams["temperature"] = 1; tuningParams["temperature"] = 1;
tuningParams["max_completion_tokens"] = opts.maxTokens + 10000; tuningParams["max_completion_tokens"] = (param.maxTokens as number) + 10000;
return tuningParams; return tuningParams;
} }
return gpt4oTuningParams; return gpt4oTuningParams;
}; };
const xAIStream = await opts.openai.chat.completions.create({ return {
model: opts.model, model: param.model,
messages: messages, messages: safeMessages,
stream: true, stream: true,
...getTuningParams(), ...getTuningParams(),
}); };
}
for await (const chunk of xAIStream) { async processChunk(chunk: any, dataCallback: (data: any) => void): Promise<boolean> {
// Check if this is the final chunk
if (chunk.choices && chunk.choices[0]?.finish_reason === "stop") {
dataCallback({ type: "chat", data: chunk }); dataCallback({ type: "chat", data: chunk });
return true; // Break the stream
} }
dataCallback({ type: "chat", data: chunk });
return false; // Continue the stream
}
}
export class XaiChatSdk {
private static provider = new XaiChatProvider();
static async handleXaiStream(
ctx: {
openai: OpenAI;
systemPrompt: any;
preprocessedContext: any;
maxTokens: unknown | number | undefined;
messages: any;
disableWebhookGeneration: boolean;
model: any;
env: Env;
},
dataCallback: (data: any) => any,
) {
if (!ctx.messages?.length) {
return new Response("No messages provided", { status: 400 });
}
return this.provider.handleStream(
{
systemPrompt: ctx.systemPrompt,
preprocessedContext: ctx.preprocessedContext,
maxTokens: ctx.maxTokens,
messages: ctx.messages,
model: ctx.model,
env: ctx.env,
disableWebhookGeneration: ctx.disableWebhookGeneration,
},
dataCallback,
);
} }
} }