mirror of
https://github.com/geoffsee/open-gsio.git
synced 2025-09-08 22:56:46 +00:00
init
This commit is contained in:
@@ -0,0 +1,49 @@
|
||||
import {
|
||||
ManifoldRegion,
|
||||
WorkflowFunctionManifold,
|
||||
} from "manifold-workflow-engine";
|
||||
import { createIntentService } from "../IntentService";
|
||||
import { createSearchWebhookOperator } from "./webOperator";
|
||||
import { createNewsWebhookOperator } from "./newsOperator";
|
||||
import { createScrapeWebhookOperator } from "./scrapeOperator";
|
||||
|
||||
/**
 * Assembles the preprocessing manifold for one chat stream.
 *
 * The manifold is constructed around an intent service built from the chat's
 * `messages` and `openai` client, seeded with a shallow copy of
 * `initialState`, and given a single region ("preprocessingRegion") holding
 * the web-search, news-search and web-scrape webhook operators, in that order.
 *
 * @param eventHost - Forwarded unchanged to every webhook operator factory.
 * @param initialState - Starting state; shallow-copied onto the manifold.
 * @param streamId - Forwarded unchanged to every webhook operator factory.
 * @param chat - Provides `messages` and `openai`, shared by the intent
 *   service and the operators.
 * @returns The configured `WorkflowFunctionManifold`.
 */
export const createPreprocessingWorkflow = ({
  eventHost,
  initialState,
  streamId,
  chat: { messages, openai },
}) => {
  // All three operator factories take the same four dependencies.
  const operatorDeps = { eventHost, streamId, openai, messages };

  const manifold = new WorkflowFunctionManifold(
    createIntentService({ messages, openai }),
  );
  manifold.state = { ...initialState };

  manifold.addRegion(
    new ManifoldRegion("preprocessingRegion", [
      createSearchWebhookOperator(operatorDeps),
      createNewsWebhookOperator(operatorDeps),
      createScrapeWebhookOperator(operatorDeps),
    ]),
  );

  return manifold;
};
|
@@ -0,0 +1,54 @@
|
||||
import { createPreprocessingWorkflow } from "./createPreprocessingWorkflow";
|
||||
|
||||
/**
 * Runs the preprocessing workflow for the latest user message.
 *
 * A workflow is built via `createPreprocessingWorkflow`, navigated and
 * executed with `latestUserMessage`, and its final state is stored under the
 * "preprocessed" key of the returned `results` map.
 *
 * Runs are de-duplicated per stream: while a run for a given `streamId` is in
 * flight (and for one second after it succeeds) further calls for the same
 * stream receive the promise of that run instead of starting a new one.
 *
 * @param latestUserMessage - Message used to navigate and execute the workflow.
 * @param latestAiMessage - Stored alongside the user message in the initial state.
 * @param eventHost - Forwarded to `createPreprocessingWorkflow`.
 * @param streamId - Forwarded to `createPreprocessingWorkflow`; also scopes
 *   the duplicate-run guard.
 * @param chat - Provides `messages` and `openai` for the workflow.
 * @returns `{ preprocessing, results }` where `results` is a `Map` whose
 *   "preprocessed" entry is the workflow's final state.
 * @throws Error("Workflow execution failed") when navigation or execution
 *   rejects; the underlying error is logged first.
 */
export async function executePreprocessingWorkflow({
  latestUserMessage,
  latestAiMessage,
  eventHost,
  streamId,
  chat: { messages, openai },
}) {
  console.log(`Executing executePreprocessingWorkflow`);
  const initialState = { latestUserMessage, latestAiMessage };

  // The guard key must be stable across calls for the same stream; a key
  // generated randomly on every call can never match an earlier run. Without
  // a usable streamId there is nothing to de-duplicate on, so fall back to a
  // unique key (i.e. no sharing).
  const executionScope = streamId ? String(streamId) : crypto.randomUUID();
  const executionKey = `preprocessing-${executionScope}`;
  if (globalThis[executionKey]) {
    console.log("Preventing duplicate preprocessing workflow execution");
    return globalThis[executionKey];
  }

  const workflows = {
    preprocessing: createPreprocessingWorkflow({
      eventHost,
      initialState,
      streamId,
      chat: { messages, openai },
    }),
    results: new Map(),
  };

  try {
    // Store the promise to prevent parallel executions
    globalThis[executionKey] = (async () => {
      await workflows.preprocessing.navigate(latestUserMessage);
      await workflows.preprocessing.executeWorkflow(latestUserMessage);
      console.log(
        `executePreprocessingWorkflow::workflow::preprocessing::results`,
        { state: JSON.stringify(workflows.preprocessing.state, null, 2) },
      );
      workflows.results.set("preprocessed", workflows.preprocessing.state);

      // Cleanup after execution
      setTimeout(() => {
        delete globalThis[executionKey];
      }, 1000);

      return workflows;
    })();

    return await globalThis[executionKey];
  } catch (error) {
    delete globalThis[executionKey];
    // Callers only see the generic message below, so keep the real cause in
    // the logs.
    console.error(
      `executePreprocessingWorkflow::workflow::preprocessing::error`,
      error,
    );
    throw new Error("Workflow execution failed");
  }
}
|
101
workers/site/workflows/preprocessing/newsOperator.ts
Normal file
101
workers/site/workflows/preprocessing/newsOperator.ts
Normal file
@@ -0,0 +1,101 @@
|
||||
import { WorkflowOperator } from "manifold-workflow-engine";
|
||||
import { zodResponseFormat } from "openai/helpers/zod";
|
||||
import { z } from "zod";
|
||||
|
||||
/** Shape of the model's structured reply for news search: one `query` string. */
const QuerySchema = z.object({ query: z.string() });
|
||||
|
||||
/**
 * Creates the "news-search" workflow operator.
 *
 * When run, the operator asks the model for the news query implied by the
 * conversation, registers a webhook stream for it by POSTing to
 * `api/webhooks` on the event host, and returns the incoming state extended
 * with a `webhook` descriptor (`url`, `id`, `resource`).
 *
 * @param eventHost - Base URL of the event host; must be parseable by `URL`.
 * @param streamId - Id of the parent stream, sent as `parent`.
 * @param openai - Client exposing `beta.chat.completions.parse`.
 * @param messages - Prior conversation; each entry's `role` and `content`
 *   are forwarded to the model.
 * @returns A `WorkflowOperator` named "news-search".
 * @throws Error (from the operator) when the webhook endpoint answers with a
 *   non-2xx status or its reply carries no `stream_url`.
 */
export function createNewsWebhookOperator({
  eventHost,
  streamId,
  openai,
  messages,
}) {
  return new WorkflowOperator("news-search", async (state: any) => {
    const { latestUserMessage } = state;
    console.log(`Processing user message: ${latestUserMessage}`);

    const resource = "news-search";
    const input = await getQueryFromContext({
      openai,
      messages,
      latestUserMessage,
    });

    // Parsing through URL rejects a malformed eventHost up front; a bare
    // origin serializes with a trailing slash, hence no leading "/" here.
    const eventSource = new URL(eventHost);
    const url = `${eventSource}api/webhooks`;
    console.log({ url });

    const webhookStreamId = crypto.randomUUID();
    const createStreamResponse = await fetch(url, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        id: webhookStreamId,
        parent: streamId,
        resource,
        payload: {
          input,
        },
      }),
    });
    if (!createStreamResponse.ok) {
      throw new Error(
        `Failed to create ${resource} webhook stream: HTTP ${createStreamResponse.status}`,
      );
    }

    const raw = await createStreamResponse.text();
    const { stream_url } = JSON.parse(raw);
    if (typeof stream_url !== "string") {
      // Without this check the webhook URL would silently end in "undefined".
      throw new Error(
        `Webhook response for ${resource} did not include a stream_url`,
      );
    }
    const surl = eventHost + stream_url;
    const webhook = { url: surl, id: webhookStreamId, resource };

    return {
      ...state,
      webhook,
      // Both messages are blanked in the state handed back to the workflow.
      latestUserMessage: "",
      latestAiMessage: "",
    };
  });

  /**
   * Asks the model for the single news-search question implied by the
   * conversation plus the latest user message.
   *
   * @returns The `query` string from the model's structured reply.
   */
  async function getQueryFromContext({ messages, openai, latestUserMessage }) {
    const systemMessage = {
      role: "system" as const,
      content: `Analyze the latest message in a conversation and generate a JSON object with a single implied question for a news search. The JSON should be structured as follows:

{
"query": "<question to be answered by a news search>"
}

## Example
{
"query": "When was the last Buffalo Sabres hockey game?"
}

Focus on the most recent message to determine the query. Output only the JSON object without any additional text.`,
    };

    // Only role and content are forwarded; any other message fields are dropped.
    const conversation = messages.map((m) => ({
      role: m.role,
      content: m.content,
    }));
    conversation.push({ role: "user", content: `${latestUserMessage}` });

    const m = [systemMessage, ...conversation];

    const completion = await openai.beta.chat.completions.parse({
      model: "gpt-4o-mini",
      messages: m,
      temperature: 0,
      response_format: zodResponseFormat(QuerySchema, "query"),
    });

    const { query } = completion.choices[0].message.parsed;

    console.log({ newsWebhookQuery: query });

    return query;
  }
}
|
112
workers/site/workflows/preprocessing/scrapeOperator.ts
Normal file
112
workers/site/workflows/preprocessing/scrapeOperator.ts
Normal file
@@ -0,0 +1,112 @@
|
||||
import { WorkflowOperator } from "manifold-workflow-engine";
|
||||
import { zodResponseFormat } from "openai/helpers/zod";
|
||||
import { z } from "zod";
|
||||
|
||||
/**
 * Shape of the model's structured reply for the scrape operator: the target
 * `url`, the implied `query`, and an `action` that is one of "read",
 * "scrape", "crawl" or the empty string.
 */
const UrlActionSchema = z.object({
  url: z.string(),
  query: z.string(),
  action: z.enum(["read", "scrape", "crawl", ""]),
});
|
||||
|
||||
/**
 * Creates the "web-scrape" workflow operator.
 *
 * When run, the operator asks the model for the URL, action and question
 * implied by the conversation, registers a webhook stream for them by
 * POSTing to `api/webhooks` on the event host, and returns the incoming
 * state extended with a `webhook` descriptor (`url`, `id`, `resource`).
 *
 * @param eventHost - Base URL of the event host; must be parseable by `URL`.
 * @param streamId - Id of the parent stream, sent as `parent`.
 * @param openai - Client exposing `beta.chat.completions.parse`.
 * @param messages - Prior conversation forwarded to the model.
 * @returns A `WorkflowOperator` named "web-scrape".
 * @throws Error (from the operator) when the webhook endpoint answers with a
 *   non-2xx status or its reply carries no `stream_url`.
 */
export function createScrapeWebhookOperator({
  eventHost,
  streamId,
  openai,
  messages,
}) {
  return new WorkflowOperator("web-scrape", async (state: any) => {
    const { latestUserMessage } = state;

    const resource = "web-scrape";
    const context = await getQueryFromContext({
      openai,
      messages,
      latestUserMessage,
    });

    // Key order (url, action, query) is kept so the serialized payload is
    // unchanged.
    const input = {
      url: context.url,
      action: context.action,
      query: context.query,
    };

    // Parsing through URL rejects a malformed eventHost up front; a bare
    // origin serializes with a trailing slash, hence no leading "/" here.
    const eventSource = new URL(eventHost);
    const url = `${eventSource}api/webhooks`;

    const webhookStreamId = crypto.randomUUID();
    const createStreamResponse = await fetch(url, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        id: webhookStreamId,
        parent: streamId,
        resource,
        payload: {
          input,
        },
      }),
    });
    if (!createStreamResponse.ok) {
      throw new Error(
        `Failed to create ${resource} webhook stream: HTTP ${createStreamResponse.status}`,
      );
    }

    const raw = await createStreamResponse.text();
    const { stream_url } = JSON.parse(raw);
    if (typeof stream_url !== "string") {
      // Without this check the webhook URL would silently end in "undefined".
      throw new Error(
        `Webhook response for ${resource} did not include a stream_url`,
      );
    }
    const surl = eventHost + stream_url;
    const webhook = { url: surl, id: webhookStreamId, resource };

    return {
      ...state,
      webhook,
      // Both messages are blanked in the state handed back to the workflow.
      latestUserMessage: "",
      latestAiMessage: "",
    };
  });
}
|
||||
|
||||
/**
 * Asks the model which URL, action and question are implied by the
 * conversation plus the latest user message.
 *
 * @param messages - Prior conversation; only `role` and `content` are sent.
 * @param openai - Client exposing `beta.chat.completions.parse`.
 * @param latestUserMessage - Appended to the conversation as a final user turn.
 * @returns `{ query, action, url }` taken from the parsed structured reply.
 */
async function getQueryFromContext({ messages, openai, latestUserMessage }) {
  // The prompt text is flush-left so no source indentation ends up in it.
  const instructions =
    `You are modeling a structured output containing a single question, a URL, and an action, all relative to a single input.

Return the result as a JSON object in the following structure:
{
"url": "Full URL in the conversation that references the URL being interacted with. No trailing slash!",
"query": "Implied question about the resources at the URL.",
"action": "read | scrape | crawl"
}

- The input being modeled is conversational data from a different conversation than this one.
- Intent should represent a next likely action the system might take to satisfy or enhance the user's request.

Instructions:
1. Analyze the provided context and declare the url, action, and question implied by the latest message.

Output the JSON object. Do not include any additional explanations or text.`.trim();

  const history = messages.map(({ role, content }) => ({ role, content }));

  // System prompt first, then the prior turns, then the newest user message.
  const prompt = [
    { role: "system" as const, content: instructions },
    ...history,
    { role: "user", content: `${latestUserMessage}` },
  ];

  const completion = await openai.beta.chat.completions.parse({
    model: "gpt-4o-mini",
    messages: prompt,
    temperature: 0,
    response_format: zodResponseFormat(UrlActionSchema, "UrlActionSchema"),
  });

  const parsed = completion.choices[0].message.parsed;
  return { query: parsed.query, action: parsed.action, url: parsed.url };
}
|
100
workers/site/workflows/preprocessing/webOperator.ts
Normal file
100
workers/site/workflows/preprocessing/webOperator.ts
Normal file
@@ -0,0 +1,100 @@
|
||||
import { WorkflowOperator } from "manifold-workflow-engine";
|
||||
import { zodResponseFormat } from "openai/helpers/zod";
|
||||
import { z } from "zod";
|
||||
|
||||
/**
 * Shape of the model's structured reply for web search: one `query` string.
 * No min/max constraints are placed on it in the schema.
 */
const QuerySchema = z.object({ query: z.string() });
|
||||
|
||||
/**
 * Creates the "web-search" workflow operator.
 *
 * When run, the operator asks the model for the web-search query implied by
 * the conversation, registers a webhook stream for it by POSTing to
 * `api/webhooks` on the event host, and returns the incoming state extended
 * with a `webhook` descriptor (`url`, `id`, `resource`).
 *
 * @param eventHost - Base URL of the event host; must be parseable by `URL`.
 * @param streamId - Id of the parent stream, sent as `parent`.
 * @param openai - Client exposing `beta.chat.completions.parse`.
 * @param messages - Prior conversation forwarded to the model.
 * @returns A `WorkflowOperator` named "web-search".
 * @throws Error (from the operator) when the webhook endpoint answers with a
 *   non-2xx status or its reply carries no `stream_url`.
 */
export function createSearchWebhookOperator({
  eventHost,
  streamId,
  openai,
  messages,
}) {
  return new WorkflowOperator("web-search", async (state: any) => {
    const { latestUserMessage } = state;

    const resource = "web-search";
    const input = await getQueryFromContext({
      openai,
      messages,
      latestUserMessage,
    });

    // process webhooks
    // Parsing through URL rejects a malformed eventHost up front; a bare
    // origin serializes with a trailing slash, hence no leading "/" here.
    const eventSource = new URL(eventHost);
    const url = `${eventSource}api/webhooks`;

    const webhookStreamId = crypto.randomUUID();
    const createStreamResponse = await fetch(url, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        id: webhookStreamId,
        parent: streamId,
        resource,
        payload: {
          input,
        },
      }),
    });
    if (!createStreamResponse.ok) {
      throw new Error(
        `Failed to create ${resource} webhook stream: HTTP ${createStreamResponse.status}`,
      );
    }

    const raw = await createStreamResponse.text();
    const { stream_url } = JSON.parse(raw);
    if (typeof stream_url !== "string") {
      // Without this check the webhook URL would silently end in "undefined".
      throw new Error(
        `Webhook response for ${resource} did not include a stream_url`,
      );
    }
    const surl = eventHost + stream_url;
    const webhook = { url: surl, id: webhookStreamId, resource };

    return {
      ...state,
      webhook,
      latestUserMessage: "", // unset to break out of loop
      latestAiMessage: "", // unset to break out of loop
    };
  });
}
|
||||
|
||||
/**
 * Asks the model for the single web-search question implied by the
 * conversation plus the latest user message.
 *
 * @param messages - Prior conversation; only `role` and `content` are sent.
 * @param openai - Client exposing `beta.chat.completions.parse`.
 * @param latestUserMessage - Appended to the conversation as a final user turn.
 * @returns The `query` string from the parsed structured reply.
 */
async function getQueryFromContext({ messages, openai, latestUserMessage }) {
  // The prompt text is flush-left so no source indentation ends up in it.
  const instructions = `Analyze the latest message in the conversation and generate a JSON object with a single implied question for a web search. The JSON should be structured as follows:

{
"query": "the question that needs a web search"
}

## Example
{
"query": "What was the score of the last Buffalo Sabres hockey game?"
}

Focus on the most recent message to determine the query. Output only the JSON object without any additional text.`;

  const history = messages.map(({ role, content }) => ({ role, content }));

  // System prompt first, then the prior turns, then the newest user message.
  const prompt = [
    { role: "system", content: instructions },
    ...history,
    { role: "user", content: `${latestUserMessage}` },
  ];

  const completion = await openai.beta.chat.completions.parse({
    model: "gpt-4o-mini",
    messages: prompt,
    temperature: 0,
    response_format: zodResponseFormat(QuerySchema, "query"),
  });

  const parsed = completion.choices[0].message.parsed;
  return parsed.query;
}
|
Reference in New Issue
Block a user