This commit is contained in:
geoffsee
2025-05-22 23:14:01 -04:00
commit 33679583af
242 changed files with 15090 additions and 0 deletions

View File

@@ -0,0 +1,49 @@
import {
ManifoldRegion,
WorkflowFunctionManifold,
} from "manifold-workflow-engine";
import { createIntentService } from "../IntentService";
import { createSearchWebhookOperator } from "./webOperator";
import { createNewsWebhookOperator } from "./newsOperator";
import { createScrapeWebhookOperator } from "./scrapeOperator";
/**
 * Builds the preprocessing manifold for a chat turn.
 *
 * The manifold wraps an intent service created from the chat's `messages` and
 * `openai` client, starts from a shallow copy of `initialState`, and contains
 * one region named "preprocessingRegion" holding the web-search, news-search
 * and web-scrape webhook operators, in that order.
 *
 * @param eventHost - Passed through unchanged to every operator factory.
 * @param initialState - Shallow-copied into the manifold's `state`.
 * @param streamId - Passed through unchanged to every operator factory.
 * @param chat - Supplies the `messages` and `openai` values used above.
 * @returns The configured manifold.
 */
export const createPreprocessingWorkflow = ({
  eventHost,
  initialState,
  streamId,
  chat: { messages, openai },
}) => {
  const manifold = new WorkflowFunctionManifold(
    createIntentService({ messages, openai }),
  );
  manifold.state = { ...initialState };

  // Listed in region order; each factory receives its own fresh options object.
  const operatorFactories = [
    createSearchWebhookOperator,
    createNewsWebhookOperator,
    createScrapeWebhookOperator,
  ];
  const operators = operatorFactories.map((createOperator) =>
    createOperator({ eventHost, streamId, openai, messages }),
  );

  manifold.addRegion(new ManifoldRegion("preprocessingRegion", operators));
  return manifold;
};

View File

@@ -0,0 +1,54 @@
import { createPreprocessingWorkflow } from "./createPreprocessingWorkflow";
/**
 * Runs the preprocessing workflow for one chat turn.
 *
 * Builds a preprocessing manifold seeded with the latest user/AI messages,
 * navigates and executes it against `latestUserMessage`, and records the final
 * manifold state under the "preprocessed" key of `results`.
 *
 * While a run is in flight (and for one second after it succeeds) its promise
 * is parked on `globalThis` under a key derived from `streamId`. A second call
 * for the same stream inside that window receives that promise instead of
 * starting another run. Calls without a usable `streamId` are never
 * deduplicated.
 *
 * @param latestUserMessage - Message the workflow navigates/executes against.
 * @param latestAiMessage - Stored in the initial state next to the user message.
 * @param eventHost - Forwarded to `createPreprocessingWorkflow`.
 * @param streamId - Forwarded to `createPreprocessingWorkflow`; also scopes the
 *   duplicate-run guard.
 * @param chat - Supplies `messages` and `openai`.
 * @returns `{ preprocessing, results }`, where `results` is a Map containing
 *   the final state under "preprocessed".
 * @throws Error with message "Workflow execution failed"; the underlying
 *   failure is attached as `cause`.
 */
export async function executePreprocessingWorkflow({
  latestUserMessage,
  latestAiMessage,
  eventHost,
  streamId,
  chat: { messages, openai },
}) {
  console.log(`Executing executePreprocessingWorkflow`);
  const initialState = { latestUserMessage, latestAiMessage };

  // The guard key must be reproducible across calls to ever match. A key made
  // only from a fresh UUID differs on every invocation, so the stream id is
  // used; `||` (not `??`) deliberately treats an empty id as "no id" so that
  // unrelated calls are never collapsed onto one shared key.
  const executionKey = `preprocessing-${streamId || crypto.randomUUID()}`;

  const inFlight = globalThis[executionKey];
  if (inFlight) {
    console.log("Preventing duplicate preprocessing workflow execution");
    return inFlight;
  }

  const workflows = {
    preprocessing: createPreprocessingWorkflow({
      eventHost,
      initialState,
      streamId,
      chat: { messages, openai },
    }),
    results: new Map(),
  };

  // Only remove the marker if it still belongs to this run.
  const releaseKey = (owner) => {
    if (globalThis[executionKey] === owner) {
      delete globalThis[executionKey];
    }
  };

  const run = (async () => {
    await workflows.preprocessing.navigate(latestUserMessage);
    await workflows.preprocessing.executeWorkflow(latestUserMessage);
    console.log(
      `executePreprocessingWorkflow::workflow::preprocessing::results`,
      { state: JSON.stringify(workflows.preprocessing.state, null, 2) },
    );
    workflows.results.set("preprocessed", workflows.preprocessing.state);
    return workflows;
  })();

  // Store the promise so a parallel call for the same stream can reuse it.
  globalThis[executionKey] = run;

  try {
    const finished = await run;
    // Keep the finished run visible briefly, then clean up.
    setTimeout(() => releaseKey(run), 1000);
    return finished;
  } catch (error) {
    releaseKey(run);
    console.error("executePreprocessingWorkflow failed", error);
    // Same message as before, but keep the root cause reachable for callers.
    const failure = new Error("Workflow execution failed");
    (failure as Error & { cause?: unknown }).cause = error;
    throw failure;
  }
}

View File

@@ -0,0 +1,101 @@
import { WorkflowOperator } from "manifold-workflow-engine";
import { zodResponseFormat } from "openai/helpers/zod";
import { z } from "zod";
// Structured-output schema handed to `zodResponseFormat` for the news-search
// query request: an object with a single string field, `query`.
const QuerySchema = z.object({
query: z.string(),
});
/**
 * Creates the "news-search" workflow operator.
 *
 * When run, the operator asks the model for a news-search query implied by the
 * conversation, registers a webhook stream by POSTing to
 * `<eventHost>/api/webhooks`, and returns the incoming state extended with a
 * `webhook` descriptor (`url`, `id`, `resource`). `latestUserMessage` and
 * `latestAiMessage` are blanked in the returned state.
 *
 * @param eventHost - Base URL of the event service; must parse with `new URL`.
 * @param streamId - Sent as the `parent` of the registered webhook stream.
 * @param openai - Client exposing `beta.chat.completions.parse`.
 * @param messages - Prior conversation; only `role` and `content` are sent.
 * @returns A `WorkflowOperator` named "news-search".
 */
export function createNewsWebhookOperator({
  eventHost,
  streamId,
  openai,
  messages,
}) {
  const resource = "news-search";

  /**
   * Asks the model for the single news-search question implied by the
   * conversation plus `latestUserMessage`.
   *
   * @throws Error when the completion carries no parsed payload.
   */
  async function getQueryFromContext({ messages, openai, latestUserMessage }) {
    const systemMessage = {
      role: "system",
      content: `Analyze the latest message in a conversation and generate a JSON object with a single implied question for a news search. The JSON should be structured as follows:
{
"query": "<question to be answered by a news search>"
}
## Example
{
"query": "When was the last Buffalo Sabres hockey game?"
}
Focus on the most recent message to determine the query. Output only the JSON object without any additional text.`,
    };
    const conversation = messages.map((m) => ({
      role: m.role,
      content: m.content,
    }));
    conversation.push({ role: "user", content: `${latestUserMessage}` });

    const completion = await openai.beta.chat.completions.parse({
      model: "gpt-4o-mini",
      messages: [systemMessage, ...conversation],
      temperature: 0,
      response_format: zodResponseFormat(QuerySchema, "query"),
    });

    // `parsed` is absent when the model returns no structured payload; fail
    // with a clear message instead of a destructuring TypeError.
    const message = completion.choices?.[0]?.message;
    if (message?.parsed == null) {
      const reason = message?.refusal ? `: ${message.refusal}` : "";
      throw new Error(
        `news-search query generation returned no parsed output${reason}`,
      );
    }
    const { query } = message.parsed;
    console.log({ newsWebhookQuery: query });
    return query;
  }

  return new WorkflowOperator(resource, async (state: any) => {
    const { latestUserMessage } = state;
    console.log(`Processing user message: ${latestUserMessage}`);

    const input = await getQueryFromContext({
      openai,
      messages,
      latestUserMessage,
    });

    // `new URL(...)` normalises the host so the string form ends with "/".
    const url = `${new URL(eventHost)}api/webhooks`;
    console.log({ url });

    const webhookId = crypto.randomUUID();
    const createStreamResponse = await fetch(url, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        id: webhookId,
        parent: streamId,
        resource,
        payload: {
          input,
        },
      }),
    });

    // Surface transport failures instead of trying to parse an error page.
    if (!createStreamResponse.ok) {
      throw new Error(
        `${resource} webhook registration failed with HTTP ${createStreamResponse.status}`,
      );
    }

    const raw = await createStreamResponse.text();
    const parsedBody: unknown = JSON.parse(raw);
    const streamUrl = (parsedBody as { stream_url?: unknown } | null)
      ?.stream_url;
    // Without this check a missing field would produce "<host>undefined".
    if (typeof streamUrl !== "string" || streamUrl.length === 0) {
      throw new Error(
        `${resource} webhook registration response did not include a stream_url`,
      );
    }

    const webhook = { url: eventHost + streamUrl, id: webhookId, resource };
    return {
      ...state,
      webhook,
      latestUserMessage: "",
      latestAiMessage: "",
    };
  });
}

View File

@@ -0,0 +1,112 @@
import { WorkflowOperator } from "manifold-workflow-engine";
import { zodResponseFormat } from "openai/helpers/zod";
import { z } from "zod";
// Structured-output schema for the web-scrape model call: string `url`, string
// `query`, and an `action`. Besides "read", "scrape" and "crawl", `action`
// also admits the empty string.
// NOTE(review): presumably "" lets the model signal "no action" — confirm.
const UrlActionSchema = z.object({
url: z.string(),
query: z.string(),
action: z.enum(["read", "scrape", "crawl", ""]),
});
/**
 * Creates the "web-scrape" workflow operator.
 *
 * When run, the operator asks the model (via `getQueryFromContext`) for the
 * URL, action and question implied by the conversation, registers a webhook
 * stream by POSTing to `<eventHost>/api/webhooks`, and returns the incoming
 * state extended with a `webhook` descriptor (`url`, `id`, `resource`).
 * `latestUserMessage` and `latestAiMessage` are blanked in the returned state.
 *
 * @param eventHost - Base URL of the event service; must parse with `new URL`.
 * @param streamId - Sent as the `parent` of the registered webhook stream.
 * @param openai - Client forwarded to `getQueryFromContext`.
 * @param messages - Prior conversation forwarded to `getQueryFromContext`.
 * @returns A `WorkflowOperator` named "web-scrape".
 */
export function createScrapeWebhookOperator({
  eventHost,
  streamId,
  openai,
  messages,
}) {
  const resource = "web-scrape";

  return new WorkflowOperator(resource, async (state: any) => {
    const { latestUserMessage } = state;

    const context = await getQueryFromContext({
      openai,
      messages,
      latestUserMessage,
    });
    const input = {
      url: context.url,
      action: context.action,
      query: context.query,
    };

    // `new URL(...)` normalises the host so the string form ends with "/".
    const endpoint = `${new URL(eventHost)}api/webhooks`;
    const webhookId = crypto.randomUUID();

    const createStreamResponse = await fetch(endpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        id: webhookId,
        parent: streamId,
        resource,
        payload: {
          input,
        },
      }),
    });

    // Surface transport failures instead of trying to parse an error page.
    if (!createStreamResponse.ok) {
      throw new Error(
        `${resource} webhook registration failed with HTTP ${createStreamResponse.status}`,
      );
    }

    const raw = await createStreamResponse.text();
    const parsedBody: unknown = JSON.parse(raw);
    const streamUrl = (parsedBody as { stream_url?: unknown } | null)
      ?.stream_url;
    // Without this check a missing field would produce "<host>undefined".
    if (typeof streamUrl !== "string" || streamUrl.length === 0) {
      throw new Error(
        `${resource} webhook registration response did not include a stream_url`,
      );
    }

    const webhook = { url: eventHost + streamUrl, id: webhookId, resource };
    return {
      ...state,
      webhook,
      latestUserMessage: "",
      latestAiMessage: "",
    };
  });
}
/**
 * Asks the model for the URL, action and question implied by the conversation.
 *
 * The request is the system prompt below, then each entry of `messages`
 * reduced to `role`/`content`, then `latestUserMessage` as a final user turn.
 *
 * @param messages - Prior conversation entries.
 * @param openai - Client exposing `beta.chat.completions.parse`.
 * @param latestUserMessage - Appended (stringified) as the last user message.
 * @returns `{ query, action, url }` taken from the parsed completion.
 * @throws Error when the completion carries no parsed payload.
 */
async function getQueryFromContext({ messages, openai, latestUserMessage }) {
  const systemMessage = {
    role: "system" as const,
    content:
      `You are modeling a structured output containing a single question, a URL, and an action, all relative to a single input.
Return the result as a JSON object in the following structure:
{
"url": "Full URL in the conversation that references the URL being interacted with. No trailing slash!",
"query": "Implied question about the resources at the URL.",
"action": "read | scrape | crawl"
}
- The input being modeled is conversational data from a different conversation than this one.
- Intent should represent a next likely action the system might take to satisfy or enhance the user's request.
Instructions:
1. Analyze the provided context and declare the url, action, and question implied by the latest message.
Output the JSON object. Do not include any additional explanations or text.`.trim(),
  };
  const conversation = messages.map((m) => ({
    role: m.role,
    content: m.content,
  }));
  conversation.push({ role: "user", content: `${latestUserMessage}` });

  const completion = await openai.beta.chat.completions.parse({
    model: "gpt-4o-mini",
    messages: [systemMessage, ...conversation],
    temperature: 0,
    response_format: zodResponseFormat(UrlActionSchema, "UrlActionSchema"),
  });

  // `parsed` is absent when the model returns no structured payload; fail with
  // a clear message instead of a destructuring TypeError.
  const message = completion.choices?.[0]?.message;
  if (message?.parsed == null) {
    const reason = message?.refusal ? `: ${message.refusal}` : "";
    throw new Error(
      `web-scrape context extraction returned no parsed output${reason}`,
    );
  }
  const { query, action, url } = message.parsed;
  return { query, action, url };
}

View File

@@ -0,0 +1,100 @@
import { WorkflowOperator } from "manifold-workflow-engine";
import { zodResponseFormat } from "openai/helpers/zod";
import { z } from "zod";
// Structured-output schema handed to `zodResponseFormat` for the web-search
// query request: an object with a single string field, `query`.
const QuerySchema = z.object({
query: z.string(), // No min/max constraints in the schema
});
/**
 * Creates the "web-search" workflow operator.
 *
 * When run, the operator asks the model (via `getQueryFromContext`) for a
 * web-search query implied by the conversation, registers a webhook stream by
 * POSTing to `<eventHost>/api/webhooks`, and returns the incoming state
 * extended with a `webhook` descriptor (`url`, `id`, `resource`).
 * `latestUserMessage` and `latestAiMessage` are blanked in the returned state.
 *
 * @param eventHost - Base URL of the event service; must parse with `new URL`.
 * @param streamId - Sent as the `parent` of the registered webhook stream.
 * @param openai - Client forwarded to `getQueryFromContext`.
 * @param messages - Prior conversation forwarded to `getQueryFromContext`.
 * @returns A `WorkflowOperator` named "web-search".
 */
export function createSearchWebhookOperator({
  eventHost,
  streamId,
  openai,
  messages,
}) {
  const resource = "web-search";

  return new WorkflowOperator(resource, async (state: any) => {
    const { latestUserMessage } = state;

    const input = await getQueryFromContext({
      openai,
      messages,
      latestUserMessage,
    });

    // process webhooks
    // `new URL(...)` normalises the host so the string form ends with "/".
    const endpoint = `${new URL(eventHost)}api/webhooks`;
    const webhookId = crypto.randomUUID();

    const createStreamResponse = await fetch(endpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        id: webhookId,
        parent: streamId,
        resource,
        payload: {
          input,
        },
      }),
    });

    // Surface transport failures instead of trying to parse an error page.
    if (!createStreamResponse.ok) {
      throw new Error(
        `${resource} webhook registration failed with HTTP ${createStreamResponse.status}`,
      );
    }

    const raw = await createStreamResponse.text();
    const parsedBody: unknown = JSON.parse(raw);
    const streamUrl = (parsedBody as { stream_url?: unknown } | null)
      ?.stream_url;
    // Without this check a missing field would produce "<host>undefined".
    if (typeof streamUrl !== "string" || streamUrl.length === 0) {
      throw new Error(
        `${resource} webhook registration response did not include a stream_url`,
      );
    }

    const webhook = { url: eventHost + streamUrl, id: webhookId, resource };
    return {
      ...state,
      webhook,
      latestUserMessage: "", // unset to break out of loop
      latestAiMessage: "", // unset to break out of loop
    };
  });
}
/**
 * Asks the model for the single web-search question implied by the
 * conversation.
 *
 * The request is the system prompt below, then each entry of `messages`
 * reduced to `role`/`content`, then `latestUserMessage` as a final user turn.
 *
 * @param messages - Prior conversation entries.
 * @param openai - Client exposing `beta.chat.completions.parse`.
 * @param latestUserMessage - Appended (stringified) as the last user message.
 * @returns The `query` string from the parsed completion.
 * @throws Error when the completion carries no parsed payload.
 */
async function getQueryFromContext({ messages, openai, latestUserMessage }) {
  const systemMessage = {
    role: "system",
    content: `Analyze the latest message in the conversation and generate a JSON object with a single implied question for a web search. The JSON should be structured as follows:
{
"query": "the question that needs a web search"
}
## Example
{
"query": "What was the score of the last Buffalo Sabres hockey game?"
}
Focus on the most recent message to determine the query. Output only the JSON object without any additional text.`,
  };
  const conversation = messages.map((m) => ({
    role: m.role,
    content: m.content,
  }));
  conversation.push({ role: "user", content: `${latestUserMessage}` });

  const completion = await openai.beta.chat.completions.parse({
    model: "gpt-4o-mini",
    messages: [systemMessage, ...conversation],
    temperature: 0,
    response_format: zodResponseFormat(QuerySchema, "query"),
  });

  // `parsed` is absent when the model returns no structured payload; fail with
  // a clear message instead of a destructuring TypeError.
  const message = completion.choices?.[0]?.message;
  if (message?.parsed == null) {
    const reason = message?.refusal ? `: ${message.refusal}` : "";
    throw new Error(
      `web-search query generation returned no parsed output${reason}`,
    );
  }
  const { query } = message.parsed;
  return query;
}