enables deep research

geoffsee
2025-05-28 19:43:36 -04:00
committed by Geoff Seemueller
parent 805b524471
commit 2e45c6b67b
14 changed files with 245 additions and 59 deletions

View File

@@ -27,7 +27,7 @@ rust-embed = "8.5.0"
bytes = "1.8.0"
lazy_static = "1.5.0"
sled = "0.34.7"
tower-http = { version = "0.6.2", features = ["trace"] }
tower-http = { version = "0.6.2", features = ["trace", "cors"] }
anyhow = "1.0.97"
base64 = "0.22.1"
fips204 = "0.4.6"

BIN bun.lockb (binary file not shown)

killport.js (new file, 51 lines)
View File

@@ -0,0 +1,51 @@
#!/usr/bin/env node
import * as child_process from "node:child_process";
const args = process.argv.slice(2);
const port = args.length > 0 ? parseInt(args[0], 10) : null;
if (!port || isNaN(port)) {
console.error('Please provide a valid port number');
process.exit(1);
}
export const killProcessOnPort = (port) => {
return new Promise((resolve, reject) => {
// Find the PID of the process using the specified port
child_process.exec(`lsof -t -i :${port}`.trim(), (err, stdout) => {
if (err) {
// Handle command error (such as permission denied)
if (err.code !== 1) {
console.error(`Error finding process on port ${port}:`, err);
return reject(err);
} else {
// If code is 1, it generally means no process is using the port
console.log(`No process found on port ${port}`);
return resolve();
}
}
// If stdout is empty, no process is using the port
const pid = stdout.trim();
if (!pid) {
console.log(`No process is currently running on port ${port}`);
return resolve();
}
// Kill the process using the specified PID
child_process.exec(`kill -9 ${pid}`.trim(), (killErr) => {
if (killErr) {
console.error(`Failed to kill process ${pid} on port ${port}`, killErr);
return reject(killErr);
}
console.log(`Successfully killed process ${pid} on port ${port}`);
resolve();
});
});
});
};
await killProcessOnPort(port);
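
Note: `lsof -t -i :<port>` can print several PIDs when more than one process holds the port, and the script above only handles the single-PID case. A minimal sketch of the same idea that splits the output per PID (a hypothetical synchronous variant, not part of this commit):

// Hypothetical synchronous variant of killport.js; handles multiple PIDs per port.
import { execSync } from "node:child_process";

export function killPortSync(port) {
  let pids = [];
  try {
    pids = execSync(`lsof -t -i :${port}`, { encoding: "utf8" })
      .trim()
      .split("\n")
      .filter(Boolean);
  } catch {
    // lsof exits with code 1 when nothing is listening on the port
  }
  if (pids.length === 0) {
    console.log(`No process found on port ${port}`);
    return;
  }
  for (const pid of pids) {
    execSync(`kill -9 ${pid}`);
    console.log(`Killed process ${pid} on port ${port}`);
  }
}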

View File

@@ -6,7 +6,7 @@
],
"private": true,
"scripts": {
"dev": "bunx kill-port 3006 && bun run build && cargo watch -x 'run src/main.rs'",
"dev": "./killport.js 3006 && bun run build && cargo watch -x 'run src/main.rs'",
"start": "docker compose up --build",
"ai:search": "genaiscript run packages/genaiscript/genaisrc/web-search.genai.mts --vars USER_INPUT='who won the 2024 election?'",
"shim:ai:search": "pnpm build && ./packages/genaiscript-rust-shim/dist/genaiscript-rust-shim.js --file=packages/genaiscript/genaisrc/web-search.genai.mts USER_INPUT=\"Who won the 2024 presidential election?\"\n",
@@ -14,7 +14,7 @@
"ai:url:read": "genaiscript run packages/genaiscript/genaisrc/web-scrape.genai.mts --vars USER_INPUT='{\"url\":\"https://geoff.seemueller.io/about\",\"query\":\"Describe the details of the page.\", \"action\": \"read\"}'",
"ai:url:scrape": "npx genaiscript run packages/genaiscript/genaisrc/web-scrape.genai.mts --vars USER_INPUT='{\"url\":\"https://www.time4learning.com/homeschool-curriculum/high-school/eleventh-grade/math.html\",\"query\":\"What is on this page?\", \"action\": \"scrape\"}'",
"prod:logs": "fly logs",
"test-http": "./test-search.ts",
"test-http": "test/test-search.ts",
"build": "(cd packages/genaiscript-rust-shim && bun run buildShim && bun run setupDev && cargo build)"
}
}

View File

@@ -1,3 +0,0 @@
# genaiscript-rust-shim
simple package that wraps genaiscript for binding to `tokio::process::Command`

View File

@@ -1,7 +1,7 @@
import {task, entrypoint, interrupt, MemorySaver} from "@langchain/langgraph"
import {entrypoint, InMemoryStore, MemorySaver, task} from "@langchain/langgraph"
import "./tools/searxng.genai.mjs"
import {SearxngClient} from "@agentic/searxng";
import ky from "ky";
script({
title: "Deep Research Program",
@@ -15,6 +15,7 @@ const {output, vars} = env
const breakdownResearch = task(
"breakdown_research",
async (question: string) => {
output.log("Breaking down question:", question);
const result = await runPrompt(
async (ctx) => {
ctx.$`You are an expert research strategist.
@@ -51,39 +52,44 @@ Output the breakdown as a JSON object.`
},
}
)
output.fence(result.json, "json");
return result.json
}
)
const globalCtx = this;
const researchSubQuestion = task(
"research_subquestion",
async (subQuestion: { id: string; question: string }) => {
output.log(`Researching sub-question: ${subQuestion.question}`);
const kyWithHeaders = ky.create({
referrerPolicy: "unsafe-url",
const searxng = new SearxngClient({apiBaseUrl: "https://search-engine-gsio.fly.dev"});
headers: {
'Authorization': 'Basic ' + btoa(`admin:${process.env.SEARXNG_PASSWORD}`),
}
});
const {text} = await runPrompt(
const searxng = new SearxngClient({ky: kyWithHeaders});
const {json} = await runPrompt(
(_) => {
_.defTool(searxng)
_.$`You are an expert researcher with access to comprehensive information.
Task: Thoroughly research the following question and provide a detailed answer.
Task: Thoroughly research the following question and create a JSON formatted response.
Question ID: ${subQuestion.id}
Question: ${subQuestion.question}
Provide your findings in a structured format that includes:
- Your answer to the sub-question
- Relevant sources that support your answer
- Your confidence level in the answer (0-1)`
Respond with the specified JSON format.
`
},
{
model: "small",
label: `research subquestion ${subQuestion.id}`,
maxDataRepairs: 2,
responseType: "json_object",
responseSchema: {
type: "object",
properties: {
@@ -105,19 +111,20 @@ Provide your findings in a structured format that includes:
},
}
)
return text
output.fence(json, "json");
return json
}
)
const synthesizeFindings = task(
"synthesize_findings",
async (mainQuestion: string, findings: any[]) => {
output.log(`Synthesizing Findings: ${JSON.stringify(findings, null, 2)}`);
const result = await runPrompt(
async (ctx) => {
ctx.$`You are an expert research synthesizer.
Task: Synthesize the following research findings into a coherent response to the main research question.
Task: Synthesize the following research findings into a JSON object to answer the main research question.
Main Research Question: ${mainQuestion}
@@ -128,11 +135,14 @@ Provide a synthesis that:
1. Directly answers the main research question
2. Integrates the findings from all sub-questions
3. Identifies limitations in the current research
4. Suggests next steps for further investigation`
4. Suggests next steps for further investigation
Respond in the specified JSON format.`
},
{
label: "synthesize findings",
responseType: "markdown",
responseType: "json_object",
maxDataRepairs: 2,
responseSchema: {
type: "object",
properties: {
@@ -147,15 +157,15 @@ Provide a synthesis that:
},
}
)
output.fence(result.json, "json");
return result.json
}
)
const summarizeAndIdentifyGaps = task(
"summarize_and_identify_gaps",
{name: "summarize_and_identify_gaps"},
async (synthesis: any, findings: any[]) => {
output.log(`Summarizing and identifying gaps: ${JSON.stringify(findings, null, 2)}`);
const result = await runPrompt(
async (ctx) => {
ctx.$`You are an expert research evaluator.
@@ -171,10 +181,13 @@ ${JSON.stringify(findings, null, 2)}
Please provide:
1. A concise summary of current findings
2. Identify 2-3 specific knowledge gaps
3. Formulate follow-up questions to address these gaps`
3. Formulate follow-up questions to address these gaps
Respond using the specified JSON schema.`
},
{
label: "identify research gaps",
maxDataRepairs: 2,
responseSchema: {
type: "object",
properties: {
@@ -197,27 +210,28 @@ Please provide:
},
}
)
output.fence(result.json, "json");
return result.json
}
)
// Research Workflow
const researchWorkflow = entrypoint(
{checkpointer: new MemorySaver(), name: "research_workflow"},
{checkpointer: new MemorySaver(), name: "research_workflow", store: new InMemoryStore() },
async (input: { question: string; context?: string }) => {
output.log(`Deep research initiated`);
// Step 1: Break down the research question
const breakdown = await breakdownResearch(input.question)
// Step 2: Research each sub-question in parallel
const subQuestionFindings = []
// handle both subQuestions and sub_questions, since the API returns one or the other
const subquestions = breakdown?.sub_questions ? breakdown.sub_questions : breakdown.subQuestions;
const forSq = await Promise.all(subquestions.map(async (q) => await researchSubQuestion(q)));
subQuestionFindings.push(...forSq)
for (const sq of breakdown.subQuestions) {
const analysis = await researchSubQuestion(sq);
console.log(analysis);
subQuestionFindings.push(analysis);
}
// Step 3: Synthesize the findings
let synthesis = await synthesizeFindings(
input.question,
subQuestionFindings
@@ -228,22 +242,21 @@ const researchWorkflow = entrypoint(
subQuestionFindings
)
// Step 5: Conduct follow-up research on identified gaps
const followUpFindings = [];
for (const fq of gapAnalysis.followUpQuestions) {
const answer = await researchSubQuestion(fq);
console.log(answer);
followUpFindings.push(answer);
}
// Step 6: Final synthesis with deep research
const allFindings = [...subQuestionFindings, ...followUpFindings]
const finalSynthesis = await synthesizeFindings(
input.question,
allFindings
)
return {
question: input.question,
breakdown: breakdown,
@@ -255,27 +268,26 @@ const researchWorkflow = entrypoint(
}
)
const researchQuestion =
env.vars.question ||
"What are the most promising approaches to climate change mitigation?"
// An arbitrary ID locked to this workflow run
const threadId = `research-${Date.now()}`
const options = {
configurable: {thread_id: threadId},
};
const config = {
configurable: {
thread_id: threadId,
},
}
const researchQuestion = env.vars.user_input;
const inputs = {
question: researchQuestion,
context: vars.context || "",
};
// Execute workflow. Checkpoints are manually sent across the wire in the tasks.
const results = await researchWorkflow.invoke(
inputs,
{
question: researchQuestion,
context: vars.context || "",
},
config
...options,
}
)
output.fence(results, "json")
env.output.fence(results)
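
The hunks above mix the old and new versions of the invocation, so the tail of the script is hard to follow. A minimal sketch of one consistent reading of the post-change invocation (a reconstruction, assuming `env.vars.user_input` carries the question and the `...options` spread form is the new code):

// Reconstruction of the post-change invocation; not authoritative.
const researchQuestion = env.vars.user_input;

// An arbitrary ID locked to this workflow run
const threadId = `research-${Date.now()}`;
const options = {
  configurable: { thread_id: threadId },
};

const inputs = {
  question: researchQuestion,
  context: vars.context || "",
};

// Execute workflow. Checkpoints are manually sent across the wire in the tasks.
const results = await researchWorkflow.invoke(inputs, { ...options });
env.output.fence(results);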

View File

@@ -9,13 +9,16 @@
"shim:ai:search": "pnpm build && ./dist/shim.js --file=genaisrc/search.genai.mts USER_INPUT=\"Who won the 2024 presidential election?\"\n",
"ai:news": "genaiscript run genaisrc/news-search.genai.mts --vars USER_INPUT='What are the latest updates and developments in the Ukraine war?'",
"ai:url:read": "genaiscript run genaisrc/web-scrape.genai.mts --vars USER_INPUT='{\"url\":\"https://geoff.seemueller.io/about\",\"query\":\"Describe the details of the page.\", \"action\": \"read\"}'",
"ai:url:scrape": "npx genaiscript run genaisrc/web-scrape.genai.mts --vars USER_INPUT='{\"url\":\"https://www.time4learning.com/homeschool-curriculum/high-school/eleventh-grade/math.html\",\"query\":\"What is on this page?\", \"action\": \"scrape\"}'"
"ai:url:scrape": "npx genaiscript run genaisrc/web-scrape.genai.mts --vars USER_INPUT='{\"url\":\"https://www.time4learning.com/homeschool-curriculum/high-school/eleventh-grade/math.html\",\"query\":\"What is on this page?\", \"action\": \"scrape\"}'",
"typegen": " genaiscript scripts compile"
},
"dependencies": {
"@agentic/searxng": "7.5.3",
"@types/node": "^22.10.2",
"genaiscript": "^1.95.1",
"happy-dom": "^16.0.1",
"ky": "^1.8.0"
"ky": "^1.8.0",
"md-front-matter": "^1.0.4",
"@langchain/langgraph":"0.2.57"
}
}

View File

@@ -0,0 +1,43 @@
#!/usr/bin/env -S deno run -A
const API_ROOT = "http://localhost:3006";
const sid = crypto.randomUUID();
// -------------------- 1. Create the agent --------------------
const createAgentBody = {
id: sid,
resource: "deep-research",
parent: sid,
payload: { input: "Research unexplored cures for cancer." },
};
const createRes = await fetch(`${API_ROOT}/api/agents`, {
method: "POST",
headers: { "content-type": "application/json" },
body: JSON.stringify(createAgentBody),
});
const raw = await createRes.text();
console.log({raw});
const {stream_url: streamId} = JSON.parse(raw);
console.log("Agent created with streamId:", streamId);
// -------------------- 2. Listen to the SSE stream --------------------
const streamUrl = `${API_ROOT}${streamId}`;
const es = new EventSource(streamUrl);
es.onopen = (e) => {
console.log("connected", e);
};
es.onmessage = (e) => {
console.log("⟶", e.data);
};
es.onerror = (e) => {
console.error("SSE error:", e);
es.close();
};

View File

@@ -0,0 +1,43 @@
#!/usr/bin/env -S deno run -A
const API_ROOT = "http://localhost:3006";
const sid = crypto.randomUUID();
// -------------------- 1. Create the agent --------------------
const createAgentBody = {
id: sid,
resource: "web-search",
parent: sid,
payload: { input: "What is the capital of France?" },
};
const createRes = await fetch(`${API_ROOT}/api/agents`, {
method: "POST",
headers: { "content-type": "application/json" },
body: JSON.stringify(createAgentBody),
});
const raw = await createRes.text();
console.log({raw});
const {stream_url: streamId} = JSON.parse(raw);
console.log("Agent created with streamId:", streamId);
// -------------------- 2. Listen to the SSE stream --------------------
const streamUrl = `${API_ROOT}${streamId}`;
const es = new EventSource(streamUrl);
es.onopen = (e) => {
console.log("connected", e);
};
es.onmessage = (e) => {
console.log("⟶", e.data);
};
es.onerror = (e) => {
console.error("SSE error:", e);
es.close();
};
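
Both test scripts rely on Deno's global EventSource to consume the stream. For a runtime without EventSource (for example plain Node 18+, an assumption here), a sketch of an equivalent reader over the same `streamUrl` using fetch and a web ReadableStream:

// Hypothetical EventSource-free reader for the same SSE endpoint (assumes fetch + web streams).
const res = await fetch(streamUrl, { headers: { accept: "text/event-stream" } });
const decoder = new TextDecoder();
let buffered = "";
for await (const chunk of res.body) {
  buffered += decoder.decode(chunk, { stream: true });
  // SSE frames are separated by a blank line; payload lines start with "data:".
  const frames = buffered.split("\n\n");
  buffered = frames.pop() ?? "";
  for (const frame of frames) {
    for (const line of frame.split("\n")) {
      if (line.startsWith("data:")) console.log("⟶", line.slice(5).trim());
    }
  }
}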

View File

@@ -0,0 +1,32 @@
use crate::utils::utils::run_agent;
use tokio::process::Child;
use tracing;
pub async fn agent(stream_id: &str, input: &str) -> Result<Child, String> {
run_agent(stream_id, input, "./packages/genaiscript/genaisrc/deep-research.genai.mts", 60).await
}
#[cfg(test)]
mod tests {
use crate::agents::deep_research::agent;
use std::fmt::Debug;
#[tokio::test]
async fn test_deepresearch() {
// A deliberately provocative research question; each run tends to branch into ever deeper sub-questions.
let input = "What is a life of meaning?";
let mut command = agent("test-deepresearch-agent", input).await.unwrap();
// let mut stdout = String::new();
// command.stdout.take().unwrap().read_to_string(&mut stdout).await.unwrap();
// println!("stdout: {}", stdout);
// // Optionally, you can capture and inspect stdout if needed:
let _output = command.wait_with_output().await.expect("Failed to wait for output");
// println!("Stdout: {}", String::from_utf8_lossy(&output.stdout));
// println!("Stderr: {}", String::from_utf8_lossy(&output.stderr));
}
}

View File

@@ -1,4 +1,5 @@
pub(crate) mod news;
pub(crate) mod scrape;
pub(crate) mod search;
pub(crate) mod image_generator;
pub(crate) mod image_generator;
pub(crate) mod deep_research;

View File

@@ -97,6 +97,9 @@ pub async fn use_agent(Path(agent_id): Path<String>) -> impl IntoResponse {
"image-generator" => {
crate::agents::image_generator::agent(agent_id.as_str(), &*input).await
}
"deep-research" => {
crate::agents::deep_research::agent(agent_id.as_str(), &*input).await
}
"web-scrape" => crate::agents::scrape::agent(agent_id.as_str(), &*input).await,
_ => {
tracing::error!("Unsupported resource type: {}", resource);

View File

@@ -18,6 +18,7 @@ pub fn create_router() -> Router {
.make_span_with(trace::DefaultMakeSpan::new().level(Level::INFO))
.on_response(trace::DefaultOnResponse::new().level(Level::INFO)),
)
.layer(tower_http::cors::CorsLayer::very_permissive())
.fallback(handle_not_found)
}