enables deep research
This commit is contained in:

committed by
Geoff Seemueller

parent
805b524471
commit
2e45c6b67b
@@ -27,7 +27,7 @@ rust-embed = "8.5.0"
|
||||
bytes = "1.8.0"
|
||||
lazy_static = "1.5.0"
|
||||
sled = "0.34.7"
|
||||
tower-http = { version = "0.6.2", features = ["trace"] }
|
||||
tower-http = { version = "0.6.2", features = ["trace", "cors"] }
|
||||
anyhow = "1.0.97"
|
||||
base64 = "0.22.1"
|
||||
fips204 = "0.4.6"
|
51
killport.js
Normal file
51
killport.js
Normal file
@@ -0,0 +1,51 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
import * as child_process from "node:child_process";
|
||||
|
||||
const args = process.argv.slice(2);
|
||||
const port = args.length > 0 ? parseInt(args[0], 10) : null;
|
||||
|
||||
if (!port || isNaN(port)) {
|
||||
console.error('Please provide a valid port number');
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
|
||||
export const killProcessOnPort = (port) => {
|
||||
return new Promise((resolve, reject) => {
|
||||
// Find the PID of the process using the specified port
|
||||
child_process.exec(`lsof -t -i :${port}`.trim(), (err, stdout) => {
|
||||
if (err) {
|
||||
// Handle command error (such as permission denied)
|
||||
if (err.code !== 1) {
|
||||
console.error(`Error finding process on port ${port}:`, err);
|
||||
return reject(err);
|
||||
} else {
|
||||
// If code is 1, it generally means no process is using the port
|
||||
console.log(`No process found on port ${port}`);
|
||||
return resolve();
|
||||
}
|
||||
}
|
||||
|
||||
// If stdout is empty, no process is using the port
|
||||
const pid = stdout.trim();
|
||||
if (!pid) {
|
||||
console.log(`No process is currently running on port ${port}`);
|
||||
return resolve();
|
||||
}
|
||||
|
||||
// Kill the process using the specified PID
|
||||
child_process.exec(`kill -9 ${pid}`.trim(), (killErr) => {
|
||||
if (killErr) {
|
||||
console.error(`Failed to kill process ${pid} on port ${port}`, killErr);
|
||||
return reject(killErr);
|
||||
}
|
||||
|
||||
console.log(`Successfully killed process ${pid} on port ${port}`);
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
});
|
||||
};
|
||||
|
||||
await killProcessOnPort(port);
|
@@ -6,7 +6,7 @@
|
||||
],
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "bunx kill-port 3006 && bun run build && cargo watch -x 'run src/main.rs'",
|
||||
"dev": "./killport.js 3006 && bun run build && cargo watch -x 'run src/main.rs'",
|
||||
"start": "docker compose up --build",
|
||||
"ai:search": "genaiscript run packages/genaiscript/genaisrc/web-search.genai.mts --vars USER_INPUT='who won the 2024 election?'",
|
||||
"shim:ai:search": "pnpm build && ./packages/genaiscript-rust-shim/dist/genaiscript-rust-shim.js --file=packages/genaiscript/genaisrc/web-search.genai.mts USER_INPUT=\"Who won the 2024 presidential election?\"\n",
|
||||
@@ -14,7 +14,7 @@
|
||||
"ai:url:read": "genaiscript run packages/genaiscript/genaisrc/web-scrape.genai.mts --vars USER_INPUT='{\"url\":\"https://geoff.seemueller.io/about\",\"query\":\"Describe the details of the page.\", \"action\": \"read\"}'",
|
||||
"ai:url:scrape": "npx genaiscript run packages/genaiscript/genaisrc/web-scrape.genai.mts --vars USER_INPUT='{\"url\":\"https://www.time4learning.com/homeschool-curriculum/high-school/eleventh-grade/math.html\",\"query\":\"What is on this page?\", \"action\": \"scrape\"}'",
|
||||
"prod:logs": "fly logs",
|
||||
"test-http": "./test-search.ts",
|
||||
"test-http": "test/test-search.ts",
|
||||
"build": "(cd packages/genaiscript-rust-shim && bun run buildShim && bun run setupDev && cargo build)"
|
||||
}
|
||||
}
|
||||
|
@@ -1,3 +0,0 @@
|
||||
# genaiscript-rust-shim
|
||||
|
||||
simple package that wraps genaiscript for binding to `tokio::process::Command`
|
Binary file not shown.
@@ -1,7 +1,7 @@
|
||||
import {task, entrypoint, interrupt, MemorySaver} from "@langchain/langgraph"
|
||||
import {entrypoint, InMemoryStore, MemorySaver, task} from "@langchain/langgraph"
|
||||
import "./tools/searxng.genai.mjs"
|
||||
import {SearxngClient} from "@agentic/searxng";
|
||||
|
||||
import ky from "ky";
|
||||
|
||||
script({
|
||||
title: "Deep Research Program",
|
||||
@@ -15,6 +15,7 @@ const {output, vars} = env
|
||||
const breakdownResearch = task(
|
||||
"breakdown_research",
|
||||
async (question: string) => {
|
||||
output.log("Breaking down question:", question);
|
||||
const result = await runPrompt(
|
||||
async (ctx) => {
|
||||
ctx.$`You are an expert research strategist.
|
||||
@@ -51,39 +52,44 @@ Output the breakdown as a JSON object.`
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
output.fence(result.json, "json");
|
||||
return result.json
|
||||
}
|
||||
)
|
||||
|
||||
const globalCtx = this;
|
||||
|
||||
|
||||
const researchSubQuestion = task(
|
||||
"research_subquestion",
|
||||
async (subQuestion: { id: string; question: string }) => {
|
||||
output.log(`Researching sub-question: ${subQuestion.question}`);
|
||||
const kyWithHeaders = ky.create({
|
||||
referrerPolicy: "unsafe-url",
|
||||
|
||||
const searxng = new SearxngClient({apiBaseUrl: "https://search-engine-gsio.fly.dev"});
|
||||
headers: {
|
||||
'Authorization': 'Basic ' + btoa(`admin:${process.env.SEARXNG_PASSWORD}`),
|
||||
}
|
||||
});
|
||||
|
||||
const {text} = await runPrompt(
|
||||
const searxng = new SearxngClient({ky: kyWithHeaders});
|
||||
|
||||
const {json} = await runPrompt(
|
||||
(_) => {
|
||||
_.defTool(searxng)
|
||||
_.$`You are an expert researcher with access to comprehensive information.
|
||||
|
||||
Task: Thoroughly research the following question and provide a detailed answer.
|
||||
Task: Thoroughly research the following question and create a JSON formatted response.
|
||||
|
||||
Question ID: ${subQuestion.id}
|
||||
Question: ${subQuestion.question}
|
||||
|
||||
Provide your findings in a structured format that includes:
|
||||
- Your answer to the sub-question
|
||||
- Relevant sources that support your answer
|
||||
- Your confidence level in the answer (0-1)`
|
||||
Respond with the specified JSON format.
|
||||
`
|
||||
},
|
||||
{
|
||||
model: "small",
|
||||
label: `research subquestion ${subQuestion.id}`,
|
||||
maxDataRepairs: 2,
|
||||
responseType: "json_object",
|
||||
responseSchema: {
|
||||
type: "object",
|
||||
properties: {
|
||||
@@ -105,19 +111,20 @@ Provide your findings in a structured format that includes:
|
||||
},
|
||||
}
|
||||
)
|
||||
return text
|
||||
output.fence(json, "json");
|
||||
return json
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
const synthesizeFindings = task(
|
||||
"synthesize_findings",
|
||||
async (mainQuestion: string, findings: any[]) => {
|
||||
output.log(`Synthesizing Findings: ${JSON.stringify(findings, null, 2)}`);
|
||||
const result = await runPrompt(
|
||||
async (ctx) => {
|
||||
ctx.$`You are an expert research synthesizer.
|
||||
|
||||
Task: Synthesize the following research findings into a coherent response to the main research question.
|
||||
Task: Synthesize the following research findings into a JSON object to answer the main research question.
|
||||
|
||||
Main Research Question: ${mainQuestion}
|
||||
|
||||
@@ -128,11 +135,14 @@ Provide a synthesis that:
|
||||
1. Directly answers the main research question
|
||||
2. Integrates the findings from all sub-questions
|
||||
3. Identifies limitations in the current research
|
||||
4. Suggests next steps for further investigation`
|
||||
4. Suggests next steps for further investigation
|
||||
|
||||
Respond in the specified JSON format.`
|
||||
},
|
||||
{
|
||||
label: "synthesize findings",
|
||||
responseType: "markdown",
|
||||
responseType: "json_object",
|
||||
maxDataRepairs: 2,
|
||||
responseSchema: {
|
||||
type: "object",
|
||||
properties: {
|
||||
@@ -147,15 +157,15 @@ Provide a synthesis that:
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
output.fence(result.json, "json");
|
||||
return result.json
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
const summarizeAndIdentifyGaps = task(
|
||||
"summarize_and_identify_gaps",
|
||||
{name: "summarize_and_identify_gaps"},
|
||||
async (synthesis: any, findings: any[]) => {
|
||||
output.log(`Summarizing and identifying gaps: ${JSON.stringify(findings, null, 2)}`);
|
||||
const result = await runPrompt(
|
||||
async (ctx) => {
|
||||
ctx.$`You are an expert research evaluator.
|
||||
@@ -171,10 +181,13 @@ ${JSON.stringify(findings, null, 2)}
|
||||
Please provide:
|
||||
1. A concise summary of current findings
|
||||
2. Identify 2-3 specific knowledge gaps
|
||||
3. Formulate follow-up questions to address these gaps`
|
||||
3. Formulate follow-up questions to address these gaps
|
||||
|
||||
Respond using the specified JSON schema.`
|
||||
},
|
||||
{
|
||||
label: "identify research gaps",
|
||||
maxDataRepairs: 2,
|
||||
responseSchema: {
|
||||
type: "object",
|
||||
properties: {
|
||||
@@ -197,27 +210,28 @@ Please provide:
|
||||
},
|
||||
}
|
||||
)
|
||||
output.fence(result.json, "json");
|
||||
return result.json
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
// Research Workflow
|
||||
const researchWorkflow = entrypoint(
|
||||
{checkpointer: new MemorySaver(), name: "research_workflow"},
|
||||
{checkpointer: new MemorySaver(), name: "research_workflow", store: new InMemoryStore() },
|
||||
async (input: { question: string; context?: string }) => {
|
||||
|
||||
output.log(`Deep research initiated`);
|
||||
// Step 1: Break down the research question
|
||||
const breakdown = await breakdownResearch(input.question)
|
||||
|
||||
|
||||
// Step 2: Research each sub-question in parallel
|
||||
const subQuestionFindings = []
|
||||
// handle both subQuestions and sub_questions, since the API returns one or the other
|
||||
const subquestions = breakdown?.sub_questions ? breakdown.sub_questions : breakdown.subQuestions;
|
||||
const forSq = await Promise.all(subquestions.map(async (q) => await researchSubQuestion(q)));
|
||||
forSq.map(subQuestionFindings.push)
|
||||
|
||||
for (const sq of breakdown.subQuestions) {
|
||||
const analysis = await researchSubQuestion(sq);
|
||||
console.log(analysis);
|
||||
subQuestionFindings.push(analysis);
|
||||
}
|
||||
|
||||
|
||||
// Step 3: Synthesize the findings
|
||||
let synthesis = await synthesizeFindings(
|
||||
input.question,
|
||||
subQuestionFindings
|
||||
@@ -228,22 +242,21 @@ const researchWorkflow = entrypoint(
|
||||
subQuestionFindings
|
||||
)
|
||||
|
||||
|
||||
// Step 5: Conduct follow-up research on identified gaps
|
||||
const followUpFindings = [];
|
||||
for (const fq of gapAnalysis.followUpQuestions) {
|
||||
const anwser = await researchSubQuestion(fq);
|
||||
console.log(anwser);
|
||||
followUpFindings.push(anwser);
|
||||
}
|
||||
|
||||
|
||||
// Step 6: Final synthesis with deep research
|
||||
const allFindings = [...subQuestionFindings, ...followUpFindings]
|
||||
const finalSynthesis = await synthesizeFindings(
|
||||
input.question,
|
||||
allFindings
|
||||
)
|
||||
|
||||
|
||||
return {
|
||||
question: input.question,
|
||||
breakdown: breakdown,
|
||||
@@ -255,27 +268,26 @@ const researchWorkflow = entrypoint(
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
const researchQuestion =
|
||||
env.vars.question ||
|
||||
"What are the most promising approaches to climate change mitigation?"
|
||||
|
||||
|
||||
// An arbitrary ID locked to this workflow run
|
||||
const threadId = `research-${Date.now()}`
|
||||
|
||||
const options = {
|
||||
configurable: {thread_id: threadId},
|
||||
};
|
||||
|
||||
const config = {
|
||||
configurable: {
|
||||
thread_id: threadId,
|
||||
},
|
||||
}
|
||||
const researchQuestion = env.vars.user_input;
|
||||
|
||||
const inputs = {
|
||||
question: researchQuestion,
|
||||
context: vars.context || "",
|
||||
};
|
||||
|
||||
// Execute workflow. Checkpoints are manually sent across the wire in the tasks.
|
||||
const results = await researchWorkflow.invoke(
|
||||
inputs,
|
||||
{
|
||||
question: researchQuestion,
|
||||
context: vars.context || "",
|
||||
},
|
||||
config
|
||||
...options,
|
||||
}
|
||||
)
|
||||
output.fence(results, "json")
|
||||
|
||||
env.output.fence(results)
|
@@ -9,13 +9,16 @@
|
||||
"shim:ai:search": "pnpm build && ./dist/shim.js --file=genaisrc/search.genai.mts USER_INPUT=\"Who won the 2024 presidential election?\"\n",
|
||||
"ai:news": "genaiscript run genaisrc/news-search.genai.mts --vars USER_INPUT='What are the latest updates and developments in the Ukraine war?'",
|
||||
"ai:url:read": "genaiscript run genaisrc/web-scrape.genai.mts --vars USER_INPUT='{\"url\":\"https://geoff.seemueller.io/about\",\"query\":\"Describe the details of the page.\", \"action\": \"read\"}'",
|
||||
"ai:url:scrape": "npx genaiscript run genaisrc/web-scrape.genai.mts --vars USER_INPUT='{\"url\":\"https://www.time4learning.com/homeschool-curriculum/high-school/eleventh-grade/math.html\",\"query\":\"What is on this page?\", \"action\": \"scrape\"}'"
|
||||
"ai:url:scrape": "npx genaiscript run genaisrc/web-scrape.genai.mts --vars USER_INPUT='{\"url\":\"https://www.time4learning.com/homeschool-curriculum/high-school/eleventh-grade/math.html\",\"query\":\"What is on this page?\", \"action\": \"scrape\"}'",
|
||||
"typegen": " genaiscript scripts compile"
|
||||
},
|
||||
"dependencies": {
|
||||
"@agentic/searxng": "7.5.3",
|
||||
"@types/node": "^22.10.2",
|
||||
"genaiscript": "^1.95.1",
|
||||
"happy-dom": "^16.0.1",
|
||||
"ky": "^1.8.0"
|
||||
"ky": "^1.8.0",
|
||||
"md-front-matter": "^1.0.4",
|
||||
"@langchain/langgraph":"0.2.57"
|
||||
}
|
||||
}
|
||||
|
43
smoke-tests/test-deep-research.ts
Normal file
43
smoke-tests/test-deep-research.ts
Normal file
@@ -0,0 +1,43 @@
|
||||
#!/usr/bin/env deno -A
|
||||
|
||||
const API_ROOT = "http://localhost:3006";
|
||||
|
||||
const sid = crypto.randomUUID();
|
||||
// -------------------- 1. Create the agent --------------------
|
||||
const createAgentBody = {
|
||||
id: sid,
|
||||
resource: "deep-research",
|
||||
parent: sid,
|
||||
payload: { input: "Research unexplored cures for cancer." },
|
||||
};
|
||||
|
||||
const createRes = await fetch(`${API_ROOT}/api/agents`, {
|
||||
method: "POST",
|
||||
headers: { "content-type": "application/json" },
|
||||
body: JSON.stringify(createAgentBody),
|
||||
});
|
||||
|
||||
|
||||
const raw = await createRes.text();
|
||||
console.log({raw});
|
||||
const {stream_url: streamId} = JSON.parse(raw);
|
||||
|
||||
console.log("Agent created with streamId:", streamId);
|
||||
|
||||
// -------------------- 2. Listen to the SSE stream --------------------
|
||||
const streamUrl = `${API_ROOT}${streamId}`;
|
||||
const es = new EventSource(streamUrl);
|
||||
|
||||
|
||||
es.onopen = (e) => {
|
||||
console.log("connected", e);
|
||||
};
|
||||
|
||||
es.onmessage = (e) => {
|
||||
console.log("⟶", e.data);
|
||||
};
|
||||
|
||||
es.onerror = (e) => {
|
||||
console.error("SSE error:", e);
|
||||
es.close();
|
||||
};
|
43
smoke-tests/test-search.ts
Normal file
43
smoke-tests/test-search.ts
Normal file
@@ -0,0 +1,43 @@
|
||||
#!/usr/bin/env deno -A
|
||||
|
||||
const API_ROOT = "http://localhost:3006";
|
||||
|
||||
const sid = crypto.randomUUID();
|
||||
// -------------------- 1. Create the agent --------------------
|
||||
const createAgentBody = {
|
||||
id: sid,
|
||||
resource: "web-search",
|
||||
parent: sid,
|
||||
payload: { input: "What is the capital of France?" },
|
||||
};
|
||||
|
||||
const createRes = await fetch(`${API_ROOT}/api/agents`, {
|
||||
method: "POST",
|
||||
headers: { "content-type": "application/json" },
|
||||
body: JSON.stringify(createAgentBody),
|
||||
});
|
||||
|
||||
|
||||
const raw = await createRes.text();
|
||||
console.log({raw});
|
||||
const {stream_url: streamId} = JSON.parse(raw);
|
||||
|
||||
console.log("Agent created with streamId:", streamId);
|
||||
|
||||
// -------------------- 2. Listen to the SSE stream --------------------
|
||||
const streamUrl = `${API_ROOT}${streamId}`;
|
||||
const es = new EventSource(streamUrl);
|
||||
|
||||
|
||||
es.onopen = (e) => {
|
||||
console.log("connected", e);
|
||||
};
|
||||
|
||||
es.onmessage = (e) => {
|
||||
console.log("⟶", e.data);
|
||||
};
|
||||
|
||||
es.onerror = (e) => {
|
||||
console.error("SSE error:", e);
|
||||
es.close();
|
||||
};
|
32
src/agents/deep_research.rs
Normal file
32
src/agents/deep_research.rs
Normal file
@@ -0,0 +1,32 @@
|
||||
use crate::utils::utils::run_agent;
|
||||
use tokio::process::Child;
|
||||
use tracing;
|
||||
|
||||
pub async fn agent(stream_id: &str, input: &str) -> Result<Child, String> {
|
||||
run_agent(stream_id, input, "./packages/genaiscript/genaisrc/deep-research.genai.mts", 60).await
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::agents::deep_research::agent;
|
||||
use std::fmt::Debug;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_deepresearch() {
|
||||
// a really provocative question for research that generally yields infinite complexity with each run
|
||||
let input = "What is a life of meaning?";
|
||||
|
||||
let mut command = agent("test-deepresearch-agent", input).await.unwrap();
|
||||
|
||||
// let mut stdout = String::new();
|
||||
|
||||
// command.stdout.take().unwrap().read_to_string(&mut stdout).await.unwrap();
|
||||
|
||||
// println!("stdout: {}", stdout);
|
||||
// // Optionally, you can capture and inspect stdout if needed:
|
||||
let _output = command.wait_with_output().await.expect("Failed to wait for output");
|
||||
// println!("Stdout: {}", String::from_utf8_lossy(&output.stdout));
|
||||
// println!("Stderr: {}", String::from_utf8_lossy(&output.stderr));
|
||||
}
|
||||
}
|
@@ -1,4 +1,5 @@
|
||||
pub(crate) mod news;
|
||||
pub(crate) mod scrape;
|
||||
pub(crate) mod search;
|
||||
pub(crate) mod image_generator;
|
||||
pub(crate) mod image_generator;
|
||||
pub(crate) mod deep_research;
|
@@ -97,6 +97,9 @@ pub async fn use_agent(Path(agent_id): Path<String>) -> impl IntoResponse {
|
||||
"image-generator" => {
|
||||
crate::agents::image_generator::agent(agent_id.as_str(), &*input).await
|
||||
}
|
||||
"deep-research" => {
|
||||
crate::agents::deep_research::agent(agent_id.as_str(), &*input).await
|
||||
}
|
||||
"web-scrape" => crate::agents::scrape::agent(agent_id.as_str(), &*input).await,
|
||||
_ => {
|
||||
tracing::error!("Unsupported resource type: {}", resource);
|
||||
|
@@ -18,6 +18,7 @@ pub fn create_router() -> Router {
|
||||
.make_span_with(trace::DefaultMakeSpan::new().level(Level::INFO))
|
||||
.on_response(trace::DefaultOnResponse::new().level(Level::INFO)),
|
||||
)
|
||||
.layer(tower_http::cors::CorsLayer::very_permissive())
|
||||
.fallback(handle_not_found)
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user