enables deep research

This commit is contained in:
geoffsee
2025-05-28 19:43:36 -04:00
committed by Geoff Seemueller
parent 805b524471
commit 2e45c6b67b
14 changed files with 245 additions and 59 deletions

View File

@@ -1,7 +1,7 @@
import {task, entrypoint, interrupt, MemorySaver} from "@langchain/langgraph"
import {entrypoint, InMemoryStore, MemorySaver, task} from "@langchain/langgraph"
import "./tools/searxng.genai.mjs"
import {SearxngClient} from "@agentic/searxng";
import ky from "ky";
script({
title: "Deep Research Program",
@@ -15,6 +15,7 @@ const {output, vars} = env
const breakdownResearch = task(
"breakdown_research",
async (question: string) => {
output.log("Breaking down question:", question);
const result = await runPrompt(
async (ctx) => {
ctx.$`You are an expert research strategist.
@@ -51,39 +52,44 @@ Output the breakdown as a JSON object.`
},
}
)
output.fence(result.json, "json");
return result.json
}
)
const globalCtx = this;
const researchSubQuestion = task(
"research_subquestion",
async (subQuestion: { id: string; question: string }) => {
output.log(`Researching sub-question: ${subQuestion.question}`);
const kyWithHeaders = ky.create({
referrerPolicy: "unsafe-url",
const searxng = new SearxngClient({apiBaseUrl: "https://search-engine-gsio.fly.dev"});
headers: {
'Authorization': 'Basic ' + btoa(`admin:${process.env.SEARXNG_PASSWORD}`),
}
});
const {text} = await runPrompt(
const searxng = new SearxngClient({ky: kyWithHeaders});
const {json} = await runPrompt(
(_) => {
_.defTool(searxng)
_.$`You are an expert researcher with access to comprehensive information.
Task: Thoroughly research the following question and provide a detailed answer.
Task: Thoroughly research the following question and create a JSON formatted response.
Question ID: ${subQuestion.id}
Question: ${subQuestion.question}
Provide your findings in a structured format that includes:
- Your answer to the sub-question
- Relevant sources that support your answer
- Your confidence level in the answer (0-1)`
Respond with the specified JSON format.
`
},
{
model: "small",
label: `research subquestion ${subQuestion.id}`,
maxDataRepairs: 2,
responseType: "json_object",
responseSchema: {
type: "object",
properties: {
@@ -105,19 +111,20 @@ Provide your findings in a structured format that includes:
},
}
)
return text
output.fence(json, "json");
return json
}
)
const synthesizeFindings = task(
"synthesize_findings",
async (mainQuestion: string, findings: any[]) => {
output.log(`Synthesizing Findings: ${JSON.stringify(findings, null, 2)}`);
const result = await runPrompt(
async (ctx) => {
ctx.$`You are an expert research synthesizer.
Task: Synthesize the following research findings into a coherent response to the main research question.
Task: Synthesize the following research findings into a JSON object to answer the main research question.
Main Research Question: ${mainQuestion}
@@ -128,11 +135,14 @@ Provide a synthesis that:
1. Directly answers the main research question
2. Integrates the findings from all sub-questions
3. Identifies limitations in the current research
4. Suggests next steps for further investigation`
4. Suggests next steps for further investigation
Respond in the specified JSON format.`
},
{
label: "synthesize findings",
responseType: "markdown",
responseType: "json_object",
maxDataRepairs: 2,
responseSchema: {
type: "object",
properties: {
@@ -147,15 +157,15 @@ Provide a synthesis that:
},
}
)
output.fence(result.json, "json");
return result.json
}
)
const summarizeAndIdentifyGaps = task(
"summarize_and_identify_gaps",
{name: "summarize_and_identify_gaps"},
async (synthesis: any, findings: any[]) => {
output.log(`Summarizing and identifying gaps: ${JSON.stringify(findings, null, 2)}`);
const result = await runPrompt(
async (ctx) => {
ctx.$`You are an expert research evaluator.
@@ -171,10 +181,13 @@ ${JSON.stringify(findings, null, 2)}
Please provide:
1. A concise summary of current findings
2. Identify 2-3 specific knowledge gaps
3. Formulate follow-up questions to address these gaps`
3. Formulate follow-up questions to address these gaps
Respond using the specified JSON schema.`
},
{
label: "identify research gaps",
maxDataRepairs: 2,
responseSchema: {
type: "object",
properties: {
@@ -197,27 +210,28 @@ Please provide:
},
}
)
output.fence(result.json, "json");
return result.json
}
)
// Research Workflow
const researchWorkflow = entrypoint(
{checkpointer: new MemorySaver(), name: "research_workflow"},
{checkpointer: new MemorySaver(), name: "research_workflow", store: new InMemoryStore() },
async (input: { question: string; context?: string }) => {
output.log(`Deep research initiated`);
// Step 1: Break down the research question
const breakdown = await breakdownResearch(input.question)
// Step 2: Research each sub-question in parallel
const subQuestionFindings = []
// handle both subQuestions and sub_questions, since the API returns one or the other
const subquestions = breakdown?.sub_questions ? breakdown.sub_questions : breakdown.subQuestions;
const forSq = await Promise.all(subquestions.map(async (q) => await researchSubQuestion(q)));
forSq.map(subQuestionFindings.push)
for (const sq of breakdown.subQuestions) {
const analysis = await researchSubQuestion(sq);
console.log(analysis);
subQuestionFindings.push(analysis);
}
// Step 3: Synthesize the findings
let synthesis = await synthesizeFindings(
input.question,
subQuestionFindings
@@ -228,22 +242,21 @@ const researchWorkflow = entrypoint(
subQuestionFindings
)
// Step 5: Conduct follow-up research on identified gaps
const followUpFindings = [];
for (const fq of gapAnalysis.followUpQuestions) {
const anwser = await researchSubQuestion(fq);
console.log(anwser);
followUpFindings.push(anwser);
}
// Step 6: Final synthesis with deep research
const allFindings = [...subQuestionFindings, ...followUpFindings]
const finalSynthesis = await synthesizeFindings(
input.question,
allFindings
)
return {
question: input.question,
breakdown: breakdown,
@@ -255,27 +268,26 @@ const researchWorkflow = entrypoint(
}
)
const researchQuestion =
env.vars.question ||
"What are the most promising approaches to climate change mitigation?"
// An arbitrary ID locked to this workflow run
const threadId = `research-${Date.now()}`
const options = {
configurable: {thread_id: threadId},
};
const config = {
configurable: {
thread_id: threadId,
},
}
const researchQuestion = env.vars.user_input;
const inputs = {
question: researchQuestion,
context: vars.context || "",
};
// Execute workflow. Checkpoints are manually sent across the wire in the tasks.
const results = await researchWorkflow.invoke(
inputs,
{
question: researchQuestion,
context: vars.context || "",
},
config
...options,
}
)
output.fence(results, "json")
env.output.fence(results)

View File

@@ -9,13 +9,16 @@
"shim:ai:search": "pnpm build && ./dist/shim.js --file=genaisrc/search.genai.mts USER_INPUT=\"Who won the 2024 presidential election?\"\n",
"ai:news": "genaiscript run genaisrc/news-search.genai.mts --vars USER_INPUT='What are the latest updates and developments in the Ukraine war?'",
"ai:url:read": "genaiscript run genaisrc/web-scrape.genai.mts --vars USER_INPUT='{\"url\":\"https://geoff.seemueller.io/about\",\"query\":\"Describe the details of the page.\", \"action\": \"read\"}'",
"ai:url:scrape": "npx genaiscript run genaisrc/web-scrape.genai.mts --vars USER_INPUT='{\"url\":\"https://www.time4learning.com/homeschool-curriculum/high-school/eleventh-grade/math.html\",\"query\":\"What is on this page?\", \"action\": \"scrape\"}'"
"ai:url:scrape": "npx genaiscript run genaisrc/web-scrape.genai.mts --vars USER_INPUT='{\"url\":\"https://www.time4learning.com/homeschool-curriculum/high-school/eleventh-grade/math.html\",\"query\":\"What is on this page?\", \"action\": \"scrape\"}'",
"typegen": " genaiscript scripts compile"
},
"dependencies": {
"@agentic/searxng": "7.5.3",
"@types/node": "^22.10.2",
"genaiscript": "^1.95.1",
"happy-dom": "^16.0.1",
"ky": "^1.8.0"
"ky": "^1.8.0",
"md-front-matter": "^1.0.4",
"@langchain/langgraph":"0.2.57"
}
}