Files
open-web-agent-rs/packages/genaiscript/genaisrc/deep-research.genai.mts
2025-05-28 19:48:50 -04:00

293 lines
9.5 KiB
TypeScript

import {entrypoint, InMemoryStore, MemorySaver, task} from "@langchain/langgraph"
import "./tools/searxng.genai.mjs"
import {SearxngClient} from "@agentic/searxng";
import ky from "ky";
// Register the script's metadata and defaults with the GenAIScript runtime.
script({
    title: "Deep Research Program",
    description: "Researchers can use this program to conduct deep research on a topic",
    model: "large",
    cache: "ephemeral",
})

// Shorthands for the run's output trace and the script variables.
const output = env.output
const vars = env.vars
/**
 * Task "breakdown_research": asks the model to split a research question
 * into 3-5 focused sub-questions.
 *
 * @param question - The main research question to decompose.
 * @returns The parsed JSON of the model's reply (`result.json`). The schema
 * requests `{ mainQuestion, subQuestions: [{ id, question, rationale }] }`
 * but marks no field as required, so callers should not assume that every
 * field is present.
 */
const breakdownResearch = task(
    "breakdown_research",
    async (question: string) => {
        // One interpolated message, as in the other tasks of this file, so the
        // question is part of the logged text rather than an extra argument.
        output.log(`Breaking down question: ${question}`);
        const result = await runPrompt(
            async (ctx) => {
                ctx.$`You are an expert research strategist.
Task: Break down the following research question into 3-5 focused sub-questions that would help comprehensively answer the main question.
Research question: ${question}
For each sub-question:
1. Assign a unique ID (e.g., SQ1, SQ2)
2. Explain the rationale for why this sub-question is important
3. Ensure the sub-questions collectively cover the main research question
Output the breakdown as a JSON object.`
            },
            {
                label: "breakdown research",
                responseSchema: {
                    type: "object",
                    properties: {
                        mainQuestion: {type: "string"},
                        subQuestions: {
                            type: "array",
                            items: {
                                type: "object",
                                properties: {
                                    id: {type: "string"},
                                    question: {type: "string"},
                                    rationale: {type: "string"},
                                },
                            },
                        },
                    },
                },
            }
        )
        // Echo the structured breakdown into the output trace.
        output.fence(result.json, "json");
        return result.json
    }
)
/**
 * Task "research_subquestion": researches one sub-question with the "small"
 * model, giving the model a SearXNG client as a tool.
 *
 * HTTP Basic credentials for the user "admin" are attached to the client's
 * requests only when the SEARXNG_PASSWORD environment variable is set.
 *
 * @param subQuestion - The sub-question to research; `id` and `question`
 * are interpolated into the prompt.
 * @returns The parsed JSON of the model's reply. The schema requests
 * `{ subQuestionId, answer, sources: [{ title, url, relevance }], confidence }`
 * but marks no field as required.
 */
const researchSubQuestion = task(
    "research_subquestion",
    async (subQuestion: { id: string; question: string }) => {
        output.log(`Researching sub-question: ${subQuestion.question}`);

        // Only send credentials when a password is configured; otherwise the
        // header would carry the literal string "admin:undefined".
        const password = process.env.SEARXNG_PASSWORD;
        const headers: Record<string, string> = {};
        if (password) {
            headers['Authorization'] = 'Basic ' + btoa(`admin:${password}`);
        } else {
            output.log("SEARXNG_PASSWORD is not set; SearXNG requests will be sent without credentials");
        }

        const kyWithHeaders = ky.create({
            referrerPolicy: "unsafe-url",
            headers,
        });
        const searxng = new SearxngClient({ky: kyWithHeaders});

        const {json} = await runPrompt(
            (_) => {
                // Make the search client available to the model as a tool.
                _.defTool(searxng)
                _.$`You are an expert researcher with access to comprehensive information.
Task: Thoroughly research the following question and create a JSON formatted response.
Question ID: ${subQuestion.id}
Question: ${subQuestion.question}
Respond with the specified JSON format.
`
            },
            {
                model: "small",
                label: `research subquestion ${subQuestion.id}`,
                maxDataRepairs: 2,
                responseType: "json_object",
                responseSchema: {
                    type: "object",
                    properties: {
                        subQuestionId: {type: "string"},
                        answer: {type: "string"},
                        sources: {
                            type: "array",
                            items: {
                                type: "object",
                                properties: {
                                    title: {type: "string"},
                                    url: {type: "string"},
                                    relevance: {type: "string"},
                                },
                            },
                        },
                        confidence: {type: "number"},
                    },
                },
            }
        )
        // Echo the structured finding into the output trace.
        output.fence(json, "json");
        return json
    }
)
/**
 * Task "synthesize_findings": asks the model to merge a list of research
 * findings into one structured answer to the main question.
 *
 * @param mainQuestion - The research question being answered.
 * @param findings - Findings to merge; serialised as pretty-printed JSON for
 * both the log line and the prompt.
 * @returns The parsed JSON of the model's reply. The schema requests
 * `{ summary, findings, limitations, nextSteps }`, the last three being
 * arrays of strings.
 */
const synthesizeFindings = task(
    "synthesize_findings",
    async (mainQuestion: string, findings: any[]) => {
        // Serialise once; the same text goes to the log and into the prompt.
        const findingsJson = JSON.stringify(findings, null, 2);
        output.log(`Synthesizing Findings: ${findingsJson}`);

        const {json} = await runPrompt(
            async (prompt) => {
                prompt.$`You are an expert research synthesizer.
Task: Synthesize the following research findings into a JSON object to answer the main research question.
Main Research Question: ${mainQuestion}
Findings:
${findingsJson}
Provide a synthesis that:
1. Directly answers the main research question
2. Integrates the findings from all sub-questions
3. Identifies limitations in the current research
4. Suggests next steps for further investigation
Respond in the specified JSON format.`
            },
            {
                label: "synthesize findings",
                responseType: "json_object",
                maxDataRepairs: 2,
                responseSchema: {
                    type: "object",
                    properties: {
                        summary: {type: "string"},
                        findings: {
                            type: "array",
                            items: {type: "string"},
                        },
                        limitations: {
                            type: "array",
                            items: {type: "string"},
                        },
                        nextSteps: {
                            type: "array",
                            items: {type: "string"},
                        },
                    },
                },
            }
        )

        // Echo the synthesis into the output trace before handing it back.
        output.fence(json, "json");
        return json
    }
)
/**
 * Task "summarize_and_identify_gaps": asks the model to review a synthesis
 * together with its underlying findings, and to propose follow-up questions
 * for whatever is still missing.
 *
 * @param synthesis - The current synthesis; embedded in the prompt as JSON.
 * @param findings - The findings behind the synthesis; logged and embedded
 * in the prompt as JSON.
 * @returns The parsed JSON of the model's reply. The schema requests
 * `{ summary, gaps: string[], followUpQuestions: [{ id, question }] }`.
 */
const summarizeAndIdentifyGaps = task(
    {name: "summarize_and_identify_gaps"},
    async (synthesis: any, findings: any[]) => {
        // Serialise the findings once for both the log line and the prompt.
        const findingsJson = JSON.stringify(findings, null, 2);
        output.log(`Summarizing and identifying gaps: ${findingsJson}`);

        const {json} = await runPrompt(
            async (prompt) => {
                const synthesisJson = JSON.stringify(synthesis, null, 2);
                prompt.$`You are an expert research evaluator.
Task: Review the research synthesis and identify any gaps or areas that need deeper investigation.
Current synthesis:
${synthesisJson}
Research findings:
${findingsJson}
Please provide:
1. A concise summary of current findings
2. Identify 2-3 specific knowledge gaps
3. Formulate follow-up questions to address these gaps
Respond using the specified JSON schema.`
            },
            {
                label: "identify research gaps",
                maxDataRepairs: 2,
                responseSchema: {
                    type: "object",
                    properties: {
                        summary: {type: "string"},
                        gaps: {type: "array", items: {type: "string"}},
                        followUpQuestions: {
                            type: "array",
                            items: {
                                type: "object",
                                properties: {
                                    id: {type: "string"},
                                    question: {type: "string"},
                                },
                            },
                        },
                    },
                },
            }
        )

        // Echo the gap analysis into the output trace before handing it back.
        output.fence(json, "json");
        return json
    }
)
// Research Workflow
/**
 * Entrypoint "research_workflow": runs the full deep-research pipeline for
 * one question.
 *
 * Steps: break the question into sub-questions, research them in parallel,
 * synthesize the findings, identify gaps, research the follow-up questions
 * one at a time, then synthesize everything again.
 *
 * @param input - `question` is the research question. `context` is accepted
 * but not read by this function.
 * @returns An object with the original question, the breakdown, the initial
 * findings, the gap analysis, the follow-up findings and the final synthesis.
 * @throws Error when the breakdown contains no list of sub-questions.
 */
const researchWorkflow = entrypoint(
    {checkpointer: new MemorySaver(), name: "research_workflow", store: new InMemoryStore() },
    async (input: { question: string; context?: string }) => {
        output.log(`Deep research initiated`);

        // Step 1: Break down the research question
        const breakdown = await breakdownResearch(input.question)

        // Step 2: Research each sub-question in parallel.
        // The model may answer with either `sub_questions` or `subQuestions`,
        // so accept both spellings.
        const subQuestions = breakdown?.sub_questions ?? breakdown?.subQuestions;
        if (!Array.isArray(subQuestions)) {
            throw new Error("Research breakdown did not contain a list of sub-questions");
        }
        // Use the Promise.all result directly. Passing an unbound
        // `Array.prototype.push` to `map` throws, because `push` loses its
        // receiver.
        const subQuestionFindings = await Promise.all(
            subQuestions.map((q) => researchSubQuestion(q))
        );

        // Step 3: Synthesize the findings
        const synthesis = await synthesizeFindings(
            input.question,
            subQuestionFindings
        )

        // Step 4: Summarize the synthesis and identify knowledge gaps
        const gapAnalysis = await summarizeAndIdentifyGaps(
            synthesis,
            subQuestionFindings
        )

        // Step 5: Conduct follow-up research on identified gaps.
        // A missing or malformed list means there is nothing to follow up on.
        // The snake_case spelling is accepted here as well, mirroring step 2.
        const followUpCandidates = gapAnalysis?.followUpQuestions ?? gapAnalysis?.follow_up_questions;
        const followUpQuestions = Array.isArray(followUpCandidates) ? followUpCandidates : [];
        const followUpFindings = [];
        for (const fq of followUpQuestions) {
            const answer = await researchSubQuestion(fq);
            followUpFindings.push(answer);
        }

        // Step 6: Final synthesis with deep research
        const allFindings = [...subQuestionFindings, ...followUpFindings]
        const finalSynthesis = await synthesizeFindings(
            input.question,
            allFindings
        )

        return {
            question: input.question,
            breakdown: breakdown,
            initialFindings: subQuestionFindings,
            gapAnalysis: gapAnalysis,
            followUpFindings: followUpFindings,
            synthesis: finalSynthesis,
        }
    }
)
// Thread id for this run, derived from the current timestamp.
const threadId = "research-" + Date.now()
const options = {
    configurable: {thread_id: threadId},
};

// The question comes from the `user_input` script variable; `context`
// falls back to an empty string when it is not provided.
const researchQuestion = vars.user_input;
const inputs = {
    question: researchQuestion,
    context: vars.context || "",
};

// Execute the workflow on the thread configured above.
// NOTE(review): the original comment here said checkpoints are manually sent
// across the wire in the tasks; nothing visible in this file does that, so confirm.
const results = await researchWorkflow.invoke(inputs, {...options})

// Write the aggregated result to the output trace.
output.fence(results)