diff --git a/Cargo.toml b/Cargo.toml index da9f138..af79aab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,7 +27,7 @@ rust-embed = "8.5.0" bytes = "1.8.0" lazy_static = "1.5.0" sled = "0.34.7" -tower-http = { version = "0.6.2", features = ["trace"] } +tower-http = { version = "0.6.2", features = ["trace", "cors"] } anyhow = "1.0.97" base64 = "0.22.1" fips204 = "0.4.6" \ No newline at end of file diff --git a/bun.lockb b/bun.lockb index 1b99e39..97e3aa9 100755 Binary files a/bun.lockb and b/bun.lockb differ diff --git a/killport.js b/killport.js new file mode 100644 index 0000000..9f7a859 --- /dev/null +++ b/killport.js @@ -0,0 +1,51 @@ +#!/usr/bin/env node + +import * as child_process from "node:child_process"; + +const args = process.argv.slice(2); +const port = args.length > 0 ? parseInt(args[0], 10) : null; + +if (!port || isNaN(port)) { + console.error('Please provide a valid port number'); + process.exit(1); +} + + +export const killProcessOnPort = (port) => { + return new Promise((resolve, reject) => { + // Find the PID of the process using the specified port + child_process.exec(`lsof -t -i :${port}`.trim(), (err, stdout) => { + if (err) { + // Handle command error (such as permission denied) + if (err.code !== 1) { + console.error(`Error finding process on port ${port}:`, err); + return reject(err); + } else { + // If code is 1, it generally means no process is using the port + console.log(`No process found on port ${port}`); + return resolve(); + } + } + + // If stdout is empty, no process is using the port + const pid = stdout.trim(); + if (!pid) { + console.log(`No process is currently running on port ${port}`); + return resolve(); + } + + // Kill the process using the specified PID + child_process.exec(`kill -9 ${pid}`.trim(), (killErr) => { + if (killErr) { + console.error(`Failed to kill process ${pid} on port ${port}`, killErr); + return reject(killErr); + } + + console.log(`Successfully killed process ${pid} on port ${port}`); + resolve(); + }); + }); + }); +}; + +await killProcessOnPort(port); diff --git a/package.json b/package.json index 934ab12..593ad6c 100644 --- a/package.json +++ b/package.json @@ -6,7 +6,7 @@ ], "private": true, "scripts": { - "dev": "bunx kill-port 3006 && bun run build && cargo watch -x 'run src/main.rs'", + "dev": "./killport.js 3006 && bun run build && cargo watch -x 'run src/main.rs'", "start": "docker compose up --build", "ai:search": "genaiscript run packages/genaiscript/genaisrc/web-search.genai.mts --vars USER_INPUT='who won the 2024 election?'", "shim:ai:search": "pnpm build && ./packages/genaiscript-rust-shim/dist/genaiscript-rust-shim.js --file=packages/genaiscript/genaisrc/web-search.genai.mts USER_INPUT=\"Who won the 2024 presidential election?\"\n", @@ -14,7 +14,7 @@ "ai:url:read": "genaiscript run packages/genaiscript/genaisrc/web-scrape.genai.mts --vars USER_INPUT='{\"url\":\"https://geoff.seemueller.io/about\",\"query\":\"Describe the details of the page.\", \"action\": \"read\"}'", "ai:url:scrape": "npx genaiscript run packages/genaiscript/genaisrc/web-scrape.genai.mts --vars USER_INPUT='{\"url\":\"https://www.time4learning.com/homeschool-curriculum/high-school/eleventh-grade/math.html\",\"query\":\"What is on this page?\", \"action\": \"scrape\"}'", "prod:logs": "fly logs", - "test-http": "./test-search.ts", + "test-http": "test/test-search.ts", "build": "(cd packages/genaiscript-rust-shim && bun run buildShim && bun run setupDev && cargo build)" } } diff --git a/packages/genaiscript-rust-shim/README.md b/packages/genaiscript-rust-shim/README.md index 040c853..e69de29 100644 --- a/packages/genaiscript-rust-shim/README.md +++ b/packages/genaiscript-rust-shim/README.md @@ -1,3 +0,0 @@ -# genaiscript-rust-shim - -simple package that wraps genaiscript for binding to `tokio::process::Command` \ No newline at end of file diff --git a/packages/genaiscript-rust-shim/bun.lockb b/packages/genaiscript-rust-shim/bun.lockb index 9feb5b8..e69de29 100755 Binary files a/packages/genaiscript-rust-shim/bun.lockb and b/packages/genaiscript-rust-shim/bun.lockb differ diff --git a/packages/genaiscript/genaisrc/deep-research.genai.mts b/packages/genaiscript/genaisrc/deep-research.genai.mts index 1a15d84..44456cd 100644 --- a/packages/genaiscript/genaisrc/deep-research.genai.mts +++ b/packages/genaiscript/genaisrc/deep-research.genai.mts @@ -1,7 +1,7 @@ -import {task, entrypoint, interrupt, MemorySaver} from "@langchain/langgraph" +import {entrypoint, InMemoryStore, MemorySaver, task} from "@langchain/langgraph" import "./tools/searxng.genai.mjs" import {SearxngClient} from "@agentic/searxng"; - +import ky from "ky"; script({ title: "Deep Research Program", @@ -15,6 +15,7 @@ const {output, vars} = env const breakdownResearch = task( "breakdown_research", async (question: string) => { + output.log("Breaking down question:", question); const result = await runPrompt( async (ctx) => { ctx.$`You are an expert research strategist. @@ -51,39 +52,44 @@ Output the breakdown as a JSON object.` }, } ) - + output.fence(result.json, "json"); return result.json } ) -const globalCtx = this; - const researchSubQuestion = task( "research_subquestion", async (subQuestion: { id: string; question: string }) => { + output.log(`Researching sub-question: ${subQuestion.question}`); + const kyWithHeaders = ky.create({ + referrerPolicy: "unsafe-url", - const searxng = new SearxngClient({apiBaseUrl: "https://search-engine-gsio.fly.dev"}); + headers: { + 'Authorization': 'Basic ' + btoa(`admin:${process.env.SEARXNG_PASSWORD}`), + } + }); - const {text} = await runPrompt( + const searxng = new SearxngClient({ky: kyWithHeaders}); + + const {json} = await runPrompt( (_) => { _.defTool(searxng) _.$`You are an expert researcher with access to comprehensive information. -Task: Thoroughly research the following question and provide a detailed answer. +Task: Thoroughly research the following question and create a JSON formatted response. Question ID: ${subQuestion.id} Question: ${subQuestion.question} -Provide your findings in a structured format that includes: -- Your answer to the sub-question -- Relevant sources that support your answer -- Your confidence level in the answer (0-1)` +Respond with the specified JSON format. +` }, { model: "small", label: `research subquestion ${subQuestion.id}`, maxDataRepairs: 2, + responseType: "json_object", responseSchema: { type: "object", properties: { @@ -105,19 +111,20 @@ Provide your findings in a structured format that includes: }, } ) - return text + output.fence(json, "json"); + return json } ) - const synthesizeFindings = task( "synthesize_findings", async (mainQuestion: string, findings: any[]) => { + output.log(`Synthesizing Findings: ${JSON.stringify(findings, null, 2)}`); const result = await runPrompt( async (ctx) => { ctx.$`You are an expert research synthesizer. -Task: Synthesize the following research findings into a coherent response to the main research question. +Task: Synthesize the following research findings into a JSON object to answer the main research question. Main Research Question: ${mainQuestion} @@ -128,11 +135,14 @@ Provide a synthesis that: 1. Directly answers the main research question 2. Integrates the findings from all sub-questions 3. Identifies limitations in the current research -4. Suggests next steps for further investigation` +4. Suggests next steps for further investigation + +Respond in the specified JSON format.` }, { label: "synthesize findings", - responseType: "markdown", + responseType: "json_object", + maxDataRepairs: 2, responseSchema: { type: "object", properties: { @@ -147,15 +157,15 @@ Provide a synthesis that: }, } ) - + output.fence(result.json, "json"); return result.json } ) - const summarizeAndIdentifyGaps = task( - "summarize_and_identify_gaps", + {name: "summarize_and_identify_gaps"}, async (synthesis: any, findings: any[]) => { + output.log(`Summarizing and identifying gaps: ${JSON.stringify(findings, null, 2)}`); const result = await runPrompt( async (ctx) => { ctx.$`You are an expert research evaluator. @@ -171,10 +181,13 @@ ${JSON.stringify(findings, null, 2)} Please provide: 1. A concise summary of current findings 2. Identify 2-3 specific knowledge gaps -3. Formulate follow-up questions to address these gaps` +3. Formulate follow-up questions to address these gaps + +Respond using the specified JSON schema.` }, { label: "identify research gaps", + maxDataRepairs: 2, responseSchema: { type: "object", properties: { @@ -197,27 +210,28 @@ Please provide: }, } ) + output.fence(result.json, "json"); return result.json } ) - +// Research Workflow const researchWorkflow = entrypoint( - {checkpointer: new MemorySaver(), name: "research_workflow"}, + {checkpointer: new MemorySaver(), name: "research_workflow", store: new InMemoryStore() }, async (input: { question: string; context?: string }) => { - + output.log(`Deep research initiated`); + // Step 1: Break down the research question const breakdown = await breakdownResearch(input.question) + // Step 2: Research each sub-question in parallel const subQuestionFindings = [] + // handle both subQuestions and sub_questions, since the API returns one or the other + const subquestions = breakdown?.sub_questions ? breakdown.sub_questions : breakdown.subQuestions; + const forSq = await Promise.all(subquestions.map(async (q) => await researchSubQuestion(q))); + forSq.map(subQuestionFindings.push) - for (const sq of breakdown.subQuestions) { - const analysis = await researchSubQuestion(sq); - console.log(analysis); - subQuestionFindings.push(analysis); - } - - + // Step 3: Synthesize the findings let synthesis = await synthesizeFindings( input.question, subQuestionFindings @@ -228,22 +242,21 @@ const researchWorkflow = entrypoint( subQuestionFindings ) - + // Step 5: Conduct follow-up research on identified gaps const followUpFindings = []; for (const fq of gapAnalysis.followUpQuestions) { const anwser = await researchSubQuestion(fq); - console.log(anwser); followUpFindings.push(anwser); } + // Step 6: Final synthesis with deep research const allFindings = [...subQuestionFindings, ...followUpFindings] const finalSynthesis = await synthesizeFindings( input.question, allFindings ) - return { question: input.question, breakdown: breakdown, @@ -255,27 +268,26 @@ const researchWorkflow = entrypoint( } ) - -const researchQuestion = - env.vars.question || - "What are the most promising approaches to climate change mitigation?" - - +// An arbitrary ID locked to this workflow run const threadId = `research-${Date.now()}` +const options = { + configurable: {thread_id: threadId}, +}; -const config = { - configurable: { - thread_id: threadId, - }, -} +const researchQuestion = env.vars.user_input; +const inputs = { + question: researchQuestion, + context: vars.context || "", +}; +// Execute workflow. Checkpoints are manually sent across the wire in the tasks. const results = await researchWorkflow.invoke( + inputs, { - question: researchQuestion, - context: vars.context || "", - }, - config + ...options, + } ) -output.fence(results, "json") \ No newline at end of file + +env.output.fence(results) \ No newline at end of file diff --git a/packages/genaiscript/package.json b/packages/genaiscript/package.json index a0acc39..d67a74a 100644 --- a/packages/genaiscript/package.json +++ b/packages/genaiscript/package.json @@ -9,13 +9,16 @@ "shim:ai:search": "pnpm build && ./dist/shim.js --file=genaisrc/search.genai.mts USER_INPUT=\"Who won the 2024 presidential election?\"\n", "ai:news": "genaiscript run genaisrc/news-search.genai.mts --vars USER_INPUT='What are the latest updates and developments in the Ukraine war?'", "ai:url:read": "genaiscript run genaisrc/web-scrape.genai.mts --vars USER_INPUT='{\"url\":\"https://geoff.seemueller.io/about\",\"query\":\"Describe the details of the page.\", \"action\": \"read\"}'", - "ai:url:scrape": "npx genaiscript run genaisrc/web-scrape.genai.mts --vars USER_INPUT='{\"url\":\"https://www.time4learning.com/homeschool-curriculum/high-school/eleventh-grade/math.html\",\"query\":\"What is on this page?\", \"action\": \"scrape\"}'" + "ai:url:scrape": "npx genaiscript run genaisrc/web-scrape.genai.mts --vars USER_INPUT='{\"url\":\"https://www.time4learning.com/homeschool-curriculum/high-school/eleventh-grade/math.html\",\"query\":\"What is on this page?\", \"action\": \"scrape\"}'", + "typegen": " genaiscript scripts compile" }, "dependencies": { "@agentic/searxng": "7.5.3", "@types/node": "^22.10.2", "genaiscript": "^1.95.1", "happy-dom": "^16.0.1", - "ky": "^1.8.0" + "ky": "^1.8.0", + "md-front-matter": "^1.0.4", + "@langchain/langgraph":"0.2.57" } } diff --git a/smoke-tests/test-deep-research.ts b/smoke-tests/test-deep-research.ts new file mode 100644 index 0000000..f17c0f9 --- /dev/null +++ b/smoke-tests/test-deep-research.ts @@ -0,0 +1,43 @@ +#!/usr/bin/env deno -A + +const API_ROOT = "http://localhost:3006"; + +const sid = crypto.randomUUID(); +// -------------------- 1. Create the agent -------------------- +const createAgentBody = { + id: sid, + resource: "deep-research", + parent: sid, + payload: { input: "Research unexplored cures for cancer." }, +}; + +const createRes = await fetch(`${API_ROOT}/api/agents`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify(createAgentBody), +}); + + +const raw = await createRes.text(); +console.log({raw}); +const {stream_url: streamId} = JSON.parse(raw); + +console.log("Agent created with streamId:", streamId); + +// -------------------- 2. Listen to the SSE stream -------------------- +const streamUrl = `${API_ROOT}${streamId}`; +const es = new EventSource(streamUrl); + + +es.onopen = (e) => { + console.log("connected", e); +}; + +es.onmessage = (e) => { + console.log("⟶", e.data); +}; + +es.onerror = (e) => { + console.error("SSE error:", e); + es.close(); +}; \ No newline at end of file diff --git a/smoke-tests/test-search.ts b/smoke-tests/test-search.ts new file mode 100644 index 0000000..d742aeb --- /dev/null +++ b/smoke-tests/test-search.ts @@ -0,0 +1,43 @@ +#!/usr/bin/env deno -A + +const API_ROOT = "http://localhost:3006"; + +const sid = crypto.randomUUID(); +// -------------------- 1. Create the agent -------------------- +const createAgentBody = { + id: sid, + resource: "web-search", + parent: sid, + payload: { input: "What is the capital of France?" }, +}; + +const createRes = await fetch(`${API_ROOT}/api/agents`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify(createAgentBody), +}); + + +const raw = await createRes.text(); +console.log({raw}); +const {stream_url: streamId} = JSON.parse(raw); + +console.log("Agent created with streamId:", streamId); + +// -------------------- 2. Listen to the SSE stream -------------------- +const streamUrl = `${API_ROOT}${streamId}`; +const es = new EventSource(streamUrl); + + +es.onopen = (e) => { + console.log("connected", e); +}; + +es.onmessage = (e) => { + console.log("⟶", e.data); +}; + +es.onerror = (e) => { + console.error("SSE error:", e); + es.close(); +}; \ No newline at end of file diff --git a/src/agents/deep_research.rs b/src/agents/deep_research.rs new file mode 100644 index 0000000..299742c --- /dev/null +++ b/src/agents/deep_research.rs @@ -0,0 +1,32 @@ +use crate::utils::utils::run_agent; +use tokio::process::Child; +use tracing; + +pub async fn agent(stream_id: &str, input: &str) -> Result { + run_agent(stream_id, input, "./packages/genaiscript/genaisrc/deep-research.genai.mts", 60).await +} + + +#[cfg(test)] +mod tests { + use crate::agents::deep_research::agent; + use std::fmt::Debug; + + #[tokio::test] + async fn test_deepresearch() { + // a really provocative question for research that generally yields infinite complexity with each run + let input = "What is a life of meaning?"; + + let mut command = agent("test-deepresearch-agent", input).await.unwrap(); + + // let mut stdout = String::new(); + + // command.stdout.take().unwrap().read_to_string(&mut stdout).await.unwrap(); + + // println!("stdout: {}", stdout); + // // Optionally, you can capture and inspect stdout if needed: + let _output = command.wait_with_output().await.expect("Failed to wait for output"); + // println!("Stdout: {}", String::from_utf8_lossy(&output.stdout)); + // println!("Stderr: {}", String::from_utf8_lossy(&output.stderr)); + } +} diff --git a/src/agents/mod.rs b/src/agents/mod.rs index 3cf8f91..fecb0c6 100644 --- a/src/agents/mod.rs +++ b/src/agents/mod.rs @@ -1,4 +1,5 @@ pub(crate) mod news; pub(crate) mod scrape; pub(crate) mod search; -pub(crate) mod image_generator; \ No newline at end of file +pub(crate) mod image_generator; +pub(crate) mod deep_research; \ No newline at end of file diff --git a/src/handlers/agents.rs b/src/handlers/agents.rs index 2cb6554..a1cc2bc 100644 --- a/src/handlers/agents.rs +++ b/src/handlers/agents.rs @@ -97,6 +97,9 @@ pub async fn use_agent(Path(agent_id): Path) -> impl IntoResponse { "image-generator" => { crate::agents::image_generator::agent(agent_id.as_str(), &*input).await } + "deep-research" => { + crate::agents::deep_research::agent(agent_id.as_str(), &*input).await + } "web-scrape" => crate::agents::scrape::agent(agent_id.as_str(), &*input).await, _ => { tracing::error!("Unsupported resource type: {}", resource); diff --git a/src/routes.rs b/src/routes.rs index 3a99fd9..000a700 100644 --- a/src/routes.rs +++ b/src/routes.rs @@ -18,6 +18,7 @@ pub fn create_router() -> Router { .make_span_with(trace::DefaultMakeSpan::new().level(Level::INFO)) .on_response(trace::DefaultOnResponse::new().level(Level::INFO)), ) + .layer(tower_http::cors::CorsLayer::very_permissive()) .fallback(handle_not_found) }