housekeeping

This commit is contained in:
geoffsee
2025-06-21 08:06:14 -04:00
parent e8ea6c0246
commit d4013be986
24 changed files with 85 additions and 81 deletions

Cargo.lock (generated): 102 changed lines
View File

@@ -2351,6 +2351,57 @@ version = "2.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd"
[[package]]
name = "inference-engine"
version = "0.1.0"
dependencies = [
"ab_glyph",
"accelerate-src",
"anyhow",
"axum 0.7.9",
"bindgen_cuda",
"byteorder",
"candle-core",
"candle-datasets",
"candle-flash-attn",
"candle-nn",
"candle-onnx",
"candle-transformers",
"clap",
"cpal",
"csv",
"cudarc",
"either",
"enterpolation",
"half",
"hf-hub",
"image",
"imageproc",
"intel-mkl-src",
"memmap2",
"num-traits",
"palette",
"pdf2image",
"pyo3",
"rand 0.9.1",
"rayon",
"reborrow",
"rubato",
"safetensors",
"serde",
"serde_json",
"symphonia",
"tokenizers",
"tokio",
"tower 0.4.13",
"tower-http 0.5.2",
"tracing",
"tracing-chrome",
"tracing-subscriber",
"utoipa",
"uuid",
]
[[package]]
name = "instant"
version = "0.1.13"
@@ -2620,57 +2671,6 @@ version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956"
[[package]]
name = "local_inference_engine"
version = "0.1.0"
dependencies = [
"ab_glyph",
"accelerate-src",
"anyhow",
"axum 0.7.9",
"bindgen_cuda",
"byteorder",
"candle-core",
"candle-datasets",
"candle-flash-attn",
"candle-nn",
"candle-onnx",
"candle-transformers",
"clap",
"cpal",
"csv",
"cudarc",
"either",
"enterpolation",
"half",
"hf-hub",
"image",
"imageproc",
"intel-mkl-src",
"memmap2",
"num-traits",
"palette",
"pdf2image",
"pyo3",
"rand 0.9.1",
"rayon",
"reborrow",
"rubato",
"safetensors",
"serde",
"serde_json",
"symphonia",
"tokenizers",
"tokio",
"tower 0.4.13",
"tower-http 0.5.2",
"tracing",
"tracing-chrome",
"tracing-subscriber",
"utoipa",
"uuid",
]
[[package]]
name = "lock_api"
version = "0.4.12"

View File

@@ -1,7 +1,7 @@
[workspace]
members = [
"crates/agent-server",
"crates/local_inference_engine",
"crates/inference-engine",
]
resolver = "2"

View File

@@ -1,13 +1,17 @@
# open-web-agent-rs
-A Rust-based web agent with an embedded OpenAI-compatible inference server (supports Gemma models only).
+A Rust-based web agent with an embedded OpenAI-compatible inference server (supports Gemma models only). It is packaged and deployed as a container.
## Project Structure
This project is organized as a Cargo workspace with the following crates:
-- `agent-server`: The main web agent server
-- `local_inference_engine`: An embedded OpenAI-compatible inference server for Gemma models
+- `crates`
+  - [agent-server](crates/agent-server): The main web agent server
+  - [inference-engine](crates/inference-engine): An embedded OpenAI-compatible inference server for Gemma models
+- `packages`
+  - [genaiscript](packages/genaiscript): GenaiScript scripts
+  - [genaiscript-rust-shim](packages/genaiscript-rust-shim): An embedded OpenAI-compatible inference server for Gemma models
## Acknowledgements
@@ -61,9 +65,9 @@ flowchart LR
end
%% ─────────────── Local inference ───────────────
subgraph INFERENCE["Local Inference Engine"]
subgraph INFERENCE["Inference Engine"]
direction TB
LIE["Local Inference Engine"]:::core -->|loads| MODELS["Gemma Models"]:::model
LIE["Inference Engine"]:::core -->|loads| MODELS["Gemma Models"]:::model
LIE -->|exposes| OPENAI_API["OpenAIcompatible API"]:::api
MODELS -->|runs on| ACCEL
@@ -121,12 +125,12 @@ flowchart LR
## Running the Project
-### Local Inference Engine
+### Inference Engine
To run the local inference engine:
```bash
-cd crates/local_inference_engine
+cd crates/inference-engine
cargo run --release -- --server
```
@@ -159,5 +163,5 @@ To build a specific crate:
```bash
cargo build -p agent-server
# or
-cargo build -p local_inference_engine
+cargo build -p inference-engine
```

View File

@@ -13,6 +13,7 @@ mod tests {
use std::fmt::Debug;
#[tokio::test]
+#[ignore]
async fn test_deepresearch() {
// a really provocative question for research that generally yields infinite complexity with each run
let input = "What is a life of meaning?";

View File

@@ -12,7 +12,8 @@ mod tests {
use std::fmt::Debug;
use crate::agents::search::agent;
-#[tokio::test]
+#[tokio::test]
+#[ignore]
async fn test_search_execution() {
let input = "Who won the 2024 presidential election?";

View File

@@ -1,5 +1,5 @@
[package]
name = "local_inference_engine"
name = "inference-engine"
version = "0.1.0"
edition = "2021"

View File

@@ -1,4 +1,4 @@
-# Local Inference Engine
+# @open-web-agent-rs/inference-engine
A Rust-based inference engine for running large language models locally. This tool supports both CLI mode for direct text generation and server mode with an OpenAI-compatible API.
@@ -30,7 +30,7 @@ A Rust-based inference engine for running large language models locally. This to
2. Build the local inference engine:
```bash
-cargo build -p local_inference_engine --release
+cargo build -p inference-engine --release
```
## Usage
@@ -40,7 +40,7 @@ A Rust-based inference engine for running large language models locally. This to
Run the inference engine in CLI mode to generate text directly:
```bash
-cargo run -p local_inference_engine --release -- --prompt "Your prompt text here" --which 3-1b-it
+cargo run -p inference-engine --release -- --prompt "Your prompt text here" --which 3-1b-it
```
#### CLI Options
@@ -62,7 +62,7 @@ cargo run -p local_inference_engine --release -- --prompt "Your prompt text here
Run the inference engine in server mode to expose an OpenAI-compatible API:
```bash
-cargo run -p local_inference_engine --release -- --server --port 3777 --which 3-1b-it
+cargo run -p inference-engine --release -- --server --port 3777 --which 3-1b-it
```
This starts a web server on the specified port (default: 3777) with an OpenAI-compatible chat completions endpoint.
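As a rough illustration of what "OpenAI-compatible" means here, a request shaped like a standard chat completions call should be what the server expects; the `/v1/chat/completions` path and the model name are assumptions based on that convention, not confirmed by this diff (and note the handler further down in this commit currently answers with an `unsupported_api` error):
```bash
# Assumed endpoint path and payload shape, following the OpenAI chat completions convention.
# The model name is illustrative; the server itself is selected with --which 3-1b-it.
curl http://localhost:3777/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gemma-3-1b-it",
    "messages": [{"role": "user", "content": "Say hello."}]
  }'
```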

View File

@@ -173,4 +173,4 @@
})();
console.log("\nAll test requests have been sent. Check the server logs for more details.");
console.log("To run the server, use: cargo run --bin local_inference_engine -- --server");
console.log("To run the server, use: cargo run --bin inference-engine -- --server");

View File

@@ -246,7 +246,7 @@ async fn chat_completions(
StatusCode::BAD_REQUEST,
Json(serde_json::json!({
"error": {
"message": "The OpenAI API is currently not supported due to compatibility issues with the tensor operations. Please use the CLI mode instead with: cargo run --bin local_inference_engine -- --prompt \"Your prompt here\"",
"message": "The OpenAI API is currently not supported due to compatibility issues with the tensor operations. Please use the CLI mode instead with: cargo run --bin inference-engine -- --prompt \"Your prompt here\"",
"type": "unsupported_api"
}
})),
@@ -292,6 +292,7 @@ use candle_core::{DType, Device, MetalDevice, Tensor};
use candle_nn::VarBuilder;
use candle_transformers::generation::LogitsProcessor;
use hf_hub::{Repo, RepoType, api::sync::Api};
+use serde_json::json;
use tokenizers::Tokenizer;
use crate::token_output_stream::TokenOutputStream;
use crate::utilities_lib::device;
@@ -596,7 +597,7 @@ impl TextGeneration {
logits
} else {
let start_at = tokens.len().saturating_sub(self.repeat_last_n);
// Manual implementation of repeat penalty to avoid type conflicts
let mut logits_vec = logits.to_vec1::<f32>()?;

View File

@@ -67,7 +67,7 @@ pub async fn chat_completions(
StatusCode::BAD_REQUEST,
Json(serde_json::json!({
"error": {
"message": "The OpenAI API is currently not supported due to compatibility issues with the tensor operations. Please use the CLI mode instead with: cargo run --bin local_inference_engine -- --prompt \"Your prompt here\"",
"message": "The OpenAI API is currently not supported due to compatibility issues with the tensor operations. Please use the CLI mode instead with: cargo run --bin inference-engine -- --prompt \"Your prompt here\"",
"type": "unsupported_api"
}
})),

View File

@@ -1,4 +1,4 @@
-use local_inference_engine::model::{Model, Which};
+use inference_engine::model::{Model, Which};
#[cfg(test)]
mod tests {

View File

@@ -1,11 +1,8 @@
-use local_inference_engine::text_generation::TextGeneration;
-use local_inference_engine::model::{Model, Which};
-use local_inference_engine::token_output_stream::TokenOutputStream;
-use tokenizers::Tokenizer;
-use candle_core::{DType, Device, Tensor};
-use candle_transformers::generation::LogitsProcessor;
-use anyhow::Result;
use std::sync::Arc;
+use candle_transformers::generation::LogitsProcessor;
+use inference_engine::model::Which;
+use inference_engine::token_output_stream::TokenOutputStream;
+use tokenizers::Tokenizer;
#[cfg(test)]
mod tests {

View File

@@ -1,4 +1,4 @@
-use local_inference_engine::token_output_stream::TokenOutputStream;
+use inference_engine::token_output_stream::TokenOutputStream;
use tokenizers::Tokenizer;
use std::path::PathBuf;
use anyhow::Result;

View File

@@ -7,6 +7,8 @@
"private": true,
"scripts": {
"clean": "rm -rf .genaiscript && rm -rf dist && rm -rf node_modules && rm -rf open-web-agent-rs && rm -rf target && rm -rf packages/genaiscript-rust-shim/dist",
"build": "(cd packages/genaiscript-rust-shim && bun run buildShim && bun run setupDev && cargo build -p agent-server)",
"mcp-inspector": "bunx @modelcontextprotocol/inspector",
"dev": "bun i && ./killport.js 3006 && bun run build && cargo watch -x 'run -p agent-server'",
"start": "docker compose up --build",
"ai:search": "genaiscript run packages/genaiscript/genaisrc/web-search.genai.mts --vars USER_INPUT='who won the 2024 election?'",
@@ -16,8 +18,6 @@
"ai:url:scrape": "npx genaiscript run packages/genaiscript/genaisrc/web-scrape.genai.mts --vars USER_INPUT='{\"url\":\"https://www.time4learning.com/homeschool-curriculum/high-school/eleventh-grade/math.html\",\"query\":\"What is on this page?\", \"action\": \"scrape\"}'",
"prod:logs": "fly logs",
"test-http": "test/test-search.ts",
"mcp-inspector": "bunx @modelcontextprotocol/inspector",
"build": "(cd packages/genaiscript-rust-shim && bun run buildShim && bun run setupDev && cargo build -p agent-server)"
},
"dependencies": {
"@modelcontextprotocol/inspector": "^0.14.0"