housekeeping
Cargo.lock (generated, 102 changed lines)
@@ -2351,6 +2351,57 @@ version = "2.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd"
 
+[[package]]
+name = "inference-engine"
+version = "0.1.0"
+dependencies = [
+ "ab_glyph",
+ "accelerate-src",
+ "anyhow",
+ "axum 0.7.9",
+ "bindgen_cuda",
+ "byteorder",
+ "candle-core",
+ "candle-datasets",
+ "candle-flash-attn",
+ "candle-nn",
+ "candle-onnx",
+ "candle-transformers",
+ "clap",
+ "cpal",
+ "csv",
+ "cudarc",
+ "either",
+ "enterpolation",
+ "half",
+ "hf-hub",
+ "image",
+ "imageproc",
+ "intel-mkl-src",
+ "memmap2",
+ "num-traits",
+ "palette",
+ "pdf2image",
+ "pyo3",
+ "rand 0.9.1",
+ "rayon",
+ "reborrow",
+ "rubato",
+ "safetensors",
+ "serde",
+ "serde_json",
+ "symphonia",
+ "tokenizers",
+ "tokio",
+ "tower 0.4.13",
+ "tower-http 0.5.2",
+ "tracing",
+ "tracing-chrome",
+ "tracing-subscriber",
+ "utoipa",
+ "uuid",
+]
+
 [[package]]
 name = "instant"
 version = "0.1.13"
@@ -2620,57 +2671,6 @@ version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956"
 
-[[package]]
-name = "local_inference_engine"
-version = "0.1.0"
-dependencies = [
- "ab_glyph",
- "accelerate-src",
- "anyhow",
- "axum 0.7.9",
- "bindgen_cuda",
- "byteorder",
- "candle-core",
- "candle-datasets",
- "candle-flash-attn",
- "candle-nn",
- "candle-onnx",
- "candle-transformers",
- "clap",
- "cpal",
- "csv",
- "cudarc",
- "either",
- "enterpolation",
- "half",
- "hf-hub",
- "image",
- "imageproc",
- "intel-mkl-src",
- "memmap2",
- "num-traits",
- "palette",
- "pdf2image",
- "pyo3",
- "rand 0.9.1",
- "rayon",
- "reborrow",
- "rubato",
- "safetensors",
- "serde",
- "serde_json",
- "symphonia",
- "tokenizers",
- "tokio",
- "tower 0.4.13",
- "tower-http 0.5.2",
- "tracing",
- "tracing-chrome",
- "tracing-subscriber",
- "utoipa",
- "uuid",
-]
-
 [[package]]
 name = "lock_api"
 version = "0.4.12"
@@ -1,7 +1,7 @@
 [workspace]
 members = [
     "crates/agent-server",
-    "crates/local_inference_engine",
+    "crates/inference-engine",
 ]
 
 resolver = "2"
README.md (20 changed lines)
@@ -1,13 +1,17 @@
 # open-web-agent-rs
 
-A Rust-based web agent with an embedded OpenAI-compatible inference server (supports Gemma models only).
+A Rust-based web agent with an embedded OpenAI-compatible inference server (supports Gemma models only). It is packaged and deployed as a container.
 
 ## Project Structure
 
 This project is organized as a Cargo workspace with the following crates:
 
-- `agent-server`: The main web agent server
-- `local_inference_engine`: An embedded OpenAI-compatible inference server for Gemma models
+- `crates`
+  - [agent-server](crates/agent-server): The main web agent server
+  - [inference-engine](crates/inference-engine): An embedded OpenAI-compatible inference server for Gemma models
+- `packages`
+  - [genaiscript](packages/genaiscript): GenaiScript scripts
+  - [genaiscript-rust-shim](packages/genaiscript-rust-shim): An embedded OpenAI-compatible inference server for Gemma models
 
 ## Acknowledgements
 
@@ -61,9 +65,9 @@ flowchart LR
     end
 
     %% ─────────────── Local inference ───────────────
-    subgraph INFERENCE["Local Inference Engine"]
+    subgraph INFERENCE["Inference Engine"]
         direction TB
-        LIE["Local Inference Engine"]:::core -->|loads| MODELS["Gemma Models"]:::model
+        LIE["Inference Engine"]:::core -->|loads| MODELS["Gemma Models"]:::model
         LIE -->|exposes| OPENAI_API["OpenAI‑compatible API"]:::api
         MODELS -->|runs on| ACCEL
 
@@ -121,12 +125,12 @@ flowchart LR
 
 ## Running the Project
 
-### Local Inference Engine
+### Inference Engine
 
 To run the local inference engine:
 
 ```bash
-cd crates/local_inference_engine
+cd crates/inference-engine
 cargo run --release -- --server
 ```
 
@@ -159,5 +163,5 @@ To build a specific crate:
 ```bash
 cargo build -p agent-server
 # or
-cargo build -p local_inference_engine
+cargo build -p inference-engine
 ```
@@ -13,6 +13,7 @@ mod tests {
     use std::fmt::Debug;
 
     #[tokio::test]
+    #[ignore]
     async fn test_deepresearch() {
         // a really provocative question for research that generally yields infinite complexity with each run
         let input = "What is a life of meaning?";
@@ -12,7 +12,8 @@ mod tests {
    use std::fmt::Debug;
    use crate::agents::search::agent;

    #[tokio::test]
+   #[ignore]
    async fn test_search_execution() {
        let input = "Who won the 2024 presidential election?";

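Both hunks above add the same attribute to long-running agent tests, so they are skipped by a plain `cargo test` and only run on demand with `cargo test -- --ignored`. A minimal sketch of the resulting shape (body elided; the name mirrors the test above):

```rust
#[tokio::test]
#[ignore] // excluded from the default `cargo test` run; opt in with `cargo test -- --ignored`
async fn test_search_execution() {
    // ... exercises the search agent end to end ...
}
```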
@@ -1,5 +1,5 @@
 [package]
-name = "local_inference_engine"
+name = "inference-engine"
 version = "0.1.0"
 edition = "2021"
 
@@ -1,4 +1,4 @@
-# Local Inference Engine
+# @open-web-agent-rs/inference-engine
 
 A Rust-based inference engine for running large language models locally. This tool supports both CLI mode for direct text generation and server mode with an OpenAI-compatible API.
 
@@ -30,7 +30,7 @@ A Rust-based inference engine for running large language models locally. This tool supports both CLI mode for direct text generation and server mode with an OpenAI-compatible API.
 
 2. Build the local inference engine:
 ```bash
-cargo build -p local_inference_engine --release
+cargo build -p inference-engine --release
 ```
 
 ## Usage
@@ -40,7 +40,7 @@ A Rust-based inference engine for running large language models locally. This tool supports both CLI mode for direct text generation and server mode with an OpenAI-compatible API.
 Run the inference engine in CLI mode to generate text directly:
 
 ```bash
-cargo run -p local_inference_engine --release -- --prompt "Your prompt text here" --which 3-1b-it
+cargo run -p inference-engine --release -- --prompt "Your prompt text here" --which 3-1b-it
 ```
 
 #### CLI Options
@@ -62,7 +62,7 @@ cargo run -p local_inference_engine --release -- --prompt "Your prompt text here" --which 3-1b-it
 Run the inference engine in server mode to expose an OpenAI-compatible API:
 
 ```bash
-cargo run -p local_inference_engine --release -- --server --port 3777 --which 3-1b-it
+cargo run -p inference-engine --release -- --server --port 3777 --which 3-1b-it
 ```
 
 This starts a web server on the specified port (default: 3777) with an OpenAI-compatible chat completions endpoint.
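For illustration, a client call against the server started above might look like the sketch below; the `/v1/chat/completions` path, the model name, and the `reqwest`, `tokio`, and `serde_json` dependencies are assumptions rather than details taken from this repository.

```rust
// Hypothetical client sketch: requires reqwest (with the "json" feature),
// tokio (with "macros" and "rt-multi-thread"), and serde_json in the caller's Cargo.toml.
use serde_json::json;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let body = json!({
        "model": "gemma-3-1b-it", // assumed model identifier
        "messages": [{ "role": "user", "content": "Say hello." }]
    });

    let text = reqwest::Client::new()
        .post("http://localhost:3777/v1/chat/completions") // assumed endpoint path
        .json(&body)
        .send()
        .await?
        .text()
        .await?;

    println!("{text}");
    Ok(())
}
```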
@@ -173,4 +173,4 @@
 })();
 
 console.log("\nAll test requests have been sent. Check the server logs for more details.");
-console.log("To run the server, use: cargo run --bin local_inference_engine -- --server");
+console.log("To run the server, use: cargo run --bin inference-engine -- --server");
@@ -246,7 +246,7 @@ async fn chat_completions(
         StatusCode::BAD_REQUEST,
         Json(serde_json::json!({
             "error": {
-                "message": "The OpenAI API is currently not supported due to compatibility issues with the tensor operations. Please use the CLI mode instead with: cargo run --bin local_inference_engine -- --prompt \"Your prompt here\"",
+                "message": "The OpenAI API is currently not supported due to compatibility issues with the tensor operations. Please use the CLI mode instead with: cargo run --bin inference-engine -- --prompt \"Your prompt here\"",
                 "type": "unsupported_api"
             }
         })),
@@ -292,6 +292,7 @@ use candle_core::{DType, Device, MetalDevice, Tensor};
 use candle_nn::VarBuilder;
 use candle_transformers::generation::LogitsProcessor;
 use hf_hub::{Repo, RepoType, api::sync::Api};
+use serde_json::json;
 use tokenizers::Tokenizer;
 use crate::token_output_stream::TokenOutputStream;
 use crate::utilities_lib::device;
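The new `use serde_json::json;` import brings the `json!` macro into scope for building ad-hoc JSON values, the same pattern the error responses in this change rely on. A minimal illustration (the message text here is invented):

```rust
use serde_json::json;

fn main() {
    // Build a JSON error payload in the same shape as the handlers above.
    let err = json!({
        "error": {
            "message": "CLI mode only for now", // placeholder text
            "type": "unsupported_api"
        }
    });
    println!("{err}");
}
```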
@@ -596,7 +597,7 @@ impl TextGeneration {
             logits
         } else {
             let start_at = tokens.len().saturating_sub(self.repeat_last_n);
 
             // Manual implementation of repeat penalty to avoid type conflicts
             let mut logits_vec = logits.to_vec1::<f32>()?;
 
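The comment in the hunk above refers to applying the repeat penalty by hand on the raw `f32` logits rather than through a library helper. A self-contained sketch of that idea in plain Rust (function name and values are illustrative, not the project's actual code):

```rust
/// Penalize tokens that already appeared in the recent context window.
/// Positive logits are divided by `penalty` and negative ones multiplied,
/// so repeated tokens become less likely regardless of sign.
fn apply_repeat_penalty_manual(logits: &mut [f32], penalty: f32, recent_tokens: &[u32]) {
    for &token_id in recent_tokens {
        if let Some(logit) = logits.get_mut(token_id as usize) {
            if *logit >= 0.0 {
                *logit /= penalty;
            } else {
                *logit *= penalty;
            }
        }
    }
}

fn main() {
    let mut logits = vec![2.0_f32, -1.0, 0.5, 3.0];
    // Suppose tokens 0 and 3 occurred within the last `repeat_last_n` positions.
    apply_repeat_penalty_manual(&mut logits, 1.5, &[0, 3]);
    println!("{logits:?}"); // tokens 0 and 3 are now less likely to repeat
}
```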
@@ -67,7 +67,7 @@ pub async fn chat_completions(
         StatusCode::BAD_REQUEST,
         Json(serde_json::json!({
             "error": {
-                "message": "The OpenAI API is currently not supported due to compatibility issues with the tensor operations. Please use the CLI mode instead with: cargo run --bin local_inference_engine -- --prompt \"Your prompt here\"",
+                "message": "The OpenAI API is currently not supported due to compatibility issues with the tensor operations. Please use the CLI mode instead with: cargo run --bin inference-engine -- --prompt \"Your prompt here\"",
                 "type": "unsupported_api"
             }
         })),
@@ -1,4 +1,4 @@
-use local_inference_engine::model::{Model, Which};
+use inference_engine::model::{Model, Which};
 
 #[cfg(test)]
 mod tests {
@@ -1,11 +1,8 @@
-use local_inference_engine::text_generation::TextGeneration;
-use local_inference_engine::model::{Model, Which};
-use local_inference_engine::token_output_stream::TokenOutputStream;
-use tokenizers::Tokenizer;
-use candle_core::{DType, Device, Tensor};
-use candle_transformers::generation::LogitsProcessor;
 use anyhow::Result;
-use std::sync::Arc;
+use candle_transformers::generation::LogitsProcessor;
+use inference_engine::model::Which;
+use inference_engine::token_output_stream::TokenOutputStream;
+use tokenizers::Tokenizer;
 
 #[cfg(test)]
 mod tests {
@@ -1,4 +1,4 @@
-use local_inference_engine::token_output_stream::TokenOutputStream;
+use inference_engine::token_output_stream::TokenOutputStream;
 use tokenizers::Tokenizer;
 use std::path::PathBuf;
 use anyhow::Result;
@@ -7,6 +7,8 @@
   "private": true,
   "scripts": {
     "clean": "rm -rf .genaiscript && rm -rf dist && rm -rf node_modules && rm -rf open-web-agent-rs && rm -rf target && rm -rf packages/genaiscript-rust-shim/dist",
+    "build": "(cd packages/genaiscript-rust-shim && bun run buildShim && bun run setupDev && cargo build -p agent-server)",
+    "mcp-inspector": "bunx @modelcontextprotocol/inspector",
     "dev": "bun i && ./killport.js 3006 && bun run build && cargo watch -x 'run -p agent-server'",
     "start": "docker compose up --build",
     "ai:search": "genaiscript run packages/genaiscript/genaisrc/web-search.genai.mts --vars USER_INPUT='who won the 2024 election?'",
@@ -16,8 +18,6 @@
     "ai:url:scrape": "npx genaiscript run packages/genaiscript/genaisrc/web-scrape.genai.mts --vars USER_INPUT='{\"url\":\"https://www.time4learning.com/homeschool-curriculum/high-school/eleventh-grade/math.html\",\"query\":\"What is on this page?\", \"action\": \"scrape\"}'",
     "prod:logs": "fly logs",
     "test-http": "test/test-search.ts",
-    "mcp-inspector": "bunx @modelcontextprotocol/inspector",
-    "build": "(cd packages/genaiscript-rust-shim && bun run buildShim && bun run setupDev && cargo build -p agent-server)"
   },
   "dependencies": {
     "@modelcontextprotocol/inspector": "^0.14.0"