housekeeping

geoffsee
2025-06-21 08:06:14 -04:00
parent e8ea6c0246
commit d4013be986
24 changed files with 85 additions and 81 deletions

View File

@@ -13,6 +13,7 @@ mod tests {
use std::fmt::Debug;
#[tokio::test]
+#[ignore]
async fn test_deepresearch() {
// a really provocative question for research that generally yields infinite complexity with each run
let input = "What is a life of meaning?";

View File

@@ -12,7 +12,8 @@ mod tests {
use std::fmt::Debug;
use crate::agents::search::agent;
#[tokio::test]
+#[ignore]
async fn test_search_execution() {
let input = "Who won the 2024 presidential election?";

View File

@@ -1,5 +1,5 @@
[package]
name = "local_inference_engine"
name = "inference-engine"
version = "0.1.0"
edition = "2021"

View File

@@ -1,4 +1,4 @@
-# Local Inference Engine
+# @open-web-agent-rs/inference-engine
A Rust-based inference engine for running large language models locally. This tool supports both CLI mode for direct text generation and server mode with an OpenAI-compatible API.
@@ -30,7 +30,7 @@ A Rust-based inference engine for running large language models locally. This to
2. Build the local inference engine:
```bash
-cargo build -p local_inference_engine --release
+cargo build -p inference-engine --release
```
## Usage
@@ -40,7 +40,7 @@ A Rust-based inference engine for running large language models locally. This to
Run the inference engine in CLI mode to generate text directly:
```bash
-cargo run -p local_inference_engine --release -- --prompt "Your prompt text here" --which 3-1b-it
+cargo run -p inference-engine --release -- --prompt "Your prompt text here" --which 3-1b-it
```
#### CLI Options
@@ -62,7 +62,7 @@ cargo run -p local_inference_engine --release -- --prompt "Your prompt text here
Run the inference engine in server mode to expose an OpenAI-compatible API:
```bash
-cargo run -p local_inference_engine --release -- --server --port 3777 --which 3-1b-it
+cargo run -p inference-engine --release -- --server --port 3777 --which 3-1b-it
```
This starts a web server on the specified port (default: 3777) with an OpenAI-compatible chat completions endpoint.
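
As a rough illustration, a client call against that endpoint could look like the sketch below. The `/v1/chat/completions` path, the payload fields, the model name, and the `reqwest`/`tokio`/`serde_json` dependencies are assumptions inferred only from the "OpenAI-compatible" description, and another hunk in this commit shows the handler still answering with an `unsupported_api` error, so treat this purely as a sketch of the request shape.

```rust
// Hedged sketch: exercising the OpenAI-compatible endpoint described above.
// Assumptions (not confirmed by this diff): the route is /v1/chat/completions,
// the payload mirrors OpenAI's chat format, and reqwest (with the "json"
// feature), tokio, and serde_json are available as dependencies.
use serde_json::json;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let body = json!({
        "model": "gemma-3-1b-it", // assumed model identifier
        "messages": [
            { "role": "user", "content": "Say hello from the inference engine." }
        ],
        "max_tokens": 64
    });

    let response = reqwest::Client::new()
        // 3777 is the documented default port; the path is an assumption.
        .post("http://localhost:3777/v1/chat/completions")
        .json(&body)
        .send()
        .await?;

    println!("status: {}", response.status());
    println!("body: {}", response.text().await?);
    Ok(())
}
```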

View File

@@ -173,4 +173,4 @@
})();
console.log("\nAll test requests have been sent. Check the server logs for more details.");
console.log("To run the server, use: cargo run --bin local_inference_engine -- --server");
console.log("To run the server, use: cargo run --bin inference-engine -- --server");

View File

@@ -246,7 +246,7 @@ async fn chat_completions(
StatusCode::BAD_REQUEST,
Json(serde_json::json!({
"error": {
"message": "The OpenAI API is currently not supported due to compatibility issues with the tensor operations. Please use the CLI mode instead with: cargo run --bin local_inference_engine -- --prompt \"Your prompt here\"",
"message": "The OpenAI API is currently not supported due to compatibility issues with the tensor operations. Please use the CLI mode instead with: cargo run --bin inference-engine -- --prompt \"Your prompt here\"",
"type": "unsupported_api"
}
})),
@@ -292,6 +292,7 @@ use candle_core::{DType, Device, MetalDevice, Tensor};
use candle_nn::VarBuilder;
use candle_transformers::generation::LogitsProcessor;
use hf_hub::{Repo, RepoType, api::sync::Api};
+use serde_json::json;
use tokenizers::Tokenizer;
use crate::token_output_stream::TokenOutputStream;
use crate::utilities_lib::device;
@@ -596,7 +597,7 @@ impl TextGeneration {
logits
} else {
let start_at = tokens.len().saturating_sub(self.repeat_last_n);
// Manual implementation of repeat penalty to avoid type conflicts
let mut logits_vec = logits.to_vec1::<f32>()?;
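
The `start_at` and `logits_vec` lines above belong to the manual repeat-penalty path ("to avoid type conflicts" per the comment). A minimal sketch of that technique, with the function and variable names assumed for illustration rather than taken from the actual `TextGeneration` implementation, might look like this:

```rust
// Hedged sketch of a manual repeat penalty over recently generated tokens.
// The function name and signature are assumptions; the real code operates on
// `logits.to_vec1::<f32>()?` inside `TextGeneration`.
fn apply_repeat_penalty_manually(logits_vec: &mut [f32], penalty: f32, recent_tokens: &[u32]) {
    for &token_id in recent_tokens {
        if let Some(logit) = logits_vec.get_mut(token_id as usize) {
            // CTRL-style penalty: shrink positive logits and push negative
            // logits further down, making recent tokens less likely to repeat.
            if *logit >= 0.0 {
                *logit /= penalty;
            } else {
                *logit *= penalty;
            }
        }
    }
}

fn main() {
    let mut logits = vec![2.0_f32, -1.0, 0.5, 3.0];
    let recent = [0_u32, 3];
    apply_repeat_penalty_manually(&mut logits, 1.5, &recent);
    // Token ids 0 and 3 are now less likely than before.
    println!("{logits:?}");
}
```

The diff's `start_at = tokens.len().saturating_sub(self.repeat_last_n)` is what would select the `recent_tokens` window fed into a loop like this.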

View File

@@ -67,7 +67,7 @@ pub async fn chat_completions(
StatusCode::BAD_REQUEST,
Json(serde_json::json!({
"error": {
"message": "The OpenAI API is currently not supported due to compatibility issues with the tensor operations. Please use the CLI mode instead with: cargo run --bin local_inference_engine -- --prompt \"Your prompt here\"",
"message": "The OpenAI API is currently not supported due to compatibility issues with the tensor operations. Please use the CLI mode instead with: cargo run --bin inference-engine -- --prompt \"Your prompt here\"",
"type": "unsupported_api"
}
})),

View File

@@ -1,4 +1,4 @@
-use local_inference_engine::model::{Model, Which};
+use inference_engine::model::{Model, Which};
#[cfg(test)]
mod tests {

View File

@@ -1,11 +1,8 @@
-use local_inference_engine::text_generation::TextGeneration;
-use local_inference_engine::model::{Model, Which};
-use local_inference_engine::token_output_stream::TokenOutputStream;
-use tokenizers::Tokenizer;
-use candle_core::{DType, Device, Tensor};
-use candle_transformers::generation::LogitsProcessor;
-use anyhow::Result;
-use std::sync::Arc;
+use candle_transformers::generation::LogitsProcessor;
+use inference_engine::model::Which;
+use inference_engine::token_output_stream::TokenOutputStream;
+use tokenizers::Tokenizer;
#[cfg(test)]
mod tests {

View File

@@ -1,4 +1,4 @@
-use local_inference_engine::token_output_stream::TokenOutputStream;
+use inference_engine::token_output_stream::TokenOutputStream;
use tokenizers::Tokenizer;
use std::path::PathBuf;
use anyhow::Result;