housekeeping
@@ -13,6 +13,7 @@ mod tests {
     use std::fmt::Debug;
 
     #[tokio::test]
+    #[ignore]
     async fn test_deepresearch() {
        // a really provocative question for research that generally yields infinite complexity with each run
        let input = "What is a life of meaning?";
@@ -12,7 +12,8 @@ mod tests {
     use std::fmt::Debug;
     use crate::agents::search::agent;
 
     #[tokio::test]
+    #[ignore]
     async fn test_search_execution() {
        let input = "Who won the 2024 presidential election?";
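Both test hunks above apply the same housekeeping pattern: the network-dependent tests keep `#[tokio::test]` and gain `#[ignore]`, so they no longer run as part of the default `cargo test` pass. A minimal sketch of the resulting shape (the body and assertion below are placeholders, not the project's actual test code):

```rust
#[cfg(test)]
mod tests {
    // Skipped by default; run explicitly with `cargo test -- --ignored`.
    #[tokio::test]
    #[ignore]
    async fn test_search_execution() {
        // Placeholder body; the real test drives the search agent shown in the diff.
        let input = "Who won the 2024 presidential election?";
        assert!(!input.is_empty());
    }
}
```

Ignored tests can still be exercised on demand with `cargo test -- --ignored`, or together with everything else via `cargo test -- --include-ignored`.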
@@ -1,5 +1,5 @@
 [package]
-name = "local_inference_engine"
+name = "inference-engine"
 version = "0.1.0"
 edition = "2021"
@@ -1,4 +1,4 @@
-# Local Inference Engine
+# @open-web-agent-rs/inference-engine
 
 A Rust-based inference engine for running large language models locally. This tool supports both CLI mode for direct text generation and server mode with an OpenAI-compatible API.
@@ -30,7 +30,7 @@ A Rust-based inference engine for running large language models locally. This to
 2. Build the local inference engine:
 ```bash
-cargo build -p local_inference_engine --release
+cargo build -p inference-engine --release
 ```
 
 ## Usage
@@ -40,7 +40,7 @@ A Rust-based inference engine for running large language models locally. This to
 Run the inference engine in CLI mode to generate text directly:
 
 ```bash
-cargo run -p local_inference_engine --release -- --prompt "Your prompt text here" --which 3-1b-it
+cargo run -p inference-engine --release -- --prompt "Your prompt text here" --which 3-1b-it
 ```
 
 #### CLI Options
@@ -62,7 +62,7 @@ cargo run -p local_inference_engine --release -- --prompt "Your prompt text here
 Run the inference engine in server mode to expose an OpenAI-compatible API:
 
 ```bash
-cargo run -p local_inference_engine --release -- --server --port 3777 --which 3-1b-it
+cargo run -p inference-engine --release -- --server --port 3777 --which 3-1b-it
 ```
 
 This starts a web server on the specified port (default: 3777) with an OpenAI-compatible chat completions endpoint.
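The handler behind this endpoint currently rejects requests (see the `unsupported_api` hunks later in this commit), but for reference the payload it would accept is an OpenAI-style chat completion body. A minimal sketch built with `serde_json`, which the engine already depends on; the `/v1/chat/completions` path and the model name are assumptions, not values taken from this commit:

```rust
use serde_json::json;

fn main() {
    // Hypothetical OpenAI-style request body for the server-mode endpoint.
    // Field names follow the OpenAI chat schema; the route and model id below
    // are illustrative assumptions.
    let body = json!({
        "model": "gemma-3-1b-it",
        "messages": [
            { "role": "user", "content": "Your prompt text here" }
        ],
        "max_tokens": 128
    });

    println!("POST http://localhost:3777/v1/chat/completions");
    println!("{}", serde_json::to_string_pretty(&body).unwrap());
}
```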
@@ -173,4 +173,4 @@
 })();
 
 console.log("\nAll test requests have been sent. Check the server logs for more details.");
-console.log("To run the server, use: cargo run --bin local_inference_engine -- --server");
+console.log("To run the server, use: cargo run --bin inference-engine -- --server");
@@ -246,7 +246,7 @@ async fn chat_completions(
         StatusCode::BAD_REQUEST,
         Json(serde_json::json!({
             "error": {
-                "message": "The OpenAI API is currently not supported due to compatibility issues with the tensor operations. Please use the CLI mode instead with: cargo run --bin local_inference_engine -- --prompt \"Your prompt here\"",
+                "message": "The OpenAI API is currently not supported due to compatibility issues with the tensor operations. Please use the CLI mode instead with: cargo run --bin inference-engine -- --prompt \"Your prompt here\"",
                 "type": "unsupported_api"
             }
         })),
@@ -292,6 +292,7 @@ use candle_core::{DType, Device, MetalDevice, Tensor};
 use candle_nn::VarBuilder;
 use candle_transformers::generation::LogitsProcessor;
 use hf_hub::{Repo, RepoType, api::sync::Api};
 use serde_json::json;
 use tokenizers::Tokenizer;
 use crate::token_output_stream::TokenOutputStream;
 use crate::utilities_lib::device;
@@ -596,7 +597,7 @@ impl TextGeneration {
             logits
         } else {
             let start_at = tokens.len().saturating_sub(self.repeat_last_n);
 
             // Manual implementation of repeat penalty to avoid type conflicts
             let mut logits_vec = logits.to_vec1::<f32>()?;
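The "manual implementation of repeat penalty" comment refers to penalizing recently generated tokens directly on the raw `f32` logits rather than going through a tensor helper. A sketch of what such a pass typically looks like, assuming the common convention of dividing positive logits and multiplying negative ones (the function below is illustrative, not the code behind this hunk):

```rust
// Illustrative manual repeat penalty over plain f32 logits. Dividing positive
// logits and multiplying negative ones mirrors the usual convention (as in
// candle's repeat-penalty utility); this is an assumption about the intent of
// the hunk, not a copy of its implementation.
fn penalize_repeats(logits: &mut [f32], penalty: f32, recent_tokens: &[u32]) {
    for &token in recent_tokens {
        if let Some(logit) = logits.get_mut(token as usize) {
            if *logit >= 0.0 {
                *logit /= penalty;
            } else {
                *logit *= penalty;
            }
        }
    }
}

fn main() {
    let mut logits = vec![2.0_f32, -1.0, 0.5, 3.0];
    penalize_repeats(&mut logits, 1.1, &[0, 3]);
    println!("{logits:?}"); // tokens 0 and 3 become less likely to repeat
}
```

In the hunk itself, `start_at` and `repeat_last_n` define the window of recent tokens that such a pass would iterate over.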
@@ -67,7 +67,7 @@ pub async fn chat_completions(
         StatusCode::BAD_REQUEST,
         Json(serde_json::json!({
             "error": {
-                "message": "The OpenAI API is currently not supported due to compatibility issues with the tensor operations. Please use the CLI mode instead with: cargo run --bin local_inference_engine -- --prompt \"Your prompt here\"",
+                "message": "The OpenAI API is currently not supported due to compatibility issues with the tensor operations. Please use the CLI mode instead with: cargo run --bin inference-engine -- --prompt \"Your prompt here\"",
                 "type": "unsupported_api"
             }
         })),
@@ -1,4 +1,4 @@
-use local_inference_engine::model::{Model, Which};
+use inference_engine::model::{Model, Which};
 
 #[cfg(test)]
 mod tests {
@@ -1,11 +1,8 @@
-use local_inference_engine::text_generation::TextGeneration;
-use local_inference_engine::model::{Model, Which};
-use local_inference_engine::token_output_stream::TokenOutputStream;
-use tokenizers::Tokenizer;
-use candle_core::{DType, Device, Tensor};
-use candle_transformers::generation::LogitsProcessor;
-use anyhow::Result;
-use std::sync::Arc;
+use candle_transformers::generation::LogitsProcessor;
+use inference_engine::model::Which;
+use inference_engine::token_output_stream::TokenOutputStream;
+use tokenizers::Tokenizer;
 
 #[cfg(test)]
 mod tests {
@@ -1,4 +1,4 @@
-use local_inference_engine::token_output_stream::TokenOutputStream;
+use inference_engine::token_output_stream::TokenOutputStream;
 use tokenizers::Tokenizer;
 use std::path::PathBuf;
 use anyhow::Result;