mirror of
https://github.com/geoffsee/predict-otron-9001.git
synced 2025-09-08 22:46:44 +00:00

Removed `test_request.sh`, deprecated functionality, and unused imports; introduced a new CLI tool (`cli.ts`) for testing the inference engine; and adjusted handling of non-streaming/streaming chat completions.
- Add CPU fallback support for text generation when the primary device is unsupported
- Introduce `execute_with_fallback` method to handle device compatibility and shape mismatch errors
- Extend unit tests to reproduce tensor shape mismatch errors specific to model configurations
- Increase HTTP timeout limits in `curl_chat_stream.sh` script for reliable API testing

Chat completion endpoint functions with gemma3 (no streaming).

Add benchmarking guide with HTML reporting, Leptos chat crate, and middleware for metrics tracking.
27 lines
577 B
TOML
27 lines
577 B
TOML
# Package metadata for the embeddings-engine crate.
[package]
name = "embeddings-engine"
version = "0.1.0"
edition = "2024"

# Library target. The crate name uses an underscore, whereas the package
# name uses a hyphen.
[lib]
name = "embeddings_engine"
path = "src/lib.rs"

# Binary target, named after the package and built from src/main.rs.
[[bin]]
name = "embeddings-engine"
path = "src/main.rs"

# Third-party crates, kept in alphabetical order (Cargo convention).
# Version requirements and enabled features are unchanged.
[dependencies]
async-openai = "0.28.3"
axum = "0.8.4"
fastembed = "4"
once_cell = "1.19.0"
rand = "0.8.5"
serde = { version = "1.0.219", features = ["derive"] }
serde_json = "1.0.140"
tokio = { version = "1.45.1", features = ["full"] }
tower = "0.5.2"
tower-http = { version = "0.6.6", features = ["trace"] }
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }