mirror of
https://github.com/geoffsee/predict-otron-9001.git
synced 2025-09-08 22:46:44 +00:00

Removed `test_request.sh`, deprecated functionality, and unused imports; introduced a new CLI tool (`cli.ts`) for testing inference engine and adjusted handling of non-streaming/streaming chat completions. - Add CPU fallback support for text generation when primary device is unsupported - Introduce `execute_with_fallback` method to handle device compatibility and shape mismatch errors - Extend unit tests to reproduce tensor shape mismatch errors specific to model configurations - Increase HTTP timeout limits in `curl_chat_stream.sh` script for reliable API testing chat completion endpoint functions with gemma3 (no streaming) Add benchmarking guide with HTML reporting, Leptos chat crate, and middleware for metrics tracking
83 lines
3.6 KiB
TOML
83 lines
3.6 KiB
TOML
# Crate metadata for the `inference-engine` package.
[package]
name = "inference-engine"
version = "0.1.0"
edition = "2021"

# Additional binary target: `cli`, built from src/cli_main.rs.
[[bin]]
name = "cli"
path = "src/cli_main.rs"

# Runtime dependencies, sorted alphabetically.
# - The candle-* crates are pinned to exactly 0.9.1 (`=`), matching the
#   candle-core version selected in the target-specific tables of this manifest.
# - Entries marked `optional = true` are only compiled when enabled via a feature.
[dependencies]
accelerate-src = { version = "0.3.2", optional = true }
anyhow = "1.0.98"
axum = { version = "0.8.4", features = ["json"] }
candle-datasets = { version = "=0.9.1", optional = true }
candle-flash-attn = { version = "=0.9.1", optional = true }
candle-nn = "=0.9.1"
candle-onnx = { version = "=0.9.1", optional = true }
candle-transformers = "=0.9.1"
clap = { version = "4.2.4", features = ["derive"] }
cpal = { version = "0.15.2", optional = true }
csv = "1.3.0"
cudarc = { version = "0.16.3", features = ["std", "cublas", "cublaslt", "curand", "driver", "nvrtc", "f16", "cuda-version-from-build-system", "dynamic-linking"], default-features = false, optional = true }
either = { version = "1.9.0", features = ["serde"] }
enterpolation = { version = "0.2.1", optional = true }
futures-util = "0.3.31"
half = { version = "2.5.0", features = ["num-traits", "use-intrinsics", "rand_distr"], optional = true }
hf-hub = { version = "0.4.1", features = ["tokio"] }
image = { version = "0.25.2", default-features = false, features = ["jpeg", "png"] }
intel-mkl-src = { version = "0.8.1", features = ["mkl-static-lp64-iomp"], optional = true }
num-traits = "0.2.15"
palette = { version = "0.7.6", optional = true }
pdf2image = { version = "0.1.2", optional = true }
pyo3 = { version = "0.22.0", features = ["auto-initialize", "abi3-py311"], optional = true }
rayon = "1.7.0"
reborrow = "0.5.5"
rubato = { version = "0.15.0", optional = true }
safetensors = "0.4.1"
serde = { version = "1.0.171", features = ["derive"] }
serde_json = "1.0.99"
symphonia = { version = "0.5.3", features = ["all"], optional = true }
tokenizers = { version = "0.21.0", default-features = false, features = ["onig", "http"] }
tokio = { version = "1.43.0", features = ["full"] }
tower = "0.5.2"
tower-http = { version = "0.6.6", features = ["cors"] }
tracing = "0.1.37"
tracing-chrome = "0.7.1"
tracing-subscriber = { version = "0.3.7", features = ["env-filter"] }
utoipa = { version = "4.2.0", features = ["axum_extras"] }
uuid = { version = "1.7.0", features = ["v4"] }

# --- Conditional compilation: platform-specific candle-core backend ---
[target.'cfg(target_os = "macos")'.dependencies]
# macOS builds enable candle-core's `metal` feature.
# NOTE(review): the previous comment here read "Use CPU backend for macOS to
# avoid Metal rotary-emb implementation issues", which contradicts the `metal`
# feature enabled below — confirm which backend is actually intended.
candle-core = { version = "=0.9.1", features = ["metal"] }

[target.'cfg(not(target_os = "macos"))'.dependencies]
# For Linux or other non-macOS systems, you likely want the CPU backend or CUDA.
# If you're building on Linux with a CUDA-enabled GPU (default features are
# disabled here; list just "cuda" if not using default features):
candle-core = { version = "=0.9.1", features = ["cuda"], default-features = false }

# If you're building on Linux with only CPU, use this line instead of the one above:
# candle-core = { version = "=0.9.1", default-features = false } # CPU is often the default, but good to be explicit
# --- End of conditional compilation section ---

|
# Dependencies used only for tests, examples and benches; sorted alphabetically.
[dev-dependencies]
ab_glyph = "0.2.23"
anyhow = { version = "1", features = ["backtrace"] }
byteorder = "1.4.3"
clap = { version = "4.2.4", features = ["derive"] }
imageproc = { version = "0.24.0", default-features = false }
memmap2 = { version = "0.9.3", features = ["stable_deref_trait"] }
rand = "0.9.0"
# Necessary to disambiguate with tokio in wasm examples which are 1.28.1
tokio = "1.43.0"
tracing = "0.1.37"
tracing-chrome = "0.7.1"
tracing-subscriber = "0.3.7"

# Dependencies available to the build script; `bindgen_cuda` is optional.
[build-dependencies]
anyhow = { version = "1", features = ["backtrace"] }
bindgen_cuda = { version = "0.1.1", optional = true }