mirror of
https://github.com/geoffsee/predict-otron-9001.git
synced 2025-09-08 22:46:44 +00:00

Removed `test_request.sh`, deprecated functionality, and unused imports; introduced a new CLI tool (`cli.ts`) for testing the inference engine and adjusted handling of non-streaming/streaming chat completions.
- Add CPU fallback support for text generation when the primary device is unsupported
- Introduce `execute_with_fallback` method to handle device compatibility and shape mismatch errors
- Extend unit tests to reproduce tensor shape mismatch errors specific to model configurations
- Increase HTTP timeout limits in `curl_chat_stream.sh` script for reliable API testing

Chat completion endpoint functions with gemma3 (no streaming).

Add benchmarking guide with HTML reporting, Leptos chat crate, and middleware for metrics tracking.
18 lines
376 B
TOML
18 lines
376 B
TOML
# Cargo workspace root: the crates listed in `members` are built together.
[workspace]
members = [
    "crates/predict-otron-9000",
    "crates/inference-engine",
    "crates/embeddings-engine",
    "crates/leptos-chat",
    "crates/legacy-inference-engine",
]
# Crate selected when a Cargo command is run from the workspace root without
# an explicit package selection.
default-members = ["crates/predict-otron-9000"]
# Feature resolver version used for the whole workspace.
resolver = "2"

# Leptos project entry stored under workspace metadata.
# NOTE(review): presumably read by the cargo-leptos build tool; confirm.
# Both the binary and the library package point at the `leptos-chat` crate.
[[workspace.metadata.leptos]]
# project name
name = "leptos-project"
bin-package = "leptos-chat"
lib-package = "leptos-chat"