Supports small Llama and Gemma models

Refactor inference into dedicated crates for Llama and Gemma inferencing; not integrated
2025-09-08 22:46:44 +00:00 · 2025-08-29 18:15:29 -04:00
parent d06b16bb12
commit 315ef17605
26 changed files with 2136 additions and 1402 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,7 +4,9 @@ members = [
    "crates/inference-engine",
    "crates/embeddings-engine",
    "crates/leptos-app",
-    "crates/helm-chart-tool"
+    "crates/helm-chart-tool",
+    "crates/llama-runner",
+    "crates/gemma-runner"
 ]
 default-members = ["crates/predict-otron-9000"]
 resolver = "2"