move cli into crates and stage for release

Author: geoffsee
Date: 2025-08-31 13:23:50 -04:00
Parent: 9e9aa69769
Commit: 0580dc8c5e
26 changed files with 604 additions and 447 deletions


@@ -34,6 +34,9 @@ jobs:
       - name: Setup Rust
         run: rustup update stable && rustup default stable
+      - name: Setup Bun
+        uses: oven-sh/setup-bun@v2
+
       - name: Install clippy and rustfmt
         run: rustup component add clippy rustfmt
@@ -132,7 +135,7 @@ jobs:
         run: rustup target add ${{ matrix.target }}
       - name: Build binary
-        run: cargo build --release --target ${{ matrix.target }} -p predict-otron-9000
+        run: cargo build --release --target ${{ matrix.target }} -p predict-otron-9000 -p cli
         env:
           CARGO_TERM_COLOR: always
@@ -140,14 +143,14 @@ jobs:
         if: matrix.os != 'windows-latest'
         run: |
           cd target/${{ matrix.target }}/release
-          tar czf ../../../${{ matrix.name }}.tar.gz predict-otron-9000
+          tar czf ../../../${{ matrix.name }}.tar.gz predict-otron-9000 cli
           cd ../../../
       - name: Package binary (Windows)
         if: matrix.os == 'windows-latest'
         run: |
           cd target/${{ matrix.target }}/release
-          7z a ../../../${{ matrix.name }}.zip predict-otron-9000.exe
+          7z a ../../../${{ matrix.name }}.zip predict-otron-9000.exe cli.exe
           cd ../../../
       - name: Upload binary artifacts (Unix)

.gitignore (vendored, 1 line changed)

@@ -78,3 +78,4 @@ venv/
 *~
 /scripts/cli
 !/scripts/cli.ts
+/**/.*.bun-build

Cargo.lock (generated, 18 lines changed)

@@ -896,6 +896,10 @@ version = "0.7.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"
 
+[[package]]
+name = "cli"
+version = "0.1.1"
+
 [[package]]
 name = "codee"
 version = "0.3.2"
@@ -1472,7 +1476,7 @@ dependencies = [
 [[package]]
 name = "embeddings-engine"
-version = "0.1.0"
+version = "0.1.1"
 dependencies = [
  "async-openai",
  "axum",
@@ -2094,7 +2098,7 @@ dependencies = [
 [[package]]
 name = "gemma-runner"
-version = "0.1.0"
+version = "0.1.1"
 dependencies = [
  "anyhow",
  "candle-core 0.9.1 (git+https://github.com/huggingface/candle.git)",
@@ -2275,7 +2279,7 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
 [[package]]
 name = "helm-chart-tool"
-version = "0.1.0"
+version = "0.1.1"
 dependencies = [
  "anyhow",
  "clap",
@@ -2685,7 +2689,7 @@ dependencies = [
 [[package]]
 name = "inference-engine"
-version = "0.1.0"
+version = "0.1.1"
 dependencies = [
  "ab_glyph",
  "anyhow",
@@ -2937,7 +2941,7 @@ dependencies = [
 [[package]]
 name = "leptos-app"
-version = "0.1.0"
+version = "0.1.1"
 dependencies = [
  "async-openai-wasm",
  "axum",
@@ -3200,7 +3204,7 @@ checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956"
 [[package]]
 name = "llama-runner"
-version = "0.1.0"
+version = "0.1.1"
 dependencies = [
  "anyhow",
  "candle-core 0.9.1 (git+https://github.com/huggingface/candle.git)",
@@ -4079,7 +4083,7 @@ dependencies = [
 [[package]]
 name = "predict-otron-9000"
-version = "0.1.0"
+version = "0.1.1"
 dependencies = [
  "axum",
  "embeddings-engine",


@@ -6,11 +6,15 @@ members = [
     "crates/leptos-app",
     "crates/helm-chart-tool",
     "crates/llama-runner",
-    "crates/gemma-runner"
+    "crates/gemma-runner",
+    "crates/cli"
 ]
 default-members = ["crates/predict-otron-9000"]
 resolver = "2"
 
+[workspace.package]
+version = "0.1.2"
+
 # Compiler optimization profiles for the workspace
 [profile.release]
 opt-level = 3

bun.lock (new file, 22 lines)

@@ -0,0 +1,22 @@
{
  "lockfileVersion": 1,
  "workspaces": {
    "": {
      "name": "predict-otron-9000",
    },
    "crates/cli/package": {
      "name": "cli",
      "dependencies": {
        "install": "^0.13.0",
        "openai": "^5.16.0",
      },
    },
  },
  "packages": {
    "cli": ["cli@workspace:crates/cli/package"],

    "install": ["install@0.13.0", "", {}, "sha512-zDml/jzr2PKU9I8J/xyZBQn8rPCAY//UOYNmR01XwNwyfhEWObo2SWfSl1+0tm1u6PhxLwDnfsT/6jB7OUxqFA=="],

    "openai": ["openai@5.16.0", "", { "peerDependencies": { "ws": "^8.18.0", "zod": "^3.23.8" }, "optionalPeers": ["ws", "zod"], "bin": { "openai": "bin/cli" } }, "sha512-hoEH8ZNvg1HXjU9mp88L/ZH8O082Z8r6FHCXGiWAzVRrEv443aI57qhch4snu07yQydj+AUAWLenAiBXhu89Tw=="],
  }
}

crates/cli/Cargo.toml (new file, 11 lines)

@@ -0,0 +1,11 @@
[package]
name = "cli"
version.workspace = true
edition = "2021"
build = "build.rs"

[[bin]]
name = "cli"
path = "src/main.rs"

[dependencies]

crates/cli/README.md (new file, 23 lines)

@@ -0,0 +1,23 @@
# cli

A Rust/TypeScript hybrid.

```console
./cli [options] [prompt]

Simple CLI tool for testing the local OpenAI-compatible API server.

Options:
  --model <model>      Model to use (default: gemma-3-1b-it)
  --prompt <prompt>    The prompt to send (can also be provided as positional argument)
  --list-models        List all available models from the server
  --help               Show this help message

Examples:
  ./cli "What is the capital of France?"
  ./cli --model gemma-3-1b-it --prompt "Hello, world!"
  ./cli --prompt "Who was the 16th president of the United States?"
  ./cli --list-models

The server must be running at http://localhost:8080
```

crates/cli/build.rs (new file, 209 lines)

@@ -0,0 +1,209 @@
use std::env;
use std::fs;
use std::io::{self, BufRead, Write};
use std::path::{Path, PathBuf};
use std::process::{ChildStderr, ChildStdout, Command, Stdio};
use std::thread;
use std::time::{Duration, SystemTime};

mod bun_target;
use bun_target::BunTarget;

fn main() {
    println!("cargo:rerun-if-changed=");

    if let Err(e) = run_build() {
        println!("cargo:warning=build.rs failed: {e}");
        std::process::exit(1);
    }
}

fn run_build() -> io::Result<()> {
    let manifest_dir =
        PathBuf::from(env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR not set"));
    let package_dir = manifest_dir.join("package");
    let out_dir = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR not set by Cargo"));
    let output_path = out_dir.join("client-cli");

    let bun_tgt = BunTarget::from_cargo_env()
        .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;

    // Optional: warn if using a Bun target that's marked unsupported in your chart
    if matches!(bun_tgt, BunTarget::WindowsArm64) {
        println!("cargo:warning=bun-windows-arm64 is marked unsupported in the compatibility chart");
    }

    warn(&format!("Building CLI into: {}", output_path.display()));

    // --- bun install (in ./package), keep temps inside OUT_DIR ---
    let mut install = Command::new("bun")
        .current_dir(&package_dir)
        .env("TMPDIR", &out_dir)
        .arg("install")
        .stdin(Stdio::null())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .map_err(|e| io::Error::new(e.kind(), format!("Failed to spawn `bun install`: {e}")))?;

    let install_join = stream_child("bun install", install.stdout.take(), install.stderr.take());
    let install_status = install.wait()?;

    // ensure streams finish
    join_streams(install_join);

    if !install_status.success() {
        let code = install_status.code().unwrap_or(1);
        return Err(io::Error::new(
            io::ErrorKind::Other,
            format!("bun install failed with status {code}"),
        ));
    }

    let target = env::var("TARGET").unwrap();

    // --- bun build (in ./package), emit to OUT_DIR, keep temps inside OUT_DIR ---
    let mut build = Command::new("bun")
        .current_dir(&package_dir)
        .env("TMPDIR", &out_dir)
        .arg("build")
        .arg("./cli.ts")
        .arg(format!("--target={}", bun_tgt.as_bun_flag()))
        .arg("--compile")
        .arg("--outfile")
        .arg(&output_path)
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .map_err(|e| io::Error::new(e.kind(), format!("Failed to spawn `bun build`: {e}")))?;

    let build_join = stream_child("bun build", build.stdout.take(), build.stderr.take());
    let status = build.wait()?;

    // ensure streams finish
    join_streams(build_join);

    if status.success() {
        info("bun build succeeded");
    } else {
        let code = status.code().unwrap_or(1);
        warn(&format!("bun build failed with status: {code}"));
        return Err(io::Error::new(io::ErrorKind::Other, "bun build failed"));
    }

    // Ensure the output is executable (after it exists)
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        let mut perms = fs::metadata(&output_path)?.permissions();
        perms.set_mode(0o755);
        fs::set_permissions(&output_path, perms)?;
    }

    println!("cargo:warning=Built CLI at {}", output_path.display());
    println!("cargo:rustc-env=CLIENT_CLI_BIN={}", output_path.display());

    // --- Cleanup stray .bun-build temp files (conservative: older than 5 minutes) ---
    for dir in [&manifest_dir, &package_dir, &out_dir] {
        if let Err(e) = remove_bun_temp_files(dir, Some(Duration::from_secs(5 * 60))) {
            println!("cargo:warning=cleanup in {} failed: {e}", dir.display());
        }
    }

    Ok(())
}

// Spawn readers for child's stdout/stderr so we don't deadlock on pipe buffers
fn stream_child(
    tag: &str,
    stdout: Option<ChildStdout>,
    stderr: Option<ChildStderr>,
) -> (
    Option<thread::JoinHandle<()>>,
    Option<thread::JoinHandle<()>>,
) {
    let t1 = stdout.map(|out| {
        let tag = tag.to_string();
        thread::spawn(move || {
            let reader = io::BufReader::new(out);
            for line in reader.lines() {
                info(&format!("[{tag} stdout] {}", line.unwrap_or_default()));
            }
        })
    });

    let t2 = stderr.map(|err| {
        let tag = tag.to_string();
        thread::spawn(move || {
            let reader = io::BufReader::new(err);
            for line in reader.lines() {
                warn(&format!("[{tag} stderr] {}", line.unwrap_or_default()));
            }
        })
    });

    (t1, t2)
}

fn join_streams(
    joins: (
        Option<thread::JoinHandle<()>>,
        Option<thread::JoinHandle<()>>,
    ),
) {
    if let Some(j) = joins.0 {
        let _ = j.join();
    }
    if let Some(j) = joins.1 {
        let _ = j.join();
    }
}

fn remove_bun_temp_files(dir: &Path, older_than: Option<Duration>) -> io::Result<()> {
    let now = SystemTime::now();

    for entry in fs::read_dir(dir)? {
        let entry = entry?;
        let path = entry.path();

        if !path.is_file() {
            continue;
        }

        // Files like ".1860e7df40ff1bef-00000000.bun-build"
        let name = entry.file_name();
        let name = name.to_string_lossy();
        let looks_like_bun_temp = name.starts_with('.') && name.ends_with(".bun-build");

        if !looks_like_bun_temp {
            continue;
        }

        if let Some(age) = older_than {
            if let Ok(meta) = entry.metadata() {
                if let Ok(modified) = meta.modified() {
                    if now.duration_since(modified).unwrap_or_default() < age {
                        // too new; skip to avoid racing an in-flight builder
                        continue;
                    }
                }
            }
        }

        match fs::remove_file(&path) {
            Ok(_) => println!("cargo:warning=removed stray bun temp {}", path.display()),
            Err(e) => println!("cargo:warning=failed to remove {}: {e}", path.display()),
        }
    }

    Ok(())
}

fn warn(msg: &str) {
    let _ = writeln!(io::stderr(), "[build.rs] {msg}");
    println!("cargo:warning={msg}");
}

fn info(msg: &str) {
    let _ = writeln!(io::stderr(), "[build.rs] {msg}");
    println!("cargo:warning=INFO|{msg}");
}

crates/cli/bun_target.rs (new file, 125 lines)

@@ -0,0 +1,125 @@
use std::env;
use std::fmt;

#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
pub enum BunTarget {
    LinuxX64Glibc,
    LinuxArm64Glibc,
    LinuxX64Musl,
    LinuxArm64Musl,
    WindowsX64,
    WindowsArm64,
    MacX64,
    MacArm64,
}

impl BunTarget {
    pub const fn as_bun_flag(self) -> &'static str {
        match self {
            BunTarget::LinuxX64Glibc => "bun-linux-x64",
            BunTarget::LinuxArm64Glibc => "bun-linux-arm64",
            BunTarget::LinuxX64Musl => "bun-linux-x64-musl",
            BunTarget::LinuxArm64Musl => "bun-linux-arm64-musl",
            BunTarget::WindowsX64 => "bun-windows-x64",
            BunTarget::WindowsArm64 => "bun-windows-arm64",
            BunTarget::MacX64 => "bun-darwin-x64",
            BunTarget::MacArm64 => "bun-darwin-arm64",
        }
    }

    pub const fn rust_triples(self) -> &'static [&'static str] {
        match self {
            BunTarget::LinuxX64Glibc => &["x86_64-unknown-linux-gnu", "x86_64-unknown-linux-gnu.2.17"],
            BunTarget::LinuxArm64Glibc => &["aarch64-unknown-linux-gnu"],
            BunTarget::LinuxX64Musl => &["x86_64-unknown-linux-musl"],
            BunTarget::LinuxArm64Musl => &["aarch64-unknown-linux-musl"],
            BunTarget::WindowsX64 => &["x86_64-pc-windows-msvc"],
            BunTarget::WindowsArm64 => &["aarch64-pc-windows-msvc"], // chart says unsupported; still map
            BunTarget::MacX64 => &["x86_64-apple-darwin"],
            BunTarget::MacArm64 => &["aarch64-apple-darwin"],
        }
    }

    pub fn from_rust_target(triple: &str) -> Option<Self> {
        let norm = triple.trim();

        if norm.starts_with("x86_64-") && norm.contains("-linux-") && norm.ends_with("gnu") {
            return Some(BunTarget::LinuxX64Glibc);
        }
        if norm.starts_with("aarch64-") && norm.contains("-linux-") && norm.ends_with("gnu") {
            return Some(BunTarget::LinuxArm64Glibc);
        }
        if norm.starts_with("x86_64-") && norm.contains("-linux-") && norm.ends_with("musl") {
            return Some(BunTarget::LinuxX64Musl);
        }
        if norm.starts_with("aarch64-") && norm.contains("-linux-") && norm.ends_with("musl") {
            return Some(BunTarget::LinuxArm64Musl);
        }
        if norm == "x86_64-pc-windows-msvc" {
            return Some(BunTarget::WindowsX64);
        }
        if norm == "aarch64-pc-windows-msvc" {
            return Some(BunTarget::WindowsArm64);
        }
        if norm == "x86_64-apple-darwin" {
            return Some(BunTarget::MacX64);
        }
        if norm == "aarch64-apple-darwin" {
            return Some(BunTarget::MacArm64);
        }

        for bt in [
            BunTarget::LinuxX64Glibc,
            BunTarget::LinuxArm64Glibc,
            BunTarget::LinuxX64Musl,
            BunTarget::LinuxArm64Musl,
            BunTarget::WindowsX64,
            BunTarget::WindowsArm64,
            BunTarget::MacX64,
            BunTarget::MacArm64,
        ] {
            for &t in bt.rust_triples() {
                if t == norm {
                    return Some(bt);
                }
            }
        }

        None
    }

    pub fn from_cargo_env() -> Result<Self, BunTargetError> {
        if let Ok(triple) = env::var("TARGET") {
            if let Some(bt) = Self::from_rust_target(&triple) {
                return Ok(bt);
            }
            return Err(BunTargetError::UnknownTriple(triple));
        }

        let os = env::var("CARGO_CFG_TARGET_OS").unwrap_or_default();
        let arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default();
        let envv = env::var("CARGO_CFG_TARGET_ENV").unwrap_or_default();
        let vendor = env::var("CARGO_CFG_TARGET_VENDOR").unwrap_or_else(|_| "unknown".into());
        let triple = format!("{}-{}-{}-{}", arch, vendor, os, if envv.is_empty() { "gnu" } else { &envv });

        if let Some(bt) = Self::from_rust_target(&triple) {
            Ok(bt)
        } else {
            Err(BunTargetError::UnknownTriple(triple))
        }
    }
}

#[derive(Debug)]
pub enum BunTargetError {
    UnknownTriple(String),
}

impl fmt::Display for BunTargetError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            BunTargetError::UnknownTriple(t) => write!(f, "unrecognized Rust target triple: {t}"),
        }
    }
}

impl std::error::Error for BunTargetError {}
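
The mapping above is pure data, so it is cheap to pin down with a unit test. A minimal sketch, not part of this commit (`riscv64gc-unknown-linux-gnu` is just an arbitrary triple the table does not cover):

```rust
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn maps_common_triples() {
        // Exact matches and the prefix/suffix rules in from_rust_target
        assert_eq!(
            BunTarget::from_rust_target("aarch64-apple-darwin"),
            Some(BunTarget::MacArm64)
        );
        assert_eq!(
            BunTarget::from_rust_target("x86_64-unknown-linux-musl"),
            Some(BunTarget::LinuxX64Musl)
        );
        assert_eq!(BunTarget::MacArm64.as_bun_flag(), "bun-darwin-arm64");

        // A triple with no Bun equivalent falls through to None
        assert!(BunTarget::from_rust_target("riscv64gc-unknown-linux-gnu").is_none());
    }
}
```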


@@ -30,24 +30,23 @@ type ChunkStat = {
 function printHelp() {
   console.log(`
-Usage: bun client_cli.ts [options] [prompt]
+./cli [options] [prompt]
 
 Simple CLI tool for testing the local OpenAI-compatible API server.
 
 Options:
-  --model <model>      Model to use (default: ${DEFAULT_MODEL})
+  --model <model>      Model to use (default: gemma-3-1b-it)
   --prompt <prompt>    The prompt to send (can also be provided as positional argument)
   --list-models        List all available models from the server
   --help               Show this help message
 
 Examples:
-  ./cli.ts "What is the capital of France?"
-  ./cli.ts --model gemma-3-1b-it --prompt "Hello, world!"
-  ./cli.ts --prompt "Who was the 16th president of the United States?"
-  ./cli.ts --list-models
+  ./cli "What is the capital of France?"
+  ./cli --model gemma-3-1b-it --prompt "Hello, world!"
+  ./cli --prompt "Who was the 16th president of the United States?"
+  ./cli --list-models
 
-The server should be running at http://localhost:8080
-Start it with: ./run_server.sh
+The server must be running at http://localhost:8080
 `);
 }


@@ -0,0 +1,11 @@
{
  "name": "cli",
  "main": "cli.ts",
  "scripts": {
    "build": "bun build cli.ts --compile --outfile cli"
  },
  "dependencies": {
    "install": "^0.13.0",
    "openai": "^5.16.0"
  }
}

crates/cli/src/main.rs (new file, 32 lines)

@@ -0,0 +1,32 @@
use std::{env, fs, io, path::PathBuf, process::Command};

#[cfg(unix)]
use std::os::unix::fs::PermissionsExt;

fn main() -> io::Result<()> {
    // Absolute path provided by build.rs at compile time.
    // `include_bytes!` accepts string literals; `env!` expands to a literal at compile time.
    const CLIENT_CLI: &[u8] = include_bytes!(env!("CLIENT_CLI_BIN"));

    // Write to a temp file
    let mut tmp = env::temp_dir();
    tmp.push("client-cli-embedded");
    fs::write(&tmp, CLIENT_CLI)?;

    // Ensure it's executable on Unix
    #[cfg(unix)]
    {
        let mut perms = fs::metadata(&tmp)?.permissions();
        perms.set_mode(0o755);
        fs::set_permissions(&tmp, perms)?;
    }

    // Run it
    let status = Command::new(&tmp).arg("--version").status()?;
    if !status.success() {
        return Err(io::Error::new(io::ErrorKind::Other, "client-cli failed"));
    }

    Ok(())
}
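
As committed, the wrapper always invokes the embedded binary with `--version`, which works as a smoke test but discards the user's arguments. A pass-through variant, sketched here and not part of the commit, would forward argv and propagate the exit code (reusing the temp path written above):

```rust
use std::{env, io, process::Command};

fn main() -> io::Result<()> {
    // Assumption: the embedded CLI has already been written to this path.
    let tmp = env::temp_dir().join("client-cli-embedded");

    // Forward the wrapper's own arguments to the embedded CLI.
    let status = Command::new(&tmp).args(env::args_os().skip(1)).status()?;

    // Propagate the child's exit code to the caller.
    std::process::exit(status.code().unwrap_or(1));
}
```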


@@ -1,6 +1,6 @@
 [package]
 name = "embeddings-engine"
-version = "0.1.0"
+version.workspace = true
 edition = "2024"
 
 [lib]


@@ -1,6 +1,6 @@
 [package]
 name = "gemma-runner"
-version = "0.1.0"
+version.workspace = true
 edition = "2021"


@@ -1,6 +1,6 @@
 [package]
 name = "helm-chart-tool"
-version = "0.1.0"
+version.workspace = true
 edition = "2021"
 
 [[bin]]


@@ -1,6 +1,6 @@
 [package]
 name = "inference-engine"
-version = "0.1.0"
+version.workspace = true
 edition = "2021"
 
 [dependencies]


@@ -1,6 +1,6 @@
 [package]
 name = "leptos-app"
-version = "0.1.0"
+version.workspace = true
 edition = "2021"
 
 [lib]


@@ -1,6 +1,6 @@
 [package]
 name = "llama-runner"
-version = "0.1.0"
+version.workspace = true
 edition = "2021"
 
 [dependencies]


@@ -1,6 +1,6 @@
 [package]
 name = "predict-otron-9000"
-version = "0.1.1"
+version.workspace = true
 edition = "2024"
 
 [[bin]]
@@ -44,4 +44,8 @@ port = 8080
 image = "ghcr.io/geoffsee/predict-otron-9000:latest"
 replicas = 1
 port = 8080
-env = { SERVER_CONFIG = "" }
+# SERVER_CONFIG Example: {\"serverMode\":\"HighAvailability\",\"services\":{\"inference_url\":\"http://custom-inference:9000\",\"embeddings_url\":\"http://custom-embeddings:9001\"}}
+# you can generate this via node to avoid toil
+# const server_config = {serverMode: "HighAvailability", services: {inference_url: "http://custom-inference:9000", embeddings_url: "http://custom-embeddings:9001"} };
+# console.log(JSON.stringify(server_config).replace(/"/g, '\\"'));
+env = { SERVER_CONFIG = "<your-json-value-here>" }


@@ -2,7 +2,10 @@ use serde::{Deserialize, Serialize};
 use std::env;
 use tracing::info;
 use tracing::log::error;
+/// # Generating `SERVER_CONFIG` with Node
+// # const server_config = {serverMode: "HighAvailability", services: {inference_url: "http://custom-inference:9000", embeddings_url: "http://custom-embeddings:9001"} };
+// # console.log(JSON.stringify(server_config).replace(/"/g, '\\"'));
+///
 #[derive(Serialize, Deserialize, Clone, Debug)]
 #[serde(rename_all = "camelCase")]
 pub struct ServerConfig {
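
For context, a minimal sketch of consuming that value at startup. This is not code from the commit; it assumes `serde_json` is a dependency and that `ServerConfig` implements `Default`, neither of which is shown in this hunk:

```rust
use crate::config::ServerConfig;

/// Hypothetical helper: parse `SERVER_CONFIG` from the environment,
/// falling back to defaults when the variable is unset.
fn load_server_config() -> ServerConfig {
    match std::env::var("SERVER_CONFIG") {
        Ok(raw) => serde_json::from_str(&raw)
            .expect("SERVER_CONFIG must be valid JSON matching ServerConfig"),
        Err(_) => ServerConfig::default(), // assumption: Default is derived elsewhere
    }
}
```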


@@ -12,6 +12,120 @@ use std::time::Duration;
 use crate::config::ServerConfig;
 
+/// # Generating `SERVER_CONFIG` for TOML using Node.js
+///
+/// You can still use the Node.js REPL to build the JSON, but when pasting into
+/// a `.toml` file you must follow TOML's string rules. Below are the safest patterns.
+///
+/// ## 1) Generate the JSON in Node
+/// ```bash
+/// node
+/// ```
+/// ```javascript
+/// const myobject = {
+///   serverMode: "HighAvailability",
+///   services: {
+///     inference_url: "http://custom-inference:9000",
+///     embeddings_url: "http://custom-embeddings:9001"
+///   }
+/// };
+/// const json = JSON.stringify(myobject);
+/// json
+/// // -> '{"serverMode":"HighAvailability","services":{"inference_url":"http://custom-inference:9000","embeddings_url":"http://custom-embeddings:9001"}}'
+/// ```
+///
+/// ## 2) Put it into `.toml`
+///
+/// ### Option A (recommended): single-quoted TOML *literal* string
+/// Single quotes in TOML mean "no escaping", so your inner double quotes are safe.
+/// ```toml
+/// SERVER_CONFIG = '{"serverMode":"HighAvailability","services":{"inference_url":"http://custom-inference:9000","embeddings_url":"http://custom-embeddings:9001"}}'
+/// ```
+///
+/// ### Option B: double-quoted TOML string (must escape inner quotes)
+/// If you *must* use double quotes in TOML, escape all `"` inside the JSON.
+/// You can have Node do this for you:
+/// ```javascript
+/// // In Node:
+/// const jsonForToml = JSON.stringify(myobject).replace(/"/g, '\\"');
+/// jsonForToml
+/// // -> '{\"serverMode\":\"HighAvailability\",...}'
+/// ```
+/// Then paste into TOML:
+/// ```toml
+/// SERVER_CONFIG = "{\"serverMode\":\"HighAvailability\",\"services\":{\"inference_url\":\"http://custom-inference:9000\",\"embeddings_url\":\"http://custom-embeddings:9001\"}}"
+/// ```
+///
+/// ### Option C: multi-line literal (for pretty JSON)
+/// If you want pretty-printed JSON in the file, use TOML's triple single quotes:
+/// ```javascript
+/// // In Node (pretty with 2 spaces):
+/// const pretty = JSON.stringify(myobject, null, 2);
+/// ```
+/// ```toml
+/// SERVER_CONFIG = '''{
+///   "serverMode": "HighAvailability",
+///   "services": {
+///     "inference_url": "http://custom-inference:9000",
+///     "embeddings_url": "http://custom-embeddings:9001"
+///   }
+/// }'''
+/// ```
+///
+/// ## 3) Reading it in Rust
+///
+/// If `SERVER_CONFIG` is stored as a **string** in TOML (Options A/B/C):
+/// ```rust
+/// use serde_json::Value;
+///
+/// // Suppose you've already loaded your .toml into a struct or a toml::Value:
+/// // e.g., struct FileCfg { pub SERVER_CONFIG: String }
+/// fn parse_server_config(raw: &str) -> anyhow::Result<Value> {
+///     let v: Value = serde_json::from_str(raw)?;
+///     Ok(v)
+/// }
+/// ```
+///
+/// ### Alternative: store it as TOML tables and serialize to JSON at runtime
+/// Instead of a JSON string, you can make the TOML first-class tables:
+/// ```toml
+/// [SERVER_CONFIG]
+/// serverMode = "HighAvailability"
+///
+/// [SERVER_CONFIG.services]
+/// inference_url = "http://custom-inference:9000"
+/// embeddings_url = "http://custom-embeddings:9001"
+/// ```
+/// ```rust
+/// use serde::{Deserialize, Serialize};
+/// use serde_json::Value;
+///
+/// #[derive(Debug, Serialize, Deserialize)]
+/// struct Services {
+///     inference_url: String,
+///     embeddings_url: String,
+/// }
+///
+/// #[derive(Debug, Serialize, Deserialize)]
+/// struct ServerConfig {
+///     serverMode: String,
+///     services: Services,
+/// }
+///
+/// // After loading the .toml (e.g., via `toml::from_str`):
+/// // let cfg: ServerConfig = toml::from_str(toml_str)?;
+/// // Convert to JSON if needed:
+/// fn to_json(cfg: &ServerConfig) -> serde_json::Result<Value> {
+///     Ok(serde_json::to_value(cfg)?)
+/// }
+/// ```
+///
+/// ## Gotchas
+/// - Prefer **single-quoted** TOML strings for raw JSON to avoid escaping.
+/// - If you use **double-quoted** TOML strings, escape every inner `"` in the JSON.
+/// - Pretty JSON is fine in TOML using `''' ... '''`, but remember the newlines are part of the string.
+/// - If you control the consumer, TOML tables (the alternative above) are more ergonomic than embedding JSON.
+
 /// HTTP client configured for proxying requests
 #[derive(Clone)]
 pub struct ProxyClient {
@@ -31,7 +145,7 @@ impl ProxyClient {
 }
 
 /// Create a router that proxies requests to external services in HighAvailability mode
-pub fn create_proxy_router(config: ServerConfig) -> Router {
+pub fn create_ha_router(config: ServerConfig) -> Router {
     let proxy_client = ProxyClient::new(config.clone());
 
     Router::new()


@@ -1,16 +1,16 @@
 mod config;
+mod ha_mode;
 mod middleware;
-mod proxy;
-mod standalone;
+mod standalone_mode;
 
-use crate::standalone::create_standalone_router;
+use crate::standalone_mode::create_standalone_router;
 use axum::response::IntoResponse;
 use axum::routing::get;
 use axum::{Router, http::Uri, response::Html, serve};
 use config::ServerConfig;
+use ha_mode::create_ha_router;
 use inference_engine::AppState;
 use middleware::{MetricsLayer, MetricsLoggerFuture, MetricsStore};
-use proxy::create_proxy_router;
 use rust_embed::Embed;
 use std::env;
 use std::path::Component::ParentDir;
@@ -56,7 +56,7 @@ async fn main() {
         Ok(is_ha) => {
             if is_ha {
                 log_config(server_config.clone());
-                create_proxy_router(server_config.clone())
+                create_ha_router(server_config.clone())
             } else {
                 log_config(server_config.clone());
                 create_standalone_router(server_config)


@@ -1,8 +1,8 @@
 {
-  "dependencies": {
-    "openai": "^5.16.0"
-  },
+  "name": "predict-otron-9000",
+  "workspaces": ["crates/cli/package"],
   "scripts": {
-    "cli": "./scripts/cli.ts"
+    "# WORKSPACE ALIASES": "#",
+    "cli": "bun --filter crates/cli/package"
   }
 }


@@ -1,389 +0,0 @@
#!/bin/bash
# Cross-platform build script for predict-otron-9000
# Builds all workspace crates for common platforms
set -euo pipefail
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Configuration
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
BUILD_DIR="${PROJECT_ROOT}/build"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
# Supported platforms
PLATFORMS=(
"x86_64-unknown-linux-gnu"
"x86_64-pc-windows-msvc"
"x86_64-apple-darwin"
"aarch64-apple-darwin"
"aarch64-unknown-linux-gnu"
)
# Main binaries to build
MAIN_BINARIES=(
"predict-otron-9000"
"embeddings-engine"
)
# Inference engine binaries (with bin feature)
INFERENCE_BINARIES=(
"gemma_inference"
"llama_inference"
)
# Other workspace binaries
OTHER_BINARIES=(
"helm-chart-tool"
)
print_header() {
echo -e "${BLUE}================================${NC}"
echo -e "${BLUE}$1${NC}"
echo -e "${BLUE}================================${NC}"
}
print_info() {
echo -e "${GREEN}[INFO]${NC} $1"
}
print_warn() {
echo -e "${YELLOW}[WARN]${NC} $1"
}
print_error() {
echo -e "${RED}[ERROR]${NC} $1"
}
check_dependencies() {
print_header "Checking Dependencies"
# Check rust
if ! command -v cargo >/dev/null 2>&1; then
print_error "Rust/Cargo is not installed"
exit 1
fi
# Check cargo-leptos for WASM frontend
if ! command -v cargo-leptos >/dev/null 2>&1; then
print_warn "cargo-leptos not found. Installing..."
cargo install cargo-leptos
fi
print_info "All dependencies available"
}
install_targets() {
print_header "Installing Rust Targets"
for platform in "${PLATFORMS[@]}"; do
print_info "Installing target: $platform"
rustup target add "$platform" || {
print_warn "Failed to install target $platform (may not be available on this host)"
}
done
# Add WASM target for leptos
print_info "Installing wasm32-unknown-unknown target for Leptos"
rustup target add wasm32-unknown-unknown
}
create_build_dirs() {
print_header "Setting up Build Directory"
rm -rf "$BUILD_DIR"
mkdir -p "$BUILD_DIR"
for platform in "${PLATFORMS[@]}"; do
mkdir -p "$BUILD_DIR/$platform"
done
mkdir -p "$BUILD_DIR/web"
print_info "Build directories created"
}
build_leptos_app() {
print_header "Building Leptos Web Frontend"
cd "$PROJECT_ROOT/crates/leptos-app"
# Build the WASM frontend
print_info "Building WASM frontend with cargo-leptos..."
cargo leptos build --release || {
print_error "Failed to build Leptos WASM frontend"
return 1
}
# Copy built assets to build directory
if [ -d "target/site" ]; then
cp -r target/site/* "$BUILD_DIR/web/"
print_info "Leptos frontend built and copied to $BUILD_DIR/web/"
else
print_error "Leptos build output not found at target/site"
return 1
fi
cd "$PROJECT_ROOT"
}
get_platform_features() {
local platform="$1"
local features=""
case "$platform" in
*-apple-darwin)
# macOS uses Metal but routes to CPU for Gemma stability
features=""
;;
*-unknown-linux-gnu|*-pc-windows-msvc)
# Linux and Windows can use CUDA if available
features=""
;;
*)
features=""
;;
esac
echo "$features"
}
build_binary_for_platform() {
local binary_name="$1"
local platform="$2"
local package_name="$3"
local additional_args="$4"
print_info "Building $binary_name for $platform"
local features=$(get_platform_features "$platform")
local feature_flag=""
if [ -n "$features" ]; then
feature_flag="--features $features"
fi
# Build command
local build_cmd="cargo build --release --target $platform --bin $binary_name"
if [ -n "$package_name" ]; then
build_cmd="$build_cmd --package $package_name"
fi
if [ -n "$additional_args" ]; then
build_cmd="$build_cmd $additional_args"
fi
if [ -n "$feature_flag" ]; then
build_cmd="$build_cmd $feature_flag"
fi
print_info "Running: $build_cmd"
if eval "$build_cmd"; then
# Copy binary to build directory
local target_dir="target/$platform/release"
local binary_file="$binary_name"
# Add .exe extension for Windows
if [[ "$platform" == *-pc-windows-msvc ]]; then
binary_file="$binary_name.exe"
fi
if [ -f "$target_dir/$binary_file" ]; then
cp "$target_dir/$binary_file" "$BUILD_DIR/$platform/"
print_info "$binary_name built and copied for $platform"
else
print_error "Binary not found: $target_dir/$binary_file"
return 1
fi
else
print_error "Failed to build $binary_name for $platform"
return 1
fi
}
build_for_platform() {
local platform="$1"
print_header "Building for $platform"
local failed_builds=()
# Build main binaries
for binary in "${MAIN_BINARIES[@]}"; do
if ! build_binary_for_platform "$binary" "$platform" "$binary" ""; then
failed_builds+=("$binary")
fi
done
# Build inference engine binaries with bin feature
for binary in "${INFERENCE_BINARIES[@]}"; do
if ! build_binary_for_platform "$binary" "$platform" "inference-engine" "--features bin"; then
failed_builds+=("$binary")
fi
done
# Build other workspace binaries
for binary in "${OTHER_BINARIES[@]}"; do
if ! build_binary_for_platform "$binary" "$platform" "$binary" ""; then
failed_builds+=("$binary")
fi
done
if [ ${#failed_builds[@]} -eq 0 ]; then
print_info "✓ All binaries built successfully for $platform"
else
print_warn "Some builds failed for $platform: ${failed_builds[*]}"
fi
}
create_archives() {
print_header "Creating Release Archives"
cd "$BUILD_DIR"
for platform in "${PLATFORMS[@]}"; do
if [ -d "$platform" ] && [ -n "$(ls -A "$platform" 2>/dev/null)" ]; then
local archive_name="predict-otron-9000-${platform}-${TIMESTAMP}"
print_info "Creating archive for $platform"
# Create platform-specific directory with all files
mkdir -p "$archive_name"
cp -r "$platform"/* "$archive_name/"
# Add web assets to each platform archive
if [ -d "web" ]; then
mkdir -p "$archive_name/web"
cp -r web/* "$archive_name/web/"
fi
# Create README for the platform
cat > "$archive_name/README.txt" << EOF
Predict-Otron-9000 - Platform: $platform
Build Date: $(date)
========================================
Binaries included:
$(ls -1 "$platform")
Web Frontend:
- Located in the 'web' directory
- Serve with any static file server on port 8788 or configure your server
Usage:
1. Start the main server: ./predict-otron-9000
2. Start embeddings service: ./embeddings-engine
3. Access web interface at http://localhost:8080 (served by main server)
For more information, visit: https://github.com/geoffsee/predict-otron-9000
EOF
# Create tar.gz archive
tar -czf "${archive_name}.tar.gz" "$archive_name"
rm -rf "$archive_name"
print_info "✓ Created ${archive_name}.tar.gz"
else
print_warn "No binaries found for $platform, skipping archive"
fi
done
cd "$PROJECT_ROOT"
}
generate_build_report() {
print_header "Build Report"
echo "Build completed at: $(date)"
echo "Build directory: $BUILD_DIR"
echo ""
echo "Archives created:"
ls -la "$BUILD_DIR"/*.tar.gz 2>/dev/null || echo "No archives created"
echo ""
echo "Platform directories:"
for platform in "${PLATFORMS[@]}"; do
if [ -d "$BUILD_DIR/$platform" ]; then
echo " $platform:"
ls -la "$BUILD_DIR/$platform" | sed 's/^/ /'
fi
done
if [ -d "$BUILD_DIR/web" ]; then
echo ""
echo "Web frontend assets:"
ls -la "$BUILD_DIR/web" | head -10 | sed 's/^/ /'
if [ $(ls -1 "$BUILD_DIR/web" | wc -l) -gt 10 ]; then
echo " ... and $(( $(ls -1 "$BUILD_DIR/web" | wc -l) - 10 )) more files"
fi
fi
}
main() {
print_header "Predict-Otron-9000 Cross-Platform Build Script"
cd "$PROJECT_ROOT"
check_dependencies
install_targets
create_build_dirs
# Build Leptos web frontend first
build_leptos_app
# Build for each platform
for platform in "${PLATFORMS[@]}"; do
build_for_platform "$platform"
done
create_archives
generate_build_report
print_header "Build Complete!"
print_info "All artifacts are available in: $BUILD_DIR"
}
# Handle command line arguments
case "${1:-}" in
--help|-h)
echo "Usage: $0 [options]"
echo ""
echo "Cross-platform build script for predict-otron-9000"
echo ""
echo "Options:"
echo " --help, -h Show this help message"
echo " --platforms Show supported platforms"
echo " --clean Clean build directory before building"
echo ""
echo "Supported platforms:"
for platform in "${PLATFORMS[@]}"; do
echo " - $platform"
done
echo ""
echo "Prerequisites:"
echo " - Rust toolchain with rustup"
echo " - cargo-leptos (will be installed if missing)"
echo " - Platform-specific toolchains for cross-compilation"
echo ""
exit 0
;;
--platforms)
echo "Supported platforms:"
for platform in "${PLATFORMS[@]}"; do
echo " - $platform"
done
exit 0
;;
--clean)
print_info "Cleaning build directory..."
rm -rf "$BUILD_DIR"
print_info "Build directory cleaned"
;;
esac
main "$@"


@@ -1,19 +0,0 @@
#!/usr/bin/env sh
set -e
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
TEMP_DIR="$SCRIPT_DIR/temp"
mkdir -p "$TEMP_DIR"
cp "$SCRIPT_DIR/cli.ts" "$TEMP_DIR/cli.ts"
cp "$SCRIPT_DIR/../package.json" "$TEMP_DIR/package.json"
(
cd "$TEMP_DIR"
bun i
bun build ./cli.ts --compile --outfile "$SCRIPT_DIR/cli"
)
rm -rf "$TEMP_DIR"