From 0580dc8c5e732581207b788af1e759642d483802 Mon Sep 17 00:00:00 2001
From: geoffsee <>
Date: Sun, 31 Aug 2025 13:23:50 -0400
Subject: [PATCH] move cli into crates and stage for release

---
 .github/workflows/release.yml                 |   9 +-
 .gitignore                                    |   1 +
 Cargo.lock                                    |  18 +-
 Cargo.toml                                    |   8 +-
 bun.lock                                      |  22 +
 crates/cli/Cargo.toml                         |  11 +
 crates/cli/README.md                          |  23 ++
 crates/cli/build.rs                           | 209 ++++++++++
 crates/cli/bun_target.rs                      | 125 ++++++
 {scripts => crates/cli/package}/cli.ts        |  15 +-
 crates/cli/package/package.json               |  11 +
 crates/cli/src/main.rs                        |  32 ++
 crates/embeddings-engine/Cargo.toml           |   2 +-
 crates/gemma-runner/Cargo.toml                |   2 +-
 crates/helm-chart-tool/Cargo.toml             |   2 +-
 crates/inference-engine/Cargo.toml            |   2 +-
 crates/leptos-app/Cargo.toml                  |   2 +-
 crates/llama-runner/Cargo.toml                |   2 +-
 crates/predict-otron-9000/Cargo.toml          |   8 +-
 crates/predict-otron-9000/src/config.rs       |   5 +-
 .../src/{proxy.rs => ha_mode.rs}              | 116 +++++-
 crates/predict-otron-9000/src/main.rs         |  10 +-
 .../src/{standalone.rs => standalone_mode.rs} |   0
 package.json                                  |   8 +-
 scripts/build_all_platforms.sh                | 389 ------------------
 scripts/build_cli.sh                          |  19 -
 26 files changed, 604 insertions(+), 447 deletions(-)
 create mode 100644 bun.lock
 create mode 100644 crates/cli/Cargo.toml
 create mode 100644 crates/cli/README.md
 create mode 100644 crates/cli/build.rs
 create mode 100644 crates/cli/bun_target.rs
 rename {scripts => crates/cli/package}/cli.ts (96%)
 create mode 100644 crates/cli/package/package.json
 create mode 100644 crates/cli/src/main.rs
 rename crates/predict-otron-9000/src/{proxy.rs => ha_mode.rs} (72%)
 rename crates/predict-otron-9000/src/{standalone.rs => standalone_mode.rs} (100%)
 delete mode 100755 scripts/build_all_platforms.sh
 delete mode 100755 scripts/build_cli.sh

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index f7d82f5..2165019 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -34,6 +34,9 @@ jobs:
       - name: Setup Rust
         run: rustup update stable && rustup default stable
 
+      - name: Setup Bun
+        uses: oven-sh/setup-bun@v2
+
       - name: Install clippy and rustfmt
         run: rustup component add clippy rustfmt
 
@@ -132,7 +135,7 @@ jobs:
         run: rustup target add ${{ matrix.target }}
 
       - name: Build binary
-        run: cargo build --release --target ${{ matrix.target }} -p predict-otron-9000
+        run: cargo build --release --target ${{ matrix.target }} -p predict-otron-9000 -p cli
        env:
          CARGO_TERM_COLOR: always
 
@@ -140,14 +143,14 @@ jobs:
        if: matrix.os != 'windows-latest'
        run: |
          cd target/${{ matrix.target }}/release
-          tar czf ../../../${{ matrix.name }}.tar.gz predict-otron-9000
+          tar czf ../../../${{ matrix.name }}.tar.gz predict-otron-9000 cli
          cd ../../../
 
      - name: Package binary (Windows)
        if: matrix.os == 'windows-latest'
        run: |
          cd target/${{ matrix.target }}/release
-          7z a ../../../${{ matrix.name }}.zip predict-otron-9000.exe
+          7z a ../../../${{ matrix.name }}.zip predict-otron-9000.exe cli.exe
          cd ../../../
 
      - name: Upload binary artifacts (Unix)
diff --git a/.gitignore b/.gitignore
index fb8dc58..963a460 100644
--- a/.gitignore
+++ b/.gitignore
@@ -78,3 +78,4 @@ venv/
 *~
 /scripts/cli
 !/scripts/cli.ts
+/**/.*.bun-build
diff --git a/Cargo.lock b/Cargo.lock
index 575d6ba..ddaa099 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -896,6 +896,10 @@ version = "0.7.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"
 
+[[package]]
+name = "cli"
+version = "0.1.1"
+
 [[package]]
 name = "codee"
 version = "0.3.2"
@@ -1472,7 +1476,7 @@ dependencies = [
 
 [[package]]
 name = "embeddings-engine"
-version = "0.1.0"
+version = "0.1.1"
 dependencies = [
  "async-openai",
  "axum",
@@ -2094,7 +2098,7 @@ dependencies = [
 
 [[package]]
 name = "gemma-runner"
-version = "0.1.0"
+version = "0.1.1"
 dependencies = [
  "anyhow",
  "candle-core 0.9.1 (git+https://github.com/huggingface/candle.git)",
@@ -2275,7 +2279,7 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
 
 [[package]]
 name = "helm-chart-tool"
-version = "0.1.0"
+version = "0.1.1"
 dependencies = [
  "anyhow",
  "clap",
@@ -2685,7 +2689,7 @@ dependencies = [
 
 [[package]]
 name = "inference-engine"
-version = "0.1.0"
+version = "0.1.1"
 dependencies = [
  "ab_glyph",
  "anyhow",
@@ -2937,7 +2941,7 @@ dependencies = [
 
 [[package]]
 name = "leptos-app"
-version = "0.1.0"
+version = "0.1.1"
 dependencies = [
  "async-openai-wasm",
  "axum",
@@ -3200,7 +3204,7 @@ checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956"
 
 [[package]]
 name = "llama-runner"
-version = "0.1.0"
+version = "0.1.1"
 dependencies = [
  "anyhow",
  "candle-core 0.9.1 (git+https://github.com/huggingface/candle.git)",
@@ -4079,7 +4083,7 @@ dependencies = [
 
 [[package]]
 name = "predict-otron-9000"
-version = "0.1.0"
+version = "0.1.1"
 dependencies = [
  "axum",
  "embeddings-engine",
diff --git a/Cargo.toml b/Cargo.toml
index cc22e49..f762dfd 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,11 +6,15 @@ members = [
     "crates/leptos-app",
     "crates/helm-chart-tool",
     "crates/llama-runner",
-    "crates/gemma-runner"
+    "crates/gemma-runner",
+    "crates/cli"
 ]
 default-members = ["crates/predict-otron-9000"]
 resolver = "2"
 
+[workspace.package]
+version = "0.1.2"
+
 # Compiler optimization profiles for the workspace
 [profile.release]
 opt-level = 3
@@ -42,4 +46,4 @@ lto = "thin"
 [[workspace.metadata.leptos]]
 # project name
 bin-package = "leptos-app"
-lib-package = "leptos-app"
\ No newline at end of file
+lib-package = "leptos-app"
diff --git a/bun.lock b/bun.lock
new file mode 100644
index 0000000..365afba
--- /dev/null
+++ b/bun.lock
@@ -0,0 +1,22 @@
+{
+  "lockfileVersion": 1,
+  "workspaces": {
+    "": {
+      "name": "predict-otron-9000",
+    },
+    "crates/cli/package": {
+      "name": "cli",
+      "dependencies": {
+        "install": "^0.13.0",
+        "openai": "^5.16.0",
+      },
+    },
+  },
+  "packages": {
+    "cli": ["cli@workspace:crates/cli/package"],
+
+    "install": ["install@0.13.0", "", {}, "sha512-zDml/jzr2PKU9I8J/xyZBQn8rPCAY//UOYNmR01XwNwyfhEWObo2SWfSl1+0tm1u6PhxLwDnfsT/6jB7OUxqFA=="],
+
+    "openai": ["openai@5.16.0", "", { "peerDependencies": { "ws": "^8.18.0", "zod": "^3.23.8" }, "optionalPeers": ["ws", "zod"], "bin": { "openai": "bin/cli" } }, "sha512-hoEH8ZNvg1HXjU9mp88L/ZH8O082Z8r6FHCXGiWAzVRrEv443aI57qhch4snu07yQydj+AUAWLenAiBXhu89Tw=="],
+  }
+}
diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml
new file mode 100644
index 0000000..bd4143d
--- /dev/null
+++ b/crates/cli/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "cli"
+version.workspace = true
+edition = "2021"
+build = "build.rs"
+
+[[bin]]
+name = "cli"
+path = "src/main.rs"
+
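+# build.rs compiles the TypeScript CLI in ./package with Bun and embeds the
+# resulting executable in this crate's binary via the CLIENT_CLI_BIN env var
+# (see src/main.rs).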
+[dependencies]
\ No newline at end of file
diff --git a/crates/cli/README.md b/crates/cli/README.md
new file mode 100644
index 0000000..0644108
--- /dev/null
+++ b/crates/cli/README.md
@@ -0,0 +1,23 @@
+# cli
+
+A Rust/TypeScript hybrid
+
+```console
+./cli [options] [prompt]
+
+Simple CLI tool for testing the local OpenAI-compatible API server.
+
+Options:
+  --model <model>    Model to use (default: gemma-3-1b-it)
+  --prompt <prompt>  The prompt to send (can also be provided as positional argument)
+  --list-models      List all available models from the server
+  --help             Show this help message
+
+Examples:
+  ./cli "What is the capital of France?"
+  ./cli --model gemma-3-1b-it --prompt "Hello, world!"
+  ./cli --prompt "Who was the 16th president of the United States?"
+  ./cli --list-models
+
+The server must be running at http://localhost:8080
+```
\ No newline at end of file
diff --git a/crates/cli/build.rs b/crates/cli/build.rs
new file mode 100644
index 0000000..1d12d22
--- /dev/null
+++ b/crates/cli/build.rs
@@ -0,0 +1,209 @@
+use std::env;
+use std::fs;
+use std::io::{self, BufRead, Write};
+use std::path::{Path, PathBuf};
+use std::process::{ChildStderr, ChildStdout, Command, Stdio};
+use std::thread;
+use std::time::{Duration, SystemTime};
+mod bun_target;
+use bun_target::BunTarget;
+
+fn main() {
+    // Rebuild whenever the TypeScript sources in ./package change.
+    println!("cargo:rerun-if-changed=package");
+
+    if let Err(e) = run_build() {
+        println!("cargo:warning=build.rs failed: {e}");
+        std::process::exit(1);
+    }
+}
+
+fn run_build() -> io::Result<()> {
+    let manifest_dir =
+        PathBuf::from(env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR not set"));
+    let package_dir = manifest_dir.join("package");
+    let out_dir = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR not set by Cargo"));
+    let output_path = out_dir.join("client-cli");
+
+    let bun_tgt = BunTarget::from_cargo_env()
+        .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
+
+    // Optional: warn if using a Bun target that's marked unsupported in the compatibility chart
+    if matches!(bun_tgt, BunTarget::WindowsArm64) {
+        println!("cargo:warning=bun-windows-arm64 is marked unsupported in the compatibility chart");
+    }
+
+    warn(&format!("Building CLI into: {}", output_path.display()));
+
+    // --- bun install (in ./package), keep temps inside OUT_DIR ---
+    let mut install = Command::new("bun")
+        .current_dir(&package_dir)
+        .env("TMPDIR", &out_dir)
+        .arg("install")
+        .stdin(Stdio::null())
+        .stdout(Stdio::piped())
+        .stderr(Stdio::piped())
+        .spawn()
+        .map_err(|e| io::Error::new(e.kind(), format!("Failed to spawn `bun install`: {e}")))?;
+
+    let install_join = stream_child("bun install", install.stdout.take(), install.stderr.take());
+    let install_status = install.wait()?;
+    // ensure streams finish
+    join_streams(install_join);
+
+    if !install_status.success() {
+        let code = install_status.code().unwrap_or(1);
+        return Err(io::Error::new(
+            io::ErrorKind::Other,
+            format!("bun install failed with status {code}"),
+        ));
+    }
+
+    // --- bun build (in ./package), emit to OUT_DIR, keep temps inside OUT_DIR ---
+    let mut build = Command::new("bun")
+        .current_dir(&package_dir)
+        .env("TMPDIR", &out_dir)
+        .arg("build")
+        .arg("./cli.ts")
+        .arg(format!("--target={}", bun_tgt.as_bun_flag()))
+        .arg("--compile")
+        .arg("--outfile")
+        .arg(&output_path)
+        .stdout(Stdio::piped())
+        .stderr(Stdio::piped())
+        .spawn()
+        .map_err(|e| io::Error::new(e.kind(), format!("Failed to spawn `bun build`: {e}")))?;
+
+    let build_join = stream_child("bun build", build.stdout.take(), build.stderr.take());
+    let status = build.wait()?;
+    // ensure streams finish
+    join_streams(build_join);
+
+    if status.success() {
+        info("bun build succeeded");
+    } else {
+        let code = status.code().unwrap_or(1);
+        warn(&format!("bun build failed with status: {code}"));
+        return Err(io::Error::new(io::ErrorKind::Other, "bun build failed"));
+    }
+
+    // Ensure the output is executable (after it exists)
+    #[cfg(unix)]
+    {
+        use std::os::unix::fs::PermissionsExt;
+        let mut perms = fs::metadata(&output_path)?.permissions();
+        perms.set_mode(0o755);
+        fs::set_permissions(&output_path, perms)?;
+    }
+
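+    // Hand the artifact path to rustc: src/main.rs embeds the compiled CLI at
+    // compile time via `include_bytes!(env!("CLIENT_CLI_BIN"))`.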
failed")); + } + + // Ensure the output is executable (after it exists) + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let mut perms = fs::metadata(&output_path)?.permissions(); + perms.set_mode(0o755); + fs::set_permissions(&output_path, perms)?; + } + + println!("cargo:warning=Built CLI at {}", output_path.display()); + println!("cargo:rustc-env=CLIENT_CLI_BIN={}", output_path.display()); + + // --- Cleanup stray .bun-build temp files (conservative: older than 5 minutes) --- + for dir in [&manifest_dir, &package_dir, &out_dir] { + if let Err(e) = remove_bun_temp_files(dir, Some(Duration::from_secs(5 * 60))) { + println!("cargo:warning=cleanup in {} failed: {e}", dir.display()); + } + } + + Ok(()) +} + +// Spawn readers for child's stdout/stderr so we don't deadlock on pipe buffers +fn stream_child( + tag: &str, + stdout: Option, + stderr: Option, +) -> ( + Option>, + Option>, +) { + let t1 = stdout.map(|out| { + let tag = tag.to_string(); + thread::spawn(move || { + let reader = io::BufReader::new(out); + for line in reader.lines() { + info(&format!("[{tag} stdout] {}", line.unwrap_or_default())); + } + }) + }); + let t2 = stderr.map(|err| { + let tag = tag.to_string(); + thread::spawn(move || { + let reader = io::BufReader::new(err); + for line in reader.lines() { + warn(&format!("[{tag} stderr] {}", line.unwrap_or_default())); + } + }) + }); + (t1, t2) +} + +fn join_streams( + joins: ( + Option>, + Option>, + ), +) { + if let Some(j) = joins.0 { + let _ = j.join(); + } + if let Some(j) = joins.1 { + let _ = j.join(); + } +} + +fn remove_bun_temp_files(dir: &Path, older_than: Option) -> io::Result<()> { + let now = SystemTime::now(); + for entry in fs::read_dir(dir)? { + let entry = entry?; + let path = entry.path(); + if !path.is_file() { + continue; + } + + // Files like ".1860e7df40ff1bef-00000000.bun-build" + let name = entry.file_name(); + let name = name.to_string_lossy(); + let looks_like_bun_temp = name.starts_with('.') && name.ends_with(".bun-build"); + + if !looks_like_bun_temp { + continue; + } + + if let Some(age) = older_than { + if let Ok(meta) = entry.metadata() { + if let Ok(modified) = meta.modified() { + if now.duration_since(modified).unwrap_or_default() < age { + // too new; skip to avoid racing an in-flight builder + continue; + } + } + } + } + + match fs::remove_file(&path) { + Ok(_) => println!("cargo:warning=removed stray bun temp {}", path.display()), + Err(e) => println!("cargo:warning=failed to remove {}: {e}", path.display()), + } + } + Ok(()) +} + +fn warn(msg: &str) { + let _ = writeln!(io::stderr(), "[build.rs] {msg}"); + println!("cargo:warning={msg}"); +} + +fn info(msg: &str) { + let _ = writeln!(io::stderr(), "[build.rs] {msg}"); + println!("cargo:warning=INFO|{msg}"); +} diff --git a/crates/cli/bun_target.rs b/crates/cli/bun_target.rs new file mode 100644 index 0000000..cbd2e61 --- /dev/null +++ b/crates/cli/bun_target.rs @@ -0,0 +1,125 @@ +use std::env; +use std::fmt; + +#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] +pub enum BunTarget { + LinuxX64Glibc, + LinuxArm64Glibc, + LinuxX64Musl, + LinuxArm64Musl, + WindowsX64, + WindowsArm64, + MacX64, + MacArm64, +} + +impl BunTarget { + pub const fn as_bun_flag(self) -> &'static str { + match self { + BunTarget::LinuxX64Glibc => "bun-linux-x64", + BunTarget::LinuxArm64Glibc => "bun-linux-arm64", + BunTarget::LinuxX64Musl => "bun-linux-x64-musl", + BunTarget::LinuxArm64Musl => "bun-linux-arm64-musl", + BunTarget::WindowsX64 => "bun-windows-x64", + BunTarget::WindowsArm64 => 
"bun-windows-arm64", + BunTarget::MacX64 => "bun-darwin-x64", + BunTarget::MacArm64 => "bun-darwin-arm64", + } + } + + + pub const fn rust_triples(self) -> &'static [&'static str] { + match self { + BunTarget::LinuxX64Glibc => &["x86_64-unknown-linux-gnu", "x86_64-unknown-linux-gnu.2.17"], + BunTarget::LinuxArm64Glibc => &["aarch64-unknown-linux-gnu"], + BunTarget::LinuxX64Musl => &["x86_64-unknown-linux-musl"], + BunTarget::LinuxArm64Musl => &["aarch64-unknown-linux-musl"], + BunTarget::WindowsX64 => &["x86_64-pc-windows-msvc"], + BunTarget::WindowsArm64 => &["aarch64-pc-windows-msvc"], // chart says unsupported; still map + BunTarget::MacX64 => &["x86_64-apple-darwin"], + BunTarget::MacArm64 => &["aarch64-apple-darwin"], + } + } + + pub fn from_rust_target(triple: &str) -> Option { + let norm = triple.trim(); + if norm.starts_with("x86_64-") && norm.contains("-linux-") && norm.ends_with("gnu") { + return Some(BunTarget::LinuxX64Glibc); + } + if norm.starts_with("aarch64-") && norm.contains("-linux-") && norm.ends_with("gnu") { + return Some(BunTarget::LinuxArm64Glibc); + } + if norm.starts_with("x86_64-") && norm.contains("-linux-") && norm.ends_with("musl") { + return Some(BunTarget::LinuxX64Musl); + } + if norm.starts_with("aarch64-") && norm.contains("-linux-") && norm.ends_with("musl") { + return Some(BunTarget::LinuxArm64Musl); + } + if norm == "x86_64-pc-windows-msvc" { + return Some(BunTarget::WindowsX64); + } + if norm == "aarch64-pc-windows-msvc" { + return Some(BunTarget::WindowsArm64); + } + if norm == "x86_64-apple-darwin" { + return Some(BunTarget::MacX64); + } + if norm == "aarch64-apple-darwin" { + return Some(BunTarget::MacArm64); + } + for bt in [ + BunTarget::LinuxX64Glibc, + BunTarget::LinuxArm64Glibc, + BunTarget::LinuxX64Musl, + BunTarget::LinuxArm64Musl, + BunTarget::WindowsX64, + BunTarget::WindowsArm64, + BunTarget::MacX64, + BunTarget::MacArm64, + ] { + for &t in bt.rust_triples() { + if t == norm { + return Some(bt); + } + } + } + None + } + + + pub fn from_cargo_env() -> Result { + if let Ok(triple) = env::var("TARGET") { + if let Some(bt) = Self::from_rust_target(&triple) { + return Ok(bt); + } + return Err(BunTargetError::UnknownTriple(triple)); + } + + let os = env::var("CARGO_CFG_TARGET_OS").unwrap_or_default(); + let arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default(); + let envv = env::var("CARGO_CFG_TARGET_ENV").unwrap_or_default(); + let vendor = env::var("CARGO_CFG_TARGET_VENDOR").unwrap_or_else(|_| "unknown".into()); + + let triple = format!("{}-{}-{}-{}", arch, vendor, os, if envv.is_empty() { "gnu" } else { &envv }); + if let Some(bt) = Self::from_rust_target(&triple) { + Ok(bt) + } else { + Err(BunTargetError::UnknownTriple(triple)) + } + } +} + +#[derive(Debug)] +pub enum BunTargetError { + UnknownTriple(String), +} + +impl fmt::Display for BunTargetError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + BunTargetError::UnknownTriple(t) => write!(f, "unrecognized Rust target triple: {t}"), + } + } +} + +impl std::error::Error for BunTargetError {} diff --git a/scripts/cli.ts b/crates/cli/package/cli.ts similarity index 96% rename from scripts/cli.ts rename to crates/cli/package/cli.ts index aa091cd..105f7e8 100755 --- a/scripts/cli.ts +++ b/crates/cli/package/cli.ts @@ -30,24 +30,23 @@ type ChunkStat = { function printHelp() { console.log(` -Usage: bun client_cli.ts [options] [prompt] +./cli [options] [prompt] Simple CLI tool for testing the local OpenAI-compatible API server. 
+        if let Some(bt) = Self::from_rust_target(&triple) {
+            Ok(bt)
+        } else {
+            Err(BunTargetError::UnknownTriple(triple))
+        }
+    }
+}
+
+#[derive(Debug)]
+pub enum BunTargetError {
+    UnknownTriple(String),
+}
+
+impl fmt::Display for BunTargetError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            BunTargetError::UnknownTriple(t) => write!(f, "unrecognized Rust target triple: {t}"),
+        }
+    }
+}
+
+impl std::error::Error for BunTargetError {}
diff --git a/scripts/cli.ts b/crates/cli/package/cli.ts
similarity index 96%
rename from scripts/cli.ts
rename to crates/cli/package/cli.ts
index aa091cd..105f7e8 100755
--- a/scripts/cli.ts
+++ b/crates/cli/package/cli.ts
@@ -30,24 +30,23 @@ type ChunkStat = {
 
 function printHelp() {
   console.log(`
-Usage: bun client_cli.ts [options] [prompt]
+./cli [options] [prompt]
 
 Simple CLI tool for testing the local OpenAI-compatible API server.
 
 Options:
-  --model <model>    Model to use (default: ${DEFAULT_MODEL})
+  --model <model>    Model to use (default: gemma-3-1b-it)
   --prompt <prompt>  The prompt to send (can also be provided as positional argument)
   --list-models      List all available models from the server
   --help             Show this help message
 
 Examples:
-  ./cli.ts "What is the capital of France?"
-  ./cli.ts --model gemma-3-1b-it --prompt "Hello, world!"
-  ./cli.ts --prompt "Who was the 16th president of the United States?"
-  ./cli.ts --list-models
+  ./cli "What is the capital of France?"
+  ./cli --model gemma-3-1b-it --prompt "Hello, world!"
+  ./cli --prompt "Who was the 16th president of the United States?"
+  ./cli --list-models
 
-The server should be running at http://localhost:8080
-Start it with: ./run_server.sh
+The server must be running at http://localhost:8080
 `);
 }
diff --git a/crates/cli/package/package.json b/crates/cli/package/package.json
new file mode 100644
index 0000000..d866cb3
--- /dev/null
+++ b/crates/cli/package/package.json
@@ -0,0 +1,11 @@
+{
+  "name": "cli",
+  "main": "cli.ts",
+  "scripts": {
+    "build": "bun build cli.ts --compile --outfile cli"
+  },
+  "dependencies": {
+    "install": "^0.13.0",
+    "openai": "^5.16.0"
+  }
+}
\ No newline at end of file
diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs
new file mode 100644
index 0000000..2113012
--- /dev/null
+++ b/crates/cli/src/main.rs
@@ -0,0 +1,32 @@
+use std::{env, fs, io, path::PathBuf, process::Command};
+
+#[cfg(unix)]
+use std::os::unix::fs::PermissionsExt;
+
+fn main() -> io::Result<()> {
+    // Absolute path provided by build.rs at compile time.
+    // `include_bytes!` accepts string literals; `env!` expands to a literal at compile time.
+    const CLIENT_CLI: &[u8] = include_bytes!(env!("CLIENT_CLI_BIN"));
+
+    // Write to a temp file
+    let mut tmp = env::temp_dir();
+    tmp.push("client-cli-embedded");
+
+    fs::write(&tmp, CLIENT_CLI)?;
+
+    // Ensure it's executable on Unix
+    #[cfg(unix)]
+    {
+        let mut perms = fs::metadata(&tmp)?.permissions();
+        perms.set_mode(0o755);
+        fs::set_permissions(&tmp, perms)?;
+    }
+
+    // Run it, forwarding this process's arguments to the embedded CLI
+    let status = Command::new(&tmp).args(env::args_os().skip(1)).status()?;
+    if !status.success() {
+        return Err(io::Error::new(io::ErrorKind::Other, "client-cli failed"));
+    }
+
+    Ok(())
+}
diff --git a/crates/embeddings-engine/Cargo.toml b/crates/embeddings-engine/Cargo.toml
index ad95440..ffc0b96 100644
--- a/crates/embeddings-engine/Cargo.toml
+++ b/crates/embeddings-engine/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "embeddings-engine"
-version = "0.1.0"
+version.workspace = true
 edition = "2024"
 
 [lib]
diff --git a/crates/gemma-runner/Cargo.toml b/crates/gemma-runner/Cargo.toml
index fdd2c63..fce2a32 100644
--- a/crates/gemma-runner/Cargo.toml
+++ b/crates/gemma-runner/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "gemma-runner"
-version = "0.1.0"
+version.workspace = true
 edition = "2021"
 
diff --git a/crates/helm-chart-tool/Cargo.toml b/crates/helm-chart-tool/Cargo.toml
index b55d7c0..ee8d45b 100644
--- a/crates/helm-chart-tool/Cargo.toml
+++ b/crates/helm-chart-tool/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "helm-chart-tool"
-version = "0.1.0"
+version.workspace = true
 edition = "2021"
 
 [[bin]]
diff --git a/crates/inference-engine/Cargo.toml b/crates/inference-engine/Cargo.toml
index 30739b7..e5eed78 100644
--- a/crates/inference-engine/Cargo.toml
+++ b/crates/inference-engine/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "inference-engine"
-version = "0.1.0"
+version.workspace = true
 edition = "2021"
 
 [dependencies]
diff --git a/crates/leptos-app/Cargo.toml b/crates/leptos-app/Cargo.toml
index 16d164b..b9bc214 100644
--- a/crates/leptos-app/Cargo.toml
+++ b/crates/leptos-app/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "leptos-app"
-version = "0.1.0"
+version.workspace = true
 edition = "2021"
 
 [lib]
diff --git a/crates/llama-runner/Cargo.toml b/crates/llama-runner/Cargo.toml
index 168f65a..4927a22 100644
--- a/crates/llama-runner/Cargo.toml
+++ b/crates/llama-runner/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "llama-runner"
-version = "0.1.0"
+version.workspace = true
 edition = "2021"
 
 [dependencies]
diff --git a/crates/predict-otron-9000/Cargo.toml b/crates/predict-otron-9000/Cargo.toml
index b040616..7e3691f 100644
--- a/crates/predict-otron-9000/Cargo.toml
+++ b/crates/predict-otron-9000/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "predict-otron-9000"
-version = "0.1.1"
+version.workspace = true
 edition = "2024"
 
 [[bin]]
@@ -44,4 +44,8 @@ port = 8080
 image = "ghcr.io/geoffsee/predict-otron-9000:latest"
 replicas = 1
 port = 8080
-env = { SERVER_CONFIG = "" }
+# SERVER_CONFIG example: {\"serverMode\":\"HighAvailability\",\"services\":{\"inference_url\":\"http://custom-inference:9000\",\"embeddings_url\":\"http://custom-embeddings:9001\"}}
+# You can generate this via Node to avoid toil:
+#   const server_config = {serverMode: "HighAvailability", services: {inference_url: "http://custom-inference:9000", embeddings_url: "http://custom-embeddings:9001"}};
+#   console.log(JSON.stringify(server_config).replace(/"/g, '\\"'));
+env = { SERVER_CONFIG = "" }
diff --git a/crates/predict-otron-9000/src/config.rs b/crates/predict-otron-9000/src/config.rs
index 9b48fd8..efc7a1b 100644
--- a/crates/predict-otron-9000/src/config.rs
+++ b/crates/predict-otron-9000/src/config.rs
@@ -2,7 +2,10 @@ use serde::{Deserialize, Serialize};
 use std::env;
 use tracing::info;
 use tracing::log::error;
-
+/// # Generating `SERVER_CONFIG` with Node
+/// ```javascript
+/// const server_config = {serverMode: "HighAvailability", services: {inference_url: "http://custom-inference:9000", embeddings_url: "http://custom-embeddings:9001"}};
+/// console.log(JSON.stringify(server_config).replace(/"/g, '\\"'));
+/// ```
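+///
+/// A typical invocation (sketch; URLs are illustrative):
+/// ```bash
+/// SERVER_CONFIG='{"serverMode":"HighAvailability","services":{"inference_url":"http://custom-inference:9000","embeddings_url":"http://custom-embeddings:9001"}}' ./predict-otron-9000
+/// ```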
+/// +/// ## 1) Generate the JSON in Node +/// ```bash +/// node +/// ``` +/// ```javascript +/// const myobject = { +/// serverMode: "HighAvailability", +/// services: { +/// inference_url: "http://custom-inference:9000", +/// embeddings_url: "http://custom-embeddings:9001" +/// } +/// }; +/// const json = JSON.stringify(myobject); +/// json +/// // -> '{"serverMode":"HighAvailability","services":{"inference_url":"http://custom-inference:9000","embeddings_url":"http://custom-embeddings:9001"}}' +/// ``` +/// +/// ## 2) Put it into `.toml` +/// +/// ### Option A (recommended): single-quoted TOML *literal* string +/// Single quotes in TOML mean "no escaping", so your inner double quotes are safe. +/// ```toml +/// SERVER_CONFIG = '{"serverMode":"HighAvailability","services":{"inference_url":"http://custom-inference:9000","embeddings_url":"http://custom-embeddings:9001"}}' +/// ``` +/// +/// ### Option B: double-quoted TOML string (must escape inner quotes) +/// If you *must* use double quotes in TOML, escape all `"` inside the JSON. +/// You can have Node do this for you: +/// ```javascript +/// // In Node: +/// const jsonForToml = JSON.stringify(myobject).replace(/"/g, '\\"'); +/// jsonForToml +/// // -> \"{\\\"serverMode\\\":\\\"HighAvailability\\\",...}\" +/// ``` +/// Then paste into TOML: +/// ```toml +/// SERVER_CONFIG = "{\"serverMode\":\"HighAvailability\",\"services\":{\"inference_url\":\"http://custom-inference:9000\",\"embeddings_url\":\"http://custom-embeddings:9001\"}}" +/// ``` +/// +/// ### Option C: multi-line literal (for pretty JSON) +/// If you want pretty-printed JSON in the file, use TOML's triple single quotes: +/// ```javascript +/// // In Node (pretty with 2 spaces): +/// const pretty = JSON.stringify(myobject, null, 2); +/// ``` +/// ```toml +/// SERVER_CONFIG = '''{ +/// "serverMode": "HighAvailability", +/// "services": { +/// "inference_url": "http://custom-inference:9000", +/// "embeddings_url": "http://custom-embeddings:9001" +/// } +/// }''' +/// ``` +/// +/// ## 3) Reading it in Rust +/// +/// If `SERVER_CONFIG` is stored as a **string** in TOML (Options A/B/C): +/// ```rust +/// use serde_json::Value; +/// +/// // Suppose you've already loaded your .toml into a struct or a toml::Value: +/// // e.g., struct FileCfg { pub SERVER_CONFIG: String } +/// fn parse_server_config(raw: &str) -> anyhow::Result { +/// let v: Value = serde_json::from_str(raw)?; +/// Ok(v) +/// } +/// ``` +/// +/// ### Alternative: store it as TOML tables and serialize to JSON at runtime +/// Instead of a JSON string, you can make the TOML first-class tables: +/// ```toml +/// [SERVER_CONFIG] +/// serverMode = "HighAvailability" +/// +/// [SERVER_CONFIG.services] +/// inference_url = "http://custom-inference:9000" +/// embeddings_url = "http://custom-embeddings:9001" +/// ``` +/// ```rust +/// use serde::{Deserialize, Serialize}; +/// use serde_json::Value; +/// +/// #[derive(Debug, Serialize, Deserialize)] +/// struct Services { +/// inference_url: String, +/// embeddings_url: String, +/// } +/// +/// #[derive(Debug, Serialize, Deserialize)] +/// struct ServerConfig { +/// serverMode: String, +/// services: Services, +/// } +/// +/// // After loading the .toml (e.g., via `toml::from_str`): +/// // let cfg: ServerConfig = toml::from_str(toml_str)?; +/// // Convert to JSON if needed: +/// fn to_json(cfg: &ServerConfig) -> serde_json::Result { +/// Ok(serde_json::to_value(cfg)?) +/// } +/// ``` +/// +/// ## Gotchas +/// - Prefer **single-quoted** TOML strings for raw JSON to avoid escaping. 
+/// - If you use **double-quoted** TOML strings, escape every inner `"` in the JSON.
+/// - Pretty JSON is fine in TOML using `''' ... '''`, but remember the newlines are part of the string.
+/// - If you control the consumer, TOML tables (the alternative above) are more ergonomic than embedding JSON.
+
 /// HTTP client configured for proxying requests
 #[derive(Clone)]
 pub struct ProxyClient {
@@ -31,7 +145,7 @@ impl ProxyClient {
 }
 
 /// Create a router that proxies requests to external services in HighAvailability mode
-pub fn create_proxy_router(config: ServerConfig) -> Router {
+pub fn create_ha_router(config: ServerConfig) -> Router {
     let proxy_client = ProxyClient::new(config.clone());
 
     Router::new()
diff --git a/crates/predict-otron-9000/src/main.rs b/crates/predict-otron-9000/src/main.rs
index ce9e4e0..0dfda47 100644
--- a/crates/predict-otron-9000/src/main.rs
+++ b/crates/predict-otron-9000/src/main.rs
@@ -1,16 +1,16 @@
 mod config;
+mod ha_mode;
 mod middleware;
-mod proxy;
-mod standalone;
+mod standalone_mode;
 
-use crate::standalone::create_standalone_router;
+use crate::standalone_mode::create_standalone_router;
 use axum::response::IntoResponse;
 use axum::routing::get;
 use axum::{Router, http::Uri, response::Html, serve};
 use config::ServerConfig;
+use ha_mode::create_ha_router;
 use inference_engine::AppState;
 use middleware::{MetricsLayer, MetricsLoggerFuture, MetricsStore};
-use proxy::create_proxy_router;
 use rust_embed::Embed;
 use std::env;
 use std::path::Component::ParentDir;
@@ -56,7 +56,7 @@ async fn main() {
         Ok(is_ha) => {
             if is_ha {
                 log_config(server_config.clone());
-                create_proxy_router(server_config.clone())
+                create_ha_router(server_config.clone())
             } else {
                 log_config(server_config.clone());
                 create_standalone_router(server_config)
diff --git a/crates/predict-otron-9000/src/standalone.rs b/crates/predict-otron-9000/src/standalone_mode.rs
similarity index 100%
rename from crates/predict-otron-9000/src/standalone.rs
rename to crates/predict-otron-9000/src/standalone_mode.rs
diff --git a/package.json b/package.json
index f7f926d..7c18d10 100644
--- a/package.json
+++ b/package.json
@@ -1,8 +1,8 @@
 {
-  "dependencies": {
-    "openai": "^5.16.0"
-  },
+  "name": "predict-otron-9000",
+  "workspaces": ["crates/cli/package"],
   "scripts": {
-    "cli": "./scripts/cli.ts"
+    "# WORKSPACE ALIASES": "#",
+    "cli": "bun --filter crates/cli/package"
   }
 }
\ No newline at end of file
diff --git a/scripts/build_all_platforms.sh b/scripts/build_all_platforms.sh
deleted file mode 100755
index 6828fb8..0000000
--- a/scripts/build_all_platforms.sh
+++ /dev/null
@@ -1,389 +0,0 @@
-#!/bin/bash
-
-# Cross-platform build script for predict-otron-9000
-# Builds all workspace crates for common platforms
-
-set -euo pipefail
-
-# Colors for output
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-BLUE='\033[0;34m'
-NC='\033[0m' # No Color
-
-# Configuration
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
-BUILD_DIR="${PROJECT_ROOT}/build"
-TIMESTAMP=$(date +%Y%m%d_%H%M%S)
-
-# Supported platforms
-PLATFORMS=(
-    "x86_64-unknown-linux-gnu"
-    "x86_64-pc-windows-msvc"
-    "x86_64-apple-darwin"
-    "aarch64-apple-darwin"
-    "aarch64-unknown-linux-gnu"
-)
-
-# Main binaries to build
-MAIN_BINARIES=(
-    "predict-otron-9000"
-    "embeddings-engine"
-)
-
-# Inference engine binaries (with bin feature)
-INFERENCE_BINARIES=(
-    "gemma_inference"
-    "llama_inference"
-)
-
-# Other workspace binaries
-OTHER_BINARIES=(
-    "helm-chart-tool"
-)
-
-print_header() {
-    echo -e "${BLUE}================================${NC}"
-    echo -e "${BLUE}$1${NC}"
-    echo -e "${BLUE}================================${NC}"
-}
-
-print_info() {
-    echo -e "${GREEN}[INFO]${NC} $1"
-}
-
-print_warn() {
-    echo -e "${YELLOW}[WARN]${NC} $1"
-}
-
-print_error() {
-    echo -e "${RED}[ERROR]${NC} $1"
-}
-
-check_dependencies() {
-    print_header "Checking Dependencies"
-
-    # Check rust
-    if ! command -v cargo >/dev/null 2>&1; then
-        print_error "Rust/Cargo is not installed"
-        exit 1
-    fi
-
-    # Check cargo-leptos for WASM frontend
-    if ! command -v cargo-leptos >/dev/null 2>&1; then
-        print_warn "cargo-leptos not found. Installing..."
-        cargo install cargo-leptos
-    fi
-
-    print_info "All dependencies available"
-}
-
-install_targets() {
-    print_header "Installing Rust Targets"
-
-    for platform in "${PLATFORMS[@]}"; do
-        print_info "Installing target: $platform"
-        rustup target add "$platform" || {
-            print_warn "Failed to install target $platform (may not be available on this host)"
-        }
-    done
-
-    # Add WASM target for leptos
-    print_info "Installing wasm32-unknown-unknown target for Leptos"
-    rustup target add wasm32-unknown-unknown
-}
-
-create_build_dirs() {
-    print_header "Setting up Build Directory"
-
-    rm -rf "$BUILD_DIR"
-    mkdir -p "$BUILD_DIR"
-
-    for platform in "${PLATFORMS[@]}"; do
-        mkdir -p "$BUILD_DIR/$platform"
-    done
-
-    mkdir -p "$BUILD_DIR/web"
-    print_info "Build directories created"
-}
-
-build_leptos_app() {
-    print_header "Building Leptos Web Frontend"
-
-    cd "$PROJECT_ROOT/crates/leptos-app"
-
-    # Build the WASM frontend
-    print_info "Building WASM frontend with cargo-leptos..."
-    cargo leptos build --release || {
-        print_error "Failed to build Leptos WASM frontend"
-        return 1
-    }
-
-    # Copy built assets to build directory
-    if [ -d "target/site" ]; then
-        cp -r target/site/* "$BUILD_DIR/web/"
-        print_info "Leptos frontend built and copied to $BUILD_DIR/web/"
-    else
-        print_error "Leptos build output not found at target/site"
-        return 1
-    fi
-
-    cd "$PROJECT_ROOT"
-}
-
-get_platform_features() {
-    local platform="$1"
-    local features=""
-
-    case "$platform" in
-        *-apple-darwin)
-            # macOS uses Metal but routes to CPU for Gemma stability
-            features=""
-            ;;
-        *-unknown-linux-gnu|*-pc-windows-msvc)
-            # Linux and Windows can use CUDA if available
-            features=""
-            ;;
-        *)
-            features=""
-            ;;
-    esac
-
-    echo "$features"
-}
-
-build_binary_for_platform() {
-    local binary_name="$1"
-    local platform="$2"
-    local package_name="$3"
-    local additional_args="$4"
-
-    print_info "Building $binary_name for $platform"
-
-    local features=$(get_platform_features "$platform")
-    local feature_flag=""
-    if [ -n "$features" ]; then
-        feature_flag="--features $features"
-    fi
-
-    # Build command
-    local build_cmd="cargo build --release --target $platform --bin $binary_name"
-
-    if [ -n "$package_name" ]; then
-        build_cmd="$build_cmd --package $package_name"
-    fi
-
-    if [ -n "$additional_args" ]; then
-        build_cmd="$build_cmd $additional_args"
-    fi
-
-    if [ -n "$feature_flag" ]; then
-        build_cmd="$build_cmd $feature_flag"
-    fi
-
-    print_info "Running: $build_cmd"
-
-    if eval "$build_cmd"; then
-        # Copy binary to build directory
-        local target_dir="target/$platform/release"
-        local binary_file="$binary_name"
-
-        # Add .exe extension for Windows
-        if [[ "$platform" == *-pc-windows-msvc ]]; then
-            binary_file="$binary_name.exe"
-        fi
-
-        if [ -f "$target_dir/$binary_file" ]; then
-            cp "$target_dir/$binary_file" "$BUILD_DIR/$platform/"
-            print_info "✓ $binary_name built and copied for $platform"
-        else
-            print_error "Binary not found: $target_dir/$binary_file"
-            return 1
-        fi
-    else
-        print_error "Failed to build $binary_name for $platform"
-        return 1
-    fi
-}
-
-build_for_platform() {
-    local platform="$1"
-    print_header "Building for $platform"
-
-    local failed_builds=()
-
-    # Build main binaries
-    for binary in "${MAIN_BINARIES[@]}"; do
-        if ! build_binary_for_platform "$binary" "$platform" "$binary" ""; then
-            failed_builds+=("$binary")
-        fi
-    done
-
-    # Build inference engine binaries with bin feature
-    for binary in "${INFERENCE_BINARIES[@]}"; do
-        if ! build_binary_for_platform "$binary" "$platform" "inference-engine" "--features bin"; then
-            failed_builds+=("$binary")
-        fi
-    done
-
-    # Build other workspace binaries
-    for binary in "${OTHER_BINARIES[@]}"; do
-        if ! build_binary_for_platform "$binary" "$platform" "$binary" ""; then
-            failed_builds+=("$binary")
-        fi
-    done
-
-    if [ ${#failed_builds[@]} -eq 0 ]; then
-        print_info "✓ All binaries built successfully for $platform"
-    else
-        print_warn "Some builds failed for $platform: ${failed_builds[*]}"
-    fi
-}
-
-create_archives() {
-    print_header "Creating Release Archives"
-
-    cd "$BUILD_DIR"
-
-    for platform in "${PLATFORMS[@]}"; do
-        if [ -d "$platform" ] && [ -n "$(ls -A "$platform" 2>/dev/null)" ]; then
-            local archive_name="predict-otron-9000-${platform}-${TIMESTAMP}"
-
-            print_info "Creating archive for $platform"
-
-            # Create platform-specific directory with all files
-            mkdir -p "$archive_name"
-            cp -r "$platform"/* "$archive_name/"
-
-            # Add web assets to each platform archive
-            if [ -d "web" ]; then
-                mkdir -p "$archive_name/web"
-                cp -r web/* "$archive_name/web/"
-            fi
-
-            # Create README for the platform
-            cat > "$archive_name/README.txt" << EOF
-Predict-Otron-9000 - Platform: $platform
-Build Date: $(date)
-========================================
-
-Binaries included:
-$(ls -1 "$platform")
-
-Web Frontend:
-- Located in the 'web' directory
-- Serve with any static file server on port 8788 or configure your server
-
-Usage:
-1. Start the main server: ./predict-otron-9000
-2. Start embeddings service: ./embeddings-engine
-3. Access web interface at http://localhost:8080 (served by main server)
-
-For more information, visit: https://github.com/geoffsee/predict-otron-9000
-EOF
-
-            # Create tar.gz archive
-            tar -czf "${archive_name}.tar.gz" "$archive_name"
-            rm -rf "$archive_name"
-
-            print_info "✓ Created ${archive_name}.tar.gz"
-        else
-            print_warn "No binaries found for $platform, skipping archive"
-        fi
-    done
-
-    cd "$PROJECT_ROOT"
-}
-
-generate_build_report() {
-    print_header "Build Report"
-
-    echo "Build completed at: $(date)"
-    echo "Build directory: $BUILD_DIR"
-    echo ""
-    echo "Archives created:"
-    ls -la "$BUILD_DIR"/*.tar.gz 2>/dev/null || echo "No archives created"
-    echo ""
-    echo "Platform directories:"
-    for platform in "${PLATFORMS[@]}"; do
-        if [ -d "$BUILD_DIR/$platform" ]; then
-            echo "  $platform:"
-            ls -la "$BUILD_DIR/$platform" | sed 's/^/    /'
-        fi
-    done
-
-    if [ -d "$BUILD_DIR/web" ]; then
-        echo ""
-        echo "Web frontend assets:"
-        ls -la "$BUILD_DIR/web" | head -10 | sed 's/^/  /'
-        if [ $(ls -1 "$BUILD_DIR/web" | wc -l) -gt 10 ]; then
-            echo "  ... and $(( $(ls -1 "$BUILD_DIR/web" | wc -l) - 10 )) more files"
-        fi
-    fi
-}
-
-main() {
-    print_header "Predict-Otron-9000 Cross-Platform Build Script"
-
-    cd "$PROJECT_ROOT"
-
-    check_dependencies
-    install_targets
-    create_build_dirs
-
-    # Build Leptos web frontend first
-    build_leptos_app
-
-    # Build for each platform
-    for platform in "${PLATFORMS[@]}"; do
-        build_for_platform "$platform"
-    done
-
-    create_archives
-    generate_build_report
-
-    print_header "Build Complete!"
-    print_info "All artifacts are available in: $BUILD_DIR"
-}
-
-# Handle command line arguments
-case "${1:-}" in
-    --help|-h)
-        echo "Usage: $0 [options]"
-        echo ""
-        echo "Cross-platform build script for predict-otron-9000"
-        echo ""
-        echo "Options:"
-        echo "  --help, -h     Show this help message"
-        echo "  --platforms    Show supported platforms"
-        echo "  --clean        Clean build directory before building"
-        echo ""
-        echo "Supported platforms:"
-        for platform in "${PLATFORMS[@]}"; do
-            echo "  - $platform"
-        done
-        echo ""
-        echo "Prerequisites:"
-        echo "  - Rust toolchain with rustup"
-        echo "  - cargo-leptos (will be installed if missing)"
-        echo "  - Platform-specific toolchains for cross-compilation"
-        echo ""
-        exit 0
-        ;;
-    --platforms)
-        echo "Supported platforms:"
-        for platform in "${PLATFORMS[@]}"; do
-            echo "  - $platform"
-        done
-        exit 0
-        ;;
-    --clean)
-        print_info "Cleaning build directory..."
-        rm -rf "$BUILD_DIR"
-        print_info "Build directory cleaned"
-        ;;
-esac
-
-main "$@"
\ No newline at end of file
diff --git a/scripts/build_cli.sh b/scripts/build_cli.sh
deleted file mode 100755
index 9e28b9f..0000000
--- a/scripts/build_cli.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/usr/bin/env sh
-set -e
-
-SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
-
-TEMP_DIR="$SCRIPT_DIR/temp"
-
-mkdir -p "$TEMP_DIR"
-
-cp "$SCRIPT_DIR/cli.ts" "$TEMP_DIR/cli.ts"
-cp "$SCRIPT_DIR/../package.json" "$TEMP_DIR/package.json"
-
-(
-cd "$TEMP_DIR"
-bun i
-bun build ./cli.ts --compile --outfile "$SCRIPT_DIR/cli"
-)
-
-rm -rf "$TEMP_DIR"
\ No newline at end of file