move cli into crates and stage for release

Author: geoffsee
Date: 2025-08-31 13:23:50 -04:00
Parent: 9e9aa69769
Commit: 0580dc8c5e
26 changed files with 604 additions and 447 deletions


@@ -34,6 +34,9 @@ jobs:
       - name: Setup Rust
         run: rustup update stable && rustup default stable
+      - name: Setup Bun
+        uses: oven-sh/setup-bun@v2
+
       - name: Install clippy and rustfmt
         run: rustup component add clippy rustfmt
@@ -132,7 +135,7 @@ jobs:
         run: rustup target add ${{ matrix.target }}
       - name: Build binary
-        run: cargo build --release --target ${{ matrix.target }} -p predict-otron-9000
+        run: cargo build --release --target ${{ matrix.target }} -p predict-otron-9000 -p cli
         env:
           CARGO_TERM_COLOR: always
@@ -140,14 +143,14 @@ jobs:
         if: matrix.os != 'windows-latest'
         run: |
           cd target/${{ matrix.target }}/release
-          tar czf ../../../${{ matrix.name }}.tar.gz predict-otron-9000
+          tar czf ../../../${{ matrix.name }}.tar.gz predict-otron-9000 cli
           cd ../../../
       - name: Package binary (Windows)
         if: matrix.os == 'windows-latest'
         run: |
           cd target/${{ matrix.target }}/release
-          7z a ../../../${{ matrix.name }}.zip predict-otron-9000.exe
+          7z a ../../../${{ matrix.name }}.zip predict-otron-9000.exe cli.exe
           cd ../../../
       - name: Upload binary artifacts (Unix)

.gitignore (vendored, 1 line changed)

@@ -78,3 +78,4 @@ venv/
 *~
 /scripts/cli
 !/scripts/cli.ts
+/**/.*.bun-build

Cargo.lock (generated, 18 lines changed)

@@ -896,6 +896,10 @@ version = "0.7.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"
 
+[[package]]
+name = "cli"
+version = "0.1.1"
+
 [[package]]
 name = "codee"
 version = "0.3.2"
@@ -1472,7 +1476,7 @@ dependencies = [
 [[package]]
 name = "embeddings-engine"
-version = "0.1.0"
+version = "0.1.1"
 dependencies = [
  "async-openai",
  "axum",
@@ -2094,7 +2098,7 @@ dependencies = [
 [[package]]
 name = "gemma-runner"
-version = "0.1.0"
+version = "0.1.1"
 dependencies = [
  "anyhow",
  "candle-core 0.9.1 (git+https://github.com/huggingface/candle.git)",
@@ -2275,7 +2279,7 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
 [[package]]
 name = "helm-chart-tool"
-version = "0.1.0"
+version = "0.1.1"
 dependencies = [
  "anyhow",
  "clap",
@@ -2685,7 +2689,7 @@ dependencies = [
 [[package]]
 name = "inference-engine"
-version = "0.1.0"
+version = "0.1.1"
 dependencies = [
  "ab_glyph",
  "anyhow",
@@ -2937,7 +2941,7 @@ dependencies = [
 [[package]]
 name = "leptos-app"
-version = "0.1.0"
+version = "0.1.1"
 dependencies = [
  "async-openai-wasm",
  "axum",
@@ -3200,7 +3204,7 @@ checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956"
 [[package]]
 name = "llama-runner"
-version = "0.1.0"
+version = "0.1.1"
 dependencies = [
  "anyhow",
  "candle-core 0.9.1 (git+https://github.com/huggingface/candle.git)",
@@ -4079,7 +4083,7 @@ dependencies = [
 [[package]]
 name = "predict-otron-9000"
-version = "0.1.0"
+version = "0.1.1"
 dependencies = [
  "axum",
  "embeddings-engine",


@@ -6,11 +6,15 @@ members = [
     "crates/leptos-app",
     "crates/helm-chart-tool",
     "crates/llama-runner",
-    "crates/gemma-runner"
+    "crates/gemma-runner",
+    "crates/cli"
 ]
 default-members = ["crates/predict-otron-9000"]
 resolver = "2"
 
+[workspace.package]
+version = "0.1.2"
+
 # Compiler optimization profiles for the workspace
 [profile.release]
 opt-level = 3

bun.lock (new file, 22 lines)

@@ -0,0 +1,22 @@
{
  "lockfileVersion": 1,
  "workspaces": {
    "": {
      "name": "predict-otron-9000",
    },
    "crates/cli/package": {
      "name": "cli",
      "dependencies": {
        "install": "^0.13.0",
        "openai": "^5.16.0",
      },
    },
  },
  "packages": {
    "cli": ["cli@workspace:crates/cli/package"],

    "install": ["install@0.13.0", "", {}, "sha512-zDml/jzr2PKU9I8J/xyZBQn8rPCAY//UOYNmR01XwNwyfhEWObo2SWfSl1+0tm1u6PhxLwDnfsT/6jB7OUxqFA=="],

    "openai": ["openai@5.16.0", "", { "peerDependencies": { "ws": "^8.18.0", "zod": "^3.23.8" }, "optionalPeers": ["ws", "zod"], "bin": { "openai": "bin/cli" } }, "sha512-hoEH8ZNvg1HXjU9mp88L/ZH8O082Z8r6FHCXGiWAzVRrEv443aI57qhch4snu07yQydj+AUAWLenAiBXhu89Tw=="],
  }
}

crates/cli/Cargo.toml (new file, 11 lines)

@@ -0,0 +1,11 @@
[package]
name = "cli"
version.workspace = true
edition = "2021"
build = "build.rs"

[[bin]]
name = "cli"
path = "src/main.rs"

[dependencies]

crates/cli/README.md (new file, 23 lines)

@@ -0,0 +1,23 @@
# cli

A Rust/TypeScript hybrid.

```console
./cli [options] [prompt]

Simple CLI tool for testing the local OpenAI-compatible API server.

Options:
  --model <model>      Model to use (default: gemma-3-1b-it)
  --prompt <prompt>    The prompt to send (can also be provided as positional argument)
  --list-models        List all available models from the server
  --help               Show this help message

Examples:
  ./cli "What is the capital of France?"
  ./cli --model gemma-3-1b-it --prompt "Hello, world!"
  ./cli --prompt "Who was the 16th president of the United States?"
  ./cli --list-models

The server must be running at http://localhost:8080
```

crates/cli/build.rs (new file, 209 lines)

@@ -0,0 +1,209 @@
use std::env;
use std::fs;
use std::io::{self, BufRead, Write};
use std::path::{Path, PathBuf};
use std::process::{ChildStderr, ChildStdout, Command, Stdio};
use std::thread;
use std::time::{Duration, SystemTime};

mod bun_target;
use bun_target::BunTarget;

fn main() {
    println!("cargo:rerun-if-changed=");

    if let Err(e) = run_build() {
        println!("cargo:warning=build.rs failed: {e}");
        std::process::exit(1);
    }
}

fn run_build() -> io::Result<()> {
    let manifest_dir =
        PathBuf::from(env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR not set"));
    let package_dir = manifest_dir.join("package");
    let out_dir = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR not set by Cargo"));
    let output_path = out_dir.join("client-cli");

    let bun_tgt = BunTarget::from_cargo_env()
        .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;

    // Optional: warn if using a Bun target that's marked unsupported in your chart
    if matches!(bun_tgt, BunTarget::WindowsArm64) {
        println!("cargo:warning=bun-windows-arm64 is marked unsupported in the compatibility chart");
    }

    warn(&format!("Building CLI into: {}", output_path.display()));

    // --- bun install (in ./package), keep temps inside OUT_DIR ---
    let mut install = Command::new("bun")
        .current_dir(&package_dir)
        .env("TMPDIR", &out_dir)
        .arg("install")
        .stdin(Stdio::null())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .map_err(|e| io::Error::new(e.kind(), format!("Failed to spawn `bun install`: {e}")))?;

    let install_join = stream_child("bun install", install.stdout.take(), install.stderr.take());
    let install_status = install.wait()?;

    // ensure streams finish
    join_streams(install_join);

    if !install_status.success() {
        let code = install_status.code().unwrap_or(1);
        return Err(io::Error::new(
            io::ErrorKind::Other,
            format!("bun install failed with status {code}"),
        ));
    }

    let target = env::var("TARGET").unwrap();

    // --- bun build (in ./package), emit to OUT_DIR, keep temps inside OUT_DIR ---
    let mut build = Command::new("bun")
        .current_dir(&package_dir)
        .env("TMPDIR", &out_dir)
        .arg("build")
        .arg("./cli.ts")
        .arg(format!("--target={}", bun_tgt.as_bun_flag()))
        .arg("--compile")
        .arg("--outfile")
        .arg(&output_path)
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .map_err(|e| io::Error::new(e.kind(), format!("Failed to spawn `bun build`: {e}")))?;

    let build_join = stream_child("bun build", build.stdout.take(), build.stderr.take());
    let status = build.wait()?;

    // ensure streams finish
    join_streams(build_join);

    if status.success() {
        info("bun build succeeded");
    } else {
        let code = status.code().unwrap_or(1);
        warn(&format!("bun build failed with status: {code}"));
        return Err(io::Error::new(io::ErrorKind::Other, "bun build failed"));
    }

    // Ensure the output is executable (after it exists)
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        let mut perms = fs::metadata(&output_path)?.permissions();
        perms.set_mode(0o755);
        fs::set_permissions(&output_path, perms)?;
    }

    println!("cargo:warning=Built CLI at {}", output_path.display());
    println!("cargo:rustc-env=CLIENT_CLI_BIN={}", output_path.display());

    // --- Cleanup stray .bun-build temp files (conservative: older than 5 minutes) ---
    for dir in [&manifest_dir, &package_dir, &out_dir] {
        if let Err(e) = remove_bun_temp_files(dir, Some(Duration::from_secs(5 * 60))) {
            println!("cargo:warning=cleanup in {} failed: {e}", dir.display());
        }
    }

    Ok(())
}

// Spawn readers for child's stdout/stderr so we don't deadlock on pipe buffers
fn stream_child(
    tag: &str,
    stdout: Option<ChildStdout>,
    stderr: Option<ChildStderr>,
) -> (
    Option<thread::JoinHandle<()>>,
    Option<thread::JoinHandle<()>>,
) {
    let t1 = stdout.map(|out| {
        let tag = tag.to_string();
        thread::spawn(move || {
            let reader = io::BufReader::new(out);
            for line in reader.lines() {
                info(&format!("[{tag} stdout] {}", line.unwrap_or_default()));
            }
        })
    });

    let t2 = stderr.map(|err| {
        let tag = tag.to_string();
        thread::spawn(move || {
            let reader = io::BufReader::new(err);
            for line in reader.lines() {
                warn(&format!("[{tag} stderr] {}", line.unwrap_or_default()));
            }
        })
    });

    (t1, t2)
}

fn join_streams(
    joins: (
        Option<thread::JoinHandle<()>>,
        Option<thread::JoinHandle<()>>,
    ),
) {
    if let Some(j) = joins.0 {
        let _ = j.join();
    }
    if let Some(j) = joins.1 {
        let _ = j.join();
    }
}

fn remove_bun_temp_files(dir: &Path, older_than: Option<Duration>) -> io::Result<()> {
    let now = SystemTime::now();

    for entry in fs::read_dir(dir)? {
        let entry = entry?;
        let path = entry.path();

        if !path.is_file() {
            continue;
        }

        // Files like ".1860e7df40ff1bef-00000000.bun-build"
        let name = entry.file_name();
        let name = name.to_string_lossy();
        let looks_like_bun_temp = name.starts_with('.') && name.ends_with(".bun-build");

        if !looks_like_bun_temp {
            continue;
        }

        if let Some(age) = older_than {
            if let Ok(meta) = entry.metadata() {
                if let Ok(modified) = meta.modified() {
                    if now.duration_since(modified).unwrap_or_default() < age {
                        // too new; skip to avoid racing an in-flight builder
                        continue;
                    }
                }
            }
        }

        match fs::remove_file(&path) {
            Ok(_) => println!("cargo:warning=removed stray bun temp {}", path.display()),
            Err(e) => println!("cargo:warning=failed to remove {}: {e}", path.display()),
        }
    }

    Ok(())
}

fn warn(msg: &str) {
    let _ = writeln!(io::stderr(), "[build.rs] {msg}");
    println!("cargo:warning={msg}");
}

fn info(msg: &str) {
    let _ = writeln!(io::stderr(), "[build.rs] {msg}");
    println!("cargo:warning=INFO|{msg}");
}

crates/cli/bun_target.rs (new file, 125 lines)

@@ -0,0 +1,125 @@
use std::env;
use std::fmt;

#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
pub enum BunTarget {
    LinuxX64Glibc,
    LinuxArm64Glibc,
    LinuxX64Musl,
    LinuxArm64Musl,
    WindowsX64,
    WindowsArm64,
    MacX64,
    MacArm64,
}

impl BunTarget {
    pub const fn as_bun_flag(self) -> &'static str {
        match self {
            BunTarget::LinuxX64Glibc => "bun-linux-x64",
            BunTarget::LinuxArm64Glibc => "bun-linux-arm64",
            BunTarget::LinuxX64Musl => "bun-linux-x64-musl",
            BunTarget::LinuxArm64Musl => "bun-linux-arm64-musl",
            BunTarget::WindowsX64 => "bun-windows-x64",
            BunTarget::WindowsArm64 => "bun-windows-arm64",
            BunTarget::MacX64 => "bun-darwin-x64",
            BunTarget::MacArm64 => "bun-darwin-arm64",
        }
    }

    pub const fn rust_triples(self) -> &'static [&'static str] {
        match self {
            BunTarget::LinuxX64Glibc => &["x86_64-unknown-linux-gnu", "x86_64-unknown-linux-gnu.2.17"],
            BunTarget::LinuxArm64Glibc => &["aarch64-unknown-linux-gnu"],
            BunTarget::LinuxX64Musl => &["x86_64-unknown-linux-musl"],
            BunTarget::LinuxArm64Musl => &["aarch64-unknown-linux-musl"],
            BunTarget::WindowsX64 => &["x86_64-pc-windows-msvc"],
            BunTarget::WindowsArm64 => &["aarch64-pc-windows-msvc"], // chart says unsupported; still map
            BunTarget::MacX64 => &["x86_64-apple-darwin"],
            BunTarget::MacArm64 => &["aarch64-apple-darwin"],
        }
    }

    pub fn from_rust_target(triple: &str) -> Option<Self> {
        let norm = triple.trim();

        if norm.starts_with("x86_64-") && norm.contains("-linux-") && norm.ends_with("gnu") {
            return Some(BunTarget::LinuxX64Glibc);
        }
        if norm.starts_with("aarch64-") && norm.contains("-linux-") && norm.ends_with("gnu") {
            return Some(BunTarget::LinuxArm64Glibc);
        }
        if norm.starts_with("x86_64-") && norm.contains("-linux-") && norm.ends_with("musl") {
            return Some(BunTarget::LinuxX64Musl);
        }
        if norm.starts_with("aarch64-") && norm.contains("-linux-") && norm.ends_with("musl") {
            return Some(BunTarget::LinuxArm64Musl);
        }
        if norm == "x86_64-pc-windows-msvc" {
            return Some(BunTarget::WindowsX64);
        }
        if norm == "aarch64-pc-windows-msvc" {
            return Some(BunTarget::WindowsArm64);
        }
        if norm == "x86_64-apple-darwin" {
            return Some(BunTarget::MacX64);
        }
        if norm == "aarch64-apple-darwin" {
            return Some(BunTarget::MacArm64);
        }

        for bt in [
            BunTarget::LinuxX64Glibc,
            BunTarget::LinuxArm64Glibc,
            BunTarget::LinuxX64Musl,
            BunTarget::LinuxArm64Musl,
            BunTarget::WindowsX64,
            BunTarget::WindowsArm64,
            BunTarget::MacX64,
            BunTarget::MacArm64,
        ] {
            for &t in bt.rust_triples() {
                if t == norm {
                    return Some(bt);
                }
            }
        }

        None
    }

    pub fn from_cargo_env() -> Result<Self, BunTargetError> {
        if let Ok(triple) = env::var("TARGET") {
            if let Some(bt) = Self::from_rust_target(&triple) {
                return Ok(bt);
            }
            return Err(BunTargetError::UnknownTriple(triple));
        }

        let os = env::var("CARGO_CFG_TARGET_OS").unwrap_or_default();
        let arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default();
        let envv = env::var("CARGO_CFG_TARGET_ENV").unwrap_or_default();
        let vendor = env::var("CARGO_CFG_TARGET_VENDOR").unwrap_or_else(|_| "unknown".into());
        let triple = format!("{}-{}-{}-{}", arch, vendor, os, if envv.is_empty() { "gnu" } else { &envv });

        if let Some(bt) = Self::from_rust_target(&triple) {
            Ok(bt)
        } else {
            Err(BunTargetError::UnknownTriple(triple))
        }
    }
}

#[derive(Debug)]
pub enum BunTargetError {
    UnknownTriple(String),
}

impl fmt::Display for BunTargetError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            BunTargetError::UnknownTriple(t) => write!(f, "unrecognized Rust target triple: {t}"),
        }
    }
}

impl std::error::Error for BunTargetError {}
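
The mapping above is pure data, so it is cheap to pin down with a unit test. A minimal sketch, not part of this commit (`riscv64gc-unknown-linux-gnu` is just an arbitrary triple the table does not cover):

```rust
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn maps_common_triples() {
        // Exact matches and the prefix/suffix rules in from_rust_target
        assert_eq!(
            BunTarget::from_rust_target("aarch64-apple-darwin"),
            Some(BunTarget::MacArm64)
        );
        assert_eq!(
            BunTarget::from_rust_target("x86_64-unknown-linux-musl"),
            Some(BunTarget::LinuxX64Musl)
        );
        assert_eq!(BunTarget::MacArm64.as_bun_flag(), "bun-darwin-arm64");

        // A triple with no Bun equivalent falls through to None
        assert!(BunTarget::from_rust_target("riscv64gc-unknown-linux-gnu").is_none());
    }
}
```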


@@ -30,24 +30,23 @@ type ChunkStat = {
 function printHelp() {
   console.log(`
-Usage: bun client_cli.ts [options] [prompt]
+./cli [options] [prompt]
 
 Simple CLI tool for testing the local OpenAI-compatible API server.
 
 Options:
-  --model <model>      Model to use (default: ${DEFAULT_MODEL})
+  --model <model>      Model to use (default: gemma-3-1b-it)
   --prompt <prompt>    The prompt to send (can also be provided as positional argument)
   --list-models        List all available models from the server
   --help               Show this help message
 
 Examples:
-  ./cli.ts "What is the capital of France?"
-  ./cli.ts --model gemma-3-1b-it --prompt "Hello, world!"
-  ./cli.ts --prompt "Who was the 16th president of the United States?"
-  ./cli.ts --list-models
+  ./cli "What is the capital of France?"
+  ./cli --model gemma-3-1b-it --prompt "Hello, world!"
+  ./cli --prompt "Who was the 16th president of the United States?"
+  ./cli --list-models
 
-The server should be running at http://localhost:8080
-Start it with: ./run_server.sh
+The server must be running at http://localhost:8080
 `);
 }


@@ -0,0 +1,11 @@
{
  "name": "cli",
  "main": "cli.ts",
  "scripts": {
    "build": "bun build cli.ts --compile --outfile cli"
  },
  "dependencies": {
    "install": "^0.13.0",
    "openai": "^5.16.0"
  }
}

crates/cli/src/main.rs (new file, 32 lines)

@@ -0,0 +1,32 @@
use std::{env, fs, io, path::PathBuf, process::Command};

#[cfg(unix)]
use std::os::unix::fs::PermissionsExt;

fn main() -> io::Result<()> {
    // Absolute path provided by build.rs at compile time.
    // `include_bytes!` accepts string literals; `env!` expands to a literal at compile time.
    const CLIENT_CLI: &[u8] = include_bytes!(env!("CLIENT_CLI_BIN"));

    // Write to a temp file
    let mut tmp = env::temp_dir();
    tmp.push("client-cli-embedded");
    fs::write(&tmp, CLIENT_CLI)?;

    // Ensure it's executable on Unix
    #[cfg(unix)]
    {
        let mut perms = fs::metadata(&tmp)?.permissions();
        perms.set_mode(0o755);
        fs::set_permissions(&tmp, perms)?;
    }

    // Run it
    let status = Command::new(&tmp).arg("--version").status()?;
    if !status.success() {
        return Err(io::Error::new(io::ErrorKind::Other, "client-cli failed"));
    }

    Ok(())
}
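
As committed, the wrapper always invokes the embedded binary with `--version`, which works as a smoke test but discards the user's arguments. A pass-through variant, sketched here and not part of the commit, would forward argv and propagate the exit code (reusing the temp path written above):

```rust
use std::{env, io, process::Command};

fn main() -> io::Result<()> {
    // Assumption: the embedded CLI has already been written to this path.
    let tmp = env::temp_dir().join("client-cli-embedded");

    // Forward the wrapper's own arguments to the embedded CLI.
    let status = Command::new(&tmp).args(env::args_os().skip(1)).status()?;

    // Propagate the child's exit code to the caller.
    std::process::exit(status.code().unwrap_or(1));
}
```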


@@ -1,6 +1,6 @@
 [package]
 name = "embeddings-engine"
-version = "0.1.0"
+version.workspace = true
 edition = "2024"
 
 [lib]


@@ -1,6 +1,6 @@
 [package]
 name = "gemma-runner"
-version = "0.1.0"
+version.workspace = true
 edition = "2021"


@@ -1,6 +1,6 @@
 [package]
 name = "helm-chart-tool"
-version = "0.1.0"
+version.workspace = true
 edition = "2021"
 
 [[bin]]


@@ -1,6 +1,6 @@
 [package]
 name = "inference-engine"
-version = "0.1.0"
+version.workspace = true
 edition = "2021"
 
 [dependencies]


@@ -1,6 +1,6 @@
 [package]
 name = "leptos-app"
-version = "0.1.0"
+version.workspace = true
 edition = "2021"
 
 [lib]


@@ -1,6 +1,6 @@
 [package]
 name = "llama-runner"
-version = "0.1.0"
+version.workspace = true
 edition = "2021"
 
 [dependencies]


@@ -1,6 +1,6 @@
 [package]
 name = "predict-otron-9000"
-version = "0.1.1"
+version.workspace = true
 edition = "2024"
 
 [[bin]]
@@ -44,4 +44,8 @@ port = 8080
 image = "ghcr.io/geoffsee/predict-otron-9000:latest"
 replicas = 1
 port = 8080
-env = { SERVER_CONFIG = "" }
+# SERVER_CONFIG Example: {\"serverMode\":\"HighAvailability\",\"services\":{\"inference_url\":\"http://custom-inference:9000\",\"embeddings_url\":\"http://custom-embeddings:9001\"}}
+# you can generate this via node to avoid toil
+# const server_config = {serverMode: "HighAvailability", services: {inference_url: "http://custom-inference:9000", embeddings_url: "http://custom-embeddings:9001"} };
+# console.log(JSON.stringify(server_config).replace(/"/g, '\\"'));
+env = { SERVER_CONFIG = "<your-json-value-here>" }


@@ -2,7 +2,10 @@ use serde::{Deserialize, Serialize};
 use std::env;
 use tracing::info;
 use tracing::log::error;
+/// # Generating `SERVER_CONFIG` with Node
+// # const server_config = {serverMode: "HighAvailability", services: {inference_url: "http://custom-inference:9000", embeddings_url: "http://custom-embeddings:9001"} };
+// # console.log(JSON.stringify(server_config).replace(/"/g, '\\"'));
+///
 #[derive(Serialize, Deserialize, Clone, Debug)]
 #[serde(rename_all = "camelCase")]
 pub struct ServerConfig {
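
For context, a minimal sketch of consuming that value at startup. This is not code from the commit; it assumes `serde_json` is a dependency and that `ServerConfig` implements `Default`, neither of which is shown in this hunk:

```rust
use crate::config::ServerConfig;

/// Hypothetical helper: parse `SERVER_CONFIG` from the environment,
/// falling back to defaults when the variable is unset.
fn load_server_config() -> ServerConfig {
    match std::env::var("SERVER_CONFIG") {
        Ok(raw) => serde_json::from_str(&raw)
            .expect("SERVER_CONFIG must be valid JSON matching ServerConfig"),
        Err(_) => ServerConfig::default(), // assumption: Default is derived elsewhere
    }
}
```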


@@ -12,6 +12,120 @@ use std::time::Duration;
 use crate::config::ServerConfig;
 
+/// # Generating `SERVER_CONFIG` for TOML using Node.js
+///
+/// You can still use the Node.js REPL to build the JSON, but when pasting into
+/// a `.toml` file you must follow TOML's string rules. Below are the safest patterns.
+///
+/// ## 1) Generate the JSON in Node
+/// ```bash
+/// node
+/// ```
+/// ```javascript
+/// const myobject = {
+///   serverMode: "HighAvailability",
+///   services: {
+///     inference_url: "http://custom-inference:9000",
+///     embeddings_url: "http://custom-embeddings:9001"
+///   }
+/// };
+/// const json = JSON.stringify(myobject);
+/// json
+/// // -> '{"serverMode":"HighAvailability","services":{"inference_url":"http://custom-inference:9000","embeddings_url":"http://custom-embeddings:9001"}}'
+/// ```
+///
+/// ## 2) Put it into `.toml`
+///
+/// ### Option A (recommended): single-quoted TOML *literal* string
+/// Single quotes in TOML mean "no escaping", so your inner double quotes are safe.
+/// ```toml
+/// SERVER_CONFIG = '{"serverMode":"HighAvailability","services":{"inference_url":"http://custom-inference:9000","embeddings_url":"http://custom-embeddings:9001"}}'
+/// ```
+///
+/// ### Option B: double-quoted TOML string (must escape inner quotes)
+/// If you *must* use double quotes in TOML, escape all `"` inside the JSON.
+/// You can have Node do this for you:
+/// ```javascript
+/// // In Node:
+/// const jsonForToml = JSON.stringify(myobject).replace(/"/g, '\\"');
+/// jsonForToml
+/// // -> '{\"serverMode\":\"HighAvailability\",...}'
+/// ```
+/// Then paste into TOML:
+/// ```toml
+/// SERVER_CONFIG = "{\"serverMode\":\"HighAvailability\",\"services\":{\"inference_url\":\"http://custom-inference:9000\",\"embeddings_url\":\"http://custom-embeddings:9001\"}}"
+/// ```
+///
+/// ### Option C: multi-line literal (for pretty JSON)
+/// If you want pretty-printed JSON in the file, use TOML's triple single quotes:
+/// ```javascript
+/// // In Node (pretty with 2 spaces):
+/// const pretty = JSON.stringify(myobject, null, 2);
+/// ```
+/// ```toml
+/// SERVER_CONFIG = '''{
+///   "serverMode": "HighAvailability",
+///   "services": {
+///     "inference_url": "http://custom-inference:9000",
+///     "embeddings_url": "http://custom-embeddings:9001"
+///   }
+/// }'''
+/// ```
+///
+/// ## 3) Reading it in Rust
+///
+/// If `SERVER_CONFIG` is stored as a **string** in TOML (Options A/B/C):
+/// ```rust
+/// use serde_json::Value;
+///
+/// // Suppose you've already loaded your .toml into a struct or a toml::Value:
+/// // e.g., struct FileCfg { pub SERVER_CONFIG: String }
+/// fn parse_server_config(raw: &str) -> anyhow::Result<Value> {
+///     let v: Value = serde_json::from_str(raw)?;
+///     Ok(v)
+/// }
+/// ```
+///
+/// ### Alternative: store it as TOML tables and serialize to JSON at runtime
+/// Instead of a JSON string, you can make the TOML first-class tables:
+/// ```toml
+/// [SERVER_CONFIG]
+/// serverMode = "HighAvailability"
+///
+/// [SERVER_CONFIG.services]
+/// inference_url = "http://custom-inference:9000"
+/// embeddings_url = "http://custom-embeddings:9001"
+/// ```
+/// ```rust
+/// use serde::{Deserialize, Serialize};
+/// use serde_json::Value;
+///
+/// #[derive(Debug, Serialize, Deserialize)]
+/// struct Services {
+///     inference_url: String,
+///     embeddings_url: String,
+/// }
+///
+/// #[derive(Debug, Serialize, Deserialize)]
+/// struct ServerConfig {
+///     serverMode: String,
+///     services: Services,
+/// }
+///
+/// // After loading the .toml (e.g., via `toml::from_str`):
+/// // let cfg: ServerConfig = toml::from_str(toml_str)?;
+/// // Convert to JSON if needed:
+/// fn to_json(cfg: &ServerConfig) -> serde_json::Result<Value> {
+///     Ok(serde_json::to_value(cfg)?)
+/// }
+/// ```
+///
+/// ## Gotchas
+/// - Prefer **single-quoted** TOML strings for raw JSON to avoid escaping.
+/// - If you use **double-quoted** TOML strings, escape every inner `"` in the JSON.
+/// - Pretty JSON is fine in TOML using `''' ... '''`, but remember the newlines are part of the string.
+/// - If you control the consumer, TOML tables (the alternative above) are more ergonomic than embedding JSON.
+
 /// HTTP client configured for proxying requests
 #[derive(Clone)]
 pub struct ProxyClient {
@@ -31,7 +145,7 @@ impl ProxyClient {
 }
 
 /// Create a router that proxies requests to external services in HighAvailability mode
-pub fn create_proxy_router(config: ServerConfig) -> Router {
+pub fn create_ha_router(config: ServerConfig) -> Router {
     let proxy_client = ProxyClient::new(config.clone());
 
     Router::new()


@@ -1,16 +1,16 @@
 mod config;
+mod ha_mode;
 mod middleware;
-mod proxy;
-mod standalone;
+mod standalone_mode;
 
-use crate::standalone::create_standalone_router;
+use crate::standalone_mode::create_standalone_router;
 use axum::response::IntoResponse;
 use axum::routing::get;
 use axum::{Router, http::Uri, response::Html, serve};
 use config::ServerConfig;
+use ha_mode::create_ha_router;
 use inference_engine::AppState;
 use middleware::{MetricsLayer, MetricsLoggerFuture, MetricsStore};
-use proxy::create_proxy_router;
 use rust_embed::Embed;
 use std::env;
 use std::path::Component::ParentDir;
@@ -56,7 +56,7 @@ async fn main() {
         Ok(is_ha) => {
             if is_ha {
                 log_config(server_config.clone());
-                create_proxy_router(server_config.clone())
+                create_ha_router(server_config.clone())
             } else {
                 log_config(server_config.clone());
                 create_standalone_router(server_config)


@@ -1,8 +1,8 @@
 {
-  "dependencies": {
-    "openai": "^5.16.0"
-  },
+  "name": "predict-otron-9000",
+  "workspaces": ["crates/cli/package"],
   "scripts": {
-    "cli": "./scripts/cli.ts"
+    "# WORKSPACE ALIASES": "#",
+    "cli": "bun --filter crates/cli/package"
   }
 }


@@ -1,389 +0,0 @@
#!/bin/bash
# Cross-platform build script for predict-otron-9000
# Builds all workspace crates for common platforms
set -euo pipefail
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Configuration
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
BUILD_DIR="${PROJECT_ROOT}/build"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
# Supported platforms
PLATFORMS=(
"x86_64-unknown-linux-gnu"
"x86_64-pc-windows-msvc"
"x86_64-apple-darwin"
"aarch64-apple-darwin"
"aarch64-unknown-linux-gnu"
)
# Main binaries to build
MAIN_BINARIES=(
"predict-otron-9000"
"embeddings-engine"
)
# Inference engine binaries (with bin feature)
INFERENCE_BINARIES=(
"gemma_inference"
"llama_inference"
)
# Other workspace binaries
OTHER_BINARIES=(
"helm-chart-tool"
)
print_header() {
echo -e "${BLUE}================================${NC}"
echo -e "${BLUE}$1${NC}"
echo -e "${BLUE}================================${NC}"
}
print_info() {
echo -e "${GREEN}[INFO]${NC} $1"
}
print_warn() {
echo -e "${YELLOW}[WARN]${NC} $1"
}
print_error() {
echo -e "${RED}[ERROR]${NC} $1"
}
check_dependencies() {
print_header "Checking Dependencies"
# Check rust
if ! command -v cargo >/dev/null 2>&1; then
print_error "Rust/Cargo is not installed"
exit 1
fi
# Check cargo-leptos for WASM frontend
if ! command -v cargo-leptos >/dev/null 2>&1; then
print_warn "cargo-leptos not found. Installing..."
cargo install cargo-leptos
fi
print_info "All dependencies available"
}
install_targets() {
print_header "Installing Rust Targets"
for platform in "${PLATFORMS[@]}"; do
print_info "Installing target: $platform"
rustup target add "$platform" || {
print_warn "Failed to install target $platform (may not be available on this host)"
}
done
# Add WASM target for leptos
print_info "Installing wasm32-unknown-unknown target for Leptos"
rustup target add wasm32-unknown-unknown
}
create_build_dirs() {
print_header "Setting up Build Directory"
rm -rf "$BUILD_DIR"
mkdir -p "$BUILD_DIR"
for platform in "${PLATFORMS[@]}"; do
mkdir -p "$BUILD_DIR/$platform"
done
mkdir -p "$BUILD_DIR/web"
print_info "Build directories created"
}
build_leptos_app() {
print_header "Building Leptos Web Frontend"
cd "$PROJECT_ROOT/crates/leptos-app"
# Build the WASM frontend
print_info "Building WASM frontend with cargo-leptos..."
cargo leptos build --release || {
print_error "Failed to build Leptos WASM frontend"
return 1
}
# Copy built assets to build directory
if [ -d "target/site" ]; then
cp -r target/site/* "$BUILD_DIR/web/"
print_info "Leptos frontend built and copied to $BUILD_DIR/web/"
else
print_error "Leptos build output not found at target/site"
return 1
fi
cd "$PROJECT_ROOT"
}
get_platform_features() {
local platform="$1"
local features=""
case "$platform" in
*-apple-darwin)
# macOS uses Metal but routes to CPU for Gemma stability
features=""
;;
*-unknown-linux-gnu|*-pc-windows-msvc)
# Linux and Windows can use CUDA if available
features=""
;;
*)
features=""
;;
esac
echo "$features"
}
build_binary_for_platform() {
local binary_name="$1"
local platform="$2"
local package_name="$3"
local additional_args="$4"
print_info "Building $binary_name for $platform"
local features=$(get_platform_features "$platform")
local feature_flag=""
if [ -n "$features" ]; then
feature_flag="--features $features"
fi
# Build command
local build_cmd="cargo build --release --target $platform --bin $binary_name"
if [ -n "$package_name" ]; then
build_cmd="$build_cmd --package $package_name"
fi
if [ -n "$additional_args" ]; then
build_cmd="$build_cmd $additional_args"
fi
if [ -n "$feature_flag" ]; then
build_cmd="$build_cmd $feature_flag"
fi
print_info "Running: $build_cmd"
if eval "$build_cmd"; then
# Copy binary to build directory
local target_dir="target/$platform/release"
local binary_file="$binary_name"
# Add .exe extension for Windows
if [[ "$platform" == *-pc-windows-msvc ]]; then
binary_file="$binary_name.exe"
fi
if [ -f "$target_dir/$binary_file" ]; then
cp "$target_dir/$binary_file" "$BUILD_DIR/$platform/"
print_info "$binary_name built and copied for $platform"
else
print_error "Binary not found: $target_dir/$binary_file"
return 1
fi
else
print_error "Failed to build $binary_name for $platform"
return 1
fi
}
build_for_platform() {
local platform="$1"
print_header "Building for $platform"
local failed_builds=()
# Build main binaries
for binary in "${MAIN_BINARIES[@]}"; do
if ! build_binary_for_platform "$binary" "$platform" "$binary" ""; then
failed_builds+=("$binary")
fi
done
# Build inference engine binaries with bin feature
for binary in "${INFERENCE_BINARIES[@]}"; do
if ! build_binary_for_platform "$binary" "$platform" "inference-engine" "--features bin"; then
failed_builds+=("$binary")
fi
done
# Build other workspace binaries
for binary in "${OTHER_BINARIES[@]}"; do
if ! build_binary_for_platform "$binary" "$platform" "$binary" ""; then
failed_builds+=("$binary")
fi
done
if [ ${#failed_builds[@]} -eq 0 ]; then
print_info "✓ All binaries built successfully for $platform"
else
print_warn "Some builds failed for $platform: ${failed_builds[*]}"
fi
}
create_archives() {
print_header "Creating Release Archives"
cd "$BUILD_DIR"
for platform in "${PLATFORMS[@]}"; do
if [ -d "$platform" ] && [ -n "$(ls -A "$platform" 2>/dev/null)" ]; then
local archive_name="predict-otron-9000-${platform}-${TIMESTAMP}"
print_info "Creating archive for $platform"
# Create platform-specific directory with all files
mkdir -p "$archive_name"
cp -r "$platform"/* "$archive_name/"
# Add web assets to each platform archive
if [ -d "web" ]; then
mkdir -p "$archive_name/web"
cp -r web/* "$archive_name/web/"
fi
# Create README for the platform
cat > "$archive_name/README.txt" << EOF
Predict-Otron-9000 - Platform: $platform
Build Date: $(date)
========================================
Binaries included:
$(ls -1 "$platform")
Web Frontend:
- Located in the 'web' directory
- Serve with any static file server on port 8788 or configure your server
Usage:
1. Start the main server: ./predict-otron-9000
2. Start embeddings service: ./embeddings-engine
3. Access web interface at http://localhost:8080 (served by main server)
For more information, visit: https://github.com/geoffsee/predict-otron-9000
EOF
# Create tar.gz archive
tar -czf "${archive_name}.tar.gz" "$archive_name"
rm -rf "$archive_name"
print_info "✓ Created ${archive_name}.tar.gz"
else
print_warn "No binaries found for $platform, skipping archive"
fi
done
cd "$PROJECT_ROOT"
}
generate_build_report() {
print_header "Build Report"
echo "Build completed at: $(date)"
echo "Build directory: $BUILD_DIR"
echo ""
echo "Archives created:"
ls -la "$BUILD_DIR"/*.tar.gz 2>/dev/null || echo "No archives created"
echo ""
echo "Platform directories:"
for platform in "${PLATFORMS[@]}"; do
if [ -d "$BUILD_DIR/$platform" ]; then
echo " $platform:"
ls -la "$BUILD_DIR/$platform" | sed 's/^/ /'
fi
done
if [ -d "$BUILD_DIR/web" ]; then
echo ""
echo "Web frontend assets:"
ls -la "$BUILD_DIR/web" | head -10 | sed 's/^/ /'
if [ $(ls -1 "$BUILD_DIR/web" | wc -l) -gt 10 ]; then
echo " ... and $(( $(ls -1 "$BUILD_DIR/web" | wc -l) - 10 )) more files"
fi
fi
}
main() {
print_header "Predict-Otron-9000 Cross-Platform Build Script"
cd "$PROJECT_ROOT"
check_dependencies
install_targets
create_build_dirs
# Build Leptos web frontend first
build_leptos_app
# Build for each platform
for platform in "${PLATFORMS[@]}"; do
build_for_platform "$platform"
done
create_archives
generate_build_report
print_header "Build Complete!"
print_info "All artifacts are available in: $BUILD_DIR"
}
# Handle command line arguments
case "${1:-}" in
--help|-h)
echo "Usage: $0 [options]"
echo ""
echo "Cross-platform build script for predict-otron-9000"
echo ""
echo "Options:"
echo " --help, -h Show this help message"
echo " --platforms Show supported platforms"
echo " --clean Clean build directory before building"
echo ""
echo "Supported platforms:"
for platform in "${PLATFORMS[@]}"; do
echo " - $platform"
done
echo ""
echo "Prerequisites:"
echo " - Rust toolchain with rustup"
echo " - cargo-leptos (will be installed if missing)"
echo " - Platform-specific toolchains for cross-compilation"
echo ""
exit 0
;;
--platforms)
echo "Supported platforms:"
for platform in "${PLATFORMS[@]}"; do
echo " - $platform"
done
exit 0
;;
--clean)
print_info "Cleaning build directory..."
rm -rf "$BUILD_DIR"
print_info "Build directory cleaned"
;;
esac
main "$@"


@@ -1,19 +0,0 @@
#!/usr/bin/env sh
set -e
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
TEMP_DIR="$SCRIPT_DIR/temp"
mkdir -p "$TEMP_DIR"
cp "$SCRIPT_DIR/cli.ts" "$TEMP_DIR/cli.ts"
cp "$SCRIPT_DIR/../package.json" "$TEMP_DIR/package.json"
(
cd "$TEMP_DIR"
bun i
bun build ./cli.ts --compile --outfile "$SCRIPT_DIR/cli"
)
rm -rf "$TEMP_DIR"