# Cargo manifest for the `inference-engine` crate.
[package]
name = "inference-engine"
version = "0.1.0"
edition = "2021"

[dependencies]
# The three core candle crates are taken from the upstream git repository.
# No `rev`, `tag` or `branch` is given, so the commit actually built is
# whichever one Cargo.lock records.
candle-core = { git = "https://github.com/huggingface/candle.git" }
candle-nn = { git = "https://github.com/huggingface/candle.git" }
candle-transformers = { git = "https://github.com/huggingface/candle.git" }
# NOTE(review): these two are pinned to an exact crates.io release while the
# core candle crates above come from git; confirm they resolve against a
# compatible candle-core when the optional features are enabled.
candle-flash-attn = { version = "=0.9.1", optional = true }
candle-onnx = { version = "=0.9.1", optional = true }
serde = { version = "1.0.171", features = ["derive"] }
serde_json = "1.0.99"
symphonia = { version = "0.5.3", features = ["all"], optional = true }
tokenizers = { version = "0.21.0", default-features = false, features = ["onig", "http"] }
cpal = { version = "0.15.2", optional = true }
pdf2image = { version = "0.1.2", optional = true }
anyhow = "1.0.98"
clap = { version = "4.2.4", features = ["derive"] }
tracing = "0.1.37"
tracing-chrome = "0.7.1"
tracing-subscriber = { version = "0.3.7", features = ["env-filter"] }
axum = { version = "0.8.4", features = ["json"] }
tower = "0.5.2"
tower-http = { version = "0.6.6", features = ["cors"] }
tokio = { version = "1.43.0", features = ["full"] }
tokio-stream = { version = "0.1.16", features = ["sync"] }
either = { version = "1.9.0", features = ["serde"] }
utoipa = { version = "4.2.0", features = ["axum_extras"] }
uuid = { version = "1.7.0", features = ["v4"] }
reborrow = "0.5.5"
futures-util = "0.3.31"
# Sibling crates referenced by relative path.
gemma-runner = { path = "../gemma-runner" }
llama-runner = { path = "../llama-runner" }

# On macOS the candle crates are additionally built with the `metal` feature.
# These entries repeat the git source used in [dependencies] and only add the
# feature on top of it.
[target.'cfg(target_os = "macos")'.dependencies]
candle-core = { git = "https://github.com/huggingface/candle.git", features = ["metal"] }
candle-nn = { git = "https://github.com/huggingface/candle.git", features = ["metal"] }
candle-transformers = { git = "https://github.com/huggingface/candle.git", features = ["metal"] }

[dev-dependencies]
anyhow = { version = "1", features = ["backtrace"] }
byteorder = "1.4.3"
clap = { version = "4.2.4", features = ["derive"] }
imageproc = { version = "0.24.0", default-features = false }
memmap2 = { version = "0.9.3", features = ["stable_deref_trait"] }
rand = "0.9.0"
ab_glyph = "0.2.23"
tracing = "0.1.37"
tracing-chrome = "0.7.1"
tracing-subscriber = "0.3.7"
# Necessary to disambiguate with tokio in wasm examples which are 1.28.1
tokio = "1.43.0"

[build-dependencies]
anyhow = { version = "1", features = ["backtrace"] }
# Optional build-time dependency; nothing in [features] below names it.
bindgen_cuda = { version = "0.1.1", optional = true }

[features]
bin = []

# Custom metadata tables: Cargo itself ignores [package.metadata.*]; they are
# read by external tooling.
[package.metadata.compose]
image = "ghcr.io/geoffsee/inference-engine:latest"
port = 8080

# generates kubernetes manifests
# NOTE(review): this image is named `inference-service`, whereas the compose
# image above is `inference-engine` -- confirm the difference is intentional.
[package.metadata.kube]
image = "ghcr.io/geoffsee/inference-service:latest"
replicas = 1
port = 8080