supports small llama and gemma models

Refactor inference

dedicated crates for llama and gemma inferencing, not integrated
This commit is contained in:
geoffsee
2025-08-29 18:15:29 -04:00
parent d06b16bb12
commit 315ef17605
26 changed files with 2136 additions and 1402 deletions

369
Cargo.lock generated
View File

@@ -686,6 +686,15 @@ dependencies = [
"generic-array",
]
[[package]]
name = "block2"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "340d2f0bdb2a43c1d3cd40513185b2bd7def0aa1052f956455114bc98f82dcf2"
dependencies = [
"objc2",
]
[[package]]
name = "brotli"
version = "3.5.0"
@@ -786,8 +795,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9f51e2ecf6efe9737af8f993433c839f956d2b6ed4fd2dd4a7c6d8b0fa667ff"
dependencies = [
"byteorder",
"candle-kernels",
"candle-metal-kernels",
"candle-kernels 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
"candle-metal-kernels 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
"cudarc",
"gemm 0.17.1",
"half",
@@ -807,6 +816,35 @@ dependencies = [
"zip",
]
[[package]]
name = "candle-core"
version = "0.9.1"
source = "git+https://github.com/huggingface/candle.git#06387ae55d8db4b5d29564d0e1e350246bc458af"
dependencies = [
"byteorder",
"candle-kernels 0.9.1 (git+https://github.com/huggingface/candle.git)",
"candle-metal-kernels 0.9.1 (git+https://github.com/huggingface/candle.git)",
"cudarc",
"float8",
"gemm 0.17.1",
"half",
"memmap2",
"num-traits",
"num_cpus",
"objc2-foundation",
"objc2-metal",
"rand 0.9.2",
"rand_distr 0.5.1",
"rayon",
"safetensors",
"thiserror 1.0.69",
"ug",
"ug-cuda",
"ug-metal",
"yoke 0.7.5",
"zip",
]
[[package]]
name = "candle-datasets"
version = "0.9.1"
@@ -814,15 +852,35 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0a7c351dd50cda83f00f17c4412e35c69d840e453edf06064974de1cc59343d"
dependencies = [
"byteorder",
"candle-core",
"candle-nn",
"hf-hub",
"candle-core 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
"candle-nn 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
"hf-hub 0.4.3",
"image",
"memmap2",
"parquet",
"rand 0.9.2",
"thiserror 1.0.69",
"tokenizers",
"tokenizers 0.21.4",
]
[[package]]
name = "candle-examples"
version = "0.9.1"
source = "git+https://github.com/huggingface/candle.git#06387ae55d8db4b5d29564d0e1e350246bc458af"
dependencies = [
"anyhow",
"candle-core 0.9.1 (git+https://github.com/huggingface/candle.git)",
"candle-nn 0.9.1 (git+https://github.com/huggingface/candle.git)",
"candle-transformers 0.9.1 (git+https://github.com/huggingface/candle.git)",
"csv",
"hf-hub 0.4.3",
"image",
"num-traits",
"rayon",
"safetensors",
"serde",
"serde_json",
"tokenizers 0.21.4",
]
[[package]]
@@ -833,7 +891,7 @@ checksum = "fb38a5bfae09c4ae73fd00039e5eaf97a7d6d9400cc35ee8e603fc4a5f9cb0a3"
dependencies = [
"anyhow",
"bindgen_cuda",
"candle-core",
"candle-core 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
"half",
]
@@ -846,6 +904,14 @@ dependencies = [
"bindgen_cuda",
]
[[package]]
name = "candle-kernels"
version = "0.9.1"
source = "git+https://github.com/huggingface/candle.git#06387ae55d8db4b5d29564d0e1e350246bc458af"
dependencies = [
"bindgen_cuda",
]
[[package]]
name = "candle-metal-kernels"
version = "0.9.1"
@@ -859,13 +925,27 @@ dependencies = [
"tracing",
]
[[package]]
name = "candle-metal-kernels"
version = "0.9.1"
source = "git+https://github.com/huggingface/candle.git#06387ae55d8db4b5d29564d0e1e350246bc458af"
dependencies = [
"half",
"objc2",
"objc2-foundation",
"objc2-metal",
"once_cell",
"thiserror 1.0.69",
"tracing",
]
[[package]]
name = "candle-nn"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1980d53280c8f9e2c6cbe1785855d7ff8010208b46e21252b978badf13ad69d"
dependencies = [
"candle-core",
"candle-core 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
"half",
"num-traits",
"rayon",
@@ -874,14 +954,30 @@ dependencies = [
"thiserror 1.0.69",
]
[[package]]
name = "candle-nn"
version = "0.9.1"
source = "git+https://github.com/huggingface/candle.git#06387ae55d8db4b5d29564d0e1e350246bc458af"
dependencies = [
"candle-core 0.9.1 (git+https://github.com/huggingface/candle.git)",
"candle-metal-kernels 0.9.1 (git+https://github.com/huggingface/candle.git)",
"half",
"num-traits",
"objc2-metal",
"rayon",
"safetensors",
"serde",
"thiserror 1.0.69",
]
[[package]]
name = "candle-onnx"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a8fa227a8176fd9b8fb58d63c908c08ad3af1503ee6fcd058be072a598044d2"
dependencies = [
"candle-core",
"candle-nn",
"candle-core 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
"candle-nn 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
"prost",
"prost-build",
]
@@ -893,8 +989,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "186cb80045dbe47e0b387ea6d3e906f02fb3056297080d9922984c90e90a72b0"
dependencies = [
"byteorder",
"candle-core",
"candle-nn",
"candle-core 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
"candle-nn 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
"fancy-regex",
"num-traits",
"rand 0.9.2",
"rayon",
"serde",
"serde_json",
"serde_plain",
"tracing",
]
[[package]]
name = "candle-transformers"
version = "0.9.1"
source = "git+https://github.com/huggingface/candle.git#06387ae55d8db4b5d29564d0e1e350246bc458af"
dependencies = [
"byteorder",
"candle-core 0.9.1 (git+https://github.com/huggingface/candle.git)",
"candle-nn 0.9.1 (git+https://github.com/huggingface/candle.git)",
"fancy-regex",
"num-traits",
"rand 0.9.2",
@@ -1523,6 +1637,15 @@ dependencies = [
"dirs-sys 0.4.1",
]
[[package]]
name = "dirs"
version = "5.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225"
dependencies = [
"dirs-sys 0.4.1",
]
[[package]]
name = "dirs"
version = "6.0.0"
@@ -1556,6 +1679,16 @@ dependencies = [
"windows-sys 0.60.2",
]
[[package]]
name = "dispatch2"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89a09f22a6c6069a18470eb92d2298acf25463f14256d24778e1230d789a2aec"
dependencies = [
"bitflags 2.9.2",
"objc2",
]
[[package]]
name = "displaydoc"
version = "0.2.5"
@@ -1715,6 +1848,9 @@ name = "esaxx-rs"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6"
dependencies = [
"cc",
]
[[package]]
name = "event-listener"
@@ -1793,14 +1929,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04c269a76bfc6cea69553b7d040acb16c793119cebd97c756d21e08d0f075ff8"
dependencies = [
"anyhow",
"hf-hub",
"hf-hub 0.4.3",
"image",
"ndarray",
"ort",
"ort-sys",
"rayon",
"serde_json",
"tokenizers",
"tokenizers 0.21.4",
]
[[package]]
@@ -1856,6 +1992,18 @@ dependencies = [
"miniz_oxide",
]
[[package]]
name = "float8"
version = "0.2.1"
source = "git+https://github.com/zackangelo/float8?branch=cudarc_0_16#03c1f5fe7cdb2f9cb690823fdd40593be57c408f"
dependencies = [
"cudarc",
"half",
"num-traits",
"rand 0.9.2",
"rand_distr 0.5.1",
]
[[package]]
name = "fnv"
version = "1.0.7"
@@ -2246,6 +2394,24 @@ dependencies = [
"seq-macro",
]
[[package]]
name = "gemma-runner"
version = "0.1.0"
dependencies = [
"anyhow",
"candle-core 0.9.1 (git+https://github.com/huggingface/candle.git)",
"candle-examples",
"candle-nn 0.9.1 (git+https://github.com/huggingface/candle.git)",
"candle-transformers 0.9.1 (git+https://github.com/huggingface/candle.git)",
"clap",
"hf-hub 0.4.3",
"serde_json",
"tokenizers 0.21.4",
"tracing",
"tracing-chrome",
"tracing-subscriber",
]
[[package]]
name = "generic-array"
version = "0.14.7"
@@ -2421,19 +2587,48 @@ version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "helm-chart-tool"
version = "0.1.0"
dependencies = [
"anyhow",
"clap",
"serde",
"serde_json",
"toml 0.8.23",
"walkdir",
]
[[package]]
name = "hermit-abi"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
[[package]]
name = "hf-hub"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b780635574b3d92f036890d8373433d6f9fc7abb320ee42a5c25897fc8ed732"
dependencies = [
"dirs 5.0.1",
"indicatif",
"log",
"native-tls",
"rand 0.8.5",
"serde",
"serde_json",
"thiserror 1.0.69",
"ureq",
]
[[package]]
name = "hf-hub"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "629d8f3bbeda9d148036d6b0de0a3ab947abd08ce90626327fc3547a49d59d97"
dependencies = [
"dirs",
"dirs 6.0.0",
"futures",
"http",
"indicatif",
@@ -2842,12 +3037,12 @@ dependencies = [
"axum",
"bindgen_cuda",
"byteorder",
"candle-core",
"candle-core 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
"candle-datasets",
"candle-flash-attn",
"candle-nn",
"candle-nn 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
"candle-onnx",
"candle-transformers",
"candle-transformers 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
"clap",
"cpal",
"csv",
@@ -2855,11 +3050,13 @@ dependencies = [
"either",
"enterpolation",
"futures-util",
"gemma-runner",
"half",
"hf-hub",
"hf-hub 0.4.3",
"image",
"imageproc",
"intel-mkl-src",
"llama-runner",
"memmap2",
"num-traits",
"palette",
@@ -2873,7 +3070,7 @@ dependencies = [
"serde",
"serde_json",
"symphonia",
"tokenizers",
"tokenizers 0.21.4",
"tokio",
"tokio-stream",
"tower",
@@ -2981,6 +3178,15 @@ version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "itertools"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
dependencies = [
"either",
]
[[package]]
name = "itertools"
version = "0.12.1"
@@ -3405,7 +3611,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667"
dependencies = [
"cfg-if",
"windows-targets 0.53.3",
"windows-targets 0.48.5",
]
[[package]]
@@ -3443,6 +3649,20 @@ version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956"
[[package]]
name = "llama-runner"
version = "0.1.0"
dependencies = [
"anyhow",
"candle-core 0.9.1 (git+https://github.com/huggingface/candle.git)",
"candle-nn 0.9.1 (git+https://github.com/huggingface/candle.git)",
"candle-transformers 0.9.1 (git+https://github.com/huggingface/candle.git)",
"clap",
"hf-hub 0.3.2",
"serde_json",
"tokenizers 0.20.4",
]
[[package]]
name = "lock_api"
version = "0.4.13"
@@ -3965,6 +4185,59 @@ dependencies = [
"objc_exception",
]
[[package]]
name = "objc2"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "561f357ba7f3a2a61563a186a163d0a3a5247e1089524a3981d49adb775078bc"
dependencies = [
"objc2-encode",
]
[[package]]
name = "objc2-core-foundation"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c10c2894a6fed806ade6027bcd50662746363a9589d3ec9d9bef30a4e4bc166"
dependencies = [
"bitflags 2.9.2",
"dispatch2",
"objc2",
]
[[package]]
name = "objc2-encode"
version = "4.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef25abbcd74fb2609453eb695bd2f860d389e457f67dc17cafc8b8cbc89d0c33"
[[package]]
name = "objc2-foundation"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "900831247d2fe1a09a683278e5384cfb8c80c79fe6b166f9d14bfdde0ea1b03c"
dependencies = [
"bitflags 2.9.2",
"block2",
"libc",
"objc2",
"objc2-core-foundation",
]
[[package]]
name = "objc2-metal"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f246c183239540aab1782457b35ab2040d4259175bd1d0c58e46ada7b47a874"
dependencies = [
"bitflags 2.9.2",
"block2",
"dispatch2",
"objc2",
"objc2-core-foundation",
"objc2-foundation",
]
[[package]]
name = "objc_exception"
version = "0.1.2"
@@ -4803,7 +5076,7 @@ dependencies = [
"once_cell",
"socket2 0.5.10",
"tracing",
"windows-sys 0.59.0",
"windows-sys 0.52.0",
]
[[package]]
@@ -5006,6 +5279,17 @@ dependencies = [
"rayon-core",
]
[[package]]
name = "rayon-cond"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "059f538b55efd2309c9794130bc149c6a553db90e9d99c2030785c82f0bd7df9"
dependencies = [
"either",
"itertools 0.11.0",
"rayon",
]
[[package]]
name = "rayon-cond"
version = "0.4.0"
@@ -6267,7 +6551,7 @@ dependencies = [
"getrandom 0.3.3",
"once_cell",
"rustix",
"windows-sys 0.59.0",
"windows-sys 0.52.0",
]
[[package]]
@@ -6384,6 +6668,38 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]]
name = "tokenizers"
version = "0.20.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b08cc37428a476fc9e20ac850132a513a2e1ce32b6a31addf2b74fa7033b905"
dependencies = [
"aho-corasick",
"derive_builder",
"esaxx-rs",
"getrandom 0.2.16",
"indicatif",
"itertools 0.12.1",
"lazy_static",
"log",
"macro_rules_attribute",
"monostate",
"onig",
"paste",
"rand 0.8.5",
"rayon",
"rayon-cond 0.3.0",
"regex",
"regex-syntax 0.8.5",
"serde",
"serde_json",
"spm_precompiled",
"thiserror 1.0.69",
"unicode-normalization-alignments",
"unicode-segmentation",
"unicode_categories",
]
[[package]]
name = "tokenizers"
version = "0.21.4"
@@ -6397,7 +6713,8 @@ dependencies = [
"derive_builder",
"esaxx-rs",
"getrandom 0.3.3",
"hf-hub",
"hf-hub 0.4.3",
"indicatif",
"itertools 0.14.0",
"log",
"macro_rules_attribute",
@@ -6406,7 +6723,7 @@ dependencies = [
"paste",
"rand 0.9.2",
"rayon",
"rayon-cond",
"rayon-cond 0.4.0",
"regex",
"regex-syntax 0.8.5",
"serde",
@@ -7260,7 +7577,7 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
dependencies = [
"windows-sys 0.59.0",
"windows-sys 0.48.0",
]
[[package]]