- Change default server host to localhost for improved security.

- Increase default maximum tokens in CLI configuration to 256.
- Refactor and reorganize the CLI.
This commit is contained in:
geoffsee
2025-08-27 21:47:24 -04:00
parent 766d41af78
commit 719beb3791
20 changed files with 1703 additions and 490 deletions

View File

@@ -44,6 +44,7 @@ axum = { version = "0.8.4", features = ["json"] }
tower = "0.5.2"
tower-http = { version = "0.6.6", features = ["cors"] }
tokio = { version = "1.43.0", features = ["full"] }
tokio-stream = { version = "0.1.16", features = ["sync"] }
either = { version = "1.9.0", features = ["serde"] }
utoipa = { version = "4.2.0", features = ["axum_extras"] }
uuid = { version = "1.7.0", features = ["v4"] }
@@ -80,4 +81,13 @@ tokio = "1.43.0"
# Crates compiled for the build script rather than for the library/binary itself.
[build-dependencies]
anyhow = { version = "1", features = ["backtrace"] }
# Optional: only built when a Cargo feature enables it.
# NOTE(review): presumably tied to a CUDA feature declared elsewhere in this manifest — confirm.
bindgen_cuda = { version = "0.1.1", optional = true }

# Deployment settings stored in Cargo's free-form `package.metadata` table.
# Cargo does not interpret this table; it is meant for external tooling.
# NOTE(review): the keys look like Kubernetes deployment parameters — confirm
# against whichever tool consumes `package.metadata.kube`.
[package.metadata.kube]
image = "ghcr.io/geoffsee/inference-service:latest"
port = 8080
replicas = 1
# Presumably Kubernetes resource quantities ("500m" = half a CPU core,
# "256Mi" = 256 mebibytes) — confirm.
resources.cpu = "500m"
resources.memory = "256Mi"
# Commented-out examples; uncomment and adjust to enable.
#ingress.host = "my-service.example.com"
#env = { RUST_LOG = "info", DATABASE_URL = "postgres://..." }