reorg + update docs with new paths

2025-09-08 22:46:44 +00:00 · 2025-09-04 12:27:13 -04:00
parent 400c70f17d
commit ff55d882c7
43 changed files with 493 additions and 182 deletions
--- a/crates/inference-engine/Cargo.toml
+++ b/crates/inference-engine/Cargo.toml
@@ -31,8 +31,9 @@ utoipa = { version = "4.2.0", features = ["axum_extras"] }
 uuid = { version = "1.7.0", features = ["v4"] }
 reborrow = "0.5.5"
 futures-util = "0.3.31"
-gemma-runner = { path = "../gemma-runner", features = ["metal"] }
-llama-runner = { path = "../llama-runner", features = ["metal"]}
+gemma-runner = { path = "../../integration/gemma-runner", features = ["metal"] }
+llama-runner = { path = "../../integration/llama-runner", features = ["metal"]}
+embeddings-engine = { path = "../embeddings-engine" }

 [target.'cfg(target_os = "macos")'.dependencies]
 candle-core = { git = "https://github.com/huggingface/candle.git", features = ["metal"] }
--- a/crates/inference-engine/src/server.rs
+++ b/crates/inference-engine/src/server.rs
@@ -19,6 +19,7 @@ use crate::openai_types::{
 };
 use crate::Which;
 use either::Either;
+use embeddings_engine::models_list;
 use gemma_runner::{run_gemma_api, GemmaInferenceConfig};
 use llama_runner::{run_llama_inference, LlamaInferenceConfig};
 use serde_json::Value;
@@ -530,7 +531,9 @@ pub async fn list_models() -> Json<ModelListResponse> {
        Which::Llama32_3BInstruct,
    ];

-    let models: Vec<Model> = which_variants.into_iter().map(|which| {
+
+
+    let mut models: Vec<Model> = which_variants.into_iter().map(|which| {
        let meta = which.meta();
        let model_id = match which {
            Which::Base2B => "gemma-2b",
@@ -566,11 +569,25 @@ pub async fn list_models() -> Json<ModelListResponse> {
        Model {
            id: model_id.to_string(),
            object: "model".to_string(),
-            created: 1686935002, // Using same timestamp as OpenAI example
+            created: 1686935002,
            owned_by: owned_by.to_string(),
        }
    }).collect();

+    // Get embeddings models and convert them to inference Model format
+    let embeddings_response = models_list().await;
+    let embeddings_models: Vec<Model> = embeddings_response.0.data.into_iter().map(|embedding_model| {
+        Model {
+            id: embedding_model.id,
+            object: embedding_model.object,
+            created: 1686935002,
+            owned_by: format!("{} - {}", embedding_model.owned_by, embedding_model.description),
+        }
+    }).collect();
+
+    // Add embeddings models to the main models list
+    models.extend(embeddings_models);
+
    Json(ModelListResponse {
        object: "list".to_string(),
        data: models,