chat client only displays available models

geoffsee
2025-09-01 22:29:54 -04:00
parent 545e0c9831
commit 2deecb5e51
20 changed files with 3314 additions and 484 deletions

View File

@@ -31,8 +31,8 @@ utoipa = { version = "4.2.0", features = ["axum_extras"] }
uuid = { version = "1.7.0", features = ["v4"] }
reborrow = "0.5.5"
futures-util = "0.3.31"
gemma-runner = { path = "../gemma-runner" }
llama-runner = { path = "../llama-runner" }
gemma-runner = { path = "../gemma-runner", features = ["metal"] }
llama-runner = { path = "../llama-runner", features = ["metal"]}
[target.'cfg(target_os = "macos")'.dependencies]
candle-core = { git = "https://github.com/huggingface/candle.git", features = ["metal"] }

View File

@@ -1,49 +1,9 @@
// use candle_core::Tensor;
use candle_transformers::models::csm::{LlamaConfig, LlamaModel};
use candle_transformers::models::gemma::{Config as Config1, Model as Model1};
use candle_transformers::models::gemma2::{Config as Config2, Model as Model2};
use candle_transformers::models::gemma3::{Config as Config3, Model as Model3};
-#[derive(Clone, Debug, Copy, PartialEq, Eq, clap::ValueEnum)]
-pub enum Which {
-#[value(name = "2b")]
-Base2B,
-#[value(name = "7b")]
-Base7B,
-#[value(name = "2b-it")]
-Instruct2B,
-#[value(name = "7b-it")]
-Instruct7B,
-#[value(name = "1.1-2b-it")]
-InstructV1_1_2B,
-#[value(name = "1.1-7b-it")]
-InstructV1_1_7B,
-#[value(name = "code-2b")]
-CodeBase2B,
-#[value(name = "code-7b")]
-CodeBase7B,
-#[value(name = "code-2b-it")]
-CodeInstruct2B,
-#[value(name = "code-7b-it")]
-CodeInstruct7B,
-#[value(name = "2-2b")]
-BaseV2_2B,
-#[value(name = "2-2b-it")]
-InstructV2_2B,
-#[value(name = "2-9b")]
-BaseV2_9B,
-#[value(name = "2-9b-it")]
-InstructV2_9B,
-#[value(name = "3-1b")]
-BaseV3_1B,
-#[value(name = "3-1b-it")]
-InstructV3_1B,
-#[value(name = "llama-3.2-1b-it")]
-LlamaInstruct3_2_1B,
-#[value(name = "llama-3.2-3b-it")]
-LlamaInstruct3_2_3B,
-}
#[derive(Clone, Debug)]
pub enum Model {
V1(Model1),
V2(Model2),
@@ -66,48 +26,127 @@ impl Model {
}
}
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum Family {
+GemmaV1,
+GemmaV2,
+GemmaV3,
+Llama,
+}
+#[derive(Clone, Copy, Debug)]
+pub struct ModelMeta {
+pub id: &'static str,
+pub family: Family,
+pub instruct: bool,
+}
+const fn m(id: &'static str, family: Family, instruct: bool) -> ModelMeta {
+ModelMeta { id, family, instruct }
+}
+#[derive(Clone, Debug, Copy, PartialEq, Eq, clap::ValueEnum)]
+pub enum Which {
+// Gemma 1.x
+#[value(name = "2b")]
+Base2B,
+#[value(name = "7b")]
+Base7B,
+#[value(name = "2b-it")]
+Instruct2B,
+#[value(name = "7b-it")]
+Instruct7B,
+#[value(name = "1.1-2b-it")]
+InstructV1_1_2B,
+#[value(name = "1.1-7b-it")]
+InstructV1_1_7B,
+// CodeGemma
+#[value(name = "code-2b")]
+CodeBase2B,
+#[value(name = "code-7b")]
+CodeBase7B,
+#[value(name = "code-2b-it")]
+CodeInstruct2B,
+#[value(name = "code-7b-it")]
+CodeInstruct7B,
+// Gemma 2
+#[value(name = "2-2b")]
+BaseV2_2B,
+#[value(name = "2-2b-it")]
+InstructV2_2B,
+#[value(name = "2-9b")]
+BaseV2_9B,
+#[value(name = "2-9b-it")]
+InstructV2_9B,
+// Gemma 3
+#[value(name = "3-1b")]
+BaseV3_1B,
+#[value(name = "3-1b-it")]
+InstructV3_1B,
+// Llama 3.2 (use aliases instead of duplicate variants)
+#[value(name = "llama-3.2-1b")]
+Llama32_1B,
+#[value(name = "llama-3.2-1b-it", alias = "llama-3.2-1b-instruct")]
+Llama32_1BInstruct,
+#[value(name = "llama-3.2-3b")]
+Llama32_3B,
+#[value(name = "llama-3.2-3b-it", alias = "llama-3.2-3b-instruct")]
+Llama32_3BInstruct,
+}
impl Which {
-pub fn to_model_id(&self) -> String {
+pub const fn meta(&self) -> ModelMeta {
+use Family::*;
match self {
Self::InstructV1_1_2B => "google/gemma-1.1-2b-it".to_string(),
Self::InstructV1_1_7B => "google/gemma-1.1-7b-it".to_string(),
Self::Base2B => "google/gemma-2b".to_string(),
Self::Base7B => "google/gemma-7b".to_string(),
Self::Instruct2B => "google/gemma-2b-it".to_string(),
Self::Instruct7B => "google/gemma-7b-it".to_string(),
Self::CodeBase2B => "google/codegemma-2b".to_string(),
Self::CodeBase7B => "google/codegemma-7b".to_string(),
Self::CodeInstruct2B => "google/codegemma-2b-it".to_string(),
Self::CodeInstruct7B => "google/codegemma-7b-it".to_string(),
Self::BaseV2_2B => "google/gemma-2-2b".to_string(),
Self::InstructV2_2B => "google/gemma-2-2b-it".to_string(),
Self::BaseV2_9B => "google/gemma-2-9b".to_string(),
Self::InstructV2_9B => "google/gemma-2-9b-it".to_string(),
Self::BaseV3_1B => "google/gemma-3-1b-pt".to_string(),
Self::InstructV3_1B => "google/gemma-3-1b-it".to_string(),
Self::LlamaInstruct3_2_1B => "meta-llama/Llama-3.2-1B-Instruct".to_string(),
Self::LlamaInstruct3_2_3B => "meta-llama/Llama-3.2-3B-Instruct".to_string(),
// Gemma 1.x
Self::Base2B => m("google/gemma-2b", GemmaV1, false),
Self::Base7B => m("google/gemma-7b", GemmaV1, false),
Self::Instruct2B => m("google/gemma-2b-it", GemmaV1, true),
Self::Instruct7B => m("google/gemma-7b-it", GemmaV1, true),
Self::InstructV1_1_2B => m("google/gemma-1.1-2b-it", GemmaV1, true),
Self::InstructV1_1_7B => m("google/gemma-1.1-7b-it", GemmaV1, true),
// CodeGemma
Self::CodeBase2B => m("google/codegemma-2b", GemmaV1, false),
Self::CodeBase7B => m("google/codegemma-7b", GemmaV1, false),
Self::CodeInstruct2B => m("google/codegemma-2b-it", GemmaV1, true),
Self::CodeInstruct7B => m("google/codegemma-7b-it", GemmaV1, true),
// Gemma 2
Self::BaseV2_2B => m("google/gemma-2-2b", GemmaV2, false),
Self::InstructV2_2B => m("google/gemma-2-2b-it", GemmaV2, true),
Self::BaseV2_9B => m("google/gemma-2-9b", GemmaV2, false),
Self::InstructV2_9B => m("google/gemma-2-9b-it", GemmaV2, true),
// Gemma 3
Self::BaseV3_1B => m("google/gemma-3-1b-pt", GemmaV3, false),
Self::InstructV3_1B => m("google/gemma-3-1b-it", GemmaV3, true),
// Llama 3.2
Self::Llama32_1B => m("meta-llama/Llama-3.2-1B", Llama, false),
Self::Llama32_1BInstruct => m("meta-llama/Llama-3.2-1B-Instruct", Llama, true),
Self::Llama32_3B => m("meta-llama/Llama-3.2-3B", Llama, false),
Self::Llama32_3BInstruct => m("meta-llama/Llama-3.2-3B-Instruct", Llama, true),
}
}
+pub fn to_model_id(&self) -> String {
+self.meta().id.to_string()
+}
pub fn is_instruct_model(&self) -> bool {
-match self {
-Self::Base2B
-| Self::Base7B
-| Self::CodeBase2B
-| Self::CodeBase7B
-| Self::BaseV2_2B
-| Self::BaseV2_9B
-| Self::BaseV3_1B => false,
-_ => true,
-}
+self.meta().instruct
}
pub fn is_v3_model(&self) -> bool {
-matches!(self, Self::BaseV3_1B | Self::InstructV3_1B)
+matches!(self.meta().family, Family::GemmaV3)
}
pub fn is_llama_model(&self) -> bool {
-matches!(self, Self::LlamaInstruct3_2_1B | Self::LlamaInstruct3_2_3B)
+matches!(self.meta().family, Family::Llama)
}
}
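
The refactor above folds the per-method match blocks into one metadata table: to_model_id, is_instruct_model, is_v3_model, and is_llama_model now all read from meta(). A minimal usage sketch follows; the pick_runner helper is hypothetical and not part of this commit:

fn pick_runner(which: Which) -> &'static str {
    // Route to a runner crate based on the family recorded in ModelMeta (illustrative only).
    match which.meta().family {
        Family::Llama => "llama-runner",
        Family::GemmaV1 | Family::GemmaV2 | Family::GemmaV3 => "gemma-runner",
    }
}

fn main() {
    let which = Which::Llama32_1BInstruct;
    assert_eq!(which.to_model_id(), "meta-llama/Llama-3.2-1B-Instruct");
    assert!(which.is_instruct_model() && which.is_llama_model());
    assert_eq!(pick_runner(which), "llama-runner");
}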

View File

@@ -42,13 +42,18 @@ pub struct AppState {
impl Default for AppState {
fn default() -> Self {
// Configure a default model to prevent 503 errors from the chat-ui
+// This can be overridden by environment variables if needed
+let default_model_id = std::env::var("DEFAULT_MODEL").unwrap_or_else(|_| "gemma-3-1b-it".to_string());
let gemma_config = GemmaInferenceConfig {
model: gemma_runner::WhichModel::InstructV3_1B,
..Default::default()
};
Self {
model_type: ModelType::Gemma,
model_id: "gemma-3-1b-it".to_string(),
model_id: default_model_id,
gemma_config: Some(gemma_config),
llama_config: None,
}
@@ -59,6 +64,34 @@ impl Default for AppState {
// Helper functions
// -------------------------
+fn model_id_to_which(model_id: &str) -> Option<Which> {
+let normalized = normalize_model_id(model_id);
+match normalized.as_str() {
+"gemma-2b" => Some(Which::Base2B),
+"gemma-7b" => Some(Which::Base7B),
+"gemma-2b-it" => Some(Which::Instruct2B),
+"gemma-7b-it" => Some(Which::Instruct7B),
+"gemma-1.1-2b-it" => Some(Which::InstructV1_1_2B),
+"gemma-1.1-7b-it" => Some(Which::InstructV1_1_7B),
+"codegemma-2b" => Some(Which::CodeBase2B),
+"codegemma-7b" => Some(Which::CodeBase7B),
+"codegemma-2b-it" => Some(Which::CodeInstruct2B),
+"codegemma-7b-it" => Some(Which::CodeInstruct7B),
+"gemma-2-2b" => Some(Which::BaseV2_2B),
+"gemma-2-2b-it" => Some(Which::InstructV2_2B),
+"gemma-2-9b" => Some(Which::BaseV2_9B),
+"gemma-2-9b-it" => Some(Which::InstructV2_9B),
+"gemma-3-1b" => Some(Which::BaseV3_1B),
+"gemma-3-1b-it" => Some(Which::InstructV3_1B),
+"llama-3.2-1b-instruct" => Some(Which::Llama32_1BInstruct),
+"llama-3.2-3b-instruct" => Some(Which::Llama32_3BInstruct),
+_ => None,
+}
+}
fn normalize_model_id(model_id: &str) -> String {
model_id.to_lowercase().replace("_", "-")
}
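
normalize_model_id lowercases the requested id and replaces underscores with hyphens before the table lookup above, so differently cased or underscored ids resolve to the same variant and anything unknown is rejected. An illustrative check, assuming it sits in the same module as these private helpers (not part of the commit):

#[test]
fn resolves_normalized_model_ids() {
    // Case and underscore differences normalize to the canonical id.
    assert_eq!(normalize_model_id("Gemma_3-1b-IT"), "gemma-3-1b-it");
    assert_eq!(model_id_to_which("Gemma_3-1b-IT"), Some(Which::InstructV3_1B));
    // Unknown ids return None and surface as a model_not_supported error.
    assert_eq!(model_id_to_which("not-a-real-model"), None);
}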
@@ -116,90 +149,76 @@ pub async fn chat_completions_non_streaming_proxy(
state: AppState,
request: ChatCompletionRequest,
) -> Result<impl IntoResponse, (StatusCode, Json<Value>)> {
-// Enforce model selection behavior: reject if a different model is requested
-let configured_model = state.model_id.clone();
-let requested_model = request.model.clone();
-if requested_model.to_lowercase() != "default" {
-let normalized_requested = normalize_model_id(&requested_model);
-let normalized_configured = normalize_model_id(&configured_model);
-if normalized_requested != normalized_configured {
+// Use the model specified in the request
+let model_id = request.model.clone();
+let which_model = model_id_to_which(&model_id);
+// Validate that the requested model is supported
+let which_model = match which_model {
+Some(model) => model,
+None => {
return Err((
StatusCode::BAD_REQUEST,
Json(serde_json::json!({
"error": {
"message": format!(
"Requested model '{}' is not available. This server is running '{}' only.",
requested_model, configured_model
),
"type": "model_mismatch"
"message": format!("Unsupported model: {}", model_id),
"type": "model_not_supported"
}
})),
));
}
-}
-let model_id = state.model_id.clone();
+};
let max_tokens = request.max_tokens.unwrap_or(1000);
// Build prompt based on model type
-let prompt = match state.model_type {
-ModelType::Gemma => build_gemma_prompt(&request.messages),
-ModelType::Llama => {
-// For Llama, just use the last user message for now
-request
-.messages
-.last()
-.and_then(|m| m.content.as_ref())
-.and_then(|c| match c {
-MessageContent(Either::Left(text)) => Some(text.clone()),
-_ => None,
-})
-.unwrap_or_default()
-}
+let prompt = if which_model.is_llama_model() {
+// For Llama, just use the last user message for now
+request
+.messages
+.last()
+.and_then(|m| m.content.as_ref())
+.and_then(|c| match c {
+MessageContent(Either::Left(text)) => Some(text.clone()),
+_ => None,
+})
+.unwrap_or_default()
+} else {
+build_gemma_prompt(&request.messages)
};
// Get streaming receiver based on model type
-let rx =
-match state.model_type {
-ModelType::Gemma => {
-if let Some(mut config) = state.gemma_config {
-config.prompt = prompt.clone();
-config.max_tokens = max_tokens;
-run_gemma_api(config).map_err(|e| (
-StatusCode::INTERNAL_SERVER_ERROR,
-Json(serde_json::json!({
-"error": { "message": format!("Error initializing Gemma model: {}", e) }
-}))
-))?
-} else {
-return Err((
-StatusCode::INTERNAL_SERVER_ERROR,
-Json(serde_json::json!({
-"error": { "message": "Gemma configuration not available" }
-})),
-));
-}
-}
-ModelType::Llama => {
-if let Some(mut config) = state.llama_config {
-config.prompt = prompt.clone();
-config.max_tokens = max_tokens;
-run_llama_inference(config).map_err(|e| (
-StatusCode::INTERNAL_SERVER_ERROR,
-Json(serde_json::json!({
-"error": { "message": format!("Error initializing Llama model: {}", e) }
-}))
-))?
-} else {
-return Err((
-StatusCode::INTERNAL_SERVER_ERROR,
-Json(serde_json::json!({
-"error": { "message": "Llama configuration not available" }
-})),
-));
-}
-}
+let rx = if which_model.is_llama_model() {
+// Create Llama configuration dynamically
+let mut config = LlamaInferenceConfig::default();
+config.prompt = prompt.clone();
+config.max_tokens = max_tokens;
+run_llama_inference(config).map_err(|e| (
+StatusCode::INTERNAL_SERVER_ERROR,
+Json(serde_json::json!({
+"error": { "message": format!("Error initializing Llama model: {}", e) }
+}))
+))?
+} else {
+// Create Gemma configuration dynamically
+let gemma_model = if which_model.is_v3_model() {
+gemma_runner::WhichModel::InstructV3_1B
+} else {
+gemma_runner::WhichModel::InstructV3_1B // Default fallback
+};
+let mut config = GemmaInferenceConfig {
+model: gemma_model,
+..Default::default()
+};
+config.prompt = prompt.clone();
+config.max_tokens = max_tokens;
+run_gemma_api(config).map_err(|e| (
+StatusCode::INTERNAL_SERVER_ERROR,
+Json(serde_json::json!({
+"error": { "message": format!("Error initializing Gemma model: {}", e) }
+}))
+))?
};
// Collect all tokens from the stream
let mut completion = String::new();
@@ -258,27 +277,25 @@ async fn handle_streaming_request(
state: AppState,
request: ChatCompletionRequest,
) -> Result<Sse<impl Stream<Item = Result<Event, Infallible>>>, (StatusCode, Json<Value>)> {
-// Validate requested model vs configured model
-let configured_model = state.model_id.clone();
-let requested_model = request.model.clone();
-if requested_model.to_lowercase() != "default" {
-let normalized_requested = normalize_model_id(&requested_model);
-let normalized_configured = normalize_model_id(&configured_model);
-if normalized_requested != normalized_configured {
+// Use the model specified in the request
+let model_id = request.model.clone();
+let which_model = model_id_to_which(&model_id);
+// Validate that the requested model is supported
+let which_model = match which_model {
+Some(model) => model,
+None => {
return Err((
StatusCode::BAD_REQUEST,
Json(serde_json::json!({
"error": {
"message": format!(
"Requested model '{}' is not available. This server is running '{}' only.",
requested_model, configured_model
),
"type": "model_mismatch"
"message": format!("Unsupported model: {}", model_id),
"type": "model_not_supported"
}
})),
));
}
-}
+};
// Generate a unique ID and metadata
let response_id = format!("chatcmpl-{}", Uuid::new_v4().to_string().replace('-', ""));
@@ -286,24 +303,22 @@ async fn handle_streaming_request(
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default()
.as_secs();
-let model_id = state.model_id.clone();
let max_tokens = request.max_tokens.unwrap_or(1000);
// Build prompt based on model type
-let prompt = match state.model_type {
-ModelType::Gemma => build_gemma_prompt(&request.messages),
-ModelType::Llama => {
-// For Llama, just use the last user message for now
-request
-.messages
-.last()
-.and_then(|m| m.content.as_ref())
-.and_then(|c| match c {
-MessageContent(Either::Left(text)) => Some(text.clone()),
-_ => None,
-})
-.unwrap_or_default()
-}
+let prompt = if which_model.is_llama_model() {
+// For Llama, just use the last user message for now
+request
+.messages
+.last()
+.and_then(|m| m.content.as_ref())
+.and_then(|c| match c {
+MessageContent(Either::Left(text)) => Some(text.clone()),
+_ => None,
+})
+.unwrap_or_default()
+} else {
+build_gemma_prompt(&request.messages)
};
tracing::debug!("Formatted prompt: {}", prompt);
@@ -330,51 +345,43 @@ async fn handle_streaming_request(
}
// Get streaming receiver based on model type
-let model_rx = match state.model_type {
-ModelType::Gemma => {
-if let Some(mut config) = state.gemma_config {
-config.prompt = prompt.clone();
-config.max_tokens = max_tokens;
-match run_gemma_api(config) {
-Ok(rx) => rx,
-Err(e) => {
-return Err((
-StatusCode::INTERNAL_SERVER_ERROR,
-Json(serde_json::json!({
-"error": { "message": format!("Error initializing Gemma model: {}", e) }
-})),
-));
-}
-}
-} else {
+let model_rx = if which_model.is_llama_model() {
+// Create Llama configuration dynamically
+let mut config = LlamaInferenceConfig::default();
+config.prompt = prompt.clone();
+config.max_tokens = max_tokens;
+match run_llama_inference(config) {
+Ok(rx) => rx,
+Err(e) => {
return Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({
"error": { "message": "Gemma configuration not available" }
"error": { "message": format!("Error initializing Llama model: {}", e) }
})),
));
}
}
-ModelType::Llama => {
-if let Some(mut config) = state.llama_config {
-config.prompt = prompt.clone();
-config.max_tokens = max_tokens;
-match run_llama_inference(config) {
-Ok(rx) => rx,
-Err(e) => {
-return Err((
-StatusCode::INTERNAL_SERVER_ERROR,
-Json(serde_json::json!({
-"error": { "message": format!("Error initializing Llama model: {}", e) }
-})),
-));
-}
-}
-} else {
+} else {
+// Create Gemma configuration dynamically
+let gemma_model = if which_model.is_v3_model() {
+gemma_runner::WhichModel::InstructV3_1B
+} else {
+gemma_runner::WhichModel::InstructV3_1B // Default fallback
+};
+let mut config = GemmaInferenceConfig {
+model: gemma_model,
+..Default::default()
+};
+config.prompt = prompt.clone();
+config.max_tokens = max_tokens;
+match run_gemma_api(config) {
+Ok(rx) => rx,
+Err(e) => {
return Err((
StatusCode::INTERNAL_SERVER_ERROR,
Json(serde_json::json!({
"error": { "message": "Llama configuration not available" }
"error": { "message": format!("Error initializing Gemma model: {}", e) }
})),
));
}
@@ -500,172 +507,69 @@ pub fn create_router(app_state: AppState) -> Router {
/// Handler for GET /v1/models - returns list of available models
pub async fn list_models() -> Json<ModelListResponse> {
// Get all available model variants from the Which enum
-let models = vec![
-// Gemma models
+let which_variants = vec![
+Which::Base2B,
+Which::Base7B,
+Which::Instruct2B,
+Which::Instruct7B,
+Which::InstructV1_1_2B,
+Which::InstructV1_1_7B,
+Which::CodeBase2B,
+Which::CodeBase7B,
+Which::CodeInstruct2B,
+Which::CodeInstruct7B,
+Which::BaseV2_2B,
+Which::InstructV2_2B,
+Which::BaseV2_9B,
+Which::InstructV2_9B,
+Which::BaseV3_1B,
+Which::InstructV3_1B,
+Which::Llama32_1B,
+Which::Llama32_1BInstruct,
+Which::Llama32_3B,
+Which::Llama32_3BInstruct,
+];
+let models: Vec<Model> = which_variants.into_iter().map(|which| {
+let meta = which.meta();
+let model_id = match which {
+Which::Base2B => "gemma-2b",
+Which::Base7B => "gemma-7b",
+Which::Instruct2B => "gemma-2b-it",
+Which::Instruct7B => "gemma-7b-it",
+Which::InstructV1_1_2B => "gemma-1.1-2b-it",
+Which::InstructV1_1_7B => "gemma-1.1-7b-it",
+Which::CodeBase2B => "codegemma-2b",
+Which::CodeBase7B => "codegemma-7b",
+Which::CodeInstruct2B => "codegemma-2b-it",
+Which::CodeInstruct7B => "codegemma-7b-it",
+Which::BaseV2_2B => "gemma-2-2b",
+Which::InstructV2_2B => "gemma-2-2b-it",
+Which::BaseV2_9B => "gemma-2-9b",
+Which::InstructV2_9B => "gemma-2-9b-it",
+Which::BaseV3_1B => "gemma-3-1b",
+Which::InstructV3_1B => "gemma-3-1b-it",
+Which::Llama32_1B => "llama-3.2-1b",
+Which::Llama32_1BInstruct => "llama-3.2-1b-instruct",
+Which::Llama32_3B => "llama-3.2-3b",
+Which::Llama32_3BInstruct => "llama-3.2-3b-instruct",
+};
+let owned_by = if meta.id.starts_with("google/") {
+"google"
+} else if meta.id.starts_with("meta-llama/") {
+"meta"
+} else {
+"unknown"
+};
Model {
id: "gemma-2b".to_string(),
id: model_id.to_string(),
object: "model".to_string(),
created: 1686935002, // Using same timestamp as OpenAI example
owned_by: "google".to_string(),
},
Model {
id: "gemma-7b".to_string(),
object: "model".to_string(),
created: 1686935002,
owned_by: "google".to_string(),
},
Model {
id: "gemma-2b-it".to_string(),
object: "model".to_string(),
created: 1686935002,
owned_by: "google".to_string(),
},
Model {
id: "gemma-7b-it".to_string(),
object: "model".to_string(),
created: 1686935002,
owned_by: "google".to_string(),
},
Model {
id: "gemma-1.1-2b-it".to_string(),
object: "model".to_string(),
created: 1686935002,
owned_by: "google".to_string(),
},
Model {
id: "gemma-1.1-7b-it".to_string(),
object: "model".to_string(),
created: 1686935002,
owned_by: "google".to_string(),
},
Model {
id: "codegemma-2b".to_string(),
object: "model".to_string(),
created: 1686935002,
owned_by: "google".to_string(),
},
Model {
id: "codegemma-7b".to_string(),
object: "model".to_string(),
created: 1686935002,
owned_by: "google".to_string(),
},
Model {
id: "codegemma-2b-it".to_string(),
object: "model".to_string(),
created: 1686935002,
owned_by: "google".to_string(),
},
Model {
id: "codegemma-7b-it".to_string(),
object: "model".to_string(),
created: 1686935002,
owned_by: "google".to_string(),
},
Model {
id: "gemma-2-2b".to_string(),
object: "model".to_string(),
created: 1686935002,
owned_by: "google".to_string(),
},
Model {
id: "gemma-2-2b-it".to_string(),
object: "model".to_string(),
created: 1686935002,
owned_by: "google".to_string(),
},
Model {
id: "gemma-2-9b".to_string(),
object: "model".to_string(),
created: 1686935002,
owned_by: "google".to_string(),
},
Model {
id: "gemma-2-9b-it".to_string(),
object: "model".to_string(),
created: 1686935002,
owned_by: "google".to_string(),
},
Model {
id: "gemma-3-1b".to_string(),
object: "model".to_string(),
created: 1686935002,
owned_by: "google".to_string(),
},
Model {
id: "gemma-3-1b-it".to_string(),
object: "model".to_string(),
created: 1686935002,
owned_by: "google".to_string(),
},
// Llama models
Model {
id: "llama-3.2-1b".to_string(),
object: "model".to_string(),
created: 1686935002,
owned_by: "meta".to_string(),
},
Model {
id: "llama-3.2-1b-instruct".to_string(),
object: "model".to_string(),
created: 1686935002,
owned_by: "meta".to_string(),
},
Model {
id: "llama-3.2-3b".to_string(),
object: "model".to_string(),
created: 1686935002,
owned_by: "meta".to_string(),
},
Model {
id: "llama-3.2-3b-instruct".to_string(),
object: "model".to_string(),
created: 1686935002,
owned_by: "meta".to_string(),
},
Model {
id: "smollm2-135m".to_string(),
object: "model".to_string(),
created: 1686935002,
owned_by: "huggingface".to_string(),
},
Model {
id: "smollm2-135m-instruct".to_string(),
object: "model".to_string(),
created: 1686935002,
owned_by: "huggingface".to_string(),
},
Model {
id: "smollm2-360m".to_string(),
object: "model".to_string(),
created: 1686935002,
owned_by: "huggingface".to_string(),
},
Model {
id: "smollm2-360m-instruct".to_string(),
object: "model".to_string(),
created: 1686935002,
owned_by: "huggingface".to_string(),
},
Model {
id: "smollm2-1.7b".to_string(),
object: "model".to_string(),
created: 1686935002,
owned_by: "huggingface".to_string(),
},
Model {
id: "smollm2-1.7b-instruct".to_string(),
object: "model".to_string(),
created: 1686935002,
owned_by: "huggingface".to_string(),
},
Model {
id: "tinyllama-1.1b-chat".to_string(),
object: "model".to_string(),
created: 1686935002,
owned_by: "tinyllama".to_string(),
},
];
owned_by: owned_by.to_string(),
}
}).collect();
Json(ModelListResponse {
object: "list".to_string(),