chat client only displays available models

geoffsee
2025-09-01 22:29:54 -04:00
parent 545e0c9831
commit 2deecb5e51
20 changed files with 3314 additions and 484 deletions


@@ -82,7 +82,7 @@ impl Default for LlamaInferenceConfig {
             // Performance flags
             no_kv_cache: false, // keep cache ON for speed
-            use_flash_attn: true, // great speed boost if supported
+            use_flash_attn: false, // great speed boost if supported
             // Precision: bf16 is a good default on Ampere+; fallback to fp16 if needed.
             dtype: Some("bf16".to_string()),
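For context, a minimal Rust sketch of how the changed Default impl might read after this commit. Only no_kv_cache, use_flash_attn, and dtype appear in the hunk above; the struct definition and everything around those three fields is an assumption for illustration, not the repository's actual code.

// Hedged sketch: only the three fields from the diff are taken from the commit;
// the struct shape and derives around them are assumptions.
#[derive(Debug, Clone)]
pub struct LlamaInferenceConfig {
    pub no_kv_cache: bool,
    pub use_flash_attn: bool,
    pub dtype: Option<String>,
}

impl Default for LlamaInferenceConfig {
    fn default() -> Self {
        Self {
            // Performance flags
            no_kv_cache: false,    // keep cache ON for speed
            use_flash_attn: false, // turned off by this commit; only a win where the kernel is supported
            // Precision: bf16 is a good default on Ampere+; fall back to fp16 if needed.
            dtype: Some("bf16".to_string()),
        }
    }
}

Presumably the default was flipped for portability: flash attention gives a speed boost on hardware and builds that support it, but defaulting it on can break or degrade inference elsewhere, so opting in per deployment is the safer baseline.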