supports small llama and gemma models

Refactor inference

dedicated crates for llama and gemma inferencing, not integrated
This commit is contained in:
geoffsee
2025-08-29 18:15:29 -04:00
parent d06b16bb12
commit 315ef17605
26 changed files with 2136 additions and 1402 deletions

View File

@@ -4,14 +4,16 @@ pub mod model;
pub mod text_generation;
pub mod utilities_lib;
pub mod openai_types;
pub mod cli;
// pub mod cli;
pub mod server;
pub mod inference;
// Re-export key components for easier access
pub use model::{Model, Which};
pub use text_generation::TextGeneration;
pub use token_output_stream::TokenOutputStream;
pub use server::{AppState, create_router};
pub use inference::ModelInference;
use std::env;
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};