mirror of https://github.com/geoffsee/predict-otron-9001.git (synced 2025-09-08 22:46:44 +00:00)
use axum::{Router, serve};
use std::env;
use tokio::net::TcpListener;
use tower_http::cors::{Any, CorsLayer};
use tower_http::trace::TraceLayer;
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};

const DEFAULT_SERVER_HOST: &str = "0.0.0.0";
const DEFAULT_SERVER_PORT: &str = "8080";

#[tokio::main]
async fn main() {
    // Initialize tracing
    tracing_subscriber::registry()
        .with(
            tracing_subscriber::EnvFilter::try_from_default_env().unwrap_or_else(|_| {
                format!(
                    "{}=debug,tower_http=debug,axum::rejection=trace",
                    env!("CARGO_CRATE_NAME")
                )
                .into()
            }),
        )
        .with(tracing_subscriber::fmt::layer())
        .init();

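    // The fallback filter above only applies when RUST_LOG is unset; setting
    // RUST_LOG at runtime (e.g. RUST_LOG=info) takes precedence.
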
    // Create unified router by merging embeddings and inference routers
    let embeddings_router = embeddings_engine::create_embeddings_router();

    // Get the inference router directly from the inference engine
    let inference_router = inference_engine::create_inference_router();

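    // Note: embeddings_engine and inference_engine are assumed to be sibling
    // workspace crates whose create_*_router() constructors each return an
    // axum::Router carrying their own routes and state.
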
    // Create CORS layer
    let cors = CorsLayer::new()
        .allow_origin(Any)
        .allow_methods(Any)
        .allow_headers(Any);

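    // `Any` is fully permissive, which is convenient for local development; a
    // production deployment would typically pin origins instead, e.g. (sketch,
    // not part of this file):
    //   .allow_origin("https://example.com".parse::<HeaderValue>().unwrap())
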
    // Merge the routers
    let app = Router::new()
        .merge(embeddings_router)
        .merge(inference_router)
        .layer(cors)
        .layer(TraceLayer::new_for_http());

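    // Router::merge panics at startup if both routers register the same
    // method/path pair, so the two engines' routes must remain disjoint.
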
    // Server configuration
    let server_host = env::var("SERVER_HOST").unwrap_or_else(|_| DEFAULT_SERVER_HOST.to_string());
    let server_port = env::var("SERVER_PORT").unwrap_or_else(|_| DEFAULT_SERVER_PORT.to_string());
    let server_address = format!("{}:{}", server_host, server_port);

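    // Example override: SERVER_HOST=127.0.0.1 SERVER_PORT=3000 cargo run
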
    let listener = TcpListener::bind(&server_address).await.unwrap();
    tracing::info!(
        "Unified predict-otron-9000 server listening on {}",
        listener.local_addr().unwrap()
    );
    tracing::info!("Available endpoints:");
    tracing::info!("  GET / - Root endpoint from embeddings-engine");
    tracing::info!("  POST /v1/embeddings - Text embeddings");
    tracing::info!("  POST /v1/chat/completions - Chat completions");

    serve(listener, app).await.unwrap();
}

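// Example requests once the server is up (the payload shapes below are
// assumptions based on the OpenAI-style paths, not defined in this file):
//   curl -s http://localhost:8080/
//   curl -s http://localhost:8080/v1/embeddings \
//     -H 'Content-Type: application/json' \
//     -d '{"model": "...", "input": "hello world"}'
//   curl -s http://localhost:8080/v1/chat/completions \
//     -H 'Content-Type: application/json' \
//     -d '{"model": "...", "messages": [{"role": "user", "content": "hi"}]}'
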
// The chat completions handler that previously lived here is no longer needed:
// the inference_engine router serves /v1/chat/completions directly, including
// its own error handling.