predict-otron-9000 serves a leptos SSR frontend

This commit is contained in:
geoffsee
2025-08-28 12:06:22 -04:00
parent 45d7cd8819
commit e38a2d4512
30 changed files with 1321 additions and 901 deletions

878
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -3,7 +3,7 @@ members = [
"crates/predict-otron-9000",
"crates/inference-engine",
"crates/embeddings-engine",
"crates/leptos-chat"
"crates/leptos-app"
]
default-members = ["crates/predict-otron-9000"]
resolver = "2"
@@ -12,6 +12,5 @@ resolver = "2"
[[workspace.metadata.leptos]]
# project name
name = "leptos-project"
bin-package = "leptos-chat"
lib-package = "leptos-chat"
bin-package = "leptos-app"
lib-package = "leptos-app"

View File

@@ -0,0 +1,3 @@
# Ensure getrandom works on wasm32-unknown-unknown without needing manual RUSTFLAGS
[target.wasm32-unknown-unknown]
rustflags = ["--cfg", "getrandom_backend=\"wasm_js\""]

14
crates/leptos-app/.gitignore vendored Normal file
View File

@@ -0,0 +1,14 @@
# Generated by Cargo
# will have compiled files and executables
debug/
target/
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock
# These are backup files generated by rustfmt
**/*.rs.bk
# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb

View File

@@ -0,0 +1,135 @@
[package]
name = "leptos-app"
version = "0.1.0"
edition = "2021"
[lib]
crate-type = ["cdylib", "rlib"]
[dependencies]
leptos = { version = "0.8.0" }
leptos_router = { version = "0.8.0" }
axum = { version = "0.8.0", optional = true }
console_error_panic_hook = { version = "0.1", optional = true }
leptos_axum = { version = "0.8.0", optional = true }
leptos_meta = { version = "0.8.0" }
tokio = { version = "1", features = ["rt-multi-thread"], optional = true }
wasm-bindgen = { version = "=0.2.100", optional = true }
# Chat interface dependencies
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
async-openai-wasm = { version = "0.29", default-features = false }
futures-util = "0.3"
js-sys = { version = "0.3", optional = true }
either = { version = "1.9", features = ["serde"] }
web-sys = { version = "0.3", optional = true, features = [
"console",
"Window",
"Document",
"Element",
"HtmlElement",
"HtmlInputElement",
"HtmlSelectElement",
"HtmlTextAreaElement",
"Event",
"EventTarget",
"KeyboardEvent",
] }
[dependencies.uuid]
version = "1.0"
features = [
"v4",
"fast-rng",
"macro-diagnostics",
"js",
]
[features]
hydrate = [
"leptos/hydrate",
"dep:console_error_panic_hook",
"dep:wasm-bindgen",
"dep:js-sys",
"dep:web-sys",
]
ssr = [
"dep:axum",
"dep:tokio",
"dep:leptos_axum",
"leptos/ssr",
"leptos_meta/ssr",
"leptos_router/ssr",
]
# Defines a size-optimized profile for the WASM bundle in release mode
[profile.wasm-release]
inherits = "release"
opt-level = 'z'
lto = true
codegen-units = 1
panic = "abort"
[package.metadata.leptos]
# The name used by wasm-bindgen/cargo-leptos for the JS/WASM bundle. Defaults to the crate name
output-name = "leptos-app"
# The site root folder is where cargo-leptos generate all output. WARNING: all content of this folder will be erased on a rebuild. Use it in your server setup.
site-root = "target/site"
# The site-root relative folder where all compiled output (JS, WASM and CSS) is written
# Defaults to pkg
site-pkg-dir = "pkg"
# [Optional] The source CSS file. If it ends with .sass or .scss then it will be compiled by dart-sass into CSS. The CSS is optimized by Lightning CSS before being written to <site-root>/<site-pkg>/app.css
style-file = "style/main.scss"
# Assets source dir. All files found here will be copied and synchronized to site-root.
# The assets-dir cannot have a sub directory with the same name/path as site-pkg-dir.
#
# Optional. Env: LEPTOS_ASSETS_DIR.
assets-dir = "public"
# The IP and port (ex: 127.0.0.1:3000) where the server serves the content. Use it in your server setup.
site-addr = "127.0.0.1:3000"
# The port to use for automatic reload monitoring
reload-port = 3001
# [Optional] Command to use when running end2end tests. It will run in the end2end dir.
# [Windows] for non-WSL use "npx.cmd playwright test"
# This binary name can be checked in Powershell with Get-Command npx
end2end-cmd = "npx playwright test"
end2end-dir = "end2end"
# The browserlist query used for optimizing the CSS.
browserquery = "defaults"
# The environment Leptos will run in, usually either "DEV" or "PROD"
env = "DEV"
# The features to use when compiling the bin target
#
# Optional. Can be over-ridden with the command line parameter --bin-features
bin-features = ["ssr"]
# If the --no-default-features flag should be used when compiling the bin target
#
# Optional. Defaults to false.
bin-default-features = false
# The features to use when compiling the lib target
#
# Optional. Can be over-ridden with the command line parameter --lib-features
lib-features = ["hydrate"]
# If the --no-default-features flag should be used when compiling the lib target
#
# Optional. Defaults to false.
lib-default-features = false
# The profile to use for the lib target when compiling for release
#
# Optional. Defaults to "release".
lib-profile-release = "wasm-release"

View File

@@ -0,0 +1,21 @@
# Build stage
FROM rust:1-alpine AS builder
# Install build dependencies
RUN apk add --no-cache npm nodejs musl-dev pkgconfig openssl-dev git curl bash
RUN curl -L --proto '=https' --tlsv1.2 -sSf https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh | bash
WORKDIR /app
# Copy manifest first (cache deps)
COPY . .
# Install cargo-leptos
RUN cargo binstall cargo-leptos
# Build release artifacts
RUN cargo leptos build --release
EXPOSE 8788
CMD ["cargo", "leptos", "serve", "--release"]

3
crates/leptos-app/end2end/.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
node_modules
playwright-report
test-results

View File

@@ -0,0 +1,15 @@
{
"name": "end2end",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {},
"keywords": [],
"author": "",
"license": "ISC",
"devDependencies": {
"@playwright/test": "^1.44.1",
"@types/node": "^20.12.12",
"typescript": "^5.4.5"
}
}

View File

@@ -0,0 +1,105 @@
import type { PlaywrightTestConfig } from "@playwright/test";
import { devices, defineConfig } from "@playwright/test";
/**
* Read environment variables from file.
* https://github.com/motdotla/dotenv
*/
// require('dotenv').config();
/**
* See https://playwright.dev/docs/test-configuration.
*/
export default defineConfig({
testDir: "./tests",
/* Maximum time one test can run for. */
timeout: 30 * 1000,
expect: {
/**
* Maximum time expect() should wait for the condition to be met.
* For example in `await expect(locator).toHaveText();`
*/
timeout: 5000,
},
/* Run tests in files in parallel */
fullyParallel: true,
/* Fail the build on CI if you accidentally left test.only in the source code. */
forbidOnly: !!process.env.CI,
/* Retry on CI only */
retries: process.env.CI ? 2 : 0,
/* Opt out of parallel tests on CI. */
workers: process.env.CI ? 1 : undefined,
/* Reporter to use. See https://playwright.dev/docs/test-reporters */
reporter: "html",
/* Shared settings for all the projects below. See https://playwright.dev/docs/api/class-testoptions. */
use: {
/* Maximum time each action such as `click()` can take. Defaults to 0 (no limit). */
actionTimeout: 0,
/* Base URL to use in actions like `await page.goto('/')`. */
// baseURL: 'http://localhost:3000',
/* Collect trace when retrying the failed test. See https://playwright.dev/docs/trace-viewer */
trace: "on-first-retry",
},
/* Configure projects for major browsers */
projects: [
{
name: "chromium",
use: {
...devices["Desktop Chrome"],
},
},
{
name: "firefox",
use: {
...devices["Desktop Firefox"],
},
},
{
name: "webkit",
use: {
...devices["Desktop Safari"],
},
},
/* Test against mobile viewports. */
// {
// name: 'Mobile Chrome',
// use: {
// ...devices['Pixel 5'],
// },
// },
// {
// name: 'Mobile Safari',
// use: {
// ...devices['iPhone 12'],
// },
// },
/* Test against branded browsers. */
// {
// name: 'Microsoft Edge',
// use: {
// channel: 'msedge',
// },
// },
// {
// name: 'Google Chrome',
// use: {
// channel: 'chrome',
// },
// },
],
/* Folder for test artifacts such as screenshots, videos, traces, etc. */
// outputDir: 'test-results/',
/* Run your local dev server before starting the tests */
// webServer: {
// command: 'npm run start',
// port: 3000,
// },
});

View File

@@ -0,0 +1,9 @@
import { test, expect } from "@playwright/test";
test("homepage has title and heading text", async ({ page }) => {
await page.goto("http://localhost:3000/");
await expect(page).toHaveTitle("Welcome to Leptos");
await expect(page.locator("h1")).toHaveText("Welcome to Leptos!");
});

View File

@@ -0,0 +1,109 @@
{
"compilerOptions": {
/* Visit https://aka.ms/tsconfig to read more about this file */
/* Projects */
// "incremental": true, /* Save .tsbuildinfo files to allow for incremental compilation of projects. */
// "composite": true, /* Enable constraints that allow a TypeScript project to be used with project references. */
// "tsBuildInfoFile": "./.tsbuildinfo", /* Specify the path to .tsbuildinfo incremental compilation file. */
// "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects. */
// "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. */
// "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */
/* Language and Environment */
"target": "es2016", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */
// "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */
// "jsx": "preserve", /* Specify what JSX code is generated. */
// "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */
// "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */
// "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */
// "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */
// "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */
// "reactNamespace": "", /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */
// "noLib": true, /* Disable including any library files, including the default lib.d.ts. */
// "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */
// "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */
/* Modules */
"module": "commonjs", /* Specify what module code is generated. */
// "rootDir": "./", /* Specify the root folder within your source files. */
// "moduleResolution": "node10", /* Specify how TypeScript looks up a file from a given module specifier. */
// "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */
// "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */
// "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */
// "typeRoots": [], /* Specify multiple folders that act like './node_modules/@types'. */
// "types": [], /* Specify type package names to be included without being referenced in a source file. */
// "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */
// "moduleSuffixes": [], /* List of file name suffixes to search when resolving a module. */
// "allowImportingTsExtensions": true, /* Allow imports to include TypeScript file extensions. Requires '--moduleResolution bundler' and either '--noEmit' or '--emitDeclarationOnly' to be set. */
// "resolvePackageJsonExports": true, /* Use the package.json 'exports' field when resolving package imports. */
// "resolvePackageJsonImports": true, /* Use the package.json 'imports' field when resolving imports. */
// "customConditions": [], /* Conditions to set in addition to the resolver-specific defaults when resolving imports. */
// "resolveJsonModule": true, /* Enable importing .json files. */
// "allowArbitraryExtensions": true, /* Enable importing files with any extension, provided a declaration file is present. */
// "noResolve": true, /* Disallow 'import's, 'require's or '<reference>'s from expanding the number of files TypeScript should add to a project. */
/* JavaScript Support */
// "allowJs": true, /* Allow JavaScript files to be a part of your program. Use the 'checkJS' option to get errors from these files. */
// "checkJs": true, /* Enable error reporting in type-checked JavaScript files. */
// "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from 'node_modules'. Only applicable with 'allowJs'. */
/* Emit */
// "declaration": true, /* Generate .d.ts files from TypeScript and JavaScript files in your project. */
// "declarationMap": true, /* Create sourcemaps for d.ts files. */
// "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */
// "sourceMap": true, /* Create source map files for emitted JavaScript files. */
// "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */
// "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If 'declaration' is true, also designates a file that bundles all .d.ts output. */
// "outDir": "./", /* Specify an output folder for all emitted files. */
// "removeComments": true, /* Disable emitting comments. */
// "noEmit": true, /* Disable emitting files from a compilation. */
// "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. */
// "importsNotUsedAsValues": "remove", /* Specify emit/checking behavior for imports that are only used for types. */
// "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */
// "sourceRoot": "", /* Specify the root path for debuggers to find the reference source code. */
// "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */
// "inlineSources": true, /* Include source code in the sourcemaps inside the emitted JavaScript. */
// "emitBOM": true, /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */
// "newLine": "crlf", /* Set the newline character for emitting files. */
// "stripInternal": true, /* Disable emitting declarations that have '@internal' in their JSDoc comments. */
// "noEmitHelpers": true, /* Disable generating custom helper functions like '__extends' in compiled output. */
// "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */
// "preserveConstEnums": true, /* Disable erasing 'const enum' declarations in generated code. */
// "declarationDir": "./", /* Specify the output directory for generated declaration files. */
// "preserveValueImports": true, /* Preserve unused imported values in the JavaScript output that would otherwise be removed. */
/* Interop Constraints */
// "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */
// "verbatimModuleSyntax": true, /* Do not transform or elide any imports or exports not marked as type-only, ensuring they are written in the output file's format based on the 'module' setting. */
// "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */
"esModuleInterop": true, /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */
// "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */
"forceConsistentCasingInFileNames": true, /* Ensure that casing is correct in imports. */
/* Type Checking */
"strict": true, /* Enable all strict type-checking options. */
// "noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied 'any' type. */
// "strictNullChecks": true, /* When type checking, take into account 'null' and 'undefined'. */
// "strictFunctionTypes": true, /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */
// "strictBindCallApply": true, /* Check that the arguments for 'bind', 'call', and 'apply' methods match the original function. */
// "strictPropertyInitialization": true, /* Check for class properties that are declared but not set in the constructor. */
// "noImplicitThis": true, /* Enable error reporting when 'this' is given the type 'any'. */
// "useUnknownInCatchVariables": true, /* Default catch clause variables as 'unknown' instead of 'any'. */
// "alwaysStrict": true, /* Ensure 'use strict' is always emitted. */
// "noUnusedLocals": true, /* Enable error reporting when local variables aren't read. */
// "noUnusedParameters": true, /* Raise an error when a function parameter isn't read. */
// "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */
// "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. */
// "noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. */
// "noUncheckedIndexedAccess": true, /* Add 'undefined' to a type when accessed using an index. */
// "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */
// "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type. */
// "allowUnusedLabels": true, /* Disable error reporting for unused labels. */
// "allowUnreachableCode": true, /* Disable error reporting for unreachable code. */
/* Completeness */
// "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */
"skipLibCheck": true /* Skip type checking all .d.ts files. */
}
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

View File

@@ -1,12 +1,23 @@
use leptos::*;
use leptos_meta::*;
use leptos_router::*;
use leptos::prelude::*;
use leptos_meta::{provide_meta_context, MetaTags, Stylesheet, Title};
use leptos_router::{
components::{Route, Router, Routes},
StaticSegment,
};
#[cfg(feature = "hydrate")]
use serde::{Deserialize, Serialize};
#[cfg(feature = "hydrate")]
use std::collections::VecDeque;
#[cfg(feature = "hydrate")]
use uuid::Uuid;
#[cfg(feature = "hydrate")]
use js_sys::Date;
#[cfg(feature = "hydrate")]
use web_sys::{HtmlInputElement, KeyboardEvent, SubmitEvent};
#[cfg(feature = "hydrate")]
use futures_util::StreamExt;
#[cfg(feature = "hydrate")]
use async_openai_wasm::{
types::{
ChatCompletionRequestAssistantMessageArgs, ChatCompletionRequestSystemMessageArgs,
@@ -14,9 +25,14 @@ use async_openai_wasm::{
},
Client,
};
#[cfg(feature = "hydrate")]
use async_openai_wasm::config::OpenAIConfig;
use async_openai_wasm::types::{ChatCompletionResponseStream, Model, Role, FinishReason};
#[cfg(feature = "hydrate")]
use async_openai_wasm::types::{Role, FinishReason};
#[cfg(feature = "hydrate")]
use leptos::task::spawn_local;
#[cfg(feature = "hydrate")]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Message {
pub id: String,
@@ -25,12 +41,15 @@ pub struct Message {
pub timestamp: f64,
}
#[cfg(feature = "hydrate")]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MessageContent(pub either::Either<String, Vec<std::collections::HashMap<String, MessageInnerContent>>>);
#[cfg(feature = "hydrate")]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MessageInnerContent(pub either::Either<String, std::collections::HashMap<String, String>>);
#[cfg(feature = "hydrate")]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatMessage {
pub role: String,
@@ -38,59 +57,12 @@ pub struct ChatMessage {
pub name: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatRequest {
pub model: String,
pub messages: Vec<ChatMessage>,
pub max_tokens: Option<usize>,
pub temperature: Option<f64>,
pub top_p: Option<f64>,
pub stream: Option<bool>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatResponse {
pub id: String,
pub object: String,
pub created: u64,
pub model: String,
pub choices: Vec<Choice>,
pub usage: Usage,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Choice {
pub index: usize,
pub message: ChatMessage,
pub finish_reason: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Usage {
pub prompt_tokens: usize,
pub completion_tokens: usize,
pub total_tokens: usize,
}
#[component]
pub fn App() -> impl IntoView {
provide_meta_context();
view! {
<Stylesheet id="leptos" href="/style/main.css"/>
<Title text="Chat Interface"/>
<Router>
<main>
<Routes>
<Route path="/" view=ChatInterface/>
</Routes>
</main>
</Router>
}
}
#[cfg(feature = "hydrate")]
const DEFAULT_MODEL: &str = "default";
#[cfg(feature = "hydrate")]
async fn fetch_available_models() -> Result<Vec<OpenAIModel>, String> {
log::info!("[DEBUG_LOG] fetch_available_models: Starting model fetch from http://localhost:8080/v1");
leptos::logging::log!("[DEBUG_LOG] fetch_available_models: Starting model fetch from http://localhost:8080/v1");
let config = OpenAIConfig::new().with_api_base("http://localhost:8080/v1".to_string());
let client = Client::with_config(config);
@@ -98,126 +70,110 @@ async fn fetch_available_models() -> Result<Vec<OpenAIModel>, String> {
match client.models().list().await {
Ok(response) => {
let model_count = response.data.len();
log::info!("[DEBUG_LOG] fetch_available_models: Successfully fetched {} models", model_count);
leptos::logging::log!("[DEBUG_LOG] fetch_available_models: Successfully fetched {} models", model_count);
if model_count > 0 {
let model_names: Vec<String> = response.data.iter().map(|m| m.id.clone()).collect();
log::debug!("[DEBUG_LOG] fetch_available_models: Available models: {:?}", model_names);
leptos::logging::log!("[DEBUG_LOG] fetch_available_models: Available models: {:?}", model_names);
} else {
log::warn!("[DEBUG_LOG] fetch_available_models: No models returned by server");
leptos::logging::log!("[DEBUG_LOG] fetch_available_models: No models returned by server");
}
Ok(response.data)
},
Err(e) => {
log::error!("[DEBUG_LOG] fetch_available_models: Failed to fetch models: {:?}", e);
let error_details = format!("{:?}", e);
if error_details.contains("400") || error_details.contains("Bad Request") {
log::error!("[DEBUG_LOG] fetch_available_models: HTTP 400 - Server rejected models request");
} else if error_details.contains("404") || error_details.contains("Not Found") {
log::error!("[DEBUG_LOG] fetch_available_models: HTTP 404 - Models endpoint not found");
} else if error_details.contains("Connection") || error_details.contains("connection") {
log::error!("[DEBUG_LOG] fetch_available_models: Connection error - server may be down");
}
leptos::logging::log!("[DEBUG_LOG] fetch_available_models: Failed to fetch models: {:?}", e);
Err(format!("Failed to fetch models: {}", e))
}
}
}
async fn send_chat_request(chat_request: ChatRequest) -> ChatCompletionResponseStream {
let config = OpenAIConfig::new().with_api_base("http://localhost:8080/v1".to_string());
let client = Client::with_config(config);
let mut typed_chat = async_openai_wasm::types::CreateChatCompletionRequest {
messages: vec![],
model: "".to_string(),
store: None,
reasoning_effort: None,
metadata: None,
frequency_penalty: None,
logit_bias: None,
logprobs: None,
top_logprobs: None,
max_tokens: None,
max_completion_tokens: None,
n: None,
modalities: None,
prediction: None,
audio: None,
presence_penalty: None,
response_format: None,
seed: None,
service_tier: None,
stop: None,
stream: None,
stream_options: None,
temperature: None,
top_p: None,
tools: None,
tool_choice: None,
parallel_tool_calls: None,
user: None,
function_call: None,
functions: None,
web_search_options: None,
extra_params: None,
};
typed_chat.messages = chat_request.messages
.iter()
.map(|msg| {
let content = match &msg.content {
Some(MessageContent(either::Either::Left(text))) => text.clone(),
_ => "".to_string()
};
let role = msg.role.clone();
match role.as_str() {
"system" => ChatCompletionRequestSystemMessageArgs::default()
.content(content)
.build()
.expect("failed to build system message")
.into(),
"user" => ChatCompletionRequestUserMessageArgs::default()
.content(content)
.build()
.expect("failed to build user message")
.into(),
"assistant" => ChatCompletionRequestAssistantMessageArgs::default()
.content(content)
.build()
.expect("failed to build assistant message")
.into(),
_ => ChatCompletionRequestUserMessageArgs::default()
.content(content)
.build()
.expect("failed to build default message")
.into()
}
})
.collect();
client.chat().create_stream(typed_chat).await.unwrap()
pub fn shell(options: LeptosOptions) -> impl IntoView {
view! {
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<AutoReload options=options.clone() />
<HydrationScripts options/>
<MetaTags/>
</head>
<body>
<App/>
</body>
</html>
}
}
// #[cfg(not(target_arch = "wasm32"))]
// async fn send_chat_request(_chat_request: ChatRequest) -> Result<ChatResponse, String> {
// Err("leptos-chat chat request only supported on wasm32 target".to_string())
// }
#[component]
pub fn App() -> impl IntoView {
// Provides context that manages stylesheets, titles, meta tags, etc.
provide_meta_context();
const DEFAULT_MODEL: &str = "default";
view! {
// injects a stylesheet into the document <head>
// id=leptos means cargo-leptos will hot-reload this stylesheet
<Stylesheet id="leptos" href="/pkg/leptos-app.css"/>
// sets the document title
<Title text="Chat Interface"/>
// content for this chat interface
<Router>
<main>
<Routes fallback=|| "Page not found.".into_view()>
<Route path=StaticSegment("") view=ChatInterface/>
</Routes>
</main>
</Router>
}
}
/// Renders the home page of your application.
#[component]
fn HomePage() -> impl IntoView {
// Creates a reactive value to update the button
let count = RwSignal::new(0);
let on_click = move |_| *count.write() += 1;
view! {
<h1>"Welcome to Leptos!"</h1>
<button on:click=on_click>"Click Me: " {count}</button>
}
}
/// Renders the chat interface
#[component]
fn ChatInterface() -> impl IntoView {
let (messages, set_messages) = create_signal::<VecDeque<Message>>(VecDeque::new());
let (input_value, set_input_value) = create_signal(String::new());
let (is_loading, set_is_loading) = create_signal(false);
let (available_models, set_available_models) = create_signal::<Vec<OpenAIModel>>(Vec::new());
let (selected_model, set_selected_model) = create_signal(DEFAULT_MODEL.to_string());
let (models_loading, set_models_loading) = create_signal(false);
#[cfg(feature = "hydrate")]
{
ChatInterfaceImpl()
}
#[cfg(not(feature = "hydrate"))]
{
view! {
<div class="chat-container">
<h1>"Chat Interface"</h1>
<p>"Loading chat interface..."</p>
</div>
}
}
}
#[cfg(feature = "hydrate")]
#[component]
fn ChatInterfaceImpl() -> impl IntoView {
let (messages, set_messages) = RwSignal::new(VecDeque::<Message>::new()).split();
let (input_value, set_input_value) = RwSignal::new(String::new()).split();
let (is_loading, set_is_loading) = RwSignal::new(false).split();
let (available_models, set_available_models) = RwSignal::new(Vec::<OpenAIModel>::new()).split();
let (selected_model, set_selected_model) = RwSignal::new(DEFAULT_MODEL.to_string()).split();
let (models_loading, set_models_loading) = RwSignal::new(false).split();
// Fetch models on component initialization
create_effect(move |_| {
Effect::new(move |_| {
spawn_local(async move {
set_models_loading.set(true);
match fetch_available_models().await {
@@ -226,8 +182,7 @@ fn ChatInterface() -> impl IntoView {
set_models_loading.set(false);
}
Err(e) => {
log::error!("Failed to fetch models: {}", e);
// Set a default model if fetching fails
leptos::logging::log!("Failed to fetch models: {}", e);
set_available_models.set(vec![]);
set_models_loading.set(false);
}
@@ -235,17 +190,15 @@ fn ChatInterface() -> impl IntoView {
});
});
let send_message = create_action(move |content: &String| {
let send_message = Action::new_unsync(move |content: &String| {
let content = content.clone();
async move {
if content.trim().is_empty() {
log::debug!("[DEBUG_LOG] send_message: Empty content, skipping");
leptos::logging::log!("[DEBUG_LOG] send_message: Empty content, skipping");
return;
}
log::info!("[DEBUG_LOG] send_message: Starting message send process");
log::debug!("[DEBUG_LOG] send_message: User message content length: {}", content.len());
leptos::logging::log!("[DEBUG_LOG] send_message: Starting message send process");
set_is_loading.set(true);
// Add user message to chat
@@ -269,43 +222,26 @@ fn ChatInterface() -> impl IntoView {
chat_messages.push(system_message.into());
// Add history messages
let history_count = messages.with_untracked(|msgs| {
let count = msgs.len();
for msg in msgs.iter() {
match msg.role.as_str() {
"user" => {
let message = ChatCompletionRequestUserMessageArgs::default()
.content(msg.content.clone())
.build()
.expect("failed to build user message");
chat_messages.push(message.into());
}
"assistant" => {
let message = ChatCompletionRequestAssistantMessageArgs::default()
.content(msg.content.clone())
.build()
.expect("failed to build assistant message");
chat_messages.push(message.into());
}
"system" => {
let message = ChatCompletionRequestSystemMessageArgs::default()
.content(msg.content.clone())
.build()
.expect("failed to build system message");
chat_messages.push(message.into());
}
_ => {
// Default to user message for unknown roles
let message = ChatCompletionRequestUserMessageArgs::default()
.content(msg.content.clone())
.build()
.expect("failed to build default message");
chat_messages.push(message.into());
}
let history_count = messages.get_untracked().len();
for msg in messages.get_untracked().iter() {
match msg.role.as_str() {
"user" => {
let message = ChatCompletionRequestUserMessageArgs::default()
.content(msg.content.clone())
.build()
.expect("failed to build user message");
chat_messages.push(message.into());
}
"assistant" => {
let message = ChatCompletionRequestAssistantMessageArgs::default()
.content(msg.content.clone())
.build()
.expect("failed to build assistant message");
chat_messages.push(message.into());
}
_ => {}
}
count
});
}
// Add current user message
let message = ChatCompletionRequestUserMessageArgs::default()
@@ -317,45 +253,36 @@ fn ChatInterface() -> impl IntoView {
let current_model = selected_model.get_untracked();
let total_messages = chat_messages.len();
log::info!("[DEBUG_LOG] send_message: Preparing request - model: '{}', history_count: {}, total_messages: {}",
current_model, history_count, total_messages);
leptos::logging::log!("[DEBUG_LOG] send_message: Preparing request - model: '{}', history_count: {}, total_messages: {}",
current_model, history_count, total_messages);
let request = CreateChatCompletionRequestArgs::default()
.model(current_model.as_str())
.max_tokens(512u32)
.messages(chat_messages)
.stream(true) // ensure server streams
.stream(true)
.build()
.expect("failed to build request");
// Log request details for debugging server issues
log::info!("[DEBUG_LOG] send_message: Request configuration - model: '{}', max_tokens: 512, stream: true, messages_count: {}",
current_model, total_messages);
log::debug!("[DEBUG_LOG] send_message: Request details - history_messages: {}, system_messages: 1, user_messages: {}",
history_count, total_messages - history_count - 1);
// Send request
let config = OpenAIConfig::new().with_api_base("http://localhost:8080/v1".to_string());
let client = Client::with_config(config);
log::info!("[DEBUG_LOG] send_message: Sending request to http://localhost:8080/v1 with model: '{}'", current_model);
leptos::logging::log!("[DEBUG_LOG] send_message: Sending request to http://localhost:8080/v1 with model: '{}'", current_model);
match client.chat().create_stream(request).await {
Ok(mut stream) => {
log::info!("[DEBUG_LOG] send_message: Successfully created stream, starting to receive response");
leptos::logging::log!("[DEBUG_LOG] send_message: Successfully created stream");
// Defer creating assistant message until we receive role=assistant from the stream
let mut assistant_created = false;
let mut content_appended = false;
let mut chunks_received = 0;
// Stream loop: handle deltas and finish events
while let Some(next) = stream.next().await {
match next {
Ok(chunk) => {
chunks_received += 1;
if let Some(choice) = chunk.choices.get(0) {
// 1) Create assistant message when role arrives
if !assistant_created {
if let Some(role) = &choice.delta.role {
if role == &Role::Assistant {
@@ -373,10 +300,8 @@ fn ChatInterface() -> impl IntoView {
}
}
// 2) Append content tokens when provided
if let Some(content) = &choice.delta.content {
if !content.is_empty() {
// If content arrives before role, create assistant message now
if !assistant_created {
assistant_created = true;
let assistant_id = Uuid::new_v4().to_string();
@@ -401,23 +326,21 @@ fn ChatInterface() -> impl IntoView {
}
}
// 3) Stop on finish_reason=="stop" (mirrors [DONE])
if let Some(reason) = &choice.finish_reason {
if reason == &FinishReason::Stop {
log::info!("[DEBUG_LOG] send_message: Received finish_reason=stop after {} chunks", chunks_received);
leptos::logging::log!("[DEBUG_LOG] send_message: Received finish_reason=stop after {} chunks", chunks_received);
break;
}
}
}
}
Err(e) => {
log::error!("[DEBUG_LOG] send_message: Stream error after {} chunks: {:?}", chunks_received, e);
log::error!("[DEBUG_LOG] send_message: Stream error details - model: '{}', chunks_received: {}", current_model, chunks_received);
leptos::logging::log!("[DEBUG_LOG] send_message: Stream error after {} chunks: {:?}", chunks_received, e);
set_messages.update(|msgs| {
msgs.push_back(Message {
id: Uuid::new_v4().to_string(),
role: "system".to_string(),
content: format!("Stream error after {} chunks: {}", chunks_received, e),
content: format!("Stream error: {}", e),
timestamp: Date::now(),
});
});
@@ -426,7 +349,6 @@ fn ChatInterface() -> impl IntoView {
}
}
// Cleanup: If we created an assistant message but no content ever arrived, remove the empty message
if assistant_created && !content_appended {
set_messages.update(|msgs| {
let should_pop = msgs
@@ -434,45 +356,19 @@ fn ChatInterface() -> impl IntoView {
.map(|m| m.role == "assistant" && m.content.is_empty())
.unwrap_or(false);
if should_pop {
log::info!("[DEBUG_LOG] send_message: Removing empty assistant message (no content received)");
msgs.pop_back();
}
});
}
log::info!("[DEBUG_LOG] send_message: Stream completed successfully, received {} chunks", chunks_received);
leptos::logging::log!("[DEBUG_LOG] send_message: Stream completed successfully, received {} chunks", chunks_received);
}
Err(e) => {
// Detailed error logging for different types of errors
log::error!("[DEBUG_LOG] send_message: Request failed with error: {:?}", e);
log::error!("[DEBUG_LOG] send_message: Request context - model: '{}', total_messages: {}, endpoint: http://localhost:8080/v1",
current_model, total_messages);
// Try to extract more specific error information
let error_details = format!("{:?}", e);
let user_message = if error_details.contains("400") || error_details.contains("Bad Request") {
log::error!("[DEBUG_LOG] send_message: HTTP 400 Bad Request detected - possible issues:");
log::error!("[DEBUG_LOG] send_message: - Invalid model name: '{}'", current_model);
log::error!("[DEBUG_LOG] send_message: - Invalid message format or content");
log::error!("[DEBUG_LOG] send_message: - Server configuration issue");
format!("Error: HTTP 400 Bad Request - Check model '{}' and message format. See console for details.", current_model)
} else if error_details.contains("404") || error_details.contains("Not Found") {
log::error!("[DEBUG_LOG] send_message: HTTP 404 Not Found - server endpoint may be incorrect");
"Error: HTTP 404 Not Found - Server endpoint not found".to_string()
} else if error_details.contains("500") || error_details.contains("Internal Server Error") {
log::error!("[DEBUG_LOG] send_message: HTTP 500 Internal Server Error - server-side issue");
"Error: HTTP 500 Internal Server Error - Server problem".to_string()
} else if error_details.contains("Connection") || error_details.contains("connection") {
log::error!("[DEBUG_LOG] send_message: Connection error - server may be down");
"Error: Cannot connect to server at http://localhost:8080".to_string()
} else {
format!("Error: Request failed - {}", e)
};
leptos::logging::log!("[DEBUG_LOG] send_message: Request failed with error: {:?}", e);
let error_message = Message {
id: Uuid::new_v4().to_string(),
role: "system".to_string(),
content: user_message,
content: format!("Error: Request failed - {}", e),
timestamp: Date::now(),
};
set_messages.update(|msgs| msgs.push_back(error_message));
@@ -524,7 +420,7 @@ fn ChatInterface() -> impl IntoView {
</div>
}
})
.collect_view()
.collect::<Vec<_>>()
};
let loading_indicator = move || {
@@ -551,21 +447,21 @@ fn ChatInterface() -> impl IntoView {
>
{move || {
if models_loading.get() {
view! {
<option value="">"Loading models..."</option>
}.into_view()
vec![view! {
<option value={String::from("")} selected=false>{String::from("Loading models...")}</option>
}]
} else {
let models = available_models.get();
if models.is_empty() {
view! {
<option selected=true value="gemma-3b-it">"gemma-3b-it (default)"</option>
}.into_view()
vec![view! {
<option value={String::from("default")} selected=true>{String::from("default")}</option>
}]
} else {
models.into_iter().map(|model| {
view! {
<option value=model.id.clone() selected={model.id == DEFAULT_MODEL}>{model.id}</option>
<option value=model.id.clone() selected={model.id == DEFAULT_MODEL}>{model.id.clone()}</option>
}
}).collect_view()
}).collect::<Vec<_>>()
}
}
}}
@@ -596,14 +492,3 @@ fn ChatInterface() -> impl IntoView {
</div>
}
}
/// Browser entry point: wasm-bindgen invokes this automatically when the
/// WASM module finishes loading (the `start` attribute).
#[wasm_bindgen::prelude::wasm_bindgen(start)]
pub fn main() {
    // Set up error handling and logging for WebAssembly:
    // panics are surfaced in the browser console instead of an opaque trap.
    console_error_panic_hook::set_once();
    // Route `log` crate output to the browser console; Debug level for development.
    console_log::init_with_level(log::Level::Debug).expect("error initializing logger");
    // Mount the App component to the document body (client-side rendering).
    leptos::mount_to_body(App)
}

View File

@@ -0,0 +1,30 @@
pub mod app;

/// Client-side entry point for `hydrate` builds: attaches Leptos reactivity
/// to the DOM that the server already rendered, instead of re-creating it.
#[cfg(feature = "hydrate")]
#[wasm_bindgen::prelude::wasm_bindgen]
pub fn hydrate() {
    use crate::app::*;
    // Surface panics in the browser console rather than a silent WASM trap.
    console_error_panic_hook::set_once();
    leptos::mount::hydrate_body(App);
}
/// Builds an axum `Router` that serves the Leptos frontend under SSR.
///
/// Loads the Leptos configuration (site address, output directories) from the
/// default location, registers every route declared by `App`, and falls back
/// to static-file serving / error-shell rendering for anything else. Intended
/// to be merged into a host server's router (see predict-otron-9000).
#[cfg(feature = "ssr")]
pub fn create_leptos_router() -> axum::Router {
    use axum::Router;
    use leptos::prelude::*;
    use leptos_axum::{generate_route_list, LeptosRoutes};
    use crate::app::*;

    // `None` => use the configuration from Cargo.toml [package.metadata.leptos].
    let conf = get_configuration(None).unwrap();
    let leptos_options = conf.leptos_options;
    // Generate the list of routes in your Leptos App
    let routes = generate_route_list(App);

    Router::new()
        .leptos_routes(&leptos_options, routes, {
            // Clone so the per-request closure can hand out fresh options.
            let leptos_options = leptos_options.clone();
            move || shell(leptos_options.clone())
        })
        // Paths not matched above: serve static assets or render the error page.
        .fallback(leptos_axum::file_and_error_handler(shell))
        .with_state(leptos_options)
}

View File

@@ -0,0 +1,39 @@
/// Standalone SSR server entry point: builds the Leptos/axum router and
/// serves it on the site address from the Leptos configuration.
#[cfg(feature = "ssr")]
#[tokio::main]
async fn main() {
    use axum::Router;
    use leptos::logging::log;
    use leptos::prelude::*;
    use leptos_axum::{generate_route_list, LeptosRoutes};
    use leptos_app::app::*;

    // `None` => read configuration from Cargo.toml [package.metadata.leptos]
    // (with environment-variable overrides supported by Leptos).
    let conf = get_configuration(None).unwrap();
    let addr = conf.leptos_options.site_addr;
    let leptos_options = conf.leptos_options;
    // Generate the list of routes in your Leptos App
    let routes = generate_route_list(App);

    let app = Router::new()
        .leptos_routes(&leptos_options, routes, {
            let leptos_options = leptos_options.clone();
            move || shell(leptos_options.clone())
        })
        // Static assets and error rendering for unmatched paths.
        .fallback(leptos_axum::file_and_error_handler(shell))
        .with_state(leptos_options);

    // Bind a TCP listener and serve the app with axum.
    log!("listening on http://{}", &addr);
    let listener = tokio::net::TcpListener::bind(&addr).await.unwrap();
    axum::serve(listener, app.into_make_service())
        .await
        .unwrap();
}
/// Stub entry point for non-SSR builds (e.g. the `hydrate` WASM target).
/// The real client-side entry is `hydrate()` in lib.rs; a body would only be
/// needed here for pure client-side testing with e.g. Trunk.
#[cfg(not(feature = "ssr"))]
pub fn main() {
    // Intentionally empty — see lib.rs for the hydration function instead.
}

View File

@@ -0,0 +1,4 @@
/* Minimal base styles applied to every SSR-rendered page. */
body {
    font-family: sans-serif;
    text-align: center;
}

View File

@@ -1,7 +0,0 @@
*
!src/
!style/
!Cargo.toml
!index.html
!Trunk.toml

View File

@@ -1,64 +0,0 @@
[package]
name = "leptos-chat"
version = "0.1.0"
edition = "2021"

[lib]
# Built as a cdylib so wasm-bindgen can emit a browser-loadable module.
crate-type = ["cdylib"]

[dependencies]
# Client-side-rendering (CSR) build of Leptos and its companions.
leptos = { version = "0.6", features = ["csr"] }
leptos_meta = { version = "0.6", features = ["csr"] }
leptos_router = { version = "0.6", features = ["csr"] }
wasm-bindgen = "0.2"
# Panic messages and `log` output routed to the browser console.
console_error_panic_hook = "0.1"
console_log = "1"
log = "0.4"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
js-sys = "0.3"
either = { version = "1.9", features = ["serde"] }
# Make async-openai optional and only included for non-wasm targets
async-openai-wasm = { default-features = false, version = "0.29" }
# Only include tokio for non-wasm targets
#tokio = { version = "1", default-features = false, features = ["sync", "macros", "io-util", "rt"] }
#reqwest = {version = "0.12.23", default-features = false, optional = false}
futures-util = "0.3"
# Browser APIs used by the chat UI (DOM access, form elements, keyboard events).
web-sys = { version = "0.3", features = [
    "console",
    "Window",
    "Document",
    "Element",
    "HtmlElement",
    "HtmlInputElement",
    "HtmlSelectElement",
    "HtmlTextAreaElement",
    "Event",
    "EventTarget",
    "KeyboardEvent",
] }
gloo-net = "0.6.0"

[dependencies.uuid]
version = "1.0"
features = [
    "v4",                # Lets you generate random UUIDs
    "fast-rng",          # Use a faster (but still sufficiently random) RNG
    "macro-diagnostics", # Enable better diagnostics for compile-time UUIDs
    "js",                # Enable JavaScript RNG for WASM targets
]

# generates docker compose configuration
[package.metadata.compose]
image = "ghcr.io/geoffsee/leptos-chat:latest"
port = 8788

# generates kubernetes manifests
[package.metadata.kube]
image = "ghcr.io/geoffsee/leptos-chat:latest"
replicas = 1
port = 8788

View File

@@ -1,28 +0,0 @@
# Build stage: compile the Leptos CSR app to WASM with Trunk.
FROM rust:1-alpine AS builder

# Install build dependencies
RUN apk add --no-cache npm nodejs musl-dev pkgconfig openssl-dev git curl bash
# cargo-binstall lets us fetch a prebuilt trunk binary instead of compiling it.
RUN curl -L --proto '=https' --tlsv1.2 -sSf https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh | bash

WORKDIR /app

# NOTE(review): copies the whole tree in one layer, so dependency compilation
# is not cached separately; copying manifests first would speed up rebuilds.
COPY . .

# Install the WASM toolchain: wasm-bindgen CLI and the Trunk bundler.
RUN cargo install wasm-bindgen-cli
RUN cargo binstall trunk

# Build release artifacts; getrandom requires the wasm_js backend cfg on wasm32.
RUN rustup target add wasm32-unknown-unknown && export RUSTFLAGS='--cfg getrandom_backend="wasm_js"' && trunk build --release

# Final stage: static web server
FROM caddy:2-alpine

# Copy built assets into Caddy's web root
COPY --from=builder /app/dist /usr/share/caddy

EXPOSE 8788
CMD ["caddy", "file-server", "--root", "/usr/share/caddy", "--listen", ":8788"]

View File

@@ -1,7 +0,0 @@
[build]
# Set the RUSTFLAGS environment variable for getrandom's WebAssembly support
# (getrandom needs the "wasm_js" backend opted in on wasm32-unknown-unknown).
rustflags = ["--cfg", "getrandom_backend=\"wasm_js\""]

[serve]
# Use the same port as in the run.sh script
port = 8788

View File

@@ -1,15 +0,0 @@
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1" />
    <title>Chat Interface</title>
    <link rel="stylesheet" href="style/main.css" />
</head>
<body>
    <!-- Trunk emits the compiled WASM bundle under ./pkg; `init()` fetches and
         instantiates it, which runs the wasm-bindgen `start` entry point. -->
    <script type="module">
        import init from './pkg/leptos_chat.js';
        init();
    </script>
</body>
</html>

View File

@@ -1,6 +0,0 @@
#!/usr/bin/env sh
# Serve the Leptos chat app locally with Trunk on port 8788.
# Abort on the first failing command and on use of unset variables,
# so a failed build doesn't silently fall through.
set -eu

# Set RUSTFLAGS for getrandom's WebAssembly support
export RUSTFLAGS='--cfg getrandom_backend="wasm_js"'

trunk serve --port 8788

View File

@@ -1,165 +0,0 @@
/* Chat UI stylesheet: page shell, role-specific message bubbles,
   the input bar, scrollbar tweaks, and a small-screen media query. */

body {
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
    margin: 0;
    padding: 0;
    background-color: #f5f5f5;
}

/* Full-viewport-height column holding the header, message list, and input form. */
.chat-container {
    max-width: 800px;
    margin: 0 auto;
    height: 100vh;
    display: flex;
    flex-direction: column;
    background-color: white;
    box-shadow: 0 0 20px rgba(0, 0, 0, 0.1);
}

h1 {
    background-color: #4a90e2;
    color: white;
    margin: 0;
    padding: 20px;
    text-align: center;
    font-size: 24px;
    font-weight: 600;
}

/* Scrollable message list; flex: 1 soaks up the remaining vertical space. */
.messages-container {
    flex: 1;
    overflow-y: auto;
    padding: 20px;
    display: flex;
    flex-direction: column;
    gap: 15px;
}

/* Base bubble shared by all roles; the role classes below set alignment/colors. */
.message {
    display: flex;
    flex-direction: column;
    max-width: 70%;
    padding: 12px 16px;
    border-radius: 18px;
    word-wrap: break-word;
}

.user-message {
    align-self: flex-end;
    background-color: #4a90e2;
    color: white;
}

.assistant-message {
    align-self: flex-start;
    background-color: #e9ecef;
    color: #333;
}

/* Centered amber bubble used for errors and status notices. */
.system-message {
    align-self: center;
    background-color: #ffebcc;
    color: #856404;
    border: 1px solid #ffeaa7;
}

.message-role {
    font-size: 12px;
    font-weight: 600;
    margin-bottom: 4px;
    opacity: 0.7;
    text-transform: capitalize;
}

.message-content {
    font-size: 14px;
    line-height: 1.4;
}

/* Input bar pinned below the message list. */
.input-form {
    display: flex;
    padding: 20px;
    gap: 10px;
    background-color: #f8f9fa;
    border-top: 1px solid #dee2e6;
}

.message-input {
    flex: 1;
    padding: 12px 16px;
    border: 1px solid #ced4da;
    border-radius: 25px;
    font-size: 14px;
    outline: none;
    transition: border-color 0.2s ease;
}

.message-input:focus {
    border-color: #4a90e2;
    box-shadow: 0 0 0 2px rgba(74, 144, 226, 0.25);
}

.message-input:disabled {
    background-color: #f8f9fa;
    color: #6c757d;
    cursor: not-allowed;
}

.send-button {
    padding: 12px 24px;
    background-color: #4a90e2;
    color: white;
    border: none;
    border-radius: 25px;
    font-size: 14px;
    font-weight: 600;
    cursor: pointer;
    transition: background-color 0.2s ease;
    min-width: 80px;
}

.send-button:hover:not(:disabled) {
    background-color: #357abd;
}

.send-button:disabled {
    background-color: #6c757d;
    cursor: not-allowed;
}

/* Scrollbar styling (WebKit-based browsers only). */
.messages-container::-webkit-scrollbar {
    width: 8px;
}

.messages-container::-webkit-scrollbar-track {
    background: #f1f1f1;
}

.messages-container::-webkit-scrollbar-thumb {
    background: #c1c1c1;
    border-radius: 4px;
}

.messages-container::-webkit-scrollbar-thumb:hover {
    background: #a1a1a1;
}

/* Responsive design: widen bubbles and tighten padding on narrow screens. */
@media (max-width: 768px) {
    .chat-container {
        height: 100vh;
    }

    .message {
        max-width: 85%;
    }

    .input-form {
        padding: 15px;
    }

    h1 {
        padding: 15px;
        font-size: 20px;
    }
}

View File

@@ -12,14 +12,14 @@ path = "src/main.rs"
axum = "0.8.4"
tokio = { version = "1.45.1", features = ["full"] }
tower = "0.5.2"
tower-http = { version = "0.6.6", features = ["trace", "cors"] }
tower-http = { version = "0.6.6", features = ["trace", "cors", "fs"] }
serde = { version = "1.0.219", features = ["derive"] }
serde_json = "1.0.140"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
uuid = { version = "1.7.0", features = ["v4"] }
reqwest = { version = "0.12", features = ["json"] }
rust-embed = "8.7.2"
rust-embed = { version = "8.7.2", features = ["include-exclude"] }
# Dependencies for embeddings functionality
embeddings-engine = { path = "../embeddings-engine" }
@@ -27,6 +27,11 @@ embeddings-engine = { path = "../embeddings-engine" }
# Dependencies for inference functionality
inference-engine = { path = "../inference-engine" }
# Dependencies for leptos web app
leptos-app = { path = "../leptos-app", features = ["ssr"] }
mime_guess = "2.0.5"
# generates docker compose configuration
[package.metadata.compose]
name = "predict-otron-9000"

View File

@@ -89,7 +89,7 @@ impl ServerConfig {
}
}
Err(_) => {
tracing::info!("SERVER_CONFIG not set, using default Local mode");
tracing::info!("SERVER_CONFIG not set, Standalone mode active");
ServerConfig::default()
}
}

View File

@@ -1,22 +1,21 @@
mod middleware;
mod config;
mod middleware;
mod proxy;
use axum::{
Router,
serve,
};
use std::env;
use axum::response::IntoResponse;
use axum::routing::get;
use tokio::net::TcpListener;
use tower_http::trace::TraceLayer;
use tower_http::cors::{Any, CorsLayer};
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
use inference_engine::AppState;
use middleware::{MetricsStore, MetricsLoggerFuture, MetricsLayer};
use axum::{Router, http::Uri, response::Html, serve};
use config::ServerConfig;
use inference_engine::AppState;
use middleware::{MetricsLayer, MetricsLoggerFuture, MetricsStore};
use proxy::create_proxy_router;
use rust_embed::Embed;
use std::env;
use tokio::net::TcpListener;
use tower_http::classify::ServerErrorsFailureClass::StatusCode;
use tower_http::cors::{Any, CorsLayer};
use tower_http::trace::TraceLayer;
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
#[tokio::main]
async fn main() {
@@ -34,7 +33,6 @@ async fn main() {
.with(tracing_subscriber::fmt::layer())
.init();
// Initialize metrics store for performance tracking
let metrics_store = MetricsStore::new();
@@ -55,19 +53,22 @@ async fn main() {
let service_router = if server_config.clone().is_high_availability() {
tracing::info!("Running in HighAvailability mode - proxying to external services");
tracing::info!(" Inference service URL: {}", server_config.inference_url());
tracing::info!(" Embeddings service URL: {}", server_config.embeddings_url());
tracing::info!(
" Embeddings service URL: {}",
server_config.embeddings_url()
);
// Use proxy router that forwards requests to external services
create_proxy_router(server_config.clone())
} else {
tracing::info!("Running in Local mode - using embedded services");
tracing::info!("Running in Standalone mode - using embedded services");
// Create unified router by merging embeddings and inference routers (existing behavior)
let embeddings_router = embeddings_engine::create_embeddings_router();
// Create AppState with correct model configuration
use inference_engine::server::{PipelineArgs, build_pipeline};
use inference_engine::Which;
use inference_engine::server::{PipelineArgs, build_pipeline};
let mut pipeline_args = PipelineArgs::default();
pipeline_args.model_id = "google/gemma-3-1b-it".to_string();
pipeline_args.which = Which::InstructV3_1B;
@@ -98,39 +99,41 @@ async fn main() {
// Create metrics layer
let metrics_layer = MetricsLayer::new(metrics_store);
// Create the leptos router for the web frontend
let leptos_router = leptos_app::create_leptos_router();
// Merge the service router with base routes and add middleware layers
let app = Router::new()
.route("/", get(|| async { "API ready. This can serve the Leptos web app, but it doesn't." }))
.route("/health", get(|| async { "ok" }))
.merge(service_router)
.layer(metrics_layer) // Add metrics tracking
.merge(leptos_router) // Add leptos web frontend routes
.layer(metrics_layer) // Add metrics tracking
.layer(cors)
.layer(TraceLayer::new_for_http());
// Server configuration
let server_host = env::var("SERVER_HOST").unwrap_or_else(|_| {
String::from(default_host)
});
let server_host = env::var("SERVER_HOST").unwrap_or_else(|_| String::from(default_host));
let server_port = env::var("SERVER_PORT").map(|v| v.parse::<u16>().unwrap_or(default_port)).unwrap_or_else(|_| {
default_port
});
let server_port = env::var("SERVER_PORT")
.map(|v| v.parse::<u16>().unwrap_or(default_port))
.unwrap_or_else(|_| default_port);
let server_address = format!("{}:{}", server_host, server_port);
let listener = TcpListener::bind(&server_address).await.unwrap();
tracing::info!("Unified predict-otron-9000 server listening on {}", listener.local_addr().unwrap());
tracing::info!(
"Unified predict-otron-9000 server listening on {}",
listener.local_addr().unwrap()
);
tracing::info!("Performance metrics tracking enabled - summary logs every 60 seconds");
tracing::info!("Available endpoints:");
tracing::info!(" GET / - Root endpoint from embeddings-engine");
tracing::info!(" POST /v1/embeddings - Text embeddings");
tracing::info!(" POST /v1/chat/completions - Chat completions");
tracing::info!(" GET / - Leptos chat web application");
tracing::info!(" GET /health - Health check");
tracing::info!(" POST /v1/embeddings - Text embeddings API");
tracing::info!(" POST /v1/chat/completions - Chat completions API");
serve(listener, app).await.unwrap();
}
// Chat completions handler that properly uses the inference server crate's error handling
// This function is no longer needed as we're using the inference_engine router directly

View File

@@ -25,22 +25,6 @@ services:
retries: 10
start_period: 10s
# Web frontend - Leptos WASM chat interface
leptos-chat:
build:
context: crates/leptos-chat
dockerfile: Dockerfile
ports:
- "8788:8788"
depends_on:
predict-otron-9000:
condition: service_healthy
networks:
- predict-otron-network
environment:
# Configure API endpoint for the frontend to connect to backend
- API_BASE_URL=http://predict-otron-9000:8080
volumes:
# Persistent storage for Hugging Face model cache
hf-cache:

View File

@@ -173,7 +173,7 @@ The server logs the selected mode on startup:
**Local Mode:**
```
INFO predict_otron_9000: Running in Local mode - using embedded services
INFO predict_otron_9000: Running in Standalone mode
```
**HighAvailability Mode:**

33
package-lock.json generated
View File

@@ -1,33 +0,0 @@
{
"name": "predict-otron-9000",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"dependencies": {
"openai": "^5.16.0"
}
},
"node_modules/openai": {
"version": "5.16.0",
"resolved": "https://registry.npmjs.org/openai/-/openai-5.16.0.tgz",
"integrity": "sha512-hoEH8ZNvg1HXjU9mp88L/ZH8O082Z8r6FHCXGiWAzVRrEv443aI57qhch4snu07yQydj+AUAWLenAiBXhu89Tw==",
"license": "Apache-2.0",
"bin": {
"openai": "bin/cli"
},
"peerDependencies": {
"ws": "^8.18.0",
"zod": "^3.23.8"
},
"peerDependenciesMeta": {
"ws": {
"optional": true
},
"zod": {
"optional": true
}
}
}
}
}

View File

@@ -59,11 +59,11 @@ echo "[INFO] Server is ready!"
# Run first CLI request
echo "[INFO] Running first CLI request - listing models..."
bun run cli.ts --list-models
./cli.ts --list-models
echo ""
echo "[INFO] Running second CLI request - chat completion..."
bun run cli.ts "What is 2+2?"
./cli.ts "What is 2+2?"
echo ""
echo "[INFO] Both CLI requests completed successfully!"