streaming implementation re-added to UI

This commit is contained in:
geoffsee
2025-09-02 14:45:16 -04:00
parent bcbc6c4693
commit 400c70f17d
6 changed files with 295 additions and 42 deletions

2
Cargo.lock generated
View File

@@ -893,6 +893,7 @@ dependencies = [
"axum",
"console_error_panic_hook",
"gloo-net",
"js-sys",
"leptos",
"leptos_axum",
"leptos_meta",
@@ -902,6 +903,7 @@ dependencies = [
"serde_json",
"tokio",
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
]

View File

@@ -15,10 +15,26 @@ leptos_axum = { version = "0.8.0", optional = true }
leptos_meta = { version = "0.8.0" }
tokio = { version = "1", features = ["rt-multi-thread"], optional = true }
wasm-bindgen = { version = "=0.2.100", optional = true }
wasm-bindgen-futures = "0.4"
js-sys = "0.3"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
reqwest = { version = "0.12", features = ["json"] }
web-sys = { version = "0.3", features = ["console"] }
web-sys = { version = "0.3", features = [
"console",
"EventSource",
"MessageEvent",
"Window",
"Request",
"RequestInit",
"Response",
"Headers",
"ReadableStream",
"ReadableStreamDefaultReader",
"TextDecoder",
"TextDecoderOptions",
"HtmlInputElement"
] }
gloo-net = { version = "0.6", features = ["http"] }
[features]

View File

@@ -1,24 +0,0 @@
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
For more information, please refer to <https://unlicense.org>

View File

@@ -49,7 +49,6 @@ use leptos_router::{
};
use serde::{Deserialize, Serialize};
use web_sys::console;
// Remove spawn_local import as we'll use different approach
// Data structures for OpenAI-compatible API
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -73,6 +72,29 @@ pub struct ChatChoice {
pub finish_reason: Option<String>,
}
// Streaming response structures
// These mirror the OpenAI-compatible SSE chunk format: each `data:` event
// carries a `StreamChatResponse` whose choices hold incremental deltas
// rather than full messages.
/// One incremental piece of an assistant message in a streamed response.
/// `role` is typically present only on the first chunk; `content` carries
/// the text fragment to append (either may be absent on a given chunk).
#[derive(Debug, Deserialize)]
pub struct StreamDelta {
pub role: Option<String>,
pub content: Option<String>,
}
/// A single choice within a streamed chunk.
#[derive(Debug, Deserialize)]
pub struct StreamChoice {
// Position of this choice in the response (0 for single-choice requests).
pub index: u32,
// The incremental delta for this chunk.
pub delta: StreamDelta,
// Set on the final chunk (e.g. "stop"); None while streaming continues.
pub finish_reason: Option<String>,
}
/// One SSE event payload from the chat completions stream.
#[derive(Debug, Deserialize)]
pub struct StreamChatResponse {
pub id: String,
// Object type tag, e.g. "chat.completion.chunk" — TODO confirm against server.
pub object: String,
// Unix timestamp (seconds) when the chunk was created.
pub created: u64,
pub model: String,
pub choices: Vec<StreamChoice>,
}
#[derive(Debug, Deserialize)]
pub struct ChatResponse {
pub id: String,
@@ -161,6 +183,129 @@ pub async fn send_chat_completion(
}
}
// Streaming chat completion over SSE.
//
// EventSource only supports GET, but the completions endpoint requires a
// POST body, so we issue a `fetch` and consume the response body as a
// ReadableStream, extracting SSE events (`data: ...` lines terminated by a
// blank line) from the accumulated text.
//
// Callback contract: `on_chunk` fires once per content delta; afterwards
// exactly ONE of `on_complete` / `on_error` fires — never both.
#[cfg(target_arch = "wasm32")]
pub fn send_chat_completion_stream(
    messages: Vec<ChatMessage>,
    model: String,
    on_chunk: impl Fn(String) + 'static,
    on_complete: impl Fn() + 'static,
    on_error: impl Fn(String) + 'static,
) {
    use wasm_bindgen::prelude::*;
    use wasm_bindgen::JsCast;

    let request = ChatRequest {
        model,
        messages,
        max_tokens: Some(1024),
        stream: Some(true),
    };

    // Report setup failures through `on_error` instead of panicking with
    // `unwrap()` — a panic aborts the whole wasm instance.
    let request_json = match serde_json::to_string(&request) {
        Ok(json) => json,
        Err(e) => {
            on_error(format!("Failed to serialize request: {}", e));
            return;
        }
    };

    let Some(window) = web_sys::window() else {
        on_error("No window object available".to_string());
        return;
    };

    let opts = web_sys::RequestInit::new();
    opts.set_method("POST");
    opts.set_body(&JsValue::from_str(&request_json));

    let headers = match web_sys::Headers::new() {
        Ok(h) => h,
        Err(e) => {
            on_error(format!("Failed to create headers: {:?}", e));
            return;
        }
    };
    if let Err(e) = headers
        .set("Content-Type", "application/json")
        .and_then(|_| headers.set("Accept", "text/event-stream"))
    {
        on_error(format!("Failed to set headers: {:?}", e));
        return;
    }
    opts.set_headers(&headers);

    let request = match web_sys::Request::new_with_str_and_init("/v1/chat/completions", &opts) {
        Ok(r) => r,
        Err(e) => {
            on_error(format!("Failed to build request: {:?}", e));
            return;
        }
    };

    let promise = window.fetch_with_request(&request);

    wasm_bindgen_futures::spawn_local(async move {
        let resp_value = match wasm_bindgen_futures::JsFuture::from(promise).await {
            Ok(v) => v,
            Err(e) => {
                on_error(format!("Fetch error: {:?}", e));
                return;
            }
        };
        let resp: web_sys::Response = match resp_value.dyn_into() {
            Ok(r) => r,
            Err(_) => {
                on_error("Fetch did not return a Response".to_string());
                return;
            }
        };
        if !resp.ok() {
            on_error(format!("Server error: {}", resp.status()));
            return;
        }
        let Some(body) = resp.body() else {
            on_error("No response body".to_string());
            return;
        };
        let reader = match body
            .get_reader()
            .dyn_into::<web_sys::ReadableStreamDefaultReader>()
        {
            Ok(r) => r,
            Err(_) => {
                on_error("Response body is not a readable stream".to_string());
                return;
            }
        };
        let decoder = match web_sys::TextDecoder::new() {
            Ok(d) => d,
            Err(e) => {
                on_error(format!("Failed to create text decoder: {:?}", e));
                return;
            }
        };

        // Accumulates decoded text until a full SSE event ("\n\n") arrives.
        let mut buffer = String::new();
        loop {
            let result = match wasm_bindgen_futures::JsFuture::from(reader.read()).await {
                Ok(r) => r,
                Err(e) => {
                    // Bug fix: the previous version `break`ed here and then
                    // fell through to `on_complete()`, firing both callbacks.
                    on_error(format!("Read error: {:?}", e));
                    return;
                }
            };
            let done = js_sys::Reflect::get(&result, &JsValue::from_str("done"))
                .ok()
                .and_then(|v| v.as_bool())
                .unwrap_or(false);
            if done {
                break;
            }
            let value = match js_sys::Reflect::get(&result, &JsValue::from_str("value")) {
                Ok(v) => v,
                Err(e) => {
                    on_error(format!("Read error: {:?}", e));
                    return;
                }
            };
            // Copy the JS Uint8Array chunk into a Rust buffer for decoding.
            let array = js_sys::Uint8Array::new(&value);
            let mut bytes = vec![0; array.length() as usize];
            array.copy_to(&mut bytes);
            // NOTE(review): a non-streaming decode like this mis-decodes a
            // multi-byte UTF-8 sequence split across read chunks; switching to
            // decode_with_u8_array_and_options with `stream: true` would fix
            // it, but requires the `TextDecodeOptions` web-sys feature —
            // TODO confirm and enable.
            let text = match decoder.decode_with_u8_array(&bytes) {
                Ok(t) => t,
                Err(e) => {
                    on_error(format!("Decode error: {:?}", e));
                    return;
                }
            };
            buffer.push_str(&text);

            // Process every complete SSE event currently in the buffer;
            // a partial trailing event stays buffered for the next read.
            while let Some(event_end) = buffer.find("\n\n") {
                let event = buffer[..event_end].to_string();
                buffer = buffer[event_end + 2..].to_string();
                for line in event.lines() {
                    if let Some(data) = line.strip_prefix("data: ") {
                        // The OpenAI-compatible stream ends with a sentinel.
                        if data == "[DONE]" {
                            on_complete();
                            return;
                        }
                        // Malformed chunks are skipped rather than aborting
                        // the stream (e.g. comments or keep-alive payloads).
                        if let Ok(chunk) = serde_json::from_str::<StreamChatResponse>(data) {
                            if let Some(choice) = chunk.choices.first() {
                                if let Some(content) = &choice.delta.content {
                                    on_chunk(content.clone());
                                }
                            }
                        }
                    }
                }
            }
        }
        // Stream closed without an explicit [DONE] sentinel.
        on_complete();
    });
}
pub fn shell(options: LeptosOptions) -> impl IntoView {
view! {
<!DOCTYPE html>
@@ -222,6 +367,13 @@ fn ChatPage() -> impl IntoView {
let available_models = RwSignal::new(Vec::<ModelInfo>::new());
let selected_model = RwSignal::new(String::from("gemma-3-1b-it")); // Default model
// State for streaming response
let streaming_content = RwSignal::new(String::new());
let is_streaming = RwSignal::new(false);
// State for streaming mode toggle
let use_streaming = RwSignal::new(true); // Default to streaming
// Client-side only: Fetch models on component mount
#[cfg(target_arch = "wasm32")]
{
@@ -265,8 +417,45 @@ fn ChatPage() -> impl IntoView {
// Prepare messages for API call
let current_messages = messages.get();
let current_model = selected_model.get();
let should_stream = use_streaming.get();
// Spawn async task to call API
if should_stream {
// Clear streaming content and set streaming flag
streaming_content.set(String::new());
is_streaming.set(true);
// Use streaming API
send_chat_completion_stream(
current_messages,
current_model,
move |chunk| {
// Append chunk to streaming content
streaming_content.update(|content| content.push_str(&chunk));
},
move || {
// On complete, move streaming content to messages
let final_content = streaming_content.get();
if !final_content.is_empty() {
let assistant_message = ChatMessage {
role: "assistant".to_string(),
content: final_content,
};
messages.update(|msgs| msgs.push(assistant_message));
}
streaming_content.set(String::new());
is_streaming.set(false);
is_loading.set(false);
},
move |error| {
console::log_1(&format!("Streaming Error: {}", error).into());
error_message.set(Some(error));
is_streaming.set(false);
is_loading.set(false);
streaming_content.set(String::new());
},
);
} else {
// Use non-streaming API
spawn_local(async move {
match send_chat_completion(current_messages, current_model).await {
Ok(response_content) => {
@@ -285,6 +474,7 @@ fn ChatPage() -> impl IntoView {
}
});
}
}
};
// Button click handler
@@ -329,6 +519,19 @@ fn ChatPage() -> impl IntoView {
}
/>
</select>
<div class="streaming-toggle">
<label>
<input
type="checkbox"
prop:checked=move || use_streaming.get()
on:change=move |ev| {
let target = event_target::<web_sys::HtmlInputElement>(&ev);
use_streaming.set(target.checked());
}
/>
" Use streaming"
</label>
</div>
</div>
</div>
@@ -348,7 +551,24 @@ fn ChatPage() -> impl IntoView {
/>
{move || {
if is_loading.get() {
if is_streaming.get() {
let content = streaming_content.get();
if !content.is_empty() {
view! {
<div class="message assistant-message streaming">
<div class="message-role">"assistant"</div>
<div class="message-content">{content}<span class="cursor">""</span></div>
</div>
}.into_any()
} else {
view! {
<div class="message assistant-message loading">
<div class="message-role">"assistant"</div>
<div class="message-content">"Thinking..."</div>
</div>
}.into_any()
}
} else if is_loading.get() && !use_streaming.get() {
view! {
<div class="message assistant-message loading">
<div class="message-role">"assistant"</div>

View File

@@ -42,6 +42,7 @@ body {
align-items: center;
justify-content: center;
gap: 0.5rem;
flex-wrap: wrap;
label {
font-weight: 500;
@@ -69,6 +70,24 @@ body {
padding: 0.5rem;
}
}
.streaming-toggle {
display: flex;
align-items: center;
margin-left: 1rem;
label {
display: flex;
align-items: center;
gap: 0.5rem;
cursor: pointer;
font-size: 0.9rem;
input[type="checkbox"] {
cursor: pointer;
}
}
}
}
}
@@ -134,6 +153,16 @@ body {
color: #6b7280;
}
}
&.streaming {
.message-content {
.cursor {
display: inline-block;
animation: blink 1s infinite;
color: #9ca3af;
}
}
}
}
}
@@ -224,3 +253,13 @@ body {
.chat-messages::-webkit-scrollbar-thumb:hover {
background: #a8a8a8;
}
/* Blink animation for the streaming-response caret: visible for the first
   half of each 1s cycle, hidden for the second half. */
@keyframes blink {
  0%, 50% { opacity: 1; }
  51%, 100% { opacity: 0; }
}

View File

@@ -15,7 +15,7 @@ CONNECT_TIMEOUT=${CONNECT_TIMEOUT:-10}
MAX_TIME=${MAX_TIME:-30}
cat <<EOF
[info] POST $SERVER_URL/v1/chat/completions/stream (SSE)
[info] POST $SERVER_URL/v1/chat/completions (SSE)
[info] model=$MODEL_ID, max_tokens=$MAX_TOKENS
[info] prompt=$PROMPT
[info] timeouts: connect=${CONNECT_TIMEOUT}s, max=${MAX_TIME}s