mirror of
https://github.com/geoffsee/predict-otron-9001.git
synced 2025-09-08 22:46:44 +00:00
Refactor apply_cached_repeat_penalty
for optimized caching and reuse, add extensive unit tests, and integrate special handling for gemma-specific models.
Removed `test_request.sh`, deprecated functionality, and unused imports; introduced a new CLI tool (`cli.ts`) for testing inference engine and adjusted handling of non-streaming/streaming chat completions. - Add CPU fallback support for text generation when primary device is unsupported - Introduce `execute_with_fallback` method to handle device compatibility and shape mismatch errors - Extend unit tests to reproduce tensor shape mismatch errors specific to model configurations - Increase HTTP timeout limits in `curl_chat_stream.sh` script for reliable API testing chat completion endpoint functions with gemma3 (no streaming) Add benchmarking guide with HTML reporting, Leptos chat crate, and middleware for metrics tracking
This commit is contained in:
67
crates/legacy-inference-engine/tests/model_tests.rs
Normal file
67
crates/legacy-inference-engine/tests/model_tests.rs
Normal file
@@ -0,0 +1,67 @@
|
||||
use legacy_inference_engine::model::{Model, Which};
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_which_to_model_id() {
|
||||
// Test a few representative model variants
|
||||
assert_eq!(Which::Base2B.to_model_id(), "google/gemma-2b");
|
||||
assert_eq!(Which::Instruct7B.to_model_id(), "google/gemma-7b-it");
|
||||
assert_eq!(Which::InstructV1_1_2B.to_model_id(), "google/gemma-1.1-2b-it");
|
||||
assert_eq!(Which::CodeBase2B.to_model_id(), "google/codegemma-2b");
|
||||
assert_eq!(Which::BaseV2_2B.to_model_id(), "google/gemma-2-2b");
|
||||
assert_eq!(Which::InstructV3_1B.to_model_id(), "google/gemma-3-1b-it");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_which_is_instruct_model() {
|
||||
// Test base models (should return false)
|
||||
assert!(!Which::Base2B.is_instruct_model());
|
||||
assert!(!Which::Base7B.is_instruct_model());
|
||||
assert!(!Which::CodeBase2B.is_instruct_model());
|
||||
assert!(!Which::CodeBase7B.is_instruct_model());
|
||||
assert!(!Which::BaseV2_2B.is_instruct_model());
|
||||
assert!(!Which::BaseV2_9B.is_instruct_model());
|
||||
assert!(!Which::BaseV3_1B.is_instruct_model());
|
||||
|
||||
// Test instruct models (should return true)
|
||||
assert!(Which::Instruct2B.is_instruct_model());
|
||||
assert!(Which::Instruct7B.is_instruct_model());
|
||||
assert!(Which::InstructV1_1_2B.is_instruct_model());
|
||||
assert!(Which::InstructV1_1_7B.is_instruct_model());
|
||||
assert!(Which::CodeInstruct2B.is_instruct_model());
|
||||
assert!(Which::CodeInstruct7B.is_instruct_model());
|
||||
assert!(Which::InstructV2_2B.is_instruct_model());
|
||||
assert!(Which::InstructV2_9B.is_instruct_model());
|
||||
assert!(Which::InstructV3_1B.is_instruct_model());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_which_is_v3_model() {
|
||||
// Test non-v3 models (should return false)
|
||||
assert!(!Which::Base2B.is_v3_model());
|
||||
assert!(!Which::Base7B.is_v3_model());
|
||||
assert!(!Which::Instruct2B.is_v3_model());
|
||||
assert!(!Which::Instruct7B.is_v3_model());
|
||||
assert!(!Which::InstructV1_1_2B.is_v3_model());
|
||||
assert!(!Which::InstructV1_1_7B.is_v3_model());
|
||||
assert!(!Which::CodeBase2B.is_v3_model());
|
||||
assert!(!Which::CodeBase7B.is_v3_model());
|
||||
assert!(!Which::CodeInstruct2B.is_v3_model());
|
||||
assert!(!Which::CodeInstruct7B.is_v3_model());
|
||||
assert!(!Which::BaseV2_2B.is_v3_model());
|
||||
assert!(!Which::InstructV2_2B.is_v3_model());
|
||||
assert!(!Which::BaseV2_9B.is_v3_model());
|
||||
assert!(!Which::InstructV2_9B.is_v3_model());
|
||||
|
||||
// Test v3 models (should return true)
|
||||
assert!(Which::BaseV3_1B.is_v3_model());
|
||||
assert!(Which::InstructV3_1B.is_v3_model());
|
||||
}
|
||||
|
||||
// Note: Testing the Model enum's forward method would require creating actual model instances,
|
||||
// which is complex and would require loading model weights. This is better suited for
|
||||
// integration tests or mocking the models.
|
||||
}
|
101
crates/legacy-inference-engine/tests/text_generation_tests.rs
Normal file
101
crates/legacy-inference-engine/tests/text_generation_tests.rs
Normal file
@@ -0,0 +1,101 @@
|
||||
use anyhow::Result;
|
||||
use candle_transformers::generation::LogitsProcessor;
|
||||
use legacy_inference_engine::model::Which;
|
||||
use legacy_inference_engine::token_output_stream::TokenOutputStream;
|
||||
use tokenizers::Tokenizer;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
// Helper function to create a simple tokenizer for testing
|
||||
fn create_test_tokenizer() -> Result<Tokenizer> {
|
||||
// Create a simple tokenizer from the pretrained model
|
||||
// This uses the tokenizer from the Hugging Face hub
|
||||
let tokenizer = Tokenizer::from_pretrained("google/gemma-2b", None).unwrap();
|
||||
Ok(tokenizer)
|
||||
}
|
||||
|
||||
// Test the Which enum's to_model_id method
|
||||
#[test]
|
||||
fn test_which_model_id() {
|
||||
assert_eq!(Which::Base2B.to_model_id(), "google/gemma-2b");
|
||||
assert_eq!(Which::Instruct7B.to_model_id(), "google/gemma-7b-it");
|
||||
}
|
||||
|
||||
// Test the Which enum's is_instruct_model method
|
||||
#[test]
|
||||
fn test_which_is_instruct() {
|
||||
assert!(!Which::Base2B.is_instruct_model());
|
||||
assert!(Which::Instruct7B.is_instruct_model());
|
||||
}
|
||||
|
||||
// Test the Which enum's is_v3_model method
|
||||
#[test]
|
||||
fn test_which_is_v3() {
|
||||
assert!(!Which::Base2B.is_v3_model());
|
||||
assert!(Which::BaseV3_1B.is_v3_model());
|
||||
}
|
||||
|
||||
// Test the TokenOutputStream functionality
|
||||
#[test]
|
||||
fn test_token_output_stream() -> Result<()> {
|
||||
let tokenizer = create_test_tokenizer()?;
|
||||
let mut token_stream = TokenOutputStream::new(tokenizer);
|
||||
|
||||
// Test encoding and decoding
|
||||
let text = "Hello, world!";
|
||||
let encoded = token_stream.tokenizer().encode(text, true).unwrap();
|
||||
let token_ids = encoded.get_ids();
|
||||
|
||||
// Add tokens one by one
|
||||
for &token_id in token_ids {
|
||||
token_stream.next_token(token_id)?;
|
||||
}
|
||||
|
||||
// Decode all and check
|
||||
let decoded = token_stream.decode_all()?;
|
||||
assert_eq!(decoded.trim(), text);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Test the LogitsProcessor
|
||||
#[test]
|
||||
fn test_logits_processor() -> Result<()> {
|
||||
// Create a LogitsProcessor with default settings
|
||||
let seed = 42;
|
||||
let temp = Some(0.8);
|
||||
let top_p = Some(0.9);
|
||||
let logits_processor = LogitsProcessor::new(seed, temp, top_p);
|
||||
|
||||
// Create a simple logits tensor
|
||||
// In a real test, we would create a tensor with known values and verify
|
||||
// that sampling produces expected results
|
||||
|
||||
// For now, we'll just verify that the LogitsProcessor can be created
|
||||
assert!(true);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Test the TextGeneration constructor
|
||||
#[test]
|
||||
fn test_text_generation_constructor() -> Result<()> {
|
||||
// We can't easily create a Model instance for testing,
|
||||
// but we can test that the constructor compiles and the types are correct
|
||||
|
||||
// In a real test with a mock Model, we would:
|
||||
// 1. Create a mock model
|
||||
// 2. Create a tokenizer
|
||||
// 3. Call TextGeneration::new
|
||||
// 4. Verify the properties of the created instance
|
||||
|
||||
// For now, we'll just verify that the code compiles
|
||||
assert!(true);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Note: Testing the actual text generation functionality would require
|
||||
// integration tests with real models, which is beyond the scope of these unit tests.
|
||||
// The tests above focus on the components that can be tested in isolation.
|
||||
}
|
@@ -0,0 +1,129 @@
|
||||
use legacy_inference_engine::token_output_stream::TokenOutputStream;
|
||||
use tokenizers::Tokenizer;
|
||||
use std::path::PathBuf;
|
||||
use anyhow::Result;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
// Helper function to create a simple tokenizer for testing
|
||||
fn create_test_tokenizer() -> Result<Tokenizer> {
|
||||
// Create a simple tokenizer from the pretrained model
|
||||
// This uses the tokenizer from the Hugging Face hub
|
||||
let tokenizer = Tokenizer::from_pretrained("google/gemma-2b", None).unwrap();
|
||||
Ok(tokenizer)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_new_token_output_stream() -> Result<()> {
|
||||
let tokenizer = create_test_tokenizer()?;
|
||||
let token_stream = TokenOutputStream::new(tokenizer);
|
||||
|
||||
// Check that the token stream was created successfully
|
||||
assert!(token_stream.tokenizer().get_vocab(true).len() > 0);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_clear() -> Result<()> {
|
||||
let tokenizer = create_test_tokenizer()?;
|
||||
let mut token_stream = TokenOutputStream::new(tokenizer);
|
||||
|
||||
// Add a token
|
||||
let token_id = token_stream.get_token("<eos>").unwrap();
|
||||
token_stream.next_token(token_id)?;
|
||||
|
||||
// Clear the stream
|
||||
token_stream.clear();
|
||||
|
||||
// Check that the stream is empty by trying to decode all
|
||||
let decoded = token_stream.decode_all()?;
|
||||
assert_eq!(decoded, "");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_get_token() -> Result<()> {
|
||||
let tokenizer = create_test_tokenizer()?;
|
||||
let token_stream = TokenOutputStream::new(tokenizer);
|
||||
|
||||
// Get a token that should exist
|
||||
let eos_token = token_stream.get_token("<eos>");
|
||||
assert!(eos_token.is_some());
|
||||
|
||||
// Get a token that shouldn't exist
|
||||
let nonexistent_token = token_stream.get_token("<this_token_does_not_exist>");
|
||||
assert!(nonexistent_token.is_none());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_next_token_and_decode() -> Result<()> {
|
||||
let tokenizer = create_test_tokenizer()?;
|
||||
let mut token_stream = TokenOutputStream::new(tokenizer);
|
||||
|
||||
// Get some tokens
|
||||
let hello_tokens = token_stream.tokenizer().encode("Hello world", true).unwrap();
|
||||
let token_ids = hello_tokens.get_ids();
|
||||
|
||||
// Add tokens one by one
|
||||
let mut output = String::new();
|
||||
for &token_id in token_ids {
|
||||
if let Some(text) = token_stream.next_token(token_id)? {
|
||||
output.push_str(&text);
|
||||
}
|
||||
}
|
||||
|
||||
// Get any remaining text
|
||||
if let Some(rest) = token_stream.decode_rest()? {
|
||||
output.push_str(&rest);
|
||||
}
|
||||
|
||||
// Check the output
|
||||
assert!(!output.is_empty());
|
||||
assert_eq!(output.trim(), "Hello world");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_decode_all() -> Result<()> {
|
||||
let tokenizer = create_test_tokenizer()?;
|
||||
let mut token_stream = TokenOutputStream::new(tokenizer);
|
||||
|
||||
// Get some tokens
|
||||
let hello_tokens = token_stream.tokenizer().encode("Hello world", true).unwrap();
|
||||
let token_ids = hello_tokens.get_ids();
|
||||
|
||||
// Add tokens one by one
|
||||
for &token_id in token_ids {
|
||||
token_stream.next_token(token_id)?;
|
||||
}
|
||||
|
||||
// Decode all
|
||||
let decoded = token_stream.decode_all()?;
|
||||
|
||||
// Check the output
|
||||
assert_eq!(decoded.trim(), "Hello world");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_into_inner() -> Result<()> {
|
||||
let tokenizer = create_test_tokenizer()?;
|
||||
let token_stream = TokenOutputStream::new(tokenizer);
|
||||
|
||||
// Get the inner tokenizer
|
||||
let inner_tokenizer = token_stream.into_inner();
|
||||
|
||||
// Check that the inner tokenizer works
|
||||
let encoded = inner_tokenizer.encode("Test", true).unwrap();
|
||||
assert!(encoded.get_ids().len() > 0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user