- Refactored `build_pipeline` usage to ensure pipeline arguments are cloned.

- Introduced `reset_state` for clearing cached state between requests.
- Enhanced chat UI with model selector and dynamic model fetching.
- Improved error logging and detailed debug messages for chat request flows.
- Added fresh instantiation of `TextGeneration` to prevent tensor shape mismatches.
2025-09-08 22:46:44 +00:00 · 2025-08-27 17:53:50 -04:00
parent f1b57866e1
commit 766d41af78
5 changed files with 185 additions and 209 deletions
--- a/crates/inference-engine/src/text_generation.rs
+++ b/crates/inference-engine/src/text_generation.rs
@@ -117,6 +117,16 @@ impl TextGeneration {
        }
    }
    
+    // Clears per-request state: re-enables the primary-device attempt (non-CPU only) and empties the penalty cache.
+    pub fn reset_state(&mut self) {
+        // When the device is not the CPU, set `try_primary_device` back to true so a new request tries it first.
+        if !self.device.is_cpu() {
+            self.try_primary_device = true;
+        }
+        // Empty `penalty_cache` so values cached during a previous request are not reused.
+        self.penalty_cache.clear();
+    }
+
    // Helper method to apply repeat penalty with caching for optimization
    pub fn apply_cached_repeat_penalty(
        &mut self,