mirror of
https://github.com/geoffsee/predict-otron-9001.git
synced 2025-09-08 22:46:44 +00:00
- Refactored `build_pipeline` usage to ensure pipeline arguments are cloned.
- Introduced `reset_state` for clearing cached state between requests.
- Enhanced chat UI with model selector and dynamic model fetching.
- Improved error logging and detailed debug messages for chat request flows.
- Added fresh instantiation of `TextGeneration` to prevent tensor shape mismatches.
This commit is contained in:
@@ -117,6 +117,16 @@ impl TextGeneration {
|
||||
}
|
||||
}
|
||||
|
||||
// Reset method to clear state between requests
|
||||
pub fn reset_state(&mut self) {
|
||||
// Reset the primary device flag so we try the primary device first for each new request
|
||||
if !self.device.is_cpu() {
|
||||
self.try_primary_device = true;
|
||||
}
|
||||
// Clear the penalty cache to avoid stale cached values from previous requests
|
||||
self.penalty_cache.clear();
|
||||
}
|
||||
|
||||
// Helper method to apply repeat penalty with caching for optimization
|
||||
pub fn apply_cached_repeat_penalty(
|
||||
&mut self,
|
||||
|
Reference in New Issue
Block a user