mirror of
https://github.com/geoffsee/predict-otron-9001.git
synced 2025-09-08 22:46:44 +00:00

Removed `test_request.sh`, deprecated functionality, and unused imports; introduced a new CLI tool (`cli.ts`) for testing the inference engine and adjusted handling of non-streaming/streaming chat completions.
- Add CPU fallback support for text generation when the primary device is unsupported
- Introduce `execute_with_fallback` method to handle device compatibility and shape mismatch errors
- Extend unit tests to reproduce tensor shape mismatch errors specific to model configurations
- Increase HTTP timeout limits in `curl_chat_stream.sh` script for reliable API testing

Chat completion endpoint functions with gemma3 (no streaming).

Add benchmarking guide with HTML reporting, Leptos chat crate, and middleware for metrics tracking.
295 lines
11 KiB
HTML
295 lines
11 KiB
HTML
<!DOCTYPE html>
|
|
<html lang="en">
|
|
<head>
|
|
<meta charset="UTF-8">
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
<title>OpenAI-Compatible API Tester</title>
|
|
<style>
    /* Page shell: a centred, readable column. */
    body {
        font-family: Arial, sans-serif;
        max-width: 800px;
        margin: 0 auto;
        padding: 20px;
        line-height: 1.6;
    }
    h1, h2 {
        color: #333;
    }
    .container {
        margin-bottom: 20px;
    }

    /* Request body editor. */
    textarea {
        /* Keep padding and border inside the 100% width so the editor
           does not overflow its container. */
        box-sizing: border-box;
        width: 100%;
        height: 150px;
        padding: 10px;
        margin-bottom: 10px;
        border: 1px solid #ddd;
        border-radius: 4px;
        font-family: monospace;
    }

    /* Primary (green) buttons. */
    button {
        background-color: #4CAF50;
        color: white;
        padding: 10px 15px;
        border: none;
        border-radius: 4px;
        cursor: pointer;
        font-size: 16px;
    }
    button:hover {
        background-color: #45a049;
    }

    /* Response panel; pre-wrap keeps JSON indentation while wrapping long lines. */
    pre {
        background-color: #f5f5f5;
        padding: 15px;
        border-radius: 4px;
        overflow-x: auto;
        white-space: pre-wrap;
    }
    .response {
        margin-top: 20px;
    }
    /* Added to the response panel by the script when a request fails. */
    .error {
        color: red;
    }

    /* Settings row: each child div stacks a label above its input. */
    .settings {
        display: flex;
        flex-wrap: wrap;
        gap: 10px;
        margin-bottom: 15px;
    }
    .settings div {
        display: flex;
        flex-direction: column;
    }
    label {
        margin-bottom: 5px;
        font-weight: bold;
    }
    input {
        padding: 8px;
        border: 1px solid #ddd;
        border-radius: 4px;
    }

    /* Example-request (blue) buttons. */
    .examples {
        margin-top: 30px;
    }
    .example-btn {
        background-color: #2196F3;
        margin-right: 10px;
        margin-bottom: 10px;
    }
    .example-btn:hover {
        background-color: #0b7dda;
    }
</style>
|
|
</head>
|
|
<body>
|
|
<h1>OpenAI-Compatible API Tester</h1>
|
|
<p>Use this page to test the OpenAI-compatible chat completions endpoint of the local inference engine.</p>
|
|
|
|
<div class="container">
    <h2>Request Settings</h2>
    <!-- The script copies these inputs into the JSON request body (all except Server URL). -->
    <div class="settings">
        <div>
            <label for="serverUrl">Server URL:</label>
            <input type="text" id="serverUrl" value="http://localhost:3777" />
        </div>
        <div>
            <label for="model">Model:</label>
            <input type="text" id="model" value="gemma-3-1b-it" />
        </div>
        <div>
            <label for="maxTokens">Max Tokens:</label>
            <input type="number" id="maxTokens" value="150" />
        </div>
        <div>
            <label for="temperature">Temperature:</label>
            <input type="number" id="temperature" value="0.7" step="0.1" min="0" max="2" />
        </div>
        <div>
            <label for="topP">Top P:</label>
            <input type="number" id="topP" value="0.9" step="0.1" min="0" max="1" />
        </div>
    </div>

    <h2>Request Body</h2>
    <!-- The default payload must be valid JSON: the script reads this textarea with JSON.parse. -->
    <textarea id="requestBody" aria-label="Request body">{
  "model": "gemma-3-1b-it",
  "messages": [
    {
      "role": "user",
      "content": "Hello, how are you today?"
    }
  ],
  "max_tokens": 150,
  "temperature": 0.7,
  "top_p": 0.9
}</textarea>
    <button id="sendRequest">Send Request</button>

    <!-- Each button replaces the request body with a canned example. -->
    <div class="examples">
        <h3>Example Requests</h3>
        <button class="example-btn" id="example1">Basic Question</button>
        <button class="example-btn" id="example2">Multi-turn Conversation</button>
        <button class="example-btn" id="example3">Creative Writing</button>
        <button class="example-btn" id="example4">Code Generation</button>
    </div>

    <div class="response">
        <h2>Response</h2>
        <pre id="responseOutput">Response will appear here...</pre>
    </div>
</div>
|
|
|
|
<script>
|
|
document.addEventListener('DOMContentLoaded', function() {
|
|
// Look up the settings inputs, the request-body textarea and the response
// panel once; the functions and handlers below share these references.
const [
    serverUrlInput,
    modelInput,
    maxTokensInput,
    temperatureInput,
    topPInput,
    requestBodyTextarea,
    responseOutput,
] = [
    'serverUrl',
    'model',
    'maxTokens',
    'temperature',
    'topP',
    'requestBody',
    'responseOutput',
].map((elementId) => document.getElementById(elementId));
|
|
|
|
/**
 * Copy the settings inputs into the JSON shown in the request-body textarea.
 *
 * Parses the textarea, overwrites `model`, `max_tokens`, `temperature` and
 * `top_p`, and writes the result back pretty-printed with two-space indent.
 * A numeric field is only overwritten when its input parses to a finite
 * number: an empty or invalid input would otherwise yield NaN, which
 * JSON.stringify serialises as `null`. If the textarea does not hold valid
 * JSON, the error is logged and the textarea is left unchanged.
 */
function updateRequestBodyFromSettings() {
    try {
        const requestBody = JSON.parse(requestBodyTextarea.value);
        requestBody.model = modelInput.value;

        const numericSettings = {
            max_tokens: Number.parseInt(maxTokensInput.value, 10),
            temperature: Number.parseFloat(temperatureInput.value),
            top_p: Number.parseFloat(topPInput.value),
        };
        for (const [field, parsed] of Object.entries(numericSettings)) {
            // Skip NaN so the body keeps its previous value instead of `null`.
            if (Number.isFinite(parsed)) {
                requestBody[field] = parsed;
            }
        }

        requestBodyTextarea.value = JSON.stringify(requestBody, null, 2);
    } catch (error) {
        console.error("Error updating request body:", error);
    }
}
|
|
|
|
/**
 * Copy `model`, `max_tokens`, `temperature` and `top_p` from the JSON in the
 * request-body textarea into the matching settings inputs.
 *
 * A field is copied whenever it is present (neither undefined nor null), so
 * legitimate falsy values such as `temperature: 0` are mirrored too. Absent
 * fields leave their input untouched. If the textarea does not hold valid
 * JSON, the error is logged and no input is changed.
 */
function updateSettingsFromRequestBody() {
    try {
        const requestBody = JSON.parse(requestBodyTextarea.value);
        const fieldInputs = [
            ['model', modelInput],
            ['max_tokens', maxTokensInput],
            ['temperature', temperatureInput],
            ['top_p', topPInput],
        ];
        for (const [field, input] of fieldInputs) {
            const fieldValue = requestBody[field];
            // Presence check rather than truthiness, so 0 is not skipped.
            if (fieldValue !== undefined && fieldValue !== null) {
                input.value = fieldValue;
            }
        }
    } catch (error) {
        console.error("Error updating settings:", error);
    }
}
|
|
|
|
// A committed change in any settings input is pushed into the request body JSON.
for (const settingInput of [modelInput, maxTokensInput, temperatureInput, topPInput]) {
    settingInput.addEventListener('change', updateRequestBodyFromSettings);
}

// When the textarea loses focus, its JSON values are pulled back into the inputs.
requestBodyTextarea.addEventListener('blur', updateSettingsFromRequestBody);
|
|
|
|
// Send request button: POST the textarea's JSON to the chat completions
// endpoint of the configured server and show the reply (or the failure
// message) in the response panel.
document.getElementById('sendRequest').addEventListener('click', async function() {
    // Clear error styling left by an earlier failed request; otherwise every
    // later response would still be rendered in red.
    responseOutput.classList.remove('error');
    try {
        responseOutput.textContent = "Sending request...";

        // Strip trailing slashes so "http://host/" does not produce "//v1/...".
        const serverUrl = serverUrlInput.value.trim().replace(/\/+$/, '');
        const endpoint = '/v1/chat/completions';
        const url = `${serverUrl}${endpoint}`;

        // Parsing first means invalid JSON is reported without hitting the network.
        const requestBody = JSON.parse(requestBodyTextarea.value);

        const response = await fetch(url, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify(requestBody),
        });

        const data = await response.json();
        responseOutput.textContent = JSON.stringify(data, null, 2);
        // A non-2xx reply is still displayed, but flagged as an error.
        responseOutput.classList.toggle('error', !response.ok);
    } catch (error) {
        responseOutput.textContent = `Error: ${error.message}`;
        responseOutput.classList.add('error');
    }
});
|
|
|
|
// Example requests

// Example 1: a single factual question, using the current settings values.
document.getElementById('example1').addEventListener('click', () => {
    const payload = {
        model: modelInput.value,
        messages: [
            {
                role: "user",
                content: "Who was the 16th president of the United States?",
            },
        ],
        max_tokens: parseInt(maxTokensInput.value),
        temperature: parseFloat(temperatureInput.value),
        top_p: parseFloat(topPInput.value),
    };
    requestBodyTextarea.value = JSON.stringify(payload, null, 2);
});
|
|
|
|
// Example 2: a multi-turn conversation (system prompt plus prior turns),
// using the current settings values.
document.getElementById('example2').addEventListener('click', () => {
    const turns = [
        ["system", "You are a helpful assistant that provides concise answers."],
        ["user", "What is machine learning?"],
        ["assistant", "Machine learning is a subset of artificial intelligence that enables systems to learn and improve from experience without being explicitly programmed."],
        ["user", "Give me an example of a machine learning algorithm."],
    ];
    const payload = {
        model: modelInput.value,
        messages: turns.map(([role, content]) => ({ role, content })),
        max_tokens: parseInt(maxTokensInput.value),
        temperature: parseFloat(temperatureInput.value),
        top_p: parseFloat(topPInput.value),
    };
    requestBodyTextarea.value = JSON.stringify(payload, null, 2);
});
|
|
|
|
// Example 3: creative writing. Uses fixed sampling values instead of the
// current settings, then mirrors them into the settings inputs so a later
// settings change does not silently overwrite the example's values.
document.getElementById('example3').addEventListener('click', function() {
    const temperature = 0.9; // Higher temperature for creative tasks
    const topP = 0.9;

    requestBodyTextarea.value = JSON.stringify({
        model: modelInput.value,
        messages: [
            {
                role: "user",
                content: "Write a short poem about artificial intelligence.",
            },
        ],
        max_tokens: parseInt(maxTokensInput.value),
        temperature,
        top_p: topP,
    }, null, 2);

    // Keep both inputs in step with the body that was just written.
    temperatureInput.value = temperature;
    topPInput.value = topP;
});
|
|
|
|
// Example 4: code generation. Uses fixed sampling values instead of the
// current settings, then mirrors them into the settings inputs so a later
// settings change does not silently overwrite the example's values.
document.getElementById('example4').addEventListener('click', function() {
    const temperature = 0.3; // Lower temperature for code generation
    const topP = 0.9;

    requestBodyTextarea.value = JSON.stringify({
        model: modelInput.value,
        messages: [
            {
                role: "user",
                content: "Write a Python function to calculate the Fibonacci sequence up to n terms.",
            },
        ],
        max_tokens: parseInt(maxTokensInput.value),
        temperature,
        top_p: topP,
    }, null, 2);

    // Keep both inputs in step with the body that was just written.
    temperatureInput.value = temperature;
    topPInput.value = topP;
});
|
|
});
|
|
</script>
|
|
</body>
|
|
</html> |