// Test requests for the OpenAI-compatible endpoint in the inference server
// Each test is an async IIFE (Immediately Invoked Function Expression) that sends
// a request to the /v1/chat/completions endpoint; the tests run concurrently.

// Basic chat completion request
(async function testBasicChatCompletion() {
  console.log("Test 1: Basic chat completion request");
  try {
    const response = await fetch('http://localhost:3000/v1/chat/completions', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: "gemma-2-2b-it",
        messages: [
          { role: "user", content: "Who was the 16th president of the United States?" }
        ],
        max_tokens: 100
      })
    });
    const data = await response.json();
    console.log("Response:", JSON.stringify(data, null, 2));
  } catch (error) {
    console.error("Error:", error);
  }
})();

// Multi-turn conversation
(async function testMultiTurnConversation() {
  console.log("\nTest 2: Multi-turn conversation");
  try {
    const response = await fetch('http://localhost:3000/v1/chat/completions', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: "gemma-2-2b-it",
        messages: [
          { role: "system", content: "You are a helpful assistant that provides concise answers." },
          { role: "user", content: "What is machine learning?" },
          { role: "assistant", content: "Machine learning is a subset of artificial intelligence that enables systems to learn and improve from experience without being explicitly programmed." },
          { role: "user", content: "Give me an example of a machine learning algorithm." }
        ],
        max_tokens: 150
      })
    });
    const data = await response.json();
    console.log("Response:", JSON.stringify(data, null, 2));
  } catch (error) {
    console.error("Error:", error);
  }
})();

// Request with temperature and top_p parameters
(async function testTemperatureAndTopP() {
  console.log("\nTest 3: Request with temperature and top_p parameters");
  try {
    const response = await fetch('http://localhost:3000/v1/chat/completions', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: "gemma-2-2b-it",
        messages: [
          { role: "user", content: "Write a short poem about artificial intelligence." }
        ],
        max_tokens: 200,
        temperature: 0.8,
        top_p: 0.9
      })
    });
    const data = await response.json();
    console.log("Response:", JSON.stringify(data, null, 2));
  } catch (error) {
    console.error("Error:", error);
  }
})();

// Request with streaming enabled
(async function testStreaming() {
  console.log("\nTest 4: Request with streaming enabled");
  try {
    const response = await fetch('http://localhost:3000/v1/chat/completions', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: "gemma-2-2b-it",
        messages: [
          { role: "user", content: "Explain quantum computing in simple terms." }
        ],
        max_tokens: 150,
        stream: true
      })
    });

    // Note: streaming might not be implemented yet; this tests the API's handling of the parameter
    if (response.headers.get('content-type')?.includes('text/event-stream')) {
      console.log("Streaming response detected. Reading stream...");
      const reader = response.body.getReader();
      const decoder = new TextDecoder();
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        const chunk = decoder.decode(value);
        console.log("Chunk:", chunk);
      }
    } else {
      const data = await response.json();
      console.log("Non-streaming response:", JSON.stringify(data, null, 2));
    }
  } catch (error) {
    console.error("Error:", error);
  }
})();
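// The streaming loop in Test 4 only logs raw chunks. If the server emits
// OpenAI-style server-sent events (an assumption; streaming may not be
// implemented yet), each chunk carries lines of the form `data: {...}`,
// terminated by `data: [DONE]`. Below is a minimal sketch for pulling the
// incremental text out of such a chunk; `extractDeltas` is a hypothetical
// helper, not part of the server's API.
function extractDeltas(chunk) {
  const deltas = [];
  for (const line of chunk.split('\n')) {
    const trimmed = line.trim();
    if (!trimmed.startsWith('data:')) continue;
    const payload = trimmed.slice('data:'.length).trim();
    if (payload === '[DONE]') break;
    try {
      const parsed = JSON.parse(payload);
      // OpenAI-style stream events put incremental text in choices[0].delta.content
      const delta = parsed.choices?.[0]?.delta?.content;
      if (delta) deltas.push(delta);
    } catch {
      // An event split across two reads yields partial JSON; a robust client
      // would buffer across chunks instead of skipping the line as this sketch does.
    }
  }
  return deltas;
}
// Example: extractDeltas('data: {"choices":[{"delta":{"content":"Hi"}}]}\n') returns ["Hi"]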
// Request with a different model
(async function testDifferentModel() {
  console.log("\nTest 5: Request with a different model");
  try {
    const response = await fetch('http://localhost:3000/v1/chat/completions', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: "gemma-2-2b-it", // Same model reused here; substitute a different model name if one is available
        messages: [
          { role: "user", content: "What are the benefits of renewable energy?" }
        ],
        max_tokens: 150
      })
    });
    const data = await response.json();
    console.log("Response:", JSON.stringify(data, null, 2));
  } catch (error) {
    console.error("Error:", error);
  }
})();

console.log("\nAll test requests have been sent. Responses will be logged as they arrive; check the server logs for more details.");
console.log("To run the server, use: cargo run --bin local_inference_engine -- --server");
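// These tests assume the server is listening on http://localhost:3000 and a
// runtime with the built-in fetch API (e.g. Node.js 18+ or a browser). With
// Node, run the file directly, e.g.: node <this-file>.js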