diff --git a/README.md b/README.md
index 6476a98..dd8bad6 100644
--- a/README.md
+++ b/README.md
@@ -6,17 +6,18 @@
 
-## Project Status: Testing
-
 ## Table of Contents
+
 - [Stack](#stack)
 - [Installation](#installation)
 - [Deployment](#deployment)
 - [Local Inference](#local-inference)
   - [Ollama](#ollama)
+    - [Adding models for local inference (ollama)](#adding-models-for-local-inference-ollama)
   - [mlx-omni-server (Apple Silicon Only)](#mlx-omni-server-apple-silicon-only)
     - [Adding models for local inference (Apple Silicon)](#adding-models-for-local-inference-apple-silicon)
 - [Testing](#testing)
+- [Troubleshooting](#troubleshooting)
 - [History](#history)
 - [License](#license)
@@ -51,26 +52,33 @@
 > Local inference is achieved by overriding the `OPENAI_API_KEY` and `OPENAI_API_ENDPOINT` environment variables. See below.
 
 ### Ollama
 ~~~bash
-docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama ## Run Ollama (Can also be installed natively)
-bun run openai:local # Start OpenAI-compatible server
-sed -i '' '/^OPENAI_API_KEY=/d' .dev.vars; echo >> .dev.vars; echo 'OPENAI_API_KEY=required-but-not-used' >> .dev.vars # Reset API key
-sed -i '' '/^OPENAI_API_ENDPOINT=/d' .dev.vars; echo >> .dev.vars; echo 'OPENAI_API_ENDPOINT=http://localhost:11434' >> .dev.vars # Reset endpoint
-bun run server:dev # Start dev server
+bun run openai:local ollama # Start Ollama server
+bun run openai:local:configure # Configure connection
+bun run server:dev # Restart dev server
 ~~~
+#### Adding models for local inference (ollama)
+~~~bash
+# See https://ollama.com/library for available models
+MODEL_TO_ADD=gemma3
+docker exec -it ollama ollama run ${MODEL_TO_ADD}
+~~~
 
 ### mlx-omni-server (Apple Silicon Only)
 ~~~bash
-brew tap seemueller-io/tap # Add seemueller-io tap
-brew install seemueller-io/tap/mlx-omni-server # Install mlx-omni-server
-bun run openai:local # Start OpenAI-compatible server
-sed -i '' '/^OPENAI_API_KEY=/d' .dev.vars; echo >> .dev.vars; echo 'OPENAI_API_KEY=required-but-not-used' >> .dev.vars # Reset API key
-sed -i '' '/^OPENAI_API_ENDPOINT=/d' .dev.vars; echo >> .dev.vars; echo 'OPENAI_API_ENDPOINT=http://localhost:10240' >> .dev.vars # Reset endpoint
-bun run server:dev # Start dev server
+# (prereq) install mlx-omni-server
+brew tap seemueller-io/tap
+brew install seemueller-io/tap/mlx-omni-server
+
+bun run openai:local mlx-omni-server # Start mlx-omni-server
+bun run openai:local:configure # Configure connection
+bun run server:dev # Restart dev server
 ~~~
 
 #### Adding models for local inference (Apple Silicon)
 ~~~bash
-# ensure mlx-omni-server is running in the background
+# ensure mlx-omni-server is running
+
+# See https://huggingface.co/mlx-community for available models
 MODEL_TO_ADD=mlx-community/gemma-3-4b-it-8bit
 curl http://localhost:10240/v1/chat/completions \
@@ -81,15 +89,20 @@ curl http://localhost:10240/v1/chat/completions \
   }"
 ~~~
 
-
-
-
 ## Testing
 
 Tests are located in `__tests__` directories next to the code they test. Testing is incomplete at this time.
 
 > `bun run test` will run all tests
+
+## Troubleshooting
+1. `bun run clean`
+1. `bun i`
+1. `bun run server:dev`
+1. `bun run client:dev`
+1. Submit an issue
+
 
 History
 ---
 A high-level overview for the development history of the parent repository, [geoff-seemueller-io](https://geoff.seemueller.io), is provided in [LEGACY.md](./LEGACY.md).
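For reference, after the "Configure connection" step detects a running Ollama instance, `.dev.vars` should end up containing entries along these lines. This is an illustrative sketch based on the values hard-coded in the configure script introduced below; any other variables already in the file are left untouched.

~~~bash
# .dev.vars (excerpt) after `bun run openai:local:configure` with Ollama on port 11434
OPENAI_API_KEY=required-but-not-used
OPENAI_API_ENDPOINT=http://localhost:11434
~~~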
diff --git a/package.json b/package.json
index 5950c45..73df755 100644
--- a/package.json
+++ b/package.json
@@ -19,6 +19,9 @@
     "tail:analytics-service": "wrangler tail -c workers/analytics/wrangler-analytics.toml",
     "tail:session-proxy": "wrangler tail -c workers/session-proxy/wrangler-session-proxy.toml --env production",
     "openai:local": "./scripts/start_inference_server.sh",
+    "openai:local:mlx": "./scripts/start_inference_server.sh mlx-omni-server",
+    "openai:local:ollama": "./scripts/start_inference_server.sh ollama",
+    "openai:local:configure": "./scripts/configure_local_inference.sh",
     "test": "vitest run",
     "test:watch": "vitest",
     "test:coverage": "vitest run --coverage.enabled=true"
diff --git a/scripts/configure_local_inference.sh b/scripts/configure_local_inference.sh
new file mode 100755
index 0000000..455de2c
--- /dev/null
+++ b/scripts/configure_local_inference.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+
+# Function to configure .dev.vars with the specified API key and endpoint
+configure_dev_vars() {
+  local endpoint_url=$1
+  local api_key="required-but-not-used"
+
+  echo "Configuring .dev.vars for endpoint: ${endpoint_url}"
+
+  # Ensure .dev.vars exists. This prevents errors if sed tries to edit a
+  # non-existent file and ensures '>>' appends to it.
+  touch .dev.vars
+
+  # Configure OPENAI_API_KEY
+  # 1. Remove any existing OPENAI_API_KEY line
+  sed -i '' '/^OPENAI_API_KEY=/d' .dev.vars
+  # 2. Append the new OPENAI_API_KEY line
+  echo "OPENAI_API_KEY=${api_key}" >> .dev.vars
+
+  # Configure OPENAI_API_ENDPOINT
+  # 1. Remove any existing OPENAI_API_ENDPOINT line
+  sed -i '' '/^OPENAI_API_ENDPOINT=/d' .dev.vars
+  # 2. Append the new OPENAI_API_ENDPOINT line
+  echo "OPENAI_API_ENDPOINT=${endpoint_url}" >> .dev.vars
+
+  echo "Local inference is configured for $endpoint_url"
+}
+
+echo "Checking for local inference services..."
+
+# Check for Ollama on port 11434
+# nc -z -w1 localhost 11434:
+#   -z: Zero-I/O mode (port scanning)
+#   -w1: Timeout after 1 second
+#   >/dev/null 2>&1: Suppress output from nc
+if nc -z -w1 localhost 11434 >/dev/null 2>&1; then
+  echo "Ollama service detected on port 11434."
+  configure_dev_vars "http://localhost:11434"
+# Else, check for mlx-omni-server on port 10240
+elif nc -z -w1 localhost 10240 >/dev/null 2>&1; then
+  echo "mlx-omni-server service detected on port 10240."
+  configure_dev_vars "http://localhost:10240"
+else
+  echo "No active local inference service (Ollama or mlx-omni-server) found on default ports (11434, 10240)."
+  echo "If a service is running on a different port, .dev.vars may need manual configuration."
+  echo ".dev.vars was not modified by this script for OpenAI local inference settings."
+fi
+
+echo "Script finished."
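As a quick sanity check of the detection logic above, the same `nc` probe can be run by hand. This is only a sketch using the ports the script already checks (11434 for Ollama, 10240 for mlx-omni-server); it exits successfully when something is listening.

~~~bash
# Probe the default local-inference ports checked by configure_local_inference.sh
nc -z -w1 localhost 11434 && echo "Ollama reachable on 11434"
nc -z -w1 localhost 10240 && echo "mlx-omni-server reachable on 10240"
~~~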
diff --git a/scripts/start_inference_server.sh b/scripts/start_inference_server.sh
index 0f7d04b..1de83b1 100755
--- a/scripts/start_inference_server.sh
+++ b/scripts/start_inference_server.sh
@@ -1,8 +1,12 @@
 #!/usr/bin/env bash
 
-SERVER_TYPE="mlx-omni-server"
-
-printf "Starting Inference Server: %s\n" ${SERVER_TYPE}
-
-
-mlx-omni-server --log-level debug
\ No newline at end of file
+if [ "$1" = "mlx-omni-server" ]; then
+  printf "Starting Inference Server: %s\n" "$1"
+  mlx-omni-server --log-level debug
+elif [ "$1" = "ollama" ]; then
+  echo "starting ollama"
+  docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
+else
+  printf "Error: First argument must be 'mlx-omni-server' or 'ollama'\n"
+  exit 1
+fi
diff --git a/src/components/chat/messages/MessageBubble.tsx b/src/components/chat/messages/MessageBubble.tsx
index 1cbee25..52f12a0 100644
--- a/src/components/chat/messages/MessageBubble.tsx
+++ b/src/components/chat/messages/MessageBubble.tsx
@@ -1,5 +1,4 @@
 import React, { useEffect, useRef, useState } from "react";
-import { motion } from "framer-motion";
 import { Box, Flex, Text } from "@chakra-ui/react";
 import MessageRenderer from "./ChatMessageContent";
 import { observer } from "mobx-react-lite";
@@ -65,14 +64,7 @@ const MessageBubble = observer(({ msg, scrollRef }) => {
   };
 
   useEffect(() => {
-    if (
-      clientChatStore.items.length > 0 &&
-      clientChatStore.isLoading &&
-      UserOptionsStore.followModeEnabled
-    ) {
-      console.log(
-        `${clientChatStore.items.length}/${clientChatStore.isLoading}/${UserOptionsStore.followModeEnabled}`,
-      );
+    if (clientChatStore.items.length > 0 && clientChatStore.isLoading && UserOptionsStore.followModeEnabled) {
       scrollRef.current?.scrollTo({
         top: scrollRef.current.scrollHeight,
         behavior: "auto",
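With the changes above, the start script can be driven either directly or through the new package.json aliases. A brief usage sketch, assuming Docker is available for the Ollama path:

~~~bash
./scripts/start_inference_server.sh mlx-omni-server  # native mlx-omni-server (Apple Silicon)
./scripts/start_inference_server.sh ollama           # Ollama in Docker
bun run openai:local:mlx                             # same as the first call, via package.json
bun run openai:local:ollama                          # same as the second call, via package.json
./scripts/start_inference_server.sh                  # missing/unknown argument: prints an error, exits 1
~~~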