From a9bbea8c341d734c6a2b9b145a4cee90c1990961 Mon Sep 17 00:00:00 2001
From: geoffsee <>
Date: Mon, 2 Jun 2025 13:51:05 -0400
Subject: [PATCH] hotfix: add default for local-inference

---
 README.md    | 36 ++++++++++++++++++++----------------
 package.json |  2 +-
 2 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/README.md b/README.md
index 4c50064..df5a648 100644
--- a/README.md
+++ b/README.md
@@ -15,10 +15,10 @@
 - [Installation](#installation)
 - [Deployment](#deployment)
 - [Local Inference](#local-inference)
+  - [mlx-omni-server (default)](#mlx-omni-server)
+  - [Adding models](#adding-models-for-local-inference-apple-silicon)
   - [Ollama](#ollama)
-  - [Adding models for local inference (ollama)](#adding-models-for-local-inference-ollama)
-  - [mlx-omni-server (Apple Silicon Only)](#mlx-omni-server-apple-silicon-only)
-  - [Adding models for local inference (Apple Silicon)](#adding-models-for-local-inference-apple-silicon)
+  - [Adding models](#adding-models-for-local-inference-ollama)
 - [Testing](#testing)
 - [Troubleshooting](#troubleshooting)
 - [History](#history)
@@ -53,20 +53,9 @@
 
 ## Local Inference
 > Local inference is achieved by overriding the `OPENAI_API_KEY` and `OPENAI_API_ENDPOINT` environment variables. See below.
 
-### Ollama
-~~~bash
-bun run openai:local ollama # Start ollama server
-bun run openai:local:enable # Configure connection
-bun run server:dev # Restart server
-~~~
-#### Adding models for local inference (ollama)
-~~~bash
-# See https://ollama.com/library for available models
-MODEL_TO_ADD=gemma3
-docker exec -it ollama ollama run ${MODEL_TO_ADD}
-~~~
-### mlx-omni-server (Apple Silicon Only)
+### mlx-omni-server
+The default backend. Requires Apple Silicon; use Ollama on other platforms.
 ~~~bash
 # (prereq) install mlx-omni-server
 brew tap seemueller-io/tap
@@ -92,6 +81,21 @@ curl http://localhost:10240/v1/chat/completions \
 }"
 ~~~
 
+### Ollama
+~~~bash
+bun run openai:local:ollama # Start ollama server
+bun run openai:local:enable # Configure connection
+bun run server:dev # Restart server
+~~~
+#### Adding models for local inference (ollama)
+
+~~~bash
+# See https://ollama.com/library for available models
+MODEL_TO_ADD=gemma3
+docker exec -it ollama ollama run ${MODEL_TO_ADD}
+~~~
+
+
 ## Testing
 Tests are located in `__tests__` directories next to the code they test. Testing is incomplete at this time.
 
diff --git a/package.json b/package.json
index 73df755..7e464ed 100644
--- a/package.json
+++ b/package.json
@@ -18,7 +18,7 @@
     "tail:email-service": "wrangler tail -c workers/email/wrangler-email.toml",
     "tail:analytics-service": "wrangler tail -c workers/analytics/wrangler-analytics.toml",
     "tail:session-proxy": "wrangler tail -c workers/session-proxy/wrangler-session-proxy.toml --env production",
-    "openai:local": "./scripts/start_inference_server.sh",
+    "openai:local": "./scripts/start_inference_server.sh mlx-omni-server",
     "openai:local:mlx": "./scripts/start_inference_server.sh mlx-omni-server",
     "openai:local:ollama": "./scripts/start_inference_server.sh ollama",
     "openai:local:configure": "scripts/configure_local_inference.sh",
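
A note on the override the README blockquote describes: this patch does not include `scripts/configure_local_inference.sh`, so the exact values it sets are not visible here. As a rough sketch under stated assumptions, pointing the app at the default mlx-omni-server backend amounts to something like the following; the port is taken from the README's curl example, while the `/v1`-or-not shape of the endpoint and the key value are assumptions.

~~~bash
# Sketch only: approximate effect of the OPENAI_* override for local inference.
# Port 10240 comes from the README's curl example; the key is a dummy value,
# since a local OpenAI-compatible server typically does not validate it.
export OPENAI_API_ENDPOINT="http://localhost:10240"
export OPENAI_API_KEY="local-dev-placeholder"
~~~

In practice one would run `bun run openai:local:configure` (the `configure_local_inference.sh` script wired up in package.json) rather than exporting these by hand; the sketch is only meant to make the blockquote concrete.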