diff --git a/.github/workflows/docker-publish-openclaw.yml b/.github/workflows/docker-publish-openclaw.yml new file mode 100644 index 0000000..7cf12cb --- /dev/null +++ b/.github/workflows/docker-publish-openclaw.yml @@ -0,0 +1,119 @@ +name: Build and Publish OpenClaw Image + +on: + push: + branches: + - main + - integration-okr-1 # TODO: remove after testing — limit to main only + - feat/openclaw-ci # TODO: remove after testing — limit to main only + paths: + - 'internal/openclaw/OPENCLAW_VERSION' + workflow_dispatch: + inputs: + version: + description: 'OpenClaw version to build (e.g. v2026.2.3)' + required: false + type: string + +env: + REGISTRY: ghcr.io + IMAGE_NAME: obolnetwork/openclaw + +jobs: + build-and-push: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - name: Checkout obol-stack + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + + - name: Read pinned version + id: version + run: | + if [ -n "${{ github.event.inputs.version }}" ]; then + VERSION="${{ github.event.inputs.version }}" + else + VERSION=$(grep -v '^#' internal/openclaw/OPENCLAW_VERSION | tr -d '[:space:]') + fi + echo "version=$VERSION" >> "$GITHUB_OUTPUT" + echo "Building OpenClaw $VERSION" + + - name: Checkout upstream OpenClaw + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + repository: openclaw/openclaw + ref: ${{ steps.version.outputs.version }} + path: openclaw-src + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1 + + - name: Set up QEMU + uses: docker/setup-qemu-action@29109295f81e9208d7d86ff1c6c12d2833863392 # v3.6.0 + + - name: Login to GitHub Container Registry + uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata + id: meta + uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=semver,pattern={{version}},value=${{ steps.version.outputs.version }} + type=semver,pattern={{major}}.{{minor}},value=${{ steps.version.outputs.version }} + type=sha,prefix= + type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }} + labels: | + org.opencontainers.image.title=OpenClaw + org.opencontainers.image.description=AI agent gateway for Obol Stack + org.opencontainers.image.vendor=Obol Network + org.opencontainers.image.source=https://github.com/openclaw/openclaw + org.opencontainers.image.version=${{ steps.version.outputs.version }} + + - name: Build and push Docker image + uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0 + with: + context: openclaw-src + platforms: linux/amd64,linux/arm64 + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max + provenance: true + sbom: true + + security-scan: + needs: build-and-push + runs-on: ubuntu-latest + permissions: + security-events: write + + steps: + - name: Read pinned version + id: version + run: | + # Re-derive for the scan job + echo "Scanning latest pushed image" + + - name: Run Trivy vulnerability scanner + uses: aquasecurity/trivy-action@22438a435773de8c97dc0958cc0b823c45b064ac # master + with: + image-ref: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest + format: 'sarif' + output: 'trivy-results.sarif' + 
severity: 'CRITICAL,HIGH' + + - name: Upload Trivy scan results to GitHub Security tab + uses: github/codeql-action/upload-sarif@b13d724d35ff0a814e21683638ed68ed34cf53d1 # main + with: + sarif_file: 'trivy-results.sarif' + if: always() diff --git a/CLAUDE.md b/CLAUDE.md index bc40752..92bb40f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -20,6 +20,19 @@ The Obol Stack is a local Kubernetes-based framework for running blockchain netw 5. **Two-stage templating**: CLI flags → Go templates → Helmfile → Kubernetes resources 6. **Development mode**: Local `.workspace/` directory with `go run` wrapper for rapid development +### Routing and Gateway API + +Obol Stack uses Traefik with the Kubernetes Gateway API for HTTP routing. + +- Controller: Traefik Helm chart (`traefik` namespace) +- GatewayClass: `traefik` +- Gateway: `traefik-gateway` in `traefik` namespace +- HTTPRoute patterns: + - `/` → `obol-frontend` + - `/rpc` → `erpc` + - `/ethereum-/execution` and `/ethereum-/beacon` + - `/aztec-` and `/helios-` + ## Bootstrap Installer: obolup.sh ### Purpose @@ -58,12 +71,12 @@ Uses local workspace: **Pinned versions** (lines 50-57): ```bash -KUBECTL_VERSION="1.31.0" -HELM_VERSION="3.16.2" +KUBECTL_VERSION="1.35.0" +HELM_VERSION="3.19.4" K3D_VERSION="5.8.3" -HELMFILE_VERSION="0.169.1" -K9S_VERSION="0.32.5" -HELM_DIFF_VERSION="3.9.11" +HELMFILE_VERSION="1.2.3" +K9S_VERSION="0.50.18" +HELM_DIFF_VERSION="3.14.1" ``` **Smart installation logic**: @@ -135,6 +148,12 @@ obol │ │ ├── helios (dynamically generated) │ │ └── aztec (dynamically generated) │ └── delete +├── llm (LLM provider management) +│ └── configure +├── openclaw (OpenClaw AI assistant) +│ ├── setup +│ ├── onboard +│ └── dashboard ├── kubectl (passthrough with KUBECONFIG) ├── helm (passthrough with KUBECONFIG) ├── helmfile (passthrough with KUBECONFIG) @@ -547,6 +566,118 @@ obol network install ethereum --id hoodi-test --network=hoodi - k3s auto-applies all YAML files on startup - Uses k3s HelmChart CRD for Helm deployments +## LLM Configuration Architecture + +The stack uses a two-tier architecture for LLM routing. A cluster-wide proxy (llmspy) handles actual provider communication, while each application instance (e.g., OpenClaw) sees a simplified single-provider view. + +### Tier 1: Global llmspy Gateway (`llm` namespace) + +**Purpose**: Shared OpenAI-compatible proxy that routes LLM traffic from all applications to actual providers (Ollama, Anthropic, OpenAI). + +**Kubernetes resources** (defined in `internal/embed/infrastructure/base/templates/llm.yaml`): + +| Resource | Type | Purpose | +|----------|------|---------| +| `llm` | Namespace | Dedicated namespace for LLM infrastructure | +| `llmspy-config` | ConfigMap | `llms.json` (provider enable/disable) + `providers.json` (provider definitions) | +| `llms-secrets` | Secret | Cloud API keys (`ANTHROPIC_API_KEY`, `OPENAI_API_KEY`) — empty by default | +| `llmspy` | Deployment | `ghcr.io/obolnetwork/llms:3.0.32-obol.1-rc.1`, port 8000 | +| `llmspy` | Service (ClusterIP) | `llmspy.llm.svc.cluster.local:8000` | +| `ollama` | Service (ExternalName) | Routes to host Ollama via `{{OLLAMA_HOST}}` placeholder | + +**Configuration mechanism** (`internal/llm/llm.go` — `ConfigureLLMSpy()`): +1. Patches `llms-secrets` Secret with the API key +2. Reads `llmspy-config` ConfigMap, sets `providers..enabled = true` in `llms.json` +3. Restarts `llmspy` Deployment via rollout restart +4. 
Waits for rollout to complete (60s timeout) + +**CLI surface** (`cmd/obol/llm.go`): +- `obol llm configure --provider=anthropic --api-key=sk-...` +- Interactive prompt if flags omitted (choice of Anthropic or OpenAI) + +**Key design**: Ollama is enabled by default; cloud providers are disabled until configured via `obol llm configure`. An init container copies the ConfigMap into a writable emptyDir so llmspy can write runtime state. + +### Tier 2: Per-Instance Application Config (per-deployment namespace) + +**Purpose**: Each application instance (e.g., OpenClaw) has its own model configuration, rendered by its Helm chart from values files. + +**Values file hierarchy** (helmfile merges in order): +1. `values.yaml` — chart defaults (from embedded chart, e.g., `internal/openclaw/chart/values.yaml`) +2. `values-obol.yaml` — Obol Stack overlay (generated by `generateOverlayValues()`) + +**How providers become application config** (OpenClaw example, `_helpers.tpl` lines 167-189): +- Iterates provider list from `.Values.models` +- Only emits providers where `enabled == true` +- For each enabled provider: `baseUrl`, `apiKey` (as `${ENV_VAR}` reference), `models` array +- `api` field is only emitted if non-empty (required for llmspy routing) + +### The llmspy-Routed Overlay Pattern + +When a cloud provider is selected during setup, two things happen simultaneously: + +1. **Global tier**: `llm.ConfigureLLMSpy()` patches the cluster-wide llmspy gateway with the API key and enables the provider +2. **Instance tier**: `buildLLMSpyRoutedOverlay()` creates an overlay where a single "ollama" provider points at llmspy, the cloud model is listed under that provider, and `api` is set to `openai-completions` + +**Result**: The application never talks directly to cloud APIs. All traffic is routed through llmspy. 
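The overlay itself is just a small values fragment. Below is a minimal Go sketch of the shape `buildLLMSpyRoutedOverlay()` produces — the struct and field names mirror the `values-obol.yaml` example further down, but the actual types and function signature in `internal/openclaw/openclaw.go` may differ.

```go
package openclaw

// Illustrative sketch only — field names follow the values-obol.yaml example;
// the real helper in internal/openclaw/openclaw.go may be shaped differently.
type modelEntry struct {
	ID   string `yaml:"id"`
	Name string `yaml:"name"`
}

type providerOverlay struct {
	Enabled      bool         `yaml:"enabled"`
	BaseURL      string       `yaml:"baseUrl,omitempty"`
	API          string       `yaml:"api,omitempty"`
	APIKeyEnvVar string       `yaml:"apiKeyEnvVar,omitempty"`
	APIKeyValue  string       `yaml:"apiKeyValue,omitempty"`
	Models       []modelEntry `yaml:"models,omitempty"`
}

// buildLLMSpyRoutedOverlay presents a single virtual "ollama" provider that
// points at the cluster-wide llmspy gateway; the selected cloud model is
// listed under it, and api is forced to openai-completions so llmspy can route it.
func buildLLMSpyRoutedOverlay(modelID, modelName string) map[string]providerOverlay {
	return map[string]providerOverlay{
		"ollama": {
			Enabled:      true,
			BaseURL:      "http://llmspy.llm.svc.cluster.local:8000/v1",
			API:          "openai-completions",
			APIKeyEnvVar: "OLLAMA_API_KEY",
			APIKeyValue:  "ollama-local",
			Models:       []modelEntry{{ID: modelID, Name: modelName}},
		},
		"anthropic": {Enabled: false},
		"openai":    {Enabled: false},
	}
}
```

Serialized to YAML, this is exactly the `values-obol.yaml` overlay shown below.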
+ +**Data flow**: +``` +Application (openclaw.json) + │ model: "ollama/claude-sonnet-4-5-20250929" + │ api: "openai-completions" + │ baseUrl: http://llmspy.llm.svc.cluster.local:8000/v1 + │ + ▼ +llmspy (llm namespace, port 8000) + │ POST /v1/chat/completions + │ → resolves "claude-sonnet-4-5-20250929" to anthropic provider + │ + ▼ +Anthropic API (or Ollama, OpenAI — depending on provider) +``` + +**Overlay example** (`values-obol.yaml`): +```yaml +models: + ollama: + enabled: true + baseUrl: http://llmspy.llm.svc.cluster.local:8000/v1 + api: openai-completions + apiKeyEnvVar: OLLAMA_API_KEY + apiKeyValue: ollama-local + models: + - id: claude-sonnet-4-5-20250929 + name: Claude Sonnet 4.5 + anthropic: + enabled: false + openai: + enabled: false +``` + +### Summary Table + +| Aspect | Tier 1 (llmspy) | Tier 2 (Application instance) | +|--------|-----------------|-------------------------------| +| **Scope** | Cluster-wide | Per-deployment | +| **Namespace** | `llm` | `-` (e.g., `openclaw-`) | +| **Config storage** | ConfigMap `llmspy-config` | ConfigMap `-config` | +| **Secrets** | Secret `llms-secrets` | Secret `-secrets` | +| **Configure via** | `obol llm configure` | `obol openclaw setup ` | +| **Providers** | Real (Ollama, Anthropic, OpenAI) | Virtual: everything appears as "ollama" pointing at llmspy | +| **API field** | N/A (provider-native) | Must be `openai-completions` for llmspy routing | + +### Key Source Files + +| File | Role | +|------|------| +| `internal/llm/llm.go` | `ConfigureLLMSpy()` — patches global Secret + ConfigMap + restart | +| `cmd/obol/llm.go` | `obol llm configure` CLI command | +| `internal/embed/infrastructure/base/templates/llm.yaml` | llmspy Kubernetes resource definitions | +| `internal/openclaw/openclaw.go` | `Setup()`, `interactiveSetup()`, `generateOverlayValues()`, `buildLLMSpyRoutedOverlay()` | +| `internal/openclaw/import.go` | `DetectExistingConfig()`, `TranslateToOverlayYAML()` | +| `internal/openclaw/chart/values.yaml` | Default per-instance model config | +| `internal/openclaw/chart/templates/_helpers.tpl` | Renders model providers into application JSON config | + ## Network Install Implementation Details ### Template Field Parser @@ -786,6 +917,14 @@ obol network delete ethereum- --force - `internal/network/network.go` - Network deployment - `internal/embed/embed.go` - Embedded asset management +**LLM and OpenClaw**: +- `internal/llm/llm.go` - llmspy gateway configuration (`ConfigureLLMSpy()`) +- `cmd/obol/llm.go` - `obol llm configure` CLI command +- `internal/embed/infrastructure/base/templates/llm.yaml` - llmspy K8s resources +- `internal/openclaw/openclaw.go` - OpenClaw setup, overlay generation, llmspy routing +- `internal/openclaw/import.go` - Existing config detection and translation +- `internal/openclaw/chart/` - OpenClaw Helm chart (values, templates, helpers) + **Embedded assets**: - `internal/embed/k3d-config.yaml` - k3d configuration template - `internal/embed/networks/` - Network definitions @@ -793,6 +932,7 @@ obol network delete ethereum- --force - `helios/helmfile.yaml.gotmpl` - `aztec/helmfile.yaml.gotmpl` - `internal/embed/defaults/` - Default stack resources +- `internal/embed/infrastructure/` - Infrastructure resources (llmspy, Traefik) **Build and version**: - `justfile` - Task runner (install, build, up, down commands) @@ -811,12 +951,12 @@ obol network delete ethereum- --force - Go 1.21+ (for building from source) **Installed by obolup.sh**: -- kubectl 1.31.0 -- helm 3.16.2 +- kubectl 1.35.0 +- helm 3.19.4 - k3d 5.8.3 -- 
helmfile 0.169.1 -- k9s 0.32.5 -- helm-diff plugin 3.9.11 +- helmfile 1.2.3 +- k9s 0.50.18 +- helm-diff plugin 3.14.1 **Go dependencies** (key packages): - `github.com/urfave/cli/v2` - CLI framework diff --git a/Dockerfile.inference-gateway b/Dockerfile.inference-gateway new file mode 100644 index 0000000..42164c1 --- /dev/null +++ b/Dockerfile.inference-gateway @@ -0,0 +1,11 @@ +FROM golang:1.25-alpine AS builder + +WORKDIR /build +COPY go.mod go.sum ./ +RUN go mod download +COPY . . +RUN CGO_ENABLED=0 go build -o /inference-gateway ./cmd/inference-gateway + +FROM gcr.io/distroless/static-debian12:nonroot +COPY --from=builder /inference-gateway /inference-gateway +ENTRYPOINT ["/inference-gateway"] diff --git a/README.md b/README.md index 0f24b0d..b7ec40d 100644 --- a/README.md +++ b/README.md @@ -125,6 +125,24 @@ obol k9s The stack will create a local Kubernetes cluster. Each network installation creates a uniquely-namespaced deployment instance, allowing you to run multiple configurations simultaneously. +## Public Access (Cloudflare Tunnel) + +By default, the stack deploys a Cloudflare Tunnel connector in “quick tunnel” mode, which provides a random public URL. Check it with: + +```bash +obol tunnel status +``` + +To use a persistent hostname instead: + +- Browser login flow (requires `cloudflared` installed locally, e.g. `brew install cloudflared` on macOS): + - `obol tunnel login --hostname stack.example.com` +- API-driven provisioning: + - `obol tunnel provision --hostname stack.example.com --account-id ... --zone-id ... --api-token ...` + - Or set `CLOUDFLARE_ACCOUNT_ID`, `CLOUDFLARE_ZONE_ID`, `CLOUDFLARE_API_TOKEN`. + +Note: the stack ID (used in tunnel naming) is preserved across `obol stack init --force`. Use `obol stack purge` to reset it. + > [!TIP] > Use `obol network list` to see all available networks. Customize installations with flags (e.g., `obol network install ethereum --network=holesky --execution-client=geth`) to create different deployment configurations. After installation, deploy to the cluster with `obol network sync /`. @@ -394,6 +412,35 @@ obol stack purge -f > [!WARNING] > The `purge` command permanently deletes all cluster data and configuration. The `-f` flag is required to remove persistent volume claims (PVCs) owned by root. Use with caution. +### Dashboard Authentication (Better Auth) + +The dashboard UI is protected behind login when configured. RPC endpoints under `/rpc/*` remain unauthenticated (the x402 payment flow is handled separately). + +**Required environment variables (set before `obol stack up`):** + +- `STACK_PUBLIC_DOMAIN` (defaults to `obol.stack`; set to your Cloudflare tunnel hostname for internet exposure) +- `BETTER_AUTH_SECRET` (min 32 chars) +- `OBOL_GOOGLE_CLIENT_ID` +- `OBOL_GOOGLE_CLIENT_SECRET` + +**Google OAuth redirect URI:** + +Register this in Google Cloud Console: + +```text +https:///api/auth/callback/google +``` + +**Nodecore token refresh (for eRPC upstream header injection):** + +Create/update the Secret `erpc/nodecore-oauth-refresh` with: + +- `client_id` +- `client_secret` +- `refresh_token` + +The in-cluster CronJob refreshes a short-lived Google `id_token` and writes it into `erpc/obol-oauth-token`, which eRPC uses to inject `X-Nodecore-Token` on upstream requests. + ### Working with Kubernetes The `obol` CLI includes convenient wrappers for common Kubernetes tools. 
These automatically use the correct cluster configuration: diff --git a/cmd/inference-gateway/main.go b/cmd/inference-gateway/main.go new file mode 100644 index 0000000..d9e3f6a --- /dev/null +++ b/cmd/inference-gateway/main.go @@ -0,0 +1,67 @@ +package main + +import ( + "flag" + "log" + "os" + "os/signal" + "syscall" + + "github.com/ObolNetwork/obol-stack/internal/inference" + "github.com/mark3labs/x402-go" +) + +func main() { + listen := flag.String("listen", ":8402", "Listen address") + upstream := flag.String("upstream", "http://ollama:11434", "Upstream inference service URL") + wallet := flag.String("wallet", "", "USDC recipient wallet address (required)") + price := flag.String("price", "0.001", "USDC price per request") + chain := flag.String("chain", "base-sepolia", "Blockchain network (base, base-sepolia)") + facilitator := flag.String("facilitator", "https://facilitator.x402.rs", "x402 facilitator URL") + flag.Parse() + + if *wallet == "" { + // Check environment variable + *wallet = os.Getenv("X402_WALLET") + if *wallet == "" { + log.Fatal("--wallet flag or X402_WALLET env var required") + } + } + + var x402Chain x402.ChainConfig + switch *chain { + case "base", "base-mainnet": + x402Chain = x402.BaseMainnet + case "base-sepolia": + x402Chain = x402.BaseSepolia + default: + log.Fatalf("unsupported chain: %s (use: base, base-sepolia)", *chain) + } + + gw, err := inference.NewGateway(inference.GatewayConfig{ + ListenAddr: *listen, + UpstreamURL: *upstream, + WalletAddress: *wallet, + PricePerRequest: *price, + Chain: x402Chain, + FacilitatorURL: *facilitator, + }) + if err != nil { + log.Fatalf("failed to create gateway: %v", err) + } + + // Handle graceful shutdown + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) + go func() { + <-sigCh + log.Println("shutting down...") + if err := gw.Stop(); err != nil { + log.Printf("shutdown error: %v", err) + } + }() + + if err := gw.Start(); err != nil { + log.Fatalf("gateway error: %v", err) + } +} diff --git a/cmd/obol/inference.go b/cmd/obol/inference.go new file mode 100644 index 0000000..59b2d06 --- /dev/null +++ b/cmd/obol/inference.go @@ -0,0 +1,114 @@ +package main + +import ( + "fmt" + "os" + "os/signal" + "syscall" + + "github.com/ObolNetwork/obol-stack/internal/config" + "github.com/ObolNetwork/obol-stack/internal/inference" + "github.com/mark3labs/x402-go" + "github.com/urfave/cli/v2" +) + +// inferenceCommand returns the inference management command group +func inferenceCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "inference", + Usage: "Manage paid inference services (x402)", + Subcommands: []*cli.Command{ + { + Name: "serve", + Usage: "Start the x402 inference gateway (local process)", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "listen", + Aliases: []string{"l"}, + Usage: "Listen address for the gateway", + Value: ":8402", + }, + &cli.StringFlag{ + Name: "upstream", + Aliases: []string{"u"}, + Usage: "Upstream inference service URL", + Value: "http://localhost:11434", + }, + &cli.StringFlag{ + Name: "wallet", + Aliases: []string{"w"}, + Usage: "USDC recipient wallet address", + EnvVars: []string{"X402_WALLET"}, + Required: true, + }, + &cli.StringFlag{ + Name: "price", + Usage: "USDC price per inference request", + Value: "0.001", + }, + &cli.StringFlag{ + Name: "chain", + Usage: "Blockchain network for payments (base, base-sepolia)", + Value: "base-sepolia", + }, + &cli.StringFlag{ + Name: "facilitator", + Usage: "x402 facilitator service URL", + 
Value: "https://facilitator.x402.rs", + }, + }, + Action: func(c *cli.Context) error { + chain, err := resolveChain(c.String("chain")) + if err != nil { + return err + } + + gw, err := inference.NewGateway(inference.GatewayConfig{ + ListenAddr: c.String("listen"), + UpstreamURL: c.String("upstream"), + WalletAddress: c.String("wallet"), + PricePerRequest: c.String("price"), + Chain: chain, + FacilitatorURL: c.String("facilitator"), + }) + if err != nil { + return fmt.Errorf("failed to create gateway: %w", err) + } + + // Handle graceful shutdown + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) + go func() { + <-sigCh + fmt.Println("\nShutting down gateway...") + if err := gw.Stop(); err != nil { + fmt.Fprintf(os.Stderr, "shutdown error: %v\n", err) + } + }() + + return gw.Start() + }, + }, + }, + } +} + +// resolveChain maps a chain name string to an x402 ChainConfig. +func resolveChain(name string) (x402.ChainConfig, error) { + switch name { + case "base", "base-mainnet": + return x402.BaseMainnet, nil + case "base-sepolia": + return x402.BaseSepolia, nil + case "polygon", "polygon-mainnet": + return x402.PolygonMainnet, nil + case "polygon-amoy": + return x402.PolygonAmoy, nil + case "avalanche", "avalanche-mainnet": + return x402.AvalancheMainnet, nil + case "avalanche-fuji": + return x402.AvalancheFuji, nil + default: + return x402.ChainConfig{}, fmt.Errorf("unsupported chain: %s (use: base, base-sepolia, polygon, polygon-amoy, avalanche, avalanche-fuji)", name) + } +} diff --git a/cmd/obol/llm.go b/cmd/obol/llm.go new file mode 100644 index 0000000..8f11ac5 --- /dev/null +++ b/cmd/obol/llm.go @@ -0,0 +1,88 @@ +package main + +import ( + "bufio" + "fmt" + "os" + "strings" + + "github.com/ObolNetwork/obol-stack/internal/config" + "github.com/ObolNetwork/obol-stack/internal/llm" + "github.com/urfave/cli/v2" +) + +func llmCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "llm", + Usage: "Manage LLM providers (llmspy universal proxy)", + Subcommands: []*cli.Command{ + { + Name: "configure", + Usage: "Configure a cloud AI provider in the llmspy gateway", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "provider", + Usage: "Provider name (anthropic, openai)", + }, + &cli.StringFlag{ + Name: "api-key", + Usage: "API key for the provider", + EnvVars: []string{"LLM_API_KEY"}, + }, + }, + Action: func(c *cli.Context) error { + provider := c.String("provider") + apiKey := c.String("api-key") + + // Interactive mode if flags not provided + if provider == "" || apiKey == "" { + var err error + provider, apiKey, err = promptLLMConfig() + if err != nil { + return err + } + } + + return llm.ConfigureLLMSpy(cfg, provider, apiKey) + }, + }, + }, + } +} + +// promptLLMConfig interactively asks the user for provider and API key. 
+func promptLLMConfig() (string, string, error) { + reader := bufio.NewReader(os.Stdin) + + fmt.Println("Select a provider:") + fmt.Println(" [1] Anthropic") + fmt.Println(" [2] OpenAI") + fmt.Print("\nChoice [1]: ") + + line, _ := reader.ReadString('\n') + choice := strings.TrimSpace(line) + if choice == "" { + choice = "1" + } + + var provider, display string + switch choice { + case "1": + provider = "anthropic" + display = "Anthropic" + case "2": + provider = "openai" + display = "OpenAI" + default: + return "", "", fmt.Errorf("unknown choice: %s", choice) + } + + fmt.Printf("\n%s API key: ", display) + apiKey, _ := reader.ReadString('\n') + apiKey = strings.TrimSpace(apiKey) + if apiKey == "" { + return "", "", fmt.Errorf("API key is required") + } + + return provider, apiKey, nil +} diff --git a/cmd/obol/main.go b/cmd/obol/main.go index cde6626..203c662 100644 --- a/cmd/obol/main.go +++ b/cmd/obol/main.go @@ -12,6 +12,7 @@ import ( "github.com/ObolNetwork/obol-stack/internal/app" "github.com/ObolNetwork/obol-stack/internal/config" "github.com/ObolNetwork/obol-stack/internal/stack" + "github.com/ObolNetwork/obol-stack/internal/tunnel" "github.com/ObolNetwork/obol-stack/internal/version" "github.com/urfave/cli/v2" ) @@ -51,12 +52,36 @@ COMMANDS: network install Install and deploy network to cluster network delete Remove network and clean up cluster resources + OpenClaw (AI Agent): + openclaw onboard Create and deploy an OpenClaw instance + openclaw setup Reconfigure model providers for a deployed instance + openclaw dashboard Open the dashboard in a browser + openclaw cli Run openclaw CLI against a deployed instance + openclaw sync Deploy or update an instance + openclaw token Retrieve gateway token + openclaw list List instances + openclaw delete Remove instance and cluster resources + openclaw skills Manage skills (sync from local dir) + + LLM Gateway: + llm configure Configure cloud AI provider in llmspy gateway + + Inference (x402 Pay-Per-Request): + inference serve Start the x402 inference gateway + App Management: app install Install a Helm chart as an application app list List installed applications app sync Deploy application to cluster app delete Remove application and cluster resources + Tunnel Management: + tunnel status Show tunnel status and public URL + tunnel login Authenticate and create persistent tunnel (browser) + tunnel provision Provision persistent tunnel (API token) + tunnel restart Restart tunnel connector (quick tunnels get new URL) + tunnel logs View cloudflared logs + Kubernetes Tools (with auto-configured KUBECONFIG): kubectl Run kubectl with stack kubeconfig (passthrough) helm Run helm with stack kubeconfig (passthrough) @@ -157,6 +182,98 @@ GLOBAL OPTIONS: }, }, // ============================================================ + // Tunnel Management Commands + // ============================================================ + { + Name: "tunnel", + Usage: "Manage Cloudflare tunnel for public access", + Subcommands: []*cli.Command{ + { + Name: "status", + Usage: "Show tunnel status and public URL", + Action: func(c *cli.Context) error { + return tunnel.Status(cfg) + }, + }, + { + Name: "login", + Usage: "Authenticate via browser and create a locally-managed tunnel (no API token)", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "hostname", + Aliases: []string{"H"}, + Usage: "Public hostname to route (e.g. 
stack.example.com)", + Required: true, + }, + }, + Action: func(c *cli.Context) error { + return tunnel.Login(cfg, tunnel.LoginOptions{ + Hostname: c.String("hostname"), + }) + }, + }, + { + Name: "provision", + Usage: "Provision a persistent (DNS-routed) Cloudflare Tunnel", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "hostname", + Aliases: []string{"H"}, + Usage: "Public hostname to route (e.g. stack.example.com)", + Required: true, + }, + &cli.StringFlag{ + Name: "account-id", + Aliases: []string{"a"}, + Usage: "Cloudflare account ID (or set CLOUDFLARE_ACCOUNT_ID)", + EnvVars: []string{"CLOUDFLARE_ACCOUNT_ID"}, + }, + &cli.StringFlag{ + Name: "zone-id", + Aliases: []string{"z"}, + Usage: "Cloudflare zone ID for the hostname (or set CLOUDFLARE_ZONE_ID)", + EnvVars: []string{"CLOUDFLARE_ZONE_ID"}, + }, + &cli.StringFlag{ + Name: "api-token", + Aliases: []string{"t"}, + Usage: "Cloudflare API token (or set CLOUDFLARE_API_TOKEN)", + EnvVars: []string{"CLOUDFLARE_API_TOKEN"}, + }, + }, + Action: func(c *cli.Context) error { + return tunnel.Provision(cfg, tunnel.ProvisionOptions{ + Hostname: c.String("hostname"), + AccountID: c.String("account-id"), + ZoneID: c.String("zone-id"), + APIToken: c.String("api-token"), + }) + }, + }, + { + Name: "restart", + Usage: "Restart the tunnel connector (quick tunnels get a new URL)", + Action: func(c *cli.Context) error { + return tunnel.Restart(cfg) + }, + }, + { + Name: "logs", + Usage: "View cloudflared logs", + Flags: []cli.Flag{ + &cli.BoolFlag{ + Name: "follow", + Aliases: []string{"f"}, + Usage: "Follow log output", + }, + }, + Action: func(c *cli.Context) error { + return tunnel.Logs(cfg, c.Bool("follow")) + }, + }, + }, + }, + // ============================================================ // Kubernetes Tool Passthroughs (with auto-configured KUBECONFIG) // ============================================================ { @@ -327,6 +444,9 @@ GLOBAL OPTIONS: }, }, networkCommand(cfg), + openclawCommand(cfg), + inferenceCommand(cfg), + llmCommand(cfg), { Name: "app", Usage: "Manage applications", diff --git a/cmd/obol/openclaw.go b/cmd/obol/openclaw.go new file mode 100644 index 0000000..80d4ec6 --- /dev/null +++ b/cmd/obol/openclaw.go @@ -0,0 +1,190 @@ +package main + +import ( + "fmt" + + "github.com/ObolNetwork/obol-stack/internal/config" + "github.com/ObolNetwork/obol-stack/internal/openclaw" + "github.com/urfave/cli/v2" +) + +func openclawCommand(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "openclaw", + Usage: "Manage OpenClaw AI agent instances", + Subcommands: []*cli.Command{ + { + Name: "onboard", + Usage: "Create and deploy an OpenClaw instance", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "id", + Usage: "Instance ID (defaults to generated petname)", + }, + &cli.BoolFlag{ + Name: "force", + Aliases: []string{"f"}, + Usage: "Overwrite existing instance", + }, + &cli.BoolFlag{ + Name: "no-sync", + Usage: "Only scaffold config, don't deploy to cluster", + }, + }, + Action: func(c *cli.Context) error { + return openclaw.Onboard(cfg, openclaw.OnboardOptions{ + ID: c.String("id"), + Force: c.Bool("force"), + Sync: !c.Bool("no-sync"), + Interactive: true, + }) + }, + }, + { + Name: "sync", + Usage: "Deploy or update an OpenClaw instance", + ArgsUsage: "", + Action: func(c *cli.Context) error { + if c.NArg() == 0 { + return fmt.Errorf("instance ID required (e.g., obol openclaw sync happy-otter)") + } + return openclaw.Sync(cfg, c.Args().First()) + }, + }, + { + Name: "token", + Usage: "Retrieve gateway token for an 
OpenClaw instance", + ArgsUsage: "", + Action: func(c *cli.Context) error { + if c.NArg() == 0 { + return fmt.Errorf("instance ID required (e.g., obol openclaw token happy-otter)") + } + return openclaw.Token(cfg, c.Args().First()) + }, + }, + { + Name: "list", + Usage: "List OpenClaw instances", + Action: func(c *cli.Context) error { + return openclaw.List(cfg) + }, + }, + { + Name: "delete", + Usage: "Remove an OpenClaw instance and its cluster resources", + ArgsUsage: "", + Flags: []cli.Flag{ + &cli.BoolFlag{ + Name: "force", + Aliases: []string{"f"}, + Usage: "Skip confirmation prompt", + }, + }, + Action: func(c *cli.Context) error { + if c.NArg() == 0 { + return fmt.Errorf("instance ID required (e.g., obol openclaw delete happy-otter)") + } + return openclaw.Delete(cfg, c.Args().First(), c.Bool("force")) + }, + }, + { + Name: "setup", + Usage: "Reconfigure model providers for a deployed instance", + ArgsUsage: "", + Action: func(c *cli.Context) error { + if c.NArg() == 0 { + return fmt.Errorf("instance ID required (e.g., obol openclaw setup default)") + } + return openclaw.Setup(cfg, c.Args().First(), openclaw.SetupOptions{}) + }, + }, + { + Name: "dashboard", + Usage: "Open the OpenClaw dashboard in a browser", + ArgsUsage: "", + Flags: []cli.Flag{ + &cli.IntFlag{ + Name: "port", + Usage: "Local port for port-forward (0 = auto)", + Value: 0, + }, + &cli.BoolFlag{ + Name: "no-browser", + Usage: "Print URL without opening browser", + }, + }, + Action: func(c *cli.Context) error { + if c.NArg() == 0 { + return fmt.Errorf("instance ID required (e.g., obol openclaw dashboard default)") + } + noBrowser := c.Bool("no-browser") + return openclaw.Dashboard(cfg, c.Args().First(), openclaw.DashboardOptions{ + Port: c.Int("port"), + NoBrowser: noBrowser, + }, func(url string) { + if !noBrowser { + openBrowser(url) + } + }) + }, + }, + { + Name: "skills", + Usage: "Manage OpenClaw skills", + Subcommands: []*cli.Command{ + { + Name: "sync", + Usage: "Package a local skills directory into a ConfigMap", + ArgsUsage: "", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "from", + Usage: "Path to local skills directory", + Required: true, + }, + }, + Action: func(c *cli.Context) error { + if c.NArg() == 0 { + return fmt.Errorf("instance ID required (e.g., obol openclaw skills sync happy-otter --from ./skills)") + } + return openclaw.SkillsSync(cfg, c.Args().First(), c.String("from")) + }, + }, + }, + }, + { + Name: "cli", + Usage: "Run openclaw CLI commands against a deployed instance", + ArgsUsage: " [-- ]", + SkipFlagParsing: true, + Action: func(c *cli.Context) error { + args := c.Args().Slice() + if len(args) == 0 { + return fmt.Errorf("instance ID required\n\nUsage:\n" + + " obol openclaw cli -- \n\n" + + "Examples:\n" + + " obol openclaw cli default -- gateway health\n" + + " obol openclaw cli default -- gateway call config.get\n" + + " obol openclaw cli default -- doctor") + } + + id := args[0] + // Everything after "--" is the openclaw command + var openclawArgs []string + for i, arg := range args[1:] { + if arg == "--" { + openclawArgs = args[i+2:] + break + } + } + if len(openclawArgs) == 0 && len(args) > 1 { + // No "--" separator found; treat remaining args as openclaw command + openclawArgs = args[1:] + } + + return openclaw.CLI(cfg, id, openclawArgs) + }, + }, + }, + } +} diff --git a/go.mod b/go.mod index ac5aa02..f7424b9 100644 --- a/go.mod +++ b/go.mod @@ -1,15 +1,44 @@ module github.com/ObolNetwork/obol-stack -go 1.25 +go 1.25.1 require ( github.com/dustinkirkland/golang-petname 
v0.0.0-20240428194347-eebcea082ee0 + github.com/mark3labs/x402-go v0.13.0 github.com/urfave/cli/v2 v2.27.7 gopkg.in/yaml.v3 v3.0.1 ) require ( + filippo.io/edwards25519 v1.1.0 // indirect + github.com/benbjohnson/clock v1.3.5 // indirect + github.com/blendle/zapdriver v1.3.1 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.7 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/fatih/color v1.18.0 // indirect + github.com/gagliardetto/binary v0.8.0 // indirect + github.com/gagliardetto/solana-go v1.14.0 // indirect + github.com/gagliardetto/treeout v0.1.4 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/klauspost/compress v1.18.1 // indirect + github.com/logrusorgru/aurora v2.0.3+incompatible // indirect + github.com/mattn/go-colorable v0.1.14 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/mitchellh/go-testing-interface v1.14.1 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/mostynb/zstdpool-freelist v0.0.0-20201229113212-927304c0c3b1 // indirect + github.com/mr-tron/base58 v1.2.0 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect + github.com/streamingfast/logging v0.0.0-20250918142248-ac5a1e292845 // indirect github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect + go.mongodb.org/mongo-driver v1.17.6 // indirect + go.uber.org/multierr v1.11.0 // indirect + go.uber.org/ratelimit v0.3.1 // indirect + go.uber.org/zap v1.27.0 // indirect + golang.org/x/crypto v0.43.0 // indirect + golang.org/x/sys v0.37.0 // indirect + golang.org/x/term v0.36.0 // indirect + golang.org/x/time v0.14.0 // indirect ) diff --git a/go.sum b/go.sum index 5b3c61c..4fdd793 100644 --- a/go.sum +++ b/go.sum @@ -1,14 +1,159 @@ +filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= +filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= +github.com/AlekSi/pointer v1.1.0 h1:SSDMPcXD9jSl8FPy9cRzoRaMJtm9g9ggGTxecRUbQoI= +github.com/AlekSi/pointer v1.1.0/go.mod h1:y7BvfRI3wXPWKXEBhU71nbnIEEZX0QTSB2Bj48UJIZE= +github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= +github.com/benbjohnson/clock v1.3.5 h1:VvXlSJBzZpA/zum6Sj74hxwYI2DIxRWuNIoXAzHZz5o= +github.com/benbjohnson/clock v1.3.5/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= +github.com/blendle/zapdriver v1.3.1 h1:C3dydBOWYRiOk+B8X9IVZ5IOe+7cl+tGOexN4QqHfpE= +github.com/blendle/zapdriver v1.3.1/go.mod h1:mdXfREi6u5MArG4j9fewC+FGnXaBR+T4Ox4J2u4eHCc= github.com/cpuguy83/go-md2man/v2 v2.0.7 h1:zbFlGlXEAKlwXpmvle3d8Oe3YnkKIK4xSRTd3sHPnBo= github.com/cpuguy83/go-md2man/v2 v2.0.7/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dustinkirkland/golang-petname v0.0.0-20240428194347-eebcea082ee0 h1:aYo8nnk3ojoQkP5iErif5Xxv0Mo0Ga/FR5+ffl/7+Nk= github.com/dustinkirkland/golang-petname v0.0.0-20240428194347-eebcea082ee0/go.mod h1:8AuBTZBRSFqEYBPYULd+NN474/zZBLP+6WeT5S9xlAc= +github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM= +github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU= 
+github.com/gagliardetto/binary v0.8.0 h1:U9ahc45v9HW0d15LoN++vIXSJyqR/pWw8DDlhd7zvxg= +github.com/gagliardetto/binary v0.8.0/go.mod h1:2tfj51g5o9dnvsc+fL3Jxr22MuWzYXwx9wEoN0XQ7/c= +github.com/gagliardetto/gofuzz v1.2.2 h1:XL/8qDMzcgvR4+CyRQW9UGdwPRPMHVJfqQ/uMvSUuQw= +github.com/gagliardetto/gofuzz v1.2.2/go.mod h1:bkH/3hYLZrMLbfYWA0pWzXmi5TTRZnu4pMGZBkqMKvY= +github.com/gagliardetto/solana-go v1.14.0 h1:3WfAi70jOOjAJ0deFMjdhFYlLXATF4tOQXsDNWJtOLw= +github.com/gagliardetto/solana-go v1.14.0/go.mod h1:l/qqqIN6qJJPtxW/G1PF4JtcE3Zg2vD2EliZrr9Gn5k= +github.com/gagliardetto/treeout v0.1.4 h1:ozeYerrLCmCubo1TcIjFiOWTTGteOOHND1twdFpgwaw= +github.com/gagliardetto/treeout v0.1.4/go.mod h1:loUefvXTrlRG5rYmJmExNryyBRh8f89VZhmMOyCyqok= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= +github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co= +github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/logrusorgru/aurora v2.0.3+incompatible h1:tOpm7WcpBTn4fjmVfgpQq0EfczGlG91VSDkswnjF5A8= +github.com/logrusorgru/aurora v2.0.3+incompatible/go.mod h1:7rIyQOR62GCctdiQpZ/zOJlFyk6y+94wXzv6RNZgaR4= +github.com/mark3labs/x402-go v0.13.0 h1:Ppm3GXZx2ZCLJM511mFYeMOw/605h9+M6UT630GdRG0= +github.com/mark3labs/x402-go v0.13.0/go.mod h1:srAvV9FosjBiqrclF15thrQbz0fVVfNXtMcqD0e1hKU= +github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE= +github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mitchellh/go-testing-interface v1.14.1 h1:jrgshOhYAUVNMAJiKbEu7EqAwgJJ2JqpQmpLJOu07cU= +github.com/mitchellh/go-testing-interface v1.14.1/go.mod h1:gfgS7OtZj6MA4U1UrDRp04twqAjfvlZyCfX3sDjEym8= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 
h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/mostynb/zstdpool-freelist v0.0.0-20201229113212-927304c0c3b1 h1:mPMvm6X6tf4w8y7j9YIt6V9jfWhL6QlbEc7CCmeQlWk= +github.com/mostynb/zstdpool-freelist v0.0.0-20201229113212-927304c0c3b1/go.mod h1:ye2e/VUEtE2BHE+G/QcKkcLQVAEJoYRFj5VUOQatCRE= +github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o= +github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc= +github.com/onsi/gomega v1.10.1 h1:o0+MgICZLuZ7xjH7Vx6zS/zcu93/BEp1VwkIW1mEXCE= +github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/shopspring/decimal v1.3.1 h1:2Usl1nmF/WZucqkFZhnfFYxxxu8LG21F6nPQBE5gKV8= +github.com/shopspring/decimal v1.3.1/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o= +github.com/streamingfast/logging v0.0.0-20230608130331-f22c91403091/go.mod h1:VlduQ80JcGJSargkRU4Sg9Xo63wZD/l8A5NC/Uo1/uU= +github.com/streamingfast/logging v0.0.0-20250918142248-ac5a1e292845 h1:VMA0pZ3MI8BErRA3kh8dKJThP5d0Xh5vZVk5yFIgH/A= +github.com/streamingfast/logging v0.0.0-20250918142248-ac5a1e292845/go.mod h1:BtDq81Tyc7H8up5aXNi/I95nPmG3C0PLEqGWY/iWQ2E= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/test-go/testify v1.1.4 h1:Tf9lntrKUMHiXQ07qBScBTSA0dhYQlu83hswqelv1iE= +github.com/test-go/testify v1.1.4/go.mod h1:rH7cfJo/47vWGdi4GPj16x3/t1xGOj2YxzmNQzk2ghU= github.com/urfave/cli/v2 v2.27.7 h1:bH59vdhbjLv3LAvIu6gd0usJHgoTTPhCFib8qqOwXYU= github.com/urfave/cli/v2 v2.27.7/go.mod h1:CyNAG/xg+iAOg0N4MPGZqVmv2rCoP267496AOXUZjA4= github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4= github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +go.mongodb.org/mongo-driver v1.17.6 h1:87JUG1wZfWsr6rIz3ZmpH90rL5tea7O3IHuSwHUpsss= +go.mongodb.org/mongo-driver v1.17.6/go.mod h1:Hy04i7O2kC4RS06ZrhPRqj/u4DTYkFDAAccj+rVKqgQ= +go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= +go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= +go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= +go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= +go.uber.org/goleak v1.1.11/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= +go.uber.org/goleak v1.3.0 
h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= +go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/ratelimit v0.3.1 h1:K4qVE+byfv/B3tC+4nYWP7v/6SimcO7HzHekoMNBma0= +go.uber.org/ratelimit v0.3.1/go.mod h1:6euWsTB6U/Nb3X++xEUXA8ciPJvr19Q/0h1+oDcJhRk= +go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= +go.uber.org/zap v1.21.0/go.mod h1:wjWOCqI0f2ZZrJF/UufIOkiC8ii6tm1iqIsLo76RfJw= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20220214200702-86341886e292/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04= +golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0= +golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= +golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4= +golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ= +golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term 
v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q= +golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k= +golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM= +golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= +golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/dns/resolver.go b/internal/dns/resolver.go new file mode 100644 index 0000000..b4bf512 --- /dev/null +++ b/internal/dns/resolver.go @@ -0,0 +1,272 @@ +// Package dns manages a local DNS resolver for wildcard *.obol.stack resolution. +// +// It runs a dnsmasq Docker container that answers DNS queries for the obol.stack +// domain with 127.0.0.1, and configures the host OS to use it. This enables +// per-instance hostname routing (e.g., openclaw-myid.obol.stack) without manual +// /etc/hosts entries. +// +// macOS: binds to port 5553, uses /etc/resolver/obol.stack (supports custom port). +// Linux: binds to 127.0.0.2:53, uses systemd-resolved drop-in (requires port 53). 
+package dns + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "runtime" + "strings" +) + +const ( + containerName = "obol-dns" + dnsImage = "alpine:3.21" + domain = "obol.stack" + + // macOS: custom port, /etc/resolver handles port directive + macHostPort = "5553" + + // Linux: systemd-resolved can't forward to non-standard ports, so we bind + // to a loopback alias (127.0.0.2) on port 53 to avoid conflicting with + // systemd-resolved's stub listener on 127.0.0.53:53. + linuxBindIP = "127.0.0.2" + linuxBindPort = "53" + + // macOS resolver config + macResolverDir = "/etc/resolver" + macResolverFile = "obol.stack" + + // Linux systemd-resolved drop-in + resolvedDropInDir = "/etc/systemd/resolved.conf.d" + resolvedDropInFile = "obol-stack.conf" +) + +// portBindings returns the Docker -p flags for the current OS. +func portBindings() []string { + if runtime.GOOS == "linux" { + return []string{ + "-p", linuxBindIP + ":" + linuxBindPort + ":53/udp", + "-p", linuxBindIP + ":" + linuxBindPort + ":53/tcp", + } + } + // macOS (and fallback) + return []string{ + "-p", macHostPort + ":53/udp", + "-p", macHostPort + ":53/tcp", + } +} + +// EnsureRunning starts the DNS resolver container if not already running. +// Idempotent: no-ops if the container is already healthy. +func EnsureRunning() error { + // Check if container exists and is running + out, err := exec.Command("docker", "inspect", "-f", "{{.State.Running}}", containerName).Output() + if err == nil && strings.TrimSpace(string(out)) == "true" { + return nil // Already running + } + + // Remove stale container if exists (ignore errors) + exec.Command("docker", "rm", "-f", containerName).Run() //nolint:errcheck + + fmt.Println("Starting DNS resolver for *.obol.stack...") + + args := []string{"run", "-d", "--name", containerName} + args = append(args, portBindings()...) + args = append(args, + "--restart", "unless-stopped", + dnsImage, + "sh", "-c", + "apk add --no-cache dnsmasq >/dev/null 2>&1 && "+ + "exec dnsmasq --no-daemon "+ + "--conf-file=/dev/null "+ + "--address=/"+domain+"/127.0.0.1 "+ + "--log-facility=-", + ) + + cmd := exec.Command("docker", args...) + if output, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("failed to start DNS container: %w\n%s", err, output) + } + + if runtime.GOOS == "linux" { + fmt.Printf("DNS resolver running (*.obol.stack → 127.0.0.1, %s:%s)\n", linuxBindIP, linuxBindPort) + } else { + fmt.Printf("DNS resolver running (*.obol.stack → 127.0.0.1, port %s)\n", macHostPort) + } + return nil +} + +// Stop removes the DNS resolver container. +func Stop() { + if out, err := exec.Command("docker", "inspect", "-f", "{{.State.Running}}", containerName).Output(); err != nil || strings.TrimSpace(string(out)) != "true" { + return // Not running + } + exec.Command("docker", "rm", "-f", containerName).Run() //nolint:errcheck + fmt.Println("DNS resolver stopped") +} + +// ConfigureSystemResolver sets up the host OS to route *.obol.stack queries +// to our local DNS container. Requires sudo on first run. +// +// macOS: creates /etc/resolver/obol.stack (port 5553) +// Linux: creates systemd-resolved drop-in pointing to 127.0.0.2 +func ConfigureSystemResolver() error { + switch runtime.GOOS { + case "darwin": + return configureMacOSResolver() + case "linux": + return configureLinuxResolver() + default: + return fmt.Errorf("unsupported OS for DNS resolver: %s", runtime.GOOS) + } +} + +// RemoveSystemResolver removes the host OS DNS configuration for *.obol.stack. 
+func RemoveSystemResolver() { + switch runtime.GOOS { + case "darwin": + removeMacOSResolver() + case "linux": + removeLinuxResolver() + } +} + +// IsResolverConfigured checks whether the system resolver is already set up. +func IsResolverConfigured() bool { + switch runtime.GOOS { + case "darwin": + path := filepath.Join(macResolverDir, macResolverFile) + _, err := os.Stat(path) + return err == nil + case "linux": + path := filepath.Join(resolvedDropInDir, resolvedDropInFile) + _, err := os.Stat(path) + return err == nil + default: + return false + } +} + +// --- macOS --- + +// configureMacOSResolver creates /etc/resolver/obol.stack pointing to our DNS. +func configureMacOSResolver() error { + path := filepath.Join(macResolverDir, macResolverFile) + + // Check if already configured correctly + if data, err := os.ReadFile(path); err == nil { + content := string(data) + if strings.Contains(content, "port "+macHostPort) { + return nil // Already configured + } + } + + content := fmt.Sprintf("# Managed by obol-stack — resolves *.obol.stack to localhost\nnameserver 127.0.0.1\nport %s\n", macHostPort) + + // /etc/resolver/ needs root — try sudo + fmt.Println("Configuring macOS DNS resolver for *.obol.stack (requires sudo)...") + + mkdirCmd := exec.Command("sudo", "mkdir", "-p", macResolverDir) + mkdirCmd.Stdout = os.Stdout + mkdirCmd.Stderr = os.Stderr + if err := mkdirCmd.Run(); err != nil { + return fmt.Errorf("failed to create %s (sudo required): %w", macResolverDir, err) + } + + writeCmd := exec.Command("sudo", "tee", path) + writeCmd.Stdin = strings.NewReader(content) + writeCmd.Stderr = os.Stderr + if err := writeCmd.Run(); err != nil { + return fmt.Errorf("failed to write %s: %w", path, err) + } + + fmt.Printf("Resolver configured: %s → 127.0.0.1:%s\n", path, macHostPort) + return nil +} + +// removeMacOSResolver removes /etc/resolver/obol.stack. +func removeMacOSResolver() { + path := filepath.Join(macResolverDir, macResolverFile) + if _, err := os.Stat(path); os.IsNotExist(err) { + return + } + if err := exec.Command("sudo", "rm", path).Run(); err != nil { + fmt.Printf("Warning: failed to remove %s: %v\n", path, err) + fmt.Printf(" Remove manually: sudo rm %s\n", path) + return + } + fmt.Printf("Removed DNS resolver config: %s\n", path) +} + +// --- Linux (systemd-resolved) --- + +// configureLinuxResolver creates a systemd-resolved drop-in that forwards +// *.obol.stack queries to our dnsmasq on 127.0.0.2:53. 
+func configureLinuxResolver() error { + // Check if systemd-resolved is active + if err := exec.Command("systemctl", "is-active", "--quiet", "systemd-resolved").Run(); err != nil { + fmt.Println("Note: systemd-resolved not detected.") + fmt.Println("To resolve *.obol.stack, configure your DNS resolver to forward the domain:") + fmt.Printf(" DNS server: %s (port %s) for domain %s\n", linuxBindIP, linuxBindPort, domain) + return nil + } + + path := filepath.Join(resolvedDropInDir, resolvedDropInFile) + + // Check if already configured + if data, err := os.ReadFile(path); err == nil { + if strings.Contains(string(data), linuxBindIP) { + return nil // Already configured + } + } + + content := fmt.Sprintf("# Managed by obol-stack — resolves *.obol.stack via local dnsmasq\n[Resolve]\nDNS=%s\nDomains=~%s\n", linuxBindIP, domain) + + fmt.Println("Configuring systemd-resolved for *.obol.stack (requires sudo)...") + + mkdirCmd := exec.Command("sudo", "mkdir", "-p", resolvedDropInDir) + mkdirCmd.Stdout = os.Stdout + mkdirCmd.Stderr = os.Stderr + if err := mkdirCmd.Run(); err != nil { + return fmt.Errorf("failed to create %s (sudo required): %w", resolvedDropInDir, err) + } + + writeCmd := exec.Command("sudo", "tee", path) + writeCmd.Stdin = strings.NewReader(content) + writeCmd.Stderr = os.Stderr + if err := writeCmd.Run(); err != nil { + return fmt.Errorf("failed to write %s: %w", path, err) + } + + // Restart systemd-resolved to pick up the new config + restartCmd := exec.Command("sudo", "systemctl", "restart", "systemd-resolved") + restartCmd.Stdout = os.Stdout + restartCmd.Stderr = os.Stderr + if err := restartCmd.Run(); err != nil { + fmt.Printf("Warning: failed to restart systemd-resolved: %v\n", err) + fmt.Println(" Run manually: sudo systemctl restart systemd-resolved") + } + + fmt.Printf("Resolver configured: %s → %s:%s\n", path, linuxBindIP, linuxBindPort) + return nil +} + +// removeLinuxResolver removes the systemd-resolved drop-in and restarts the service. 
+func removeLinuxResolver() { + path := filepath.Join(resolvedDropInDir, resolvedDropInFile) + if _, err := os.Stat(path); os.IsNotExist(err) { + return + } + if err := exec.Command("sudo", "rm", path).Run(); err != nil { + fmt.Printf("Warning: failed to remove %s: %v\n", path, err) + fmt.Printf(" Remove manually: sudo rm %s\n", path) + return + } + + // Restart systemd-resolved to drop the forwarding rule + if err := exec.Command("sudo", "systemctl", "restart", "systemd-resolved").Run(); err != nil { + fmt.Printf("Warning: failed to restart systemd-resolved: %v\n", err) + } + + fmt.Printf("Removed DNS resolver config: %s\n", path) +} diff --git a/internal/dns/resolver_test.go b/internal/dns/resolver_test.go new file mode 100644 index 0000000..734dfc0 --- /dev/null +++ b/internal/dns/resolver_test.go @@ -0,0 +1,58 @@ +package dns + +import ( + "runtime" + "testing" +) + +func TestConstants(t *testing.T) { + if containerName != "obol-dns" { + t.Errorf("containerName = %q, want %q", containerName, "obol-dns") + } + if domain != "obol.stack" { + t.Errorf("domain = %q, want %q", domain, "obol.stack") + } + + // macOS constants + if macHostPort != "5553" { + t.Errorf("macHostPort = %q, want %q", macHostPort, "5553") + } + if macResolverFile != "obol.stack" { + t.Errorf("macResolverFile = %q, want %q", macResolverFile, "obol.stack") + } + + // Linux constants + if linuxBindIP != "127.0.0.2" { + t.Errorf("linuxBindIP = %q, want %q", linuxBindIP, "127.0.0.2") + } + if linuxBindPort != "53" { + t.Errorf("linuxBindPort = %q, want %q", linuxBindPort, "53") + } + if resolvedDropInFile != "obol-stack.conf" { + t.Errorf("resolvedDropInFile = %q, want %q", resolvedDropInFile, "obol-stack.conf") + } +} + +func TestPortBindings(t *testing.T) { + bindings := portBindings() + if len(bindings) != 4 { + t.Fatalf("portBindings() returned %d elements, want 4", len(bindings)) + } + + switch runtime.GOOS { + case "darwin": + if bindings[1] != "5553:53/udp" { + t.Errorf("macOS UDP binding = %q, want %q", bindings[1], "5553:53/udp") + } + if bindings[3] != "5553:53/tcp" { + t.Errorf("macOS TCP binding = %q, want %q", bindings[3], "5553:53/tcp") + } + case "linux": + if bindings[1] != "127.0.0.2:53:53/udp" { + t.Errorf("Linux UDP binding = %q, want %q", bindings[1], "127.0.0.2:53:53/udp") + } + if bindings[3] != "127.0.0.2:53:53/tcp" { + t.Errorf("Linux TCP binding = %q, want %q", bindings[3], "127.0.0.2:53:53/tcp") + } + } +} diff --git a/internal/embed/embed.go b/internal/embed/embed.go index 2c189eb..99b8607 100644 --- a/internal/embed/embed.go +++ b/internal/embed/embed.go @@ -21,8 +21,10 @@ var infrastructureFS embed.FS //go:embed all:networks var networksFS embed.FS -// CopyDefaults recursively copies all embedded infrastructure manifests to the destination directory -func CopyDefaults(destDir string) error { +// CopyDefaults recursively copies all embedded infrastructure manifests to the destination directory. +// The replacements map is applied to every file: each key (e.g. "{{OLLAMA_HOST}}") is replaced +// with its value. Pass nil for a verbatim copy. 
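+// Illustrative call (the placeholder name matches the embedded manifests; the
+// host value is just an example):
+//
+//	CopyDefaults(destDir, map[string]string{"{{OLLAMA_HOST}}": "host.k3d.internal"})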
+func CopyDefaults(destDir string, replacements map[string]string) error { return fs.WalkDir(infrastructureFS, "infrastructure", func(path string, d fs.DirEntry, err error) error { if err != nil { return err @@ -57,8 +59,14 @@ func CopyDefaults(destDir string) error { return fmt.Errorf("failed to read embedded file %s: %w", path, err) } + // Apply placeholder replacements + content := string(data) + for placeholder, value := range replacements { + content = strings.ReplaceAll(content, placeholder, value) + } + // Write to destination - if err := os.WriteFile(destPath, data, 0644); err != nil { + if err := os.WriteFile(destPath, []byte(content), 0644); err != nil { return fmt.Errorf("failed to write file %s: %w", destPath, err) } diff --git a/internal/embed/infrastructure/base/templates/llm.yaml b/internal/embed/infrastructure/base/templates/llm.yaml new file mode 100644 index 0000000..cb0166c --- /dev/null +++ b/internal/embed/infrastructure/base/templates/llm.yaml @@ -0,0 +1,229 @@ +--- +# LLM foundation services (OKR-1) +# +# This deploys: +# - An ExternalName Service "ollama" that resolves to the host's Ollama server +# - llms.py (LLMSpy) as an OpenAI-compatible gateway / router over providers +# +# Design notes: +# - No in-cluster Ollama is deployed; the host is expected to run Ollama +# (or another OpenAI-compatible server) on port 11434. +# - The ollama Service abstracts host resolution: +# k3d → host.k3d.internal +# k3s → resolved at stack init via node IP +# - LLMSpy and all consumers reference ollama.llm.svc.cluster.local:11434, +# which the ExternalName Service routes to the host. +apiVersion: v1 +kind: Namespace +metadata: + name: llm + +--- +# ExternalName Service: routes ollama.llm.svc.cluster.local → host Ollama. +# The externalName is resolved during `obol stack init` via the {{OLLAMA_HOST}} placeholder. +apiVersion: v1 +kind: Service +metadata: + name: ollama + namespace: llm + labels: + app: ollama +spec: + type: ExternalName + externalName: {{OLLAMA_HOST}} + ports: + - name: http + port: 11434 + protocol: TCP + +--- +# llms.py v3 configuration for Obol Stack: +# - Ollama provider enabled by default (host machine via ollama Service) +# - Anthropic and OpenAI providers available (disabled by default; enabled via `obol llm configure`) +# - Default model is glm-4.7-flash +apiVersion: v1 +kind: ConfigMap +metadata: + name: llmspy-config + namespace: llm +data: + llms.json: | + { + "version": 3, + "defaults": { + "headers": { + "Content-Type": "application/json", + "User-Agent": "llmspy.org/3.0" + }, + "text": { + "model": "glm-4.7-flash", + "messages": [ + { "role": "user", "content": [{ "type": "text", "text": "" }] } + ] + } + }, + "providers": { + "ollama": { + "enabled": true + }, + "anthropic": { + "enabled": false + }, + "openai": { + "enabled": false + } + } + } + providers.json: | + { + "ollama": { + "id": "ollama", + "npm": "ollama", + "api": "http://ollama.llm.svc.cluster.local:11434", + "models": {}, + "all_models": true + }, + "anthropic": { + "id": "anthropic", + "api_key": "$ANTHROPIC_API_KEY", + "models": {}, + "all_models": true + }, + "openai": { + "id": "openai", + "api_key": "$OPENAI_API_KEY", + "models": {}, + "all_models": true + } + } + +--- +# Secret for cloud provider API keys. Empty by default; patched imperatively +# via `obol llm configure` or `obol openclaw setup`. 
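+# Illustrative patch (roughly what `obol llm configure` runs under the hood):
+#   kubectl -n llm patch secret llms-secrets --type=merge \
+#     -p '{"stringData":{"ANTHROPIC_API_KEY":"<your-key>"}}'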
+apiVersion: v1 +kind: Secret +metadata: + name: llms-secrets + namespace: llm +type: Opaque +stringData: + ANTHROPIC_API_KEY: "" + OPENAI_API_KEY: "" + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llmspy + namespace: llm + labels: + app: llmspy +spec: + replicas: 1 + selector: + matchLabels: + app: llmspy + template: + metadata: + labels: + app: llmspy + spec: + initContainers: + # Seed ~/.llms/llms.json from the ConfigMap. llms.py also writes runtime + # state (e.g. analytics) under ~/.llms, so we keep the directory writable. + - name: seed-config + image: busybox:1.36.1 + imagePullPolicy: IfNotPresent + command: + - sh + - -c + - | + set -eu + mkdir -p /data + cp /config/llms.json /data/llms.json + cp /config/providers.json /data/providers.json + chmod 666 /data/llms.json /data/providers.json + volumeMounts: + - name: llmspy-config + mountPath: /config + readOnly: true + - name: llmspy-home + mountPath: /data + containers: + - name: llmspy + # Obol fork of LLMSpy with smart routing extension. + # Pin a specific version for reproducibility. + image: ghcr.io/obolnetwork/llms:3.0.32-obol.1-rc.1 + imagePullPolicy: IfNotPresent + ports: + - name: http + containerPort: 8000 + protocol: TCP + command: + - llms + args: + - --config + - /home/llms/.llms/llms.json + - --serve + - "8000" + envFrom: + - secretRef: + name: llms-secrets + optional: true + env: + # Avoid surprises if the image changes its default HOME. + - name: HOME + value: /home/llms + volumeMounts: + - name: llmspy-home + mountPath: /home/llms/.llms + readinessProbe: + httpGet: + path: / + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 2 + livenessProbe: + httpGet: + path: / + port: http + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 2 + resources: + requests: + cpu: 50m + memory: 128Mi + limits: + cpu: 1000m + memory: 1Gi + volumes: + - name: llmspy-config + configMap: + name: llmspy-config + items: + - key: llms.json + path: llms.json + - key: providers.json + path: providers.json + - name: llmspy-home + emptyDir: {} + +--- +apiVersion: v1 +kind: Service +metadata: + name: llmspy + namespace: llm + labels: + app: llmspy +spec: + type: ClusterIP + selector: + app: llmspy + ports: + - name: http + port: 8000 + targetPort: http + protocol: TCP diff --git a/internal/embed/infrastructure/base/templates/oauth-token.yaml b/internal/embed/infrastructure/base/templates/oauth-token.yaml new file mode 100644 index 0000000..d5baf56 --- /dev/null +++ b/internal/embed/infrastructure/base/templates/oauth-token.yaml @@ -0,0 +1,176 @@ +--- +# Nodecore OAuth token plumbing for eRPC upstream auth (issue #124) +apiVersion: v1 +kind: Namespace +metadata: + name: erpc + +--- +apiVersion: v1 +kind: Secret +metadata: + name: obol-oauth-token + namespace: erpc +type: Opaque +stringData: + # Google `id_token` (JWT). CronJob refreshes and writes into this Secret. + token: "" + +--- +apiVersion: v1 +kind: Secret +metadata: + name: nodecore-oauth-refresh + namespace: erpc +type: Opaque +stringData: + # Google OAuth client credentials + refresh token. + # This is intentionally stored separately from the ID token written to `obol-oauth-token`. 
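+  # Illustrative way to seed it (values are placeholders):
+  #   kubectl -n erpc create secret generic nodecore-oauth-refresh \
+  #     --from-literal=client_id=... --from-literal=client_secret=... \
+  #     --from-literal=refresh_token=... --dry-run=client -o yaml | kubectl apply -f -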
+ client_id: "" + client_secret: "" + refresh_token: "" + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: nodecore-token-writer + namespace: erpc +rules: + - apiGroups: [""] + resources: ["secrets"] + resourceNames: ["obol-oauth-token"] + verbs: ["get", "update", "patch"] + +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: nodecore-token-refresher + namespace: erpc + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: nodecore-token-writer + namespace: erpc +subjects: + - kind: ServiceAccount + name: nodecore-token-refresher + namespace: erpc +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: nodecore-token-writer + +--- +apiVersion: batch/v1 +kind: CronJob +metadata: + name: nodecore-token-refresher + namespace: erpc +spec: + # Refresh every 45 minutes to stay ahead of typical 1h ID token expiry. + schedule: "0,45 * * * *" + concurrencyPolicy: Forbid + successfulJobsHistoryLimit: 1 + failedJobsHistoryLimit: 3 + jobTemplate: + spec: + template: + spec: + serviceAccountName: nodecore-token-refresher + restartPolicy: OnFailure + containers: + - name: refresh + image: python:3.12-alpine + imagePullPolicy: IfNotPresent + env: + - name: GOOGLE_CLIENT_ID + valueFrom: + secretKeyRef: + name: nodecore-oauth-refresh + key: client_id + - name: GOOGLE_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: nodecore-oauth-refresh + key: client_secret + - name: GOOGLE_REFRESH_TOKEN + valueFrom: + secretKeyRef: + name: nodecore-oauth-refresh + key: refresh_token + command: + - python + - -c + - | + import base64 + import json + import os + import ssl + import urllib.parse + import urllib.request + + client_id = os.environ.get("GOOGLE_CLIENT_ID") + client_secret = os.environ.get("GOOGLE_CLIENT_SECRET") + refresh_token = os.environ.get("GOOGLE_REFRESH_TOKEN") + + if not client_id or not client_secret or not refresh_token: + raise SystemExit("Missing GOOGLE_CLIENT_ID/GOOGLE_CLIENT_SECRET/GOOGLE_REFRESH_TOKEN in Secret erpc/nodecore-oauth-refresh") + + token_url = "https://oauth2.googleapis.com/token" + body = urllib.parse.urlencode({ + "client_id": client_id, + "client_secret": client_secret, + "refresh_token": refresh_token, + "grant_type": "refresh_token", + }).encode("utf-8") + + req = urllib.request.Request( + token_url, + data=body, + method="POST", + headers={"Content-Type": "application/x-www-form-urlencoded"}, + ) + + with urllib.request.urlopen(req, timeout=20) as resp: + payload = json.loads(resp.read().decode("utf-8")) + + id_token = payload.get("id_token") + if not id_token: + raise SystemExit(f"Google token endpoint response missing id_token: {payload}") + + token_b64 = base64.b64encode(id_token.encode("utf-8")).decode("utf-8") + + namespace = "erpc" + secret_name = "obol-oauth-token" + api_server = "https://kubernetes.default.svc" + + sa_token_path = "/var/run/secrets/kubernetes.io/serviceaccount/token" + sa_ca_path = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + + with open(sa_token_path, "r", encoding="utf-8") as f: + sa_token = f.read().strip() + + patch = json.dumps({"data": {"token": token_b64}}).encode("utf-8") + patch_url = f"{api_server}/api/v1/namespaces/{namespace}/secrets/{secret_name}" + + ctx = ssl.create_default_context(cafile=sa_ca_path) + patch_req = urllib.request.Request( + patch_url, + data=patch, + method="PATCH", + headers={ + "Authorization": f"Bearer {sa_token}", + "Content-Type": "application/merge-patch+json", + "Accept": "application/json", + }, + ) + + with 
urllib.request.urlopen(patch_req, timeout=20, context=ctx) as resp: + if resp.status < 200 or resp.status >= 300: + raise SystemExit(f"Failed to patch Secret {namespace}/{secret_name}: HTTP {resp.status} {resp.read().decode('utf-8')}") + + print("Updated Secret erpc/obol-oauth-token") diff --git a/internal/embed/infrastructure/base/templates/obol-agent.yaml b/internal/embed/infrastructure/base/templates/obol-agent.yaml index f73dda7..7451db7 100644 --- a/internal/embed/infrastructure/base/templates/obol-agent.yaml +++ b/internal/embed/infrastructure/base/templates/obol-agent.yaml @@ -1,8 +1,12 @@ +{{- if .Values.obolAgent.enabled }} --- # Obol Agent Kubernetes Manifest # This manifest deploys the Obol AI Agent with namespace-scoped RBAC permissions # The agent can read cluster-wide resources (nodes, namespaces) but can only modify # resources in specific namespaces: agent (and others via dynamic bindings) +# +# To enable the obol-agent, set obolAgent.enabled=true in the base chart values +# (infrastructure helmfile.yaml → base release → values). #------------------------------------------------------------------------------ # Namespace - Ensure the agent namespace exists @@ -139,6 +143,24 @@ spec: - name: PUBLIC_MODE value: "false" + # OKR-1: Default LLM backend via llms.py + Ollama Cloud + # + # The Obol Stack agent is provider-agnostic: + # - `llms.py` (LLMSpy) exposes an OpenAI-compatible API at /v1 + # - LLMSpy forwards to Ollama (in-cluster), which can run `*:cloud` models + # + # Important: Ollama Cloud requires a one-time "connect" of the pod identity + # (public key derived from /root/.ollama/id_ed25519). We persist that key + # in the `llm/ollama-home` PVC so upgrades/restarts don't require re-connect. + - name: LLM_BACKEND + value: "llmspy" + - name: LLM_MODEL + value: "glm-4.7:cloud" + - name: OPENAI_API_BASE + value: "http://llmspy.llm.svc.cluster.local:8000/v1" + - name: OPENAI_API_KEY + value: "ollama" + # Health checks ensure the pod is ready to receive traffic livenessProbe: httpGet: @@ -179,4 +201,5 @@ spec: protocol: TCP name: http selector: - app: obol-agent # Routes traffic to pods with this label \ No newline at end of file + app: obol-agent # Routes traffic to pods with this label +{{- end }} diff --git a/internal/embed/infrastructure/cloudflared/Chart.yaml b/internal/embed/infrastructure/cloudflared/Chart.yaml new file mode 100644 index 0000000..894505e --- /dev/null +++ b/internal/embed/infrastructure/cloudflared/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +name: cloudflared +description: Cloudflare Tunnel for public access +type: application +version: 0.1.0 +appVersion: "2024.12.2" diff --git a/internal/embed/infrastructure/cloudflared/templates/deployment.yaml b/internal/embed/infrastructure/cloudflared/templates/deployment.yaml new file mode 100644 index 0000000..c4e0b77 --- /dev/null +++ b/internal/embed/infrastructure/cloudflared/templates/deployment.yaml @@ -0,0 +1,116 @@ +{{- $mode := default "auto" .Values.mode -}} +{{- $remoteSecretName := default "cloudflared-tunnel-token" .Values.remoteManaged.tokenSecretName -}} +{{- $remoteSecretKey := default "TUNNEL_TOKEN" .Values.remoteManaged.tokenSecretKey -}} +{{- $localSecretName := default "cloudflared-local-credentials" .Values.localManaged.secretName -}} +{{- $localConfigMapName := default "cloudflared-local-config" .Values.localManaged.configMapName -}} +{{- $localTunnelIDKey := default "tunnel_id" .Values.localManaged.tunnelIDKey -}} + +{{- $useLocal := false -}} +{{- if eq $mode "local" -}} +{{- $useLocal = 
true -}} +{{- else if eq $mode "auto" -}} +{{- $ls := lookup "v1" "Secret" .Release.Namespace $localSecretName -}} +{{- $cm := lookup "v1" "ConfigMap" .Release.Namespace $localConfigMapName -}} +{{- if and $ls $cm -}} +{{- $useLocal = true -}} +{{- end -}} +{{- end -}} + +{{- $useRemote := false -}} +{{- if not $useLocal -}} +{{- if eq $mode "remote" -}} +{{- $useRemote = true -}} +{{- else if eq $mode "auto" -}} +{{- $rs := lookup "v1" "Secret" .Release.Namespace $remoteSecretName -}} +{{- if $rs -}} +{{- $useRemote = true -}} +{{- end -}} +{{- end -}} +{{- end -}} + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cloudflared + labels: + app.kubernetes.io/name: cloudflared + app.kubernetes.io/part-of: obol-stack +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: cloudflared + template: + metadata: + labels: + app.kubernetes.io/name: cloudflared + spec: + containers: + - name: cloudflared + image: {{ printf "%s:%s" .Values.image.repository .Values.image.tag | quote }} + args: + - tunnel + - --no-autoupdate + - --metrics + - {{ .Values.metrics.address | quote }} + {{ if $useLocal }} + - --origincert + - /etc/cloudflared/cert.pem + - --config + - /etc/cloudflared/config.yml + - run + - "$(TUNNEL_ID)" + {{ else if $useRemote }} + - run + - --token + - "$(TUNNEL_TOKEN)" + {{ else }} + - --url + - {{ .Values.quickTunnel.url | quote }} + {{ end }} + {{ if $useLocal }} + env: + - name: TUNNEL_ID + valueFrom: + configMapKeyRef: + name: {{ $localConfigMapName | quote }} + key: {{ $localTunnelIDKey | quote }} + volumeMounts: + - name: cloudflared-local + mountPath: /etc/cloudflared + readOnly: true + {{ else if $useRemote }} + env: + - name: TUNNEL_TOKEN + valueFrom: + secretKeyRef: + name: {{ $remoteSecretName | quote }} + key: {{ $remoteSecretKey | quote }} + {{ end }} + ports: + - name: metrics + containerPort: 2000 + livenessProbe: + httpGet: + path: /ready + port: metrics + initialDelaySeconds: 10 + periodSeconds: 10 + resources: + requests: + cpu: 10m + memory: 64Mi + limits: + cpu: 100m + memory: 128Mi + {{ if $useLocal }} + volumes: + - name: cloudflared-local + projected: + sources: + - secret: + name: {{ $localSecretName | quote }} + - configMap: + name: {{ $localConfigMapName | quote }} + {{ end }} + restartPolicy: Always diff --git a/internal/embed/infrastructure/cloudflared/values.yaml b/internal/embed/infrastructure/cloudflared/values.yaml new file mode 100644 index 0000000..58b3d8f --- /dev/null +++ b/internal/embed/infrastructure/cloudflared/values.yaml @@ -0,0 +1,20 @@ +mode: auto + +image: + repository: cloudflare/cloudflared + tag: "2026.1.2" + +metrics: + address: "0.0.0.0:2000" + +quickTunnel: + url: "http://traefik.traefik.svc.cluster.local:80" + +remoteManaged: + tokenSecretName: "cloudflared-tunnel-token" + tokenSecretKey: "TUNNEL_TOKEN" + +localManaged: + secretName: "cloudflared-local-credentials" + configMapName: "cloudflared-local-config" + tunnelIDKey: "tunnel_id" diff --git a/internal/embed/infrastructure/helmfile.yaml b/internal/embed/infrastructure/helmfile.yaml index 9f49d09..5501463 100644 --- a/internal/embed/infrastructure/helmfile.yaml +++ b/internal/embed/infrastructure/helmfile.yaml @@ -1,17 +1,25 @@ # Helmfile for Obol Stack default infrastructure # Orchestrates core infrastructure components deployed with every stack +# Uses Traefik with Gateway API for routing (replaces nginx-ingress) repositories: - - name: ingress-nginx - url: https://kubernetes.github.io/ingress-nginx + - name: traefik + url: 
https://traefik.github.io/charts + - name: prometheus-community + url: https://prometheus-community.github.io/helm-charts - name: obol url: https://obolnetwork.github.io/helm-charts/ - name: ethereum url: https://ethpandaops.github.io/ethereum-helm-charts + - name: bedag + url: https://bedag.github.io/helm-charts/ + - name: stakater + url: https://stakater.github.io/stakater-charts # Single source of truth: change this to switch networks values: - - network: mainnet + - network: mainnet + - gatewayApiVersion: v1.4.1 releases: # Local storage provisioner (raw manifests wrapped as chart) @@ -21,46 +29,210 @@ releases: values: - dataDir: /data - network: "{{ .Values.network }}" + # obol-agent is disabled by default (image not publicly available). + # Set obolAgent.enabled=true to deploy it. + - obolAgent: + enabled: false - # Nginx ingress controller (upstream chart) - - name: ingress-nginx - namespace: ingress-nginx - chart: ingress-nginx/ingress-nginx - version: 4.13.3 + # Monitoring stack (Prometheus operator + Prometheus) + - name: monitoring + namespace: monitoring + createNamespace: true + chart: prometheus-community/kube-prometheus-stack + version: 79.5.0 values: - - controller: - replicaCount: 1 - service: - type: LoadBalancer - externalTrafficPolicy: Local - resources: - limits: - cpu: 500m - memory: 512Mi - requests: - cpu: 100m - memory: 128Mi - tolerations: [] - admissionWebhooks: + - ./values/monitoring.yaml.gotmpl + + # Traefik ingress controller with Gateway API support + # Traefik v38+ bundles Gateway API CRDs in its crds/ directory + - name: traefik + namespace: traefik + createNamespace: true + chart: traefik/traefik + version: 38.0.2 + values: + # Gateway API provider configuration + - providers: + kubernetesGateway: + enabled: true + namespaces: [] # Watch all namespaces + kubernetesCRD: + enabled: true + kubernetesIngress: + enabled: false # Disable legacy Ingress support + # GatewayClass configuration + - gatewayClass: + enabled: true + name: traefik + # Gateway configuration (main entry point) + - gateway: + enabled: true + name: traefik-gateway + namespace: traefik + listeners: + web: + port: 8000 + protocol: HTTP + namespacePolicy: + from: All + # Ports configuration + - ports: + web: + port: 8000 + expose: + default: true + exposedPort: 80 + protocol: TCP + websecure: + port: 8443 + expose: + default: true + exposedPort: 443 + protocol: TCP + tls: + enabled: false # TLS termination disabled for local dev + # Service configuration + - service: + type: LoadBalancer + externalTrafficPolicy: Local + # Resource limits + - resources: + limits: + cpu: 500m + memory: 512Mi + requests: + cpu: 100m + memory: 128Mi + # Disable dashboard by default + - ingressRoute: + dashboard: enabled: false + # Cloudflare Tunnel (quick tunnel mode for public access) + - name: cloudflared + namespace: traefik + chart: ./cloudflared + needs: + - traefik/traefik + + # Stakater Reloader (restarts workloads on Secret/ConfigMap change) + - name: reloader + namespace: reloader + createNamespace: true + chart: stakater/reloader + version: 2.2.7 + # eRPC - name: erpc namespace: erpc + createNamespace: true chart: ethereum/erpc needs: - kube-system/base - - ingress-nginx/ingress-nginx + - traefik/traefik values: - ./values/erpc.yaml.gotmpl + # eRPC HTTPRoute + - name: erpc-httproute + namespace: erpc + chart: bedag/raw + needs: + - traefik/traefik + - erpc/erpc + values: + - resources: + - apiVersion: gateway.networking.k8s.io/v1 + kind: HTTPRoute + metadata: + name: erpc + namespace: erpc + spec: + 
parentRefs: + - name: traefik-gateway + namespace: traefik + sectionName: web + rules: + - matches: + - path: + type: PathPrefix + value: /rpc + backendRefs: + - name: erpc + port: 4000 + # Obol Stack frontend - name: obol-frontend namespace: obol-frontend + createNamespace: true chart: obol/obol-app version: 0.1.0 needs: - - ingress-nginx/ingress-nginx + - traefik/traefik - erpc/erpc values: - ./values/obol-frontend.yaml.gotmpl + + # Obol Frontend HTTPRoute + - name: obol-frontend-httproute + namespace: obol-frontend + chart: bedag/raw + needs: + - traefik/traefik + - obol-frontend/obol-frontend + values: + - resources: + - apiVersion: gateway.networking.k8s.io/v1 + kind: HTTPRoute + metadata: + name: obol-frontend + namespace: obol-frontend + spec: + parentRefs: + - name: traefik-gateway + namespace: traefik + sectionName: web + rules: + - matches: + - path: + type: PathPrefix + value: / + backendRefs: + - name: obol-frontend-obol-app + port: 3000 + + # Obol Frontend RBAC (OpenClaw instance discovery via Kubernetes API) + - name: obol-frontend-rbac + namespace: obol-frontend + chart: bedag/raw + needs: + - obol-frontend/obol-frontend + values: + - resources: + - apiVersion: rbac.authorization.k8s.io/v1 + kind: ClusterRole + metadata: + name: obol-frontend-openclaw-discovery + labels: + app.kubernetes.io/name: obol-frontend + rules: + - apiGroups: [""] + resources: ["namespaces"] + verbs: ["get", "list"] + - apiGroups: [""] + resources: ["pods", "configmaps", "secrets"] + verbs: ["get", "list"] + - apiVersion: rbac.authorization.k8s.io/v1 + kind: ClusterRoleBinding + metadata: + name: obol-frontend-openclaw-discovery + labels: + app.kubernetes.io/name: obol-frontend + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: obol-frontend-openclaw-discovery + subjects: + - kind: ServiceAccount + name: obol-frontend + namespace: obol-frontend diff --git a/internal/embed/infrastructure/values/erpc.yaml.gotmpl b/internal/embed/infrastructure/values/erpc.yaml.gotmpl index fdedc69..051670c 100644 --- a/internal/embed/infrastructure/values/erpc.yaml.gotmpl +++ b/internal/embed/infrastructure/values/erpc.yaml.gotmpl @@ -1,4 +1,5 @@ {{- $network := .Values.network -}} +{{- $publicDomain := env "STACK_PUBLIC_DOMAIN" | default "obol.stack" -}} {{- $chainId := 1 -}} {{/* Default: mainnet */}} {{- if eq $network "hoodi" -}} {{- $chainId = 560048 -}} @@ -48,6 +49,14 @@ config: |- projects: - id: rpc + upstreams: + - id: nodecore + endpoint: https://rpc.nodecore.io + evm: + chainId: {{ $chainId }} + jsonRpc: + headers: + X-Nodecore-Token: "${OBOL_OAUTH_TOKEN}" networks: - architecture: evm evm: @@ -78,23 +87,27 @@ config: |- allowCredentials: true maxAge: 3600 -# Secret env variables +# Secret env variables (chart-managed secret for inline values) secretEnv: {} +# Extra env variables (reference external obol-oauth-token secret) +extraEnv: + - name: OBOL_OAUTH_TOKEN + valueFrom: + secretKeyRef: + name: obol-oauth-token + key: token + optional: true + # Extra args for the erpc container extraArgs: [] # Command replacement for the erpc container customCommand: [] +# Disable legacy Ingress - using Gateway API HTTPRoute instead ingress: - enabled: true - className: nginx - hosts: - - host: obol.stack - paths: - - path: /rpc - pathType: Prefix + enabled: false service: type: ClusterIP @@ -106,7 +119,8 @@ affinity: {} imagePullSecrets: [] # Annotations for the Deployment -annotations: {} +annotations: + secret.reloader.stakater.com/reload: "obol-oauth-token" # Liveness probe livenessProbe: @@ 
-131,7 +145,8 @@ nodeSelector: {} podLabels: {} # Pod annotations -podAnnotations: {} +podAnnotations: + secret.reloader.stakater.com/reload: "obol-oauth-token" # Pod management policy podManagementPolicy: OrderedReady @@ -188,8 +203,7 @@ extraVolumeMounts: [] # Additional ports extraPorts: [] -# Additional env variables -extraEnv: [] +# Additional env variables (defined above with OBOL_OAUTH_TOKEN) serviceMonitor: enabled: false diff --git a/internal/embed/infrastructure/values/monitoring.yaml.gotmpl b/internal/embed/infrastructure/values/monitoring.yaml.gotmpl new file mode 100644 index 0000000..d7a0dc1 --- /dev/null +++ b/internal/embed/infrastructure/values/monitoring.yaml.gotmpl @@ -0,0 +1,33 @@ +prometheus: + enabled: true + prometheusSpec: + serviceMonitorSelectorNilUsesHelmValues: false + serviceMonitorSelector: + matchLabels: + release: monitoring + serviceMonitorNamespaceSelector: {} + podMonitorSelectorNilUsesHelmValues: false + podMonitorSelector: + matchLabels: + release: monitoring + podMonitorNamespaceSelector: {} + retention: 6h + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 1Gi + +grafana: + enabled: false # Enable when we want UI access + +alertmanager: + enabled: false # Disable to keep the local stack lean + +kubeStateMetrics: + enabled: true + +nodeExporter: + enabled: true diff --git a/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl b/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl index 3301156..f95ca2b 100644 --- a/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl +++ b/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl @@ -1,29 +1,59 @@ {{- $network := .Values.network -}} +{{- $publicDomain := env "STACK_PUBLIC_DOMAIN" | default "obol.stack" -}} replicaCount: 1 +serviceAccount: + name: obol-frontend + image: environment: - name: NEXT_PUBLIC_HELIOS_CLIENT_URL value: "http://helios-{{ $network }}.helios.svc.cluster.local:8545" - name: NEXT_PUBLIC_ERPC_URL - value: "http://erpc.default.svc.cluster.local:4000/rpc" + value: "https://{{ $publicDomain }}/rpc" - name: NEXT_PUBLIC_AZTEC_SEQUENCER_URL value: "http://l2-sequencer-node-mainnet-node.aztec.svc.cluster.local:8080" + - name: BETTER_AUTH_SECRET + value: {{ env "BETTER_AUTH_SECRET" | default "" | quote }} + - name: BETTER_AUTH_URL + value: "https://{{ $publicDomain }}" + - name: OBOL_GOOGLE_CLIENT_ID + value: {{ env "OBOL_GOOGLE_CLIENT_ID" | default "" | quote }} + - name: OBOL_GOOGLE_CLIENT_SECRET + value: {{ env "OBOL_GOOGLE_CLIENT_SECRET" | default "" | quote }} + - name: OBOL_AUTH_DB_PATH + value: "/data/auth.sqlite" + + # Obol Agent (ADK) in-cluster URL for CopilotKit runtime + - name: ADK_AGENT_URL + value: "http://obol-agent.agent.svc.cluster.local:8000/" + - name: NEXT_PUBLIC_ADK_AGENT_URL + value: "http://obol-agent.agent.svc.cluster.local:8000/" + + # Ollama in-cluster URL (used by dashboard to surface Ollama Cloud connect URL) + - name: OLLAMA_URL + value: "http://ollama.llm.svc.cluster.local:11434" repository: obolnetwork/obol-stack-front-end pullPolicy: Always - tag: "v0.1.1" + tag: "latest" service: type: ClusterIP port: 3000 +podSecurityContext: + fsGroup: 1001 + +volumes: + - name: auth-db + emptyDir: {} + +volumeMounts: + - name: auth-db + mountPath: /data + +# Disable legacy Ingress - using Gateway API HTTPRoute instead ingress: - enabled: true - className: "nginx" - hosts: - - host: obol.stack - paths: - - path: / - pathType: Prefix + enabled: false diff --git a/internal/embed/k3d-config.yaml 
b/internal/embed/k3d-config.yaml index 563d697..9a97c5d 100644 --- a/internal/embed/k3d-config.yaml +++ b/internal/embed/k3d-config.yaml @@ -35,7 +35,7 @@ options: - arg: --kube-apiserver-arg=feature-gates=KubeletInUserNamespace=true nodeFilters: - server:* - # Disable Traefik to use nginx instead + # Disable bundled Traefik (we install Traefik via Helm) - arg: --disable=traefik nodeFilters: - server:* diff --git a/internal/embed/networks/aztec/templates/ingress.yaml b/internal/embed/networks/aztec/templates/ingress.yaml index 1e8ddd3..821537d 100644 --- a/internal/embed/networks/aztec/templates/ingress.yaml +++ b/internal/embed/networks/aztec/templates/ingress.yaml @@ -1,23 +1,32 @@ {{- if eq .Release.Name "aztec-ingress" }} -apiVersion: networking.k8s.io/v1 -kind: Ingress +# HTTPRoute for Aztec sequencer node RPC +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute metadata: name: aztec namespace: {{ .Release.Namespace }} - annotations: - nginx.ingress.kubernetes.io/rewrite-target: /$2 - nginx.ingress.kubernetes.io/use-regex: "true" spec: - ingressClassName: nginx + parentRefs: + - name: traefik-gateway + namespace: traefik + sectionName: web + hostnames: + - obol.stack rules: - - host: obol.stack - http: - paths: - - path: /{{ .Release.Namespace }}(/|$)(.*) - pathType: ImplementationSpecific - backend: - service: - name: l2-sequencer-node-{{ .Values.id }}-node - port: - number: 8080 + - matches: + - path: + type: Exact + value: /{{ .Release.Namespace }} + - path: + type: PathPrefix + value: /{{ .Release.Namespace }}/ + filters: + - type: URLRewrite + urlRewrite: + path: + type: ReplacePrefixMatch + replacePrefixMatch: / + backendRefs: + - name: l2-sequencer-node-{{ .Values.id }}-node + port: 8080 {{- end }} diff --git a/internal/embed/networks/ethereum/templates/ingress.yaml b/internal/embed/networks/ethereum/templates/ingress.yaml index 75a39a6..a8cda39 100644 --- a/internal/embed/networks/ethereum/templates/ingress.yaml +++ b/internal/embed/networks/ethereum/templates/ingress.yaml @@ -1,30 +1,57 @@ {{- if eq .Release.Name "ethereum-ingress" }} -apiVersion: networking.k8s.io/v1 -kind: Ingress +# HTTPRoute for Ethereum execution client RPC +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute metadata: - name: ethereum + name: ethereum-execution namespace: {{ .Release.Namespace }} - annotations: - nginx.ingress.kubernetes.io/rewrite-target: /$2 - nginx.ingress.kubernetes.io/use-regex: "true" spec: - ingressClassName: nginx + parentRefs: + - name: traefik-gateway + namespace: traefik + sectionName: web + hostnames: + - obol.stack rules: - - host: obol.stack - http: - paths: - - path: /{{ .Release.Namespace }}/execution(/|$)(.*) - pathType: ImplementationSpecific - backend: - service: - name: ethereum-execution - port: - number: 8545 - - path: /{{ .Release.Namespace }}/beacon(/|$)(.*) - pathType: ImplementationSpecific - backend: - service: - name: ethereum-beacon - port: - number: 5052 + - matches: + - path: + type: PathPrefix + value: /{{ .Release.Namespace }}/execution + filters: + - type: URLRewrite + urlRewrite: + path: + type: ReplacePrefixMatch + replacePrefixMatch: / + backendRefs: + - name: ethereum-execution + port: 8545 +--- +# HTTPRoute for Ethereum beacon client RPC +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: ethereum-beacon + namespace: {{ .Release.Namespace }} +spec: + parentRefs: + - name: traefik-gateway + namespace: traefik + sectionName: web + hostnames: + - obol.stack + rules: + - matches: + - path: + type: PathPrefix + 
value: /{{ .Release.Namespace }}/beacon + filters: + - type: URLRewrite + urlRewrite: + path: + type: ReplacePrefixMatch + replacePrefixMatch: / + backendRefs: + - name: ethereum-beacon + port: 5052 {{- end }} diff --git a/internal/embed/networks/helios/helmfile.yaml.gotmpl b/internal/embed/networks/helios/helmfile.yaml.gotmpl index 2be4293..c0a5d96 100644 --- a/internal/embed/networks/helios/helmfile.yaml.gotmpl +++ b/internal/embed/networks/helios/helmfile.yaml.gotmpl @@ -28,17 +28,45 @@ releases: size: 10Gi storageClass: local-path + # Disable legacy Ingress - using Gateway API HTTPRoute instead - ingress: - enabled: true - className: nginx - annotations: - nginx.ingress.kubernetes.io/rewrite-target: /$2 - nginx.ingress.kubernetes.io/use-regex: "true" - hosts: - - host: obol.stack - paths: - - path: /helios-{{ .Values.id }}(/|$)(.*) - pathType: ImplementationSpecific + enabled: false + + # HTTPRoute for Helios RPC endpoint + - name: helios-httproute + namespace: helios-{{ .Values.id }} + chart: bedag/raw + values: + - resources: + - apiVersion: gateway.networking.k8s.io/v1 + kind: HTTPRoute + metadata: + name: helios + namespace: helios-{{ .Values.id }} + spec: + parentRefs: + - name: traefik-gateway + namespace: traefik + sectionName: web + hostnames: + - obol.stack + rules: + - matches: + - path: + type: Exact + value: /helios-{{ .Values.id }} + - path: + type: PathPrefix + value: /helios-{{ .Values.id }}/ + filters: + - type: URLRewrite + urlRewrite: + path: + type: ReplacePrefixMatch + replacePrefixMatch: / + backendRefs: + - name: helios-{{ .Values.network }} + port: 8545 # Metadata ConfigMap for frontend discovery - name: helios-metadata diff --git a/internal/embed/networks/inference/Chart.yaml b/internal/embed/networks/inference/Chart.yaml new file mode 100644 index 0000000..7859bbc --- /dev/null +++ b/internal/embed/networks/inference/Chart.yaml @@ -0,0 +1,5 @@ +apiVersion: v2 +name: inference-core +description: x402-enabled inference gateway with Ollama +type: application +version: 0.1.0 diff --git a/internal/embed/networks/inference/helmfile.yaml.gotmpl b/internal/embed/networks/inference/helmfile.yaml.gotmpl new file mode 100644 index 0000000..e9af653 --- /dev/null +++ b/internal/embed/networks/inference/helmfile.yaml.gotmpl @@ -0,0 +1,49 @@ +repositories: + - name: bedag + url: https://bedag.github.io/helm-charts/ + +releases: + # Core inference resources: Ollama, x402 gateway, Services, HTTPRoute + - name: inference-core + namespace: inference-{{ .Values.id }} + createNamespace: true + chart: . 
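+    # "." is the local inference chart in this directory (Chart.yaml: inference-core);
+    # its templates render the Ollama Deployment, x402 gateway and HTTPRoute below.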
+ values: + - id: '{{ .Values.id }}' + model: '{{ .Values.model }}' + pricePerRequest: '{{ .Values.pricePerRequest }}' + walletAddress: '{{ .Values.walletAddress }}' + chain: '{{ .Values.chain }}' + gatewayPort: '{{ .Values.gatewayPort }}' + + # Metadata ConfigMap for frontend discovery + - name: inference-metadata + namespace: inference-{{ .Values.id }} + chart: bedag/raw + values: + - resources: + - apiVersion: v1 + kind: ConfigMap + metadata: + name: inference-{{ .Values.id }}-metadata + namespace: inference-{{ .Values.id }} + labels: + app.kubernetes.io/part-of: obol.stack + obol.stack/id: {{ .Values.id }} + obol.stack/app: inference + data: + metadata.json: | + { + "model": "{{ .Values.model }}", + "pricing": { + "pricePerRequest": "{{ .Values.pricePerRequest }}", + "currency": "USDC", + "chain": "{{ .Values.chain }}" + }, + "endpoints": { + "gateway": { + "external": "http://obol.stack/inference-{{ .Values.id }}/v1", + "internal": "http://inference-gateway.inference-{{ .Values.id }}.svc.cluster.local:{{ .Values.gatewayPort }}" + } + } + } diff --git a/internal/embed/networks/inference/templates/gateway.yaml b/internal/embed/networks/inference/templates/gateway.yaml new file mode 100644 index 0000000..7f4d0ea --- /dev/null +++ b/internal/embed/networks/inference/templates/gateway.yaml @@ -0,0 +1,211 @@ +{{- if eq .Release.Name "inference-core" }} +--- +# Ollama inference runtime +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ollama + namespace: {{ .Release.Namespace }} + labels: + app: ollama + app.kubernetes.io/part-of: obol.stack +spec: + replicas: 1 + strategy: + type: Recreate + selector: + matchLabels: + app: ollama + template: + metadata: + labels: + app: ollama + spec: + containers: + - name: ollama + image: ollama/ollama:latest + imagePullPolicy: IfNotPresent + ports: + - name: http + containerPort: 11434 + protocol: TCP + env: + - name: OLLAMA_MODELS + value: /models + - name: OLLAMA_HOST + value: 0.0.0.0:11434 + volumeMounts: + - name: ollama-models + mountPath: /models + readinessProbe: + httpGet: + path: /api/version + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 2 + livenessProbe: + httpGet: + path: /api/version + port: http + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 2 + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 4000m + memory: 8Gi + volumes: + - name: ollama-models + emptyDir: {} + +--- +apiVersion: v1 +kind: Service +metadata: + name: ollama + namespace: {{ .Release.Namespace }} + labels: + app: ollama +spec: + type: ClusterIP + selector: + app: ollama + ports: + - name: http + port: 11434 + targetPort: http + protocol: TCP + +--- +# x402 inference gateway +apiVersion: v1 +kind: ConfigMap +metadata: + name: gateway-config + namespace: {{ .Release.Namespace }} +data: + UPSTREAM_URL: "http://ollama.{{ .Release.Namespace }}.svc.cluster.local:11434" + LISTEN_ADDR: ":{{ .Values.gatewayPort }}" + PRICE_PER_REQUEST: "{{ .Values.pricePerRequest }}" + WALLET_ADDRESS: "{{ .Values.walletAddress }}" + CHAIN: "{{ .Values.chain }}" + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: inference-gateway + namespace: {{ .Release.Namespace }} + labels: + app: inference-gateway + app.kubernetes.io/part-of: obol.stack +spec: + replicas: 1 + selector: + matchLabels: + app: inference-gateway + template: + metadata: + labels: + app: inference-gateway + spec: + containers: + - name: gateway + image: ghcr.io/obolnetwork/inference-gateway:latest + imagePullPolicy: IfNotPresent + ports: + - name: 
http + containerPort: {{ .Values.gatewayPort }} + protocol: TCP + args: + - --listen=:{{ .Values.gatewayPort }} + - --upstream=http://ollama.{{ .Release.Namespace }}.svc.cluster.local:11434 + - --wallet={{ .Values.walletAddress }} + - --price={{ .Values.pricePerRequest }} + - --chain={{ .Values.chain }} + readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 3 + periodSeconds: 5 + timeoutSeconds: 2 + livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 2 + resources: + requests: + cpu: 50m + memory: 64Mi + limits: + cpu: 500m + memory: 256Mi + +--- +apiVersion: v1 +kind: Service +metadata: + name: inference-gateway + namespace: {{ .Release.Namespace }} + labels: + app: inference-gateway +spec: + type: ClusterIP + selector: + app: inference-gateway + ports: + - name: http + port: {{ .Values.gatewayPort }} + targetPort: http + protocol: TCP + +--- +# HTTPRoute for external access via Traefik Gateway API +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: inference-gateway + namespace: {{ .Release.Namespace }} +spec: + parentRefs: + - name: traefik-gateway + namespace: traefik + sectionName: web + hostnames: + - obol.stack + rules: + - matches: + - path: + type: PathPrefix + value: /{{ .Release.Namespace }}/v1 + filters: + - type: URLRewrite + urlRewrite: + path: + type: ReplacePrefixMatch + replacePrefixMatch: /v1 + backendRefs: + - name: inference-gateway + port: {{ .Values.gatewayPort }} + - matches: + - path: + type: Exact + value: /{{ .Release.Namespace }}/health + filters: + - type: URLRewrite + urlRewrite: + path: + type: ReplacePrefixMatch + replacePrefixMatch: /health + backendRefs: + - name: inference-gateway + port: {{ .Values.gatewayPort }} +{{- end }} diff --git a/internal/embed/networks/inference/values.yaml.gotmpl b/internal/embed/networks/inference/values.yaml.gotmpl new file mode 100644 index 0000000..75f5ed6 --- /dev/null +++ b/internal/embed/networks/inference/values.yaml.gotmpl @@ -0,0 +1,23 @@ +# Configuration via CLI flags +# Template fields populated by obol CLI during network installation + +# @enum llama3.3:70b,llama3.2:3b,qwen2.5:72b,qwen2.5:7b,glm-4.7:cloud,deepseek-r1:7b,phi4:14b +# @default glm-4.7:cloud +# @description Ollama model to serve for inference +model: {{.Model}} + +# @default 0.001 +# @description USDC price per inference request +pricePerRequest: {{.PricePerRequest}} + +# @description USDC recipient wallet address (EVM) +walletAddress: {{.WalletAddress}} + +# @enum base,base-sepolia +# @default base-sepolia +# @description Blockchain network for x402 payments +chain: {{.Chain}} + +# @default 8402 +# @description Port for the x402 inference gateway +gatewayPort: {{.GatewayPort}} diff --git a/internal/inference/gateway.go b/internal/inference/gateway.go new file mode 100644 index 0000000..43379e5 --- /dev/null +++ b/internal/inference/gateway.go @@ -0,0 +1,140 @@ +package inference + +import ( + "context" + "fmt" + "log" + "net" + "net/http" + "net/http/httputil" + "net/url" + "time" + + "github.com/mark3labs/x402-go" + x402http "github.com/mark3labs/x402-go/http" +) + +// GatewayConfig holds configuration for the x402 inference gateway. +type GatewayConfig struct { + // ListenAddr is the address to listen on (e.g., ":8402"). + ListenAddr string + + // UpstreamURL is the upstream inference service URL (e.g., "http://localhost:11434"). + UpstreamURL string + + // WalletAddress is the USDC recipient address for payments. 
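+ // Expected to be a 0x-prefixed EVM address.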
+ WalletAddress string + + // PricePerRequest is the USDC amount charged per inference request (e.g., "0.001"). + PricePerRequest string + + // Chain is the x402 chain configuration (e.g., x402.BaseMainnet). + Chain x402.ChainConfig + + // FacilitatorURL is the x402 facilitator service URL. + FacilitatorURL string +} + +// Gateway is an x402-enabled reverse proxy for LLM inference. +type Gateway struct { + config GatewayConfig + server *http.Server +} + +// NewGateway creates a new inference gateway with the given configuration. +func NewGateway(cfg GatewayConfig) (*Gateway, error) { + if cfg.ListenAddr == "" { + cfg.ListenAddr = ":8402" + } + if cfg.FacilitatorURL == "" { + cfg.FacilitatorURL = "https://facilitator.x402.rs" + } + if cfg.Chain.NetworkID == "" { + cfg.Chain = x402.BaseSepolia + } + if cfg.PricePerRequest == "" { + cfg.PricePerRequest = "0.001" + } + + return &Gateway{config: cfg}, nil +} + +// Start begins serving the gateway. Blocks until the server is shut down. +func (g *Gateway) Start() error { + upstream, err := url.Parse(g.config.UpstreamURL) + if err != nil { + return fmt.Errorf("invalid upstream URL %q: %w", g.config.UpstreamURL, err) + } + + // Build reverse proxy to upstream inference service + proxy := httputil.NewSingleHostReverseProxy(upstream) + proxy.ErrorHandler = func(w http.ResponseWriter, r *http.Request, err error) { + log.Printf("proxy error: %v", err) + http.Error(w, "upstream unavailable", http.StatusBadGateway) + } + + // Create x402 payment requirement + requirement, err := x402.NewUSDCPaymentRequirement(x402.USDCRequirementConfig{ + Chain: g.config.Chain, + Amount: g.config.PricePerRequest, + RecipientAddress: g.config.WalletAddress, + }) + if err != nil { + return fmt.Errorf("failed to create payment requirement: %w", err) + } + + // Configure x402 middleware + x402Config := &x402http.Config{ + FacilitatorURL: g.config.FacilitatorURL, + PaymentRequirements: []x402.PaymentRequirement{requirement}, + } + paymentMiddleware := x402http.NewX402Middleware(x402Config) + + // Build HTTP mux + mux := http.NewServeMux() + + // Health check (no payment required) + mux.HandleFunc("GET /health", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + fmt.Fprintln(w, `{"status":"ok"}`) + }) + + // Protected inference endpoints (x402 payment required) + mux.Handle("POST /v1/chat/completions", paymentMiddleware(proxy)) + mux.Handle("POST /v1/completions", paymentMiddleware(proxy)) + mux.Handle("POST /v1/embeddings", paymentMiddleware(proxy)) + mux.Handle("GET /v1/models", paymentMiddleware(proxy)) + + // Unprotected OpenAI-compat metadata + mux.Handle("/", proxy) + + g.server = &http.Server{ + Addr: g.config.ListenAddr, + Handler: mux, + ReadHeaderTimeout: 10 * time.Second, + } + + listener, err := net.Listen("tcp", g.config.ListenAddr) + if err != nil { + return fmt.Errorf("failed to listen on %s: %w", g.config.ListenAddr, err) + } + + log.Printf("x402 inference gateway listening on %s", g.config.ListenAddr) + log.Printf(" upstream: %s", g.config.UpstreamURL) + log.Printf(" wallet: %s", g.config.WalletAddress) + log.Printf(" price: %s USDC/request", g.config.PricePerRequest) + log.Printf(" chain: %s", g.config.Chain.NetworkID) + log.Printf(" facilitator: %s", g.config.FacilitatorURL) + + return g.server.Serve(listener) +} + +// Stop gracefully shuts down the gateway. 
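+// It is a no-op when Start was never called (the underlying server is nil).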
+func (g *Gateway) Stop() error { + if g.server == nil { + return nil + } + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + return g.server.Shutdown(ctx) +} diff --git a/internal/llm/llm.go b/internal/llm/llm.go new file mode 100644 index 0000000..2980069 --- /dev/null +++ b/internal/llm/llm.go @@ -0,0 +1,152 @@ +package llm + +import ( + "bytes" + "encoding/json" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + + "github.com/ObolNetwork/obol-stack/internal/config" +) + +const ( + namespace = "llm" + secretName = "llms-secrets" + configMapName = "llmspy-config" + deployName = "llmspy" +) + +// providerEnvKeys maps provider names to their Secret key names. +var providerEnvKeys = map[string]string{ + "anthropic": "ANTHROPIC_API_KEY", + "openai": "OPENAI_API_KEY", +} + +// ConfigureLLMSpy enables a cloud provider in the llmspy gateway. +// It patches the llms-secrets Secret with the API key, enables the provider +// in the llmspy-config ConfigMap, and restarts the deployment. +func ConfigureLLMSpy(cfg *config.Config, provider, apiKey string) error { + envKey, ok := providerEnvKeys[provider] + if !ok { + return fmt.Errorf("unsupported llmspy provider: %s (supported: anthropic, openai)", provider) + } + + kubectlBinary := filepath.Join(cfg.BinDir, "kubectl") + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + + if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) { + return fmt.Errorf("cluster not running. Run 'obol stack up' first") + } + + // 1. Patch the Secret with the API key + fmt.Printf("Configuring llmspy: setting %s key...\n", provider) + patchJSON := fmt.Sprintf(`{"stringData":{"%s":"%s"}}`, envKey, apiKey) + if err := kubectl(kubectlBinary, kubeconfigPath, + "patch", "secret", secretName, "-n", namespace, + "-p", patchJSON, "--type=merge"); err != nil { + return fmt.Errorf("failed to patch llmspy secret: %w", err) + } + + // 2. Read current ConfigMap, enable the provider in llms.json + fmt.Printf("Enabling %s provider in llmspy config...\n", provider) + if err := enableProviderInConfigMap(kubectlBinary, kubeconfigPath, provider); err != nil { + return fmt.Errorf("failed to update llmspy config: %w", err) + } + + // 3. Restart the deployment so it picks up new Secret + ConfigMap + fmt.Printf("Restarting llmspy deployment...\n") + if err := kubectl(kubectlBinary, kubeconfigPath, + "rollout", "restart", fmt.Sprintf("deployment/%s", deployName), "-n", namespace); err != nil { + return fmt.Errorf("failed to restart llmspy: %w", err) + } + + // 4. Wait for rollout to complete + if err := kubectl(kubectlBinary, kubeconfigPath, + "rollout", "status", fmt.Sprintf("deployment/%s", deployName), "-n", namespace, + "--timeout=60s"); err != nil { + fmt.Printf("Warning: llmspy rollout not confirmed: %v\n", err) + fmt.Println("The deployment may still be rolling out.") + } else { + fmt.Printf("llmspy restarted with %s provider enabled.\n", provider) + } + + return nil +} + +// enableProviderInConfigMap reads the llmspy-config ConfigMap, parses llms.json, +// sets providers..enabled = true, and patches the ConfigMap back. 
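+// Concretely, it sets providers.<provider>.enabled, where <provider> is
+// currently "anthropic" or "openai".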
+func enableProviderInConfigMap(kubectlBinary, kubeconfigPath, provider string) error { + // Read current llms.json from ConfigMap + var stdout bytes.Buffer + cmd := exec.Command(kubectlBinary, "get", "configmap", configMapName, + "-n", namespace, "-o", "jsonpath={.data.llms\\.json}") + cmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG=%s", kubeconfigPath)) + cmd.Stdout = &stdout + var stderr bytes.Buffer + cmd.Stderr = &stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("failed to read ConfigMap: %w\n%s", err, stderr.String()) + } + + // Parse JSON + var llmsConfig map[string]interface{} + if err := json.Unmarshal(stdout.Bytes(), &llmsConfig); err != nil { + return fmt.Errorf("failed to parse llms.json: %w", err) + } + + // Set providers..enabled = true + providers, ok := llmsConfig["providers"].(map[string]interface{}) + if !ok { + providers = make(map[string]interface{}) + llmsConfig["providers"] = providers + } + + providerCfg, ok := providers[provider].(map[string]interface{}) + if !ok { + providerCfg = make(map[string]interface{}) + providers[provider] = providerCfg + } + providerCfg["enabled"] = true + + // Marshal back to JSON + updated, err := json.Marshal(llmsConfig) + if err != nil { + return fmt.Errorf("failed to marshal llms.json: %w", err) + } + + // Patch ConfigMap + // Use strategic merge patch with the new llms.json + patchData := map[string]interface{}{ + "data": map[string]string{ + "llms.json": string(updated), + }, + } + patchJSON, err := json.Marshal(patchData) + if err != nil { + return fmt.Errorf("failed to marshal patch: %w", err) + } + + return kubectl(kubectlBinary, kubeconfigPath, + "patch", "configmap", configMapName, "-n", namespace, + "-p", string(patchJSON), "--type=merge") +} + +// kubectl runs a kubectl command with the given kubeconfig and returns any error. +func kubectl(binary, kubeconfig string, args ...string) error { + cmd := exec.Command(binary, args...) + cmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG=%s", kubeconfig)) + var stderr bytes.Buffer + cmd.Stderr = &stderr + cmd.Stdout = os.Stdout + if err := cmd.Run(); err != nil { + errMsg := strings.TrimSpace(stderr.String()) + if errMsg != "" { + return fmt.Errorf("%w: %s", err, errMsg) + } + return err + } + return nil +} diff --git a/internal/openclaw/OPENCLAW_VERSION b/internal/openclaw/OPENCLAW_VERSION new file mode 100644 index 0000000..04b5d69 --- /dev/null +++ b/internal/openclaw/OPENCLAW_VERSION @@ -0,0 +1,3 @@ +# renovate: datasource=github-releases depName=openclaw/openclaw +# Pins the upstream OpenClaw version to build and publish. +v2026.2.9 diff --git a/internal/openclaw/chart/Chart.yaml b/internal/openclaw/chart/Chart.yaml new file mode 100644 index 0000000..970d251 --- /dev/null +++ b/internal/openclaw/chart/Chart.yaml @@ -0,0 +1,20 @@ +apiVersion: v2 +name: openclaw +description: OpenClaw gateway deployment (agent runtime) for Kubernetes. +type: application +version: 0.1.0 +appVersion: "2026.2.9" +kubeVersion: ">=1.26.0-0" + +home: https://docs.openclaw.ai +sources: + - https://docs.openclaw.ai +maintainers: + - name: Obol Platform Team + email: platform@obol.tech +keywords: + - openclaw + - agent + - ai + - gateway + - obol diff --git a/internal/openclaw/chart/templates/NOTES.txt b/internal/openclaw/chart/templates/NOTES.txt new file mode 100644 index 0000000..2bbb013 --- /dev/null +++ b/internal/openclaw/chart/templates/NOTES.txt @@ -0,0 +1,47 @@ +OpenClaw is now installed. + +Namespace: {{ .Release.Namespace }} +Service: {{ include "openclaw.fullname" . 
}} +Port: {{ .Values.service.port }} + +Gateway token: + kubectl get secret -n {{ .Release.Namespace }} {{ include "openclaw.secretsName" . }} -o jsonpath='{.data.{{ .Values.secrets.gatewayToken.key }}}' | base64 --decode + +{{- if .Values.httpRoute.enabled }} + +HTTPRoute is enabled. Access OpenClaw at: +{{- range .Values.httpRoute.hostnames }} + http://{{ . }} +{{- end }} + +{{- else if .Values.ingress.enabled }} + +Ingress is enabled. Access OpenClaw at: +{{- range $host := .Values.ingress.hosts }} + {{- range .paths }} + http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }} + {{- end }} +{{- end }} + +{{- else }} + +Port-forward for local access: + export POD_NAME=$(kubectl get pods -n {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "openclaw.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + kubectl -n {{ .Release.Namespace }} port-forward $POD_NAME 18789:{{ .Values.service.port }} + open http://127.0.0.1:18789 + +{{- end }} + +Next steps: +{{- if and .Values.models.ollama.enabled (not .Values.models.anthropic.enabled) (not .Values.models.openai.enabled) }} + You are using the default Ollama provider. To configure a cloud LLM provider: + obol llm configure --provider=anthropic --api-key= + obol openclaw setup {{ .Release.Name }} +{{- end }} +{{- if not (or .Values.models.ollama.enabled .Values.models.anthropic.enabled .Values.models.openai.enabled) }} + WARNING: No model providers are enabled. Configure at least one provider: + obol llm configure --provider=anthropic --api-key= + obol openclaw setup {{ .Release.Name }} +{{- end }} + Dashboard: open the URL above and enter your gateway token + CLI docs: obol openclaw --help diff --git a/internal/openclaw/chart/templates/_helpers.tpl b/internal/openclaw/chart/templates/_helpers.tpl new file mode 100644 index 0000000..aa9df60 --- /dev/null +++ b/internal/openclaw/chart/templates/_helpers.tpl @@ -0,0 +1,230 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "openclaw.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +*/}} +{{- define "openclaw.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "openclaw.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels. +*/}} +{{- define "openclaw.labels" -}} +helm.sh/chart: {{ include "openclaw.chart" . }} +{{ include "openclaw.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels. +*/}} +{{- define "openclaw.selectorLabels" -}} +app.kubernetes.io/name: {{ include "openclaw.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use. +*/}} +{{- define "openclaw.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "openclaw.fullname" .) 
.Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +Compute the full image reference. +*/}} +{{- define "openclaw.image" -}} +{{- $tag := .Values.image.tag -}} +{{- if not $tag -}} +{{- $tag = .Chart.AppVersion -}} +{{- end -}} +{{- printf "%s:%s" .Values.image.repository $tag -}} +{{- end }} + +{{/* +Name of the Secret used for envFrom. +*/}} +{{- define "openclaw.secretsName" -}} +{{- if .Values.secrets.existingSecret -}} +{{- .Values.secrets.existingSecret -}} +{{- else if .Values.secrets.name -}} +{{- .Values.secrets.name -}} +{{- else -}} +{{- printf "%s-secrets" (include "openclaw.fullname" .) -}} +{{- end -}} +{{- end }} + +{{/* +Name of the ConfigMap containing openclaw.json. +*/}} +{{- define "openclaw.configMapName" -}} +{{- if .Values.config.existingConfigMap -}} +{{- .Values.config.existingConfigMap -}} +{{- else -}} +{{- printf "%s-config" (include "openclaw.fullname" .) -}} +{{- end -}} +{{- end }} + +{{/* +Name of the PVC used for state storage. +*/}} +{{- define "openclaw.pvcName" -}} +{{- if .Values.persistence.existingClaim -}} +{{- .Values.persistence.existingClaim -}} +{{- else -}} +{{- printf "%s-data" (include "openclaw.fullname" .) -}} +{{- end -}} +{{- end }} + +{{/* +Compute (or reuse) the gateway token value. +*/}} +{{- define "openclaw.gatewayTokenValue" -}} +{{- if .Values.secrets.gatewayToken.value -}} +{{- .Values.secrets.gatewayToken.value -}} +{{- else -}} +{{- $secretName := include "openclaw.secretsName" . -}} +{{- $key := .Values.secrets.gatewayToken.key -}} +{{- $existing := (lookup "v1" "Secret" .Release.Namespace $secretName) -}} +{{- if $existing -}} + {{- $data := index $existing "data" -}} + {{- if and $data (hasKey $data $key) -}} + {{- index $data $key | b64dec -}} + {{- end -}} +{{- end -}} +{{- end -}} +{{- end }} + +{{/* +Render openclaw.json as strict JSON. If config.content is provided, it is used verbatim. 
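+
+For orientation only (not normative): with the chart's default values — token auth,
+port 18789, and Ollama as the only enabled provider — the rendered document looks
+roughly like the following (toPrettyJson sorts keys alphabetically):
+
+  {
+    "agents": { "defaults": { "workspace": "/data/.openclaw/workspace" } },
+    "gateway": {
+      "auth": { "mode": "token", "token": "${OPENCLAW_GATEWAY_TOKEN}" },
+      "bind": "lan",
+      "http": { "endpoints": { "chatCompletions": { "enabled": true } } },
+      "mode": "local",
+      "port": 18789
+    },
+    "models": {
+      "providers": {
+        "ollama": {
+          "api": "openai-completions",
+          "apiKey": "${OLLAMA_API_KEY}",
+          "baseUrl": "http://llmspy.llm.svc.cluster.local:8000/v1",
+          "models": [{ "id": "glm-4.7-flash", "name": "glm-4.7-flash" }]
+        }
+      }
+    }
+  }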
+*/}} +{{- define "openclaw.configJson" -}} +{{- if .Values.config.content -}} +{{- .Values.config.content -}} +{{- else -}} +{{- $gatewayAuth := dict "mode" .Values.openclaw.gateway.auth.mode -}} +{{- if ne .Values.openclaw.gateway.auth.mode "none" -}} +{{- $_ := set $gatewayAuth "token" (printf "${%s}" .Values.secrets.gatewayToken.key) -}} +{{- end -}} + +{{- $gateway := dict + "mode" .Values.openclaw.gateway.mode + "bind" .Values.openclaw.gateway.bind + "port" .Values.service.port + "auth" $gatewayAuth + "http" (dict "endpoints" (dict "chatCompletions" (dict "enabled" .Values.openclaw.gateway.http.endpoints.chatCompletions.enabled))) +-}} +{{- if .Values.openclaw.gateway.trustedProxies -}} +{{- $_ := set $gateway "trustedProxies" .Values.openclaw.gateway.trustedProxies -}} +{{- end -}} +{{- if .Values.openclaw.gateway.controlUi.allowInsecureAuth -}} +{{- $_ := set $gateway "controlUi" (dict "allowInsecureAuth" true) -}} +{{- end -}} + +{{- $agentDefaults := dict "workspace" .Values.openclaw.workspaceDir -}} +{{- if .Values.openclaw.agentModel -}} +{{- $_ := set $agentDefaults "model" (dict "primary" .Values.openclaw.agentModel) -}} +{{- end -}} + +{{- $cfg := dict + "gateway" $gateway + "agents" (dict "defaults" $agentDefaults) +-}} + +{{- if .Values.skills.enabled -}} +{{- $_ := set $cfg "skills" (dict "load" (dict + "extraDirs" (list .Values.skills.extractDir) +)) -}} +{{- end -}} + +{{- /* Build providers map from all enabled model providers */ -}} +{{- $providers := dict -}} +{{- range $name := list "anthropic" "openai" "ollama" -}} +{{- $p := index $.Values.models $name -}} +{{- if $p.enabled -}} +{{- $models := list -}} +{{- range $m := $p.models -}} +{{- $models = append $models (dict "id" $m.id "name" $m.name) -}} +{{- end -}} +{{- $entry := dict + "baseUrl" $p.baseUrl + "apiKey" (printf "${%s}" $p.apiKeyEnvVar) + "models" $models +-}} +{{- if $p.api -}} +{{- $_ := set $entry "api" $p.api -}} +{{- end -}} +{{- $_ := set $providers $name $entry -}} +{{- end -}} +{{- end -}} +{{- if $providers -}} +{{- $_ := set $cfg "models" (dict "providers" $providers) -}} +{{- end -}} + +{{- /* Build channels config from enabled integrations */ -}} +{{- $channels := dict -}} +{{- if .Values.channels.telegram.enabled -}} +{{- $tg := dict "botToken" (printf "${TELEGRAM_BOT_TOKEN}") -}} +{{- if .Values.channels.telegram.dmPolicy -}} +{{- $_ := set $tg "dmPolicy" .Values.channels.telegram.dmPolicy -}} +{{- end -}} +{{- $_ := set $channels "telegram" $tg -}} +{{- end -}} +{{- if .Values.channels.discord.enabled -}} +{{- $dc := dict "botToken" (printf "${DISCORD_BOT_TOKEN}") -}} +{{- if .Values.channels.discord.dmPolicy -}} +{{- $_ := set $dc "dmPolicy" .Values.channels.discord.dmPolicy -}} +{{- end -}} +{{- $_ := set $channels "discord" $dc -}} +{{- end -}} +{{- if .Values.channels.slack.enabled -}} +{{- $sl := dict "botToken" (printf "${SLACK_BOT_TOKEN}") "appToken" (printf "${SLACK_APP_TOKEN}") -}} +{{- $_ := set $channels "slack" $sl -}} +{{- end -}} +{{- if $channels -}} +{{- $_ := set $cfg "channels" $channels -}} +{{- end -}} + +{{- $cfg | toPrettyJson -}} +{{- end -}} +{{- end }} + +{{/* +Name of the skills ConfigMap (user-provided or chart-created default). +*/}} +{{- define "openclaw.skillsConfigMapName" -}} +{{- if .Values.skills.configMapName -}} +{{- .Values.skills.configMapName -}} +{{- else -}} +{{- printf "%s-skills" (include "openclaw.fullname" .) 
-}} +{{- end -}} +{{- end }} diff --git a/internal/openclaw/chart/templates/configmap.yaml b/internal/openclaw/chart/templates/configmap.yaml new file mode 100644 index 0000000..fafe456 --- /dev/null +++ b/internal/openclaw/chart/templates/configmap.yaml @@ -0,0 +1,11 @@ +{{- if not .Values.config.existingConfigMap -}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "openclaw.configMapName" . }} + labels: + {{- include "openclaw.labels" . | nindent 4 }} +data: + {{ .Values.config.key }}: |- + {{- include "openclaw.configJson" . | nindent 4 }} +{{- end }} diff --git a/internal/openclaw/chart/templates/deployment.yaml b/internal/openclaw/chart/templates/deployment.yaml new file mode 100644 index 0000000..599c646 --- /dev/null +++ b/internal/openclaw/chart/templates/deployment.yaml @@ -0,0 +1,187 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "openclaw.fullname" . }} + labels: + {{- include "openclaw.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + strategy: + type: Recreate + selector: + matchLabels: + {{- include "openclaw.selectorLabels" . | nindent 6 }} + template: + metadata: + annotations: + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} + checksum/secret: {{ include (print $.Template.BasePath "/secret.yaml") . | sha256sum }} + {{- with .Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "openclaw.selectorLabels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "openclaw.serviceAccountName" . }} + {{- if .Values.priorityClassName }} + priorityClassName: {{ .Values.priorityClassName | quote }} + {{- end }} + {{- with .Values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.skills.enabled }} + initContainers: + - name: extract-skills + image: "{{ .Values.skills.initContainer.image.repository }}:{{ .Values.skills.initContainer.image.tag }}" + imagePullPolicy: {{ .Values.skills.initContainer.image.pullPolicy }} + {{- with .Values.containerSecurityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + command: + - sh + - -c + - | + set -eu + mkdir -p {{ .Values.skills.extractDir | quote }} + if [ -f /skills/{{ .Values.skills.archiveKey }} ]; then + rm -rf {{ .Values.skills.extractDir | quote }}/* + tar -xzf /skills/{{ .Values.skills.archiveKey }} -C {{ .Values.skills.extractDir | quote }} + echo "Skills extracted successfully" + else + echo "No skills archive found, skipping extraction" + fi + volumeMounts: + - name: data + mountPath: {{ .Values.persistence.mountPath }} + - name: skills-archive + mountPath: /skills + readOnly: true + {{- end }} + containers: + - name: openclaw + image: "{{ include "openclaw.image" . }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- with .Values.containerSecurityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.image.command }} + command: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.image.args }} + args: + {{- toYaml . 
| nindent 12 }} + {{- end }} + ports: + - name: http + containerPort: {{ .Values.service.port }} + protocol: TCP + env: + - name: OPENCLAW_CONFIG_PATH + value: "/etc/openclaw/{{ .Values.config.key }}" + - name: OPENCLAW_STATE_DIR + value: {{ .Values.openclaw.stateDir | quote }} + - name: ERPC_URL + value: {{ .Values.erpc.url | quote }} + {{- /* Inject non-secret provider API key values (e.g. Ollama placeholder) */ -}} + {{- range $name := list "anthropic" "openai" "ollama" }} + {{- $p := index $.Values.models $name }} + {{- if and $p.enabled $p.apiKeyValue (not (has $name (list "anthropic" "openai"))) }} + - name: {{ $p.apiKeyEnvVar }} + value: {{ $p.apiKeyValue | quote }} + {{- end }} + {{- end }} + {{- with .Values.image.env }} + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.extraEnv }} + {{- toYaml . | nindent 12 }} + {{- end }} + envFrom: + - secretRef: + name: {{ include "openclaw.secretsName" . }} + {{- range .Values.secrets.extraEnvFromSecrets }} + - secretRef: + name: {{ . | quote }} + {{- end }} + {{- if .Values.startupProbe.enabled }} + startupProbe: + tcpSocket: + port: http + periodSeconds: {{ .Values.startupProbe.periodSeconds }} + failureThreshold: {{ .Values.startupProbe.failureThreshold }} + timeoutSeconds: {{ .Values.startupProbe.timeoutSeconds }} + {{- end }} + {{- if .Values.livenessProbe.enabled }} + livenessProbe: + tcpSocket: + port: http + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + {{- end }} + {{- if .Values.readinessProbe.enabled }} + readinessProbe: + tcpSocket: + port: http + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} + {{- end }} + {{- with .Values.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + volumeMounts: + - name: config + mountPath: /etc/openclaw + readOnly: true + - name: data + mountPath: {{ .Values.persistence.mountPath }} + {{- with .Values.extraVolumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + volumes: + - name: config + configMap: + name: {{ include "openclaw.configMapName" . }} + - name: data + {{- if .Values.persistence.enabled }} + persistentVolumeClaim: + claimName: {{ include "openclaw.pvcName" . }} + {{- else }} + emptyDir: {} + {{- end }} + {{- if .Values.skills.enabled }} + - name: skills-archive + configMap: + name: {{ include "openclaw.skillsConfigMapName" . }} + optional: true + {{- end }} + {{- with .Values.extraVolumes }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/internal/openclaw/chart/templates/httproute.yaml b/internal/openclaw/chart/templates/httproute.yaml new file mode 100644 index 0000000..d7c6518 --- /dev/null +++ b/internal/openclaw/chart/templates/httproute.yaml @@ -0,0 +1,25 @@ +{{- if .Values.httpRoute.enabled -}} +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: {{ include "openclaw.fullname" . }} + labels: + {{- include "openclaw.labels" . 
| nindent 4 }} + {{- with .Values.httpRoute.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + parentRefs: + {{- toYaml .Values.httpRoute.parentRefs | nindent 4 }} + hostnames: + {{- toYaml .Values.httpRoute.hostnames | nindent 4 }} + rules: + - matches: + - path: + type: PathPrefix + value: {{ .Values.httpRoute.pathPrefix | quote }} + backendRefs: + - name: {{ include "openclaw.fullname" . }} + port: {{ .Values.service.port }} +{{- end }} diff --git a/internal/openclaw/chart/templates/ingress.yaml b/internal/openclaw/chart/templates/ingress.yaml new file mode 100644 index 0000000..cf55fb0 --- /dev/null +++ b/internal/openclaw/chart/templates/ingress.yaml @@ -0,0 +1,43 @@ +{{- if .Values.ingress.enabled -}} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ include "openclaw.fullname" . }} + labels: + {{- include "openclaw.labels" . | nindent 4 }} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- with .Values.ingress.className }} + ingressClassName: {{ . }} + {{- end }} + {{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . | quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- end }} + rules: + {{- range .Values.ingress.hosts }} + - host: {{ .host | quote }} + http: + paths: + {{- range .paths }} + - path: {{ .path }} + {{- with .pathType }} + pathType: {{ . }} + {{- end }} + backend: + service: + name: {{ include "openclaw.fullname" $ }} + port: + number: {{ $.Values.service.port }} + {{- end }} + {{- end }} +{{- end }} diff --git a/internal/openclaw/chart/templates/init-job.yaml b/internal/openclaw/chart/templates/init-job.yaml new file mode 100644 index 0000000..a58bbd7 --- /dev/null +++ b/internal/openclaw/chart/templates/init-job.yaml @@ -0,0 +1,64 @@ +{{- if .Values.initJob.enabled -}} +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "openclaw.fullname" . }}-init + labels: + {{- include "openclaw.labels" . | nindent 4 }} + annotations: + helm.sh/hook: post-install + helm.sh/hook-weight: "0" + helm.sh/hook-delete-policy: before-hook-creation +spec: + backoffLimit: 3 + template: + metadata: + labels: + {{- include "openclaw.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: init + spec: + restartPolicy: OnFailure + serviceAccountName: {{ include "openclaw.serviceAccountName" . }} + {{- with .Values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: init + {{- $tag := .Values.initJob.image.tag | default .Values.image.tag | default .Chart.AppVersion }} + image: "{{ .Values.initJob.image.repository }}:{{ $tag }}" + imagePullPolicy: {{ .Values.initJob.image.pullPolicy }} + {{- with .Values.containerSecurityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.initJob.command }} + command: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.initJob.args }} + args: + {{- toYaml . | nindent 12 }} + {{- end }} + env: + - name: OPENCLAW_STATE_DIR + value: {{ .Values.openclaw.stateDir | quote }} + {{- with .Values.initJob.env }} + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.initJob.resources }} + resources: + {{- toYaml . 
| nindent 12 }} + {{- end }} + volumeMounts: + - name: data + mountPath: {{ .Values.persistence.mountPath }} + volumes: + - name: data + {{- if .Values.persistence.enabled }} + persistentVolumeClaim: + claimName: {{ include "openclaw.pvcName" . }} + {{- else }} + emptyDir: {} + {{- end }} +{{- end }} diff --git a/internal/openclaw/chart/templates/pvc.yaml b/internal/openclaw/chart/templates/pvc.yaml new file mode 100644 index 0000000..69bdda3 --- /dev/null +++ b/internal/openclaw/chart/templates/pvc.yaml @@ -0,0 +1,19 @@ +{{- if and .Values.persistence.enabled (not .Values.persistence.existingClaim) -}} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "openclaw.pvcName" . }} + labels: + {{- include "openclaw.labels" . | nindent 4 }} + annotations: + "helm.sh/resource-policy": keep +spec: + accessModes: + {{- toYaml .Values.persistence.accessModes | nindent 4 }} + {{- if .Values.persistence.storageClass }} + storageClassName: {{ .Values.persistence.storageClass }} + {{- end }} + resources: + requests: + storage: {{ .Values.persistence.size }} +{{- end }} diff --git a/internal/openclaw/chart/templates/role.yaml b/internal/openclaw/chart/templates/role.yaml new file mode 100644 index 0000000..e7d7a55 --- /dev/null +++ b/internal/openclaw/chart/templates/role.yaml @@ -0,0 +1,22 @@ +{{- if .Values.rbac.create -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "openclaw.fullname" . }} + labels: + {{- include "openclaw.labels" . | nindent 4 }} +rules: + # Read-only access to common namespace resources + - apiGroups: [""] + resources: ["pods", "pods/log", "services", "configmaps", "events", "persistentvolumeclaims"] + verbs: ["get", "list", "watch"] + - apiGroups: ["apps"] + resources: ["deployments", "statefulsets", "replicasets"] + verbs: ["get", "list", "watch"] + - apiGroups: ["batch"] + resources: ["jobs", "cronjobs"] + verbs: ["get", "list", "watch"] + {{- with .Values.rbac.extraRules }} + {{- toYaml . | nindent 2 }} + {{- end }} +{{- end }} diff --git a/internal/openclaw/chart/templates/rolebinding.yaml b/internal/openclaw/chart/templates/rolebinding.yaml new file mode 100644 index 0000000..06f5d48 --- /dev/null +++ b/internal/openclaw/chart/templates/rolebinding.yaml @@ -0,0 +1,16 @@ +{{- if .Values.rbac.create -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "openclaw.fullname" . }} + labels: + {{- include "openclaw.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ include "openclaw.fullname" . }} +subjects: + - kind: ServiceAccount + name: {{ include "openclaw.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} +{{- end }} diff --git a/internal/openclaw/chart/templates/secret.yaml b/internal/openclaw/chart/templates/secret.yaml new file mode 100644 index 0000000..61a8f89 --- /dev/null +++ b/internal/openclaw/chart/templates/secret.yaml @@ -0,0 +1,35 @@ +{{- if and .Values.secrets.create (not .Values.secrets.existingSecret) -}} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "openclaw.secretsName" . }} + labels: + {{- include "openclaw.labels" . | nindent 4 }} +type: Opaque +stringData: + {{ .Values.secrets.gatewayToken.key }}: {{ include "openclaw.gatewayTokenValue" . 
| quote }} + {{- if and .Values.models.anthropic.enabled .Values.models.anthropic.apiKeyValue }} + {{ .Values.models.anthropic.apiKeyEnvVar }}: {{ .Values.models.anthropic.apiKeyValue | quote }} + {{- end }} + {{- if and .Values.models.openai.enabled .Values.models.openai.apiKeyValue }} + {{ .Values.models.openai.apiKeyEnvVar }}: {{ .Values.models.openai.apiKeyValue | quote }} + {{- end }} + {{- if and .Values.channels.telegram.enabled .Values.channels.telegram.botToken }} + TELEGRAM_BOT_TOKEN: {{ .Values.channels.telegram.botToken | quote }} + {{- end }} + {{- if .Values.channels.telegram.dmPolicy }} + TELEGRAM_DM_POLICY: {{ .Values.channels.telegram.dmPolicy | quote }} + {{- end }} + {{- if and .Values.channels.discord.enabled .Values.channels.discord.botToken }} + DISCORD_BOT_TOKEN: {{ .Values.channels.discord.botToken | quote }} + {{- end }} + {{- if .Values.channels.discord.dmPolicy }} + DISCORD_DM_POLICY: {{ .Values.channels.discord.dmPolicy | quote }} + {{- end }} + {{- if and .Values.channels.slack.enabled .Values.channels.slack.botToken }} + SLACK_BOT_TOKEN: {{ .Values.channels.slack.botToken | quote }} + {{- end }} + {{- if and .Values.channels.slack.enabled .Values.channels.slack.appToken }} + SLACK_APP_TOKEN: {{ .Values.channels.slack.appToken | quote }} + {{- end }} +{{- end }} diff --git a/internal/openclaw/chart/templates/service.yaml b/internal/openclaw/chart/templates/service.yaml new file mode 100644 index 0000000..4fae8c4 --- /dev/null +++ b/internal/openclaw/chart/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "openclaw.fullname" . }} + labels: + {{- include "openclaw.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + name: http + selector: + {{- include "openclaw.selectorLabels" . | nindent 4 }} diff --git a/internal/openclaw/chart/templates/serviceaccount.yaml b/internal/openclaw/chart/templates/serviceaccount.yaml new file mode 100644 index 0000000..906e6e4 --- /dev/null +++ b/internal/openclaw/chart/templates/serviceaccount.yaml @@ -0,0 +1,13 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "openclaw.serviceAccountName" . }} + labels: + {{- include "openclaw.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +automountServiceAccountToken: {{ .Values.serviceAccount.automount }} +{{- end }} diff --git a/internal/openclaw/chart/templates/skills-configmap.yaml b/internal/openclaw/chart/templates/skills-configmap.yaml new file mode 100644 index 0000000..a184edd --- /dev/null +++ b/internal/openclaw/chart/templates/skills-configmap.yaml @@ -0,0 +1,11 @@ +{{- if and .Values.skills.enabled .Values.skills.createDefault (not .Values.skills.configMapName) -}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "openclaw.fullname" . }}-skills + labels: + {{- include "openclaw.labels" . | nindent 4 }} + annotations: + helm.sh/resource-policy: keep +data: {} +{{- end }} diff --git a/internal/openclaw/chart/templates/tests/test-connection.yaml b/internal/openclaw/chart/templates/tests/test-connection.yaml new file mode 100644 index 0000000..b529313 --- /dev/null +++ b/internal/openclaw/chart/templates/tests/test-connection.yaml @@ -0,0 +1,30 @@ +apiVersion: v1 +kind: Pod +metadata: + name: {{ include "openclaw.fullname" . 
}}-test-connection + labels: + {{- include "openclaw.labels" . | nindent 4 }} + app.kubernetes.io/component: test + annotations: + "helm.sh/hook": test + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +spec: + restartPolicy: Never + containers: + - name: tcp-check + image: busybox:1.36.1 + command: + - sh + - -c + - | + echo "Testing TCP connection to {{ include "openclaw.fullname" . }}:{{ .Values.service.port }}..." + for i in $(seq 1 10); do + if nc -z {{ include "openclaw.fullname" . }} {{ .Values.service.port }} 2>/dev/null; then + echo "Connection successful!" + exit 0 + fi + echo "Attempt $i/10 failed, retrying in 3s..." + sleep 3 + done + echo "Connection failed after 10 attempts" + exit 1 diff --git a/internal/openclaw/chart/templates/validate.yaml b/internal/openclaw/chart/templates/validate.yaml new file mode 100644 index 0000000..e7f8846 --- /dev/null +++ b/internal/openclaw/chart/templates/validate.yaml @@ -0,0 +1,32 @@ +{{- if ne (int .Values.replicaCount) 1 -}} +{{- fail "openclaw: replicaCount must be 1 (OpenClaw stores state on disk and should not be scaled horizontally)" -}} +{{- end -}} + +{{- if and .Values.secrets.existingSecret .Values.secrets.create -}} +{{- fail "openclaw: secrets.existingSecret is set; set secrets.create=false" -}} +{{- end -}} + +{{- if and (not .Values.secrets.existingSecret) (not .Values.secrets.create) -}} +{{- fail "openclaw: set secrets.existingSecret or enable secrets.create" -}} +{{- end -}} + +{{- $gatewayToken := include "openclaw.gatewayTokenValue" . | trim -}} +{{- if and (eq .Values.openclaw.gateway.auth.mode "token") .Values.secrets.create (not .Values.secrets.existingSecret) (eq $gatewayToken "") -}} +{{- fail "openclaw: token auth is enabled; set secrets.gatewayToken.value or use secrets.existingSecret" -}} +{{- end -}} + +{{- if and .Values.httpRoute.enabled (eq (len .Values.httpRoute.hostnames) 0) -}} +{{- fail "openclaw: httpRoute.enabled is true but httpRoute.hostnames is empty" -}} +{{- end -}} + +{{- if and .Values.skills.enabled (eq .Values.skills.configMapName "") (not .Values.skills.createDefault) -}} +{{- fail "openclaw: skills.enabled is true but no skills.configMapName or skills.createDefault" -}} +{{- end -}} + +{{- if and .Values.rbac.create (not .Values.serviceAccount.create) (eq (.Values.serviceAccount.name | trim) "") -}} +{{- fail "openclaw: rbac.create=true with serviceAccount.create=false requires serviceAccount.name" -}} +{{- end -}} + +{{- if and .Values.initJob.enabled (not .Values.persistence.enabled) -}} +{{- fail "openclaw: initJob.enabled requires persistence.enabled=true" -}} +{{- end -}} diff --git a/internal/openclaw/chart/values.schema.json b/internal/openclaw/chart/values.schema.json new file mode 100644 index 0000000..e52dc8a --- /dev/null +++ b/internal/openclaw/chart/values.schema.json @@ -0,0 +1,377 @@ +{ + "$schema": "https://json-schema.org/draft-07/schema#", + "type": "object", + "required": ["image", "service", "secrets"], + "properties": { + "replicaCount": { + "type": "integer", + "minimum": 1, + "maximum": 1, + "description": "Must be 1 — OpenClaw uses SQLite and cannot scale horizontally" + }, + "image": { + "type": "object", + "required": ["repository"], + "properties": { + "repository": { + "type": "string" + }, + "tag": { + "type": "string" + }, + "pullPolicy": { + "type": "string", + "enum": ["Always", "IfNotPresent", "Never"] + }, + "command": { + "type": "array", + "items": { "type": "string" } + }, + "args": { + "type": "array", + "items": { "type": "string" } + }, 
+ "env": { + "type": "array" + } + } + }, + "imagePullSecrets": { + "type": "array" + }, + "nameOverride": { + "type": "string" + }, + "fullnameOverride": { + "type": "string" + }, + "serviceAccount": { + "type": "object", + "properties": { + "create": { "type": "boolean" }, + "automount": { "type": "boolean" }, + "annotations": { "type": "object" }, + "name": { "type": "string" } + } + }, + "rbac": { + "type": "object", + "properties": { + "create": { "type": "boolean" }, + "extraRules": { "type": "array" } + } + }, + "initJob": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "image": { + "type": "object", + "properties": { + "repository": { "type": "string" }, + "tag": { "type": "string" }, + "pullPolicy": { + "type": "string", + "enum": ["Always", "IfNotPresent", "Never"] + } + } + }, + "command": { + "type": "array", + "items": { "type": "string" } + }, + "args": { + "type": "array", + "items": { "type": "string" } + }, + "env": { "type": "array" }, + "resources": { "type": "object" } + } + }, + "podAnnotations": { "type": "object" }, + "podLabels": { "type": "object" }, + "podSecurityContext": { "type": "object" }, + "containerSecurityContext": { "type": "object" }, + "service": { + "type": "object", + "required": ["port"], + "properties": { + "type": { + "type": "string", + "enum": ["ClusterIP", "NodePort", "LoadBalancer"] + }, + "port": { + "type": "integer", + "minimum": 1, + "maximum": 65535 + } + } + }, + "ingress": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "className": { "type": "string" }, + "annotations": { "type": "object" }, + "hosts": { "type": "array" }, + "tls": { "type": "array" } + } + }, + "httpRoute": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "annotations": { "type": "object" }, + "hostnames": { + "type": "array", + "items": { "type": "string" } + }, + "parentRefs": { "type": "array" }, + "pathPrefix": { "type": "string" } + } + }, + "persistence": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "existingClaim": { "type": "string" }, + "storageClass": { "type": "string" }, + "accessModes": { + "type": "array", + "items": { "type": "string" } + }, + "size": { "type": "string" }, + "mountPath": { "type": "string" } + } + }, + "config": { + "type": "object", + "properties": { + "existingConfigMap": { "type": "string" }, + "key": { "type": "string" }, + "content": { "type": "string" } + } + }, + "openclaw": { + "type": "object", + "properties": { + "stateDir": { "type": "string" }, + "workspaceDir": { "type": "string" }, + "gateway": { + "type": "object", + "properties": { + "mode": { "type": "string" }, + "bind": { "type": "string" }, + "auth": { + "type": "object", + "properties": { + "mode": { + "type": "string", + "enum": ["token", "none"] + } + } + }, + "http": { "type": "object" } + } + } + } + }, + "models": { + "type": "object", + "properties": { + "anthropic": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "baseUrl": { "type": "string", "format": "uri" }, + "api": { "type": "string" }, + "apiKeyEnvVar": { "type": "string" }, + "apiKeyValue": { "type": "string" }, + "models": { + "type": "array", + "items": { + "type": "object", + "required": ["id", "name"], + "properties": { + "id": { "type": "string" }, + "name": { "type": "string" } + } + } + } + } + }, + "openai": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "baseUrl": { "type": "string", "format": "uri" }, + "api": { 
"type": "string" }, + "apiKeyEnvVar": { "type": "string" }, + "apiKeyValue": { "type": "string" }, + "models": { + "type": "array", + "items": { + "type": "object", + "required": ["id", "name"], + "properties": { + "id": { "type": "string" }, + "name": { "type": "string" } + } + } + } + } + }, + "ollama": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "baseUrl": { "type": "string" }, + "api": { "type": "string" }, + "apiKeyEnvVar": { "type": "string" }, + "apiKeyValue": { "type": "string" }, + "models": { + "type": "array", + "items": { + "type": "object", + "required": ["id", "name"], + "properties": { + "id": { "type": "string" }, + "name": { "type": "string" } + } + } + } + } + } + } + }, + "channels": { + "type": "object", + "properties": { + "telegram": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "botToken": { "type": "string" }, + "dmPolicy": { + "type": "string", + "enum": ["", "open", "paired", "closed"] + } + } + }, + "discord": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "botToken": { "type": "string" }, + "dmPolicy": { + "type": "string", + "enum": ["", "open", "paired", "closed"] + } + } + }, + "slack": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "botToken": { "type": "string" }, + "appToken": { "type": "string" } + } + } + } + }, + "skills": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "createDefault": { "type": "boolean" }, + "configMapName": { "type": "string" }, + "archiveKey": { "type": "string" }, + "extractDir": { "type": "string" }, + "initContainer": { + "type": "object", + "properties": { + "image": { + "type": "object", + "properties": { + "repository": { "type": "string" }, + "tag": { "type": "string" }, + "pullPolicy": { + "type": "string", + "enum": ["Always", "IfNotPresent", "Never"] + } + } + } + } + } + } + }, + "erpc": { + "type": "object", + "properties": { + "url": { "type": "string" } + } + }, + "secrets": { + "type": "object", + "properties": { + "existingSecret": { "type": "string" }, + "create": { "type": "boolean" }, + "name": { "type": "string" }, + "gatewayToken": { + "type": "object", + "properties": { + "key": { "type": "string" }, + "value": { "type": "string" } + } + }, + "extraEnvFromSecrets": { + "type": "array", + "items": { "type": "string" } + } + } + }, + "resources": { + "type": "object", + "properties": { + "limits": { "type": "object" }, + "requests": { "type": "object" } + } + }, + "startupProbe": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "periodSeconds": { "type": "integer", "minimum": 1 }, + "failureThreshold": { "type": "integer", "minimum": 1 }, + "timeoutSeconds": { "type": "integer", "minimum": 1 } + } + }, + "livenessProbe": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "initialDelaySeconds": { "type": "integer", "minimum": 0 }, + "periodSeconds": { "type": "integer", "minimum": 1 }, + "timeoutSeconds": { "type": "integer", "minimum": 1 }, + "failureThreshold": { "type": "integer", "minimum": 1 } + } + }, + "readinessProbe": { + "type": "object", + "properties": { + "enabled": { "type": "boolean" }, + "initialDelaySeconds": { "type": "integer", "minimum": 0 }, + "periodSeconds": { "type": "integer", "minimum": 1 }, + "timeoutSeconds": { "type": "integer", "minimum": 1 }, + "failureThreshold": { "type": "integer", "minimum": 1 } + } + }, + "extraVolumes": { "type": "array" }, + "extraVolumeMounts": { 
"type": "array" }, + "extraEnv": { "type": "array" }, + "nodeSelector": { "type": "object" }, + "tolerations": { "type": "array" }, + "affinity": { "type": "object" }, + "priorityClassName": { "type": "string" } + } +} diff --git a/internal/openclaw/chart/values.yaml b/internal/openclaw/chart/values.yaml new file mode 100644 index 0000000..59bb185 --- /dev/null +++ b/internal/openclaw/chart/values.yaml @@ -0,0 +1,317 @@ +# Default values for openclaw. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# -- Number of replicas (OpenClaw should run as a single instance) +replicaCount: 1 + +# -- OpenClaw image repository, pull policy, and tag version +image: + repository: ghcr.io/obolnetwork/openclaw + pullPolicy: IfNotPresent + tag: "2026.2.9" + + # -- Override the container command (ENTRYPOINT) + command: + - node + # -- Override the container args (CMD) + args: + - openclaw.mjs + - gateway + - --allow-unconfigured + + # -- Additional environment variables for the container + env: [] + # - name: FOO + # value: bar + +# -- Credentials to fetch images from private registry +imagePullSecrets: [] + +# -- Override the chart name +nameOverride: "" +# -- Override the full resource name +fullnameOverride: "" + +# -- Create a ServiceAccount for OpenClaw +serviceAccount: + create: true + # -- Automatically mount a ServiceAccount's API credentials? + # Set to true when rbac.create is true so the agent can access the K8s API. + automount: false + annotations: {} + # -- ServiceAccount name. Required when serviceAccount.create=false and rbac.create=true. + name: "" + +# -- RBAC for the ServiceAccount (read-only access to namespace resources) +rbac: + create: false + # -- Extra rules to append to the generated Role (list of PolicyRule objects) + extraRules: [] + +# -- One-shot init Job (runs once to bootstrap workspace/personality) +initJob: + # -- Enable a one-shot post-install bootstrap Job. Requires persistence.enabled=true. 
+ enabled: false + image: + repository: ghcr.io/obolnetwork/openclaw + tag: "" + pullPolicy: IfNotPresent + command: + - node + - openclaw.mjs + - agent + - init + args: [] + # -- Extra environment variables for the init job + env: [] + # -- Resource requests/limits for the init job + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + memory: 512Mi + +# -- Pod annotations +podAnnotations: {} +# -- Pod labels +podLabels: {} + +# -- Pod security context +podSecurityContext: + fsGroup: 1000 + +# -- Container security context +containerSecurityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +# -- Service configuration +service: + type: ClusterIP + port: 18789 + +# -- Kubernetes Ingress (optional; not used in Obol Stack which uses Gateway API) +ingress: + enabled: false + className: "" + annotations: {} + hosts: + - host: chart-example.local + paths: + - path: / + pathType: Prefix + tls: [] + +# -- Gateway API HTTPRoute (recommended for Obol Stack / Traefik Gateway API) +httpRoute: + enabled: false + annotations: {} + # -- Hostnames for routing (required when enabled) + hostnames: [] + # - openclaw-myid.obol.stack + parentRefs: + - name: traefik-gateway + namespace: traefik + sectionName: web + pathPrefix: / + +# -- Persistence settings for OpenClaw state directory (contains runtime state + secrets) +persistence: + enabled: true + existingClaim: "" + storageClass: "" + accessModes: + - ReadWriteOnce + size: 1Gi + mountPath: /data + +# -- Configuration for the OpenClaw config file (openclaw.json) +config: + # -- Use an existing ConfigMap instead of creating one + existingConfigMap: "" + # -- ConfigMap key / filename + key: openclaw.json + # -- Optional raw JSON5 configuration (overrides generated config when set) + content: "" + +# -- OpenClaw state/workspace settings (paths should be inside persistence.mountPath) +openclaw: + stateDir: /data/.openclaw + workspaceDir: /data/.openclaw/workspace + # -- Default agent model (e.g. "anthropic/claude-sonnet-4-5-20250929"). Empty = use provider default. + agentModel: "" + + gateway: + mode: local + bind: lan + # -- Trusted proxy IPs for secure context detection behind a reverse proxy (e.g. Traefik). + # OpenClaw uses exact IP matching (no CIDR support). + trustedProxies: [] + # -- Control UI settings for running behind a reverse proxy. + # allowInsecureAuth permits token-only auth when the browser lacks crypto.subtle + # (non-localhost HTTP). Device auth is naturally skipped (browser can't provide it). + controlUi: + # -- Allow control UI over HTTP (required when behind a non-TLS proxy like Traefik in dev) + allowInsecureAuth: false + auth: + mode: token + + http: + endpoints: + chatCompletions: + enabled: true + +# -- Model provider configuration +# Each provider is independently toggled. All providers may be disabled. +# API keys are stored in the chart Secret and injected as env vars. +models: + anthropic: + enabled: false + baseUrl: https://api.anthropic.com/v1 + api: "" + apiKeyEnvVar: ANTHROPIC_API_KEY + # -- API key value (stored in Secret). Leave empty to provide via extraEnvFromSecrets. 
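+    # Illustrative only — an inline key is written into the chart-managed Secret, e.g.:
+    # apiKeyValue: sk-ant-xxxxxxxxxxxx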
+ apiKeyValue: "" + models: + - id: claude-sonnet-4-5-20250929 + name: Claude Sonnet 4.5 + - id: claude-opus-4-6 + name: Claude Opus 4.6 + openai: + enabled: false + baseUrl: https://api.openai.com/v1 + api: "" + apiKeyEnvVar: OPENAI_API_KEY + apiKeyValue: "" + models: + - id: gpt-5.2 + name: GPT-5.2 + ollama: + enabled: true + # -- OpenAI-compatible base URL for Ollama (routed through llmspy global proxy) + baseUrl: http://llmspy.llm.svc.cluster.local:8000/v1 + # -- OpenClaw provider API type. Set to "openai-completions" because llmspy exposes an OpenAI-compatible chat/completions endpoint. + api: "openai-completions" + # -- Env var used for provider API key interpolation in openclaw.json + apiKeyEnvVar: OLLAMA_API_KEY + # -- Value set for the apiKey env var (not a secret for Ollama) + apiKeyValue: ollama-local + models: + - id: glm-4.7-flash + name: glm-4.7-flash + +# -- Chat channel integrations +# Tokens are stored in the chart Secret and injected as env vars. +channels: + telegram: + enabled: false + # -- Telegram Bot API token (from @BotFather) + botToken: "" + # -- DM policy: "open" | "paired" | "closed" + dmPolicy: "" + discord: + enabled: false + # -- Discord bot token + botToken: "" + # -- DM policy: "open" | "paired" | "closed" + dmPolicy: "" + slack: + enabled: false + # -- Slack Bot User OAuth Token (xoxb-...) + botToken: "" + # -- Slack App-Level Token (xapp-...) + appToken: "" + +# -- Skills injection from a ConfigMap archive (created by an external tool; e.g. `obol openclaw skills sync`). +# The archive is extracted to `extractDir` by a busybox init container and wired into OpenClaw +# via `skills.load.extraDirs` in _helpers.tpl. Note: ConfigMap total size is limited to ~1 MB by Kubernetes. +skills: + enabled: false + # -- Create a default empty skills ConfigMap when configMapName is not set. + # This allows the chart to deploy without requiring an external ConfigMap. + # Use `obol openclaw skills sync` to populate it later. + createDefault: true + # -- Name of the ConfigMap containing the skills archive (overrides createDefault) + configMapName: "" + archiveKey: skills.tgz + extractDir: /data/.openclaw/skills-injected + initContainer: + image: + repository: busybox + tag: 1.36.1 + pullPolicy: IfNotPresent + +# -- eRPC integration (exposed as ERPC_URL env var) +erpc: + url: http://erpc.erpc.svc.cluster.local:4000/rpc + +# -- OpenClaw secrets (one Secret per instance) +secrets: + # -- Use an existing secret instead of creating one + existingSecret: "" + # -- Create the secret when existingSecret is not set + create: true + # -- Override the created Secret name (defaults to -openclaw-secrets) + name: "" + + gatewayToken: + # -- Secret key name + env var name for the gateway API authentication token. + # This token is required to access OpenClaw's HTTP gateway (chat/completions endpoint and dashboard). + key: OPENCLAW_GATEWAY_TOKEN + # -- Explicit token value. Required for token auth unless using secrets.existingSecret. + value: "" + + # -- Extra Secret names to load via envFrom (for provider/channel keys, etc.) 
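+  # Illustrative example — load keys from a Secret created out-of-band (hypothetical name):
+  # extraEnvFromSecrets:
+  #   - openclaw-provider-keys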
+ extraEnvFromSecrets: [] + +# -- Resource requests and limits +resources: + requests: + cpu: 250m + memory: 512Mi + limits: + memory: 2Gi + +# -- Startup probe (tcpSocket; allows generous boot time before liveness kicks in) +startupProbe: + enabled: true + periodSeconds: 5 + failureThreshold: 30 + timeoutSeconds: 3 + +# -- Liveness probe (tcpSocket by default to avoid auth-protected HTTP endpoints) +livenessProbe: + enabled: true + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + +# -- Readiness probe (tcpSocket by default to avoid auth-protected HTTP endpoints) +readinessProbe: + enabled: true + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 3 + failureThreshold: 3 + +# -- Additional volumes +extraVolumes: [] +# -- Additional volume mounts +extraVolumeMounts: [] +# -- Additional environment variables +extraEnv: [] + +nodeSelector: {} +tolerations: [] +affinity: {} +priorityClassName: "" diff --git a/internal/openclaw/import.go b/internal/openclaw/import.go new file mode 100644 index 0000000..159c702 --- /dev/null +++ b/internal/openclaw/import.go @@ -0,0 +1,356 @@ +package openclaw + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" +) + +// ImportResult holds the parsed configuration from ~/.openclaw/openclaw.json +type ImportResult struct { + Providers []ImportedProvider + AgentModel string + GatewayToken string // gateway.remote.token from openclaw.json + Channels ImportedChannels + WorkspaceDir string // path to ~/.openclaw/workspace/ if it exists and contains marker files +} + +// ImportedProvider represents a model provider extracted from openclaw.json +type ImportedProvider struct { + Name string + BaseURL string + API string + APIKey string // literal only; empty if env-var reference + APIKeyEnvVar string // env var name for apiKey interpolation (e.g. 
OLLAMA_API_KEY) + Models []ImportedModel + Disabled bool // when true, emit only enabled: false (used to override chart defaults) +} + +// ImportedModel represents a model entry +type ImportedModel struct { + ID string + Name string +} + +// ImportedChannels holds detected channel configurations +type ImportedChannels struct { + Telegram *ImportedTelegram + Discord *ImportedDiscord + Slack *ImportedSlack +} + +// ImportedTelegram holds Telegram bot config +type ImportedTelegram struct { + BotToken string +} + +// ImportedDiscord holds Discord bot config +type ImportedDiscord struct { + BotToken string +} + +// ImportedSlack holds Slack bot config +type ImportedSlack struct { + BotToken string + AppToken string +} + +// openclawConfig mirrors the relevant parts of ~/.openclaw/openclaw.json +type openclawConfig struct { + Models struct { + Providers map[string]openclawProvider `json:"providers"` + } `json:"models"` + Agents struct { + Defaults struct { + Model struct { + Primary string `json:"primary"` + } `json:"model"` + Workspace string `json:"workspace"` + } `json:"defaults"` + } `json:"agents"` + Gateway struct { + Remote struct { + Token string `json:"token"` + } `json:"remote"` + } `json:"gateway"` + Channels struct { + Telegram *struct { + BotToken string `json:"botToken"` + } `json:"telegram"` + Discord *struct { + BotToken string `json:"botToken"` + } `json:"discord"` + Slack *struct { + BotToken string `json:"botToken"` + AppToken string `json:"appToken"` + } `json:"slack"` + } `json:"channels"` +} + +type openclawProvider struct { + BaseURL string `json:"baseUrl"` + API string `json:"api"` + APIKey string `json:"apiKey"` + Models []openclawModel `json:"models"` +} + +type openclawModel struct { + ID string `json:"id"` + Name string `json:"name"` +} + +// DetectExistingConfig checks for ~/.openclaw/openclaw.json and parses it. +// Returns nil (not an error) if the file does not exist. 
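+//
+// Typical call site (mirrors how Onboard uses it):
+//
+//	imported, err := DetectExistingConfig()
+//	if err != nil {
+//		fmt.Printf("Warning: failed to read existing config: %v\n", err)
+//	}
+//	if imported != nil {
+//		PrintImportSummary(imported)
+//	}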
+func DetectExistingConfig() (*ImportResult, error) { + home, err := os.UserHomeDir() + if err != nil { + return nil, nil + } + + configPath := filepath.Join(home, ".openclaw", "openclaw.json") + data, err := os.ReadFile(configPath) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, fmt.Errorf("failed to read %s: %w", configPath, err) + } + + var cfg openclawConfig + if err := json.Unmarshal(data, &cfg); err != nil { + return nil, fmt.Errorf("failed to parse %s: %w", configPath, err) + } + + result := &ImportResult{ + AgentModel: cfg.Agents.Defaults.Model.Primary, + GatewayToken: cfg.Gateway.Remote.Token, + } + + // Detect workspace directory + result.WorkspaceDir = detectWorkspace(home, cfg.Agents.Defaults.Workspace) + + for name, p := range cfg.Models.Providers { + ip := ImportedProvider{ + Name: name, + BaseURL: p.BaseURL, + API: sanitizeModelAPI(p.API), + } + // Only import literal API keys, skip env-var references like ${...} + if p.APIKey != "" && !isEnvVarRef(p.APIKey) { + ip.APIKey = p.APIKey + } + for _, m := range p.Models { + ip.Models = append(ip.Models, ImportedModel{ID: m.ID, Name: m.Name}) + } + result.Providers = append(result.Providers, ip) + } + + if cfg.Channels.Telegram != nil && cfg.Channels.Telegram.BotToken != "" && !isEnvVarRef(cfg.Channels.Telegram.BotToken) { + result.Channels.Telegram = &ImportedTelegram{BotToken: cfg.Channels.Telegram.BotToken} + } + if cfg.Channels.Discord != nil && cfg.Channels.Discord.BotToken != "" && !isEnvVarRef(cfg.Channels.Discord.BotToken) { + result.Channels.Discord = &ImportedDiscord{BotToken: cfg.Channels.Discord.BotToken} + } + if cfg.Channels.Slack != nil { + botToken := cfg.Channels.Slack.BotToken + appToken := cfg.Channels.Slack.AppToken + if botToken != "" && !isEnvVarRef(botToken) { + result.Channels.Slack = &ImportedSlack{ + BotToken: botToken, + } + if appToken != "" && !isEnvVarRef(appToken) { + result.Channels.Slack.AppToken = appToken + } + } + } + + return result, nil +} + +// TranslateToOverlayYAML maps imported config fields to chart values YAML fragment. +// The returned string is appended to the base overlay. +func TranslateToOverlayYAML(result *ImportResult) string { + if result == nil { + return "" + } + + var b strings.Builder + + if result.AgentModel != "" { + b.WriteString(fmt.Sprintf("openclaw:\n agentModel: %s\n\n", result.AgentModel)) + } + + if len(result.Providers) > 0 { + b.WriteString("models:\n") + for _, p := range result.Providers { + b.WriteString(fmt.Sprintf(" %s:\n", p.Name)) + if p.Disabled { + b.WriteString(" enabled: false\n") + continue + } + b.WriteString(" enabled: true\n") + if p.BaseURL != "" { + b.WriteString(fmt.Sprintf(" baseUrl: %s\n", p.BaseURL)) + } + // Always emit api to override any stale base chart value. + // Empty string makes the Helm template omit it from JSON, + // letting OpenClaw auto-detect the protocol. 
+ if p.API != "" { + b.WriteString(fmt.Sprintf(" api: %s\n", p.API)) + } else { + b.WriteString(" api: \"\"\n") + } + if p.APIKeyEnvVar != "" { + b.WriteString(fmt.Sprintf(" apiKeyEnvVar: %s\n", p.APIKeyEnvVar)) + } + if p.APIKey != "" { + b.WriteString(fmt.Sprintf(" apiKeyValue: %s\n", p.APIKey)) + } + if len(p.Models) > 0 { + b.WriteString(" models:\n") + for _, m := range p.Models { + b.WriteString(fmt.Sprintf(" - id: %s\n", m.ID)) + if m.Name != "" { + b.WriteString(fmt.Sprintf(" name: %s\n", m.Name)) + } + } + } + } + b.WriteString("\n") + } + + // Channels + hasChannels := result.Channels.Telegram != nil || result.Channels.Discord != nil || result.Channels.Slack != nil + if hasChannels { + b.WriteString("channels:\n") + if result.Channels.Telegram != nil { + b.WriteString(" telegram:\n") + b.WriteString(" enabled: true\n") + b.WriteString(fmt.Sprintf(" botToken: %s\n", result.Channels.Telegram.BotToken)) + } + if result.Channels.Discord != nil { + b.WriteString(" discord:\n") + b.WriteString(" enabled: true\n") + b.WriteString(fmt.Sprintf(" botToken: %s\n", result.Channels.Discord.BotToken)) + } + if result.Channels.Slack != nil { + b.WriteString(" slack:\n") + b.WriteString(" enabled: true\n") + b.WriteString(fmt.Sprintf(" botToken: %s\n", result.Channels.Slack.BotToken)) + if result.Channels.Slack.AppToken != "" { + b.WriteString(fmt.Sprintf(" appToken: %s\n", result.Channels.Slack.AppToken)) + } + } + b.WriteString("\n") + } + + return b.String() +} + +// PrintImportSummary prints a human-readable summary of detected config +func PrintImportSummary(result *ImportResult) { + if result == nil { + return + } + + fmt.Println("Detected existing OpenClaw installation (~/.openclaw/):") + if len(result.Providers) > 0 { + fmt.Printf(" Providers: ") + names := make([]string, 0, len(result.Providers)) + for _, p := range result.Providers { + names = append(names, p.Name) + } + fmt.Println(strings.Join(names, ", ")) + } + if result.AgentModel != "" { + fmt.Printf(" Agent model: %s\n", result.AgentModel) + } + if result.Channels.Telegram != nil { + fmt.Println(" Telegram: configured") + } + if result.Channels.Discord != nil { + fmt.Println(" Discord: configured") + } + if result.Channels.Slack != nil { + fmt.Println(" Slack: configured") + } + if result.WorkspaceDir != "" { + files := detectWorkspaceFiles(result.WorkspaceDir) + fmt.Printf(" Workspace: %s (%s)\n", result.WorkspaceDir, strings.Join(files, ", ")) + } +} + +// workspaceMarkers are files that indicate a valid OpenClaw workspace +var workspaceMarkers = []string{"SOUL.md", "AGENTS.md", "IDENTITY.md"} + +// detectWorkspace checks for an OpenClaw workspace directory and returns +// its path if it exists and contains at least one marker file. 
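+// For example, with the defaults this returns ~/.openclaw/workspace when that
+// directory contains at least one of SOUL.md, AGENTS.md, or IDENTITY.md.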
+func detectWorkspace(home, configWorkspace string) string {
+	// Use custom workspace path from config if set
+	wsDir := configWorkspace
+	if wsDir == "" {
+		wsDir = filepath.Join(home, ".openclaw", "workspace")
+	}
+
+	info, err := os.Stat(wsDir)
+	if err != nil || !info.IsDir() {
+		return ""
+	}
+
+	// Verify at least one marker file exists
+	for _, marker := range workspaceMarkers {
+		if _, err := os.Stat(filepath.Join(wsDir, marker)); err == nil {
+			return wsDir
+		}
+	}
+
+	return ""
+}
+
+// detectWorkspaceFiles returns the names of workspace files that exist
+func detectWorkspaceFiles(wsDir string) []string {
+	candidates := []string{
+		"SOUL.md", "AGENTS.md", "IDENTITY.md", "USER.md",
+		"TOOLS.md", "MEMORY.md",
+	}
+	var found []string
+	for _, name := range candidates {
+		if _, err := os.Stat(filepath.Join(wsDir, name)); err == nil {
+			found = append(found, name)
+		}
+	}
+	// Check for memory/ directory
+	if info, err := os.Stat(filepath.Join(wsDir, "memory")); err == nil && info.IsDir() {
+		found = append(found, "memory/")
+	}
+	return found
+}
+
+// validModelAPIs is the set of values accepted by OpenClaw's ModelApiSchema (Zod enum).
+// Any other value will be rejected at startup. When the api field is omitted,
+// OpenClaw auto-detects the protocol from the provider name / baseUrl.
+var validModelAPIs = map[string]bool{
+	"openai-completions":      true,
+	"openai-responses":        true,
+	"anthropic-messages":      true,
+	"google-generative-ai":    true,
+	"github-copilot":          true,
+	"bedrock-converse-stream": true,
+}
+
+// sanitizeModelAPI returns api unchanged if it is a valid OpenClaw ModelApi enum
+// value, or "" (omit) if it is unrecognised. This prevents invalid values
+// imported from ~/.openclaw/openclaw.json from crashing the gateway.
+func sanitizeModelAPI(api string) string {
+	if validModelAPIs[api] {
+		return api
+	}
+	return ""
+}
+
+// isEnvVarRef returns true if the value looks like an environment variable reference (${...})
+func isEnvVarRef(s string) bool {
+	return strings.Contains(s, "${")
+}
diff --git a/internal/openclaw/openclaw.go b/internal/openclaw/openclaw.go
new file mode 100644
index 0000000..0c31bca
--- /dev/null
+++ b/internal/openclaw/openclaw.go
@@ -0,0 +1,1290 @@
+package openclaw
+
+import (
+	"bufio"
+	"bytes"
+	"context"
+	"crypto/rand"
+	"embed"
+	"encoding/base64"
+	"encoding/json"
+	"fmt"
+	"io"
+	"io/fs"
+	"net/http"
+	"net/url"
+	"os"
+	"os/exec"
+	"os/signal"
+	"path/filepath"
+	"strings"
+	"syscall"
+	"time"
+
+	"github.com/ObolNetwork/obol-stack/internal/config"
+	"github.com/ObolNetwork/obol-stack/internal/llm"
+	oboltls "github.com/ObolNetwork/obol-stack/internal/tls"
+	"github.com/dustinkirkland/golang-petname"
+)
+
+// CloudProviderInfo holds the cloud provider selection from interactive setup.
+// This is used to configure llmspy with the API key separately from the
+// OpenClaw overlay (which routes through llmspy).
+type CloudProviderInfo struct {
+	Name    string // "anthropic" or "openai"
+	APIKey  string
+	ModelID string // e.g. "claude-sonnet-4-5-20250929"
+	Display string // e.g. "Claude Sonnet 4.5"
+}
+
+const (
+	appName       = "openclaw"
+	defaultDomain = "obol.stack"
+)
+
+// Embed the OpenClaw Helm chart that ships with this package.
+// The chart source lives in internal/openclaw/chart/ and is embedded
+// here so the openclaw package owns its own chart lifecycle.
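+//
+// At onboard time the embedded files are copied into the per-deployment
+// directory (see copyEmbeddedChart), and the generated helmfile.yaml
+// references that local chart copy.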
+// +//go:embed all:chart +var chartFS embed.FS + +// OnboardOptions contains options for the onboard command +type OnboardOptions struct { + ID string // Deployment ID (empty = generate petname) + Force bool // Overwrite existing deployment + Sync bool // Also run helmfile sync after install + Interactive bool // true = prompt for provider choice; false = silent defaults + IsDefault bool // true = use fixed ID "default", idempotent on re-run +} + +// SetupDefault deploys a default OpenClaw instance as part of stack setup. +// It is idempotent: if a "default" deployment already exists, it re-syncs. +// When Ollama is not detected on the host and no existing ~/.openclaw config +// is found, it skips provider setup gracefully so the user can configure +// later with `obol openclaw setup`. +func SetupDefault(cfg *config.Config) error { + // Check whether the default deployment already exists (re-sync path). + // If it does, proceed unconditionally — the overlay was already written. + deploymentDir := deploymentPath(cfg, "default") + if _, err := os.Stat(deploymentDir); err == nil { + // Existing deployment — always re-sync regardless of Ollama status. + return Onboard(cfg, OnboardOptions{ + ID: "default", + Sync: true, + IsDefault: true, + }) + } + + // Check if there is an existing ~/.openclaw config with providers + imported, _ := DetectExistingConfig() + hasImportedProviders := imported != nil && len(imported.Providers) > 0 + + // If no imported providers, check Ollama availability for the default overlay + if !hasImportedProviders { + ollamaAvailable := detectOllama() + if ollamaAvailable { + fmt.Printf(" ✓ Ollama detected at %s\n", ollamaEndpoint()) + } else { + fmt.Printf(" ⚠ Ollama not detected on host (%s)\n", ollamaEndpoint()) + fmt.Println(" Skipping default OpenClaw provider setup.") + fmt.Println(" Run 'obol openclaw setup default' to configure a provider later.") + return nil + } + } + + return Onboard(cfg, OnboardOptions{ + ID: "default", + Sync: true, + IsDefault: true, + }) +} + +// Onboard creates and optionally deploys an OpenClaw instance +func Onboard(cfg *config.Config, opts OnboardOptions) error { + id := opts.ID + if opts.IsDefault { + id = "default" + } + if id == "" { + id = petname.Generate(2, "-") + fmt.Printf("Generated deployment ID: %s\n", id) + } else { + fmt.Printf("Using deployment ID: %s\n", id) + } + + deploymentDir := deploymentPath(cfg, id) + + // Idempotent re-run for default deployment: just re-sync + if opts.IsDefault && !opts.Force { + if _, err := os.Stat(deploymentDir); err == nil { + fmt.Println("Default OpenClaw instance already configured, re-syncing...") + if opts.Sync { + if err := doSync(cfg, id); err != nil { + return err + } + // Import workspace on re-sync too + imported, _ := DetectExistingConfig() + if imported != nil && imported.WorkspaceDir != "" { + copyWorkspaceToPod(cfg, id, imported.WorkspaceDir) + } + return nil + } + return nil + } + } + + if _, err := os.Stat(deploymentDir); err == nil { + if !opts.Force && !opts.IsDefault { + return fmt.Errorf("deployment already exists: %s/%s\n"+ + "Directory: %s\n"+ + "Use --force or -f to overwrite", appName, id, deploymentDir) + } + fmt.Printf("WARNING: Overwriting existing deployment at %s\n", deploymentDir) + } + + // Detect existing ~/.openclaw config + imported, err := DetectExistingConfig() + if err != nil { + fmt.Printf("Warning: failed to read existing config: %v\n", err) + } + if imported != nil { + PrintImportSummary(imported) + } + + // Interactive setup: auto-skip prompts when 
existing config has providers + if opts.Interactive { + if imported != nil && len(imported.Providers) > 0 { + fmt.Println("\nUsing detected configuration from ~/.openclaw/") + } else { + var cloudProvider *CloudProviderInfo + imported, cloudProvider, err = interactiveSetup(imported) + if err != nil { + return fmt.Errorf("interactive setup failed: %w", err) + } + // Push cloud API key to llmspy if a cloud provider was selected + if cloudProvider != nil { + if llmErr := llm.ConfigureLLMSpy(cfg, cloudProvider.Name, cloudProvider.APIKey); llmErr != nil { + fmt.Printf("Warning: failed to configure llmspy: %v\n", llmErr) + fmt.Println("You can configure it later with: obol llm configure") + } + } + } + } + + if err := os.MkdirAll(deploymentDir, 0755); err != nil { + return fmt.Errorf("failed to create deployment directory: %w", err) + } + + // Copy embedded chart to deployment/chart/ + chartDir := filepath.Join(deploymentDir, "chart") + if err := copyEmbeddedChart(chartDir); err != nil { + os.RemoveAll(deploymentDir) + return fmt.Errorf("failed to copy chart: %w", err) + } + + // Write values.yaml from the embedded chart defaults + defaultValues, err := chartFS.ReadFile("chart/values.yaml") + if err != nil { + os.RemoveAll(deploymentDir) + return fmt.Errorf("failed to read chart defaults: %w", err) + } + if err := os.WriteFile(filepath.Join(deploymentDir, "values.yaml"), defaultValues, 0644); err != nil { + os.RemoveAll(deploymentDir) + return fmt.Errorf("failed to write values.yaml: %w", err) + } + + // Write Obol Stack overlay values (httpRoute, provider config, eRPC, skills) + hostname := fmt.Sprintf("openclaw-%s.%s", id, defaultDomain) + namespace := fmt.Sprintf("%s-%s", appName, id) + overlay := generateOverlayValues(hostname, imported) + if err := os.WriteFile(filepath.Join(deploymentDir, "values-obol.yaml"), []byte(overlay), 0644); err != nil { + os.RemoveAll(deploymentDir) + return fmt.Errorf("failed to write overlay values: %w", err) + } + + // Generate helmfile.yaml referencing local chart + helmfileContent := generateHelmfile(id, namespace) + if err := os.WriteFile(filepath.Join(deploymentDir, "helmfile.yaml"), []byte(helmfileContent), 0644); err != nil { + os.RemoveAll(deploymentDir) + return fmt.Errorf("failed to write helmfile.yaml: %w", err) + } + + fmt.Printf("\n✓ OpenClaw instance configured!\n") + fmt.Printf(" Deployment: %s/%s\n", appName, id) + fmt.Printf(" Namespace: %s\n", namespace) + fmt.Printf(" Hostname: %s\n", hostname) + fmt.Printf(" Location: %s\n", deploymentDir) + fmt.Printf("\nFiles created:\n") + fmt.Printf(" - chart/ Embedded OpenClaw Helm chart\n") + fmt.Printf(" - values.yaml Chart defaults (edit to customize)\n") + fmt.Printf(" - values-obol.yaml Obol Stack defaults (httpRoute, providers, eRPC)\n") + fmt.Printf(" - helmfile.yaml Deployment configuration\n") + + if opts.Sync { + fmt.Printf("\nDeploying to cluster...\n\n") + if err := doSync(cfg, id); err != nil { + return err + } + // Copy workspace files into the pod after sync succeeds + if imported != nil && imported.WorkspaceDir != "" { + copyWorkspaceToPod(cfg, id, imported.WorkspaceDir) + } + return nil + } + + fmt.Printf("\nTo deploy: obol openclaw sync %s\n", id) + return nil +} + +// Sync deploys or updates an OpenClaw instance +func Sync(cfg *config.Config, id string) error { + return doSync(cfg, id) +} + +func doSync(cfg *config.Config, id string) error { + deploymentDir := deploymentPath(cfg, id) + if _, err := os.Stat(deploymentDir); os.IsNotExist(err) { + return fmt.Errorf("deployment not found: 
%s/%s\nDirectory: %s", appName, id, deploymentDir) + } + + helmfilePath := filepath.Join(deploymentDir, "helmfile.yaml") + if _, err := os.Stat(helmfilePath); os.IsNotExist(err) { + return fmt.Errorf("helmfile.yaml not found in: %s", deploymentDir) + } + + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) { + return fmt.Errorf("cluster not running. Run 'obol stack up' first") + } + + helmfileBinary := filepath.Join(cfg.BinDir, "helmfile") + if _, err := os.Stat(helmfileBinary); os.IsNotExist(err) { + return fmt.Errorf("helmfile not found at %s", helmfileBinary) + } + + fmt.Printf("Syncing OpenClaw: %s/%s\n", appName, id) + fmt.Printf("Deployment directory: %s\n", deploymentDir) + fmt.Printf("Running helmfile sync...\n\n") + + cmd := exec.Command(helmfileBinary, "-f", helmfilePath, "sync") + cmd.Dir = deploymentDir + cmd.Env = append(os.Environ(), + fmt.Sprintf("KUBECONFIG=%s", kubeconfigPath), + ) + cmd.Stdin = os.Stdin + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + + if err := cmd.Run(); err != nil { + return fmt.Errorf("helmfile sync failed: %w", err) + } + + namespace := fmt.Sprintf("%s-%s", appName, id) + hostname := fmt.Sprintf("openclaw-%s.%s", id, defaultDomain) + fmt.Printf("\n✓ OpenClaw synced successfully!\n") + fmt.Printf(" Namespace: %s\n", namespace) + fmt.Printf(" URL: %s\n", instanceURL(cfg, hostname)) + fmt.Printf("\nRetrieve gateway token:\n") + fmt.Printf(" obol openclaw token %s\n", id) + fmt.Printf("\nPort-forward fallback:\n") + fmt.Printf(" obol kubectl -n %s port-forward svc/openclaw 18789:18789\n", namespace) + + return nil +} + +// copyWorkspaceToPod copies the local workspace directory into the OpenClaw pod's PVC. +// This is non-fatal: failures print a warning and continue. +func copyWorkspaceToPod(cfg *config.Config, id, workspaceDir string) { + namespace := fmt.Sprintf("%s-%s", appName, id) + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + kubectlBinary := filepath.Join(cfg.BinDir, "kubectl") + + fmt.Printf("\nImporting workspace from %s...\n", workspaceDir) + + // Wait for pod to be ready + podName, err := waitForPod(kubectlBinary, kubeconfigPath, namespace, 60) + if err != nil { + fmt.Printf("Warning: could not find ready pod, skipping workspace import: %v\n", err) + return + } + + // kubectl cp /. :/data/.openclaw/workspace/ -n + dest := fmt.Sprintf("%s:/data/.openclaw/workspace/", podName) + src := workspaceDir + "/." + cmd := exec.Command(kubectlBinary, "cp", src, dest, "-n", namespace) + cmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG=%s", kubeconfigPath)) + var stderr bytes.Buffer + cmd.Stderr = &stderr + + if err := cmd.Run(); err != nil { + fmt.Printf("Warning: workspace copy failed: %v\n%s", err, stderr.String()) + return + } + + fmt.Printf("Imported workspace into pod %s\n", podName) +} + +// waitForPod polls for a Running pod matching the openclaw label and returns its name. +// Returns an error if no ready pod is found within timeoutSec seconds. 
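+//
+// Illustrative call (the namespace and timeout are hypothetical, not taken
+// from a real deployment):
+//
+//	podName, err := waitForPod(kubectlBinary, kubeconfigPath, "openclaw-default", 60)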
+func waitForPod(kubectlBinary, kubeconfigPath, namespace string, timeoutSec int) (string, error) { + labelSelector := fmt.Sprintf("app.kubernetes.io/name=%s", appName) + + for i := 0; i < timeoutSec; i += 3 { + cmd := exec.Command(kubectlBinary, "get", "pods", + "-n", namespace, + "-l", labelSelector, + "-o", "jsonpath={.items[?(@.status.phase=='Running')].metadata.name}", + ) + cmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG=%s", kubeconfigPath)) + var stdout bytes.Buffer + cmd.Stdout = &stdout + cmd.Run() + + podName := strings.TrimSpace(stdout.String()) + if podName != "" { + // If multiple pods, take the first + if idx := strings.Index(podName, " "); idx > 0 { + podName = podName[:idx] + } + return podName, nil + } + + time.Sleep(3 * time.Second) + } + + return "", fmt.Errorf("timed out waiting for pod in namespace %s", namespace) +} + +// getToken retrieves the gateway token for an OpenClaw instance as a string. +func getToken(cfg *config.Config, id string) (string, error) { + namespace := fmt.Sprintf("%s-%s", appName, id) + + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) { + return "", fmt.Errorf("cluster not running. Run 'obol stack up' first") + } + + kubectlBinary := filepath.Join(cfg.BinDir, "kubectl") + + cmd := exec.Command(kubectlBinary, "get", "secret", "-n", namespace, + "-l", fmt.Sprintf("app.kubernetes.io/name=%s", appName), + "-o", "json") + cmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG=%s", kubeconfigPath)) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + if err := cmd.Run(); err != nil { + return "", fmt.Errorf("failed to get secret: %w\n%s", err, stderr.String()) + } + + var secretList struct { + Items []struct { + Data map[string]string `json:"data"` + } `json:"items"` + } + if err := json.Unmarshal(stdout.Bytes(), &secretList); err != nil { + return "", fmt.Errorf("failed to parse secret: %w", err) + } + + if len(secretList.Items) == 0 { + return "", fmt.Errorf("no secrets found in namespace %s. Is OpenClaw deployed?", namespace) + } + + for _, item := range secretList.Items { + if encoded, ok := item.Data["OPENCLAW_GATEWAY_TOKEN"]; ok { + decoded, err := base64.StdEncoding.DecodeString(encoded) + if err != nil { + return "", fmt.Errorf("failed to decode token: %w", err) + } + return string(decoded), nil + } + } + + return "", fmt.Errorf("OPENCLAW_GATEWAY_TOKEN not found in namespace %s secrets", namespace) +} + +// Token retrieves the gateway token for an OpenClaw instance and prints it. +func Token(cfg *config.Config, id string) error { + token, err := getToken(cfg, id) + if err != nil { + return err + } + fmt.Printf("%s\n", token) + return nil +} + +// findOpenClawBinary locates the openclaw CLI binary. +// Search order: PATH, then cfg.BinDir. +func findOpenClawBinary(cfg *config.Config) (string, error) { + if p, err := exec.LookPath("openclaw"); err == nil { + return p, nil + } + candidate := filepath.Join(cfg.BinDir, "openclaw") + if _, err := os.Stat(candidate); err == nil { + return candidate, nil + } + return "", fmt.Errorf("openclaw CLI not found.\n\nInstall with one of:\n obolup.sh (re-run bootstrap installer)\n curl -fsSL https://openclaw.ai/install.sh | bash\n npm install -g openclaw (requires Node.js 22+)") +} + +// portForwarder manages a background kubectl port-forward process. 
+type portForwarder struct { + cmd *exec.Cmd + localPort int + done chan error + cancel context.CancelFunc +} + +// startPortForward launches kubectl port-forward in the background and waits +// until it reports the forwarding address on stdout. +func startPortForward(cfg *config.Config, namespace string, localPort int) (*portForwarder, error) { + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) { + return nil, fmt.Errorf("cluster not running. Run 'obol stack up' first") + } + + kubectlBinary := filepath.Join(cfg.BinDir, "kubectl") + + portArg := fmt.Sprintf("%d:18789", localPort) + if localPort == 0 { + portArg = ":18789" + } + + ctx, cancel := context.WithCancel(context.Background()) + cmd := exec.CommandContext(ctx, kubectlBinary, "port-forward", + fmt.Sprintf("svc/%s", appName), portArg, "-n", namespace) + cmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG=%s", kubeconfigPath)) + + // kubectl prints "Forwarding from ..." to stdout (not stderr) + stdoutPipe, err := cmd.StdoutPipe() + if err != nil { + cancel() + return nil, fmt.Errorf("failed to create stdout pipe: %w", err) + } + + if err := cmd.Start(); err != nil { + cancel() + return nil, fmt.Errorf("failed to start port-forward: %w", err) + } + + done := make(chan error, 1) + go func() { + done <- cmd.Wait() + }() + + // Parse the "Forwarding from 127.0.0.1:" line from stdout + parsedPort := make(chan int, 1) + parseErr := make(chan error, 1) + go func() { + scanner := bufio.NewScanner(stdoutPipe) + for scanner.Scan() { + line := scanner.Text() + // kubectl prints: "Forwarding from 127.0.0.1: -> 18789" + if strings.Contains(line, "Forwarding from") { + parts := strings.Split(line, ":") + if len(parts) >= 2 { + portPart := strings.Fields(parts[len(parts)-1])[0] + var p int + if _, err := fmt.Sscanf(portPart, "%d", &p); err == nil { + parsedPort <- p + // Continue draining to prevent pipe blocking + io.Copy(io.Discard, stdoutPipe) + return + } + } + } + } + parseErr <- fmt.Errorf("port-forward exited without reporting a local port") + }() + + select { + case p := <-parsedPort: + return &portForwarder{cmd: cmd, localPort: p, done: done, cancel: cancel}, nil + case err := <-parseErr: + cancel() + return nil, err + case err := <-done: + cancel() + if err != nil { + return nil, fmt.Errorf("port-forward process exited unexpectedly: %w", err) + } + return nil, fmt.Errorf("port-forward process exited unexpectedly") + case <-time.After(30 * time.Second): + cancel() + return nil, fmt.Errorf("timed out waiting for port-forward to become ready") + } +} + +// Stop terminates the port-forward process gracefully. +func (pf *portForwarder) Stop() { + pf.cancel() + select { + case <-pf.done: + case <-time.After(5 * time.Second): + if pf.cmd.Process != nil { + pf.cmd.Process.Kill() + } + } +} + +// SetupOptions contains options for the setup command. +type SetupOptions struct { + Port int // kept for backward compat; currently unused +} + +// Setup reconfigures model providers for a deployed OpenClaw instance. +// It runs the interactive provider prompt, regenerates the overlay values, +// and syncs via helmfile so the pod picks up the new configuration. 
+func Setup(cfg *config.Config, id string, _ SetupOptions) error { + deploymentDir := deploymentPath(cfg, id) + if _, err := os.Stat(deploymentDir); os.IsNotExist(err) { + return fmt.Errorf("deployment not found: %s/%s\nRun 'obol openclaw up' first", appName, id) + } + + // Always show the provider prompt — that's the whole point of setup. + imported, cloudProvider, err := interactiveSetup(nil) + if err != nil { + return fmt.Errorf("setup failed: %w", err) + } + + // Push cloud API key to llmspy if a cloud provider was selected + if cloudProvider != nil { + if llmErr := llm.ConfigureLLMSpy(cfg, cloudProvider.Name, cloudProvider.APIKey); llmErr != nil { + fmt.Printf("Warning: failed to configure llmspy: %v\n", llmErr) + fmt.Println("You can configure it later with: obol llm configure") + } + } + + // Re-copy the embedded chart so the deployment dir picks up any chart fixes + // (e.g. corrected default values, template changes) from the current binary. + chartDir := filepath.Join(deploymentDir, "chart") + if err := copyEmbeddedChart(chartDir); err != nil { + return fmt.Errorf("failed to update chart: %w", err) + } + + // Write updated base values.yaml from the embedded chart defaults + defaultValues, err := chartFS.ReadFile("chart/values.yaml") + if err != nil { + return fmt.Errorf("failed to read chart defaults: %w", err) + } + if err := os.WriteFile(filepath.Join(deploymentDir, "values.yaml"), defaultValues, 0644); err != nil { + return fmt.Errorf("failed to write values.yaml: %w", err) + } + + // Regenerate overlay values with the selected provider + hostname := fmt.Sprintf("openclaw-%s.%s", id, defaultDomain) + overlay := generateOverlayValues(hostname, imported) + overlayPath := filepath.Join(deploymentDir, "values-obol.yaml") + if err := os.WriteFile(overlayPath, []byte(overlay), 0644); err != nil { + return fmt.Errorf("failed to write overlay values: %w", err) + } + + fmt.Printf("\nApplying configuration...\n\n") + if err := doSync(cfg, id); err != nil { + return err + } + + namespace := fmt.Sprintf("%s-%s", appName, id) + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + kubectlBinary := filepath.Join(cfg.BinDir, "kubectl") + + fmt.Printf("\nWaiting for pod to be ready...\n") + if _, err := waitForPod(kubectlBinary, kubeconfigPath, namespace, 90); err != nil { + fmt.Printf("Warning: pod not ready yet: %v\n", err) + fmt.Println("The deployment may still be rolling out. Check with: obol kubectl get pods -n", namespace) + } else { + fmt.Printf("\n✓ Setup complete!\n") + fmt.Printf(" Open dashboard: obol openclaw dashboard %s\n", id) + } + return nil +} + +// DashboardOptions contains options for the dashboard command. +type DashboardOptions struct { + Port int + NoBrowser bool +} + +// Dashboard port-forwards to the OpenClaw instance and opens the web dashboard. +// The onReady callback is invoked with the dashboard URL; the CLI layer uses it +// to open a browser. 
+func Dashboard(cfg *config.Config, id string, opts DashboardOptions, onReady func(url string)) error { + deploymentDir := deploymentPath(cfg, id) + if _, err := os.Stat(deploymentDir); os.IsNotExist(err) { + return fmt.Errorf("deployment not found: %s/%s\nRun 'obol openclaw up' first", appName, id) + } + + token, err := getToken(cfg, id) + if err != nil { + return err + } + + namespace := fmt.Sprintf("%s-%s", appName, id) + fmt.Printf("Starting port-forward to %s...\n", namespace) + + pf, err := startPortForward(cfg, namespace, opts.Port) + if err != nil { + return fmt.Errorf("port-forward failed: %w", err) + } + defer pf.Stop() + + dashboardURL := fmt.Sprintf("http://localhost:%d/#token=%s", pf.localPort, token) + fmt.Printf("Port-forward active: localhost:%d -> %s:18789\n", pf.localPort, namespace) + fmt.Printf("\nDashboard URL: %s\n", dashboardURL) + fmt.Printf("Gateway token: %s\n", token) + fmt.Printf("\nPress Ctrl+C to stop.\n") + + if onReady != nil { + onReady(dashboardURL) + } + + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) + defer signal.Stop(sigCh) + + select { + case <-sigCh: + fmt.Printf("\nShutting down...\n") + case err := <-pf.done: + if err != nil { + return fmt.Errorf("port-forward died unexpectedly: %w", err) + } + } + + return nil +} + +// List displays installed OpenClaw instances +func List(cfg *config.Config) error { + appsDir := filepath.Join(cfg.ConfigDir, "applications", appName) + + if _, err := os.Stat(appsDir); os.IsNotExist(err) { + fmt.Println("No OpenClaw instances installed") + fmt.Println("\nTo create one: obol openclaw up") + return nil + } + + entries, err := os.ReadDir(appsDir) + if err != nil { + return fmt.Errorf("failed to read directory: %w", err) + } + + if len(entries) == 0 { + fmt.Println("No OpenClaw instances installed") + return nil + } + + fmt.Println("OpenClaw instances:") + fmt.Println() + + count := 0 + for _, entry := range entries { + if !entry.IsDir() { + continue + } + id := entry.Name() + namespace := fmt.Sprintf("%s-%s", appName, id) + hostname := fmt.Sprintf("openclaw-%s.%s", id, defaultDomain) + fmt.Printf(" %s\n", id) + fmt.Printf(" Namespace: %s\n", namespace) + fmt.Printf(" URL: %s\n", instanceURL(cfg, hostname)) + fmt.Println() + count++ + } + + fmt.Printf("Total: %d instance(s)\n", count) + return nil +} + +// Delete removes an OpenClaw instance +func Delete(cfg *config.Config, id string, force bool) error { + namespace := fmt.Sprintf("%s-%s", appName, id) + deploymentDir := deploymentPath(cfg, id) + + fmt.Printf("Deleting OpenClaw: %s/%s\n", appName, id) + fmt.Printf("Namespace: %s\n", namespace) + + configExists := false + if _, err := os.Stat(deploymentDir); err == nil { + configExists = true + } + + namespaceExists := false + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + if _, err := os.Stat(kubeconfigPath); err == nil { + kubectlBinary := filepath.Join(cfg.BinDir, "kubectl") + cmd := exec.Command(kubectlBinary, "get", "namespace", namespace) + cmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG=%s", kubeconfigPath)) + if err := cmd.Run(); err == nil { + namespaceExists = true + } + } + + if !namespaceExists && !configExists { + return fmt.Errorf("instance not found: %s", id) + } + + fmt.Println("\nResources to be deleted:") + if namespaceExists { + fmt.Printf(" [x] Kubernetes namespace: %s\n", namespace) + } else { + fmt.Printf(" [ ] Kubernetes namespace: %s (not found)\n", namespace) + } + if configExists { + fmt.Printf(" [x] Configuration: %s\n", 
deploymentDir) + } + + if !force { + fmt.Print("\nProceed with deletion? [y/N]: ") + var response string + fmt.Scanln(&response) + if strings.ToLower(response) != "y" && strings.ToLower(response) != "yes" { + fmt.Println("Deletion cancelled") + return nil + } + } + + if namespaceExists { + fmt.Printf("\nDeleting namespace %s...\n", namespace) + kubectlBinary := filepath.Join(cfg.BinDir, "kubectl") + cmd := exec.Command(kubectlBinary, "delete", "namespace", namespace, + "--force", "--grace-period=0") + cmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG=%s", kubeconfigPath)) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("failed to delete namespace: %w", err) + } + fmt.Println("Namespace deleted") + } + + if configExists { + fmt.Printf("Deleting configuration...\n") + if err := os.RemoveAll(deploymentDir); err != nil { + return fmt.Errorf("failed to delete config directory: %w", err) + } + fmt.Println("Configuration deleted") + + parentDir := filepath.Join(cfg.ConfigDir, "applications", appName) + entries, err := os.ReadDir(parentDir) + if err == nil && len(entries) == 0 { + os.Remove(parentDir) + } + } + + fmt.Printf("\n✓ OpenClaw %s deleted successfully!\n", id) + return nil +} + +// SkillsSync packages a local skills directory into a ConfigMap and rolls the deployment +func SkillsSync(cfg *config.Config, id, skillsDir string) error { + namespace := fmt.Sprintf("%s-%s", appName, id) + + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) { + return fmt.Errorf("cluster not running. Run 'obol stack up' first") + } + + if _, err := os.Stat(skillsDir); os.IsNotExist(err) { + return fmt.Errorf("skills directory not found: %s", skillsDir) + } + + configMapName := fmt.Sprintf("openclaw-%s-skills", id) + archiveKey := "skills.tgz" + + fmt.Printf("Packaging skills from %s...\n", skillsDir) + + var archiveBuf bytes.Buffer + tarCmd := exec.Command("tar", "-czf", "-", "-C", skillsDir, ".") + tarCmd.Stdout = &archiveBuf + var tarStderr bytes.Buffer + tarCmd.Stderr = &tarStderr + if err := tarCmd.Run(); err != nil { + return fmt.Errorf("failed to create skills archive: %w\n%s", err, tarStderr.String()) + } + + tmpFile, err := os.CreateTemp("", "openclaw-skills-*.tgz") + if err != nil { + return fmt.Errorf("failed to create temp file: %w", err) + } + defer os.Remove(tmpFile.Name()) + + if _, err := tmpFile.Write(archiveBuf.Bytes()); err != nil { + tmpFile.Close() + return fmt.Errorf("failed to write archive: %w", err) + } + tmpFile.Close() + + kubectlBinary := filepath.Join(cfg.BinDir, "kubectl") + + delCmd := exec.Command(kubectlBinary, "delete", "configmap", configMapName, + "-n", namespace, "--ignore-not-found") + delCmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG=%s", kubeconfigPath)) + delCmd.Run() + + fmt.Printf("Creating ConfigMap %s in namespace %s...\n", configMapName, namespace) + createCmd := exec.Command(kubectlBinary, "create", "configmap", configMapName, + "-n", namespace, + fmt.Sprintf("--from-file=%s=%s", archiveKey, tmpFile.Name())) + createCmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG=%s", kubeconfigPath)) + var createStderr bytes.Buffer + createCmd.Stderr = &createStderr + if err := createCmd.Run(); err != nil { + return fmt.Errorf("failed to create ConfigMap: %w\n%s", err, createStderr.String()) + } + + fmt.Printf("✓ Skills ConfigMap updated: %s\n", configMapName) + fmt.Printf("\nTo apply, re-sync: obol openclaw sync %s\n", id) + 
return nil +} + +// remoteCapableCommands lists openclaw subcommands that support --url and --token flags. +var remoteCapableCommands = map[string]bool{ + "gateway": true, + "acp": true, + "browser": true, + "logs": true, +} + +// CLI runs an openclaw CLI command against a deployed instance. +// Commands that support --url/--token are executed locally with a port-forward; +// others are executed via kubectl exec into the pod. +func CLI(cfg *config.Config, id string, args []string) error { + deploymentDir := deploymentPath(cfg, id) + if _, err := os.Stat(deploymentDir); os.IsNotExist(err) { + return fmt.Errorf("deployment not found: %s/%s\nRun 'obol openclaw up' first", appName, id) + } + + namespace := fmt.Sprintf("%s-%s", appName, id) + + if len(args) == 0 { + return fmt.Errorf("no openclaw command specified\n\nExamples:\n" + + " obol openclaw cli %s -- gateway health\n" + + " obol openclaw cli %s -- gateway call config.get\n" + + " obol openclaw cli %s -- doctor", id, id, id) + } + + // Determine if the command supports --url/--token (remote-capable) + firstArg := args[0] + if remoteCapableCommands[firstArg] { + return cliViaPortForward(cfg, id, namespace, args) + } + return cliViaKubectlExec(cfg, namespace, args) +} + +// cliViaPortForward runs an openclaw command locally with port-forward + --url/--token. +func cliViaPortForward(cfg *config.Config, id, namespace string, args []string) error { + openclawBinary, err := findOpenClawBinary(cfg) + if err != nil { + return err + } + + token, err := getToken(cfg, id) + if err != nil { + return fmt.Errorf("failed to get gateway token: %w", err) + } + + pf, err := startPortForward(cfg, namespace, 0) + if err != nil { + return fmt.Errorf("port-forward failed: %w", err) + } + defer pf.Stop() + + // Append --url and --token to the args + wsURL := fmt.Sprintf("ws://localhost:%d", pf.localPort) + fullArgs := append(args, "--url", wsURL, "--token", token) + + cmd := exec.Command(openclawBinary, fullArgs...) + cmd.Stdin = os.Stdin + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + + // Handle signals to clean up port-forward + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) + defer signal.Stop(sigCh) + + go func() { + <-sigCh + pf.Stop() + }() + + if err := cmd.Run(); err != nil { + if exitErr, ok := err.(*exec.ExitError); ok { + if status, ok := exitErr.Sys().(syscall.WaitStatus); ok { + os.Exit(status.ExitStatus()) + } + } + return err + } + return nil +} + +// cliViaKubectlExec runs an openclaw command inside the pod via kubectl exec. +func cliViaKubectlExec(cfg *config.Config, namespace string, args []string) error { + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) { + return fmt.Errorf("cluster not running. Run 'obol stack up' first") + } + + kubectlBinary := filepath.Join(cfg.BinDir, "kubectl") + + // Build: kubectl exec -it -n deploy/openclaw -- node openclaw.mjs + // The pod runs `node openclaw.mjs` (no standalone binary in PATH). + execArgs := []string{ + "exec", "-it", + "-n", namespace, + "deploy/openclaw", + "--", + "node", "openclaw.mjs", + } + execArgs = append(execArgs, args...) + + cmd := exec.Command(kubectlBinary, execArgs...) 
+ cmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG=%s", kubeconfigPath)) + cmd.Stdin = os.Stdin + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + + if err := cmd.Run(); err != nil { + if exitErr, ok := err.(*exec.ExitError); ok { + if status, ok := exitErr.Sys().(syscall.WaitStatus); ok { + os.Exit(status.ExitStatus()) + } + } + return err + } + return nil +} + +// instanceURL returns the URL for an OpenClaw instance, using HTTPS on port 8443 +// when TLS certs are available, or HTTP (default port) otherwise. +func instanceURL(cfg *config.Config, hostname string) string { + if oboltls.CertsExist(cfg.ConfigDir) { + return fmt.Sprintf("https://%s:8443", hostname) + } + return fmt.Sprintf("http://%s", hostname) +} + +// deploymentPath returns the path to a deployment directory +func deploymentPath(cfg *config.Config, id string) string { + return filepath.Join(cfg.ConfigDir, "applications", appName, id) +} + +// copyEmbeddedChart extracts the embedded chart FS to destDir +func copyEmbeddedChart(destDir string) error { + return fs.WalkDir(chartFS, "chart", func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + if path == "chart" { + return nil + } + + relPath := strings.TrimPrefix(path, "chart/") + destPath := filepath.Join(destDir, relPath) + + if d.IsDir() { + return os.MkdirAll(destPath, 0755) + } + + if err := os.MkdirAll(filepath.Dir(destPath), 0755); err != nil { + return err + } + + data, err := chartFS.ReadFile(path) + if err != nil { + return fmt.Errorf("failed to read embedded %s: %w", path, err) + } + return os.WriteFile(destPath, data, 0644) + }) +} + +// generateOverlayValues creates the Obol Stack-specific values overlay. +// If imported is non-nil, provider/channel config from the import is used +// instead of the default Ollama configuration. +func generateOverlayValues(hostname string, imported *ImportResult) string { + var b strings.Builder + + b.WriteString(`# Obol Stack overlay values for OpenClaw +# This file contains stack-specific defaults. Edit to customize. + +# Enable Gateway API HTTPRoute for stack routing +httpRoute: + enabled: true + hostnames: +`) + b.WriteString(fmt.Sprintf(" - %s\n", hostname)) + b.WriteString(` parentRefs: + - name: traefik-gateway + namespace: traefik + sectionName: web + - name: traefik-gateway + namespace: traefik + sectionName: websecure + +# SA needs API token mount for K8s read access +serviceAccount: + automount: true + +# Read-only RBAC for K8s API (pods, services, deployments, etc.) +rbac: + create: true + +`) + + // Gateway token: import from existing config or generate a new one + token := "" + if imported != nil && imported.GatewayToken != "" { + token = imported.GatewayToken + } else { + tokenBytes := make([]byte, 32) + if _, err := rand.Read(tokenBytes); err == nil { + token = base64.RawURLEncoding.EncodeToString(tokenBytes) + } + } + if token != "" { + b.WriteString(fmt.Sprintf("secrets:\n gatewayToken:\n value: %s\n\n", token)) + } + + // Provider and agent model configuration + importedOverlay := TranslateToOverlayYAML(imported) + if importedOverlay != "" { + b.WriteString("# Imported from ~/.openclaw/openclaw.json\n") + // Inject gateway controlUi settings for Traefik reverse proxy. + // allowInsecureAuth is required because the browser accesses OpenClaw via + // http://.obol.stack (non-localhost HTTP), where crypto.subtle is + // unavailable. Without it, the gateway rejects with 1008 "requires HTTPS or + // localhost (secure context)". Token auth is still enforced. 
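+ // The injected fragment has this shape once rendered (sketch; indentation
+ // shown for readability):
+ //
+ //   openclaw:
+ //     gateway:
+ //       controlUi:
+ //         allowInsecureAuth: true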
+ if strings.Contains(importedOverlay, "openclaw:\n") { + importedOverlay = strings.Replace(importedOverlay, "openclaw:\n", "openclaw:\n gateway:\n controlUi:\n allowInsecureAuth: true\n", 1) + } else { + b.WriteString("openclaw:\n gateway:\n controlUi:\n allowInsecureAuth: true\n\n") + } + b.WriteString(importedOverlay) + } else { + b.WriteString(`# Route agent traffic to in-cluster Ollama via llmspy proxy +openclaw: + agentModel: ollama/gpt-oss:20b-cloud + gateway: + # Allow control UI over HTTP behind Traefik (local dev stack). + # Required: browser on non-localhost HTTP has no crypto.subtle, + # so device identity is unavailable. Token auth is still enforced. + controlUi: + allowInsecureAuth: true + +# Default model provider: in-cluster Ollama (routed through llmspy) +models: + ollama: + enabled: true + baseUrl: http://llmspy.llm.svc.cluster.local:8000/v1 + apiKeyEnvVar: OLLAMA_API_KEY + apiKeyValue: ollama-local + models: + - id: gpt-oss:20b-cloud + name: GPT-OSS 20B + +`) + } + + b.WriteString(`# eRPC integration +erpc: + url: http://erpc.erpc.svc.cluster.local:4000/rpc + +# Skills: chart creates a default empty ConfigMap; populate with obol openclaw skills sync +skills: + enabled: true + createDefault: true + +# Agent init Job (enable to bootstrap workspace on first deploy) +initJob: + enabled: false +`) + + return b.String() +} + +// ollamaEndpoint returns the base URL where host Ollama should be reachable. +// It respects the OLLAMA_HOST environment variable, falling back to http://localhost:11434. +func ollamaEndpoint() string { + if host := os.Getenv("OLLAMA_HOST"); host != "" { + // OLLAMA_HOST may be just "host:port" or a full URL. + if !strings.HasPrefix(host, "http://") && !strings.HasPrefix(host, "https://") { + host = "http://" + host + } + return strings.TrimRight(host, "/") + } + return "http://localhost:11434" +} + +// detectOllama checks whether Ollama is reachable on the host machine by +// hitting the /api/tags endpoint with a short timeout. Returns true if the +// server responds with HTTP 200. +func detectOllama() bool { + endpoint := ollamaEndpoint() + tagsURL, err := url.JoinPath(endpoint, "api", "tags") + if err != nil { + return false + } + + client := &http.Client{Timeout: 2 * time.Second} + resp, err := client.Get(tagsURL) + if err != nil { + return false + } + resp.Body.Close() + return resp.StatusCode == http.StatusOK +} + +// interactiveSetup prompts the user for provider configuration. +// If imported is non-nil, offers to use the detected config. +// Returns the ImportResult for overlay generation, and optionally a CloudProviderInfo +// when a cloud provider was selected (so the caller can configure llmspy). +func interactiveSetup(imported *ImportResult) (*ImportResult, *CloudProviderInfo, error) { + reader := bufio.NewReader(os.Stdin) + + if imported != nil { + fmt.Print("\nUse detected configuration? 
[Y/n]: ") + line, _ := reader.ReadString('\n') + line = strings.TrimSpace(strings.ToLower(line)) + if line == "" || line == "y" || line == "yes" { + fmt.Println("Using detected configuration.") + return imported, nil, nil + } + } + + // Detect Ollama on the host to decide whether to offer it as an option + ollamaAvailable := detectOllama() + if ollamaAvailable { + fmt.Printf(" ✓ Ollama detected at %s\n", ollamaEndpoint()) + } else { + fmt.Printf(" ⚠ Ollama not detected on host (%s)\n", ollamaEndpoint()) + } + + if ollamaAvailable { + fmt.Println("\nSelect a model provider:") + fmt.Println(" [1] Ollama (default, runs in-cluster)") + fmt.Println(" [2] OpenAI") + fmt.Println(" [3] Anthropic") + fmt.Print("\nChoice [1]: ") + + line, _ := reader.ReadString('\n') + choice := strings.TrimSpace(line) + if choice == "" { + choice = "1" + } + + switch choice { + case "1": + fmt.Println("Using Ollama (in-cluster) as default provider.") + return nil, nil, nil + case "2": + cloud, err := promptForCloudProvider(reader, "openai", "OpenAI", "gpt-5.2", "GPT-5.2") + if err != nil { + return nil, nil, err + } + result := buildLLMSpyRoutedOverlay(cloud) + return result, cloud, nil + case "3": + cloud, err := promptForCloudProvider(reader, "anthropic", "Anthropic", "claude-opus-4-6", "Claude Opus 4.6") + if err != nil { + return nil, nil, err + } + result := buildLLMSpyRoutedOverlay(cloud) + return result, cloud, nil + default: + fmt.Printf("Unknown choice '%s', using Ollama defaults.\n", choice) + return nil, nil, nil + } + } + + // Ollama not available — only offer cloud providers + fmt.Println("\nSelect a model provider:") + fmt.Println(" [1] OpenAI") + fmt.Println(" [2] Anthropic") + fmt.Print("\nChoice [1]: ") + + line, _ := reader.ReadString('\n') + choice := strings.TrimSpace(line) + if choice == "" { + choice = "1" + } + + switch choice { + case "1": + cloud, err := promptForCloudProvider(reader, "openai", "OpenAI", "gpt-5.2", "GPT-5.2") + if err != nil { + return nil, nil, err + } + result := buildLLMSpyRoutedOverlay(cloud) + return result, cloud, nil + case "2": + cloud, err := promptForCloudProvider(reader, "anthropic", "Anthropic", "claude-opus-4-6", "Claude Opus 4.6") + if err != nil { + return nil, nil, err + } + result := buildLLMSpyRoutedOverlay(cloud) + return result, cloud, nil + default: + return nil, nil, fmt.Errorf("unknown choice '%s'; please select a valid provider", choice) + } +} + +// promptForCloudProvider asks for an API key and returns cloud provider info. +// The actual overlay (ImportResult) is built separately via buildLLMSpyRoutedOverlay. +func promptForCloudProvider(reader *bufio.Reader, name, display, modelID, modelName string) (*CloudProviderInfo, error) { + fmt.Printf("\n%s API key: ", display) + apiKey, _ := reader.ReadString('\n') + apiKey = strings.TrimSpace(apiKey) + if apiKey == "" { + return nil, fmt.Errorf("%s API key is required", display) + } + + return &CloudProviderInfo{ + Name: name, + APIKey: apiKey, + ModelID: modelID, + Display: modelName, + }, nil +} + +// buildLLMSpyRoutedOverlay creates an ImportResult that routes a cloud model +// through the llmspy proxy. OpenClaw sees a single "ollama" provider pointing +// at llmspy, with the cloud model in its model list. The actual cloud providers +// are disabled in OpenClaw — llmspy handles the routing. 
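+//
+// For example, selecting Anthropic in interactiveSetup produces (abridged):
+//
+//	AgentModel: "claude-opus-4-6"
+//	Providers:  ollama → llmspy baseUrl (enabled, lists the cloud model),
+//	            anthropic (disabled), openai (disabled)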
+func buildLLMSpyRoutedOverlay(cloud *CloudProviderInfo) *ImportResult { + return &ImportResult{ + AgentModel: cloud.ModelID, + Providers: []ImportedProvider{ + { + Name: "ollama", + BaseURL: "http://llmspy.llm.svc.cluster.local:8000/v1", + API: "openai-completions", + APIKeyEnvVar: "OLLAMA_API_KEY", + APIKey: "ollama-local", + Models: []ImportedModel{ + {ID: cloud.ModelID, Name: cloud.Display}, + }, + }, + {Name: "anthropic", Disabled: true}, + {Name: "openai", Disabled: true}, + }, + } +} + +// generateHelmfile creates a helmfile.yaml referencing the local chart +func generateHelmfile(id, namespace string) string { + return fmt.Sprintf(`# OpenClaw instance: %s +# Managed by obol openclaw + +releases: + - name: openclaw + namespace: %s + createNamespace: true + chart: ./chart + values: + - values.yaml + - values-obol.yaml +`, id, namespace) +} diff --git a/internal/openclaw/overlay_test.go b/internal/openclaw/overlay_test.go new file mode 100644 index 0000000..4df6808 --- /dev/null +++ b/internal/openclaw/overlay_test.go @@ -0,0 +1,154 @@ +package openclaw + +import ( + "strings" + "testing" +) + +func TestBuildLLMSpyRoutedOverlay_Anthropic(t *testing.T) { + cloud := &CloudProviderInfo{ + Name: "anthropic", + APIKey: "sk-ant-test", + ModelID: "claude-sonnet-4-5-20250929", + Display: "Claude Sonnet 4.5", + } + + result := buildLLMSpyRoutedOverlay(cloud) + + // Check agent model uses bare model ID (no provider/ prefix) + if result.AgentModel != "claude-sonnet-4-5-20250929" { + t.Errorf("AgentModel = %q, want %q", result.AgentModel, "claude-sonnet-4-5-20250929") + } + + // Check 3 providers: ollama (enabled), anthropic (disabled), openai (disabled) + if len(result.Providers) != 3 { + t.Fatalf("len(Providers) = %d, want 3", len(result.Providers)) + } + + ollama := result.Providers[0] + if ollama.Name != "ollama" || ollama.Disabled { + t.Errorf("ollama: name=%q disabled=%v, want ollama/false", ollama.Name, ollama.Disabled) + } + if ollama.BaseURL != "http://llmspy.llm.svc.cluster.local:8000/v1" { + t.Errorf("ollama.BaseURL = %q", ollama.BaseURL) + } + if ollama.APIKeyEnvVar != "OLLAMA_API_KEY" { + t.Errorf("ollama.APIKeyEnvVar = %q, want OLLAMA_API_KEY", ollama.APIKeyEnvVar) + } + if ollama.APIKey != "ollama-local" { + t.Errorf("ollama.APIKey = %q, want ollama-local", ollama.APIKey) + } + if ollama.API != "openai-completions" { + t.Errorf("ollama.API = %q, want openai-completions", ollama.API) + } + if len(ollama.Models) != 1 || ollama.Models[0].ID != "claude-sonnet-4-5-20250929" { + t.Errorf("ollama.Models = %v", ollama.Models) + } + + // anthropic and openai should be disabled + if !result.Providers[1].Disabled || result.Providers[1].Name != "anthropic" { + t.Errorf("anthropic: disabled=%v name=%q", result.Providers[1].Disabled, result.Providers[1].Name) + } + if !result.Providers[2].Disabled || result.Providers[2].Name != "openai" { + t.Errorf("openai: disabled=%v name=%q", result.Providers[2].Disabled, result.Providers[2].Name) + } +} + +func TestBuildLLMSpyRoutedOverlay_OpenAI(t *testing.T) { + cloud := &CloudProviderInfo{ + Name: "openai", + APIKey: "sk-open-test", + ModelID: "gpt-5.2", + Display: "GPT-5.2", + } + + result := buildLLMSpyRoutedOverlay(cloud) + + if result.AgentModel != "gpt-5.2" { + t.Errorf("AgentModel = %q, want %q", result.AgentModel, "gpt-5.2") + } + + ollama := result.Providers[0] + if len(ollama.Models) != 1 || ollama.Models[0].ID != "gpt-5.2" { + t.Errorf("ollama model = %v, want gpt-5.2", ollama.Models) + } +} + +func TestOverlayYAML_LLMSpyRouted(t *testing.T) { + 
cloud := &CloudProviderInfo{ + Name: "anthropic", + APIKey: "sk-ant-test", + ModelID: "claude-sonnet-4-5-20250929", + Display: "Claude Sonnet 4.5", + } + result := buildLLMSpyRoutedOverlay(cloud) + yaml := TranslateToOverlayYAML(result) + + // Agent model should be the bare model ID + if !strings.Contains(yaml, "agentModel: claude-sonnet-4-5-20250929") { + t.Errorf("YAML missing agentModel, got:\n%s", yaml) + } + + // ollama should be enabled with llmspy baseUrl + if !strings.Contains(yaml, "baseUrl: http://llmspy.llm.svc.cluster.local:8000/v1") { + t.Errorf("YAML missing llmspy baseUrl, got:\n%s", yaml) + } + + // apiKeyEnvVar should be set + if !strings.Contains(yaml, "apiKeyEnvVar: OLLAMA_API_KEY") { + t.Errorf("YAML missing apiKeyEnvVar, got:\n%s", yaml) + } + + // apiKeyValue should be ollama-local + if !strings.Contains(yaml, "apiKeyValue: ollama-local") { + t.Errorf("YAML missing apiKeyValue, got:\n%s", yaml) + } + + // api should be openai-completions (llmspy is OpenAI-compatible) + if !strings.Contains(yaml, "api: openai-completions") { + t.Errorf("YAML missing api: openai-completions, got:\n%s", yaml) + } + + // Cloud model should appear in ollama's model list + if !strings.Contains(yaml, "- id: claude-sonnet-4-5-20250929") { + t.Errorf("YAML missing cloud model ID, got:\n%s", yaml) + } + + // anthropic and openai should be disabled + if !strings.Contains(yaml, "anthropic:\n enabled: false") { + t.Errorf("YAML missing disabled anthropic, got:\n%s", yaml) + } + if !strings.Contains(yaml, "openai:\n enabled: false") { + t.Errorf("YAML missing disabled openai, got:\n%s", yaml) + } +} + +func TestGenerateOverlayValues_OllamaDefault(t *testing.T) { + // When imported is nil, generateOverlayValues should use Ollama defaults + yaml := generateOverlayValues("openclaw-default.obol.stack", nil) + + if !strings.Contains(yaml, "agentModel: ollama/gpt-oss:20b-cloud") { + t.Errorf("default overlay missing ollama agentModel, got:\n%s", yaml) + } + if !strings.Contains(yaml, "baseUrl: http://llmspy.llm.svc.cluster.local:8000/v1") { + t.Errorf("default overlay missing llmspy baseUrl, got:\n%s", yaml) + } +} + +func TestRemoteCapableCommands(t *testing.T) { + // Commands that should go through port-forward + remote := []string{"gateway", "acp", "browser", "logs"} + for _, cmd := range remote { + if !remoteCapableCommands[cmd] { + t.Errorf("%q should be remote-capable", cmd) + } + } + + // Commands that should go through kubectl exec + local := []string{"agent", "doctor", "config", "models", "message"} + for _, cmd := range local { + if remoteCapableCommands[cmd] { + t.Errorf("%q should NOT be remote-capable", cmd) + } + } +} diff --git a/internal/stack/stack.go b/internal/stack/stack.go index c8366f6..0fa10dc 100644 --- a/internal/stack/stack.go +++ b/internal/stack/stack.go @@ -5,10 +5,14 @@ import ( "os" "os/exec" "path/filepath" + "runtime" "strings" "github.com/ObolNetwork/obol-stack/internal/config" + "github.com/ObolNetwork/obol-stack/internal/dns" "github.com/ObolNetwork/obol-stack/internal/embed" + "github.com/ObolNetwork/obol-stack/internal/openclaw" + oboltls "github.com/ObolNetwork/obol-stack/internal/tls" petname "github.com/dustinkirkland/golang-petname" ) @@ -77,12 +81,42 @@ func Init(cfg *config.Config, force bool) error { fmt.Printf("K3d config saved to: %s\n", k3dConfigPath) // Copy embedded defaults (helmfile + charts for infrastructure) + // Resolve placeholders: {{OLLAMA_HOST}} → host DNS for the cluster runtime. 
+ // On macOS (Docker Desktop), host.docker.internal resolves to the host. + // On Linux (native Docker), host.k3d.internal is added by k3d. + ollamaHost := "host.k3d.internal" + if runtime.GOOS == "darwin" { + ollamaHost = "host.docker.internal" + } defaultsDir := filepath.Join(cfg.ConfigDir, "defaults") - if err := embed.CopyDefaults(defaultsDir); err != nil { + if err := embed.CopyDefaults(defaultsDir, map[string]string{ + "{{OLLAMA_HOST}}": ollamaHost, + }); err != nil { return fmt.Errorf("failed to copy defaults: %w", err) } fmt.Printf("Defaults copied to: %s\n", defaultsDir) + // Generate TLS certificates for *.obol.stack (if mkcert is available) + if oboltls.MkcertAvailable(cfg.BinDir) { + fmt.Println("Generating TLS certificates for *.obol.stack...") + if err := oboltls.GenerateCerts(cfg.BinDir, cfg.ConfigDir); err != nil { + fmt.Printf("Warning: TLS cert generation failed: %v\n", err) + fmt.Println("Stack will use HTTP-only mode") + } else { + fmt.Println("TLS certificates generated (trusted by OS)") + } + } else { + fmt.Println("mkcert not found — TLS disabled (install via obolup.sh to enable)") + } + + // Patch the defaults helmfile to enable TLS if certs were generated + if oboltls.CertsExist(cfg.ConfigDir) { + helmfilePath := filepath.Join(defaultsDir, "helmfile.yaml") + if err := enableHelmfileTLS(helmfilePath); err != nil { + fmt.Printf("Warning: failed to enable TLS in helmfile: %v\n", err) + } + } + // Store stack ID for later use (stackIDPath already declared above) if err := os.WriteFile(stackIDPath, []byte(stackID), 0644); err != nil { return fmt.Errorf("failed to write stack ID: %w", err) @@ -132,6 +166,13 @@ func Up(cfg *config.Config) error { return err } + // Ensure DNS resolver is running for wildcard *.obol.stack + if err := dns.EnsureRunning(); err != nil { + fmt.Printf("Warning: DNS resolver failed to start: %v\n", err) + } else if err := dns.ConfigureSystemResolver(); err != nil { + fmt.Printf("Warning: failed to configure system DNS resolver: %v\n", err) + } + fmt.Println("Stack restarted successfully") fmt.Printf("Stack ID: %s\n", stackID) return nil @@ -180,6 +221,13 @@ func Up(cfg *config.Config) error { return err } + // Ensure DNS resolver is running for wildcard *.obol.stack + if err := dns.EnsureRunning(); err != nil { + fmt.Printf("Warning: DNS resolver failed to start: %v\n", err) + } else if err := dns.ConfigureSystemResolver(); err != nil { + fmt.Printf("Warning: failed to configure system DNS resolver: %v\n", err) + } + fmt.Println("Stack started successfully") fmt.Printf("Stack ID: %s\n", stackID) fmt.Printf("export KUBECONFIG=%s\n", kubeconfigPath) @@ -256,6 +304,10 @@ func Purge(cfg *config.Config, force bool) error { } } + // Stop DNS resolver and remove system resolver config + dns.Stop() + dns.RemoveSystemResolver() + // Remove stack config directory stackConfigDir := filepath.Join(cfg.ConfigDir) if err := os.RemoveAll(stackConfigDir); err != nil { @@ -321,12 +373,25 @@ func syncDefaults(cfg *config.Config, kubeconfigPath string) error { // Sync defaults using helmfile (handles Helm hooks properly) defaultsHelmfilePath := filepath.Join(cfg.ConfigDir, "defaults") + helmfilePath := filepath.Join(defaultsHelmfilePath, "helmfile.yaml") + + // Compatibility migration: older defaults pinned HTTPRoutes to `obol.stack` via + // `spec.hostnames`. This breaks public access for: + // - quick tunnels (random *.trycloudflare.com host) + // - user-provided DNS hostnames (e.g. 
agent.example.com) + // Removing hostnames makes routes match all hostnames while preserving existing + // path-based routing. + if err := migrateDefaultsHTTPRouteHostnames(helmfilePath); err != nil { + fmt.Printf("Warning: failed to migrate defaults helmfile hostnames: %v\n", err) + } + helmfileCmd := exec.Command( filepath.Join(cfg.BinDir, "helmfile"), - "--file", filepath.Join(defaultsHelmfilePath, "helmfile.yaml"), + "--file", helmfilePath, "--kubeconfig", kubeconfigPath, "sync", ) + helmfileCmd.Env = append(os.Environ(), "KUBECONFIG="+kubeconfigPath) helmfileCmd.Stdout = os.Stdout helmfileCmd.Stderr = os.Stderr @@ -340,5 +405,94 @@ func syncDefaults(cfg *config.Config, kubeconfigPath string) error { } fmt.Println("Default infrastructure deployed") + + // Create TLS Secret in traefik namespace (if certs exist) + if oboltls.CertsExist(cfg.ConfigDir) { + fmt.Println("Creating TLS Secret for Traefik...") + if err := oboltls.EnsureK8sSecret(cfg.BinDir, cfg.ConfigDir, kubeconfigPath); err != nil { + fmt.Printf("Warning: TLS Secret creation failed: %v\n", err) + fmt.Println("HTTPS will be unavailable until the Secret is created") + } else { + fmt.Println("TLS Secret created — HTTPS available on port 8443") + } + } + + // Deploy default OpenClaw instance (non-fatal on failure) + fmt.Println("Setting up default OpenClaw instance...") + if err := openclaw.SetupDefault(cfg); err != nil { + fmt.Printf("Warning: failed to set up default OpenClaw: %v\n", err) + fmt.Println("You can manually set up OpenClaw later with: obol openclaw up") + } + return nil } + +func migrateDefaultsHTTPRouteHostnames(helmfilePath string) error { + data, err := os.ReadFile(helmfilePath) + if err != nil { + return err + } + + // Only removes the legacy default single-hostname block; if users customized their + // helmfile with different hostnames, we leave it alone. + needle := " hostnames:\n - obol.stack\n" + s := string(data) + if !strings.Contains(s, needle) { + return nil + } + updated := strings.ReplaceAll(s, needle, "") + if updated == s { + return nil + } + return os.WriteFile(helmfilePath, []byte(updated), 0644) +} + +// enableHelmfileTLS patches the defaults helmfile to enable TLS on the Traefik +// websecure port and add a websecure Gateway listener with certificateRefs. +// Also adds websecure parentRef to infrastructure HTTPRoutes (erpc, obol-frontend). +func enableHelmfileTLS(helmfilePath string) error { + data, err := os.ReadFile(helmfilePath) + if err != nil { + return err + } + s := string(data) + + // Patch 1: Enable TLS on websecure port + tlsOld := "enabled: false # TLS termination disabled for local dev" + tlsNew := "enabled: true # TLS termination via mkcert" + if strings.Contains(s, tlsOld) { + s = strings.Replace(s, tlsOld, tlsNew, 1) + } + + // Patch 2: Add websecure Gateway listener after the web listener block. + // Find the end of the web listener's namespacePolicy block and insert websecure. + webListenerEnd := " namespacePolicy:\n from: All\n" + websecureListener := webListenerEnd + + " websecure:\n" + + " port: 8443\n" + + " protocol: HTTPS\n" + + " certificateRefs:\n" + + " - name: obol-stack-tls\n" + + " namespacePolicy:\n" + + " from: All\n" + if strings.Contains(s, webListenerEnd) && !strings.Contains(s, "protocol: HTTPS") { + // Only the first occurrence (the Gateway listeners block, not the ports block) + s = strings.Replace(s, webListenerEnd, websecureListener, 1) + } + + // Patch 3: Add websecure parentRef to infrastructure HTTPRoutes. 
+ // Each route has "sectionName: web" — add a second parentRef for websecure. + webRef := " sectionName: web\n" + dualRef := webRef + + " - name: traefik-gateway\n" + + " namespace: traefik\n" + + " sectionName: websecure\n" + if strings.Contains(s, webRef) && !strings.Contains(s, "sectionName: websecure") { + s = strings.ReplaceAll(s, webRef, dualRef) + } + + if s == string(data) { + return nil // No changes needed + } + return os.WriteFile(helmfilePath, []byte(s), 0644) +} diff --git a/internal/stack/stack_test.go b/internal/stack/stack_test.go new file mode 100644 index 0000000..ba5b86a --- /dev/null +++ b/internal/stack/stack_test.go @@ -0,0 +1,93 @@ +package stack + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +func TestEnableHelmfileTLS(t *testing.T) { + // Use the actual embedded helmfile content for the test. + // This ensures the string patterns match the real file. + srcPath := filepath.Join("..", "embed", "infrastructure", "helmfile.yaml") + data, err := os.ReadFile(srcPath) + if err != nil { + t.Fatalf("failed to read source helmfile: %v", err) + } + + // Write to a temp file + tmpDir := t.TempDir() + helmfilePath := filepath.Join(tmpDir, "helmfile.yaml") + if err := os.WriteFile(helmfilePath, data, 0644); err != nil { + t.Fatal(err) + } + + // Run the patching function + if err := enableHelmfileTLS(helmfilePath); err != nil { + t.Fatalf("enableHelmfileTLS failed: %v", err) + } + + // Read patched content + patched, err := os.ReadFile(helmfilePath) + if err != nil { + t.Fatal(err) + } + content := string(patched) + + // Verify Patch 1: TLS enabled + if strings.Contains(content, "enabled: false # TLS termination disabled for local dev") { + t.Error("Patch 1 failed: TLS still disabled") + } + if !strings.Contains(content, "enabled: true # TLS termination via mkcert") { + t.Error("Patch 1 failed: TLS enabled marker not found") + } + + // Verify Patch 2: websecure Gateway listener added + if !strings.Contains(content, "protocol: HTTPS") { + t.Error("Patch 2 failed: HTTPS protocol not found") + } + if !strings.Contains(content, "obol-stack-tls") { + t.Error("Patch 2 failed: certificateRefs not found") + } + + // Verify Patch 3: websecure parentRef added to HTTPRoutes + if !strings.Contains(content, "sectionName: websecure") { + t.Error("Patch 3 failed: websecure sectionName not found in routes") + } + + // Count exact occurrences (use "\n" boundary to avoid substring matching) + // "sectionName: web\n" matches only the web refs, not websecure + webCount := strings.Count(content, "sectionName: web\n") + websecureCount := strings.Count(content, "sectionName: websecure\n") + if webCount != websecureCount { + t.Errorf("Patch 3: web refs (%d) != websecure refs (%d)", webCount, websecureCount) + } + if websecureCount < 2 { + t.Errorf("Patch 3: expected at least 2 websecure refs, got %d", websecureCount) + } + + // Verify the patched content is valid YAML structure (basic check) + // Each websecure parentRef should appear after a web parentRef + lines := strings.Split(content, "\n") + for i, line := range lines { + if strings.Contains(line, "sectionName: websecure") { + // Should be preceded by a line with "namespace: traefik" + if i < 1 || !strings.Contains(lines[i-1], "namespace: traefik") { + t.Errorf("Patch 3: websecure sectionName at line %d not preceded by namespace: traefik", i+1) + } + } + } + + // Verify idempotency — running again should be a no-op + if err := enableHelmfileTLS(helmfilePath); err != nil { + t.Fatalf("second enableHelmfileTLS call failed: %v", 
err) + } + patched2, err := os.ReadFile(helmfilePath) + if err != nil { + t.Fatal(err) + } + if string(patched2) != content { + t.Error("enableHelmfileTLS is not idempotent — second call changed the file") + } +} diff --git a/internal/tls/tls.go b/internal/tls/tls.go new file mode 100644 index 0000000..9783e0f --- /dev/null +++ b/internal/tls/tls.go @@ -0,0 +1,183 @@ +package tls + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" +) + +const ( + // certFile is the TLS certificate filename. + certFile = "obol-stack.pem" + // keyFile is the TLS private key filename. + keyFile = "obol-stack-key.pem" + // tlsDir is the subdirectory under configDir for TLS files. + tlsDir = "tls" + // k8sSecretName is the Kubernetes TLS Secret name. + k8sSecretName = "obol-stack-tls" + // k8sNamespace is the namespace for the TLS Secret. + k8sNamespace = "traefik" +) + +// CertDir returns the TLS directory path. +func CertDir(configDir string) string { + return filepath.Join(configDir, tlsDir) +} + +// CertPath returns the path to the TLS certificate. +func CertPath(configDir string) string { + return filepath.Join(configDir, tlsDir, certFile) +} + +// KeyPath returns the path to the TLS private key. +func KeyPath(configDir string) string { + return filepath.Join(configDir, tlsDir, keyFile) +} + +// CertsExist checks if both cert and key files exist. +func CertsExist(configDir string) bool { + _, certErr := os.Stat(CertPath(configDir)) + _, keyErr := os.Stat(KeyPath(configDir)) + return certErr == nil && keyErr == nil +} + +// MkcertAvailable checks if the mkcert binary exists in binDir or PATH. +func MkcertAvailable(binDir string) bool { + // Check binDir first + if _, err := os.Stat(filepath.Join(binDir, "mkcert")); err == nil { + return true + } + // Fall back to PATH + _, err := exec.LookPath("mkcert") + return err == nil +} + +// mkcertPath returns the path to the mkcert binary, preferring binDir. +func mkcertPath(binDir string) string { + p := filepath.Join(binDir, "mkcert") + if _, err := os.Stat(p); err == nil { + return p + } + if path, err := exec.LookPath("mkcert"); err == nil { + return path + } + return "mkcert" +} + +// mkcertEnv returns the current environment with JAVA_HOME cleared. +// mkcert checks all trust stores including Java keytool, which can fail +// if the Java keystore is missing or corrupted. Since we only need browser +// trust (OS keychain), we skip the Java trust store entirely. +func mkcertEnv() []string { + var env []string + for _, e := range os.Environ() { + if !strings.HasPrefix(e, "JAVA_HOME=") { + env = append(env, e) + } + } + return env +} + +// GenerateCerts generates a wildcard TLS certificate for *.obol.stack using mkcert. +// It installs the mkcert CA into the system trust store and creates a certificate +// covering wildcard subdomains, the bare domain, localhost, and loopback addresses. +func GenerateCerts(binDir, configDir string) error { + mkcert := mkcertPath(binDir) + env := mkcertEnv() + + // Create TLS directory + dir := CertDir(configDir) + if err := os.MkdirAll(dir, 0755); err != nil { + return fmt.Errorf("failed to create TLS directory: %w", err) + } + + // Install the local CA into the system trust store. + // On macOS this adds to the login keychain; on Linux it updates ca-certificates. 
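+ // Roughly equivalent to running by hand (cert/key paths abridged; sketch only):
+ //
+ //   mkcert -install
+ //   mkcert -cert-file <tls-dir>/obol-stack.pem -key-file <tls-dir>/obol-stack-key.pem \
+ //       "*.obol.stack" obol.stack localhost 127.0.0.1 ::1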
+ installCmd := exec.Command(mkcert, "-install") + installCmd.Env = env + installCmd.Stdout = os.Stdout + installCmd.Stderr = os.Stderr + if err := installCmd.Run(); err != nil { + return fmt.Errorf("mkcert -install failed: %w", err) + } + + // Generate wildcard certificate. + // SANs: *.obol.stack (wildcard subdomains), obol.stack (bare domain), + // localhost + loopback (fallback). + certPath := CertPath(configDir) + keyPath := KeyPath(configDir) + genCmd := exec.Command(mkcert, + "-cert-file", certPath, + "-key-file", keyPath, + "*.obol.stack", + "obol.stack", + "localhost", + "127.0.0.1", + "::1", + ) + genCmd.Env = env + genCmd.Stdout = os.Stdout + genCmd.Stderr = os.Stderr + if err := genCmd.Run(); err != nil { + return fmt.Errorf("mkcert cert generation failed: %w", err) + } + + return nil +} + +// EnsureK8sSecret creates or updates the TLS Secret in the traefik namespace. +// Uses kubectl with --dry-run=client piped to apply for idempotent creation. +func EnsureK8sSecret(binDir, configDir, kubeconfigPath string) error { + kubectl := filepath.Join(binDir, "kubectl") + certPath := CertPath(configDir) + keyPath := KeyPath(configDir) + + // Verify cert files exist + if !CertsExist(configDir) { + return fmt.Errorf("TLS certificate files not found at %s", CertDir(configDir)) + } + + // kubectl create secret tls obol-stack-tls \ + // --cert= --key= -n traefik \ + // --dry-run=client -o yaml | kubectl apply -f - + createCmd := exec.Command(kubectl, + "create", "secret", "tls", k8sSecretName, + "--cert="+certPath, + "--key="+keyPath, + "-n", k8sNamespace, + "--dry-run=client", + "-o", "yaml", + ) + createCmd.Env = append(os.Environ(), "KUBECONFIG="+kubeconfigPath) + + applyCmd := exec.Command(kubectl, + "apply", "-f", "-", + ) + applyCmd.Env = append(os.Environ(), "KUBECONFIG="+kubeconfigPath) + applyCmd.Stderr = os.Stderr + + // Pipe create output to apply stdin + pipe, err := createCmd.StdoutPipe() + if err != nil { + return fmt.Errorf("failed to create pipe: %w", err) + } + applyCmd.Stdin = pipe + + if err := createCmd.Start(); err != nil { + return fmt.Errorf("kubectl create secret failed to start: %w", err) + } + if err := applyCmd.Start(); err != nil { + return fmt.Errorf("kubectl apply failed to start: %w", err) + } + + if err := createCmd.Wait(); err != nil { + return fmt.Errorf("kubectl create secret failed: %w", err) + } + if err := applyCmd.Wait(); err != nil { + return fmt.Errorf("kubectl apply failed: %w", err) + } + + return nil +} diff --git a/internal/tls/tls_test.go b/internal/tls/tls_test.go new file mode 100644 index 0000000..180a9ab --- /dev/null +++ b/internal/tls/tls_test.go @@ -0,0 +1,106 @@ +package tls + +import ( + "os" + "path/filepath" + "runtime" + "testing" +) + +func TestMkcertAvailableAndGenerateCerts(t *testing.T) { + // Find the project workspace bin directory relative to this test file + _, thisFile, _, _ := runtime.Caller(0) + projectRoot := filepath.Join(filepath.Dir(thisFile), "..", "..") + workspaceBin := filepath.Join(projectRoot, ".workspace", "bin") + + // Try common locations for mkcert + binDirs := []string{ + workspaceBin, + os.Getenv("HOME") + "/.local/bin", + } + + var binDir string + for _, d := range binDirs { + if MkcertAvailable(d) { + binDir = d + break + } + } + + if binDir == "" { + t.Skip("mkcert not available in any known location") + } + + t.Logf("mkcert found in: %s", binDir) + + tmpDir := t.TempDir() + + if err := GenerateCerts(binDir, tmpDir); err != nil { + t.Fatalf("GenerateCerts failed: %v", err) + } + + if !CertsExist(tmpDir) { 
+ t.Fatal("CertsExist returned false after GenerateCerts") + } + + // Verify cert files are non-empty + certData, err := os.ReadFile(CertPath(tmpDir)) + if err != nil { + t.Fatalf("failed to read cert: %v", err) + } + if len(certData) == 0 { + t.Error("cert file is empty") + } + + keyData, err := os.ReadFile(KeyPath(tmpDir)) + if err != nil { + t.Fatalf("failed to read key: %v", err) + } + if len(keyData) == 0 { + t.Error("key file is empty") + } + + t.Logf("cert: %d bytes, key: %d bytes", len(certData), len(keyData)) +} + +func TestCertsExist(t *testing.T) { + tmpDir := t.TempDir() + + // No certs yet + if CertsExist(tmpDir) { + t.Error("CertsExist should return false for empty dir") + } + + // Create the tls dir and cert file only + tlsDir := CertDir(tmpDir) + if err := os.MkdirAll(tlsDir, 0755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(CertPath(tmpDir), []byte("cert"), 0644); err != nil { + t.Fatal(err) + } + if CertsExist(tmpDir) { + t.Error("CertsExist should return false with only cert file") + } + + // Create key file too + if err := os.WriteFile(KeyPath(tmpDir), []byte("key"), 0644); err != nil { + t.Fatal(err) + } + if !CertsExist(tmpDir) { + t.Error("CertsExist should return true with both cert and key files") + } +} + +func TestPaths(t *testing.T) { + dir := "/test/config" + if got := CertDir(dir); got != "/test/config/tls" { + t.Errorf("CertDir = %q, want /test/config/tls", got) + } + if got := CertPath(dir); got != "/test/config/tls/obol-stack.pem" { + t.Errorf("CertPath = %q, want /test/config/tls/obol-stack.pem", got) + } + if got := KeyPath(dir); got != "/test/config/tls/obol-stack-key.pem" { + t.Errorf("KeyPath = %q, want /test/config/tls/obol-stack-key.pem", got) + } +} diff --git a/internal/tunnel/cloudflare.go b/internal/tunnel/cloudflare.go new file mode 100644 index 0000000..669e880 --- /dev/null +++ b/internal/tunnel/cloudflare.go @@ -0,0 +1,224 @@ +package tunnel + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "strings" + "time" +) + +type cloudflareTunnel struct { + ID string `json:"id"` + Token string `json:"token"` +} + +type cloudflareClient struct { + apiToken string +} + +func newCloudflareClient(apiToken string) *cloudflareClient { + return &cloudflareClient{apiToken: apiToken} +} + +func (c *cloudflareClient) CreateTunnel(accountID, tunnelName string) (*cloudflareTunnel, error) { + reqBody := map[string]any{ + "name": tunnelName, + "config_src": "cloudflare", + } + + var resp struct { + Success bool `json:"success"` + Errors []struct { + Code int `json:"code"` + Message string `json:"message"` + } `json:"errors"` + Result struct { + ID string `json:"id"` + Token string `json:"token"` + } `json:"result"` + } + + if err := c.doJSON("POST", fmt.Sprintf("https://api.cloudflare.com/client/v4/accounts/%s/cfd_tunnel", accountID), reqBody, &resp); err != nil { + return nil, err + } + if !resp.Success { + return nil, fmt.Errorf("cloudflare tunnel create failed: %v", resp.Errors) + } + return &cloudflareTunnel{ID: resp.Result.ID, Token: resp.Result.Token}, nil +} + +func (c *cloudflareClient) GetTunnelToken(accountID, tunnelID string) (string, error) { + var resp struct { + Success bool `json:"success"` + Errors []any `json:"errors"` + Result string `json:"result"` + } + + if err := c.doJSON("GET", fmt.Sprintf("https://api.cloudflare.com/client/v4/accounts/%s/cfd_tunnel/%s/token", accountID, tunnelID), nil, &resp); err != nil { + return "", err + } + if !resp.Success || resp.Result == "" { + return "", 
fmt.Errorf("cloudflare tunnel token fetch failed") + } + return resp.Result, nil +} + +func (c *cloudflareClient) UpdateTunnelConfiguration(accountID, tunnelID, hostname, serviceURL string) error { + reqBody := map[string]any{ + "config": map[string]any{ + "ingress": []map[string]any{ + { + "hostname": hostname, + "service": serviceURL, + "originRequest": map[string]any{}, + }, + { + "service": "http_status:404", + }, + }, + }, + } + + var resp struct { + Success bool `json:"success"` + Errors []struct { + Code int `json:"code"` + Message string `json:"message"` + } `json:"errors"` + } + + url := fmt.Sprintf("https://api.cloudflare.com/client/v4/accounts/%s/cfd_tunnel/%s/configurations", accountID, tunnelID) + if err := c.doJSON("PUT", url, reqBody, &resp); err != nil { + return err + } + if !resp.Success { + return fmt.Errorf("cloudflare tunnel configuration update failed: %v", resp.Errors) + } + return nil +} + +type dnsRecord struct { + ID string `json:"id"` + Type string `json:"type"` + Name string `json:"name"` + Content string `json:"content"` + Proxied bool `json:"proxied"` +} + +func (c *cloudflareClient) UpsertTunnelDNSRecord(zoneID, hostname, content string) error { + // Find existing records for this exact name/type. + listURL := fmt.Sprintf("https://api.cloudflare.com/client/v4/zones/%s/dns_records?type=CNAME&name=%s", zoneID, url.QueryEscape(hostname)) + var listResp struct { + Success bool `json:"success"` + Errors []struct { + Code int `json:"code"` + Message string `json:"message"` + } `json:"errors"` + Result []dnsRecord `json:"result"` + } + if err := c.doJSON("GET", listURL, nil, &listResp); err != nil { + return err + } + if !listResp.Success { + return fmt.Errorf("cloudflare dns record list failed: %v", listResp.Errors) + } + + if len(listResp.Result) > 0 { + // Update first matching record. + recID := listResp.Result[0].ID + updateURL := fmt.Sprintf("https://api.cloudflare.com/client/v4/zones/%s/dns_records/%s", zoneID, recID) + reqBody := map[string]any{ + "type": "CNAME", + "proxied": true, + "name": hostname, + "content": content, + } + + var updResp struct { + Success bool `json:"success"` + Errors []struct { + Code int `json:"code"` + Message string `json:"message"` + } `json:"errors"` + } + if err := c.doJSON("PUT", updateURL, reqBody, &updResp); err != nil { + return err + } + if !updResp.Success { + return fmt.Errorf("cloudflare dns record update failed: %v", updResp.Errors) + } + return nil + } + + // Create new record. 
+ createURL := fmt.Sprintf("https://api.cloudflare.com/client/v4/zones/%s/dns_records", zoneID) + reqBody := map[string]any{ + "type": "CNAME", + "proxied": true, + "name": hostname, + "content": content, + } + + var createResp struct { + Success bool `json:"success"` + Errors []struct { + Code int `json:"code"` + Message string `json:"message"` + } `json:"errors"` + } + + if err := c.doJSON("POST", createURL, reqBody, &createResp); err != nil { + return err + } + if !createResp.Success { + return fmt.Errorf("cloudflare dns record create failed: %v", createResp.Errors) + } + return nil +} + +func (c *cloudflareClient) doJSON(method, url string, reqBody any, out any) error { + var body []byte + var err error + if reqBody != nil { + body, err = json.Marshal(reqBody) + if err != nil { + return err + } + } + + req, err := http.NewRequest(method, url, bytes.NewReader(body)) + if err != nil { + return err + } + req.Header.Set("Authorization", "Bearer "+c.apiToken) + req.Header.Set("Content-Type", "application/json") + + client := &http.Client{Timeout: 30 * time.Second} + resp, err := client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + + respBody, err := io.ReadAll(resp.Body) + if err != nil { + return err + } + + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + // Best effort: surface body for debugging without leaking secrets. + return fmt.Errorf("cloudflare api error (%s): %s", resp.Status, strings.TrimSpace(string(respBody))) + } + + if out == nil { + return nil + } + if err := json.Unmarshal(respBody, out); err != nil { + return err + } + return nil +} diff --git a/internal/tunnel/login.go b/internal/tunnel/login.go new file mode 100644 index 0000000..de79882 --- /dev/null +++ b/internal/tunnel/login.go @@ -0,0 +1,213 @@ +package tunnel + +import ( + "bytes" + "encoding/base64" + "fmt" + "os" + "os/exec" + "path/filepath" + "regexp" + "strings" + + "github.com/ObolNetwork/obol-stack/internal/config" +) + +type LoginOptions struct { + Hostname string +} + +// Login provisions a locally-managed tunnel using `cloudflared tunnel login` (browser auth), +// then writes the required credentials/config into Kubernetes and upgrades the cloudflared +// Helm release so the in-cluster connector runs the locally-managed tunnel. +// +// Docs: +// - Create a locally-managed tunnel: https://developers.cloudflare.com/cloudflare-one/networks/connectors/cloudflare-tunnel/do-more-with-tunnels/local-management/create-local-tunnel/ +// - Configuration file for published apps: https://developers.cloudflare.com/cloudflare-one/networks/connectors/cloudflare-tunnel/do-more-with-tunnels/local-management/configuration-file/ +// - `origincert` run parameter (locally-managed tunnels): https://developers.cloudflare.com/cloudflare-one/networks/connectors/cloudflare-tunnel/configure-tunnels/cloudflared-parameters/run-parameters/ +func Login(cfg *config.Config, opts LoginOptions) error { + hostname := normalizeHostname(opts.Hostname) + if hostname == "" { + return fmt.Errorf("--hostname is required (e.g. stack.example.com)") + } + + // Stack must be running so we can write secrets/config to the cluster. 
+ kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) { + return fmt.Errorf("stack not running, use 'obol stack up' first") + } + + stackID := getStackID(cfg) + if stackID == "" { + return fmt.Errorf("stack not initialized, run 'obol stack init' first") + } + tunnelName := fmt.Sprintf("obol-stack-%s", stackID) + + cloudflaredPath, err := exec.LookPath("cloudflared") + if err != nil { + return fmt.Errorf("cloudflared not found in PATH. Install it first (e.g. 'brew install cloudflared' on macOS)") + } + + fmt.Println("Authenticating cloudflared (browser)...") + loginCmd := exec.Command(cloudflaredPath, "tunnel", "login") + loginCmd.Stdin = os.Stdin + loginCmd.Stdout = os.Stdout + loginCmd.Stderr = os.Stderr + if err := loginCmd.Run(); err != nil { + return fmt.Errorf("cloudflared tunnel login failed: %w", err) + } + + fmt.Printf("\nCreating tunnel: %s\n", tunnelName) + if out, err := exec.Command(cloudflaredPath, "tunnel", "create", tunnelName).CombinedOutput(); err != nil { + // "Already exists" is common if user re-runs. We'll recover by querying tunnel info. + fmt.Printf("cloudflared tunnel create returned an error (continuing): %s\n", strings.TrimSpace(string(out))) + } + + infoOut, err := exec.Command(cloudflaredPath, "tunnel", "info", tunnelName).CombinedOutput() + if err != nil { + return fmt.Errorf("cloudflared tunnel info failed: %w\n%s", err, strings.TrimSpace(string(infoOut))) + } + tunnelID, err := parseFirstUUID(string(infoOut)) + if err != nil { + return fmt.Errorf("could not parse tunnel UUID from cloudflared tunnel info:\n%s", strings.TrimSpace(string(infoOut))) + } + + cloudflaredDir := defaultCloudflaredDir() + certPath := filepath.Join(cloudflaredDir, "cert.pem") + credPath := filepath.Join(cloudflaredDir, tunnelID+".json") + + cert, err := os.ReadFile(certPath) + if err != nil { + return fmt.Errorf("failed to read %s: %w", certPath, err) + } + cred, err := os.ReadFile(credPath) + if err != nil { + return fmt.Errorf("failed to read %s: %w", credPath, err) + } + + fmt.Printf("\nCreating DNS route for %s...\n", hostname) + routeOut, err := exec.Command(cloudflaredPath, "tunnel", "route", "dns", tunnelName, hostname).CombinedOutput() + if err != nil { + return fmt.Errorf("cloudflared tunnel route dns failed: %w\n%s", err, strings.TrimSpace(string(routeOut))) + } + + if err := applyLocalManagedK8sResources(cfg, kubeconfigPath, hostname, tunnelID, cert, cred); err != nil { + return err + } + + // Re-render the chart so it flips from quick tunnel to locally-managed. 
+ if err := helmUpgradeCloudflared(cfg, kubeconfigPath); err != nil { + return err + } + + st, _ := loadTunnelState(cfg) + if st == nil { + st = &tunnelState{} + } + st.Mode = "dns" + st.Hostname = hostname + st.TunnelID = tunnelID + st.TunnelName = tunnelName + if err := saveTunnelState(cfg, st); err != nil { + return fmt.Errorf("tunnel created, but failed to save local state: %w", err) + } + + fmt.Println("\n✓ Tunnel login complete") + fmt.Printf("Persistent URL: https://%s\n", hostname) + fmt.Println("Tip: run 'obol tunnel status' to verify the connector is active.") + return nil +} + +func defaultCloudflaredDir() string { + home, err := os.UserHomeDir() + if err != nil { + return ".cloudflared" + } + return filepath.Join(home, ".cloudflared") +} + +func parseFirstUUID(s string) (string, error) { + re := regexp.MustCompile(`[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}`) + if m := re.FindString(strings.ToLower(s)); m != "" { + return m, nil + } + return "", fmt.Errorf("uuid not found") +} + +func applyLocalManagedK8sResources(cfg *config.Config, kubeconfigPath, hostname, tunnelID string, certPEM, credJSON []byte) error { + // Secret: account certificate + tunnel credentials (locally-managed tunnel requires origincert). + secretYAML, err := buildLocalManagedSecretYAML(hostname, certPEM, credJSON) + if err != nil { + return err + } + if err := kubectlApply(cfg, kubeconfigPath, secretYAML); err != nil { + return err + } + + // ConfigMap: config.yml + tunnel_id used for command arg expansion. + cfgYAML := buildLocalManagedConfigYAML(hostname, tunnelID) + if err := kubectlApply(cfg, kubeconfigPath, cfgYAML); err != nil { + return err + } + + return nil +} + +const ( + localManagedSecretName = "cloudflared-local-credentials" + localManagedConfigMapName = "cloudflared-local-config" +) + +func buildLocalManagedSecretYAML(hostname string, certPEM, credJSON []byte) ([]byte, error) { + certB64 := base64.StdEncoding.EncodeToString(certPEM) + credB64 := base64.StdEncoding.EncodeToString(credJSON) + + secret := fmt.Sprintf(`apiVersion: v1 +kind: Secret +metadata: + name: %s + namespace: %s +type: Opaque +data: + cert.pem: %s + credentials.json: %s +`, localManagedSecretName, tunnelNamespace, certB64, credB64) + _ = hostname // reserved for future labels/annotations + return []byte(secret), nil +} + +func buildLocalManagedConfigYAML(hostname, tunnelID string) []byte { + cfg := fmt.Sprintf(`apiVersion: v1 +kind: ConfigMap +metadata: + name: %s + namespace: %s +data: + tunnel_id: %s + config.yml: | + tunnel: %s + credentials-file: /etc/cloudflared/credentials.json + + ingress: + - hostname: %s + service: http://traefik.traefik.svc.cluster.local:80 + - service: http_status:404 +`, localManagedConfigMapName, tunnelNamespace, tunnelID, tunnelID, hostname) + return []byte(cfg) +} + +func kubectlApply(cfg *config.Config, kubeconfigPath string, manifest []byte) error { + kubectlPath := filepath.Join(cfg.BinDir, "kubectl") + + cmd := exec.Command(kubectlPath, + "--kubeconfig", kubeconfigPath, + "apply", "-f", "-", + ) + cmd.Stdin = bytes.NewReader(manifest) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("kubectl apply failed: %w", err) + } + return nil +} diff --git a/internal/tunnel/provision.go b/internal/tunnel/provision.go new file mode 100644 index 0000000..b4c592a --- /dev/null +++ b/internal/tunnel/provision.go @@ -0,0 +1,206 @@ +package tunnel + +import ( + "bytes" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + + 
"github.com/ObolNetwork/obol-stack/internal/config" +) + +// ProvisionOptions configures `obol tunnel provision`. +type ProvisionOptions struct { + Hostname string + AccountID string + ZoneID string + APIToken string +} + +// Provision provisions a persistent Cloudflare Tunnel routed via a proxied DNS record. +// +// Based on Cloudflare's "Create a tunnel (API)" guide: +// - POST /accounts/$ACCOUNT_ID/cfd_tunnel +// - PUT /accounts/$ACCOUNT_ID/cfd_tunnel/$TUNNEL_ID/configurations +// - POST /zones/$ZONE_ID/dns_records (proxied CNAME to .cfargotunnel.com) +func Provision(cfg *config.Config, opts ProvisionOptions) error { + hostname := normalizeHostname(opts.Hostname) + if hostname == "" { + return fmt.Errorf("--hostname is required (e.g. stack.example.com)") + } + if opts.AccountID == "" { + return fmt.Errorf("--account-id is required (or set CLOUDFLARE_ACCOUNT_ID)") + } + if opts.ZoneID == "" { + return fmt.Errorf("--zone-id is required (or set CLOUDFLARE_ZONE_ID)") + } + if opts.APIToken == "" { + return fmt.Errorf("--api-token is required (or set CLOUDFLARE_API_TOKEN)") + } + + // Stack must be running so we can store the tunnel token in-cluster. + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) { + return fmt.Errorf("stack not running, use 'obol stack up' first") + } + + stackID := getStackID(cfg) + if stackID == "" { + return fmt.Errorf("stack not initialized, run 'obol stack init' first") + } + tunnelName := fmt.Sprintf("obol-stack-%s", stackID) + + client := newCloudflareClient(opts.APIToken) + + // Try to reuse existing local state to keep the same tunnel ID. + st, _ := loadTunnelState(cfg) + if st != nil && st.AccountID == opts.AccountID && st.ZoneID == opts.ZoneID && st.TunnelID != "" && st.TunnelName != "" { + tunnelName = st.TunnelName + } + + fmt.Println("Provisioning Cloudflare Tunnel (API)...") + fmt.Printf("Hostname: %s\n", hostname) + fmt.Printf("Tunnel: %s\n", tunnelName) + + tunnelID := "" + tunnelToken := "" + + if st != nil && st.AccountID == opts.AccountID && st.TunnelID != "" { + tunnelID = st.TunnelID + tok, err := client.GetTunnelToken(opts.AccountID, tunnelID) + if err != nil { + // If the tunnel no longer exists, create a new one. + fmt.Printf("Existing tunnel token fetch failed (%v); creating a new tunnel...\n", err) + tunnelID = "" + } else { + tunnelToken = tok + } + } + + if tunnelID == "" { + t, err := client.CreateTunnel(opts.AccountID, tunnelName) + if err != nil { + return err + } + tunnelID = t.ID + tunnelToken = t.Token + } + + if err := client.UpdateTunnelConfiguration(opts.AccountID, tunnelID, hostname, "http://traefik.traefik.svc.cluster.local:80"); err != nil { + return err + } + + if err := client.UpsertTunnelDNSRecord(opts.ZoneID, hostname, tunnelID+".cfargotunnel.com"); err != nil { + return err + } + + if err := applyTunnelTokenSecret(cfg, kubeconfigPath, tunnelToken); err != nil { + return err + } + + // Ensure cloudflared switches to remotely-managed mode immediately (chart defaults to mode:auto). 
+ if err := helmUpgradeCloudflared(cfg, kubeconfigPath); err != nil { + return err + } + + if st == nil { + st = &tunnelState{} + } + st.Mode = "dns" + st.Hostname = hostname + st.AccountID = opts.AccountID + st.ZoneID = opts.ZoneID + st.TunnelID = tunnelID + st.TunnelName = tunnelName + + if err := saveTunnelState(cfg, st); err != nil { + return fmt.Errorf("tunnel provisioned, but failed to save local state: %w", err) + } + + fmt.Println("\n✓ Tunnel provisioned") + fmt.Printf("Persistent URL: https://%s\n", hostname) + fmt.Println("Tip: run 'obol tunnel status' to verify the connector is active.") + return nil +} + +func normalizeHostname(s string) string { + s = strings.TrimSpace(s) + s = strings.TrimSuffix(s, "/") + s = strings.TrimPrefix(s, "https://") + s = strings.TrimPrefix(s, "http://") + + // Strip any path/query fragments users accidentally paste. + if idx := strings.IndexByte(s, '/'); idx >= 0 { + s = s[:idx] + } + if idx := strings.IndexByte(s, '?'); idx >= 0 { + s = s[:idx] + } + if idx := strings.IndexByte(s, '#'); idx >= 0 { + s = s[:idx] + } + + return strings.ToLower(s) +} + +func applyTunnelTokenSecret(cfg *config.Config, kubeconfigPath, token string) error { + kubectlPath := filepath.Join(cfg.BinDir, "kubectl") + + createCmd := exec.Command(kubectlPath, + "--kubeconfig", kubeconfigPath, + "-n", tunnelNamespace, + "create", "secret", "generic", tunnelTokenSecretName, + fmt.Sprintf("--from-literal=%s=%s", tunnelTokenSecretKey, token), + "--dry-run=client", + "-o", "yaml", + ) + out, err := createCmd.Output() + if err != nil { + return fmt.Errorf("failed to create secret manifest: %w", err) + } + + applyCmd := exec.Command(kubectlPath, + "--kubeconfig", kubeconfigPath, + "apply", "-f", "-", + ) + applyCmd.Stdin = bytes.NewReader(out) + applyCmd.Stdout = os.Stdout + applyCmd.Stderr = os.Stderr + if err := applyCmd.Run(); err != nil { + return fmt.Errorf("failed to apply tunnel token secret: %w", err) + } + return nil +} + +func helmUpgradeCloudflared(cfg *config.Config, kubeconfigPath string) error { + helmPath := filepath.Join(cfg.BinDir, "helm") + defaultsDir := filepath.Join(cfg.ConfigDir, "defaults") + + if _, err := os.Stat(helmPath); os.IsNotExist(err) { + return fmt.Errorf("helm not found at %s", helmPath) + } + if _, err := os.Stat(filepath.Join(defaultsDir, "cloudflared", "Chart.yaml")); os.IsNotExist(err) { + return fmt.Errorf("cloudflared chart not found in %s (re-run 'obol stack init --force' to refresh defaults)", defaultsDir) + } + + // Run from the defaults dir so "./cloudflared" resolves correctly. 
+ cmd := exec.Command(helmPath, + "--kubeconfig", kubeconfigPath, + "upgrade", + "--install", + "cloudflared", + "./cloudflared", + "--namespace", tunnelNamespace, + "--wait", + "--timeout", "2m", + ) + cmd.Dir = defaultsDir + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("failed to upgrade cloudflared release: %w", err) + } + return nil +} diff --git a/internal/tunnel/stackid.go b/internal/tunnel/stackid.go new file mode 100644 index 0000000..a7cd6f2 --- /dev/null +++ b/internal/tunnel/stackid.go @@ -0,0 +1,19 @@ +package tunnel + +import ( + "os" + "path/filepath" + "strings" + + "github.com/ObolNetwork/obol-stack/internal/config" +) + +const stackIDFile = ".stack-id" + +func getStackID(cfg *config.Config) string { + data, err := os.ReadFile(filepath.Join(cfg.ConfigDir, stackIDFile)) + if err != nil { + return "" + } + return strings.TrimSpace(string(data)) +} diff --git a/internal/tunnel/state.go b/internal/tunnel/state.go new file mode 100644 index 0000000..f7b026d --- /dev/null +++ b/internal/tunnel/state.go @@ -0,0 +1,62 @@ +package tunnel + +import ( + "encoding/json" + "os" + "path/filepath" + "time" + + "github.com/ObolNetwork/obol-stack/internal/config" +) + +type tunnelState struct { + Mode string `json:"mode"` // "quick" or "dns" + Hostname string `json:"hostname"` + AccountID string `json:"account_id,omitempty"` + ZoneID string `json:"zone_id,omitempty"` + TunnelID string `json:"tunnel_id,omitempty"` + TunnelName string `json:"tunnel_name,omitempty"` + UpdatedAt time.Time `json:"updated_at"` +} + +func tunnelStatePath(cfg *config.Config) string { + return filepath.Join(cfg.ConfigDir, "tunnel", "cloudflared.json") +} + +func loadTunnelState(cfg *config.Config) (*tunnelState, error) { + data, err := os.ReadFile(tunnelStatePath(cfg)) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, err + } + + var st tunnelState + if err := json.Unmarshal(data, &st); err != nil { + return nil, err + } + return &st, nil +} + +func saveTunnelState(cfg *config.Config, st *tunnelState) error { + if err := os.MkdirAll(filepath.Dir(tunnelStatePath(cfg)), 0755); err != nil { + return err + } + st.UpdatedAt = time.Now().UTC() + + data, err := json.MarshalIndent(st, "", " ") + if err != nil { + return err + } + + // Contains non-secret metadata only, but keep it user-private by default. + return os.WriteFile(tunnelStatePath(cfg), data, 0600) +} + +func tunnelModeAndURL(st *tunnelState) (mode, url string) { + if st != nil && st.Hostname != "" { + return "dns", "https://" + st.Hostname + } + return "quick", "" +} diff --git a/internal/tunnel/tunnel.go b/internal/tunnel/tunnel.go new file mode 100644 index 0000000..1ad3f23 --- /dev/null +++ b/internal/tunnel/tunnel.go @@ -0,0 +1,200 @@ +package tunnel + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "regexp" + "strings" + "time" + + "github.com/ObolNetwork/obol-stack/internal/config" +) + +const ( + tunnelNamespace = "traefik" + tunnelLabelSelector = "app.kubernetes.io/name=cloudflared" + + // cloudflared-tunnel-token is created by `obol tunnel provision`. + tunnelTokenSecretName = "cloudflared-tunnel-token" + tunnelTokenSecretKey = "TUNNEL_TOKEN" +) + +// Status displays the current tunnel status and URL. +func Status(cfg *config.Config) error { + kubectlPath := filepath.Join(cfg.BinDir, "kubectl") + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + + // Check if kubeconfig exists. 
+ if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) { + return fmt.Errorf("stack not running, use 'obol stack up' first") + } + + st, _ := loadTunnelState(cfg) + + // Check pod status first. + podStatus, err := getPodStatus(kubectlPath, kubeconfigPath) + if err != nil { + mode, url := tunnelModeAndURL(st) + printStatusBox(mode, "not deployed", url, time.Now()) + fmt.Println("\nTroubleshooting:") + fmt.Println(" - Start the stack: obol stack up") + return nil + } + + statusLabel := podStatus + if podStatus == "running" { + statusLabel = "active" + } + + mode, url := tunnelModeAndURL(st) + if mode == "quick" { + // Quick tunnels only: try to get URL from logs. + u, err := GetTunnelURL(cfg) + if err != nil { + printStatusBox(mode, podStatus, "(not available)", time.Now()) + fmt.Println("\nTroubleshooting:") + fmt.Println(" - Check logs: obol tunnel logs") + fmt.Println(" - Restart tunnel: obol tunnel restart") + return nil + } + url = u + } + + printStatusBox(mode, statusLabel, url, time.Now()) + fmt.Printf("\nTest with: curl %s/\n", url) + + return nil +} + +// GetTunnelURL parses cloudflared logs to extract the quick tunnel URL. +func GetTunnelURL(cfg *config.Config) (string, error) { + kubectlPath := filepath.Join(cfg.BinDir, "kubectl") + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + + cmd := exec.Command(kubectlPath, + "--kubeconfig", kubeconfigPath, + "logs", "-n", tunnelNamespace, + "-l", tunnelLabelSelector, + "--tail=100", + ) + + output, err := cmd.Output() + if err != nil { + return "", fmt.Errorf("failed to get tunnel logs: %w", err) + } + + if url, ok := parseQuickTunnelURL(string(output)); ok { + return url, nil + } + + // Back-compat: allow cfargotunnel.com to be detected too. + re := regexp.MustCompile(`https://[a-z0-9-]+\.cfargotunnel\.com`) + if url := re.FindString(string(output)); url != "" { + return url, nil + } + + return "", fmt.Errorf("tunnel URL not found in logs") +} + +// Restart restarts the cloudflared deployment. +func Restart(cfg *config.Config) error { + kubectlPath := filepath.Join(cfg.BinDir, "kubectl") + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + + // Check if kubeconfig exists. + if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) { + return fmt.Errorf("stack not running, use 'obol stack up' first") + } + + fmt.Println("Restarting cloudflared tunnel...") + + cmd := exec.Command(kubectlPath, + "--kubeconfig", kubeconfigPath, + "rollout", "restart", "deployment/cloudflared", + "-n", tunnelNamespace, + ) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + + if err := cmd.Run(); err != nil { + return fmt.Errorf("failed to restart tunnel: %w", err) + } + + fmt.Println("\nTunnel restarting...") + fmt.Println("Run 'obol tunnel status' to see the URL once ready (may take 10-30 seconds).") + + return nil +} + +// Logs displays cloudflared logs. +func Logs(cfg *config.Config, follow bool) error { + kubectlPath := filepath.Join(cfg.BinDir, "kubectl") + kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") + + // Check if kubeconfig exists. + if _, err := os.Stat(kubeconfigPath); os.IsNotExist(err) { + return fmt.Errorf("stack not running, use 'obol stack up' first") + } + + args := []string{ + "--kubeconfig", kubeconfigPath, + "logs", "-n", tunnelNamespace, + "-l", tunnelLabelSelector, + } + + if follow { + args = append(args, "-f") + } + + cmd := exec.Command(kubectlPath, args...) 
+ cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + cmd.Stdin = os.Stdin + + return cmd.Run() +} + +// getPodStatus returns the status of the cloudflared pod. +func getPodStatus(kubectlPath, kubeconfigPath string) (string, error) { + cmd := exec.Command(kubectlPath, + "--kubeconfig", kubeconfigPath, + "get", "pods", "-n", tunnelNamespace, + "-l", tunnelLabelSelector, + "-o", "jsonpath={.items[0].status.phase}", + ) + + output, err := cmd.Output() + if err != nil { + return "", err + } + + status := strings.TrimSpace(string(output)) + if status == "" { + return "", fmt.Errorf("no pods found") + } + + return strings.ToLower(status), nil +} + +// printStatusBox prints a formatted status box. +func printStatusBox(mode, status, url string, lastUpdated time.Time) { + fmt.Println() + fmt.Println("Cloudflare Tunnel Status") + fmt.Println(strings.Repeat("─", 50)) + fmt.Printf("Mode: %s\n", mode) + fmt.Printf("Status: %s\n", status) + fmt.Printf("URL: %s\n", url) + fmt.Printf("Last Updated: %s\n", lastUpdated.Format(time.RFC3339)) + fmt.Println(strings.Repeat("─", 50)) +} + +func parseQuickTunnelURL(logs string) (string, bool) { + // Quick tunnel logs print a random *.trycloudflare.com URL. + re := regexp.MustCompile(`https://[a-z0-9-]+\.trycloudflare\.com`) + if url := re.FindString(logs); url != "" { + return url, true + } + return "", false +} diff --git a/internal/tunnel/tunnel_test.go b/internal/tunnel/tunnel_test.go new file mode 100644 index 0000000..74f8f3e --- /dev/null +++ b/internal/tunnel/tunnel_test.go @@ -0,0 +1,37 @@ +package tunnel + +import "testing" + +func TestNormalizeHostname(t *testing.T) { + tests := []struct { + in string + want string + }{ + {"stack.example.com", "stack.example.com"}, + {"https://stack.example.com", "stack.example.com"}, + {"http://stack.example.com/", "stack.example.com"}, + {"https://stack.example.com/foo?bar=baz#x", "stack.example.com"}, + {" stack.example.com ", "stack.example.com"}, + } + + for _, tt := range tests { + if got := normalizeHostname(tt.in); got != tt.want { + t.Fatalf("normalizeHostname(%q)=%q want %q", tt.in, got, tt.want) + } + } +} + +func TestParseQuickTunnelURL(t *testing.T) { + logs := ` +2026-01-14T12:00:00Z INF | Your quick tunnel URL is: | +2026-01-14T12:00:00Z INF | https://seasonal-deck-organisms-sf.trycloudflare.com | +` + + url, ok := parseQuickTunnelURL(logs) + if !ok { + t.Fatalf("expected ok=true") + } + if url != "https://seasonal-deck-organisms-sf.trycloudflare.com" { + t.Fatalf("unexpected url: %q", url) + } +} diff --git a/notes.md b/notes.md index 025b7ef..6550e6a 100644 --- a/notes.md +++ b/notes.md @@ -6,7 +6,7 @@ - obol agent - skeleton out the cmd - this should have a dummy manifest which templates a config map secret - - obol agent init, gets the secret from google account + - OKR-1: default LLM flow is llms.py -> Ollama Cloud (no API key copy/paste) - frontend (default) - erpc, helios (default) diff --git a/obolup.sh b/obolup.sh index 2741a53..3d0c515 100755 --- a/obolup.sh +++ b/obolup.sh @@ -49,12 +49,13 @@ fi # Pinned dependency versions # Update these versions to upgrade dependencies across all installations -readonly KUBECTL_VERSION="1.31.0" -readonly HELM_VERSION="3.19.1" +readonly KUBECTL_VERSION="1.35.0" +readonly HELM_VERSION="3.19.4" readonly K3D_VERSION="5.8.3" -readonly HELMFILE_VERSION="1.2.2" -readonly K9S_VERSION="0.32.5" -readonly HELM_DIFF_VERSION="3.9.11" +readonly HELMFILE_VERSION="1.2.3" +readonly K9S_VERSION="0.50.18" +readonly HELM_DIFF_VERSION="3.14.1" +readonly MKCERT_VERSION="1.4.4" # 
Repository URL for building from source readonly OBOL_REPO_URL="git@github.com:ObolNetwork/obol-stack.git" @@ -990,6 +991,121 @@ install_k9s() { fi } +# Install openclaw CLI +# Unlike other tools, openclaw has no standalone binary downloads. +# It's distributed as an npm package, so we install it locally into +# OBOL_BIN_DIR using npm --prefix to keep it workspace-contained. +install_openclaw() { + # Remove broken symlink if exists + remove_broken_symlink "openclaw" + + # Check for global openclaw first (same pattern as kubectl, helm, etc.) + local global_openclaw + if global_openclaw=$(check_global_binary "openclaw"); then + if create_binary_symlink "openclaw" "$global_openclaw"; then + log_success "openclaw already installed at: $global_openclaw (symlinked)" + else + log_success "openclaw already installed at: $global_openclaw" + fi + return 0 + fi + + # Check if already in OBOL_BIN_DIR + if [[ -f "$OBOL_BIN_DIR/openclaw" ]]; then + log_success "openclaw already installed" + return 0 + fi + + log_info "Installing openclaw CLI..." + + # Require Node.js 22+ and npm + if ! command_exists npm; then + log_warn "npm not found — cannot install openclaw CLI" + echo "" + echo " Install Node.js 22+ first, then re-run obolup.sh" + echo " Or install manually: npm install -g openclaw" + echo "" + return 1 + fi + + local node_major + node_major=$(node --version 2>/dev/null | sed 's/v//' | cut -d. -f1) + if [[ -z "$node_major" ]] || [[ "$node_major" -lt 22 ]]; then + log_warn "Node.js 22+ required for openclaw (found: v${node_major:-none})" + echo "" + echo " Upgrade Node.js, then re-run obolup.sh" + echo " Or install manually: npm install -g openclaw" + echo "" + return 1 + fi + + # Install into OBOL_BIN_DIR using npm --prefix so the package lives + # alongside the other managed binaries (works for both production + # ~/.local/bin and development .workspace/bin layouts). + local npm_prefix="$OBOL_BIN_DIR/.openclaw-npm" + log_info "Installing openclaw via npm into $OBOL_BIN_DIR..." + + if npm install --prefix "$npm_prefix" openclaw 2>&1; then + # Create a wrapper script in OBOL_BIN_DIR that invokes the local install. + # npm --prefix puts the .bin stubs in node_modules/.bin/ which handle + # the correct entry point (openclaw.mjs) automatically. + cat > "$OBOL_BIN_DIR/openclaw" < Dashboard (Next.js, Better Auth) + -> POST /api/copilotkit (server route) + -> HttpAgent -> obol-agent (FastAPI / Google ADK) + -> Gemini via GOOGLE_API_KEY (direct) +``` + +--- + +## Proposed target architecture (with LLMSpy + Ollama; cloud-first) + +### Runtime request flow (agent query) +``` +Browser (signed-in) + -> Dashboard (Next.js) + -> /api/copilotkit (server; auth-gated) + -> obol-agent (FastAPI/ADK, AG-UI) + -> LiteLLM client (OpenAI-compatible) + -> LLMSpy (llms.py) [cluster-internal service] + -> Provider A: Local (Ollama) [no keys, default] + -> Provider B+: Remote (optional; keys/OAuth later) +``` + +### Deployment topology (Kubernetes) +Namespaces: +- `agent` + - `obol-agent` Deployment (existing) +- `llm` (new) + - **`llmspy`** (`llms.py`) Deployment + ClusterIP Service + - **`ollama`** Deployment + ClusterIP Service (default provider) + - Optional model warmup Job (`ollama pull `) + +Storage: +- Ollama runtime + model cache uses `emptyDir` (ephemeral). +- **Ollama Cloud auth key**: + - Minimum viable: also `emptyDir` (user reconnects after pod restart). + - Recommended: mount a small PVC or Secret-backed volume for `/root/.ollama/id_ed25519` so reconnect isn’t needed after upgrades/restarts. 
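+
+A minimal sketch of that recommended setup (all names here are assumptions, not final manifests: namespace `llm`, a Secret `ollama-cloud-key` holding the `id_ed25519` identity file, and an unpinned image tag):
+
+```yaml
+# Sketch only: Secret name, namespace, and image tag are illustrative.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: ollama
+  namespace: llm
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: ollama
+  template:
+    metadata:
+      labels:
+        app: ollama
+    spec:
+      containers:
+        - name: ollama
+          image: ollama/ollama:latest   # pin a specific tag in the real manifest
+          ports:
+            - containerPort: 11434
+          volumeMounts:
+            - name: ollama-home          # runtime + model cache (ephemeral)
+              mountPath: /root/.ollama
+            - name: ollama-cloud-key     # keeps the Cloud connection across restarts
+              mountPath: /root/.ollama/id_ed25519
+              subPath: id_ed25519
+              readOnly: true
+      volumes:
+        - name: ollama-home
+          emptyDir: {}
+        - name: ollama-cloud-key
+          secret:
+            secretName: ollama-cloud-key   # assumed pre-created Secret with the identity key
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: ollama
+  namespace: llm
+spec:
+  type: ClusterIP
+  selector:
+    app: ollama
+  ports:
+    - port: 11434
+      targetPort: 11434
+```
+
+The Service name and namespace line up with the `ollama.llm.svc.cluster.local:11434` address used later in this plan.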
+ +--- + +## UX: “≤5 actions” and “≤10 minutes” target + +### Default flow (no API keys) +**Default provider:** Ollama (in-cluster) via LLMSpy, using **Ollama Cloud models** (e.g. `glm-4.7:cloud`). + +Target action count: +1. Install Obol Stack CLI (existing flow) +2. `obol stack init` (if required by current UX) +3. `obol stack up` +4. Open Dashboard URL and sign in +5. Send first message in agent sidebar + +Notes: +- Remove the **mandatory** `obol agent init` step from the default path. +- Replace the “paste an API key” step with an **Ollama Cloud connect** step: + - If Ollama isn’t signed in, show a “Connect Ollama Cloud” action in the dashboard. + - Clicking it surfaces the `https://ollama.com/connect?...` URL returned by the Ollama API and guides the user through login. + +### Time-to-first-query tactics +- Default to a **cloud model** to avoid GPU/VRAM constraints: + - `glm-4.7:cloud` is explicitly supported as a cloud model in Ollama. +- Add a lightweight warmup/prefetch mechanism: + - Post-install Job: `ollama pull glm-4.7:cloud` (downloads the stub/metadata so first chat is faster) + - Readiness gate: “ready” once Ollama is connected and the model is pullable +- Ensure agent readiness checks are reliable and fast: + - Keep `/api/copilotkit/health` public (already required) + - Add `llmspy` and `ollama` readiness checks and surface status in the UI + +--- + +## Configuration model + +### LLMSpy +LLMSpy is configured by `~/.llms/llms.json` (in-container: `/home/llms/.llms/llms.json`). + +We will manage this in-cluster using: +- ConfigMap for `llms.json` +- Volume mount to `/home/llms/.llms` (likely `emptyDir`; no secrets required for Ollama) + +Runtime: +- Prefer the upstream-published container image for reproducibility: + - `ghcr.io/servicestack/llms:v2.0.30` (pinned) + +Key config points (concrete based on llms.py docs): +- Only one enabled provider: `ollama` +- `providers.ollama.type = "OllamaProvider"` +- `providers.ollama.base_url = "http://ollama.llm.svc.cluster.local:11434"` +- `providers.ollama.all_models = true` (or restrict to `glm-4.7:cloud`) +- `defaults.text.model = "glm-4.7:cloud"` + +### Obol Agent +Make the agent model/backend configurable: +- `LLM_BACKEND`: + - `gemini` (existing path, requires `GOOGLE_API_KEY`) + - `llmspy` (new default path) +- `LLM_MODEL` (default to the cloud model) +- `OPENAI_API_BASE` set to `http://llmspy.llm.svc.cluster.local:/v1` +- `OPENAI_API_KEY` set to a dummy value (LiteLLM/OpenAI provider compatibility) + +NOTE: With `llmspy` as backend, the agent sends OpenAI-style requests to LLMSpy and LLMSpy forwards to Ollama. + +## Default model choice +Use `glm-4.7:cloud` by default to maximize quality and avoid local GPU requirements. + +This keeps the “no manual API key copy/paste” OKR achievable because Ollama supports a browser-based connect flow (user signs in; Ollama authenticates subsequent cloud requests). + +## OpenClaw tie-in (validation + reuse) +We can validate “tool-calling robustness” of the chosen Ollama model in two ways: + +1) **Direct OpenClaw + Ollama** (matches Ollama’s built-in `openclaw` integration) + - OpenClaw already supports an Ollama provider using the OpenAI-compatible `/v1` API. + - Ollama’s own code includes an integration that edits `~/.openclaw/openclaw.json` to point at Ollama and set `agents.defaults.model.primary`. 
+ +2) **OpenClaw + LLMSpy (preferred for consistency)** + - Configure OpenClaw’s “OpenAI” provider baseUrl to LLMSpy (`http://llmspy.llm.svc.cluster.local:/v1`) + - This ensures OpenClaw and Obol Agent exercise the same gateway path. + +We should treat OpenClaw as: +- A **validation harness** for model/tool behavior (pre-flight testing + regression checks) +- Potential future **multi-channel UX** (WhatsApp/Telegram/etc) once dashboard MVP is stable + +### Obol Stack CLI changes (user-facing) +Reframe `obol agent init` into a provider configuration command: +- Default: **no command needed** +- Optional: `obol agent configure --provider <...>` or `obol agent set-llm --provider <...>` + - Writes K8s secrets/configmaps and triggers rollout restart of `obol-agent` and/or `llmspy` + +--- + +## Security & exposure +- Dashboard remains protected by Better Auth (Google now; GitHub later). +- `/rpc/*` remains public/unprotected (x402 responsibility). +- `/api/copilotkit/health` remains public for monitoring. +- **LLMSpy and Ollama remain cluster-internal by default**: + - No HTTPRoute for them + - ClusterIP only + - (Optional later) expose behind dashboard auth for debugging + +Threat model considerations: +- Ensure LLMSpy cannot be used as an open relay from the internet. +- Ensure remote provider keys (if configured) never get logged or surfaced in UI. + +--- + +## Observability + OKR measurement plan + +### Metrics we can measure in-product (self-hosted) +- `agent_query_success_total` / `agent_query_error_total` +- `agent_query_latency_seconds` histogram +- `agent_first_success_timestamp` (per install) – used for “time to first query” +- `agent_provider_backend` label (gemini vs llmspy; local vs remote) + +### MAU / “install success rate” (cross-install aggregation) +This requires centralized telemetry. Options: +- Opt-in telemetry to an Obol endpoint (privacy-preserving, hashed install id) +- Or a “bring your own analytics” integration (PostHog/Amplitude) + +Proposed approach for this OKR: +- Add **opt-in** telemetry flag at install time +- Emit minimal events: + - `stack_install_completed` + - `agent_ready` + - `agent_first_query_success` + - `agent_returning_user_monthly` (count only) + +--- + +## Implementation workstreams (by repo) + +### 1) `obol-stack` (installer + infra) +- Add `llmspy` Deployment/Service manifest under `internal/embed/infrastructure/base/templates/` +- Add `ollama` Deployment/Service (or allow external Ollama endpoint) +- Add “model warmup” Job (optional but recommended for ≤10 min) +- Add values/env wiring to configure: + - LLMSpy port, config map, and secret mounts + - Obol Agent env vars (`LLM_BACKEND`, `LLM_MODEL`, `OPENAI_API_BASE`, etc.) 
+- Update CLI:
+  - Make `obol agent init` optional or replace with `obol agent configure`
+  - Provide a keyless default; ensure docs and errors reflect new flow
+- Update README (agent quickstart + troubleshooting)
+
+### 2) `obol-agent` (runtime changes)
+- Read `LLM_MODEL` from env (remove hard-coded model)
+- Add `LLM_BACKEND` switch:
+  - `gemini` (current)
+  - `llmspy` using ADK’s `LiteLlm` wrapper + OpenAI-compatible base URL
+- Add health diagnostics:
+  - Include provider status in `/health` (e.g., "llm backend reachable")
+- Add unit/integration tests:
+  - Mock LLMSpy OpenAI endpoint
+  - Verify tool calling works with chosen default local model
+
+### 3) `obol-stack-front-end` (onboarding UX)
+- Replace "run `obol agent init`" message with:
+  - "Agent is initializing" / "Model downloading" (with helpful tips)
+  - A "Retry health check" action
+  - A link to agent setup docs for optional remote providers
+- Add an "Agent Setup" panel:
+  - Shows current backend (local/remote)
+  - Shows readiness status (agent/llmspy/ollama)
+
+### 4) `helm-charts` (if needed)
+- Only if we decide to migrate these new services into charts instead of raw manifests.
+- Otherwise, keep in `base/templates/` for speed.
+
+---
+
+## Milestones
+
+### Milestone A — "Keyless Agent Works Locally"
+Acceptance:
+- Fresh install: no API keys required
+- Agent responds from dashboard
+- Median time to first response ≤ 10 min in test environment
+
+### Milestone B — "Provider Choice"
+Acceptance:
+- Optional remote providers via secrets/config (still no copy/paste required in default)
+- Failover behavior works (local first, remote fallback if configured)
+
+### Milestone C — "OKR Instrumentation"
+Acceptance:
+- Prometheus metrics available
+- Optional telemetry pipeline documented and implemented (if approved)
+
+---
+
+## Open questions (needs product decision)
+1. Do we persist `/root/.ollama/id_ed25519` so the Ollama Cloud connection survives pod restarts/upgrades?
+2. Do we want to expose a "Connect Ollama Cloud" UX in the dashboard (recommended) or require a CLI step?
+3. Telemetry: opt-in vs opt-out; where is the endpoint; privacy guarantees.
+4. Do we expose LLMSpy UI behind auth for debugging, or keep it internal-only?
diff --git a/renovate.json b/renovate.json
index 6932b83..81e8188 100644
--- a/renovate.json
+++ b/renovate.json
@@ -20,6 +20,30 @@
      "datasourceTemplate": "github-releases",
      "depNameTemplate": "ObolNetwork/obol-stack-front-end",
      "versioningTemplate": "semver"
+    },
+    {
+      "customType": "regex",
+      "description": "Update Gateway API release version",
+      "matchStrings": [
+        "gatewayApiVersion:\\s*[\"']?(?<currentValue>v[0-9]+\\.[0-9]+\\.[0-9]+)[\"']?"
+      ],
+      "fileMatch": [
+        "^internal/embed/infrastructure/helmfile\\.yaml$"
+      ],
+      "datasourceTemplate": "github-releases",
+      "depNameTemplate": "kubernetes-sigs/gateway-api",
+      "versioningTemplate": "semver"
+    },
+    {
+      "customType": "regex",
+      "description": "Update OpenClaw version from upstream GitHub releases",
+      "matchStrings": [
+        "#\\s*renovate:\\s*datasource=(?<datasource>.*?)\\s+depName=(?<depName>.*?)\\n(?<currentValue>v[0-9]+\\.[0-9]+\\.[0-9]+)"
+      ],
+      "fileMatch": [
+        "^internal/openclaw/OPENCLAW_VERSION$"
+      ],
+      "versioningTemplate": "semver"
    }
  ],
  "packageRules": [
@@ -76,6 +100,22 @@
      ],
      "dependencyDashboardApproval": true,
      "prBodyTemplate": "⚠️ **MAJOR VERSION UPDATE** ⚠️\n\nThis PR updates **obol-stack-front-end** from `{{currentVersion}}` to `{{newVersion}}`.\n\n### ⚠️ Breaking Changes Expected\n\nMajor version updates may include breaking changes. 
Please review the release notes carefully.\n\n### Release Notes\n\n{{{changelog}}}\n\n### Migration Checklist\n- [ ] Review breaking changes in release notes\n- [ ] Test the new version in staging environment\n- [ ] Update any integration code if needed\n- [ ] Verify deployment scripts still work\n\n---\n**⚠️ This PR requires manual approval due to major version change**\n**Auto-generated by Renovate Bot**" + }, + { + "description": "Group OpenClaw updates", + "matchDatasources": [ + "github-releases" + ], + "matchPackageNames": [ + "openclaw/openclaw" + ], + "labels": [ + "renovate/openclaw" + ], + "schedule": [ + "every hour" + ], + "groupName": "OpenClaw updates" } ] }
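For the OpenClaw regex manager above to find the pinned version, `internal/openclaw/OPENCLAW_VERSION` would need to pair a `renovate:` hint comment with the version on the line directly below it. A minimal sketch of that file's contents (the version shown is illustrative, not a real pin):

```
# renovate: datasource=github-releases depName=openclaw/openclaw
v2026.2.3
```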