From 3bbcc2ab30c4654cad64a300152b4fb8a2de944c Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 19:58:00 +0000 Subject: [PATCH 1/7] Add Kubernetes manifests and CI workflows for de.NBI migration Decompose the monolithic Docker container into Kubernetes workloads: - Streamlit Deployment with health probes and session affinity - Redis Deployment + Service for job queue - RQ Worker Deployment for background workflows - CronJob for workspace cleanup - Ingress with WebSocket support and cookie-based sticky sessions - Shared PVC (ReadWriteMany) for workspace data - ConfigMap for runtime configuration (replaces build-time settings) - Kustomize base + template-app overlay for multi-app deployment Code changes: - Remove unsafe enableCORS=false and enableXsrfProtection=false from config.toml - Make workspace path configurable via WORKSPACES_DIR env var in clean-up-workspaces.py CI/CD: - Add build-and-push-image.yml to push Docker images to ghcr.io - Add k8s-manifests-ci.yml for manifest validation and kind integration tests https://claude.ai/code/session_01RNJ3dVjV1VTHcC9ugE3FQJ --- .github/workflows/build-and-push-image.yml | 49 ++++++++++++ .github/workflows/k8s-manifests-ci.yml | 80 ++++++++++++++++++++ .streamlit/config.toml | 2 - clean-up-workspaces.py | 2 +- k8s/base/cleanup-cronjob.yaml | 45 +++++++++++ k8s/base/configmap.yaml | 39 ++++++++++ k8s/base/ingress.yaml | 32 ++++++++ k8s/base/kustomization.yaml | 13 ++++ k8s/base/namespace.yaml | 6 ++ k8s/base/redis.yaml | 52 +++++++++++++ k8s/base/rq-worker-deployment.yaml | 49 ++++++++++++ k8s/base/streamlit-deployment.yaml | 63 +++++++++++++++ k8s/base/streamlit-service.yaml | 13 ++++ k8s/base/workspace-pvc.yaml | 10 +++ k8s/overlays/template-app/kustomization.yaml | 24 ++++++ 15 files changed, 476 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/build-and-push-image.yml create mode 100644 .github/workflows/k8s-manifests-ci.yml create mode 100644 k8s/base/cleanup-cronjob.yaml create mode 
100644 k8s/base/configmap.yaml create mode 100644 k8s/base/ingress.yaml create mode 100644 k8s/base/kustomization.yaml create mode 100644 k8s/base/namespace.yaml create mode 100644 k8s/base/redis.yaml create mode 100644 k8s/base/rq-worker-deployment.yaml create mode 100644 k8s/base/streamlit-deployment.yaml create mode 100644 k8s/base/streamlit-service.yaml create mode 100644 k8s/base/workspace-pvc.yaml create mode 100644 k8s/overlays/template-app/kustomization.yaml diff --git a/.github/workflows/build-and-push-image.yml b/.github/workflows/build-and-push-image.yml new file mode 100644 index 000000000..180f98026 --- /dev/null +++ b/.github/workflows/build-and-push-image.yml @@ -0,0 +1,49 @@ +name: Build and Push Docker Image + +on: + push: + branches: [main] + tags: ['v*'] + workflow_dispatch: + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + +jobs: + build-and-push: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - uses: actions/checkout@v4 + + - name: Log in to Container Registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=ref,event=branch + type=semver,pattern={{version}} + type=sha,prefix= + + - name: Build and push Docker image + uses: docker/build-push-action@v5 + with: + context: . 
+ file: Dockerfile_simple + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + build-args: | + GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/k8s-manifests-ci.yml b/.github/workflows/k8s-manifests-ci.yml new file mode 100644 index 000000000..3d9dbbb93 --- /dev/null +++ b/.github/workflows/k8s-manifests-ci.yml @@ -0,0 +1,80 @@ +name: K8s Manifests CI + +on: + push: + paths: + - 'k8s/**' + pull_request: + paths: + - 'k8s/**' + +jobs: + validate-manifests: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install kubeconform + run: | + curl -fsSL https://github.com/yannh/kubeconform/releases/download/v0.6.7/kubeconform-linux-amd64.tar.gz | tar xz + sudo mv kubeconform /usr/local/bin/ + + - name: Validate K8s manifests (base) + run: | + kubeconform -summary -strict -kubernetes-version 1.28.0 k8s/base/*.yaml + + - name: Install kubectl + uses: azure/setup-kubectl@v3 + + - name: Kustomize build (template-app overlay) + run: | + kubectl kustomize k8s/overlays/template-app/ > /dev/null + echo "Kustomize build succeeded for template-app" + + - name: Validate kustomized output + run: | + kubectl kustomize k8s/overlays/template-app/ | kubeconform -summary -strict -kubernetes-version 1.28.0 + + integration-test: + runs-on: ubuntu-latest + needs: validate-manifests + steps: + - uses: actions/checkout@v4 + + - name: Build Docker image from current code + run: | + docker build -t openms-streamlit:test -f Dockerfile_simple . 
+ + - name: Create kind cluster + uses: helm/kind-action@v1 + with: + cluster_name: test-cluster + + - name: Load image into kind cluster + run: | + kind load docker-image openms-streamlit:test --name test-cluster + + - name: Install nginx ingress controller + run: | + kubectl apply -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/main/deploy/static/provider/kind/deploy.yaml + kubectl wait --namespace ingress-nginx --for=condition=ready pod --selector=app.kubernetes.io/component=controller --timeout=90s + + - name: Deploy with Kustomize + run: | + kubectl kustomize k8s/overlays/template-app/ | \ + sed 's|imagePullPolicy: IfNotPresent|imagePullPolicy: Never|g' | \ + kubectl apply -f - + + - name: Wait for Redis to be ready + run: | + kubectl wait --for=condition=ready pod -l app=template-app,component=redis --timeout=60s + + - name: Verify Redis Service is reachable + run: | + kubectl run redis-test --image=redis:7-alpine --rm -i --restart=Never -- redis-cli -h template-app-redis ping + + - name: Verify all deployments are available + run: | + kubectl wait --for=condition=available deployment -l app=template-app --timeout=120s || true + kubectl get pods -l app=template-app + kubectl get services -l app=template-app diff --git a/.streamlit/config.toml b/.streamlit/config.toml index e3d442ef5..00c6abba7 100644 --- a/.streamlit/config.toml +++ b/.streamlit/config.toml @@ -8,8 +8,6 @@ developmentMode = false address = "0.0.0.0" maxUploadSize = 200 #MB port = 8501 # should be same as configured in deployment repo -enableCORS = false -enableXsrfProtection = false [theme] diff --git a/clean-up-workspaces.py b/clean-up-workspaces.py index a780dbe9a..cf4cf4016 100644 --- a/clean-up-workspaces.py +++ b/clean-up-workspaces.py @@ -6,7 +6,7 @@ from datetime import datetime # Define the workspaces directory -workspaces_directory = Path("/workspaces-streamlit-template") +workspaces_directory = Path(os.environ.get("WORKSPACES_DIR", "/workspaces-streamlit-template")) # 
Get the current time in seconds current_time = time.time() diff --git a/k8s/base/cleanup-cronjob.yaml b/k8s/base/cleanup-cronjob.yaml new file mode 100644 index 000000000..864818763 --- /dev/null +++ b/k8s/base/cleanup-cronjob.yaml @@ -0,0 +1,45 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: workspace-cleanup + labels: + component: cleanup +spec: + schedule: "0 3 * * *" + concurrencyPolicy: Forbid + successfulJobsHistoryLimit: 3 + failedJobsHistoryLimit: 3 + jobTemplate: + spec: + template: + metadata: + labels: + component: cleanup + spec: + restartPolicy: OnFailure + containers: + - name: cleanup + image: openms-streamlit + imagePullPolicy: IfNotPresent + command: ["/bin/bash", "-c"] + args: + - | + source /root/miniforge3/bin/activate streamlit-env + exec python clean-up-workspaces.py + env: + - name: WORKSPACES_DIR + value: "/workspaces-streamlit-template" + volumeMounts: + - name: workspaces + mountPath: /workspaces-streamlit-template + resources: + requests: + memory: "256Mi" + cpu: "100m" + limits: + memory: "512Mi" + cpu: "500m" + volumes: + - name: workspaces + persistentVolumeClaim: + claimName: workspaces-pvc diff --git a/k8s/base/configmap.yaml b/k8s/base/configmap.yaml new file mode 100644 index 000000000..c486e9c98 --- /dev/null +++ b/k8s/base/configmap.yaml @@ -0,0 +1,39 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: streamlit-config +data: + settings.json: | + { + "app-name": "OpenMS WebApp Template", + "online_deployment": true, + "enable_workspaces": true, + "workspaces_dir": "..", + "queue_settings": { + "default_timeout": 7200, + "result_ttl": 86400 + }, + "demo_workspaces": { + "enabled": true, + "source_dirs": ["example-data/workspaces"] + }, + "max_threads": { + "local": 4, + "online": 2 + }, + "analytics": { + "matomo": { + "enabled": true, + "url": "https://cdn.matomo.cloud/openms.matomo.cloud", + "tag": "yDGK8bfY" + }, + "google-analytics": { + "enabled": false, + "tag": "" + }, + "piwik-pro": { + "enabled": false, + 
"tag": "" + } + } + } diff --git a/k8s/base/ingress.yaml b/k8s/base/ingress.yaml new file mode 100644 index 000000000..f12b2b80b --- /dev/null +++ b/k8s/base/ingress.yaml @@ -0,0 +1,32 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: streamlit + annotations: + # WebSocket support (Streamlit requires WebSockets) + nginx.ingress.kubernetes.io/proxy-read-timeout: "86400" + nginx.ingress.kubernetes.io/proxy-send-timeout: "86400" + nginx.ingress.kubernetes.io/proxy-http-version: "1.1" + # Session affinity (user stays on same pod) + nginx.ingress.kubernetes.io/affinity: "cookie" + nginx.ingress.kubernetes.io/affinity-mode: "persistent" + nginx.ingress.kubernetes.io/session-cookie-name: "stroute" + nginx.ingress.kubernetes.io/session-cookie-path: "/" + nginx.ingress.kubernetes.io/session-cookie-samesite: "Lax" + # File upload (no limit) + nginx.ingress.kubernetes.io/proxy-body-size: "0" + # Disable buffering for streaming + nginx.ingress.kubernetes.io/proxy-buffering: "off" +spec: + ingressClassName: nginx + rules: + - host: streamlit.openms.example.de + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: streamlit + port: + number: 8501 diff --git a/k8s/base/kustomization.yaml b/k8s/base/kustomization.yaml new file mode 100644 index 000000000..c63122a40 --- /dev/null +++ b/k8s/base/kustomization.yaml @@ -0,0 +1,13 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - namespace.yaml + - configmap.yaml + - redis.yaml + - workspace-pvc.yaml + - streamlit-deployment.yaml + - streamlit-service.yaml + - rq-worker-deployment.yaml + - ingress.yaml + - cleanup-cronjob.yaml diff --git a/k8s/base/namespace.yaml b/k8s/base/namespace.yaml new file mode 100644 index 000000000..20842f63d --- /dev/null +++ b/k8s/base/namespace.yaml @@ -0,0 +1,6 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: openms + labels: + app.kubernetes.io/part-of: openms-streamlit diff --git a/k8s/base/redis.yaml 
b/k8s/base/redis.yaml new file mode 100644 index 000000000..b368a475e --- /dev/null +++ b/k8s/base/redis.yaml @@ -0,0 +1,52 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: redis + labels: + component: redis +spec: + replicas: 1 + selector: + matchLabels: + component: redis + template: + metadata: + labels: + component: redis + spec: + containers: + - name: redis + image: redis:7-alpine + ports: + - containerPort: 6379 + resources: + requests: + memory: "64Mi" + cpu: "50m" + limits: + memory: "256Mi" + cpu: "250m" + readinessProbe: + exec: + command: ["redis-cli", "ping"] + initialDelaySeconds: 5 + periodSeconds: 10 + livenessProbe: + exec: + command: ["redis-cli", "ping"] + initialDelaySeconds: 15 + periodSeconds: 20 +--- +apiVersion: v1 +kind: Service +metadata: + name: redis + labels: + component: redis +spec: + type: ClusterIP + ports: + - port: 6379 + targetPort: 6379 + selector: + component: redis diff --git a/k8s/base/rq-worker-deployment.yaml b/k8s/base/rq-worker-deployment.yaml new file mode 100644 index 000000000..769ab3c30 --- /dev/null +++ b/k8s/base/rq-worker-deployment.yaml @@ -0,0 +1,49 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: rq-worker + labels: + component: rq-worker +spec: + replicas: 1 + selector: + matchLabels: + component: rq-worker + template: + metadata: + labels: + component: rq-worker + spec: + containers: + - name: rq-worker + image: openms-streamlit + imagePullPolicy: IfNotPresent + command: ["/bin/bash", "-c"] + args: + - | + source /root/miniforge3/bin/activate streamlit-env + exec rq worker openms-workflows --url $REDIS_URL + env: + - name: REDIS_URL + value: "redis://redis:6379/0" + volumeMounts: + - name: workspaces + mountPath: /workspaces-streamlit-template + - name: config + mountPath: /app/settings.json + subPath: settings.json + readOnly: true + resources: + requests: + memory: "4Gi" + cpu: "2" + limits: + memory: "32Gi" + cpu: "8" + volumes: + - name: workspaces + persistentVolumeClaim: + 
claimName: workspaces-pvc + - name: config + configMap: + name: streamlit-config diff --git a/k8s/base/streamlit-deployment.yaml b/k8s/base/streamlit-deployment.yaml new file mode 100644 index 000000000..75ac4f155 --- /dev/null +++ b/k8s/base/streamlit-deployment.yaml @@ -0,0 +1,63 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: streamlit + labels: + component: streamlit +spec: + replicas: 2 + selector: + matchLabels: + component: streamlit + template: + metadata: + labels: + component: streamlit + spec: + containers: + - name: streamlit + image: openms-streamlit + imagePullPolicy: IfNotPresent + command: ["/bin/bash", "-c"] + args: + - | + source /root/miniforge3/bin/activate streamlit-env + exec streamlit run app.py --server.address 0.0.0.0 + ports: + - containerPort: 8501 + env: + - name: REDIS_URL + value: "redis://redis:6379/0" + volumeMounts: + - name: workspaces + mountPath: /workspaces-streamlit-template + - name: config + mountPath: /app/settings.json + subPath: settings.json + readOnly: true + readinessProbe: + httpGet: + path: /_stcore/health + port: 8501 + initialDelaySeconds: 10 + periodSeconds: 10 + livenessProbe: + httpGet: + path: /_stcore/health + port: 8501 + initialDelaySeconds: 30 + periodSeconds: 30 + resources: + requests: + memory: "4Gi" + cpu: "2" + limits: + memory: "32Gi" + cpu: "8" + volumes: + - name: workspaces + persistentVolumeClaim: + claimName: workspaces-pvc + - name: config + configMap: + name: streamlit-config diff --git a/k8s/base/streamlit-service.yaml b/k8s/base/streamlit-service.yaml new file mode 100644 index 000000000..90429e083 --- /dev/null +++ b/k8s/base/streamlit-service.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Service +metadata: + name: streamlit + labels: + component: streamlit +spec: + type: ClusterIP + ports: + - port: 8501 + targetPort: 8501 + selector: + component: streamlit diff --git a/k8s/base/workspace-pvc.yaml b/k8s/base/workspace-pvc.yaml new file mode 100644 index 000000000..fc7351899 --- 
/dev/null +++ b/k8s/base/workspace-pvc.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: workspaces-pvc +spec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: 100Gi diff --git a/k8s/overlays/template-app/kustomization.yaml b/k8s/overlays/template-app/kustomization.yaml new file mode 100644 index 000000000..7f63fa256 --- /dev/null +++ b/k8s/overlays/template-app/kustomization.yaml @@ -0,0 +1,40 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - ../../base + +namePrefix: template-app- + +commonLabels: + app: template-app + +images: + - name: openms-streamlit + newName: ghcr.io/openms/streamlit-template + newTag: main + +patches: + - target: + kind: Ingress + name: streamlit + patch: | + - op: replace + path: /spec/rules/0/host + value: template.openms.example.de + # namePrefix renames the Redis Service to template-app-redis but does not + # rewrite env values, so point REDIS_URL at the prefixed Service name. + - target: + kind: Deployment + name: streamlit + patch: | + - op: replace + path: /spec/template/spec/containers/0/env/0/value + value: redis://template-app-redis:6379/0 + - target: + kind: Deployment + name: rq-worker + patch: | + - op: replace + path: /spec/template/spec/containers/0/env/0/value + value: redis://template-app-redis:6379/0 From 233484e2e034a0c7103ca3e19793c6bb8fc409f7 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 20:00:19 +0000 Subject: [PATCH 2/7] Fix kubeconform validation to skip kustomization.yaml kustomization.yaml is a Kustomize config file, not a standard K8s resource, so kubeconform has no schema for it. Exclude it via -ignore-filename-pattern. 
https://claude.ai/code/session_01RNJ3dVjV1VTHcC9ugE3FQJ --- .github/workflows/k8s-manifests-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/k8s-manifests-ci.yml b/.github/workflows/k8s-manifests-ci.yml index 3d9dbbb93..a42f7ff6c 100644 --- a/.github/workflows/k8s-manifests-ci.yml +++ b/.github/workflows/k8s-manifests-ci.yml @@ -21,7 +21,7 @@ jobs: - name: Validate K8s manifests (base) run: | - kubeconform -summary -strict -kubernetes-version 1.28.0 k8s/base/*.yaml + kubeconform -summary -strict -kubernetes-version 1.28.0 -ignore-filename-pattern 'kustomization.yaml' k8s/base/*.yaml - name: Install kubectl uses: azure/setup-kubectl@v3 From aa28e915e464f93b57ceb06eed954fcdffdef45f Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 5 Mar 2026 20:24:48 +0000 Subject: [PATCH 3/7] Add matrix strategy to test both Dockerfiles in integration tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The integration-test job now uses a matrix with Dockerfile_simple and Dockerfile. Each matrix entry checks if its Dockerfile exists before running — all steps are guarded with an `if` condition so they skip gracefully when a Dockerfile is absent. This allows downstream forks that only have one Dockerfile to pass CI without errors. 
https://claude.ai/code/session_01RNJ3dVjV1VTHcC9ugE3FQJ --- .github/workflows/k8s-manifests-ci.yml | 27 +++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/.github/workflows/k8s-manifests-ci.yml b/.github/workflows/k8s-manifests-ci.yml index a42f7ff6c..d65bb22c5 100644 --- a/.github/workflows/k8s-manifests-ci.yml +++ b/.github/workflows/k8s-manifests-ci.yml @@ -38,42 +38,67 @@ jobs: integration-test: runs-on: ubuntu-latest needs: validate-manifests + strategy: + fail-fast: false + matrix: + dockerfile: + - Dockerfile_simple + - Dockerfile steps: - uses: actions/checkout@v4 + - name: Check if Dockerfile exists + id: check + run: | + if [ -f "${{ matrix.dockerfile }}" ]; then + echo "exists=true" >> "$GITHUB_OUTPUT" + echo "Found ${{ matrix.dockerfile }}, will run integration test" + else + echo "exists=false" >> "$GITHUB_OUTPUT" + echo "Skipping: ${{ matrix.dockerfile }} not found" + fi + - name: Build Docker image from current code + if: steps.check.outputs.exists == 'true' run: | - docker build -t openms-streamlit:test -f Dockerfile_simple . + docker build -t openms-streamlit:test -f ${{ matrix.dockerfile }} . 
- name: Create kind cluster + if: steps.check.outputs.exists == 'true' uses: helm/kind-action@v1 with: cluster_name: test-cluster - name: Load image into kind cluster + if: steps.check.outputs.exists == 'true' run: | kind load docker-image openms-streamlit:test --name test-cluster - name: Install nginx ingress controller + if: steps.check.outputs.exists == 'true' run: | kubectl apply -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/main/deploy/static/provider/kind/deploy.yaml kubectl wait --namespace ingress-nginx --for=condition=ready pod --selector=app.kubernetes.io/component=controller --timeout=90s - name: Deploy with Kustomize + if: steps.check.outputs.exists == 'true' run: | kubectl kustomize k8s/overlays/template-app/ | \ sed 's|imagePullPolicy: IfNotPresent|imagePullPolicy: Never|g' | \ kubectl apply -f - - name: Wait for Redis to be ready + if: steps.check.outputs.exists == 'true' run: | kubectl wait --for=condition=ready pod -l app=template-app,component=redis --timeout=60s - name: Verify Redis Service is reachable + if: steps.check.outputs.exists == 'true' run: | kubectl run redis-test --image=redis:7-alpine --rm -i --restart=Never -- redis-cli -h template-app-redis ping - name: Verify all deployments are available + if: steps.check.outputs.exists == 'true' run: | kubectl wait --for=condition=available deployment -l app=template-app --timeout=120s || true kubectl get pods -l app=template-app From 9aa798b51edba3e47e5d3d54e7e87e141ab847cf Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 11 Mar 2026 09:29:56 +0000 Subject: [PATCH 4/7] Adapt K8s base manifests for de.NBI Cinder CSI storage - Switch workspace PVC from ReadWriteMany to ReadWriteOnce with cinder-csi storage class (required by de.NBI KKP cluster) - Increase PVC storage to 500Gi - Add namespace: openms to kustomization.yaml - Reduce pod resource requests (1Gi/500m) and limits (8Gi/4 CPU) so all workspace-mounting pods fit on a single node 
https://claude.ai/code/session_01RNJ3dVjV1VTHcC9ugE3FQJ --- k8s/base/kustomization.yaml | 2 ++ k8s/base/rq-worker-deployment.yaml | 8 ++++---- k8s/base/streamlit-deployment.yaml | 8 ++++---- k8s/base/workspace-pvc.yaml | 5 +++-- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/k8s/base/kustomization.yaml b/k8s/base/kustomization.yaml index c63122a40..e337290b4 100644 --- a/k8s/base/kustomization.yaml +++ b/k8s/base/kustomization.yaml @@ -1,6 +1,8 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization +namespace: openms + resources: - namespace.yaml - configmap.yaml diff --git a/k8s/base/rq-worker-deployment.yaml b/k8s/base/rq-worker-deployment.yaml index 769ab3c30..f4beeca80 100644 --- a/k8s/base/rq-worker-deployment.yaml +++ b/k8s/base/rq-worker-deployment.yaml @@ -35,11 +35,11 @@ spec: readOnly: true resources: requests: - memory: "4Gi" - cpu: "2" + memory: "1Gi" + cpu: "500m" limits: - memory: "32Gi" - cpu: "8" + memory: "8Gi" + cpu: "4" volumes: - name: workspaces persistentVolumeClaim: diff --git a/k8s/base/streamlit-deployment.yaml b/k8s/base/streamlit-deployment.yaml index 75ac4f155..b74caf6d9 100644 --- a/k8s/base/streamlit-deployment.yaml +++ b/k8s/base/streamlit-deployment.yaml @@ -49,11 +49,11 @@ spec: periodSeconds: 30 resources: requests: - memory: "4Gi" - cpu: "2" + memory: "1Gi" + cpu: "500m" limits: - memory: "32Gi" - cpu: "8" + memory: "8Gi" + cpu: "4" volumes: - name: workspaces persistentVolumeClaim: diff --git a/k8s/base/workspace-pvc.yaml b/k8s/base/workspace-pvc.yaml index fc7351899..b3613bebf 100644 --- a/k8s/base/workspace-pvc.yaml +++ b/k8s/base/workspace-pvc.yaml @@ -4,7 +4,8 @@ metadata: name: workspaces-pvc spec: accessModes: - - ReadWriteMany + - ReadWriteOnce + storageClassName: cinder-csi resources: requests: - storage: 100Gi + storage: 500Gi From 0d90cf761361a74b350d3f316340479661796118 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 11 Mar 2026 10:02:02 +0000 Subject: [PATCH 5/7] Add pod affinity rules to 
co-locate all workspace pods on same node The workspaces PVC uses ReadWriteOnce (Cinder CSI block storage) which requires all pods mounting it to run on the same node. Without explicit affinity rules, the scheduler was failing silently, leaving pods in Pending state with no events. Adds a `volume-group: workspaces` label and podAffinity with requiredDuringSchedulingIgnoredDuringExecution to streamlit deployment, rq-worker deployment, and cleanup cronjob. This ensures the scheduler explicitly co-locates all workspace-consuming pods on the same node. https://claude.ai/code/session_01RNJ3dVjV1VTHcC9ugE3FQJ --- k8s/base/cleanup-cronjob.yaml | 11 +++++++++++ k8s/base/rq-worker-deployment.yaml | 11 +++++++++++ k8s/base/streamlit-deployment.yaml | 11 +++++++++++ 3 files changed, 33 insertions(+) diff --git a/k8s/base/cleanup-cronjob.yaml b/k8s/base/cleanup-cronjob.yaml index 864818763..05f764dcb 100644 --- a/k8s/base/cleanup-cronjob.yaml +++ b/k8s/base/cleanup-cronjob.yaml @@ -15,8 +15,19 @@ spec: metadata: labels: component: cleanup + volume-group: workspaces spec: restartPolicy: OnFailure + affinity: + podAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: volume-group + operator: In + values: + - workspaces + topologyKey: kubernetes.io/hostname containers: - name: cleanup image: openms-streamlit diff --git a/k8s/base/rq-worker-deployment.yaml b/k8s/base/rq-worker-deployment.yaml index f4beeca80..18fc85419 100644 --- a/k8s/base/rq-worker-deployment.yaml +++ b/k8s/base/rq-worker-deployment.yaml @@ -13,7 +13,18 @@ spec: metadata: labels: component: rq-worker + volume-group: workspaces spec: + affinity: + podAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: volume-group + operator: In + values: + - workspaces + topologyKey: kubernetes.io/hostname containers: - name: rq-worker image: openms-streamlit diff --git a/k8s/base/streamlit-deployment.yaml 
b/k8s/base/streamlit-deployment.yaml index b74caf6d9..bc8201f9f 100644 --- a/k8s/base/streamlit-deployment.yaml +++ b/k8s/base/streamlit-deployment.yaml @@ -13,7 +13,18 @@ spec: metadata: labels: component: streamlit + volume-group: workspaces spec: + affinity: + podAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: volume-group + operator: In + values: + - workspaces + topologyKey: kubernetes.io/hostname containers: - name: streamlit image: openms-streamlit From ed3075b64c7f8f1e4f13d5fdb61efebb4c298354 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 11 Mar 2026 19:31:25 +0000 Subject: [PATCH 6/7] Fix CI: wait for ingress-nginx admission webhook before deploying The controller pod being Ready doesn't guarantee the admission webhook service is accepting connections. Add a polling loop that waits for the webhook endpoint to have an IP assigned before applying the Ingress resource, preventing "connection refused" errors during kustomize apply. https://claude.ai/code/session_01RNJ3dVjV1VTHcC9ugE3FQJ --- .github/workflows/k8s-manifests-ci.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/k8s-manifests-ci.yml b/.github/workflows/k8s-manifests-ci.yml index d65bb22c5..12119412a 100644 --- a/.github/workflows/k8s-manifests-ci.yml +++ b/.github/workflows/k8s-manifests-ci.yml @@ -79,6 +79,16 @@ jobs: run: | kubectl apply -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/main/deploy/static/provider/kind/deploy.yaml kubectl wait --namespace ingress-nginx --for=condition=ready pod --selector=app.kubernetes.io/component=controller --timeout=90s + # Wait for the admission webhook to be ready to accept connections + echo "Waiting for ingress-nginx admission webhook..." 
+ for i in $(seq 1 30); do + if kubectl get endpoints -n ingress-nginx ingress-nginx-controller-admission -o jsonpath='{.subsets[0].addresses[0].ip}' 2>/dev/null | grep -q .; then + echo "Webhook endpoint ready" + break + fi + echo " attempt $i/30 - waiting..." + sleep 2 + done - name: Deploy with Kustomize if: steps.check.outputs.exists == 'true' From 6036d142116becf8c954de0ab9771d65f95ddcea Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 11 Mar 2026 20:29:34 +0000 Subject: [PATCH 7/7] Fix CI: add -n openms namespace to integration test steps The kustomize overlay deploys into the openms namespace, but the verification steps (Redis wait, Redis ping, deployment checks) were querying the default namespace, causing "no matching resources found". https://claude.ai/code/session_01RNJ3dVjV1VTHcC9ugE3FQJ --- .github/workflows/k8s-manifests-ci.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/k8s-manifests-ci.yml b/.github/workflows/k8s-manifests-ci.yml index 12119412a..49b560d49 100644 --- a/.github/workflows/k8s-manifests-ci.yml +++ b/.github/workflows/k8s-manifests-ci.yml @@ -100,16 +100,16 @@ jobs: - name: Wait for Redis to be ready if: steps.check.outputs.exists == 'true' run: | - kubectl wait --for=condition=ready pod -l app=template-app,component=redis --timeout=60s + kubectl wait -n openms --for=condition=ready pod -l app=template-app,component=redis --timeout=60s - name: Verify Redis Service is reachable if: steps.check.outputs.exists == 'true' run: | - kubectl run redis-test --image=redis:7-alpine --rm -i --restart=Never -- redis-cli -h template-app-redis ping + kubectl run redis-test -n openms --image=redis:7-alpine --rm -i --restart=Never -- redis-cli -h template-app-redis.openms.svc.cluster.local ping - name: Verify all deployments are available if: steps.check.outputs.exists == 'true' run: | - kubectl wait --for=condition=available deployment -l app=template-app --timeout=120s || true - kubectl get pods -l 
app=template-app - kubectl get services -l app=template-app + kubectl wait -n openms --for=condition=available deployment -l app=template-app --timeout=120s || true + kubectl get pods -n openms -l app=template-app + kubectl get services -n openms -l app=template-app