From fa4df4307a401a6aaed48a72857ddafda4069524 Mon Sep 17 00:00:00 2001
From: Mark Sturdevant <mark.sturdevant@ibm.com>
Date: Tue, 19 May 2026 12:08:33 -0700
Subject: [PATCH 1/5] feat: add health and readiness check endpoints to CLI
 serve API

Adds /health and /ready endpoints to the FastAPI server for Kubernetes liveness and readiness probes.

- /health: always returns 200 (liveness check)
- /ready: returns 200 when ready, 503 otherwise (readiness check)

The readiness check is a basic check that run_server has run, which registers
the chat endpoint.  In the future this could be extended to let serve modules
report the readiness of their backends (etc.; this needs some design). It would
also need to be adapted appropriately when we add support for multiple serve modules.

Signed-off-by: Mark Sturdevant <mark.sturdevant@ibm.com>
Assisted-by: IBM Bob
---
 cli/serve/app.py       | 47 +++++++++++++++++++++++++++++++++--
 test/cli/test_serve.py | 56 +++++++++++++++++++++++++++++++++++++++---
 2 files changed, 98 insertions(+), 5 deletions(-)

diff --git a/cli/serve/app.py b/cli/serve/app.py
index 82e652fb7..ef6a753d9 100644
--- a/cli/serve/app.py
+++ b/cli/serve/app.py
@@ -7,12 +7,12 @@
 import sys
 import time
 import uuid
-from typing import Any, Literal, cast
+from typing import Any, cast
 
 try:
     import typer
     import uvicorn
-    from fastapi import FastAPI, Request
+    from fastapi import FastAPI, HTTPException, Request
     from fastapi.exceptions import RequestValidationError
     from fastapi.responses import JSONResponse, StreamingResponse
     from pydantic import BaseModel
@@ -45,6 +45,9 @@
 
 logger = MelleaLogger.get_logger()
 
+# Track whether the server has been initialized with a module
+_server_ready = False
+
 app = FastAPI(
     title="M serve OpenAI API Compatible Server",
     description="M programs that run as a simple OpenAI API-compatible server",
@@ -52,6 +55,40 @@
 )
 
 
+@app.get("/health", status_code=200)
+async def health_check():
+    """Basic liveness check endpoint.
+
+    Returns a 200 OK status to signal that the Python process is alive and responding.
+
+    Returns:
+        dict: A dictionary with status "healthy".
+    """
+    return {"status": "healthy"}
+
+
+@app.get("/ready")
+async def readiness_check():
+    """Readiness check endpoint.
+
+    Returns 200 if the server has loaded a module and registered the chat
+    completions route. Returns 503 if the server is still starting up.
+
+    This endpoint is useful for Kubernetes readiness probes to ensure the
+    service doesn't receive traffic before it's ready to handle requests.
+
+    Returns:
+        dict: A dictionary with status "ready" when ready (HTTP 200).
+
+    Raises:
+        HTTPException: 503 status with error detail if server is not ready.
+    """
+    if _server_ready:
+        return {"status": "ready"}
+    else:
+        raise HTTPException(status_code=503, detail="Server not ready")
+
+
 @app.exception_handler(RequestValidationError)
 async def validation_exception_handler(
     request: Request, exc: RequestValidationError
@@ -286,6 +323,8 @@ def run_server(
     port: int = 8080,
 ):
     """Serve a FastAPI endpoint for a given script."""
+    global _server_ready
+
     module = load_module_from_path(script_path)
     route_path = "/v1/chat/completions"
 
@@ -295,5 +334,9 @@ def run_server(
         methods=["POST"],
         response_model=ChatCompletion | OpenAIErrorResponse,
     )
+
+    # Mark server as ready after route is successfully registered
+    _server_ready = True
+
     typer.echo(f"Serving {route_path} at http://{host}:{port}")
     uvicorn.run(app, host=host, port=port)
diff --git a/test/cli/test_serve.py b/test/cli/test_serve.py
index 889049613..89be73747 100644
--- a/test/cli/test_serve.py
+++ b/test/cli/test_serve.py
@@ -10,7 +10,13 @@
 from fastapi.testclient import TestClient
 from pydantic import BaseModel, ValidationError
 
-from cli.serve.app import make_chat_endpoint, validation_exception_handler
+import cli.serve.app as app_module
+from cli.serve.app import (
+    _server_ready,
+    app,
+    make_chat_endpoint,
+    validation_exception_handler,
+)
 from cli.serve.models import (
     ChatCompletion,
     ChatCompletionRequest,
@@ -45,6 +51,52 @@ def sample_request():
     )
 
 
+class TestHealthCheckEndpoint:
+    """Tests for the health check endpoint."""
+
+    def test_health_check(self):
+        """Test that /health endpoint returns 200 with correct JSON response."""
+        client = TestClient(app)
+        response = client.get("/health")
+
+        assert response.status_code == 200
+        assert response.json() == {"status": "healthy"}
+
+
+class TestReadinessCheckEndpoint:
+    """Tests for the readiness check endpoint."""
+
+    @pytest.fixture(autouse=True)
+    def reset_server_ready(self):
+        """Reset _server_ready state before and after each test."""
+        # Save original state
+        original_state = app_module._server_ready
+        # Reset to False for clean test start
+        app_module._server_ready = False
+        yield
+        # Restore original state after test
+        app_module._server_ready = original_state
+
+    def test_ready_returns_503_before_module_loaded(self):
+        """Test that /ready returns 503 when server hasn't loaded a module yet."""
+        client = TestClient(app)
+        response = client.get("/ready")
+
+        assert response.status_code == 503
+        assert response.json()["detail"] == "Server not ready"
+
+    def test_ready_returns_200_after_module_loaded(self):
+        """Test that /ready returns 200 after run_server() marks it ready."""
+        # Simulate what run_server() does
+        app_module._server_ready = True
+
+        client = TestClient(app)
+        response = client.get("/ready")
+
+        assert response.status_code == 200
+        assert response.json() == {"status": "ready"}
+
+
 class TestChatEndpoint:
     """Tests for the chat completion endpoint."""
 
@@ -722,8 +774,6 @@ async def test_text_format_no_schema(self, mock_module):
     @pytest.mark.asyncio
     async def test_json_schema_missing_schema_field(self, mock_module):
         """Test that json_schema without schema field raises ValidationError."""
-        from pydantic import ValidationError
-
         # Should raise ValidationError when creating ResponseFormat
         with pytest.raises(ValidationError) as exc_info:
             ResponseFormat(

From e775d8bbd328949814e84dea075f77cbe90820b5 Mon Sep 17 00:00:00 2001
From: Mark Sturdevant <mark.sturdevant@ibm.com>
Date: Wed, 20 May 2026 13:48:55 -0700
Subject: [PATCH 2/5] feat: cli serve health endpoint

* changed the status value to "pass" to follow the IETF health check response format draft.  k8s doesn't care what the string is.
* removed the /ready implementation, which is not ready and not part of this issue

Signed-off-by: Mark Sturdevant <mark.sturdevant@ibm.com>
---
 cli/serve/app.py       | 37 +++--------------------------------
 test/cli/test_serve.py | 44 ++----------------------------------------
 2 files changed, 5 insertions(+), 76 deletions(-)

diff --git a/cli/serve/app.py b/cli/serve/app.py
index ef6a753d9..aeb1f1b1c 100644
--- a/cli/serve/app.py
+++ b/cli/serve/app.py
@@ -12,7 +12,7 @@
 try:
     import typer
     import uvicorn
-    from fastapi import FastAPI, HTTPException, Request
+    from fastapi import FastAPI, Request
     from fastapi.exceptions import RequestValidationError
     from fastapi.responses import JSONResponse, StreamingResponse
     from pydantic import BaseModel
@@ -45,9 +45,6 @@
 
 logger = MelleaLogger.get_logger()
 
-# Track whether the server has been initialized with a module
-_server_ready = False
-
 app = FastAPI(
     title="M serve OpenAI API Compatible Server",
     description="M programs that run as a simple OpenAI API-compatible server",
@@ -62,31 +59,9 @@ async def health_check():
     Returns a 200 OK status to signal that the Python process is alive and responding.
 
     Returns:
-        dict: A dictionary with status "healthy".
+        dict: A dictionary with status "pass".
     """
-    return {"status": "healthy"}
-
-
-@app.get("/ready")
-async def readiness_check():
-    """Readiness check endpoint.
-
-    Returns 200 if the server has loaded a module and registered the chat
-    completions route. Returns 503 if the server is still starting up.
-
-    This endpoint is useful for Kubernetes readiness probes to ensure the
-    service doesn't receive traffic before it's ready to handle requests.
-
-    Returns:
-        dict: A dictionary with status "ready" when ready (HTTP 200).
-
-    Raises:
-        HTTPException: 503 status with error detail if server is not ready.
-    """
-    if _server_ready:
-        return {"status": "ready"}
-    else:
-        raise HTTPException(status_code=503, detail="Server not ready")
+    return {"status": "pass"}
 
 
 @app.exception_handler(RequestValidationError)
@@ -323,8 +298,6 @@ def run_server(
     port: int = 8080,
 ):
     """Serve a FastAPI endpoint for a given script."""
-    global _server_ready
-
     module = load_module_from_path(script_path)
     route_path = "/v1/chat/completions"
 
@@ -334,9 +307,5 @@ def run_server(
         methods=["POST"],
         response_model=ChatCompletion | OpenAIErrorResponse,
     )
-
-    # Mark server as ready after route is successfully registered
-    _server_ready = True
-
     typer.echo(f"Serving {route_path} at http://{host}:{port}")
     uvicorn.run(app, host=host, port=port)
diff --git a/test/cli/test_serve.py b/test/cli/test_serve.py
index 89be73747..82f0437cb 100644
--- a/test/cli/test_serve.py
+++ b/test/cli/test_serve.py
@@ -10,13 +10,7 @@
 from fastapi.testclient import TestClient
 from pydantic import BaseModel, ValidationError
 
-import cli.serve.app as app_module
-from cli.serve.app import (
-    _server_ready,
-    app,
-    make_chat_endpoint,
-    validation_exception_handler,
-)
+from cli.serve.app import app, make_chat_endpoint, validation_exception_handler
 from cli.serve.models import (
     ChatCompletion,
     ChatCompletionRequest,
@@ -60,41 +54,7 @@ def test_health_check(self):
         response = client.get("/health")
 
         assert response.status_code == 200
-        assert response.json() == {"status": "healthy"}
-
-
-class TestReadinessCheckEndpoint:
-    """Tests for the readiness check endpoint."""
-
-    @pytest.fixture(autouse=True)
-    def reset_server_ready(self):
-        """Reset _server_ready state before and after each test."""
-        # Save original state
-        original_state = app_module._server_ready
-        # Reset to False for clean test start
-        app_module._server_ready = False
-        yield
-        # Restore original state after test
-        app_module._server_ready = original_state
-
-    def test_ready_returns_503_before_module_loaded(self):
-        """Test that /ready returns 503 when server hasn't loaded a module yet."""
-        client = TestClient(app)
-        response = client.get("/ready")
-
-        assert response.status_code == 503
-        assert response.json()["detail"] == "Server not ready"
-
-    def test_ready_returns_200_after_module_loaded(self):
-        """Test that /ready returns 200 after run_server() marks it ready."""
-        # Simulate what run_server() does
-        app_module._server_ready = True
-
-        client = TestClient(app)
-        response = client.get("/ready")
-
-        assert response.status_code == 200
-        assert response.json() == {"status": "ready"}
+        assert response.json() == {"status": "pass"}
 
 
 class TestChatEndpoint:

From a1b97653b28242ae5c985ded8129d821ace64545 Mon Sep 17 00:00:00 2001
From: Mark Sturdevant <mark.sturdevant@ibm.com>
Date: Thu, 21 May 2026 11:44:13 -0700
Subject: [PATCH 3/5] test: test_health_check returns 200 by default

* removed the redundant explicit status_code=200 from the /health route (200 is already the default for a successful GET)

Signed-off-by: Mark Sturdevant <mark.sturdevant@ibm.com>
---
 cli/serve/app.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cli/serve/app.py b/cli/serve/app.py
index aeb1f1b1c..93cca3e52 100644
--- a/cli/serve/app.py
+++ b/cli/serve/app.py
@@ -52,7 +52,7 @@
 )
 
 
-@app.get("/health", status_code=200)
+@app.get("/health")
 async def health_check():
     """Basic liveness check endpoint.
 

From 4b8e22dcd7cadc6cc3c0bb4db3855596d715ab7c Mon Sep 17 00:00:00 2001
From: Mark Sturdevant <mark.sturdevant@ibm.com>
Date: Thu, 21 May 2026 11:49:00 -0700
Subject: [PATCH 4/5] test: add cli serve test for post /health returns 405

* GET is success 200
* POST is 405 (just reinforcing with a test)

Signed-off-by: Mark Sturdevant <mark.sturdevant@ibm.com>
---
 test/cli/test_serve.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/test/cli/test_serve.py b/test/cli/test_serve.py
index 82f0437cb..88ad69d7c 100644
--- a/test/cli/test_serve.py
+++ b/test/cli/test_serve.py
@@ -49,13 +49,18 @@ class TestHealthCheckEndpoint:
     """Tests for the health check endpoint."""
 
     def test_health_check(self):
-        """Test that /health endpoint returns 200 with correct JSON response."""
+        """Test that /health GET endpoint returns 200 with correct JSON response."""
         client = TestClient(app)
         response = client.get("/health")
 
         assert response.status_code == 200
         assert response.json() == {"status": "pass"}
 
+    def test_health_check_rejects_post(self):
+        """Test that /health POST endpoint returns 405"""
+        client = TestClient(app)
+        assert client.post("/health").status_code == 405
+
 
 class TestChatEndpoint:
     """Tests for the chat completion endpoint."""

From 1233ab8185d0465312d09501e4933637fb4418b7 Mon Sep 17 00:00:00 2001
From: Mark Sturdevant <mark.sturdevant@ibm.com>
Date: Thu, 21 May 2026 12:39:37 -0700
Subject: [PATCH 5/5] test: cli serve health check test improvements

Misc improvements from PR review:
* fixture
* comment
* return type
* add test for POST

Signed-off-by: Mark Sturdevant <mark.sturdevant@ibm.com>
---
 cli/serve/app.py       |  2 +-
 test/cli/test_serve.py | 13 +++++++++----
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/cli/serve/app.py b/cli/serve/app.py
index 93cca3e52..22e7f4e56 100644
--- a/cli/serve/app.py
+++ b/cli/serve/app.py
@@ -53,7 +53,7 @@
 
 
 @app.get("/health")
-async def health_check():
+async def health_check() -> dict[str, str]:
     """Basic liveness check endpoint.
 
     Returns a 200 OK status to signal that the Python process is alive and responding.
diff --git a/test/cli/test_serve.py b/test/cli/test_serve.py
index 88ad69d7c..473ddc69d 100644
--- a/test/cli/test_serve.py
+++ b/test/cli/test_serve.py
@@ -48,17 +48,22 @@ def sample_request():
 class TestHealthCheckEndpoint:
     """Tests for the health check endpoint."""
 
-    def test_health_check(self):
+    @pytest.fixture(scope="class")
+    def client(self, request) -> TestClient:
+        """Set up the test client."""
+
+        # /health is registered at module-load time — TestClient(app) is correct here
+        return TestClient(app)
+
+    def test_health_check(self, client: TestClient):
         """Test that /health GET endpoint returns 200 with correct JSON response."""
-        client = TestClient(app)
         response = client.get("/health")
 
         assert response.status_code == 200
         assert response.json() == {"status": "pass"}
 
-    def test_health_check_rejects_post(self):
+    def test_health_check_rejects_post(self, client: TestClient):
         """Test that /health POST endpoint returns 405"""
-        client = TestClient(app)
         assert client.post("/health").status_code == 405