feat(webapp): reload LLM pricing registry on Redis pub/sub (#3534)

ericallam · web-flow · commit ead1e5a53d62 · 2026-05-09T08:59:40.000+01:00
## Summary

Adds a Redis pub/sub reload path to the webapp's in-memory LLM pricing
registry. When enabled on a process, the registry reloads from the
database whenever a publish lands on the configured channel — instead of
waiting for the existing 5-minute interval. Lets pricing/model changes
propagate to cost enrichment within seconds.

Subscription is **off by default** and opt-in per process. Only
OTel-ingesting services need real-time freshness; dashboard and worker
services run fine on the periodic interval and shouldn't pile onto each
publish with a full-table reload.

## Design

When `LLM_PRICING_RELOAD_PUBSUB_ENABLED=true`, subscribes via
`createRedisClient` against `COMMON_WORKER_REDIS_*` and listens on
`LLM_PRICING_RELOAD_CHANNEL` (default `llm-registry:reload`). The
5-minute periodic reload stays as a backstop, and a SIGTERM/SIGINT
handler closes the subscription cleanly.

The publisher side lives outside this PR — any process running in the
same Redis namespace can trigger a reload by `PUBLISH
llm-registry:reload <anything>`. Includes a `.server-changes/` note for
the changelog.

### Debounced reload

Bursts of publishes are coalesced. The first publish schedules a reload
at T+`LLM_PRICING_RELOAD_DEBOUNCE_MS` (default 1s); subsequent publishes
during that window are no-ops because the trailing reload picks up
everything when it queries the DB. Bounds reload rate to at most 1 per
debounce window regardless of publisher chattiness, so a runaway
upstream publisher can't fan out into a flood of full-table-scan
reloads.

## Test plan

- [ ] With `LLM_PRICING_RELOAD_PUBSUB_ENABLED=false` (default):
`redis-cli PUBSUB NUMSUB llm-registry:reload` returns `0` while the
webapp is up
- [ ] With it set to `true`: returns `>= 1`
- [ ] `redis-cli PUBLISH llm-registry:reload test` returns `1` (one
subscriber received) on a subscribed process
- [ ] Mutate an `LlmModel` row externally, publish on the channel,
observe the registry's match() picks up the change without waiting for
the 5-min tick
- [ ] Publish 100x in rapid succession; confirm only one reload fires
within the debounce window
diff --git a/.server-changes/llm-pricing-registry-reload-channel.md b/.server-changes/llm-pricing-registry-reload-channel.md
@@ -0,0 +1,6 @@
+---
+area: webapp
+type: improvement
+---
+
+The LLM pricing registry can now reload from the database whenever a publish lands on `LLM_PRICING_RELOAD_CHANNEL` on the worker Redis, instead of waiting for the next 5-minute interval. The subscription is opt-in per process via `LLM_PRICING_RELOAD_PUBSUB_ENABLED` (default off); on processes that enable it, LLM model and pricing changes reflect in cost enrichment within seconds.
diff --git a/apps/webapp/app/env.server.ts b/apps/webapp/app/env.server.ts
@@ -1427,6 +1427,14 @@ const EnvironmentSchema = z
     // LLM cost tracking
     LLM_COST_TRACKING_ENABLED: BoolEnv.default(true),
     LLM_PRICING_RELOAD_INTERVAL_MS: z.coerce.number().int().default(5 * 60 * 1000), // 5 minutes
+    LLM_PRICING_RELOAD_CHANNEL: z.string().default("llm-registry:reload"),
+    LLM_PRICING_RELOAD_DEBOUNCE_MS: z.coerce.number().int().default(1000),
+    // Whether to subscribe this process to the LLM_PRICING_RELOAD_CHANNEL.
+    // Default off — only OTel-ingesting services need real-time pricing
+    // freshness; dashboard/worker processes are fine on the existing
+    // 5-minute periodic reload. In multi-service deployments, set this to
+    // true on the span-ingesting services.
+    LLM_PRICING_RELOAD_PUBSUB_ENABLED: BoolEnv.default(false),
     LLM_PRICING_SEED_ON_STARTUP: BoolEnv.default(false),
     LLM_PRICING_READY_TIMEOUT_MS: z.coerce.number().int().default(500),
     LLM_METRICS_BATCH_SIZE: z.coerce.number().int().default(5000),
diff --git a/apps/webapp/app/v3/llmPricingRegistry.server.ts b/apps/webapp/app/v3/llmPricingRegistry.server.ts
@@ -1,7 +1,9 @@
 import { ModelPricingRegistry, seedLlmPricing } from "@internal/llm-model-catalog";
 import { prisma, $replica } from "~/db.server";
 import { env } from "~/env.server";
+import { logger } from "~/services/logger.server";
 import { signalsEmitter } from "~/services/signals.server";
+import { createRedisClient } from "~/redis.server";
 import { singleton } from "~/utils/singleton";
 import { setLlmPricingRegistry } from "./utils/enrichCreatableEvents.server";
 
@@ -27,20 +29,77 @@ export const llmPricingRegistry = singleton("llmPricingRegistry", () => {
     console.error("Failed to initialize LLM pricing registry", err);
   });
 
-  // Periodic reload
+  // Periodic reload (backstop for the pub/sub path below)
   const reloadInterval = env.LLM_PRICING_RELOAD_INTERVAL_MS;
   const interval = setInterval(() => {
     registry.reload().catch((err) => {
       console.error("Failed to reload LLM pricing registry", err);
     });
   }, reloadInterval);
 
-  signalsEmitter.on("SIGTERM", () => {
-    clearInterval(interval);
-  });
-  signalsEmitter.on("SIGINT", () => {
-    clearInterval(interval);
-  });
+  // Pub/sub reload is opt-in per process (default off). Without it, the
+  // registry stays accurate via the existing 5-minute interval. Enable on
+  // the OTel-ingesting services where pricing freshness directly affects
+  // span cost enrichment; dashboard and worker services don't need it and
+  // shouldn't pile onto each publish with a full-table reload.
+  if (env.LLM_PRICING_RELOAD_PUBSUB_ENABLED) {
+    const subscriber = createRedisClient("llm-pricing:subscriber", {
+      keyPrefix: "llm-pricing:subscriber:",
+      host: env.COMMON_WORKER_REDIS_HOST,
+      port: env.COMMON_WORKER_REDIS_PORT,
+      username: env.COMMON_WORKER_REDIS_USERNAME,
+      password: env.COMMON_WORKER_REDIS_PASSWORD,
+      tlsDisabled: env.COMMON_WORKER_REDIS_TLS_DISABLED === "true",
+      clusterMode: env.COMMON_WORKER_REDIS_CLUSTER_MODE_ENABLED === "1",
+    });
+
+    subscriber.subscribe(env.LLM_PRICING_RELOAD_CHANNEL).catch((err) => {
+      logger.warn("Failed to subscribe to LLM pricing reload channel", {
+        channel: env.LLM_PRICING_RELOAD_CHANNEL,
+        error: err instanceof Error ? err.message : String(err),
+      });
+    });
+
+    // Coalesce reload calls so a burst of publishes only triggers one
+    // reload. The first publish schedules a reload at
+    // T+LLM_PRICING_RELOAD_DEBOUNCE_MS; subsequent publishes during that
+    // window are no-ops because the trailing reload picks up everything
+    // when it queries the DB. Bounds reload rate to at most 1 per debounce
+    // window regardless of publisher chattiness.
+    const debounceMs = env.LLM_PRICING_RELOAD_DEBOUNCE_MS;
+    let pendingReloadTimer: NodeJS.Timeout | null = null;
+
+    function scheduleReload() { // schedules a single registry reload debounceMs from now, unless one is already pending
+      if (pendingReloadTimer) return; // a reload is already queued; this publish is coalesced into it
+      pendingReloadTimer = setTimeout(() => {
+        pendingReloadTimer = null; // cleared before reloading so the next publish can schedule a fresh reload
+        registry.reload().catch((err) => { // reload failures are logged at warn level and not rethrown
+          logger.warn("Failed to reload LLM pricing registry from pub/sub", {
+            error: err instanceof Error ? err.message : String(err),
+          });
+        });
+      }, debounceMs);
+    }
+
+    subscriber.on("message", (channel) => {
+      if (channel !== env.LLM_PRICING_RELOAD_CHANNEL) return;
+      scheduleReload();
+    });
+
+    signalsEmitter.on("SIGTERM", () => {
+      clearInterval(interval);
+      if (pendingReloadTimer) clearTimeout(pendingReloadTimer);
+      void subscriber.quit().catch(() => {});
+    });
+    signalsEmitter.on("SIGINT", () => {
+      clearInterval(interval);
+      if (pendingReloadTimer) clearTimeout(pendingReloadTimer);
+      void subscriber.quit().catch(() => {});
+    });
+  } else {
+    signalsEmitter.on("SIGTERM", () => clearInterval(interval));
+    signalsEmitter.on("SIGINT", () => clearInterval(interval));
+  }
 
   return registry;
 });