From 3b1cf86126bf94dbdfc4822ee3b33b9e17abe916 Mon Sep 17 00:00:00 2001 From: paulj Date: Mon, 2 Feb 2026 15:35:02 -0500 Subject: [PATCH] fix(metrics): Skip traffic metrics when user-agent unavailable Middleware traces (Edge runtime) don't receive normalizedRequest from Sentry SDK, causing all middleware traffic to be classified as "unknown". Since each request generates both a middleware trace and a handler trace, this was inflating the "unknown" count significantly. Instead of emitting "unknown" when we can't classify traffic, skip the metric entirely. The handler trace (Node.js) will emit the properly classified metric. Also adds case-insensitive header lookup since HTTP headers are case-insensitive but JS objects are case-sensitive. Co-Authored-By: Claude --- src/tracesSampler.ts | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/tracesSampler.ts b/src/tracesSampler.ts index 98521249664ed..ee4319385b88c 100644 --- a/src/tracesSampler.ts +++ b/src/tracesSampler.ts @@ -87,29 +87,36 @@ function matchPattern(input: string, pattern: RegExp): string | undefined { return match ? match[0].toLowerCase() : undefined; } +/** + * Gets user agent from headers object, handling case-insensitivity. + * HTTP headers are case-insensitive, but JS objects are case-sensitive. + */ +function getUserAgentFromHeaders(headers?: Record<string, string>): string | undefined { + if (!headers) { + return undefined; + } + const key = Object.keys(headers).find(k => k.toLowerCase() === 'user-agent'); + return key ? headers[key] : undefined; +} + /** * Determines trace sample rate based on user agent. * - AI agents: 100% (we want full visibility into agentic docs consumption) * - Bots/crawlers: 0% (filter out noise) * - Real users: 30% * - * AI agents are checked first, so if something matches both AI and bot patterns, we sample it. + * AI agents are checked first; if something matches both AI and bot patterns, we sample it.
*/ export function tracesSampler(samplingContext: SamplingContext): number { // Try to get user agent from normalizedRequest headers (Sentry SDK provides this) // Falls back to OTel semantic convention attributes if normalizedRequest not available const userAgent = - samplingContext.normalizedRequest?.headers?.['user-agent'] ?? + getUserAgentFromHeaders(samplingContext.normalizedRequest?.headers) ?? (samplingContext.attributes?.['http.user_agent'] as string | undefined) ?? (samplingContext.attributes?.['user_agent.original'] as string | undefined); + // No user-agent = can't classify traffic, skip metric if (!userAgent) { - Sentry.metrics.count('docs.trace.sampled', 1, { - attributes: { - traffic_type: 'unknown', - sample_rate: DEFAULT_SAMPLE_RATE, - }, - }); return DEFAULT_SAMPLE_RATE; }