Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
66fce7b
Add Google Tag Manager first-party integration
prk-Jr Feb 9, 2026
348f150
Fix linting errors in google_tag_manager.rs and google_tag_manager.md
prk-Jr Feb 9, 2026
4d543e4
Add configuration and pipeline integration tests
prk-Jr Feb 9, 2026
4fd54b8
Enhance GTM integration with caching, validation, and improved logging
prk-Jr Feb 9, 2026
6486bd4
Enhance GTM integration with caching, validation, and improved logging
prk-Jr Feb 9, 2026
a02b7d8
Add integration tests for HTML processing and inline script rewriting…
prk-Jr Feb 9, 2026
f322373
Merge branch 'main' into feat/gtm-integration
prk-Jr Feb 10, 2026
0d4a47e
Enhance Google Tag Manager integration with regex-based URL rewriting…
prk-Jr Feb 13, 2026
4f33b26
Add configurable cache max-age for GTM scripts and disable synthetic …
prk-Jr Feb 16, 2026
61125a2
feat: Add gtag.js support and refactor GTM proxy logic into helper fu…
prk-Jr Feb 16, 2026
6a96c7f
fix: lint error on ci
prk-Jr Feb 16, 2026
76b20be
Docs fix to clarify client IP handling and privacy enhancement for Go…
prk-Jr Feb 17, 2026
72bacca
Broaden GTM attribute rewriter, add script guard, and harden regex
prk-Jr Feb 20, 2026
61a8260
fix: ts lint
prk-Jr Feb 20, 2026
1eda0af
Add beacon guard to proxy GA4 sendBeacon/fetch through first-party
prk-Jr Feb 20, 2026
f38cf89
Fix beacon guard tests for jsdom compatibility
prk-Jr Feb 20, 2026
0240992
Merge branch 'main' into feat/gtm-integration
prk-Jr Feb 21, 2026
0f85d9e
Improve GTM integration validation and error handling
prk-Jr Feb 24, 2026
4a7ed37
Fix lint format issue
prk-Jr Feb 24, 2026
457702c
Fix memory and security vulnerabilities in GTM integration
prk-Jr Feb 25, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,624 changes: 1,624 additions & 0 deletions crates/common/src/integrations/google_tag_manager.rs

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions crates/common/src/integrations/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ pub mod adserver_mock;
pub mod aps;
pub mod datadome;
pub mod didomi;
pub mod google_tag_manager;
pub mod lockr;
pub mod nextjs;
pub mod permutive;
Expand All @@ -31,6 +32,7 @@ pub(crate) fn builders() -> &'static [IntegrationBuilder] {
permutive::register,
lockr::register,
didomi::register,
google_tag_manager::register,
datadome::register,
]
}
31 changes: 31 additions & 0 deletions crates/js/lib/src/integrations/google_tag_manager/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import { log } from '../../core/log';

import { installGtmBeaconGuard } from './script_guard';
import { installGtmGuard } from './script_guard';

/**
* Google Tag Manager integration for tsjs
*
* Installs guards to intercept GTM and Google Analytics traffic:
*
* 1. **Script guard** — intercepts dynamically inserted `<script>` and
* `<link>` elements and rewrites their URLs to the first-party proxy.
*
* 2. **Beacon guard** — intercepts `navigator.sendBeacon()` and `fetch()`
* calls to Google Analytics domains (www.google-analytics.com,
* analytics.google.com) and rewrites them to the first-party proxy.
* This is necessary because gtag.js constructs beacon URLs dynamically
* from bare domain strings, which can't be safely rewritten at the
* script level.
*
* URLs are rewritten to preserve the original path:
* - https://www.googletagmanager.com/gtm.js?id=GTM-XXXX -> /integrations/google_tag_manager/gtm.js?id=GTM-XXXX
* - https://www.google-analytics.com/g/collect -> /integrations/google_tag_manager/g/collect
* - https://analytics.google.com/g/collect -> /integrations/google_tag_manager/g/collect
*/

// Guards patch browser globals, so they are only installed when a `window`
// global exists. The script guard goes in first, then the beacon guard.
if (typeof window !== 'undefined') {
  for (const installGuard of [installGtmGuard, installGtmBeaconGuard]) {
    installGuard();
  }
  log.info('Google Tag Manager integration initialized');
}
125 changes: 125 additions & 0 deletions crates/js/lib/src/integrations/google_tag_manager/script_guard.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import { createBeaconGuard } from '../../shared/beacon_guard';
import { createScriptGuard } from '../../shared/script_guard';

/**
* Google Tag Manager Script Interception Guard
*
* Intercepts dynamically inserted script tags that load GTM or Google Analytics
* and rewrites their URLs to use the first-party proxy endpoint. This catches
* scripts inserted via appendChild, insertBefore, or any other dynamic DOM
* manipulation (e.g. Next.js dynamic imports).
*
* Built on the shared script_guard factory with custom URL rewriting to preserve
* the original path and query string.
*/

/** Regex to match GTM/GA domains: www.googletagmanager.com, www.google-analytics.com, analytics.google.com */
const GTM_URL_PATTERN =
/^(?:https?:)?(?:\/\/)?(www\.(googletagmanager|google-analytics)\.com|analytics\.google\.com)(?:\/|$)/i;

/**
* Supported paths that the server can proxy.
* Must match the route patterns defined in the GoogleTagManagerIntegration handler
* in crates/common/src/integrations/google_tag_manager.rs
*/
const SUPPORTED_PATHS = ['/gtm.js', '/gtag/js', '/gtag.js', '/collect', '/g/collect'];

/**
* Check if a URL is a GTM or Google Analytics URL with a supported path.
* Matches the logic from google_tag_manager.rs GTM_URL_PATTERN.
*
* Valid patterns:
* - https://www.googletagmanager.com/gtm.js?id=GTM-XXXX
* - https://www.google-analytics.com/g/collect
* - https://analytics.google.com/g/collect
* - //www.googletagmanager.com/gtm.js?id=GTM-XXXX
*
* Invalid:
* - https://googletagmanager.com/gtm.js (missing www.)
* - https://example.com/www.googletagmanager.com (domain mismatch)
* - https://www.googletagmanager.com/ns.html (unsupported path)
*/
function isGtmUrl(url: string): boolean {
  if (!url || !GTM_URL_PATTERN.test(url)) {
    return false;
  }

  // Validate that the path is one of the routes the server can proxy.
  try {
    // Normalization stays inside the try so that a non-string argument
    // (which makes string methods throw) is rejected instead of crashing.
    const { pathname } = new URL(normalizeGtmUrl(url));

    // pathname never includes the query string, so an exact match is sufficient.
    return SUPPORTED_PATHS.includes(pathname);
  } catch {
    // Fail closed: if URL parsing fails, reject the URL rather than
    // using a permissive fallback that could match malformed strings.
    return false;
  }
}

/**
 * Turn any accepted spelling of a GTM/GA URL into a string `new URL()` can parse.
 *
 * - protocol-relative (`//host/path`)  -> `https://host/path`
 * - has an http/https scheme           -> returned unchanged
 * - bare host (`host/path`)            -> `https://host/path`
 *
 * The scheme check is case-insensitive, like GTM_URL_PATTERN. A case-sensitive
 * check would treat `HTTPS://host/path` as a bare host and prepend a second
 * scheme, which parses with host `https` and a bogus `//host/path` pathname.
 */
function normalizeGtmUrl(url: string): string {
  if (url.startsWith('//')) {
    return `https:${url}`;
  }
  if (/^https?:/i.test(url)) {
    return url;
  }
  return `https://${url}`;
}

/**
 * Extract the path and query string from a GTM/GA URL.
 * e.g., "https://www.googletagmanager.com/gtm.js?id=GTM-XXXX" -> "/gtm.js?id=GTM-XXXX"
 *       "https://www.google-analytics.com/g/collect?v=2" -> "/g/collect?v=2"
 *
 * @param url - Absolute, protocol-relative, or bare-host GTM/GA URL.
 * @returns The pathname plus query string. If the URL cannot be parsed, the
 *          path is recovered with a regex; if that also fails, "/gtm.js" is returned.
 */
function extractGtmPath(url: string): string {
  try {
    const parsed = new URL(normalizeGtmUrl(url));
    return parsed.pathname + parsed.search;
  } catch (error) {
    // Fallback: extract path after the domain using regex
    console.warn('[GTM Guard] URL parsing failed for:', url, 'Error:', error);
    const match = url.match(
      /(?:www\.(?:googletagmanager|google-analytics)\.com|analytics\.google\.com)(\/[^'"\s]*)/i
    );
    if (!match || !match[1]) {
      console.warn('[GTM Guard] Fallback regex failed, using default path /gtm.js');
      return '/gtm.js';
    }
    return match[1];
  }
}

/**
* Rewrite a GTM/GA URL to the first-party proxy path.
*/
function rewriteGtmUrl(originalUrl: string): string {
  // Proxy URL = current page origin + integration mount point + original path and query.
  const { origin } = window.location;
  const pathAndQuery = extractGtmPath(originalUrl);
  return [origin, '/integrations/google_tag_manager', pathAndQuery].join('');
}

// DOM-insertion guard: rewrites matching element URLs to the first-party proxy.
export const {
  install: installGtmGuard,
  isInstalled: isGuardInstalled,
  reset: resetGuardState,
} = createScriptGuard({
  name: 'GTM',
  isTargetUrl: isGtmUrl,
  rewriteUrl: rewriteGtmUrl,
});

// Network guard: applies the same match/rewrite rules to sendBeacon() and fetch().
export const {
  install: installGtmBeaconGuard,
  isInstalled: isBeaconGuardInstalled,
  reset: resetBeaconGuardState,
} = createBeaconGuard({
  name: 'GTM',
  isTargetUrl: isGtmUrl,
  rewriteUrl: rewriteGtmUrl,
});

// Export for testing
export { isGtmUrl, extractGtmPath, rewriteGtmUrl };
134 changes: 134 additions & 0 deletions crates/js/lib/src/shared/beacon_guard.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import { log } from '../core/log';

/**
* Shared Beacon Guard Factory
*
* Creates a network interception guard that patches `navigator.sendBeacon`
* and `window.fetch` to intercept outgoing beacon/analytics requests whose
* URLs match an integration's target domains. Matched URLs are rewritten to
* a first-party proxy endpoint.
*
* This complements the script_guard (which intercepts DOM insertions) by
* handling the _runtime_ network calls that analytics SDKs use to send data.
*
* Each call to createBeaconGuard() produces an independent guard with its
* own installation state, so multiple integrations can coexist.
*/

export interface BeaconGuardConfig {
/** Integration name used in log messages (e.g. "GTM"). */
name: string;
/** Return true if the URL belongs to this integration's analytics domain. */
isTargetUrl: (url: string) => boolean;
/** Rewrite the original URL to a first-party proxy URL. */
rewriteUrl: (url: string) => string;
}

export interface BeaconGuard {
/** Patch sendBeacon/fetch to intercept matching beacon requests. */
install: () => void;
/** Whether the guard has already been installed. */
isInstalled: () => boolean;
/** Reset installation state (primarily for testing). */
reset: () => void;
}

/**
* Extract a URL string from the various input types that fetch() accepts.
* Returns null if the input can't be resolved to a URL string.
*/
function extractUrl(input: RequestInfo | URL): string | null {
  // Strings are already URLs; URL and Request objects expose theirs as a property.
  if (typeof input === 'string') {
    return input;
  }
  return input instanceof URL ? input.href : input instanceof Request ? input.url : null;
}

/**
* Create an independent beacon guard for a specific integration.
*/
export function createBeaconGuard(config: BeaconGuardConfig): BeaconGuard {
  let installed = false;
  // Kept only so reset() can restore the globals; the wrappers below do NOT read these.
  let originalSendBeacon: typeof navigator.sendBeacon | null = null;
  let originalFetch: typeof window.fetch | null = null;
  const prefix = `${config.name} beacon guard`;

  /**
   * Patch `navigator.sendBeacon` and `window.fetch` (whichever exist) so that
   * URLs accepted by `config.isTargetUrl` are replaced with `config.rewriteUrl(url)`.
   * Does nothing if already installed or if there is no `window` global.
   */
  function install(): void {
    if (installed) {
      log.debug(`${prefix}: already installed, skipping`);
      return;
    }

    if (typeof window === 'undefined') {
      log.debug(`${prefix}: not in browser environment, skipping`);
      return;
    }

    log.info(`${prefix}: installing network interception`);

    // --- Patch navigator.sendBeacon ---
    if (typeof navigator !== 'undefined' && typeof navigator.sendBeacon === 'function') {
      // The wrapper closes over this local rather than the resettable outer
      // variable. A reference to the wrapper that outlives reset() (e.g. one
      // captured as "original" by a guard installed later) therefore keeps
      // working instead of calling null.
      const nativeSendBeacon = navigator.sendBeacon.bind(navigator);
      originalSendBeacon = nativeSendBeacon;

      navigator.sendBeacon = function (url: string | URL, data?: BodyInit | null): boolean {
        // sendBeacon also accepts URL objects; stringify so the string-based
        // predicate and rewriter can handle them.
        const target = typeof url === 'string' ? url : String(url);

        if (config.isTargetUrl(target)) {
          const rewritten = config.rewriteUrl(target);
          log.info(`${prefix}: rewriting sendBeacon`, { original: target, rewritten });
          return nativeSendBeacon(rewritten, data);
        }
        // Not ours: forward the caller's argument untouched.
        return nativeSendBeacon(url, data);
      };
    }

    // --- Patch window.fetch ---
    if (typeof window.fetch === 'function') {
      // Same closure-over-local reasoning as for sendBeacon above.
      const nativeFetch = window.fetch.bind(window);
      originalFetch = nativeFetch;

      window.fetch = function (input: RequestInfo | URL, init?: RequestInit): Promise<Response> {
        const url = extractUrl(input);

        if (url && config.isTargetUrl(url)) {
          const rewritten = config.rewriteUrl(url);
          log.info(`${prefix}: rewriting fetch`, { original: url, rewritten });

          // If the input was a Request, create a new one with the rewritten URL,
          // passing the original Request as the init argument.
          if (input instanceof Request) {
            const newRequest = new Request(rewritten, input);
            return nativeFetch(newRequest, init);
          }
          return nativeFetch(rewritten, init);
        }

        return nativeFetch(input, init);
      };
    }

    installed = true;
    log.info(`${prefix}: network interception installed successfully`);
  }

  /** Whether install() has completed and reset() has not been called since. */
  function isInstalled(): boolean {
    return installed;
  }

  /**
   * Put back the sendBeacon/fetch functions captured by install() and clear
   * the installed flag (primarily for testing).
   */
  function reset(): void {
    if (originalSendBeacon && typeof navigator !== 'undefined') {
      navigator.sendBeacon = originalSendBeacon;
      originalSendBeacon = null;
    }
    if (originalFetch && typeof window !== 'undefined') {
      window.fetch = originalFetch;
      originalFetch = null;
    }
    installed = false;
    log.debug(`${prefix}: reset and uninstalled`);
  }

  return { install, isInstalled, reset };
}
Loading