Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion .env.sample
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ PROMETHEUS_PUSHGATEWAY_URL=
# pushgateway push interval in ms
PROMETHEUS_PUSHGATEWAY_INTERVAL=10000

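# Grouper memory log controls.
# Units follow the variable suffix: *_TASKS values are a number of tasks, *_MB values are megabytes.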
GROUPER_MEMORY_LOG_EVERY_TASKS=50
GROUPER_MEMORY_GROWTH_WINDOW_TASKS=200
GROUPER_MEMORY_GROWTH_WARN_MB=64
GROUPER_MEMORY_HANDLE_GROWTH_WARN_MB=16
Comment on lines +35 to +38
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please specify units of measurement in comments


# project token for error catching
HAWK_CATCHER_TOKEN=

Expand All @@ -40,4 +46,4 @@ HAWK_CATCHER_TOKEN=
IS_NOTIFIER_WORKER_ENABLED=false

## Url for telegram notifications about workspace blocks and unblocks
TELEGRAM_LIMITER_CHAT_URL=
TELEGRAM_LIMITER_CHAT_URL=
97 changes: 97 additions & 0 deletions lib/metrics.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import * as client from 'prom-client';
import os from 'os';
import { nanoid } from 'nanoid';
import createLogger from './logger';

/**
 * Registry instance shared by the default metrics collection and the Pushgateway client in this module
 */
const register = new client.Registry();
const logger = createLogger();

/**
 * Push period (ms) used when PROMETHEUS_PUSHGATEWAY_INTERVAL is not set or is not a finite positive number
 */
const DEFAULT_PUSH_INTERVAL_MS = 10_000;

/**
 * Size passed to nanoid() when generating the `id` grouping value
 */
const ID_SIZE = 5;

/**
 * Job name passed to the Pushgateway on every push
 */
const METRICS_JOB_NAME = 'workers';

/**
 * Handle of the active push timer, null while nothing is being pushed
 */
let pushInterval: NodeJS.Timeout | null = null;

/**
 * Worker name the active push timer was started for (used in log messages), empty string while idle
 */
let currentWorkerName = '';

/**
 * Collect prom-client default metrics into the registry above
 */
client.collectDefaultMetrics({ register });

// NOTE(review): presumably re-exported so that other modules can define metrics on the same registry — confirm against callers
export { register, client };

/**
 * Resolve the pushgateway push period from PROMETHEUS_PUSHGATEWAY_INTERVAL.
 *
 * The default is used silently when the variable is not set, and with a
 * warning when it is set to anything that is not a finite positive number.
 *
 * @returns push period in milliseconds
 */
function getPushIntervalMs(): number {
  const envValue = process.env.PROMETHEUS_PUSHGATEWAY_INTERVAL;

  /** Variable is absent: nothing to validate, nothing to warn about */
  if (envValue === undefined) {
    return DEFAULT_PUSH_INTERVAL_MS;
  }

  const candidate = Number(envValue);
  const isUsable = Number.isFinite(candidate) && candidate > 0;

  if (isUsable) {
    return candidate;
  }

  /** Variable is present but unusable (NaN, Infinity, zero or negative) */
  logger.warn(`[metrics] invalid PROMETHEUS_PUSHGATEWAY_INTERVAL="${envValue}", fallback to ${DEFAULT_PUSH_INTERVAL_MS}ms`);

  return DEFAULT_PUSH_INTERVAL_MS;
}

/**
 * Cancel the periodic push to pushgateway, if one is running.
 *
 * Does nothing when no push timer is active. Otherwise clears the timer,
 * logs the worker name the pushing was started for and resets module state.
 */
export function stopMetricsPushing(): void {
  if (pushInterval) {
    clearInterval(pushInterval);
    pushInterval = null;

    logger.info(`[metrics] stopped pushing metrics for worker=${currentWorkerName}`);

    currentWorkerName = '';
  }
}

/**
 * Start periodic push to pushgateway.
 *
 * Nothing is started when PROMETHEUS_PUSHGATEWAY_URL is empty, or when a push
 * timer is already running (a warning is logged in that case). The returned
 * callback is always stopMetricsPushing, so callers may invoke it unconditionally.
 *
 * @param workerName - name of the worker for grouping.
 * @returns callback that stops the periodic push
 */
export function startMetricsPushing(workerName: string): () => void {
  const url = process.env.PROMETHEUS_PUSHGATEWAY_URL;

  if (!url) {
    return stopMetricsPushing;
  }

  /** Only one push timer per process: a second start is ignored */
  if (pushInterval) {
    logger.warn(`[metrics] pushing is already started for worker=${currentWorkerName}, skip duplicate start for worker=${workerName}`);

    return stopMetricsPushing;
  }

  const interval = getPushIntervalMs();
  const hostname = os.hostname();
  const id = nanoid(ID_SIZE);

  /**
   * The second argument is the request options object.
   * An empty object is passed (not an array) so the library reads option fields from a properly shaped value.
   */
  const gateway = new client.Pushgateway(url, {}, register);

  currentWorkerName = workerName;

  logger.info(`Start pushing metrics to ${url} every ${interval}ms (host: ${hostname}, id: ${id}, worker: ${workerName})`);

  pushInterval = setInterval(() => {
    gateway.pushAdd({
      jobName: METRICS_JOB_NAME,
      groupings: {
        worker: workerName,
        host: hostname,
        id,
      },
    }, (err) => {
      /** A failed push is only logged; the timer keeps running and retries on the next tick */
      if (err) {
        logger.error(`Metrics push error: ${err.message || err}`);
      }
    });
  }, interval);

  return stopMetricsPushing;
}
108 changes: 34 additions & 74 deletions runner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import * as utils from './lib/utils';
import { Worker } from './lib/worker';
import HawkCatcher from '@hawk.so/nodejs';
import * as dotenv from 'dotenv';
import { startMetricsPushing } from './lib/metrics';

dotenv.config();

Expand Down Expand Up @@ -40,9 +41,9 @@ class WorkerRunner {
// private gateway?: promClient.Pushgateway;

/**
* number returned by setInterval() of metrics push function
* Metrics push cleanup callback.
*/
private pushIntervalNumber?: ReturnType<typeof setInterval>;
private stopMetricsPushing?: () => void;

/**
* Create runner instance
Expand All @@ -57,19 +58,17 @@ class WorkerRunner {
.then((workerConstructors) => {
this.constructWorkers(workerConstructors);
})
// .then(() => {
// try {
// this.startMetrics();
// } catch (e) {
// HawkCatcher.send(e);
// console.error(`Metrics not started: ${e}`);
// }
//
// return Promise.resolve();
// })
.then(() => {
return this.startWorkers();
})
.then(() => {
try {
this.startMetrics();
} catch (e) {
HawkCatcher.send(e);
console.error(`Metrics not started: ${e}`);
}
})
.then(() => {
this.observeProcess();
})
Expand All @@ -82,67 +81,27 @@ class WorkerRunner {
/**
* Run metrics exporter
*/
// private startMetrics(): void {
// if (!process.env.PROMETHEUS_PUSHGATEWAY_URL) {
// return;
// }
//
// const PUSH_INTERVAL = parseInt(process.env.PROMETHEUS_PUSHGATEWAY_INTERVAL);
//
// if (isNaN(PUSH_INTERVAL)) {
// throw new Error('PROMETHEUS_PUSHGATEWAY_INTERVAL is invalid or not set');
// }
//
// const collectDefaultMetrics = promClient.collectDefaultMetrics;
// const Registry = promClient.Registry;
//
// const register = new Registry();
// const startGcStats = gcStats(register);
//
// const hostname = os.hostname();
//
// const ID_SIZE = 5;
// const id = nanoid(ID_SIZE);
//
// // eslint-disable-next-line node/no-deprecated-api
// const instance = url.parse(process.env.PROMETHEUS_PUSHGATEWAY_URL).host;
//
// // Initialize metrics for workers
// this.workers.forEach((worker) => {
// // worker.initMetrics();
// worker.getMetrics().forEach((metric: promClient.Counter<string>) => register.registerMetric(metric));
// });
//
// collectDefaultMetrics({ register });
// startGcStats();
//
// this.gateway = new promClient.Pushgateway(process.env.PROMETHEUS_PUSHGATEWAY_URL, null, register);
//
// console.log(`Start pushing metrics to ${process.env.PROMETHEUS_PUSHGATEWAY_URL}`);
//
// // Pushing metrics to the pushgateway every PUSH_INTERVAL
// this.pushIntervalNumber = setInterval(() => {
// this.workers.forEach((worker) => {
// if (!this.gateway || !instance) {
// return;
// }
// // Use pushAdd not to overwrite previous metrics
// this.gateway.pushAdd({
// jobName: 'workers',
// groupings: {
// worker: worker.type.replace('/', '_'),
// host: hostname,
// id,
// },
// }, (err?: Error) => {
// if (err) {
// HawkCatcher.send(err);
// console.log(`Error of pushing metrics to gateway: ${err}`);
// }
// });
// });
// }, PUSH_INTERVAL);
// }
/**
 * Run metrics exporter
 *
 * Does nothing when PROMETHEUS_PUSHGATEWAY_URL is empty or when there are no workers.
 * Otherwise starts metrics pushing under a single worker label and stores
 * the returned stop callback in this.stopMetricsPushing.
 */
private startMetrics(): void {
  if (!process.env.PROMETHEUS_PUSHGATEWAY_URL) {
    return;
  }

  if (this.workers.length === 0) {
    return;
  }

  /**
   * Unique worker types with every "/" replaced by "_"
   * (a global regex is used so types with more than one slash are fully sanitized)
   */
  const workerTypes = Array.from(new Set(this.workers.map((worker) => {
    return worker.type.replace(/\//g, '_');
  })));

  /** One label per process: the worker type itself, or a generic label when several types share the process */
  const workerTypeForMetrics = workerTypes.length === 1 ? workerTypes[0] : 'multi_worker_process';

  if (workerTypes.length > 1) {
    console.warn(`[metrics] ${workerTypes.length} workers are running in one process; pushing metrics as "${workerTypeForMetrics}" to avoid duplicated attribution`);
  }

  this.stopMetricsPushing = startMetricsPushing(workerTypeForMetrics);
}

/**
* Dynamically loads workers through the yarn workspaces
Expand Down Expand Up @@ -277,7 +236,8 @@ class WorkerRunner {
private async stopWorker(worker: Worker): Promise<void> {
try {
// stop pushing metrics
clearInterval(this.pushIntervalNumber);
this.stopMetricsPushing?.();
this.stopMetricsPushing = undefined;
await worker.finish();

console.log(
Expand Down
Loading
Loading