From d79e6a9da545d566fcf3056059e82fe4e6476b11 Mon Sep 17 00:00:00 2001
From: liyanwei93
Date: Wed, 20 Nov 2024 21:33:00 -0800
Subject: [PATCH] Remove collect metrics func to sdk process

---
Review note: the added lines of runtime-metrics.js were revised after this
commit was created, so the commit and post-image blob hashes are stale.

 src/paymentservice/charge.js          |   4 -
 src/paymentservice/opentelemetry.js   |   4 +-
 src/paymentservice/runtime-metrics.js | 488 +++++++++++++++++---------
 3 files changed, 295 insertions(+), 201 deletions(-)

diff --git a/src/paymentservice/charge.js b/src/paymentservice/charge.js
index 9f63620fd8..7fcb7fbdaa 100644
--- a/src/paymentservice/charge.js
+++ b/src/paymentservice/charge.js
@@ -9,7 +9,6 @@ const { FlagdProvider} = require('@openfeature/flagd-provider');
 const flagProvider = new FlagdProvider();
 
 const logger = require('./logger');
-const runtimeMetrics = require('./runtime-metrics');
 const tracer = trace.getTracer('paymentservice');
 const meter = metrics.getMeter('paymentservice');
 const transactionsCounter = meter.createCounter('app.payment.transactions')
@@ -65,8 +64,5 @@ module.exports.charge = async request => {
   const { units, nanos, currencyCode } = request.amount;
   logger.info({transactionId, cardType, lastFourDigits, amount: { units, nanos, currencyCode }}, "Transaction complete.");
   transactionsCounter.add(1, {"app.payment.currency": currencyCode})
-
-  runtimeMetrics.setupRuntimeMetrics();
-
   return { transactionId }
 }
diff --git a/src/paymentservice/opentelemetry.js b/src/paymentservice/opentelemetry.js
index 623a309b1b..c5959d411c 100644
--- a/src/paymentservice/opentelemetry.js
+++ b/src/paymentservice/opentelemetry.js
@@ -12,6 +12,7 @@ const {containerDetector} = require('@opentelemetry/resource-detector-container'
 const {gcpDetector} = require('@opentelemetry/resource-detector-gcp')
 const {envDetector, hostDetector, osDetector, processDetector} = require('@opentelemetry/resources')
 const {RuntimeNodeInstrumentation} = require('@opentelemetry/instrumentation-runtime-node')
+const { setSdkInstance } = require('./runtime-metrics');
 
 const sdk = new opentelemetry.NodeSDK({
   traceExporter: new OTLPTraceExporter(),
@@ -42,4 +43,5 @@ const sdk = new opentelemetry.NodeSDK({
   ],
 })
 
-sdk.start();
\ No newline at end of file
+sdk.start();
+setSdkInstance(sdk);
\ No newline at end of file
diff --git a/src/paymentservice/runtime-metrics.js b/src/paymentservice/runtime-metrics.js
index 5629920a64..4c46c15958 100644
--- a/src/paymentservice/runtime-metrics.js
+++ b/src/paymentservice/runtime-metrics.js
@@ -1,236 +1,332 @@
 // Copyright The OpenTelemetry Authors
 // SPDX-License-Identifier: Apache-2.0
 
-const {metrics} = require('@opentelemetry/api');
+const { metrics } = require('@opentelemetry/api');
 const process = require('process');
 const v8 = require('v8');
 const perf_hooks = require('perf_hooks');
 const { monitorEventLoopDelay } = require('perf_hooks');
 
-const meter = metrics.getMeter('paymentservice');
-// new metrics
-// GC Metrics tracking
-let minorGcCount = 0;
-let majorGcCount = 0;
-let lastGcPause = 0;
-let heapSizeAfterGc = 0;
-
-// Create observable gauges for all metrics
-const gcPauseGauge = meter.createObservableGauge('nodejs.gc.gcPause', {
-  description: 'GC Pause in milliseconds',
-  unit: 'ms',
-});
+/**
+ * Publishes Node.js runtime metrics (memory, GC, event loop, heap spaces,
+ * active handles/requests) as OpenTelemetry observable gauges.
+ *
+ * The gauges are pull-based: callbacks are registered once and the SDK
+ * invokes them on every metric collection.
+ */
+class RuntimeMetricsCollector {
+  /**
+   * @param {string} [meterName] - Name of the meter that owns the gauges.
+   * @param {number} [collectionIntervalMs] - Retained for backward
+   *   compatibility; the SDK's metric reader decides when gauges are read.
+   */
+  constructor(meterName = 'runtime-metrics', collectionIntervalMs = 10000) {
+    this.meter = metrics.getMeter(meterName);
+    this.collectionIntervalMs = collectionIntervalMs;
+    this.minorGcCount = 0;
+    this.majorGcCount = 0;
+    this.lastGcPause = 0;
+    this.heapSizeAfterGc = 0;
+    this.callbacksRegistered = false;
+    this.histogram = monitorEventLoopDelay({ resolution: 20 });
+
+    this.globalLastState = {
+      timestamp: process.hrtime.bigint(),
+      countTimestamp: process.hrtime.bigint(),
+      count: this.histogram.count,
+      sum: this.histogram.mean * this.histogram.count,
+      lastCollection: Date.now()
+    };
 
-const activeHandlesGauge = meter.createObservableGauge('nodejs.activeHandles', {
-  description: 'Number of active handles',
-  unit: '{handles}',
-});
+    this.initializeMetrics();
+    this.setupGcObserver();
+  }
 
-const activeRequestsGauge = meter.createObservableGauge('nodejs.activeRequests', {
-  description: 'Number of active requests',
-  unit: '{requests}',
-});
+  /** Creates every observable gauge; collectMetrics() attaches the callbacks. */
+  initializeMetrics() {
+    // Initialize all gauge metrics
+    this.gcPauseGauge = this.meter.createObservableGauge('nodejs.gc.gcPause', {
+      description: 'GC Pause in milliseconds',
+      unit: 'ms',
+    });
 
-const minorGcsGauge = meter.createObservableGauge('nodejs.gc.minorGcs', {
-  description: 'Number of minor GCs',
-  unit: '{gcs}',
-});
+    this.activeHandlesGauge = this.meter.createObservableGauge('nodejs.activeHandles', {
+      description: 'Number of active handles',
+      unit: '{handles}',
+    });
 
-const majorGcsGauge = meter.createObservableGauge('nodejs.gc.majorGcs', {
-  description: 'Number of major GCs',
-  unit: '{gcs}',
-});
+    this.activeRequestsGauge = this.meter.createObservableGauge('nodejs.activeRequests', {
+      description: 'Number of active requests',
+      unit: '{requests}',
+    });
 
-const rssGauge = meter.createObservableGauge('nodejs.memory.rss', {
-  description: 'Resident Set Size',
-  unit: 'bytes',
-});
+    this.minorGcsGauge = this.meter.createObservableGauge('nodejs.gc.minorGcs', {
+      description: 'Number of minor GCs',
+      unit: '{gcs}',
+    });
 
-const heapUsedGauge = meter.createObservableGauge('nodejs.memory.heapUsed', {
-  description: 'Heap Size Used',
-  unit: 'bytes',
-});
+    this.majorGcsGauge = this.meter.createObservableGauge('nodejs.gc.majorGcs', {
+      description: 'Number of major GCs',
+      unit: '{gcs}',
+    });
 
-const heapSizeAfterGcGauge = meter.createObservableGauge('nodejs.gc.usedHeapSizeAfterGc', {
-  description: 'Heap Size After GC',
-  unit: 'bytes',
-});
+    this.rssGauge = this.meter.createObservableGauge('nodejs.memory.rss', {
+      description: 'Resident Set Size',
+      unit: 'bytes',
+    });
 
-// Event Loop Metrics
-const eventLoopMaxGauge = meter.createObservableGauge('nodejs.libuv.max', {
-  description: 'Longest time spent in a single loop',
-  unit: 'ms',
-});
+    this.heapUsedGauge = this.meter.createObservableGauge('nodejs.memory.heapUsed', {
+      description: 'Heap Size Used',
+      unit: 'bytes',
+    });
 
-const eventLoopSumGauge = meter.createObservableGauge('nodejs.libuv.sum', {
-  description: 'Total time spent in loop',
-  unit: 'ms',
-});
+    this.heapSizeAfterGcGauge = this.meter.createObservableGauge('nodejs.gc.usedHeapSizeAfterGc', {
+      description: 'Heap Size After GC',
+      unit: 'bytes',
+    });
 
-const eventLoopLagGauge = meter.createObservableGauge('nodejs.libuv.lag', {
-  description: 'Event loop lag',
-  unit: 'ms',
-});
+    this.eventLoopMetrics = {
+      max: this.meter.createObservableGauge('nodejs.libuv.max', {
+        description: 'Longest time spent in a single loop',
+        unit: 'ms',
+      }),
+      sum: this.meter.createObservableGauge('nodejs.libuv.sum', {
+        description: 'Total time spent in loop',
+        unit: 'ms',
+      }),
+      lag: this.meter.createObservableGauge('nodejs.libuv.lag', {
+        description: 'Event loop lag',
+        unit: 'ms',
+      }),
+      count: this.meter.createObservableGauge('nodejs.libuv.num', {
+        description: 'Loops per second',
+        unit: '{loops}',
+      })
+    };
 
-const eventLoopCountGauge = meter.createObservableGauge('nodejs.libuv.num', {
-  description: 'Loops per second',
-  unit: '{loops}',
-});
+    this.heapSpacesMetrics = {
+      used: this.meter.createObservableGauge('nodejs.heapSpaces.used', {
+        description: 'Heap Spaces Used',
+        unit: 'bytes',
+      }),
+      available: this.meter.createObservableGauge('nodejs.heapSpaces.available', {
+        description: 'Heap Spaces Available',
+        unit: 'bytes',
+      }),
+      current: this.meter.createObservableGauge('nodejs.heapSpaces.current', {
+        description: 'Heap Spaces Current',
+        unit: 'bytes',
+      }),
+      physical: this.meter.createObservableGauge('nodejs.heapSpaces.physical', {
+        description: 'Heap Spaces Physical',
+        unit: 'bytes',
+      })
+    };
+  }
 
-// Heap Spaces Metrics
-const heapSpacesUsedGauge = meter.createObservableGauge('nodejs.heapSpaces.used', {
-  description: 'Heap Spaces Used',
-  unit: 'bytes',
-});
+  /**
+   * Tracks GC activity: counts major GCs separately from every other GC kind
+   * and remembers the latest pause duration and the heap usage sampled after it.
+   */
+  setupGcObserver() {
+    const obs = new perf_hooks.PerformanceObserver((list) => {
+      const entries = list.getEntries();
+      entries.forEach((entry) => {
+        if (entry.kind === perf_hooks.constants.NODE_PERFORMANCE_GC_MAJOR) {
+          this.majorGcCount++;
+        } else {
+          this.minorGcCount++;
+        }
+        this.lastGcPause = entry.duration;
+        this.heapSizeAfterGc = process.memoryUsage().heapUsed;
+      });
+    });
 
-const heapSpacesAvailableGauge = meter.createObservableGauge('nodejs.heapSpaces.available', {
-  description: 'Heap Spaces Available',
-  unit: 'bytes',
-});
+    obs.observe({ entryTypes: ['gc'], buffered: true });
+  }
 
-const heapSpacesCurrentGauge = meter.createObservableGauge('nodejs.heapSpaces.current', {
-  description: 'Heap Spaces Current',
-  unit: 'bytes',
-});
+  /**
+   * Attaches the event loop callbacks: max delay, mean delay, and per-second
+   * rates of accumulated delay and sample count, all read from the histogram.
+   */
+  collectEventLoopMetrics() {
+    const attributes = { type: 'loops' };
+    const now = Date.now();
 
-const heapSpacesPhysicalGauge = meter.createObservableGauge('nodejs.heapSpaces.physical', {
-  description: 'Heap Spaces Physical',
-  unit: 'bytes',
-});
+    this.eventLoopMetrics.max.addCallback((result) => {
+      const maxValue = Math.round(this.histogram.max / 1e6 * 1000) / 1000;
+      result.observe(maxValue, attributes);
+    });
 
-// Set up performance observer for GC events
-const obs = new perf_hooks.PerformanceObserver((list) => {
-  const entries = list.getEntries();
-  entries.forEach((entry) => {
-    // Update GC metrics based on the type of GC
-    if (entry.kind === perf_hooks.constants.NODE_PERFORMANCE_GC_MAJOR) {
-      majorGcCount++;
-    } else {
-      minorGcCount++;
-    }
-    lastGcPause = entry.duration;
-    heapSizeAfterGc = process.memoryUsage().heapUsed;
-  });
-});
+    this.eventLoopMetrics.lag.addCallback((result) => {
+      const lagValue = Math.round(this.histogram.mean / 1e6 * 1000) / 1000;
+      result.observe(lagValue, attributes);
+    });
 
-// Subscribe to GC events
-obs.observe({ entryTypes: ['gc'], buffered: true });
+    this.eventLoopMetrics.sum.addCallback((result) => {
+      const currentState = {
+        timestamp: process.hrtime.bigint(),
+        count: this.histogram.count,
+        sum: this.histogram.mean * this.histogram.count
+      };
+
+      const deltaTime = Number(currentState.timestamp - this.globalLastState.timestamp) / 1e9;
+      const deltaSum = (currentState.sum - this.globalLastState.sum) / 1e6;
+
+      if (deltaTime >= 0.9) {
+        const timePerSecond = deltaSum / deltaTime;
+        result.observe(timePerSecond, attributes);
+        this.globalLastState.sum = currentState.sum;
+        this.globalLastState.timestamp = currentState.timestamp;
+      }
+    });
 
-const histogram = monitorEventLoopDelay({ resolution: 20 });
-histogram.enable();
+    // Rate over its own window: the sum callback above resets
+    // globalLastState.timestamp, so sharing it would leave this delta near zero.
+    this.eventLoopMetrics.count.addCallback((result) => {
+      const currentCount = this.histogram.count;
+      const currentTimestamp = process.hrtime.bigint();
+      const deltaTime = Number(currentTimestamp - this.globalLastState.countTimestamp) / 1e9;
+      const deltaCount = currentCount - this.globalLastState.count;
 
-let globalLastState = {
-  timestamp: process.hrtime.bigint(),
-  count: histogram.count,
-  sum: histogram.mean * histogram.count,
-  lastCollection: Date.now()
-};
+      if (deltaTime >= 0.9) {
+        const loopsPerSecond = Math.round(deltaCount / deltaTime);
+        result.observe(loopsPerSecond, attributes);
+        this.globalLastState.count = currentCount;
+        this.globalLastState.countTimestamp = currentTimestamp;
+      }
+    });
 
-function eventLoopCollectMetrics() {
-  const attributes = { type: 'loops' };
-  const now = Date.now();
-
-  const timeSinceLastCollection = now - globalLastState.lastCollection;
-  if (timeSinceLastCollection < 900) {
-    return;
+    this.globalLastState.lastCollection = now;
   }
 
-  eventLoopMaxGauge.addCallback((observableResult) => {
-    const maxValue = Math.round(histogram.max / 1e6 * 1000) / 1000;
-    observableResult.observe(maxValue, attributes);
-  });
+  /**
+   * Attaches the callbacks of every gauge. Idempotent: observable callbacks stay
+   * registered, so adding them again would duplicate observations and leak.
+   */
+  collectMetrics() {
+    if (this.callbacksRegistered) {
+      return;
+    }
+    this.callbacksRegistered = true;
+
+    // Memory metrics
+    this.rssGauge.addCallback((result) => {
+      const memoryUsage = process.memoryUsage();
+      result.observe(memoryUsage.rss, { type: 'rss' });
+    });
 
-  eventLoopSumGauge.addCallback((observableResult) => {
-    const currentState = {
-      timestamp: process.hrtime.bigint(),
-      count: histogram.count,
-      sum: histogram.mean * histogram.count
-    };
+    this.heapUsedGauge.addCallback((result) => {
+      const memoryUsage = process.memoryUsage();
+      result.observe(memoryUsage.heapUsed, { type: 'heapUsed' });
+    });
+
+    // Active handles and requests
+    this.activeHandlesGauge.addCallback((result) => {
+      result.observe(process._getActiveHandles().length, { type: 'handles' });
+    });
 
-    const deltaTime = Number(currentState.timestamp - globalLastState.timestamp) / 1e9;
-    const deltaSum = (currentState.sum - globalLastState.sum) / 1e6;
+    this.activeRequestsGauge.addCallback((result) => {
+      result.observe(process._getActiveRequests().length, { type: 'requests' });
+    });
 
-    if (deltaTime >= 0.9) {
-      const timePerSecond = deltaSum / deltaTime;
-      observableResult.observe(timePerSecond, attributes);
-      globalLastState.sum = currentState.sum;
-      globalLastState.timestamp = currentState.timestamp;
-    }
-  });
-
-  eventLoopLagGauge.addCallback((observableResult) => {
-    const lagValue = Math.round(histogram.mean / 1e6 * 1000) / 1000;
-    observableResult.observe(lagValue, attributes);
-  });
-
-  eventLoopCountGauge.addCallback((observableResult) => {
-    const currentCount = histogram.count;
-    const deltaTime = Number(process.hrtime.bigint() - globalLastState.timestamp) / 1e9;
-    const deltaCount = currentCount - globalLastState.count;
-
-    if (deltaTime >= 0.9) {
-      const loopsPerSecond = Math.round(deltaCount / deltaTime);
-      observableResult.observe(loopsPerSecond, attributes);
-      globalLastState.count = currentCount;
-    }
-  });
+    // GC metrics
+    this.minorGcsGauge.addCallback((result) => {
+      result.observe(this.minorGcCount, { type: 'minor' });
+    });
 
-  globalLastState.lastCollection = now;
+    this.majorGcsGauge.addCallback((result) => {
+      result.observe(this.majorGcCount, { type: 'major' });
+    });
+
+    this.gcPauseGauge.addCallback((result) => {
+      result.observe(this.lastGcPause, { type: 'pause' });
+    });
+    this.heapSizeAfterGcGauge.addCallback((result) => {
+      result.observe(this.heapSizeAfterGc, { type: 'heapAfterGc' });
+    });
+
+    // Heap spaces metrics
+    Object.keys(this.heapSpacesMetrics).forEach(metricType => {
+      this.heapSpacesMetrics[metricType].addCallback((result) => {
+        const heapSpaces = v8.getHeapSpaceStatistics();
+        heapSpaces.forEach(space => {
+          const attributes = {
+            space: space.space_name,
+            metric: metricType
+          };
+
+          switch(metricType) {
+            case 'used':
+              result.observe(space.space_used_size, attributes);
+              break;
+            case 'available':
+              result.observe(space.space_available_size, attributes);
+              break;
+            case 'current':
+              result.observe(space.space_size, attributes);
+              break;
+            case 'physical':
+              result.observe(space.physical_space_size, attributes);
+              break;
+          }
+        });
+      });
+    });
+
+    this.collectEventLoopMetrics();
+  }
+
+  /** Enables event loop sampling and attaches the gauge callbacks (once). */
+  start() {
+    this.histogram.enable();
+    this.collectMetrics();
+  }
+
+  /** Stops event loop sampling. */
+  stop() {
+    this.histogram.disable();
+  }
 }
 
-// Set up callbacks for all observable metrics
-function setupRuntimeMetrics() {
-  // Memory metrics callback
-  const memoryCallback = (observableResult) => {
-    const memoryUsage = process.memoryUsage();
-    observableResult.observe(memoryUsage.rss, { type: 'rss' });
-    observableResult.observe(memoryUsage.heapUsed, { type: 'heapUsed' });
-  };
-
-  // Active handles and requests callback
-  const handleCallback = (observableResult) => {
-    observableResult.observe(process._getActiveHandles().length, { type: 'handles' });
-    observableResult.observe(process._getActiveRequests().length, { type: 'requests' });
-  };
-
-  // GC metrics callback
-  const gcCallback = (observableResult) => {
-    observableResult.observe(minorGcCount, { type: 'minor' });
-    observableResult.observe(majorGcCount, { type: 'major' });
-    observableResult.observe(lastGcPause, { type: 'pause' });
-    observableResult.observe(heapSizeAfterGc, { type: 'heapAfterGc' });
-  };
-
-  // Heap spaces callback
-  const heapSpacesCallback = (observableResult) => {
-    const heapSpaces = v8.getHeapSpaceStatistics();
-    heapSpaces.forEach(space => {
-      const attributes = { space: space.space_name };
-      observableResult.observe(space.space_used_size, { ...attributes, metric: 'used' });
-      observableResult.observe(space.space_available_size, { ...attributes, metric: 'available' });
-      observableResult.observe(space.space_size, { ...attributes, metric: 'current' });
-      observableResult.observe(space.physical_space_size, { ...attributes, metric: 'physical' });
-    });
-  };
-
-  // Register all callbacks
-  rssGauge.addCallback(memoryCallback);
-  heapUsedGauge.addCallback(memoryCallback);
-  activeHandlesGauge.addCallback(handleCallback);
-  activeRequestsGauge.addCallback(handleCallback);
-  minorGcsGauge.addCallback(gcCallback);
-  majorGcsGauge.addCallback(gcCallback);
-  gcPauseGauge.addCallback(gcCallback);
-  heapSizeAfterGcGauge.addCallback(gcCallback);
-
-  heapSpacesUsedGauge.addCallback(heapSpacesCallback);
-  heapSpacesAvailableGauge.addCallback(heapSpacesCallback);
-  heapSpacesCurrentGauge.addCallback(heapSpacesCallback);
-  heapSpacesPhysicalGauge.addCallback(heapSpacesCallback);
-
-  eventLoopCollectMetrics();
+// Created lazily by setSdkInstance(): metrics.getMeter() only returns a real
+// meter once sdk.start() has registered the global MeterProvider.
+let runtimeMetrics = null;
+let sdkInstance = null;
+
+/**
+ * Records the started SDK and starts runtime metrics collection (once).
+ * Call this after sdk.start(); requiring this module alone collects nothing.
+ * @param {object} sdk - Started NodeSDK instance, shut down on SIGTERM/SIGINT.
+ */
+function setSdkInstance(sdk) {
+  sdkInstance = sdk;
+  if (!runtimeMetrics) {
+    runtimeMetrics = new RuntimeMetricsCollector('paymentservice', 10000);
+    runtimeMetrics.start();
+  }
 }
 
+/** Stops the collector, shuts the SDK down if one was registered, then exits. */
+function shutdown() {
+  if (runtimeMetrics) {
+    runtimeMetrics.stop();
+  }
+  if (sdkInstance) {
+    sdkInstance.shutdown()
+      .then(() => console.log('SDK shut down successfully'))
+      .catch((error) => console.log('Error shutting down SDK', error))
+      .finally(() => process.exit(0));
+  } else {
+    process.exit(0);
+  }
+}
+
+// Handle shutdown gracefully
+process.on('SIGTERM', shutdown);
+process.on('SIGINT', shutdown);
+
 module.exports = {
-  setupRuntimeMetrics
+  RuntimeMetricsCollector,
+  setSdkInstance
 };