From c7e3602a0ba7c4911e2ccd0f9dd21b80ca2b8c6c Mon Sep 17 00:00:00 2001 From: James Sumners Date: Tue, 19 Mar 2024 14:54:58 -0400 Subject: [PATCH] chore: Added instrumentation tracking to support issue 2033 --- lib/instrumentation-descriptor.js | 186 ++++++++++++++++ lib/instrumentation-tracker.js | 222 +++++++++++++++++++ lib/util/idgen.js | 31 +++ test/unit/instrumentation-descriptor.test.js | 36 +++ test/unit/instrumentation-tracker.test.js | 131 +++++++++++ 5 files changed, 606 insertions(+) create mode 100644 lib/instrumentation-descriptor.js create mode 100644 lib/instrumentation-tracker.js create mode 100644 lib/util/idgen.js create mode 100644 test/unit/instrumentation-descriptor.test.js create mode 100644 test/unit/instrumentation-tracker.test.js diff --git a/lib/instrumentation-descriptor.js b/lib/instrumentation-descriptor.js new file mode 100644 index 0000000000..357ed82b36 --- /dev/null +++ b/lib/instrumentation-descriptor.js @@ -0,0 +1,186 @@ +/* + * Copyright 2024 New Relic Corporation. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +'use strict' + +const IdGen = require('./util/idgen') +const idGen = new IdGen() + +/** + * @typedef {function} InstrumentationOnRequire + * @param {Shim} shim The shim instance to use for the instrumentation. + * @param {object} resolvedNodule The module being instrumented as returned by + * Node's `require` function. + * @param {string} moduleName The simple name of the module, i.e. the value + * passed to the `require` function. + * @throws {Error|object} + */ + +/** + * @typedef {function} InstrumentationOnError + * @param {Error|object} error The error thrown by `onRequire` when there was + * an issue registering the instrumentation. 
/* eslint-disable jsdoc/require-property-description */
/**
 * @typedef {object} InstrumentationDescriptorParams
 * @property {string} absolutePath
 * @property {string} module
 * @property {string} moduleName
 * @property {InstrumentationOnError} onError
 * @property {InstrumentationOnRequire} onRequire
 * @property {string} resolvedName
 * @property {string} type
 */

/**
 * Configuration record for one instrumentation, i.e. for the thing `newrelic`
 * uses to wrap a Node.js module. It carries the module's name, where the
 * module lives on disk, and the `onRequire` / `onError` hooks that are applied
 * to the module.
 */
class InstrumentationDescriptor {
  /**
   * Utility/generic module.
   * @type {string}
   */
  static TYPE_GENERIC = 'generic'

  /**
   * @private
   * @type {string}
   */
  static TYPE_CONGLOMERATE = 'conglomerate'

  /**
   * Database module, such as the MongoDB or MySQL drivers.
   * @type {string}
   */
  static TYPE_DATASTORE = 'datastore'

  /**
   * Messaging module, such as AMQP.
   * @type {string}
   */
  static TYPE_MESSAGE = 'message'

  /**
   * Promise module, such as Bluebird.
   * @type {string}
   */
  static TYPE_PROMISE = 'promise'

  /**
   * @private
   * @type {string}
   */
  static TYPE_TRANSACTION = 'transaction'

  /**
   * Web server framework module, such as Express or Fastify.
   * @type {string}
   */
  static TYPE_WEB_FRAMEWORK = 'web-framework'

  /**
   * Used to load supportability metrics on installed versions of packages
   * that the Node.js agent does not instrument (e.g. OTEL instrumentation or
   * top logging libraries).
   * @type {string}
   */
  static TYPE_TRACKING = 'tracking'

  /**
   * Kind of module being instrumented; one of the static `TYPE_` values.
   * @type {string|null}
   */
  type

  /**
   * Name of the instrumented module, i.e. the string handed to `require`.
   * It must map to a directory in `lib/instrumentations` that contains an
   * `nr-hooks.js` file.
   *
   * Takes precedence over `moduleName`.
   * @type {string}
   */
  module

  /**
   * Name of the instrumented module, i.e. the string handed to `require`.
   * It must map to a JavaScript file of the same name in the
   * `lib/instrumentations` directory.
   * @type {string}
   */
  moduleName

  /**
   * Absolute path to the module to instrument. Only meant to be set when the
   * module does not live in a `node_modules` directory, e.g. when someone
   * instruments one of their own modules through the public API.
   *
   * `moduleName` must still hold the simple name (the string passed to
   * `require`) so the instrumentation can be tracked.
   *
   * Note: this value takes precedence over `moduleName`.
   * @type {string}
   */
  absolutePath

  /**
   * Fully resolved path to the module, e.g. `/opt/app/node_modules/foo`.
   * Core modules should use the special value `.`.
   * @type {string}
   */
  resolvedName

  /**
   * Hook run when the module is required; this is the instrumentation's
   * actual implementation.
   * @type {InstrumentationOnRequire}
   */
  onRequire

  /**
   * Hook run when the `onRequire` hook throws.
   * @type {InstrumentationOnError}
   */
  onError

  /**
   * Identifier handed out by the module-level id generator, keyed by
   * `moduleName`.
   * @type {number}
   */
  #id

  /* eslint-disable jsdoc/require-param-description */
  /**
   * @param {InstrumentationDescriptorParams} params
   */
  constructor(params) {
    // Copy every supported setting verbatim; anything the caller leaves out
    // simply stays `undefined`.
    for (const key of [
      'absolutePath',
      'module',
      'moduleName',
      'onError',
      'onRequire',
      'resolvedName',
      'type'
    ]) {
      this[key] = params[key]
    }

    this.#id = idGen.idFor(this.moduleName)
  }

  /**
   * Identifier for the instrumentation. The internal instrumentation tracker
   * relies on it to tell apart different instrumentations that target the
   * same module.
   *
   * @returns {number} The identifier.
   */
  get instrumentationId() {
    return this.#id
  }
}

// ---------------------------------------------------------------------------
// lib/instrumentation-tracker.js
// ---------------------------------------------------------------------------

/**
 * @typedef {object} TrackedItemMeta
 * @property {boolean} instrumented Indicates if the instrumentation
 * has been successfully applied.
 * @property {boolean|undefined} didError Indicates if the instrumentation
 * application resulted in an error or not.
 */

/**
 * Pairs an instrumentation descriptor with the bookkeeping metadata that the
 * {@link InstrumentationTracker} keeps about it.
 *
 * @private
 */
class TrackedItem {
  /**
   * @type {InstrumentationDescriptor}
   */
  instrumentation

  /**
   * @type {TrackedItemMeta}
   */
  meta

  /**
   * @param {object} params
   * @param {InstrumentationDescriptor} params.instrumentation
   * @param {TrackedItemMeta} params.meta
   */
  constructor(params) {
    const { instrumentation, meta } = params
    this.instrumentation = instrumentation
    this.meta = meta
  }
}
/**
 * The instrumentation tracker is used to keep track of
 * {@link InstrumentationDescriptor} instances in relation to modules that
 * are being instrumented. The general process looks like:
 *
 * 1. We register an instrumentation with a simple name like `pino`. This
 * "instrumentation" includes things like the `onRequire` and `onError`
 * callbacks. In this context, "instrumentation" and "hook" are interchangeable
 * terms.
 *
 * 2. Upon `require()`, we hit `shimmer._postLoad` which will give
 * us the fully resolved path to the module being loaded.
 *
 * 3. `_postLoad` will utilize the previously registered instrumentation
 * information to determine if there are any callbacks for the module being
 * loaded. If so, it will attempt to run the callbacks.
 *
 * 4. At this point we need to keep track of which simple name + fully resolved
 * path has callbacks associated with it, and if the error callback was invoked.
 * When a subsequent load of the same simple name + fully resolved path
 * combination is encountered, we need to append that to the tracked hooks.
 * Or, if the previous hook failed, provide a way for the loading algorithm
 * to learn about that so that it can skip doing unnecessary work.
 *
 * The `InstrumentationTracker` object provides utility methods to facilitate
 * that process.
 *
 * @private
 */
class InstrumentationTracker {
  /**
   * Simple module name => list of tracked items for that module.
   * @type {Map<string, TrackedItem[]>}
   */
  #tracked = new Map()

  get [Symbol.toStringTag]() {
    return 'InstrumentationTracker'
  }

  /**
   * Get all tracked instrumentations for the named module.
   *
   * @param {string} moduleName The simple name for the module, e.g. "pino".
   *
   * @returns {TrackedItem[]|undefined} All tracked items for the module, or
   * `undefined` when nothing has been tracked under that name. The returned
   * array is the tracker's own list, not a copy.
   */
  getAllByName(moduleName) {
    return this.#tracked.get(moduleName)
  }

  /**
   * Get a specific tracked item for a module. This allows the
   * {@link setHookSuccess} and {@link setHookFailure} methods to be used.
   *
   * @param {string} moduleName The simple name for the module, e.g. "pino".
   * @param {InstrumentationDescriptor} instrumentation The instrumentation
   * descriptor that is contained within the tracked item. It is matched by
   * object identity.
   *
   * @returns {TrackedItem|undefined} The full tracked item that includes the
   * passed in descriptor along with the metadata about the instrumentation,
   * or `undefined` when the module is not tracked or no item holds that
   * descriptor.
   */
  getTrackedItem(moduleName, instrumentation) {
    // An untracked module yields `undefined` from `getAllByName`; treat that
    // as "no items" instead of failing with "items is not iterable".
    const items = this.getAllByName(moduleName) ?? []
    return items.find((item) => item.instrumentation === instrumentation)
  }

  /**
   * The primary entrypoint to the tracker. It registers the basic information
   * about an instrumentation prior to the to-be-instrumented module being
   * loaded. Modules that were never passed to this method are unknown to the
   * tracker: lookups for them come back `undefined` and
   * {@link setResolvedName} throws.
   *
   * A descriptor whose `instrumentationId` matches an item already tracked
   * under `moduleName` is ignored.
   *
   * @param {string} moduleName The simple name of the module being
   * instrumented, e.g. "pino". That is, whatever is passed to the `require`
   * function.
   * @param {InstrumentationDescriptor} instrumentation The initial descriptor
   * for the module being instrumented.
   */
  track(moduleName, instrumentation) {
    const tracked = this.#tracked.get(moduleName)
    if (tracked === undefined) {
      this.#tracked.set(moduleName, [
        new TrackedItem({ instrumentation, meta: { instrumented: false, didError: undefined } })
      ])
      return
    }

    const alreadyTracked = tracked.some(
      (t) => t.instrumentation.instrumentationId === instrumentation.instrumentationId
    )
    if (alreadyTracked === true) {
      return
    }

    tracked.push(
      new TrackedItem({ instrumentation, meta: { instrumented: false, didError: undefined } })
    )
  }

  /**
   * Update the metadata for a tracked item to indicate that the hook failed.
   *
   * @param {TrackedItem} trackedItem The item to update.
   */
  setHookFailure(trackedItem) {
    trackedItem.meta.instrumented = false
    trackedItem.meta.didError = true
  }

  /**
   * Update the metadata for a tracked item to indicate that the hook succeeded.
   *
   * @param {TrackedItem} trackedItem The item to update.
   */
  setHookSuccess(trackedItem) {
    trackedItem.meta.instrumented = true
    trackedItem.meta.didError = false
  }

  /**
   * After a module has been loaded, via `require` or `import`, the tracked
   * instrumentation for that module must have its `resolvedName` property
   * updated. The `resolvedName` is used to uniquely identify instances of the
   * module. Being able to uniquely identify instances is crucial to being able
   * to instrument all loaded instances.
   *
   * @param {string} moduleName The simple name of the module, e.g. "pino".
   * @param {string} resolvedName The fully resolved file system path to the
   * module instance, e.g. "/opt/app/node_modules/pino".
   *
   * @throws {Error} If the provided `moduleName` is not present in the tracker.
   */
  setResolvedName(moduleName, resolvedName) {
    const items = this.#tracked.get(moduleName)
    if (items === undefined) {
      throw new Error(`module not tracked: ${moduleName}`)
    }

    const missingResolvedName = []
    for (const item of items) {
      if (item.instrumentation.resolvedName === undefined) {
        missingResolvedName.push(item)
      } else if (item.instrumentation.resolvedName === resolvedName) {
        // We already have this specific instance of the module tracked.
        // So we don't need to do anything.
        return
      }
    }

    if (missingResolvedName.length === 0) {
      // We have encountered a new instance of the module. Therefore, we
      // need to clone an existing instrumentation, but apply a different
      // `resolvedName` to it.
      //
      // This happens when there is a dependency tree like:
      // + `/opt/app/node_modules/foo`
      // + `/opt/app/node_modules/transitive-dep/node_modules/foo`
      const item = items[0]
      this.track(
        moduleName,
        new InstrumentationDescriptor({ ...item.instrumentation, resolvedName })
      )
      return
    }

    // Add the same name to all found instrumentations. This definitely
    // happens when the security agent is enabled.
    for (const item of missingResolvedName) {
      item.instrumentation.resolvedName = resolvedName
    }
  }
}

// ---------------------------------------------------------------------------
// lib/util/idgen.js
// ---------------------------------------------------------------------------

/**
 * Hands out sequential numeric identifiers, with an independent sequence per
 * name. Each sequence starts at `0`.
 */
class IdGenerator {
  /**
   * Last identifier handed out for each name, keyed by `Symbol.for(name)`.
   * @type {Map<symbol, number>}
   */
  #tracked = new Map()

  /**
   * Get the next available identifier for the provided `name`.
   *
   * @param {string} name The label to get an id for.
   *
   * @returns {number} The id: `0` on the first request for a name, then one
   * more than the previous id for that name.
   */
  idFor(name) {
    const key = Symbol.for(name)
    const previous = this.#tracked.get(key)
    const next = previous === undefined ? 0 : previous + 1
    this.#tracked.set(key, next)
    return next
  }
}
// Verifies that the descriptor stores every constructor parameter verbatim
// and that instrumentation ids for the same module name count up from zero.
tap.test('constructs instances', async (t) => {
  const onRequire = () => {}
  const onError = () => {}

  const params = {
    type: 'generic',
    module: 'foo',
    moduleName: 'foo',
    absolutePath: '/foo',
    resolvedName: '/opt/app/node_modules/foo',
    onRequire,
    onError
  }
  const first = new InstrumentationDescriptor(params)

  t.equal(first.type, InstrumentationDescriptor.TYPE_GENERIC)
  t.equal(first.module, 'foo')
  t.equal(first.moduleName, 'foo')
  t.equal(first.absolutePath, '/foo')
  t.equal(first.resolvedName, '/opt/app/node_modules/foo')
  t.equal(first.onRequire, onRequire)
  t.equal(first.onError, onError)
  t.equal(first.instrumentationId, 0)

  // A second descriptor for the same module name receives the next id.
  const second = new InstrumentationDescriptor({ moduleName: 'foo' })
  t.equal(second.instrumentationId, 1)
})
// The tracker exposes a custom `Symbol.toStringTag`.
tap.test('can inspect object type', async (t) => {
  const tracker = new InstrumentationTracker()
  t.equal(Object.prototype.toString.call(tracker), '[object InstrumentationTracker]')
})

tap.test('track method tracks new items and updates existing ones', async (t) => {
  const tracker = new InstrumentationTracker()
  const inst1 = new InstrumentationDescriptor({ moduleName: 'foo' })

  tracker.track('foo', inst1)
  t.equal(tracker.getAllByName('foo').length, 1)

  // Module already tracked and instrumentation id is the same.
  tracker.track('foo', inst1)
  t.equal(tracker.getAllByName('foo').length, 1)

  // Module already tracked, but new instrumentation with different id.
  const inst2 = new InstrumentationDescriptor({ moduleName: 'foo' })
  tracker.track('foo', inst2)
  t.equal(tracker.getAllByName('foo').length, 2)
})

tap.test('can get a tracked item by instrumentation', async (t) => {
  const tracker = new InstrumentationTracker()
  const inst = new InstrumentationDescriptor({ moduleName: 'foo' })

  tracker.track('foo', inst)
  const item = tracker.getTrackedItem('foo', inst)
  t.equal(item.instrumentation, inst)
  t.same(item.meta, { instrumented: false, didError: undefined })
})

tap.test('sets hook failure correctly', async (t) => {
  const tracker = new InstrumentationTracker()
  const inst = new InstrumentationDescriptor({ moduleName: 'foo' })

  tracker.track('foo', inst)
  const item = tracker.getTrackedItem('foo', inst)
  tracker.setHookFailure(item)
  t.equal(item.meta.instrumented, false)
  t.equal(item.meta.didError, true)

  // Double check that the item in the map got updated.
  const items = tracker.getAllByName('foo')
  t.equal(items[0].meta.instrumented, false)
  t.equal(items[0].meta.didError, true)
})

tap.test('sets hook success correctly', async (t) => {
  const tracker = new InstrumentationTracker()
  const inst = new InstrumentationDescriptor({ moduleName: 'foo' })

  tracker.track('foo', inst)
  const item = tracker.getTrackedItem('foo', inst)
  tracker.setHookSuccess(item)
  t.equal(item.meta.instrumented, true)
  t.equal(item.meta.didError, false)

  // Double check that the item in the map got updated.
  const items = tracker.getAllByName('foo')
  t.equal(items[0].meta.instrumented, true)
  t.equal(items[0].meta.didError, false)
})

tap.test('setResolvedName', (t) => {
  t.beforeEach((t) => {
    t.context.tracker = new InstrumentationTracker()
  })

  t.test('throws expected error', async (t) => {
    const { tracker } = t.context
    // A bare string in second position is only the assertion's label, so the
    // expected message has to be passed as an object pattern for tap to
    // actually compare it with the thrown error.
    t.throws(() => tracker.setResolvedName('foo', 'bar'), {
      message: 'module not tracked: foo'
    })
  })

  t.test('skips existing tracked items', async (t) => {
    const { tracker } = t.context
    const inst = new InstrumentationDescriptor({
      moduleName: 'foo',
      resolvedName: '/opt/app/node_modules/foo'
    })

    tracker.track('foo', inst)
    tracker.setResolvedName('foo', '/opt/app/node_modules/foo')
    t.equal(tracker.getAllByName('foo').length, 1)
  })

  t.test('adds new tracked item for new resolved name', async (t) => {
    const { tracker } = t.context
    const inst1 = new InstrumentationDescriptor({
      moduleName: 'foo',
      resolvedName: '/opt/app/node_modules/foo'
    })

    tracker.track('foo', inst1)
    tracker.setResolvedName('foo', '/opt/app/node_modules/transitive-dep/node_modules/foo')

    const items = tracker.getAllByName('foo')
    t.equal(items[0].instrumentation.resolvedName, '/opt/app/node_modules/foo')
    t.equal(
      items[1].instrumentation.resolvedName,
      '/opt/app/node_modules/transitive-dep/node_modules/foo'
    )
  })

  t.test('updates all registered instrumentations with resolve name', async (t) => {
    const { tracker } = t.context
    const inst1 = new InstrumentationDescriptor({ moduleName: 'foo' })
    const inst2 = new InstrumentationDescriptor({ moduleName: 'foo' })

    tracker.track('foo', inst1)
    tracker.track('foo', inst2)
    tracker.setResolvedName('foo', '/opt/app/node_modules/foo')

    const items = tracker.getAllByName('foo')
    t.equal(items[0].instrumentation.resolvedName, '/opt/app/node_modules/foo')
    t.equal(items[1].instrumentation.resolvedName, '/opt/app/node_modules/foo')
  })

  t.end()
})