Skip to content

Commit

Permalink
fix(instrumentation-http)!: drop url.parse in favor of URL constructor (
Browse files Browse the repository at this point in the history
  • Loading branch information
pichlermarc authored Nov 8, 2024
1 parent 030aff3 commit 87bd98e
Show file tree
Hide file tree
Showing 6 changed files with 259 additions and 47 deletions.
3 changes: 3 additions & 0 deletions experimental/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ All notable changes to experimental packages in this project will be documented

### :bug: (Bug Fix)

* fix(instrumentation-http): drop url.parse in favor of URL constructor [#5091](https://github.com/open-telemetry/opentelemetry-js/pull/5091) @pichlermarc
* fixes a bug where using cyrillic characters in a client request string URL would throw an exception, whereas an un-instrumented client would accept the same input without throwing an exception

### :books: (Refine Doc)

### :house: (Internal)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -632,15 +632,19 @@ export class HttpInstrumentation extends InstrumentationBase<HttpInstrumentation

const headers = request.headers;

const spanAttributes = getIncomingRequestAttributes(request, {
component: component,
serverName: instrumentation.getConfig().serverName,
hookAttributes: instrumentation._callStartSpanHook(
request,
instrumentation.getConfig().startIncomingSpanHook
),
semconvStability: instrumentation._semconvStability,
});
const spanAttributes = getIncomingRequestAttributes(
request,
{
component: component,
serverName: instrumentation.getConfig().serverName,
hookAttributes: instrumentation._callStartSpanHook(
request,
instrumentation.getConfig().startIncomingSpanHook
),
semconvStability: instrumentation._semconvStability,
},
instrumentation._diag
);

const spanOptions: SpanOptions = {
kind: SpanKind.SERVER,
Expand Down Expand Up @@ -757,7 +761,11 @@ export class HttpInstrumentation extends InstrumentationBase<HttpInstrumentation
(typeof options === 'string' || options instanceof url.URL)
? (args.shift() as http.RequestOptions)
: undefined;
const { method, optionsParsed } = getRequestInfo(options, extraOptions);
const { method, invalidUrl, optionsParsed } = getRequestInfo(
instrumentation._diag,
options,
extraOptions
);
/**
* Node 8's https module directly call the http one so to avoid creating
* 2 span for the same request we need to check that the protocol is correct
Expand Down Expand Up @@ -859,7 +867,16 @@ export class HttpInstrumentation extends InstrumentationBase<HttpInstrumentation
}

const request: http.ClientRequest = safeExecuteInTheMiddle(
() => original.apply(this, [optionsParsed, ...args]),
() => {
if (invalidUrl) {
// we know that the url is invalid, there's no point in injecting context as it will fail validation.
// Passing in what the user provided will give the user an error that matches what they'd see without
// the instrumentation.
return original.apply(this, [options, ...args]);
} else {
return original.apply(this, [optionsParsed, ...args]);
}
},
error => {
if (error) {
setSpanWithError(span, error, instrumentation._semconvStability);
Expand Down
171 changes: 146 additions & 25 deletions experimental/packages/opentelemetry-instrumentation-http/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import {
Span,
context,
SpanKind,
DiagLogger,
} from '@opentelemetry/api';
import {
ATTR_CLIENT_ADDRESS,
Expand Down Expand Up @@ -235,27 +236,104 @@ export const isCompressed = (
return !!encoding && encoding !== 'identity';
};

/**
* Mimics Node.js conversion of URL strings to RequestOptions expected by
* `http.request` and `https.request` APIs.
*
* See https://github.com/nodejs/node/blob/2505e217bba05fc581b572c685c5cf280a16c5a3/lib/internal/url.js#L1415-L1437
*
* @param stringUrl
* @throws TypeError if the URL is not valid.
*/
function stringUrlToHttpOptions(
stringUrl: string
): RequestOptions & { pathname: string } {
// This is heavily inspired by Node.js handling of the same situation, trying
// to follow it as closely as possible while keeping in mind that we only
// deal with string URLs, not URL objects.
const {
hostname,
pathname,
port,
username,
password,
search,
protocol,
hash,
href,
origin,
host,
} = new URL(stringUrl);

const options: RequestOptions & {
pathname: string;
hash: string;
search: string;
href: string;
origin: string;
} = {
protocol: protocol,
hostname:
hostname && hostname[0] === '[' ? hostname.slice(1, -1) : hostname,
hash: hash,
search: search,
pathname: pathname,
path: `${pathname || ''}${search || ''}`,
href: href,
origin: origin,
host: host,
};
if (port !== '') {
options.port = Number(port);
}
if (username || password) {
options.auth = `${decodeURIComponent(username)}:${decodeURIComponent(
password
)}`;
}
return options;
}

/**
* Makes sure options is an url object
* return an object with default value and parsed options
* @param logger component logger
* @param options original options for the request
* @param [extraOptions] additional options for the request
*/
export const getRequestInfo = (
logger: DiagLogger,
options: url.URL | RequestOptions | string,
extraOptions?: RequestOptions
): {
origin: string;
pathname: string;
method: string;
invalidUrl: boolean;
optionsParsed: RequestOptions;
} => {
let pathname = '/';
let origin = '';
let pathname: string;
let origin: string;
let optionsParsed: RequestOptions;
let invalidUrl = false;
if (typeof options === 'string') {
optionsParsed = url.parse(options);
pathname = (optionsParsed as url.UrlWithStringQuery).pathname || '/';
try {
const convertedOptions = stringUrlToHttpOptions(options);
optionsParsed = convertedOptions;
pathname = convertedOptions.pathname || '/';
} catch (e) {
invalidUrl = true;
logger.verbose(
'Unable to parse URL provided to HTTP request, using fallback to determine path. Original error:',
e
);
// for backward compatibility with how url.parse() behaved.
optionsParsed = {
path: options,
};
pathname = optionsParsed.path || '/';
}

origin = `${optionsParsed.protocol || 'http:'}//${optionsParsed.host}`;
if (extraOptions !== undefined) {
Object.assign(optionsParsed, extraOptions);
Expand Down Expand Up @@ -285,16 +363,23 @@ export const getRequestInfo = (
{ protocol: options.host ? 'http:' : undefined },
options
);
pathname = (options as url.URL).pathname;
if (!pathname && optionsParsed.path) {
pathname = url.parse(optionsParsed.path).pathname || '/';
}

const hostname =
optionsParsed.host ||
(optionsParsed.port != null
? `${optionsParsed.hostname}${optionsParsed.port}`
: optionsParsed.hostname);
origin = `${optionsParsed.protocol || 'http:'}//${hostname}`;

pathname = (options as url.URL).pathname;
if (!pathname && optionsParsed.path) {
try {
const parsedUrl = new URL(optionsParsed.path, origin);
pathname = parsedUrl.pathname || '/';
} catch (e) {
pathname = '/';
}
}
}

// some packages return method in lowercase..
Expand All @@ -303,7 +388,7 @@ export const getRequestInfo = (
? optionsParsed.method.toUpperCase()
: 'GET';

return { origin, pathname, method, optionsParsed };
return { origin, pathname, method, optionsParsed, invalidUrl };
};

/**
Expand Down Expand Up @@ -657,6 +742,42 @@ export function getRemoteClientAddress(
return null;
}

function getInfoFromIncomingMessage(
component: 'http' | 'https',
request: IncomingMessage,
logger: DiagLogger
): { pathname?: string; search?: string; toString: () => string } {
try {
if (request.headers.host) {
return new URL(
request.url ?? '/',
`${component}://${request.headers.host}`
);
} else {
const unsafeParsedUrl = new URL(
request.url ?? '/',
// using localhost as a workaround to still use the URL constructor for parsing
`${component}://localhost`
);
// since we use localhost as a workaround, ensure we hide the rest of the properties to avoid
// our workaround leaking though.
return {
pathname: unsafeParsedUrl.pathname,
search: unsafeParsedUrl.search,
toString: function () {
// we cannot use the result of unsafeParsedUrl.toString as it's potentially wrong.
return unsafeParsedUrl.pathname + unsafeParsedUrl.search;
},
};
}
} catch (e) {
// something is wrong, use undefined - this *should* never happen, logging
// for troubleshooting in case it does happen.
logger.verbose('Unable to get URL from request', e);
return {};
}
}

/**
* Returns incoming request attributes scoped to the request data
* @param {IncomingMessage} request the request object
Expand All @@ -670,18 +791,15 @@ export const getIncomingRequestAttributes = (
serverName?: string;
hookAttributes?: Attributes;
semconvStability: SemconvStability;
}
},
logger: DiagLogger
): Attributes => {
const headers = request.headers;
const userAgent = headers['user-agent'];
const ips = headers['x-forwarded-for'];
const httpVersion = request.httpVersion;
const requestUrl = request.url ? url.parse(request.url) : null;
const host = requestUrl?.host || headers.host;
const hostname =
requestUrl?.hostname ||
host?.replace(/^(.*)(:[0-9]{1,5})/, '$1') ||
'localhost';
const host = headers.host;
const hostname = host?.replace(/^(.*)(:[0-9]{1,5})/, '$1') || 'localhost';

const method = request.method;
const normalizedMethod = normalizeMethod(method);
Expand All @@ -701,8 +819,14 @@ export const getIncomingRequestAttributes = (
[ATTR_USER_AGENT_ORIGINAL]: userAgent,
};

if (requestUrl?.pathname != null) {
newAttributes[ATTR_URL_PATH] = requestUrl.pathname;
const parsedUrl = getInfoFromIncomingMessage(
options.component,
request,
logger
);

if (parsedUrl?.pathname != null) {
newAttributes[ATTR_URL_PATH] = parsedUrl.pathname;
}

if (remoteClientAddress != null) {
Expand All @@ -719,11 +843,7 @@ export const getIncomingRequestAttributes = (
}

const oldAttributes: Attributes = {
[SEMATTRS_HTTP_URL]: getAbsoluteUrl(
requestUrl,
headers,
`${options.component}:`
),
[SEMATTRS_HTTP_URL]: parsedUrl.toString(),
[SEMATTRS_HTTP_HOST]: host,
[SEMATTRS_NET_HOST_NAME]: hostname,
[SEMATTRS_HTTP_METHOD]: method,
Expand All @@ -738,8 +858,9 @@ export const getIncomingRequestAttributes = (
oldAttributes[SEMATTRS_HTTP_SERVER_NAME] = serverName;
}

if (requestUrl) {
oldAttributes[SEMATTRS_HTTP_TARGET] = requestUrl.path || '/';
if (parsedUrl?.pathname) {
oldAttributes[SEMATTRS_HTTP_TARGET] =
parsedUrl?.pathname + parsedUrl?.search || '/';
}

if (userAgent !== undefined) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,15 @@ describe('HttpInstrumentation', () => {
assert.strictEqual(rpcData.route, undefined);
rpcData.route = 'TheRoute';
}
if (request.url?.includes('/login')) {
assert.strictEqual(
request.headers.authorization,
'Basic ' + Buffer.from('username:password').toString('base64')
);
}
if (request.url?.includes('/withQuery')) {
assert.match(request.url, /withQuery\?foo=bar$/);
}
response.end('Test Server Response');
});

Expand Down Expand Up @@ -597,7 +606,7 @@ describe('HttpInstrumentation', () => {
assert.strictEqual(spans.length, 2);
});

for (const arg of ['string', {}, new Date()]) {
for (const arg of [{}, new Date()]) {
it(`should be traceable and not throw exception in ${protocol} instrumentation when passing the following argument ${JSON.stringify(
arg
)}`, async () => {
Expand Down Expand Up @@ -1005,6 +1014,34 @@ describe('HttpInstrumentation', () => {

assert.deepStrictEqual(warnMessages, []);
});

it('should not throw with cyrillic characters in the request path', async () => {
// see https://github.com/open-telemetry/opentelemetry-js/issues/5060
await httpRequest.get(`${protocol}://${hostname}:${serverPort}/привет`);
});

it('should keep username and password in the request', async () => {
await httpRequest.get(
`${protocol}://username:password@${hostname}:${serverPort}/login`
);
});

it('should keep query in the request', async () => {
await httpRequest.get(
`${protocol}://${hostname}:${serverPort}/withQuery?foo=bar`
);
});

it('using an invalid url does throw from client but still creates a span', async () => {
try {
await httpRequest.get(`http://instrumentation.test:string-as-port/`);
} catch (e) {
assert.match(e.message, /Invalid URL/);
}

const spans = memoryExporter.getFinishedSpans();
assert.strictEqual(spans.length, 1);
});
});

describe('with semconv stability set to http', () => {
Expand Down
Loading

0 comments on commit 87bd98e

Please sign in to comment.