feat: faster span and trace id generation #1349

dyladan · 2020-07-24T17:25:50Z

Benchmarks

Noticeable performance improvements in the BasicTracerProvider benchmarks with small numbers of attributes.

Browser benchmarks: https://jsperf.com/opentelemetry-random-id-generation/1

ID Generation Benchmark

const Benchmark = require('benchmark');
const benchmarks = require('beautify-benchmark');
const crypto = require('crypto');

const suite = new Benchmark.Suite();

const SHARED_BUFFER = Buffer.allocUnsafe(16);
const bytes = 16;

suite
  .add('old id generation', function () {
    return crypto.randomBytes(bytes).toString('hex');
  })
  .add('new id generation', function () {
    for (let i = 0; i < bytes / 4; i++) {
      // unsigned right shift drops decimal part of the number
      // it is required because if a number between 2**32 and 2**32 - 1 is generated, an out of range error is thrown by writeUInt32BE
      SHARED_BUFFER.writeUInt32BE((Math.random() * 2 ** 32) >>> 0, i * 4);
    }

    // If buffer is all 0, set the last byte to 1 to guarantee a valid w3c id is generated
    for (let i = 0; i < bytes; i++) {
      if (SHARED_BUFFER[i] > 0) {
        break;
      } else if (i === bytes - 1) {
        SHARED_BUFFER[bytes - 1] = 1;
      }
    }

    return SHARED_BUFFER.toString('hex', 0, bytes);
  })
  // add listeners
  .on('cycle', function (event) {
    benchmarks.add(event.target);
  })
  .on('complete', function () {
    benchmarks.log();
    console.log('Fastest is ' + this.filter('fastest').map('name'));
  })
  .run();

  2 tests completed.

  old id generation x   411,744 ops/sec ±1.16% (90 runs sampled)
  new id generation x 4,642,384 ops/sec ±0.75% (93 runs sampled)

New ID Generation

Beginning NoopTracerProvider Benchmark...
  5 tests completed.

  #startSpan                     x 902,557,527 ops/sec ±3.32% (20 runs sampled)
  #startSpan:parent              x 896,869,970 ops/sec ±3.00% (20 runs sampled)
  #startSpan with attribute      x 916,297,055 ops/sec ±3.43% (20 runs sampled)
  #startSpan with 30 attributes  x  11,340,599 ops/sec ±1.92% (20 runs sampled)
  #startSpan with 100 attributes x   3,670,278 ops/sec ±1.17% (20 runs sampled)

Beginning BasicTracerProvider Benchmark...
  5 tests completed.

  #startSpan                     x 340,802 ops/sec ±2.05% (20 runs sampled)
  #startSpan:parent              x 187,078 ops/sec ±1.77% (20 runs sampled)
  #startSpan with attribute      x 324,449 ops/sec ±2.44% (20 runs sampled)
  #startSpan with 30 attributes  x  76,231 ops/sec ±1.39% (20 runs sampled)
  #startSpan with 100 attributes x   5,920 ops/sec ±1.83% (20 runs sampled)

Beginning BasicTracerProvider with SimpleSpanProcessor Benchmark...
  5 tests completed.

  #startSpan                     x 247,847 ops/sec ±8.28% (20 runs sampled)
  #startSpan:parent              x 133,570 ops/sec ±1.88% (20 runs sampled)
  #startSpan with attribute      x 182,922 ops/sec ±17.35% (20 runs sampled)
  #startSpan with 30 attributes  x  52,325 ops/sec ±0.30% (20 runs sampled)
  #startSpan with 100 attributes x   5,396 ops/sec ±5.11% (20 runs sampled)

Beginning BasicTracerProvider with BatchSpanProcessor Benchmark...
  5 tests completed.

  #startSpan                     x 224,913 ops/sec ±1.11% (20 runs sampled)
  #startSpan:parent              x 117,169 ops/sec ±1.17% (20 runs sampled)
  #startSpan with attribute      x 183,762 ops/sec ±16.86% (20 runs sampled)
  #startSpan with 30 attributes  x  21,828 ops/sec ±20.00% (20 runs sampled)
  #startSpan with 100 attributes x   5,087 ops/sec ±2.23% (20 runs sampled)

Old ID Generation

Beginning NoopTracerProvider Benchmark...
  5 tests completed.

  #startSpan                     x 876,884,335 ops/sec ±3.21% (20 runs sampled)
  #startSpan:parent              x 883,984,177 ops/sec ±3.07% (20 runs sampled)
  #startSpan with attribute      x 900,037,749 ops/sec ±4.01% (20 runs sampled)
  #startSpan with 30 attributes  x  11,137,143 ops/sec ±1.61% (20 runs sampled)
  #startSpan with 100 attributes x   3,661,880 ops/sec ±1.34% (20 runs sampled)

Beginning BasicTracerProvider Benchmark...
  5 tests completed.

  #startSpan                     x 140,603 ops/sec ±3.06% (20 runs sampled)
  #startSpan:parent              x  88,120 ops/sec ±1.48% (20 runs sampled)
  #startSpan with attribute      x 140,540 ops/sec ±2.78% (20 runs sampled)
  #startSpan with 30 attributes  x  55,164 ops/sec ±2.19% (20 runs sampled)
  #startSpan with 100 attributes x   5,852 ops/sec ±1.26% (20 runs sampled)

Beginning BasicTracerProvider with SimpleSpanProcessor Benchmark...
  5 tests completed.

  #startSpan                     x 127,693 ops/sec ±2.09% (20 runs sampled)
  #startSpan:parent              x  75,054 ops/sec ±5.69% (20 runs sampled)
  #startSpan with attribute      x 126,848 ops/sec ±1.59% (20 runs sampled)
  #startSpan with 30 attributes  x  42,302 ops/sec ±7.81% (20 runs sampled)
  #startSpan with 100 attributes x   4,387 ops/sec ±20.72% (20 runs sampled)

Beginning BasicTracerProvider with BatchSpanProcessor Benchmark...
  5 tests completed.

  #startSpan                     x 122,387 ops/sec ±1.67% (20 runs sampled)
  #startSpan:parent              x  69,848 ops/sec ±3.11% (20 runs sampled)
  #startSpan with attribute      x 112,986 ops/sec ±1.78% (20 runs sampled)
  #startSpan with 30 attributes  x  40,048 ops/sec ±1.52% (20 runs sampled)
  #startSpan with 100 attributes x   5,384 ops/sec ±1.47% (20 runs sampled)

codecov · 2020-07-24T17:30:47Z

Codecov Report

Merging #1349 into master will decrease coverage by 0.03%.
The diff coverage is 84.61%.

@@            Coverage Diff             @@
##           master    #1349      +/-   ##
==========================================
- Coverage   94.15%   94.11%   -0.04%     
==========================================
  Files         145      145              
  Lines        4326     4334       +8     
  Branches      881      883       +2     
==========================================
+ Hits         4073     4079       +6     
- Misses        253      255       +2

Impacted Files	Coverage Δ
...emetry-core/src/platform/node/RandomIdGenerator.ts	`87.50% <84.61%> (-12.50%)`	⬇️

obecny

I'm not really sure if we should sacrifice the built in crypto with native support for random bytes towards the pseudo Math.random, I would rather expect the opposite. With regards to benchmarks I would like to see the same benchmarks to be run on some benchmarking pages that compares many different browsers and then compare how much you can gain. But if the gain is couple % we have to think if it is really worth to drop crypto in favour of Math.random

obecny · 2020-07-24T18:57:18Z

packages/opentelemetry-core/src/platform/node/id.ts

-  return crypto.randomBytes(SPAN_ID_BYTES).toString('hex');
+export const randomSpanId = getIdGenerator(SPAN_ID_BYTES);
+
+const SHARED_BUFFER = Buffer.allocUnsafe(TRACE_ID_BYTES);


what if there are going to be 2 processes that will import the same function and will work asynchronously, any chance for collision on using SHARED_BUFFER ?

No because there is only one execution thread. Nothing here is async.

dyladan · 2020-07-24T19:17:03Z

I'm not really sure if we should sacrifice the built in crypto with native support for random bytes towards the pseudo Math.random, I would rather expect the opposite.

Why would you expect the opposite? There is nothing in the spec which requires the generated ids to be cryptographically random (nor does it even require them to be random). I could see the code simplicity angle, where using built-ins results in simpler code, but I don't think the code here is particularly complex, and on the browser in particular the new code is significantly simpler and more direct.

With regards to benchmarks I would like to see the same benchmarks to be run on some benchmarking pages that compares many different browsers and then compare how much you can gain. But if the gain is couple % we have to think if it is really worth to drop crypto in favour of Math.random

The benchmarks collected here are on node, and you can see additional benchmarks in the linked issue, also collected with node. I would like to see tests across many browsers, but I think it is a fairly well-accepted fact that generating cryptographically secure random numbers is more expensive than random numbers without cryptographic guarantees. One additional thing to note aside from the performance improvement, is that this is also impervious to entropy depletion. If the system does not have enough entropy to generate cryptographically random numbers, it can pause.

vmarchaud

the collision chance are really small and we aren't doing anything security sensitive with those ids so the performance gain is worth for me

dyladan · 2020-07-24T19:19:23Z

the collision chance are really small and we aren't doing anything security sensitive with those ids so the performance gain is worth for me

This is not any more or less likely to collide. The only "loss" is that it may be now possible, but still likely very hard, to guess what the next generated ID will be.

dyladan · 2020-07-24T19:40:18Z

/cc @Flarna since you helped me with the current id generation

obecny

for node I'm fine, but for browser I'm not so sure if this will be faster without ability to see some benchmarks on most major browsers including IE and edge :/

dyladan · 2020-07-24T19:43:59Z

@obecny I added chrome jsperf benchmark to the PR description which does indeed show a 2x speedup, but I can remove the browser changes if you'd like.

obecny · 2020-07-24T19:56:10Z

@obecny I added chrome jsperf benchmark to the PR description which does indeed show a 2x speedup, but I can remove the browser changes if you'd like.

why would you remove the browser if the speed is 2x faster ?

dyladan · 2020-07-24T19:56:55Z

@obecny I added chrome jsperf benchmark to the PR description which does indeed show a 2x speedup, but I can remove the browser changes if you'd like.

why would you remove the browser if the speed is 2x faster ?

Because I can only test on my platform (macos), and you asked for IE/edge. Speedup is consistent in chrome, safari, and firefox though.

obecny

webkit browser are 2 faster, cannot check IE, now but even though it is much better

dyladan · 2020-07-24T20:02:16Z

webkit browser are 2 faster, cannot check IE, now but even though it is much better

Just had a coworker test on his windows machine. The old code does not even work on IE as the ArrayBuffer does not support slice. New code works fine.

Not sure if this is masked currently by typescript and/or webpack polyfills.

obecny · 2020-07-24T20:03:50Z

I added version 2 with msCrypto but it needs some polyfills I guess

anuraaga · 2020-07-25T01:25:07Z

Thanks @dyladan!

packages/opentelemetry-core/src/platform/node/id.ts

Flarna · 2020-07-27T21:36:41Z

packages/opentelemetry-core/src/platform/node/id.ts

+    for (let i = 0; i < bytes; i++) {
+      SHARED_BUFFER[i] = Math.floor(Math.random() * 256);
+    }
+    return SHARED_BUFFER.slice(0, bytes).toString('hex');


I think SHARED_BUFFER.toString('hex', 0, bytes) would avoid creation of a temporary Buffer with same result.

Looks like this improved time by ~20%

crypto x 427,138 ops/sec ±0.77% (92 runs sampled) pseudorandom x 2,767,795 ops/sec ±1.00% (93 runs sampled) pseudorandom no alloc x 3,355,721 ops/sec ±0.96% (90 runs sampled)

there is one more thing here to be improved (about 8%)
this line

SHARED_BUFFER[i] = Math.floor(Math.random() * 256);

replace with

SHARED_BUFFER[i] = Math.random() * 256 | 0;

sk- · 2020-07-28T15:13:40Z

packages/opentelemetry-core/src/platform/node/id.ts

+export const randomSpanId = getIdGenerator(SPAN_ID_BYTES);
+
+const SHARED_BUFFER = Buffer.allocUnsafe(TRACE_ID_BYTES);
+function getIdGenerator(bytes: number): () => string {


@dyladan I know it was already like that, but shouldn't the id generator ensure the returned value has at least one non-zero byte? The spec requires that for both traceId and spanId.

We discussed this at one time in the past. Technically it is possible for it to randomly return all 0's, but it is so vanishingly unlikely that I don't think it's worth the extra boolean check every time an id is generated. I can be talked out of this position though.

/cc @open-telemetry/javascript-maintainers

Fair enough @dyladan . One could accept that in the unlikely event that happens then it could be equivalent for the trace to not being sampled.

But, is that what happens in case either traceId or spanId is 0? Is the SpanContext gracefully discarded?

I'm asking to understand whether propagators should explicitly check those fields before calling setExtractedSpanContext. (Note: The B3 and HTTP propagators do check it).

We could create one random byte starting at one instead 0. Just one bit of randomness would be lost.

What do you think of this?

const SHARED_BUFFER = Buffer.allocUnsafe(TRACE_ID_BYTES); function getIdGenerator(bytes: number): () => string { return function generateId() { for (let i = 0; i < bytes / 4; i++) { SHARED_BUFFER.writeUInt32BE((Math.random() * 2 ** 32) >>> 0, i * 4); } // Ensure the last bit is never zero to guarantee a valid w3c id is generated SHARED_BUFFER[bytes - 1] = SHARED_BUFFER[bytes - 1] | 0x1; return SHARED_BUFFER.toString('hex', 0, bytes); }; }

Flarna · 2020-07-28T19:27:49Z

packages/opentelemetry-core/src/platform/node/id.ts

+function getIdGenerator(bytes: number): () => string {
+  return function generateId() {
+    for (let i = 0; i < bytes / 4; i++) {
+      SHARED_BUFFER.writeUInt32BE((Math.random() * 2 ** 32) >>> 0, i * 4);


actually the >>> 0 part is not needed as this happens implicit within writeUInt32BE.

During a benchmark run I recieved the following error:

RangeError [ERR_OUT_OF_RANGE]: The value of "value" is out of range. It must be >= 0 and <= 4294967295. Received 4294967295.2376738

I was able to reproduce the error in the repl:

> const b = Buffer.alloc(4); undefined > b.writeUInt32BE(4294967295.2376738, 0); Thrown: RangeError [ERR_OUT_OF_RANGE]: The value of "value" is out of range. It must be >= 0 and <= 4294967295. Received 4294967295.2376738 at checkInt (internal/buffer.js:58:11) at writeU_Int32BE (internal/buffer.js:788:3) at Buffer.writeUInt32BE (internal/buffer.js:801:10) at repl:1:3 at Script.runInThisContext (vm.js:116:20) at REPLServer.defaultEval (repl.js:405:29) at bound (domain.js:419:14) at REPLServer.runBound [as eval] (domain.js:432:12) at REPLServer.onLine (repl.js:716:10) at REPLServer.emit (events.js:228:7) { code:

And the following code works as expected

> const b = Buffer.alloc(4); undefined > b.writeUInt32BE(4294967295.2376738 >>> 0, 0); 4 > b <Buffer ff ff ff ff>

It appears, there is a 1/4294967296 chance of generating a random value outside of the range of the function. We have to keep the >>> 0 to guarantee the function stays in range.

dyladan · 2020-07-28T20:20:52Z

On my machine there appears to be no measurable difference between not checking, setting the last bit to 1, or looping until a non-zero byte is found.

const Benchmark = require('benchmark');
const benchmarks = require('beautify-benchmark');
const suite = new Benchmark.Suite();

const SHARED_BUFFER = Buffer.allocUnsafe(16);

suite
  .add('without check', function () {
    for (let i = 0; i < 4; i++) {
      SHARED_BUFFER.writeUInt32BE((Math.random() * 2**32), i * 4)
    }
    SHARED_BUFFER.toString('hex', 0, 16);
  })
  .add('last bit always 1', function () {
    for (let i = 0; i < 4; i++) {
      SHARED_BUFFER.writeUInt32BE((Math.random() * 2**32), i * 4)
    }
    SHARED_BUFFER[15] = SHARED_BUFFER[15] | 1;
    SHARED_BUFFER.toString('hex', 0, 16);
  })
  .add('loop check', function () {
    for (let i = 0; i < 4; i++) {
      SHARED_BUFFER.writeUInt32BE((Math.random() * 2**32), i * 4)
    }

    for (let i = 0; i < 16; i++) {
      if (SHARED_BUFFER[i] > 0) {
        break;
      } else if (i === 15) {
        SHARED_BUFFER[15] = 1;
      }
    }

    SHARED_BUFFER.toString('hex', 0, 16);
  })
  // add listeners
  .on('cycle', function (event) {
    benchmarks.add(event.target);
  })
  .on('complete', function () {
    benchmarks.log();
    console.log('Fastest is ' + this.filter('fastest').map('name'));
  })
  .run();

  3 tests completed.

  without check     x 4,816,246 ops/sec ±0.89% (93 runs sampled)
  last bit always 1 x 4,790,933 ops/sec ±0.89% (94 runs sampled)
  loop check        x 4,776,446 ops/sec ±0.77% (95 runs sampled)

Fastest is without check,last bit always 1

dyladan · 2020-07-29T15:50:43Z

@open-telemetry/javascript-approvers this is done being updated I think, and I would appreciate reviews so we can get this merged.

Flarna · 2020-08-12T22:51:47Z

Any chance to get this merged?

dyladan · 2020-08-13T19:02:18Z

@open-telemetry/javascript-approvers would really appreciate some ✅ here :)

dyladan added the enhancement New feature or request label Jul 24, 2020

dyladan requested review from legendecas, markwolff, mayurkale22, mwear, naseemkullah, obecny, OlivierAlbertini and vmarchaud as code owners July 24, 2020 17:25

dyladan force-pushed the fast-id-gen branch 2 times, most recently from 4fa8e37 to c175bd7 Compare July 24, 2020 17:26

feat: fast span and trace id generation

5f6d1cf

dyladan force-pushed the fast-id-gen branch from c175bd7 to 5f6d1cf Compare July 24, 2020 17:30

obecny reviewed Jul 24, 2020

View reviewed changes

vmarchaud approved these changes Jul 24, 2020

View reviewed changes

obecny reviewed Jul 24, 2020

View reviewed changes

obecny approved these changes Jul 24, 2020

View reviewed changes

Flarna reviewed Jul 27, 2020

View reviewed changes

packages/opentelemetry-core/src/platform/node/id.ts Outdated Show resolved Hide resolved

Flarna reviewed Jul 27, 2020

View reviewed changes

Flarna approved these changes Jul 27, 2020

View reviewed changes

sk- reviewed Jul 28, 2020

View reviewed changes

dyladan added 4 commits July 28, 2020 14:04

Merge remote-tracking branch 'origin/master' into fast-id-gen

1f01d86

chore: remove unnecessary buffer alloc

2f157a8

chore: minimize calls to random

33e9a4a

chore: lint

86fe596

Flarna reviewed Jul 28, 2020

View reviewed changes

dyladan added 2 commits July 28, 2020 16:38

chore: check if generated id is invalid (all 0)

b6d533b

chore: unsigned right shift required to guarantee function in range

eecf2d5

Merge remote-tracking branch 'origin/master' into fast-id-gen

cc110bf

markwolff approved these changes Aug 13, 2020

View reviewed changes

naseemkullah approved these changes Aug 13, 2020

View reviewed changes

dyladan merged commit 221ced8 into open-telemetry:master Aug 13, 2020

dyladan deleted the fast-id-gen branch August 13, 2020 20:10

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

feat: faster span and trace id generation #1349

feat: faster span and trace id generation #1349

dyladan commented Jul 24, 2020 •

edited

Loading

codecov bot commented Jul 24, 2020 •

edited

Loading

obecny left a comment

obecny Jul 24, 2020

dyladan Jul 24, 2020

dyladan commented Jul 24, 2020

vmarchaud left a comment

dyladan commented Jul 24, 2020

dyladan commented Jul 24, 2020

obecny left a comment

dyladan commented Jul 24, 2020 •

edited

Loading

obecny commented Jul 24, 2020

dyladan commented Jul 24, 2020

obecny left a comment

dyladan commented Jul 24, 2020 •

edited

Loading

obecny commented Jul 24, 2020

anuraaga commented Jul 25, 2020

Flarna Jul 27, 2020

dyladan Jul 28, 2020

obecny Jul 28, 2020

dyladan Jul 28, 2020

sk- Jul 28, 2020

dyladan Jul 28, 2020

sk- Jul 28, 2020

Flarna Jul 28, 2020

dyladan Jul 28, 2020

sk- Jul 29, 2020

Flarna Jul 28, 2020

dyladan Jul 28, 2020

dyladan commented Jul 28, 2020

dyladan commented Jul 29, 2020

Flarna commented Aug 12, 2020

dyladan commented Aug 13, 2020

feat: faster span and trace id generation #1349

feat: faster span and trace id generation #1349

Conversation

dyladan commented Jul 24, 2020 • edited Loading

Benchmarks

ID Generation Benchmark

New ID Generation

Old ID Generation

codecov bot commented Jul 24, 2020 • edited Loading

Codecov Report

obecny left a comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

dyladan commented Jul 24, 2020

vmarchaud left a comment

Choose a reason for hiding this comment

dyladan commented Jul 24, 2020

dyladan commented Jul 24, 2020

obecny left a comment

Choose a reason for hiding this comment

dyladan commented Jul 24, 2020 • edited Loading

obecny commented Jul 24, 2020

dyladan commented Jul 24, 2020

obecny left a comment

Choose a reason for hiding this comment

dyladan commented Jul 24, 2020 • edited Loading

obecny commented Jul 24, 2020

anuraaga commented Jul 25, 2020

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

dyladan commented Jul 28, 2020

dyladan commented Jul 29, 2020

Flarna commented Aug 12, 2020

dyladan commented Aug 13, 2020

dyladan commented Jul 24, 2020 •

edited

Loading

codecov bot commented Jul 24, 2020 •

edited

Loading

dyladan commented Jul 24, 2020 •

edited

Loading

dyladan commented Jul 24, 2020 •

edited

Loading