Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Buffer speedups #1048

Merged
merged 2 commits into from
Mar 5, 2015
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
274 changes: 169 additions & 105 deletions lib/buffer.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,82 +24,148 @@ function createPool() {
}
createPool();

function Buffer(arg) {
if (!(this instanceof Buffer)) {
// Avoid going through an ArgumentsAdaptorTrampoline in the common case.
if (arguments.length > 1)
return new Buffer(arg, arguments[1]);

function Buffer(subject, encoding) {
if (!(this instanceof Buffer))
return new Buffer(subject, encoding);

if (typeof subject === 'number') {
this.length = +subject;

} else if (typeof subject === 'string') {
if (typeof encoding !== 'string' || encoding.length === 0)
encoding = 'utf8';
this.length = Buffer.byteLength(subject, encoding);
return new Buffer(arg);
}

// Handle Arrays, Buffers, Uint8Arrays or JSON.
} else if (subject !== null && typeof subject === 'object') {
if (subject.type === 'Buffer' && Array.isArray(subject.data))
subject = subject.data;
this.length = +subject.length;
this.length = 0;
this.parent = undefined;

} else {
throw new TypeError('must start with number, buffer, array or string');
// Common case.
if (typeof(arg) === 'number') {
fromNumber(this, arg);
return;
}

if (this.length > kMaxLength) {
throw new RangeError('Attempt to allocate Buffer larger than maximum ' +
'size: 0x' + kMaxLength.toString(16) + ' bytes');
// Slightly less common case.
if (typeof(arg) === 'string') {
fromString(this, arg, arguments.length > 1 ? arguments[1] : 'utf8');
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do a length check here, only to check again in fromString() whether encoding is a string?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Two reasons, one aesthetic, one technical. Aesthetic: it matches the check at the top of the function. Technical: an unguarded arguments[1] is megamorphic whereas the length check keeps the property/index lookups monomorphic.

I didn't benchmark it to death, but I did look at the generated code and came to the conclusion that monomorphic is best, even with the additional property lookup. I don't know if you have looked at the machine code for KeyedLoadIC_Megamorphic, but it's pretty complex.

return;
}

if (this.length < 0)
this.length = 0;
else
this.length >>>= 0; // Coerce to uint32.
// Unusual.
fromObject(this, arg);
}

this.parent = undefined;
if (this.length <= (Buffer.poolSize >>> 1) && this.length > 0) {
if (this.length > poolSize - poolOffset)
createPool();
this.parent = sliceOnto(allocPool,
this,
poolOffset,
poolOffset + this.length);
poolOffset += this.length;
} else {
alloc(this, this.length);
}
// Sizes `that` from a numeric request.
// A negative request produces an empty buffer without consulting checked();
// every other value (including NaN) is passed through checked() and then
// narrowed with `| 0` before being handed to allocate().
function fromNumber(that, length) {
  var size = 0;
  if (!(length < 0))
    size = checked(length) | 0;
  allocate(that, size);
}

if (typeof subject === 'number') {
return;
// Fills `that` with the bytes of `string` in the given encoding.
// Anything other than a non-empty string encoding falls back to 'utf8'.
function fromString(that, string, encoding) {
  var hasEncoding = typeof encoding === 'string' && encoding !== '';
  if (!hasEncoding)
    encoding = 'utf8';

  // Assumption: byteLength() return value is always < kMaxLength.
  var expected = byteLength(string, encoding) | 0;
  allocate(that, expected);

  var written = that.write(string, encoding) | 0;
  if (written === expected)
    return;

  // The write produced a different byte count than was reserved (truncated
  // base64 input). Shrink the buffer to what was written; the few unused
  // bytes are deliberately not handed back to the pool.
  that.length = written;
  truncate(that, written);
}

if (typeof subject === 'string') {
// In the case of base64 it's possible that the size of the buffer
// allocated was slightly too large. In this case we need to rewrite
// the length to the actual length written.
var len = this.write(subject, encoding);
// Buffer was truncated after decode, realloc internal ExternalArray
if (len !== this.length) {
var prevLen = this.length;
this.length = len;
truncate(this, this.length);
// Only need to readjust the poolOffset if the allocation is a slice.
if (this.parent != undefined)
poolOffset -= (prevLen - len);
}
function fromObject(that, object) {
if (object instanceof Buffer)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Future note: use smalloc.hasExternal() instead. That will allow creating Buffers from anything that has externally allocated array data.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would be an additional if guard, wouldn't it? The idea here is to keep everything monomorphic (or as much as possible anyway.)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Once it reaches C++, buffers and smalloc allocations are essentially treated the same.

return fromBuffer(that, object);

} else if (subject instanceof Buffer) {
subject.copy(this, 0, 0, this.length);
if (Array.isArray(object))
return fromArray(that, object);

if (object == null)
throw new TypeError('must start with number, buffer, array or string');

} else if (typeof subject.length === 'number' || Array.isArray(subject)) {
// Really crappy way to handle Uint8Arrays, but V8 doesn't give a simple
// way to access the data from the C++ API.
for (var i = 0; i < this.length; i++)
this[i] = subject[i];
if (object.buffer instanceof ArrayBuffer)
return fromTypedArray(that, object);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Future note: it is possible to cheat for-loop death by attempting a GetIndexedPropertiesExternalArrayData() on the array buffer; if it doesn't return NULL (nullptr?), a memcpy can be used instead.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would only work for Int8Array / Uint8Array, I think. Other typed arrays need the element & 255 masking for compatibility reasons (which I think is a silly design mistake, but that aside).

Idle musing: the overhead of calling into C++ probably isn't worth it for small arrays. I don't care enough about typed arrays yet to go find the crossover point, though.


if (object.length)
return fromArrayLike(that, object);

return fromJsonObject(that, object);
}

// Makes `that` a copy of `buffer`: validates the source length through
// checked(), reserves that many bytes, then copies the whole source range
// into `that` starting at offset 0.
function fromBuffer(that, buffer) {
  var size = checked(buffer.length) | 0;
  allocate(that, size);
  buffer.copy(that, 0, 0, size);
}

// Fills `that` from a plain array, keeping only the low 8 bits of each
// element (non-numeric elements are coerced by the bitwise AND).
function fromArray(that, array) {
  var count = checked(array.length) | 0;
  allocate(that, count);

  var i = 0;
  while (i < count) {
    that[i] = array[i] & 0xff;
    i += 1;
  }
}

// Intentionally a separate copy of fromArray() so that fromArray() itself
// stays monomorphic.
function fromTypedArray(that, array) {
  var count = checked(array.length) | 0;
  allocate(that, count);

  // Each element is masked down to one byte. For typed arrays with
  // BYTES_PER_ELEMENT > 1 that truncation is probably surprising, but it is
  // compatible with the behavior of the old Buffer constructor.
  var i = 0;
  while (i < count) {
    that[i] = array[i] & 0xff;
    i += 1;
  }
}

// Fills `that` from any object exposing `length` and indexed elements,
// storing the low 8 bits of each element in index order.
function fromArrayLike(that, array) {
  var count = checked(array.length) | 0;
  allocate(that, count);

  var i = 0;
  while (i < count) {
    that[i] = array[i] & 0xff;
    i += 1;
  }
}

// Rebuilds a Buffer from its JSON form, { type: 'Buffer', data: [...] }.
// Any object that does not have that exact shape yields a zero-length buffer.
function fromJsonObject(that, object) {
  var bytes;
  var count = 0;

  var wellFormed = object.type === 'Buffer' && Array.isArray(object.data);
  if (wellFormed) {
    bytes = object.data;
    count = checked(bytes.length) | 0;
  }

  allocate(that, count);

  var i = 0;
  while (i < count) {
    that[i] = bytes[i] & 0xff;
    i += 1;
  }
}

// Gives `that` backing storage of `length` bytes and records the result.
// Non-zero requests no larger than half of Buffer.poolSize are served by
// palloc(); everything else (including zero) goes to alloc(). Whatever the
// chosen allocator returns is stored as `that.parent`.
function allocate(that, length) {
  if (length !== 0 && length <= (Buffer.poolSize >>> 1)) {
    that.parent = palloc(that, length);
  } else {
    that.parent = alloc(that, length);
  }
  that.length = length;
}

// Carves `length` bytes for `that` out of the shared allocation pool.
// If the request does not fit in the space left after poolOffset,
// createPool() is called first (presumably installing a fresh pool and
// resetting poolOffset - confirm against createPool). Returns whatever
// sliceOnto() returns and advances poolOffset past the carved range.
function palloc(that, length) {
  var remaining = poolSize - poolOffset;
  if (remaining < length)
    createPool();

  var from = poolOffset;
  var to = from + length;
  var slice = sliceOnto(allocPool, that, from, to);

  poolOffset = to;
  return slice;
}

// Validates a requested buffer length and normalizes it to a uint32.
//
// Throws a RangeError when `length` is at or above kMaxLength. NaN and
// negative values come back as 0; any other value is coerced with `>>> 0`
// (so fractions are truncated).
function checked(length) {
  // Note: cannot use `length < kMaxLength` here because that fails when
  // length is NaN (which is otherwise coerced to zero.)
  if (length >= kMaxLength) {
    throw new RangeError('Attempt to allocate Buffer larger than maximum ' +
                         'size: 0x' + kMaxLength.toString(16) + ' bytes');
  }

  // Clamp negatives to zero. Without this, -1 >>> 0 yields 4294967295, which
  // slips past the range check above; callers then narrow it with `| 0` back
  // to -1 and end up requesting a negative-sized allocation.
  if (length < 0)
    return 0;

  return length >>> 0;
}

function SlowBuffer(length) {
length = length >>> 0;
Expand Down Expand Up @@ -197,30 +263,30 @@ Buffer.concat = function(list, length) {
};


Buffer.byteLength = function(str, enc) {
var ret;
str = str + '';
switch (enc) {
function byteLength(string, encoding) {
if (typeof(string) !== 'string')
string = String(string);

switch (encoding) {
case 'ascii':
case 'binary':
case 'raw':
ret = str.length;
break;
return string.length;

case 'ucs2':
case 'ucs-2':
case 'utf16le':
case 'utf-16le':
ret = str.length * 2;
break;
return string.length * 2;

case 'hex':
ret = str.length >>> 1;
break;
default:
ret = binding.byteLength(str, enc);
return string.length >>> 1;
}
return ret;
};

return binding.byteLength(string, encoding);
}

Buffer.byteLength = byteLength;

// toString(encoding, start=0, end=buffer.length)
Buffer.prototype.toString = function(encoding, start, end) {
Expand Down Expand Up @@ -414,47 +480,45 @@ Buffer.prototype.write = function(string, offset, length, encoding) {
if (length === undefined || length > remaining)
length = remaining;

encoding = !!encoding ? (encoding + '').toLowerCase() : 'utf8';

if (string.length > 0 && (length < 0 || offset < 0))
throw new RangeError('attempt to write outside buffer bounds');

var ret;
switch (encoding) {
case 'hex':
ret = this.hexWrite(string, offset, length);
break;
if (!encoding)
encoding = 'utf8';

case 'utf8':
case 'utf-8':
ret = this.utf8Write(string, offset, length);
break;
var loweredCase = false;
for (;;) {
switch (encoding) {
case 'hex':
return this.hexWrite(string, offset, length);

case 'ascii':
ret = this.asciiWrite(string, offset, length);
break;
case 'utf8':
case 'utf-8':
return this.utf8Write(string, offset, length);

case 'binary':
ret = this.binaryWrite(string, offset, length);
break;
case 'ascii':
return this.asciiWrite(string, offset, length);

case 'base64':
// Warning: maxLength not taken into account in base64Write
ret = this.base64Write(string, offset, length);
break;
case 'binary':
return this.binaryWrite(string, offset, length);

case 'ucs2':
case 'ucs-2':
case 'utf16le':
case 'utf-16le':
ret = this.ucs2Write(string, offset, length);
break;
case 'base64':
// Warning: maxLength not taken into account in base64Write
return this.base64Write(string, offset, length);

default:
throw new TypeError('Unknown encoding: ' + encoding);
}
case 'ucs2':
case 'ucs-2':
case 'utf16le':
case 'utf-16le':
return this.ucs2Write(string, offset, length);

return ret;
default:
if (loweredCase)
throw new TypeError('Unknown encoding: ' + encoding);
encoding = ('' + encoding).toLowerCase();
loweredCase = true;
}
}
};


Expand Down