Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

src: allow UTF-16 in generic StringBytes decode call #22622

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/node.h
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,6 @@ NODE_DEPRECATED("Use FatalException(isolate, ...)",
return FatalException(v8::Isolate::GetCurrent(), try_catch);
})

// Don't call with encoding=UCS2.
NODE_EXTERN v8::Local<v8::Value> Encode(v8::Isolate* isolate,
const char* buf,
size_t len,
Expand Down
59 changes: 0 additions & 59 deletions src/node_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -464,65 +464,6 @@ void StringSlice(const FunctionCallbackInfo<Value>& args) {
}


template <>
void StringSlice<UCS2>(const FunctionCallbackInfo<Value>& args) {
Isolate* isolate = args.GetIsolate();
Environment* env = Environment::GetCurrent(isolate);

THROW_AND_RETURN_UNLESS_BUFFER(env, args.This());
SPREAD_BUFFER_ARG(args.This(), ts_obj);

if (ts_obj_length == 0)
return args.GetReturnValue().SetEmptyString();

SLICE_START_END(args[0], args[1], ts_obj_length)
length /= 2;

const char* data = ts_obj_data + start;
const uint16_t* buf;
bool release = false;

// Node's "ucs2" encoding expects LE character data inside a Buffer, so we
// need to reorder on BE platforms. See https://nodejs.org/api/buffer.html
// regarding Node's "ucs2" encoding specification.
const bool aligned = (reinterpret_cast<uintptr_t>(data) % sizeof(*buf) == 0);
if (IsLittleEndian() && !aligned) {
// Make a copy to avoid unaligned accesses in v8::String::NewFromTwoByte().
// This applies ONLY to little endian platforms, as misalignment will be
// handled by a byte-swapping operation in StringBytes::Encode on
// big endian platforms.
uint16_t* copy = new uint16_t[length];
for (size_t i = 0, k = 0; i < length; i += 1, k += 2) {
// Assumes that the input is little endian.
const uint8_t lo = static_cast<uint8_t>(data[k + 0]);
const uint8_t hi = static_cast<uint8_t>(data[k + 1]);
copy[i] = lo | hi << 8;
}
buf = copy;
release = true;
} else {
buf = reinterpret_cast<const uint16_t*>(data);
}

Local<Value> error;
MaybeLocal<Value> ret =
StringBytes::Encode(isolate,
buf,
length,
&error);

if (release)
delete[] buf;

if (ret.IsEmpty()) {
CHECK(!error.IsEmpty());
isolate->ThrowException(error);
return;
}
args.GetReturnValue().Set(ret.ToLocalChecked());
}


// bytesCopied = copy(buffer, target[, targetStart][, sourceStart][, sourceEnd])
void Copy(const FunctionCallbackInfo<Value> &args) {
Environment* env = Environment::GetCurrent(args);
Expand Down
57 changes: 32 additions & 25 deletions src/string_bytes.cc
Original file line number Diff line number Diff line change
Expand Up @@ -619,7 +619,6 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
size_t buflen,
enum encoding encoding,
Local<Value>* error) {
CHECK_NE(encoding, UCS2);
CHECK_BUFLEN_IN_RANGE(buflen);

if (!buflen && encoding != BUFFER) {
Expand Down Expand Up @@ -697,6 +696,37 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
return ExternOneByteString::New(isolate, dst, dlen, error);
}

case UCS2: {
if (IsBigEndian()) {
uint16_t* dst = node::UncheckedMalloc<uint16_t>(buflen / 2);
if (dst == nullptr) {
*error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
return MaybeLocal<Value>();
}
for (size_t i = 0, k = 0; k < buflen / 2; i += 2, k += 1) {
// The input is in *little endian*, because that's what Node.js
// expects, so the high byte comes after the low byte.
const uint8_t hi = static_cast<uint8_t>(buf[i + 1]);
const uint8_t lo = static_cast<uint8_t>(buf[i + 0]);
dst[k] = static_cast<uint16_t>(hi) << 8 | lo;
}
return ExternTwoByteString::New(isolate, dst, buflen / 2, error);
}
if (reinterpret_cast<uintptr_t>(buf) % 2 != 0) {
// Unaligned data still means we can't directly pass it to V8.
char* dst = node::UncheckedMalloc(buflen);
if (dst == nullptr) {
*error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
return MaybeLocal<Value>();
}
memcpy(dst, buf, buflen);
return ExternTwoByteString::New(
isolate, reinterpret_cast<uint16_t*>(dst), buflen / 2, error);
}
return ExternTwoByteString::NewFromCopy(
isolate, reinterpret_cast<const uint16_t*>(buf), buflen / 2, error);
}

default:
CHECK(0 && "unknown encoding");
break;
Expand Down Expand Up @@ -736,30 +766,7 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
enum encoding encoding,
Local<Value>* error) {
const size_t len = strlen(buf);
MaybeLocal<Value> ret;
if (encoding == UCS2) {
// In Node, UCS2 means utf16le. The data must be in little-endian
// order and must be aligned on 2-bytes. This returns an empty
// value if it's not aligned and ensures the appropriate byte order
// on big endian architectures.
const bool be = IsBigEndian();
if (len % 2 != 0)
return ret;
std::vector<uint16_t> vec(len / 2);
for (size_t i = 0, k = 0; i < len; i += 2, k += 1) {
const uint8_t hi = static_cast<uint8_t>(buf[i + 0]);
const uint8_t lo = static_cast<uint8_t>(buf[i + 1]);
vec[k] = be ?
static_cast<uint16_t>(hi) << 8 | lo
: static_cast<uint16_t>(lo) << 8 | hi;
}
ret = vec.empty() ?
static_cast< Local<Value> >(String::Empty(isolate))
: StringBytes::Encode(isolate, &vec[0], vec.size(), error);
} else {
ret = StringBytes::Encode(isolate, buf, len, encoding, error);
}
return ret;
return Encode(isolate, buf, len, encoding, error);
}

} // namespace node
1 change: 0 additions & 1 deletion src/string_bytes.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ class StringBytes {
int* chars_written = nullptr);

// Take the bytes in the src, and turn it into a Buffer or String.
// Don't call with encoding=UCS2.
static v8::MaybeLocal<v8::Value> Encode(v8::Isolate* isolate,
const char* buf,
size_t buflen,
Expand Down
10 changes: 0 additions & 10 deletions src/string_decoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,6 @@ MaybeLocal<String> MakeString(Isolate* isolate,
data,
v8::NewStringType::kNormal,
length);
} else if (encoding == UCS2) {
#ifdef DEBUG
CHECK_EQ(reinterpret_cast<uintptr_t>(data) % 2, 0);
CHECK_EQ(length % 2, 0);
#endif
ret = StringBytes::Encode(
isolate,
reinterpret_cast<const uint16_t*>(data),
length / 2,
&error);
} else {
ret = StringBytes::Encode(
isolate,
Expand Down
8 changes: 8 additions & 0 deletions test/parallel/test-string-decoder.js
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,14 @@ decoder = new StringDecoder('utf16le');
assert.strictEqual(decoder.write(Buffer.from('3DD84D', 'hex')), '\ud83d');
assert.strictEqual(decoder.end(), '');

// Regression test for https://github.com/nodejs/node/issues/22358
// (unaligned UTF-16 access).
decoder = new StringDecoder('utf16le');
assert.strictEqual(decoder.write(Buffer.alloc(1)), '');
assert.strictEqual(decoder.write(Buffer.alloc(20)), '\0'.repeat(10));
assert.strictEqual(decoder.write(Buffer.alloc(48)), '\0'.repeat(24));
assert.strictEqual(decoder.end(), '');

common.expectsError(
() => new StringDecoder(1),
{
Expand Down