Skip to content

Commit

Permalink
add buffer.transcode to nodejs_compat
Browse files Browse the repository at this point in the history
  • Loading branch information
anonrig committed Aug 5, 2024
1 parent fc77758 commit fd50f17
Show file tree
Hide file tree
Showing 9 changed files with 466 additions and 26 deletions.
3 changes: 3 additions & 0 deletions src/node/buffer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import {
SlowBuffer,
isAscii,
isUtf8,
transcode,
} from 'node-internal:internal_buffer';

// eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
Expand All @@ -30,6 +31,7 @@ export {
SlowBuffer,
isAscii,
isUtf8,
transcode,
};

export default {
Expand All @@ -46,4 +48,5 @@ export default {
SlowBuffer,
isAscii,
isUtf8,
transcode,
};
1 change: 1 addition & 0 deletions src/node/internal/buffer.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,4 @@ export function decode(buffer: Uint8Array, state: Uint8Array): string;
export function flush(state: Uint8Array): string;
export function isAscii(value: ArrayBufferView): boolean;
export function isUtf8(value: ArrayBufferView): boolean;
/**
 * Native binding that converts the bytes of `source` from `fromEncoding` to
 * `toEncoding` and hands the converted bytes back as an `ArrayBuffer`.
 */
export function transcode(source: ArrayBufferView, fromEncoding: string, toEncoding: string): ArrayBuffer;
2 changes: 1 addition & 1 deletion src/node/internal/crypto_dh.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ let DiffieHellman = function (this: DiffieHellman, sizeOrKey: number|ArrayLike,
if (typeof sizeOrKey === 'number')
validateInt32(sizeOrKey, 'sizeOrKey');

if (keyEncoding && !Buffer.isEncoding(keyEncoding) && keyEncoding !== 'buffer') {
if (keyEncoding && keyEncoding !== 'buffer' && !Buffer.isEncoding(keyEncoding)) {
genEncoding = generator as any;
generator = keyEncoding;
keyEncoding = "utf-8"; // default encoding
Expand Down
18 changes: 17 additions & 1 deletion src/node/internal/internal_buffer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -440,7 +440,7 @@ export function compare(a: Buffer|Uint8Array, b: Buffer|Uint8Array) {

Buffer.compare = compare;

export function isEncoding(encoding: unknown) {
export function isEncoding(encoding: unknown): encoding is string {
return typeof encoding === "string" &&
encoding.length !== 0 &&
normalizeEncoding(encoding) !== undefined;
Expand Down Expand Up @@ -2294,6 +2294,22 @@ export function isUtf8(value: ArrayBufferView) {
return bufferUtil.isUtf8(value);
}

/**
 * Re-encodes the bytes of `source` from one character encoding into another.
 *
 * Each encoding name is run through `normalizeEncoding` and then checked with
 * `Buffer.isEncoding` before the native `bufferUtil.transcode` is invoked with
 * the normalized names.
 *
 * @param source - View over the bytes to convert.
 * @param fromEncoding - Name of the encoding the bytes are currently in.
 * @param toEncoding - Name of the encoding to convert the bytes to.
 * @returns The result of `Buffer.from` applied to the natively transcoded bytes.
 * @throws ERR_INVALID_ARG_TYPE when `source` is not an ArrayBufferView.
 * @throws ERR_UNKNOWN_ENCODING when either encoding name fails validation.
 */
export function transcode(source: ArrayBufferView, fromEncoding: string, toEncoding: string) {
  if (!isArrayBufferView(source)) {
    // NOTE(review): the third argument is `typeof source`, not `source` itself;
    // confirm that is what ERR_INVALID_ARG_TYPE expects for its "actual" slot.
    throw new ERR_INVALID_ARG_TYPE('source', 'ArrayBufferView', typeof source);
  }

  // Validate the source encoding first so its error takes precedence.
  const srcEncoding = normalizeEncoding(fromEncoding);
  if (!Buffer.isEncoding(srcEncoding)) {
    throw new ERR_UNKNOWN_ENCODING(fromEncoding);
  }

  const dstEncoding = normalizeEncoding(toEncoding);
  if (!Buffer.isEncoding(dstEncoding)) {
    throw new ERR_UNKNOWN_ENCODING(toEncoding);
  }

  // TODO(soon): Optimization opportunity: Pass int encoding values instead of strings.
  const transcoded = bufferUtil.transcode(source, srcEncoding, dstEncoding);
  return Buffer.from(transcoded);
}

export default {
Buffer,
constants,
Expand Down
55 changes: 31 additions & 24 deletions src/workerd/api/node/buffer.c++
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@
#include "buffer-string-search.h"
#include <workerd/jsg/buffersource.h>
#include <kj/encoding.h>
#include <algorithm>
#include <kj/array.h>
#include "simdutf.h"
#include "i18n.h"

#include <algorithm>

// These are defined by <sys/byteorder.h> or <netinet/in.h> on some systems.
// To avoid warnings, undefine them before redefining them.
Expand Down Expand Up @@ -85,34 +88,24 @@ void SwapBytes(kj::ArrayPtr<kj::byte> bytes) {
}
}

enum class Encoding {
ASCII,
LATIN1,
UTF8,
UTF16LE,
BASE64,
BASE64URL,
HEX,
};

Encoding getEncoding(kj::StringPtr encoding) {
if (encoding == "utf8"_kj) {
inline Encoding getEncoding(kj::StringPtr input) {
if (input == "utf8"_kj) {
return Encoding::UTF8;
} else if (encoding == "ascii") {
} else if (input == "ascii"_kj) {
return Encoding::ASCII;
} else if (encoding == "latin1") {
} else if (input == "latin1"_kj) {
return Encoding::LATIN1;
} else if (encoding == "utf16le") {
} else if (input == "utf16le"_kj) {
return Encoding::UTF16LE;
} else if (encoding == "base64") {
} else if (input == "base64"_kj) {
return Encoding::BASE64;
} else if (encoding == "base64url") {
} else if (input == "base64url"_kj) {
return Encoding::BASE64URL;
} else if (encoding == "hex") {
} else if (input == "hex"_kj) {
return Encoding::HEX;
}

KJ_UNREACHABLE;
JSG_FAIL_REQUIRE(Error, kj::str("Invalid encoding: ", input));
}

kj::Maybe<uint> tryFromHexDigit(char c) {
Expand All @@ -137,7 +130,7 @@ kj::Array<byte> decodeHexTruncated(kj::ArrayPtr<kj::byte> text, bool strict = fa
}
text = text.slice(0, text.size() - 1);
}
kj::Vector vec = kj::Vector<kj::byte>(text.size() / 2);
auto vec = kj::Vector<kj::byte>(text.size() / 2);

for (size_t i = 0; i < text.size(); i += 2) {
byte b = 0;
Expand Down Expand Up @@ -216,8 +209,9 @@ uint32_t writeInto(
dest.first(amountToCopy).copyFrom(bytes.first(amountToCopy));
return amountToCopy;
}
default:
KJ_UNREACHABLE;
}
KJ_UNREACHABLE;
}

kj::Array<kj::byte> decodeStringImpl(
Expand Down Expand Up @@ -272,8 +266,9 @@ kj::Array<kj::byte> decodeStringImpl(
string.writeInto(js, buf, options);
return decodeHexTruncated(buf, strict);
}
default:
KJ_UNREACHABLE;
}
KJ_UNREACHABLE;
}
} // namespace

Expand Down Expand Up @@ -561,8 +556,9 @@ jsg::JsString toStringImpl(
case Encoding::HEX: {
return js.str(kj::encodeHex(slice));
}
default:
KJ_UNREACHABLE;
}
KJ_UNREACHABLE;
}

} // namespace
Expand Down Expand Up @@ -876,5 +872,16 @@ bool BufferUtil::isUtf8(kj::Array<kj::byte> buffer) {
return simdutf::validate_utf8(buffer.asChars().begin(), buffer.size());
}

// Converts the bytes in `source` from one encoding to another.
//
// The two raw encoding names are mapped to Encoding values with getEncoding().
// Each resolved encoding must then be accepted by i18n::canBeTranscoded();
// otherwise a JS Error is raised. The conversion itself is delegated to
// i18n::transcode().
kj::Array<kj::byte> BufferUtil::transcode(kj::Array<kj::byte> source,
                                          kj::String rawFromEncoding,
                                          kj::String rawToEncoding) {
  auto from = getEncoding(rawFromEncoding);
  auto to = getEncoding(rawToEncoding);

  // Checked in source-then-target order; both failures report the same message.
  JSG_REQUIRE(i18n::canBeTranscoded(from), Error,
      "Unable to transcode buffer due to unsupported encoding");
  JSG_REQUIRE(i18n::canBeTranscoded(to), Error,
      "Unable to transcode buffer due to unsupported encoding");

  return i18n::transcode(source, from, to);
}

} // namespace workerd::api::node {

4 changes: 4 additions & 0 deletions src/workerd/api/node/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ class BufferUtil final: public jsg::Object {
jsg::JsString flush(jsg::Lock& js, kj::Array<kj::byte> state);
bool isAscii(kj::Array<kj::byte> bytes);
bool isUtf8(kj::Array<kj::byte> bytes);
kj::Array<kj::byte> transcode(kj::Array<kj::byte> source,
kj::String rawFromEncoding,
kj::String rawToEncoding);

JSG_RESOURCE_TYPE(BufferUtil) {
JSG_METHOD(byteLength);
Expand All @@ -94,6 +97,7 @@ class BufferUtil final: public jsg::Object {
JSG_METHOD(write);
JSG_METHOD(isAscii);
JSG_METHOD(isUtf8);
JSG_METHOD(transcode);

// For StringDecoder
JSG_METHOD(decode);
Expand Down
Loading

0 comments on commit fd50f17

Please sign in to comment.