Skip to content

Commit

Permalink
buffer: add base64url encoding option
Browse files Browse the repository at this point in the history
Backport parts of dae283d

PR-URL: #36952
Backport-PR-URL: #39702
Reviewed-By: James M Snell <[email protected]>
Reviewed-By: Antoine du Hamel <[email protected]>
Reviewed-By: Rich Trott <[email protected]>
Reviewed-By: Anna Henningsen <[email protected]>
  • Loading branch information
panva authored and targos committed Aug 13, 2021
1 parent 73e6781 commit a343956
Show file tree
Hide file tree
Showing 22 changed files with 399 additions and 139 deletions.
16 changes: 13 additions & 3 deletions doc/api/buffer.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ const buf7 = Buffer.from('tést', 'latin1');
## Buffers and character encodings
<!-- YAML
changes:
- version: REPLACEME
pr-url: https://github.com/nodejs/node/pull/36952
description: Introduced `base64url` encoding.
- version: v6.4.0
pr-url: https://github.com/nodejs/node/pull/7111
description: Introduced `latin1` as an alias for `binary`.
Expand Down Expand Up @@ -106,6 +109,11 @@ string into a `Buffer` as decoding.
specified in [RFC 4648, Section 5][]. Whitespace characters such as spaces,
tabs, and new lines contained within the base64-encoded string are ignored.

* `'base64url'`: [base64url][] encoding as specified in
[RFC 4648, Section 5][]. When creating a `Buffer` from a string, this
encoding will also correctly accept regular base64-encoded strings. When
encoding a `Buffer` to a string, this encoding will omit padding.

* `'hex'`: Encode each byte as two hexadecimal characters. Data truncation
may occur when decoding strings that do not exclusively contain valid
hexadecimal characters. See below for an example.
Expand Down Expand Up @@ -469,9 +477,10 @@ Returns the byte length of a string when encoded using `encoding`.
This is not the same as [`String.prototype.length`][], which does not account
for the encoding that is used to convert the string into bytes.

For `'base64'` and `'hex'`, this function assumes valid input. For strings that
contain non-base64/hex-encoded data (e.g. whitespace), the return value might be
greater than the length of a `Buffer` created from the string.
For `'base64'`, `'base64url'`, and `'hex'`, this function assumes valid input.
For strings that contain non-base64/hex-encoded data (e.g. whitespace), the
return value might be greater than the length of a `Buffer` created from the
string.

```js
const str = '\u00bd + \u00bc = \u00be';
Expand Down Expand Up @@ -3427,6 +3436,7 @@ introducing security vulnerabilities into an application.
[`buffer.kMaxLength`]: #buffer_buffer_kmaxlength
[`util.inspect()`]: util.md#util_util_inspect_object_options
[`v8::TypedArray::kMaxLength`]: https://v8.github.io/api/head/classv8_1_1TypedArray.html#a54a48f4373da0850663c4393d843b9b0
[base64url]: https://tools.ietf.org/html/rfc4648#section-5
[binary strings]: https://developer.mozilla.org/en-US/docs/Web/API/DOMString/Binary
[endianness]: https://en.wikipedia.org/wiki/Endianness
[iterator]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Iteration_protocols
19 changes: 19 additions & 0 deletions lib/buffer.js
Original file line number Diff line number Diff line change
Expand Up @@ -659,6 +659,20 @@ const encodingOps = {
encodingsMap.base64,
dir)
},
base64url: {
encoding: 'base64url',
encodingVal: encodingsMap.base64url,
byteLength: (string) => base64ByteLength(string, string.length),
write: (buf, string, offset, len) =>
buf.base64urlWrite(string, offset, len),
slice: (buf, start, end) => buf.base64urlSlice(start, end),
indexOf: (buf, val, byteOffset, dir) =>
indexOfBuffer(buf,
fromStringFast(val, encodingOps.base64url),
byteOffset,
encodingsMap.base64url,
dir)
},
hex: {
encoding: 'hex',
encodingVal: encodingsMap.hex,
Expand Down Expand Up @@ -715,6 +729,11 @@ function getEncodingOps(encoding) {
if (encoding === 'hex' || StringPrototypeToLowerCase(encoding) === 'hex')
return encodingOps.hex;
break;
case 9:
if (encoding === 'base64url' ||
StringPrototypeToLowerCase(encoding) === 'base64url')
return encodingOps.base64url;
break;
}
}

Expand Down
4 changes: 4 additions & 0 deletions lib/internal/buffer.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@ const { validateNumber } = require('internal/validators');
const {
asciiSlice,
base64Slice,
base64urlSlice,
latin1Slice,
hexSlice,
ucs2Slice,
utf8Slice,
asciiWrite,
base64Write,
base64urlWrite,
latin1Write,
hexWrite,
ucs2Write,
Expand Down Expand Up @@ -1026,12 +1028,14 @@ function addBufferPrototypeMethods(proto) {

proto.asciiSlice = asciiSlice;
proto.base64Slice = base64Slice;
proto.base64urlSlice = base64urlSlice;
proto.latin1Slice = latin1Slice;
proto.hexSlice = hexSlice;
proto.ucs2Slice = ucs2Slice;
proto.utf8Slice = utf8Slice;
proto.asciiWrite = asciiWrite;
proto.base64Write = base64Write;
proto.base64urlWrite = base64urlWrite;
proto.latin1Write = latin1Write;
proto.hexWrite = hexWrite;
proto.ucs2Write = ucs2Write;
Expand Down
5 changes: 5 additions & 0 deletions lib/internal/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,11 @@ function slowCases(enc) {
`${enc}`.toLowerCase() === 'utf-16le')
return 'utf16le';
break;
case 9:
if (enc === 'base64url' || enc === 'BASE64URL' ||
`${enc}`.toLowerCase() === 'base64url')
return 'base64url';
break;
default:
if (enc === '') return 'utf8';
}
Expand Down
4 changes: 4 additions & 0 deletions src/api/encoding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,17 @@ enum encoding ParseEncoding(const char* encoding,
} else if (encoding[1] == 'a') {
if (strncmp(encoding + 2, "se64", 5) == 0)
return BASE64;
if (strncmp(encoding + 2, "se64url", 8) == 0)
return BASE64URL;
}
if (StringEqualNoCase(encoding, "binary"))
return LATIN1; // BINARY is a deprecated alias of LATIN1.
if (StringEqualNoCase(encoding, "buffer"))
return BUFFER;
if (StringEqualNoCase(encoding, "base64"))
return BASE64;
if (StringEqualNoCase(encoding, "base64url"))
return BASE64URL;
break;

case 'a':
Expand Down
20 changes: 11 additions & 9 deletions src/base64-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,12 +123,13 @@ size_t base64_decode(char* const dst, const size_t dstlen,
inline size_t base64_encode(const char* src,
size_t slen,
char* dst,
size_t dlen) {
size_t dlen,
Base64Mode mode) {
// We know how much we'll write, just make sure that there's space.
CHECK(dlen >= base64_encoded_size(slen) &&
CHECK(dlen >= base64_encoded_size(slen, mode) &&
"not enough space provided for base64 encode");

dlen = base64_encoded_size(slen);
dlen = base64_encoded_size(slen, mode);

unsigned a;
unsigned b;
Expand All @@ -137,9 +138,7 @@ inline size_t base64_encode(const char* src,
unsigned k;
unsigned n;

static const char table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789+/";
const char* table = base64_select_table(mode);

i = 0;
k = 0;
Expand All @@ -164,16 +163,19 @@ inline size_t base64_encode(const char* src,
a = src[i + 0] & 0xff;
dst[k + 0] = table[a >> 2];
dst[k + 1] = table[(a & 3) << 4];
dst[k + 2] = '=';
dst[k + 3] = '=';
if (mode == Base64Mode::NORMAL) {
dst[k + 2] = '=';
dst[k + 3] = '=';
}
break;
case 2:
a = src[i + 0] & 0xff;
b = src[i + 1] & 0xff;
dst[k + 0] = table[a >> 2];
dst[k + 1] = table[((a & 3) << 4) | (b >> 4)];
dst[k + 2] = table[(b & 0x0f) << 2];
dst[k + 3] = '=';
if (mode == Base64Mode::NORMAL)
dst[k + 3] = '=';
break;
}

Expand Down
34 changes: 31 additions & 3 deletions src/base64.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,40 @@

#include "util.h"

#include <cmath>
#include <cstddef>
#include <cstdint>

namespace node {
//// Base 64 ////
static inline constexpr size_t base64_encoded_size(size_t size) {
return ((size + 2) / 3 * 4);

// Selects which base64 alphabet the encoder uses.
enum class Base64Mode {
// Standard alphabet (base64_table); base64_encode() writes '=' padding.
NORMAL,
// URL-safe alphabet (base64_table_url); base64_encode() writes no padding.
URL
};

// Standard base64 alphabet: values 62 and 63 map to '+' and '/'.
static constexpr char base64_table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789+/";

// URL-safe base64 alphabet: identical to base64_table except that values 62
// and 63 map to '-' and '_'.
static constexpr char base64_table_url[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789-_";

// Returns the 64-character alphabet that corresponds to `mode`.
static inline const char* base64_select_table(Base64Mode mode) {
  if (mode == Base64Mode::NORMAL)
    return base64_table;
  if (mode == Base64Mode::URL)
    return base64_table_url;
  // Both enumerators are handled above; any other value is a programming
  // error.
  UNREACHABLE();
}

// Returns the exact number of characters produced when `size` input bytes are
// base64-encoded in the given `mode`.
//
// NORMAL output is padded with '=' up to a multiple of four characters.
// URL output is unpadded: every complete 3-byte group yields 4 characters and
// a trailing group of 1 or 2 bytes yields 2 or 3 characters respectively,
// which equals ceil(size * 4 / 3).
//
// The result is computed with integer arithmetic only. std::ceil is not usable
// in a constant expression before C++23, converting through double loses
// precision for sizes above 2^53, and dividing before multiplying keeps the
// intermediate value from wrapping where `size * 4` would.
static inline constexpr size_t base64_encoded_size(
    size_t size,
    Base64Mode mode = Base64Mode::NORMAL) {
  return mode == Base64Mode::NORMAL
      ? ((size + 2) / 3 * 4)
      : (size / 3 * 4 + (size % 3 == 0 ? 0 : size % 3 + 1));
}

// Doesn't check for padding at the end. Can be 1-2 bytes over.
Expand All @@ -32,7 +59,8 @@ size_t base64_decode(char* const dst, const size_t dstlen,
inline size_t base64_encode(const char* src,
size_t slen,
char* dst,
size_t dlen);
size_t dlen,
Base64Mode mode = Base64Mode::NORMAL);
} // namespace node


Expand Down
13 changes: 12 additions & 1 deletion src/node.h
Original file line number Diff line number Diff line change
Expand Up @@ -664,7 +664,18 @@ inline void NODE_SET_PROTOTYPE_METHOD(v8::Local<v8::FunctionTemplate> recv,
#define NODE_SET_PROTOTYPE_METHOD node::NODE_SET_PROTOTYPE_METHOD

// BINARY is a deprecated alias of LATIN1.
enum encoding {ASCII, UTF8, BASE64, UCS2, BINARY, HEX, BUFFER, LATIN1 = BINARY};
// BASE64URL selects the URL-safe base64 alphabet and is reachable from
// JavaScript through the 'base64url' encoding name.
enum encoding {
ASCII,
UTF8,
BASE64,
UCS2,
// Shares its value with LATIN1 (see the last enumerator).
BINARY,
HEX,
BUFFER,
// Listed after BUFFER, so every earlier enumerator keeps its implicit value.
BASE64URL,
// Explicitly given the same value as BINARY.
LATIN1 = BINARY
};

NODE_EXTERN enum encoding ParseEncoding(
v8::Isolate* isolate,
Expand Down
2 changes: 2 additions & 0 deletions src/node_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1163,13 +1163,15 @@ void Initialize(Local<Object> target,

env->SetMethodNoSideEffect(target, "asciiSlice", StringSlice<ASCII>);
env->SetMethodNoSideEffect(target, "base64Slice", StringSlice<BASE64>);
env->SetMethodNoSideEffect(target, "base64urlSlice", StringSlice<BASE64URL>);
env->SetMethodNoSideEffect(target, "latin1Slice", StringSlice<LATIN1>);
env->SetMethodNoSideEffect(target, "hexSlice", StringSlice<HEX>);
env->SetMethodNoSideEffect(target, "ucs2Slice", StringSlice<UCS2>);
env->SetMethodNoSideEffect(target, "utf8Slice", StringSlice<UTF8>);

env->SetMethod(target, "asciiWrite", StringWrite<ASCII>);
env->SetMethod(target, "base64Write", StringWrite<BASE64>);
env->SetMethod(target, "base64urlWrite", StringWrite<BASE64URL>);
env->SetMethod(target, "latin1Write", StringWrite<LATIN1>);
env->SetMethod(target, "hexWrite", StringWrite<HEX>);
env->SetMethod(target, "ucs2Write", StringWrite<UCS2>);
Expand Down
20 changes: 20 additions & 0 deletions src/string_bytes.cc
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,8 @@ size_t StringBytes::Write(Isolate* isolate,
break;
}

case BASE64URL:
// Fall through
case BASE64:
if (str->IsExternalOneByte()) {
auto ext = str->GetExternalOneByteStringResource();
Expand Down Expand Up @@ -425,6 +427,8 @@ Maybe<size_t> StringBytes::StorageSize(Isolate* isolate,
data_size = str->Length() * sizeof(uint16_t);
break;

case BASE64URL:
// Fall through
case BASE64:
data_size = base64_decoded_size_fast(str->Length());
break;
Expand Down Expand Up @@ -466,6 +470,8 @@ Maybe<size_t> StringBytes::Size(Isolate* isolate,
case UCS2:
return Just(str->Length() * sizeof(uint16_t));

case BASE64URL:
// Fall through
case BASE64: {
String::Value value(isolate, str);
return Just(base64_decoded_size(*value, value.length()));
Expand Down Expand Up @@ -691,6 +697,20 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
return ExternOneByteString::New(isolate, dst, dlen, error);
}

case BASE64URL: {
size_t dlen = base64_encoded_size(buflen, Base64Mode::URL);
char* dst = node::UncheckedMalloc(dlen);
if (dst == nullptr) {
*error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate);
return MaybeLocal<Value>();
}

size_t written = base64_encode(buf, buflen, dst, dlen, Base64Mode::URL);
CHECK_EQ(written, dlen);

return ExternOneByteString::New(isolate, dst, dlen, error);
}

case HEX: {
size_t dlen = buflen * 2;
char* dst = node::UncheckedMalloc(dlen);
Expand Down
8 changes: 6 additions & 2 deletions src/string_decoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,10 @@ MaybeLocal<String> StringDecoder::DecodeData(Isolate* isolate,

size_t nread = *nread_ptr;

if (Encoding() == UTF8 || Encoding() == UCS2 || Encoding() == BASE64) {
if (Encoding() == UTF8 ||
Encoding() == UCS2 ||
Encoding() == BASE64 ||
Encoding() == BASE64URL) {
// See if we want bytes to finish a character from the previous
// chunk; if so, copy the new bytes to the missing bytes buffer
// and create a small string from it that is to be prepended to the
Expand Down Expand Up @@ -197,7 +200,7 @@ MaybeLocal<String> StringDecoder::DecodeData(Isolate* isolate,
state_[kBufferedBytes] = 2;
state_[kMissingBytes] = 2;
}
} else if (Encoding() == BASE64) {
} else if (Encoding() == BASE64 || Encoding() == BASE64URL) {
state_[kBufferedBytes] = nread % 3;
if (state_[kBufferedBytes] > 0)
state_[kMissingBytes] = 3 - BufferedBytes();
Expand Down Expand Up @@ -310,6 +313,7 @@ void InitializeStringDecoder(Local<Object> target,
ADD_TO_ENCODINGS_ARRAY(ASCII, "ascii");
ADD_TO_ENCODINGS_ARRAY(UTF8, "utf8");
ADD_TO_ENCODINGS_ARRAY(BASE64, "base64");
ADD_TO_ENCODINGS_ARRAY(BASE64URL, "base64url");
ADD_TO_ENCODINGS_ARRAY(UCS2, "utf16le");
ADD_TO_ENCODINGS_ARRAY(HEX, "hex");
ADD_TO_ENCODINGS_ARRAY(BUFFER, "buffer");
Expand Down
1 change: 1 addition & 0 deletions test/addons/parse-encoding/binding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ namespace {
#define ENCODING_MAP(V) \
V(ASCII) \
V(BASE64) \
V(BASE64URL) \
V(BUFFER) \
V(HEX) \
V(LATIN1) \
Expand Down
1 change: 1 addition & 0 deletions test/addons/parse-encoding/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ assert.strictEqual(parseEncoding(''), 'UNKNOWN');

assert.strictEqual(parseEncoding('ascii'), 'ASCII');
assert.strictEqual(parseEncoding('base64'), 'BASE64');
assert.strictEqual(parseEncoding('base64url'), 'BASE64URL');
assert.strictEqual(parseEncoding('binary'), 'LATIN1');
assert.strictEqual(parseEncoding('buffer'), 'BUFFER');
assert.strictEqual(parseEncoding('hex'), 'HEX');
Expand Down
15 changes: 15 additions & 0 deletions test/cctest/test_base64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,20 @@ TEST(Base64Test, Encode) {
"IGRlc2VydW50IG1vbGxpdCBhbmltIGlkIGVzdCBsYWJvcnVtLg==");
}

// Verifies that URL-mode encoding uses the '-' / '_' alphabet and writes no
// '=' padding.
TEST(Base64Test, EncodeURL) {
  // Encodes `input` into a buffer sized for exactly `expected` and compares
  // the two as C strings.
  auto check = [](const char* input, const char* expected) {
    const size_t expected_len = strlen(expected);
    char* const out = new char[expected_len + 1];
    // The encoder does not NUL-terminate; do it up front for EXPECT_STREQ.
    out[expected_len] = 0;
    base64_encode(input,
                  strlen(input),
                  out,
                  expected_len,
                  node::Base64Mode::URL);
    EXPECT_STREQ(expected, out);
    delete[] out;
  };

  check("\x68\xd9\x16\x25\x5c\x1e\x40\x92\x2d\xfb", "aNkWJVweQJIt-w");
  check("\xac\xc7\x93\xaa\x83\x6f\xc3\xe3\x3f\x75", "rMeTqoNvw-M_dQ");
}

TEST(Base64Test, Decode) {
auto test = [](const char* base64_string, const char* string) {
const size_t len = strlen(string);
Expand Down Expand Up @@ -75,6 +89,7 @@ TEST(Base64Test, Decode) {
test("YWJj ZGVm", "abcdef");
test("Y W J j Z G V m", "abcdef");
test("Y W\n JjZ \nG Vm", "abcdef");
test("rMeTqoNvw-M_dQ", "\xac\xc7\x93\xaa\x83\x6f\xc3\xe3\x3f\x75");

const char* text =
"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do "
Expand Down
Loading

0 comments on commit a343956

Please sign in to comment.