Skip to content

Commit 13fafde

Browse files
dannysu and facebook-github-bot
authored and committed
atob implementation (#1256)
Summary: Pull Request resolved: #1256. Implement the [atob](https://html.spec.whatwg.org/multipage/webappapis.html#atob) utility function for decoding a base64 string. This implementation does not follow the HTML spec exactly: in error cases it throws a plain Error rather than a DOMException. The existing alternatives that people use with Hermes also throw Error, so this code does the same. Reviewed By: avp. Differential Revision: D52181353. fbshipit-source-id: c90ec95e1ed3b44a7668a6ae4071df536bb31a71
1 parent d2177c3 commit 13fafde

File tree

9 files changed

+298
-0
lines changed

9 files changed

+298
-0
lines changed

include/hermes/VM/JSLib/Base64Util.h

+17
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#ifndef HERMES_VM_JSLIB_BASE64UTIL_H
99
#define HERMES_VM_JSLIB_BASE64UTIL_H
1010

11+
#include "hermes/Support/OptValue.h"
1112
#include "hermes/VM/Runtime.h"
1213

1314
namespace hermes {
@@ -18,6 +19,22 @@ namespace vm {
1819
template <typename T>
1920
bool base64Encode(llvh::ArrayRef<T> str, StringBuilder &builder);
2021

22+
/// If \p str has a valid base64 encoded string length, then calculate the
23+
/// expected length after decoding using the forgiving base64 algorithm. Returns
24+
/// nullopt if \p str has an invalid length.
/// ASCII whitespace in \p str is skipped and does not count towards the length.
/// \param str the base64 encoded input (char or char16_t code units)
/// \return the number of decoded characters to reserve, or nullopt
25+
template <typename T>
26+
OptValue<uint32_t> base64DecodeOutputLength(llvh::ArrayRef<T> str);
27+
28+
/// Implements the forgiving base64 decode algorithm:
29+
/// https://infra.spec.whatwg.org/#forgiving-base64-decode
30+
/// The key difference compared to other base64 decode algorithms is that the
31+
/// forgiving algorithm ignores ASCII whitespace.
32+
/// \param str string to be decoded
33+
/// \param builder StringBuilder to store the output in. Decoding only reports
/// success when the builder ends up exactly full, i.e. its currentLength()
/// equals its maxLength() once decoding stops.
34+
/// \return true if successful, false otherwise
35+
template <typename T>
36+
bool base64Decode(llvh::ArrayRef<T> str, StringBuilder &builder);
37+
2138
} // namespace vm
2239
} // namespace hermes
2340

include/hermes/VM/NativeFunctions.def

+1
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ NATIVE_FUNCTION(arrayPrototypeSome)
5858
NATIVE_FUNCTION(arrayPrototypeUnshift)
5959
NATIVE_FUNCTION(arrayPrototypeSplice)
6060
NATIVE_FUNCTION(asyncFunctionConstructor)
61+
NATIVE_FUNCTION(atob)
6162

6263
NATIVE_FUNCTION(bigintTruncate)
6364
NATIVE_FUNCTION(bigintConstructor)

include/hermes/VM/PredefinedStrings.def

+1
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ STR(isNaN, "isNaN")
5252
STR(isFinite, "isFinite")
5353
STR(escape, "escape")
5454
STR(unescape, "unescape")
55+
STR(atob, "atob")
5556
STR(btoa, "btoa")
5657
STR(decodeURI, "decodeURI")
5758
STR(decodeURIComponent, "decodeURIComponent")

include/hermes/VM/StringBuilder.h

+11
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,17 @@ class StringBuilder {
5353
return StringBuilder(runtime, crtRes->getString());
5454
}
5555

56+
/// Number of characters accumulated in the StringBuilder so far.
/// \return the current value of index_.
57+
uint32_t currentLength() const {
58+
return index_;
59+
}
60+
61+
/// The length this StringBuilder was constructed with. The max length doesn't
62+
/// ever change.
/// \return the string length reported by the underlying strPrim_.
63+
uint32_t maxLength() const {
64+
return strPrim_->getStringLength();
65+
}
66+
5667
/// Append an UTF16Ref \p str. Note that str should not point to a GC-managed
5768
/// memory, as this function in theory can allocate.
5869
void appendUTF16Ref(UTF16Ref str) {

lib/VM/JSLib/Base64.cpp

+34
Original file line numberDiff line numberDiff line change
@@ -50,5 +50,39 @@ CallResult<HermesValue> btoa(void *, Runtime &runtime, NativeArgs args) {
5050
return builder->getStringPrimitive().getHermesValue();
5151
}
5252

53+
/// Take a Base64-encoded ASCII string and decode it. Error is thrown if the
54+
/// input string isn't a valid base64 encoded string.
55+
CallResult<HermesValue> atob(void *, Runtime &runtime, NativeArgs args) {
56+
GCScope gcScope{runtime};
57+
auto res = toString_RJS(runtime, args.getArgHandle(0));
58+
if (LLVM_UNLIKELY(res == ExecutionStatus::EXCEPTION)) {
59+
return ExecutionStatus::EXCEPTION;
60+
}
61+
62+
auto string = runtime.makeHandle(std::move(*res));
63+
64+
OptValue<uint32_t> expectedLength = string->isASCII()
65+
? base64DecodeOutputLength(string->getStringRef<char>())
66+
: base64DecodeOutputLength(string->getStringRef<char16_t>());
67+
if (!expectedLength) {
68+
return runtime.raiseError("Not a valid base64 encoded string length");
69+
}
70+
CallResult<StringBuilder> builder =
71+
StringBuilder::createStringBuilder(runtime, SafeUInt32(*expectedLength));
72+
if (LLVM_UNLIKELY(builder == ExecutionStatus::EXCEPTION)) {
73+
return ExecutionStatus::EXCEPTION;
74+
}
75+
76+
bool success = string->isASCII()
77+
? base64Decode(string->getStringRef<char>(), *builder)
78+
: base64Decode(string->getStringRef<char16_t>(), *builder);
79+
if (!success) {
80+
return runtime.raiseError(
81+
"Found invalid character when decoding base64 string");
82+
}
83+
84+
return builder->getStringPrimitive().getHermesValue();
85+
}
86+
5387
} // namespace vm
5488
} // namespace hermes

lib/VM/JSLib/Base64Util.cpp

+105
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,30 @@ namespace hermes {
1313
namespace vm {
1414

1515
namespace {
16+
1617
// The base64 alphabet in value order: position i holds the character that
// represents the 6-bit value i ('A'-'Z', 'a'-'z', '0'-'9', '+', '/').
constexpr const std::array<char, 64> Base64Chars = {
1718
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
1819
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
1920
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
2021
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
2122
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'};
23+
24+
// A lookup table that maps a (Base64-encoded) ASCII character back to its
// 6-bit binary value. The table is indexed by the ASCII code (0-127). Every
// character outside the base64 alphabet, including the padding character '=',
// maps to 64. That is not a valid sextet, so 64 acts as the "invalid
// character" marker.
25+
constexpr const std::array<unsigned char, 128> decMap = {
26+
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
27+
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
28+
64, 64, 64, 64, 64, 62, 64, 64, 64, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60,
29+
61, 64, 64, 64, 64, 64, 64, 64, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
30+
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 64, 64, 64, 64,
31+
64, 64, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
32+
43, 44, 45, 46, 47, 48, 49, 50, 51, 64, 64, 64, 64, 64};
33+
34+
/// \return true when \p c is one of the five whitespace code points this
/// decoder skips: TAB (0x09), LF (0x0A), FF (0x0C), CR (0x0D) or SPACE (0x20).
template <typename T>
inline bool isWhitespace(T c) {
  switch (c) {
    case '\x09':
    case '\x0A':
    case '\x0C':
    case '\x0D':
    case '\x20':
      return true;
    default:
      return false;
  }
}
39+
2240
} // namespace
2341

2442
template <typename T>
@@ -90,5 +108,92 @@ template bool base64Encode(
90108
llvh::ArrayRef<char16_t> str,
91109
StringBuilder &builder);
92110

111+
template <typename T>
112+
OptValue<uint32_t> base64DecodeOutputLength(llvh::ArrayRef<T> str) {
113+
// Figure out the actual string length after ignoring all whitespaces.
114+
uint64_t strLength = 0;
115+
T lastChar = 0;
116+
T secondLastChar = 0;
117+
for (const auto c : str) {
118+
// Only increment length if character is not a whitespace
119+
if (!isWhitespace(c)) {
120+
strLength++;
121+
secondLastChar = lastChar;
122+
lastChar = c;
123+
}
124+
}
125+
126+
uint32_t numPadding = 0;
127+
if (strLength % 4 == 0) {
128+
// Check to see if the last character or the last 2 characters are the
129+
// padding character.
130+
if (strLength > 0 && lastChar == '=') {
131+
numPadding++;
132+
if (strLength > 1 && secondLastChar == '=') {
133+
numPadding++;
134+
}
135+
}
136+
} else {
137+
// The input string should always be divisible by 4.
138+
return llvh::None;
139+
}
140+
141+
// This shouldn't overflow because the value is guaranteed to be smaller.
142+
uint32_t expectedLength = (strLength / 4 * 3) - numPadding;
143+
if (strLength != 0 && expectedLength == 0) {
144+
return llvh::None;
145+
}
146+
return expectedLength;
147+
}
148+
149+
template OptValue<uint32_t> base64DecodeOutputLength(llvh::ArrayRef<char> str);
150+
template OptValue<uint32_t> base64DecodeOutputLength(
151+
llvh::ArrayRef<char16_t> str);
152+
153+
template <typename T>
154+
bool base64Decode(llvh::ArrayRef<T> str, StringBuilder &builder) {
155+
// Iterate over the trimmed \p str, decode every \c c into a sextet and store
156+
// into a buffer \c buf of capacity 32 bits. \c bufSize is maintained to
157+
// track how many bits are actually buffered.
158+
uint32_t buf = 0;
159+
uint32_t bufSize = 0;
160+
for (const auto c : str) {
161+
if (isWhitespace(c)) {
162+
continue;
163+
}
164+
165+
if (LLVM_UNLIKELY(c > 127) || LLVM_UNLIKELY(c < 0)) {
166+
return false;
167+
}
168+
169+
if (c == '=') {
170+
break;
171+
}
172+
173+
unsigned char sextet = decMap[c];
174+
if (LLVM_UNLIKELY(sextet >= 64)) {
175+
return false;
176+
}
177+
178+
// Making room for the new sextet.
179+
buf = (buf << 6) + sextet;
180+
bufSize += 6;
181+
182+
// Once buffer is filled over a byte, evacuate a byte to the output.
183+
if (bufSize >= 8) {
184+
char16_t decodedChar = (buf >> (bufSize - 8)) & 0xFF;
185+
builder.appendCharacter(decodedChar);
186+
bufSize -= 8;
187+
}
188+
}
189+
190+
return builder.currentLength() == builder.maxLength();
191+
}
192+
193+
template bool base64Decode(llvh::ArrayRef<char> str, StringBuilder &builder);
194+
template bool base64Decode(
195+
llvh::ArrayRef<char16_t> str,
196+
StringBuilder &builder);
197+
93198
} // namespace vm
94199
} // namespace hermes

lib/VM/JSLib/GlobalObject.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -740,6 +740,9 @@ void initGlobalObject(Runtime &runtime, const JSLibFlags &jsLibFlags) {
740740
// Define the 'unescape' function.
741741
defineGlobalFunc(Predefined::getSymbolID(Predefined::unescape), unescape, 1);
742742

743+
// Define the 'atob' function.
744+
defineGlobalFunc(Predefined::getSymbolID(Predefined::atob), atob, 1);
745+
743746
// Define the 'btoa' function.
744747
defineGlobalFunc(Predefined::getSymbolID(Predefined::btoa), btoa, 1);
745748

test/hermes/atob.js

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
/**
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
*
4+
* This source code is licensed under the MIT license found in the
5+
* LICENSE file in the root directory of this source tree.
6+
*/
7+
8+
// RUN: LC_ALL=en_US.UTF-8 %hermes -O -target=HBC %s | %FileCheck --match-full-lines %s
9+
"use strict";
10+
11+
print('atob');
12+
// CHECK-LABEL: atob
13+
// Two trailing padding characters: a single decoded byte.
print(atob('YQ=='));
14+
// CHECK-NEXT: a
15+
// A decoded byte above 0x7F (here 0xD3) prints as the matching Latin-1
// character.
print(atob('0w=='));
16+
// CHECK-NEXT: Ó
17+
// One trailing padding character: two decoded bytes.
print(atob('000='));
18+
// CHECK-NEXT: ÓM
19+
// A one-character input is rejected by the length validation, so the error
// message printed is the length one.
try {
20+
atob('\u03A9');
21+
} catch (e) {
22+
print(e.message);
23+
// CHECK-NEXT: Not a valid base64 encoded string length
24+
}
25+
// Round trip through btoa.
print(atob(btoa("a")));
26+
// CHECK-NEXT: a

unittests/VMRuntime/Base64UtilTest.cpp

+100
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,36 @@ using Base64UtilTest = RuntimeTestFixture;
4747
EXPECT_ENCODED(createUTF16Ref(converted.data()), expected); \
4848
}
4949

50+
// Decodes \p original and expects success: computes the output length with
// base64DecodeOutputLength, creates a StringBuilder of exactly that length,
// runs base64Decode into it and compares the resulting string with the UTF-16
// literal \p expected. Uses `runtime` from the enclosing scope.
#define EXPECT_DECODED(original, expected) \
51+
{ \
52+
hermes::OptValue<uint32_t> expectedLength = \
53+
base64DecodeOutputLength(original); \
54+
EXPECT_TRUE(expectedLength.hasValue()); \
55+
CallResult<StringBuilder> builder = StringBuilder::createStringBuilder( \
56+
runtime, hermes::SafeUInt32(*expectedLength)); \
57+
EXPECT_NE(builder, ExecutionStatus::EXCEPTION); \
58+
\
59+
bool success = base64Decode(original, *builder); \
60+
EXPECT_TRUE(success); \
61+
EXPECT_EQ( \
62+
builder->getStringPrimitive()->getStringRef<char16_t>(), \
63+
createUTF16Ref(expected)); \
64+
}
65+
66+
// Runs EXPECT_DECODED twice on the same input: once as an ASCII ref and once
// after copying each character into a null-terminated char16_t buffer, so both
// template instantiations of the decoder are exercised.
#define EXPECT_DECODED_ASCII_AND_UTF16(original, expected) \
67+
{ \
68+
ASCIIRef asciiRef = createASCIIRef(original); \
69+
EXPECT_DECODED(asciiRef, expected); \
70+
\
71+
std::vector<char16_t> converted(asciiRef.size() + 1); \
72+
uint32_t i = 0; \
73+
for (i = 0; i < asciiRef.size(); i++) { \
74+
converted[i] = asciiRef[i]; \
75+
} \
76+
converted[i] = '\0'; \
77+
EXPECT_DECODED(createUTF16Ref(converted.data()), expected); \
78+
}
79+
5080
TEST_F(Base64UtilTest, EdgeCases) {
5181
EXPECT_ENCODED_ASCII_AND_UTF16("", "");
5282
}
@@ -90,4 +120,74 @@ TEST_F(Base64UtilTest, EncodeInvalid) {
90120
EXPECT_FALSE(base64Encode(createUTF16Ref(u"abc\U0001F600xyz"), *builder));
91121
}
92122

123+
// Well-formed inputs with zero, one and two padding characters, plus bytes
// above 0x7F. "YR==" is expected to decode to the same "a" as "YQ==".
TEST_F(Base64UtilTest, DecodeValid) {
124+
EXPECT_DECODED_ASCII_AND_UTF16("", u"");
125+
EXPECT_DECODED_ASCII_AND_UTF16("YQ==", u"a");
126+
EXPECT_DECODED_ASCII_AND_UTF16("YR==", u"a");
127+
EXPECT_DECODED_ASCII_AND_UTF16("YWI=", u"ab");
128+
EXPECT_DECODED_ASCII_AND_UTF16("YWJj", u"abc");
129+
EXPECT_DECODED_ASCII_AND_UTF16("YWJjZA==", u"abcd");
130+
EXPECT_DECODED_ASCII_AND_UTF16("YWJjZGU=", u"abcde");
131+
EXPECT_DECODED_ASCII_AND_UTF16("YWJjZGVm", u"abcdef");
132+
EXPECT_DECODED_ASCII_AND_UTF16("0w==", u"\xD3");
133+
EXPECT_DECODED_ASCII_AND_UTF16("000=", u"\xD3M");
134+
}
135+
136+
// Whitespace (TAB, LF, FF, CR, SPACE) is expected to be ignored wherever it
// appears: alone, leading, trailing, or between the encoded characters.
TEST_F(Base64UtilTest, DecodeWithWhitespace) {
137+
EXPECT_DECODED_ASCII_AND_UTF16(" ", u"");
138+
EXPECT_DECODED_ASCII_AND_UTF16("\x09\x0A\x0C\x0D\x20", u"");
139+
EXPECT_DECODED_ASCII_AND_UTF16("Y Q = =", u"a");
140+
EXPECT_DECODED_ASCII_AND_UTF16("\x09Y\x0AQ\x0C=\x0D=\x20", u"a");
141+
EXPECT_DECODED_ASCII_AND_UTF16(" YR==", u"a");
142+
EXPECT_DECODED_ASCII_AND_UTF16("YR== ", u"a");
143+
}
144+
145+
// Inputs that base64Decode must reject. These call base64Decode directly and
// do not go through base64DecodeOutputLength.
TEST_F(Base64UtilTest, DecodeInvalid) {
146+
// Just a long enough buffer. All calls in this function are expected to fail.
// The same builder is passed to every call below, so bytes decoded before a
// failure accumulate in it; 50 has to stay large enough for their total.
147+
hermes::SafeUInt32 outputLength{50};
148+
CallResult<StringBuilder> builder =
149+
StringBuilder::createStringBuilder(runtime, outputLength);
150+
151+
std::array<char, 5> hasNegative = {'A', 'b', 'c', -15, '\0'};
152+
EXPECT_FALSE(base64Decode(createASCIIRef(hasNegative.data()), *builder));
153+
154+
EXPECT_FALSE(base64Decode(createASCIIRef("==="), *builder));
155+
EXPECT_FALSE(base64Decode(createASCIIRef("0==="), *builder));
156+
EXPECT_FALSE(base64Decode(createASCIIRef("aa=0"), *builder));
157+
EXPECT_FALSE(
158+
base64Decode(createASCIIRef("$aaaaaaaaaaaaaaaaaaaaaaa"), *builder));
159+
EXPECT_FALSE(
160+
base64Decode(createASCIIRef("aaaaaa$aaaaaaaaaaaaaaaaa"), *builder));
161+
EXPECT_FALSE(base64Decode(
162+
createASCIIRef("bbbbbbbbbddddddddddddddddddddaaaaaaadddddddb="),
163+
*builder));
164+
165+
// Strings that are the wrong length to be a valid base64 encoded string
166+
EXPECT_FALSE(base64Decode(createASCIIRef("A"), *builder));
167+
EXPECT_FALSE(base64Decode(createASCIIRef("B="), *builder));
168+
EXPECT_FALSE(base64Decode(createASCIIRef("ba="), *builder));
169+
EXPECT_FALSE(base64Decode(createASCIIRef("aaaaa"), *builder));
170+
171+
// Non-Base64 ASCII, i.e., not [a-z]|[A-Z]|[0-9]|\+|\/
172+
EXPECT_FALSE(base64Decode(createASCIIRef("a*"), *builder));
173+
EXPECT_FALSE(base64Decode(createASCIIRef("YQ*"), *builder));
174+
175+
// Out of ASCII range.
176+
EXPECT_FALSE(base64Decode(createASCIIRef("a\xFF"), *builder));
177+
178+
// Padding where it's not expected
179+
EXPECT_FALSE(base64Decode(createASCIIRef("="), *builder));
180+
EXPECT_FALSE(base64Decode(createASCIIRef("=="), *builder));
181+
182+
// Padding in the middle
183+
EXPECT_FALSE(base64Decode(createASCIIRef("YQ==YQ=="), *builder));
184+
185+
// Extra padding
186+
EXPECT_FALSE(base64Decode(createASCIIRef("YQ==="), *builder));
187+
188+
// Non-ASCII code units in UTF-16 input
EXPECT_FALSE(base64Decode(createUTF16Ref(u"a\uff20=="), *builder));
189+
EXPECT_FALSE(
190+
base64Decode(createUTF16Ref(u"\u0065\u0065\u0065\u03A9"), *builder));
191+
}
192+
93193
} // end anonymous namespace

0 commit comments

Comments
 (0)