From d2177c31b271e92d1725742605b53d7f6e8acf3e Mon Sep 17 00:00:00 2001 From: Danny Su Date: Tue, 16 Jan 2024 18:59:56 -0800 Subject: [PATCH] btoa implementation (#1255) Summary: Pull Request resolved: https://github.com/facebook/hermes/pull/1255 Implement [btoa](https://html.spec.whatwg.org/multipage/webappapis.html#atob) utility function for encoding a string to base64. This implementation doesn't follow the HTML spec 100% in that for error cases, the code doesn't throw DOMException. Existing alternatives people use with Hermes simply throw Error, which is what this code throws as well. Reviewed By: avp Differential Revision: D51876325 fbshipit-source-id: 085aa069a761d093fd9e504c0478ee18a36e8d34 --- include/hermes/VM/JSLib/Base64Util.h | 24 +++++++ include/hermes/VM/NativeFunctions.def | 1 + include/hermes/VM/PredefinedStrings.def | 1 + lib/VM/CMakeLists.txt | 2 + lib/VM/JSLib/Base64.cpp | 54 ++++++++++++++ lib/VM/JSLib/Base64Util.cpp | 94 +++++++++++++++++++++++++ lib/VM/JSLib/GlobalObject.cpp | 3 + test/hermes/btoa.js | 20 ++++++ unittests/VMRuntime/Base64UtilTest.cpp | 93 ++++++++++++++++++++++++ unittests/VMRuntime/CMakeLists.txt | 1 + 10 files changed, 293 insertions(+) create mode 100644 include/hermes/VM/JSLib/Base64Util.h create mode 100644 lib/VM/JSLib/Base64.cpp create mode 100644 lib/VM/JSLib/Base64Util.cpp create mode 100644 test/hermes/btoa.js create mode 100644 unittests/VMRuntime/Base64UtilTest.cpp diff --git a/include/hermes/VM/JSLib/Base64Util.h b/include/hermes/VM/JSLib/Base64Util.h new file mode 100644 index 00000000000..1810b3db5f7 --- /dev/null +++ b/include/hermes/VM/JSLib/Base64Util.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#ifndef HERMES_VM_JSLIB_BASE64UTIL_H +#define HERMES_VM_JSLIB_BASE64UTIL_H + +#include "hermes/VM/Runtime.h" + +namespace hermes { +namespace vm { + +/// Encode \p str to base64 characters and store the output in \p builder. +/// \return true if successful, false otherwise +template +bool base64Encode(llvh::ArrayRef str, StringBuilder &builder); + +} // namespace vm +} // namespace hermes + +#endif // HERMES_VM_JSLIB_BASE64UTIL_H diff --git a/include/hermes/VM/NativeFunctions.def b/include/hermes/VM/NativeFunctions.def index 4fa6fb06c24..3db241004c7 100644 --- a/include/hermes/VM/NativeFunctions.def +++ b/include/hermes/VM/NativeFunctions.def @@ -67,6 +67,7 @@ NATIVE_FUNCTION(bigintPrototypeValueOf) NATIVE_FUNCTION(booleanConstructor) NATIVE_FUNCTION(booleanPrototypeToString) NATIVE_FUNCTION(booleanPrototypeValueOf) +NATIVE_FUNCTION(btoa) NATIVE_FUNCTION(callSitePrototypeGetFunctionName) NATIVE_FUNCTION(callSitePrototypeGetFileName) NATIVE_FUNCTION(callSitePrototypeGetLineNumber) diff --git a/include/hermes/VM/PredefinedStrings.def b/include/hermes/VM/PredefinedStrings.def index eaea96f392b..6032589a3cb 100644 --- a/include/hermes/VM/PredefinedStrings.def +++ b/include/hermes/VM/PredefinedStrings.def @@ -52,6 +52,7 @@ STR(isNaN, "isNaN") STR(isFinite, "isFinite") STR(escape, "escape") STR(unescape, "unescape") +STR(btoa, "btoa") STR(decodeURI, "decodeURI") STR(decodeURIComponent, "decodeURIComponent") STR(encodeURI, "encodeURI") diff --git a/lib/VM/CMakeLists.txt b/lib/VM/CMakeLists.txt index a8d73be02c2..81403b44e05 100644 --- a/lib/VM/CMakeLists.txt +++ b/lib/VM/CMakeLists.txt @@ -79,6 +79,8 @@ set(source_files JSLib/ArrayBuffer.cpp JSLib/ArrayIterator.cpp JSLib/AsyncFunction.cpp + JSLib/Base64.cpp + JSLib/Base64Util.cpp JSLib/BigInt.cpp JSLib/CallSite.cpp JSLib/DataView.cpp diff --git a/lib/VM/JSLib/Base64.cpp b/lib/VM/JSLib/Base64.cpp new file mode 100644 index 00000000000..1c593c343a6 --- /dev/null +++ b/lib/VM/JSLib/Base64.cpp @@ -0,0 +1,54 @@ +/* + * 
Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "JSLibInternal.h"
+
+#include "hermes/ADT/SafeInt.h"
+#include "hermes/VM/JSLib/Base64Util.h"
+#include "hermes/VM/StringBuilder.h"
+
+namespace hermes {
+namespace vm {
+
+/// Create a Base64-encoded ASCII string from an input string expected to have
+/// each character in the range of U+0000 to U+00FF. An error is raised if any
+/// character is outside of that range or if the output would be too long.
+CallResult<HermesValue> btoa(void *, Runtime &runtime, NativeArgs args) {
+  GCScope gcScope{runtime};
+  auto res = toString_RJS(runtime, args.getArgHandle(0));
+  if (LLVM_UNLIKELY(res == ExecutionStatus::EXCEPTION)) {
+    return ExecutionStatus::EXCEPTION;
+  }
+
+  auto string = runtime.makeHandle(std::move(*res));
+
+  // 3 input chars -> 4 output chars, rounded up. Widen to 64 bits first so the
+  // arithmetic cannot wrap in a narrower type before the range check below.
+  uint64_t expectedLength = (uint64_t{string->getStringLength()} + 2) / 3 * 4;
+  if (expectedLength > std::numeric_limits<uint32_t>::max()) {
+    return runtime.raiseError("String length to convert to base64 is too long");
+  }
+  SafeUInt32 outputLength{static_cast<uint32_t>(expectedLength)};
+  CallResult<StringBuilder> builder =
+      StringBuilder::createStringBuilder(runtime, outputLength, true);
+  if (LLVM_UNLIKELY(builder == ExecutionStatus::EXCEPTION)) {
+    return ExecutionStatus::EXCEPTION;
+  }
+
+  bool success = string->isASCII()
+      ? base64Encode(string->getStringRef<char>(), *builder)
+      : base64Encode(string->getStringRef<char16_t>(), *builder);
+  if (!success) {
+    return runtime.raiseError(
+        "Found invalid character when converting to base64");
+  }
+
+  return builder->getStringPrimitive().getHermesValue();
+}
+
+} // namespace vm
+} // namespace hermes
diff --git a/lib/VM/JSLib/Base64Util.cpp b/lib/VM/JSLib/Base64Util.cpp
new file mode 100644
index 00000000000..d5d4f775db0
--- /dev/null
+++ b/lib/VM/JSLib/Base64Util.cpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) Meta Platforms, Inc.
and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "hermes/VM/JSLib/Base64Util.h"
+
+#include "hermes/VM/StringBuilder.h"
+
+namespace hermes {
+namespace vm {
+
+namespace {
+constexpr const std::array<char, 64> Base64Chars = {
+    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'};
+} // namespace
+
+/// Base64-encode \p str into \p builder per RFC 4648 section 4
+/// (https://www.rfc-editor.org/rfc/rfc4648#section-4); adapted from folly.
+/// \return false at the first element above 0xFF (output may be partial).
+template <typename T>
+bool base64Encode(llvh::ArrayRef<T> str, StringBuilder &builder) {
+  uint64_t strLength = str.size();
+  // Index with size_t rather than uint32_t so it cannot wrap below strLength.
+  size_t i = 0;
+  while ((strLength - i) >= 3) {
+    if (str[i] > 0xFF || str[i + 1] > 0xFF || str[i + 2] > 0xFF) {
+      return false;
+    }
+    // Regroup three 8-bit inputs (24 bits) into four 6-bit table indices.
+    uint8_t aaab = str[i];
+    uint8_t bbcc = str[i + 1];
+    uint8_t cddd = str[i + 2];
+
+    uint8_t aaa = aaab >> 2;
+    uint8_t bbb = ((aaab << 4) | (bbcc >> 4)) & 0x3f;
+    uint8_t ccc = ((bbcc << 2) | (cddd >> 6)) & 0x3f;
+    uint8_t ddd = cddd & 0x3f;
+
+    builder.appendCharacter(Base64Chars[aaa]);
+    builder.appendCharacter(Base64Chars[bbb]);
+    builder.appendCharacter(Base64Chars[ccc]);
+    builder.appendCharacter(Base64Chars[ddd]);
+
+    i += 3;
+  }
+
+  if (i == strLength) {
+    return true;
+  }
+
+  if (str[i] > 0xFF) {
+    return false;
+  }
+  uint8_t aaab = str[i];
+  uint8_t aaa = aaab >> 2;
+  builder.appendCharacter(Base64Chars[aaa]);
+
+  // Duplicating some tail handling to try to do fewer jumps.
+  if (strLength - i == 1) {
+    uint8_t b00 = aaab << 4 & 0x3f;
+    builder.appendCharacter(Base64Chars[b00]);
+    builder.appendCharacter('=');
+    builder.appendCharacter('=');
+    return true;
+  }
+
+  // When there are 2 characters left.
+  assert(strLength - i == 2);
+  if (str[i + 1] > 0xFF) {
+    return false;
+  }
+  uint8_t bbcc = str[i + 1];
+  uint8_t bbb = ((aaab << 4) | (bbcc >> 4)) & 0x3f;
+  uint8_t cc0 = (bbcc << 2) & 0x3f;
+  builder.appendCharacter(Base64Chars[bbb]);
+  builder.appendCharacter(Base64Chars[cc0]);
+  builder.appendCharacter('=');
+  return true;
+}
+
+template bool base64Encode(llvh::ArrayRef<char> str, StringBuilder &builder);
+template bool base64Encode(
+    llvh::ArrayRef<char16_t> str,
+    StringBuilder &builder);
+
+} // namespace vm
+} // namespace hermes
diff --git a/lib/VM/JSLib/GlobalObject.cpp b/lib/VM/JSLib/GlobalObject.cpp
index 13893a0188d..e2d966020e8 100644
--- a/lib/VM/JSLib/GlobalObject.cpp
+++ b/lib/VM/JSLib/GlobalObject.cpp
@@ -740,6 +740,9 @@ void initGlobalObject(Runtime &runtime, const JSLibFlags &jsLibFlags) {
   // Define the 'unescape' function.
   defineGlobalFunc(Predefined::getSymbolID(Predefined::unescape), unescape, 1);
 
+  // Define the 'btoa' function.
+  defineGlobalFunc(Predefined::getSymbolID(Predefined::btoa), btoa, 1);
+
   // Define the 'decodeURI' function.
   defineGlobalFunc(
       Predefined::getSymbolID(Predefined::decodeURI), decodeURI, 1);
diff --git a/test/hermes/btoa.js b/test/hermes/btoa.js
new file mode 100644
index 00000000000..24de02ff362
--- /dev/null
+++ b/test/hermes/btoa.js
@@ -0,0 +1,20 @@
+/**
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// RUN: LC_ALL=en_US.UTF-8 %hermes -O -target=HBC %s | %FileCheck --match-full-lines %s
+"use strict";
+
+print('btoa');
+// CHECK-LABEL: btoa
+print(btoa('123'));
+// CHECK-NEXT: MTIz
+try {
+  btoa('\u03A9');
+} catch (e) {
+  print(e.message);
+  // CHECK-NEXT: Found invalid character when converting to base64
+}
diff --git a/unittests/VMRuntime/Base64UtilTest.cpp b/unittests/VMRuntime/Base64UtilTest.cpp
new file mode 100644
index 00000000000..cb68715eec9
--- /dev/null
+++ b/unittests/VMRuntime/Base64UtilTest.cpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "TestHelpers.h"
+
+#include "hermes/ADT/SafeInt.h"
+#include "hermes/VM/JSLib/Base64Util.h"
+#include "hermes/VM/StringBuilder.h"
+
+using namespace hermes::vm;
+
+namespace {
+using Base64UtilTest = RuntimeTestFixture;
+
+/// Encode \p original into a fresh builder and expect \p expected as output.
+#define EXPECT_ENCODED(original, expected) \
+  { \
+    uint64_t expectedLength = ((original.size() + 2) / 3) * 4; \
+    EXPECT_LE(expectedLength, std::numeric_limits<uint32_t>::max()); \
+    hermes::SafeUInt32 outputLength{static_cast<uint32_t>(expectedLength)}; \
+    CallResult<StringBuilder> builder = \
+        StringBuilder::createStringBuilder(runtime, outputLength, true); \
+    /* Fatal on failure: *builder is dereferenced right below. */ \
+    ASSERT_NE(builder, ExecutionStatus::EXCEPTION); \
+    bool success = base64Encode(original, *builder); \
+    EXPECT_TRUE(success); \
+    EXPECT_EQ( \
+        builder->getStringPrimitive()->getStringRef<char>(), \
+        createASCIIRef(expected)); \
+  }
+
+/// Run EXPECT_ENCODED on \p original both as ASCII and widened to UTF-16.
+#define EXPECT_ENCODED_ASCII_AND_UTF16(original, expected) \
+  { \
+    ASCIIRef asciiRef = createASCIIRef(original); \
+    EXPECT_ENCODED(asciiRef, expected); \
+    std::vector<char16_t> converted(asciiRef.size() + 1); \
+    uint32_t i = 0; \
+    for (i = 0; i < asciiRef.size(); i++) { \
+      converted[i] = asciiRef[i]; \
+    } \
+    converted[i] = '\0'; \
+    EXPECT_ENCODED(createUTF16Ref(converted.data()), expected); \
+  }
+
+TEST_F(Base64UtilTest, EdgeCases) {
+  EXPECT_ENCODED_ASCII_AND_UTF16("", "");
+}
+
+TEST_F(Base64UtilTest, EncodePaddingRequired) {
+  EXPECT_ENCODED_ASCII_AND_UTF16("a", "YQ==");
+  EXPECT_ENCODED_ASCII_AND_UTF16("ab", "YWI=");
+  EXPECT_ENCODED_ASCII_AND_UTF16("abcd", "YWJjZA==");
+  EXPECT_ENCODED_ASCII_AND_UTF16("abcde", "YWJjZGU=");
+  EXPECT_ENCODED_ASCII_AND_UTF16(
+      "less is more than more", "bGVzcyBpcyBtb3JlIHRoYW4gbW9yZQ==");
+  EXPECT_ENCODED_ASCII_AND_UTF16("<>?su", "PD4/c3U=");
+  // Raw code units, including an embedded NUL that a C string cannot carry.
+  EXPECT_ENCODED(UTF16Ref(std::array<char16_t, 1>{1}), "AQ==");
+  EXPECT_ENCODED(ASCIIRef(std::array<char, 1>{1}), "AQ==");
+  EXPECT_ENCODED(UTF16Ref(std::array<char16_t, 2>{1, 0}), "AQA=");
+  EXPECT_ENCODED(ASCIIRef(std::array<char, 2>{1, 0}), "AQA=");
+}
+
+TEST_F(Base64UtilTest, EncodePaddingNotNeeded) {
+  EXPECT_ENCODED_ASCII_AND_UTF16("abc", "YWJj");
+  EXPECT_ENCODED_ASCII_AND_UTF16("abcdef", "YWJjZGVm");
+  // Raw code units, including all-NUL input.
+  EXPECT_ENCODED(UTF16Ref(std::array<char16_t, 3>{0, 0, 0}), "AAAA");
+  EXPECT_ENCODED(ASCIIRef(std::array<char, 3>{0, 0, 0}), "AAAA");
+  EXPECT_ENCODED(UTF16Ref(std::array<char16_t, 3>{1, 0, 0}), "AQAA");
+  EXPECT_ENCODED(ASCIIRef(std::array<char, 3>{1, 0, 0}), "AQAA");
+}
+
+TEST_F(Base64UtilTest, EncodeInvalid) {
+  // Just a long enough buffer. All calls in this function are expected to fail.
+  hermes::SafeUInt32 outputLength{20};
+  CallResult<StringBuilder> builder =
+      StringBuilder::createStringBuilder(runtime, outputLength, true);
+  ASSERT_NE(builder, ExecutionStatus::EXCEPTION);
+  EXPECT_FALSE(base64Encode(createUTF16Ref(u"\U0001F600"), *builder));
+  EXPECT_FALSE(base64Encode(createUTF16Ref(u"a\U0001F600"), *builder));
+  EXPECT_FALSE(base64Encode(createUTF16Ref(u"ab\U0001F600"), *builder));
+  EXPECT_FALSE(base64Encode(createUTF16Ref(u"abc\U0001F600"), *builder));
+  EXPECT_FALSE(base64Encode(createUTF16Ref(u"\U0001F600xyz"), *builder));
+  EXPECT_FALSE(base64Encode(createUTF16Ref(u"abc\U0001F600xyz"), *builder));
+}
+
+} // end anonymous namespace
diff --git a/unittests/VMRuntime/CMakeLists.txt b/unittests/VMRuntime/CMakeLists.txt
index 45e53ea8a5e..82387a3ffee 100644
--- a/unittests/VMRuntime/CMakeLists.txt
+++ b/unittests/VMRuntime/CMakeLists.txt
@@ -14,6 +14,7 @@ set(RTSources
   AlignedStorageTest.cpp
   ArrayTest.cpp
   ArrayStorageTest.cpp
+  Base64UtilTest.cpp
   BigIntPrimitiveTest.cpp
   BytecodeProviderTest.cpp
   CallResultTest.cpp