Skip to content

Commit

Permalink
deps: update ada to 2.1.0
Browse files Browse the repository at this point in the history
PR-URL: nodejs#47598
Reviewed-By: Yagiz Nizipli <[email protected]>
Reviewed-By: Filip Skokan <[email protected]>
Reviewed-By: Matthew Aitken <[email protected]>
Reviewed-By: Tiancheng "Timothy" Gu <[email protected]>
Reviewed-By: Tobias Nießen <[email protected]>
Reviewed-By: Rich Trott <[email protected]>
  • Loading branch information
nodejs-github-bot authored and anonrig committed Jun 26, 2023
1 parent a24884c commit fe86195
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 19 deletions.
93 changes: 79 additions & 14 deletions deps/ada/ada.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* auto-generated on 2023-03-30 17:00:48 -0400. Do not edit! */
/* auto-generated on 2023-04-17 12:20:41 -0400. Do not edit! */
/* begin file src/ada.cpp */
#include "ada.h"
/* begin file src/checkers.cpp */
Expand Down Expand Up @@ -2753,7 +2753,7 @@ bool ascii_has_upper_case(char* input, size_t length) {
auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
uint64_t broadcast_80 = broadcast(0x80);
uint64_t broadcast_Ap = broadcast(128 - 'A');
uint64_t broadcast_Zp = broadcast(128 - 'Z');
uint64_t broadcast_Zp = broadcast(128 - 'Z' - 1);
size_t i = 0;

uint64_t runner{0};
Expand All @@ -2775,7 +2775,7 @@ void ascii_map(char* input, size_t length) {
auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
uint64_t broadcast_80 = broadcast(0x80);
uint64_t broadcast_Ap = broadcast(128 - 'A');
uint64_t broadcast_Zp = broadcast(128 - 'Z');
uint64_t broadcast_Zp = broadcast(128 - 'Z' - 1);
size_t i = 0;

for (; i + 7 < length; i += 8) {
Expand Down Expand Up @@ -9845,7 +9845,7 @@ constexpr bool to_lower_ascii(char* input, size_t length) noexcept {
auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
uint64_t broadcast_80 = broadcast(0x80);
uint64_t broadcast_Ap = broadcast(128 - 'A');
uint64_t broadcast_Zp = broadcast(128 - 'Z');
uint64_t broadcast_Zp = broadcast(128 - 'Z' - 1);
uint64_t non_ascii = 0;
size_t i = 0;

Expand Down Expand Up @@ -9961,7 +9961,7 @@ ada_really_inline constexpr bool is_forbidden_domain_code_point(
}

ada_really_inline constexpr bool contains_forbidden_domain_code_point(
char* input, size_t length) noexcept {
const char* input, size_t length) noexcept {
size_t i = 0;
uint8_t accumulator{};
for (; i + 4 <= length; i += 4) {
Expand All @@ -9976,6 +9976,44 @@ ada_really_inline constexpr bool contains_forbidden_domain_code_point(
return accumulator;
}

// Byte-indexed classification table consulted by
// contains_forbidden_domain_code_point_or_upper. Each of the 256 entries is
// one of:
//   0 - the byte needs no special treatment;
//   1 - the byte is flagged as forbidden: 0x00-0x1F, space, '#', '%', '/',
//       ':', '<', '>', '?', '@', '[', backslash, ']', '^', '|', 0x7F and
//       every byte >= 0x80;
//   2 - the byte is an upper case ASCII letter ('A' through 'Z').
// The two non-zero values use distinct bits, so OR-ing entries together
// records which of the two classes were encountered.
constexpr static uint8_t is_forbidden_domain_code_point_table_or_upper[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};

// One entry per possible byte value, so any uint8_t is a valid index.
static_assert(sizeof(is_forbidden_domain_code_point_table_or_upper) == 256);
// Spot-check both ends of the upper case range.
static_assert(is_forbidden_domain_code_point_table_or_upper[uint8_t('A')] == 2);
static_assert(is_forbidden_domain_code_point_table_or_upper[uint8_t('Z')] == 2);

/**
 * ORs together the is_forbidden_domain_code_point_table_or_upper entry of
 * every byte in [input, input + length) and returns the combined value.
 *
 * @param input pointer to the first byte to classify
 * @param length number of bytes to classify
 * @return the OR of all table entries, converted to the declared return type
 *
 * NOTE(review): the return type is bool, so the accumulated value (which can
 * be 1, 2 or 3) is reduced to true/false on return. A caller comparing the
 * result against 2 will therefore never match; confirm whether uint8_t was
 * the intended return type (the declaration would need the same change).
 */
ada_really_inline constexpr bool contains_forbidden_domain_code_point_or_upper(
    const char* input, size_t length) noexcept {
  const auto& table = is_forbidden_domain_code_point_table_or_upper;
  uint8_t flags = 0;
  size_t pos = 0;

  // Main loop, manually unrolled: four table lookups per iteration.
  while (length - pos >= 4) {
    flags = uint8_t(flags | table[uint8_t(input[pos])] |
                    table[uint8_t(input[pos + 1])] |
                    table[uint8_t(input[pos + 2])] |
                    table[uint8_t(input[pos + 3])]);
    pos += 4;
  }

  // Tail: the at most three bytes the unrolled loop did not reach.
  while (pos < length) {
    flags = uint8_t(flags | table[uint8_t(input[pos])]);
    ++pos;
  }

  return flags;
}

static_assert(unicode::is_forbidden_domain_code_point('%'));
static_assert(unicode::is_forbidden_domain_code_point('\x7f'));
static_assert(unicode::is_forbidden_domain_code_point('\0'));
Expand Down Expand Up @@ -13473,23 +13511,50 @@ ada_really_inline bool url_aggregator::parse_host(std::string_view input) {
// to ASCII with domain and false. The most common case is an ASCII input, in
// which case we do not need to call the expensive 'to_ascii' if a few
// conditions are met: no '%' and no 'xn-' subsequence.
std::string _buffer = std::string(input);
// This next function checks that the result is ascii, but we are going to
// to check anyhow with is_forbidden.
// bool is_ascii =
unicode::to_lower_ascii(_buffer.data(), _buffer.size());
bool is_forbidden = unicode::contains_forbidden_domain_code_point(
_buffer.data(), _buffer.size());
if (is_forbidden == 0 && _buffer.find("xn-") == std::string_view::npos) {

// Often, the input does not contain any forbidden code points, and no upper
// case ASCII letter, then we can just copy it to the buffer. We want to
// optimize for such a common case.
uint8_t is_forbidden_or_upper =
unicode::contains_forbidden_domain_code_point_or_upper(input.data(),
input.size());
// Minor optimization opportunity:
// contains_forbidden_domain_code_point_or_upper could be extended to check for
// the presence of characters that cannot appear in the ipv4 address and we
// could also check whether x and n and - are present, and so we could skip
// some of the checks below. However, the gains are likely to be small, and
// the code would be more complex.
if (is_forbidden_or_upper == 0 &&
input.find("xn-") == std::string_view::npos) {
// fast path
update_base_hostname(_buffer);
update_base_hostname(input);
if (checkers::is_ipv4(get_hostname())) {
ada_log("parse_host fast path ipv4");
return parse_ipv4(get_hostname());
}
ada_log("parse_host fast path ", get_hostname());
return true;
} else if (is_forbidden_or_upper == 2) {
// We have encountered at least one upper case ASCII letter, let us
// try to convert it to lower case. If there is no 'xn-' in the result,
// we can then use a secondary fast path.
std::string _buffer = std::string(input);
unicode::to_lower_ascii(_buffer.data(), _buffer.size());
if (input.find("xn-") == std::string_view::npos) {
// secondary fast path when input is not all lower case
update_base_hostname(input);
if (checkers::is_ipv4(get_hostname())) {
ada_log("parse_host fast path ipv4");
return parse_ipv4(get_hostname());
}
ada_log("parse_host fast path ", get_hostname());
return true;
}
}
// We have encountered at least one forbidden code point or the input contains
// 'xn-' (case insensitive), so we need to call 'to_ascii' to perform the full
// conversion.

ada_log("parse_host calling to_ascii");
std::optional<std::string> host = std::string(get_hostname());
is_valid = ada::unicode::to_ascii(host, input, input.find('%'));
Expand Down
19 changes: 14 additions & 5 deletions deps/ada/ada.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* auto-generated on 2023-03-30 17:00:48 -0400. Do not edit! */
/* auto-generated on 2023-04-17 12:20:41 -0400. Do not edit! */
/* begin file include/ada.h */
/**
* @file ada.h
Expand Down Expand Up @@ -1418,11 +1418,20 @@ ada_really_inline constexpr bool is_forbidden_host_code_point(
const char c) noexcept;

/**
* Checks if the input is a forbidden domain code point.
* Checks if the input contains a forbidden domain code point.
* @see https://url.spec.whatwg.org/#forbidden-domain-code-point
*/
ada_really_inline constexpr bool contains_forbidden_domain_code_point(
char* input, size_t length) noexcept;
const char* input, size_t length) noexcept;

/**
 * Scans a byte sequence for forbidden domain code points and upper case
 * ASCII letters.
 *
 * The result is meant to encode two flags: the first bit (value 1) is set
 * when the input contains a forbidden domain code point, and the second bit
 * (value 2) is set when the input contains an upper case ASCII letter.
 *
 * NOTE(review): the declared return type is bool, so callers can only
 * observe whether any flag was set; the individual bits do not survive the
 * conversion. Confirm whether uint8_t was intended.
 *
 * @param input pointer to the first byte to inspect
 * @param length number of bytes to inspect
 * @return non-zero if a forbidden domain code point or an upper case ASCII
 *         letter is present, zero otherwise
 * @see https://url.spec.whatwg.org/#forbidden-domain-code-point
 */
ada_really_inline constexpr bool contains_forbidden_domain_code_point_or_upper(
const char* input, size_t length) noexcept;

/**
* Checks if the input is a forbidden domain code point.
Expand Down Expand Up @@ -6503,13 +6512,13 @@ inline std::ostream &operator<<(std::ostream &out,
#ifndef ADA_ADA_VERSION_H
#define ADA_ADA_VERSION_H

#define ADA_VERSION "2.0.0"
#define ADA_VERSION "2.1.0"

namespace ada {

enum {
ADA_VERSION_MAJOR = 2,
ADA_VERSION_MINOR = 0,
ADA_VERSION_MINOR = 1,
ADA_VERSION_REVISION = 0,
};

Expand Down

0 comments on commit fe86195

Please sign in to comment.