From 5c7d8f5c92c0dd40528f316e112c3c87a50d0dfe Mon Sep 17 00:00:00 2001 From: Eduardo Silva Date: Tue, 15 Oct 2024 16:42:17 -0600 Subject: [PATCH 1/8] utils: improve performance of JSON escaping The following change in the utils write utility, improve the handling of characters that needs escaping by optimizing the character check with a lookup table. Signed-off-by: Eduardo Silva --- src/flb_utils.c | 73 ++++++++++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 34 deletions(-) diff --git a/src/flb_utils.c b/src/flb_utils.c index 010254f23c5..ba1661c1751 100644 --- a/src/flb_utils.c +++ b/src/flb_utils.c @@ -756,6 +756,30 @@ static inline void encoded_to_buf(char *out, const char *in, int len) } } +/* Structure to hold escape sequence */ +struct escape_seq { + const char *seq; +}; + +/* Lookup table for escape sequences */ +static const struct escape_seq json_escape_table[128] = { + ['\"'] = {"\\\""}, + ['\\'] = {"\\\\"}, + ['\n'] = {"\\n"}, + ['\r'] = {"\\r"}, + ['\t'] = {"\\t"}, + ['\b'] = {"\\b"}, + ['\f'] = {"\\f"}, + [0x00] = {"\\u0000"}, [0x01] = {"\\u0001"}, [0x02] = {"\\u0002"}, [0x03] = {"\\u0003"}, + [0x04] = {"\\u0004"}, [0x05] = {"\\u0005"}, [0x06] = {"\\u0006"}, [0x07] = {"\\u0007"}, + [0x0B] = {"\\u000b"}, [0x0E] = {"\\u000e"}, [0x0F] = {"\\u000f"}, + [0x10] = {"\\u0010"}, [0x11] = {"\\u0011"}, [0x12] = {"\\u0012"}, [0x13] = {"\\u0013"}, + [0x14] = {"\\u0014"}, [0x15] = {"\\u0015"}, [0x16] = {"\\u0016"}, [0x17] = {"\\u0017"}, + [0x18] = {"\\u0018"}, [0x19] = {"\\u0019"}, [0x1A] = {"\\u001a"}, [0x1B] = {"\\u001b"}, + [0x1C] = {"\\u001c"}, [0x1D] = {"\\u001d"}, [0x1E] = {"\\u001e"}, [0x1F] = {"\\u001f"}, + [0x7F] = {"\\u007f"} +}; + /* * Write string pointed by 'str' to the destination buffer 'buf'. It's make sure * to escape special characters and convert utf-8 byte characters to string @@ -795,43 +819,25 @@ int flb_utils_write_str(char *buf, int *off, size_t size, } c = (uint32_t) str[i]; - if (c == '\"') { - *p++ = '\\'; - *p++ = '\"'; - } - else if (c == '\\') { - *p++ = '\\'; - *p++ = '\\'; - } - else if (c == '\n') { - *p++ = '\\'; - *p++ = 'n'; - } - else if (c == '\r') { - *p++ = '\\'; - *p++ = 'r'; - } - else if (c == '\t') { - *p++ = '\\'; - *p++ = 't'; - } - else if (c == '\b') { - *p++ = '\\'; - *p++ = 'b'; - } - else if (c == '\f') { - *p++ = '\\'; - *p++ = 'f'; - } - else if (c < 32 || c == 0x7f) { - if ((available - written) < 6) { - return FLB_FALSE; + + /* Use the lookup table for known escape sequences */ + if (c < 128 && json_escape_table[c].seq) { + /* + * check the length for the string, for simple escaping is always + * 2 bytes and 6 bytes for unicode + */ + if (json_escape_table[c].seq[1] == 'u') { + len = 6; + } + else { + len = 2; } - len = snprintf(tmp, sizeof(tmp) - 1, "\\u%.4hhx", (unsigned char) c); + if ((available - written) < len) { return FLB_FALSE; } - encoded_to_buf(p, tmp, len); + + memcpy(p, json_escape_table[c].seq, len); p += len; } else if (c >= 0x80 && c <= 0xFFFF) { @@ -978,7 +984,6 @@ int flb_utils_write_str(char *buf, int *off, size_t size, return FLB_TRUE; } - int flb_utils_write_str_buf(const char *str, size_t str_len, char **out, size_t *out_size) { int ret; From 4c5ae0b40ab8832a6166a8c8b0936d9879c12613 Mon Sep 17 00:00:00 2001 From: Eduardo Silva Date: Thu, 17 Oct 2024 19:37:29 -0600 Subject: [PATCH 2/8] simd: new SIMD support for SSE and NEON Signed-off-by: Eduardo Silva --- include/fluent-bit/flb_simd.h | 250 ++++++++++++++++++++++++++++++++++ 1 file changed, 250 insertions(+) create mode 100644 include/fluent-bit/flb_simd.h diff --git a/include/fluent-bit/flb_simd.h b/include/fluent-bit/flb_simd.h new file mode 100644 index 00000000000..0f66d5e1bed --- /dev/null +++ b/include/fluent-bit/flb_simd.h @@ -0,0 +1,250 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ + +/* Fluent Bit + * ========== + * Copyright (C) 2015-2024 The Fluent Bit Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLB_SIMD_H +#define FLB_SIMD_H + +#include +#include + +/* Only enable SIMD support if it has not been explicity disabled */ +#ifndef FLB_SIMD_DISABLED + +#if (defined(__x86_64__) || defined(_M_AMD64)) +/* + * SSE2 instructions are part of the spec for the 64-bit x86 ISA. We assume + * that compilers targeting this architecture understand SSE2 intrinsics. + * + * We use emmintrin.h rather than the comprehensive header immintrin.h in + * order to exclude extensions beyond SSE2. This is because MSVC, at least, + * will allow the use of intrinsics that haven't been enabled at compile + * time. + */ +#include +#define FLB_SIMD_SSE2 +typedef __m128i flb_vector8; +typedef __m128i flb_vector32; + +#elif defined(__aarch64__) && defined(__ARM_NEON) +/* + * We use the Neon instructions if the compiler provides access to them (as + * indicated by __ARM_NEON) and we are on aarch64. While Neon support is + * technically optional for aarch64, it appears that all available 64-bit + * hardware does have it. Neon exists in some 32-bit hardware too, but we + * could not realistically use it there without a run-time check, which seems + * not worth the trouble for now. + */ +#include +#define FLB_SIMD_NEON +typedef uint8x16_t flb_vector8; +typedef uint32x4_t flb_vector32; + +#else +/* + * If no SIMD instructions are available, we can in some cases emulate vector + * operations using bitwise operations on unsigned integers. Note that many + * of the functions in this file presently do not have non-SIMD + * implementations. In particular, none of the functions involving Vector32 + * are implemented without SIMD since it's likely not worthwhile to represent + * two 32-bit integers using a uint64. + */ +#define FLB_SIMD_NONE +typedef uint64 flb_vector8; +#endif + +#else +#define FLB_SIMD_NONE +#endif /* FLB_SIMD_DISABLED */ + +/* element-wise comparisons to a scalar */ +static inline bool flb_vector8_has(const flb_vector8 v, const uint8_t c); +static inline bool flb_vector8_has_zero(const flb_vector8 v); +static inline bool flb_vector8_has_le(const flb_vector8 v, const uint8_t c); +static inline bool flb_vector8_is_highbit_set(const flb_vector8 v); + +/* + * Load a chunk of memory into the given vector. + */ +static inline void flb_vector8_load(flb_vector8 *v, const uint8_t *s) +{ +#if defined(FLB_SIMD_SSE2) + *v = _mm_loadu_si128((const __m128i *) s); +#elif defined(FLB_SIMD_NEON) + *v = vld1q_u8(s); +#else + memcpy(v, s, sizeof(flb_vector8)); +#endif +} + +/* + * Convenience function equivalent to vector8_has(v, 0) + */ +static inline bool flb_vector8_has_zero(const flb_vector8 v) +{ +#if defined(FLB_SIMD_NONE) + /* + * We cannot call vector8_has() here, because that would lead to a + * circular definition. + */ + return flb_vector8_has_le(v, 0); +#else + return flb_vector8_has(v, 0); +#endif +} + + +/* + * Return the result of subtracting the respective elements of the input + * vectors using saturation (i.e., if the operation would yield a value less + * than zero, zero is returned instead). For more information on saturation + * arithmetic, see https://en.wikipedia.org/wiki/Saturation_arithmetic + */ +#ifndef FLB_SIMD_NONE +static inline flb_vector8 flb_vector8_ssub(const flb_vector8 v1, const flb_vector8 v2) +{ +#ifdef FLB_SIMD_SSE2 + return _mm_subs_epu8(v1, v2); +#elif defined(FLB_SIMD_NEON) + return vqsubq_u8(v1, v2); +#endif +} +#endif /* ! USE_NO_SIMD */ + +/* + * Return a vector with all bits set in each lane where the corresponding + * lanes in the inputs are equal. + */ +#ifndef FLB_SIMD_NONE +static inline flb_vector8 flb_vector8_eq(const flb_vector8 v1, const flb_vector8 v2) +{ +#ifdef FLB_SIMD_SSE2 + return _mm_cmpeq_epi8(v1, v2); +#elif defined(FLB_SIMD_NEON) + return vceqq_u8(v1, v2); +#endif +} +#endif /* ! USE_NO_SIMD */ + +#ifndef FLB_SIMD_NONE +static inline flb_vector32 flb_vector32_eq(const flb_vector32 v1, const flb_vector32 v2) +{ +#ifdef FLB_SIMD_SSE2 + return _mm_cmpeq_epi32(v1, v2); +#elif defined(FLB_SIMD_NEON) + return vceqq_u32(v1, v2); +#endif +} +#endif + +/* + * Create a vector with all elements set to the same value. + */ +static inline flb_vector8 flb_vector8_broadcast(const uint8_t c) +{ +#if defined(FLB_SIMD_SSE2) + return _mm_set1_epi8(c); +#elif defined(FLB_SIMD_NEON) + return vdupq_n_u8(c); +#else + return ~UINT64CONST(0) / 0xFF * c; +#endif +} + +/* + * Return true if the high bit of any element is set + */ +static inline bool flb_vector8_is_highbit_set(const flb_vector8 v) +{ +#ifdef FLB_SIMD_SSE2 + return _mm_movemask_epi8(v) != 0; +#elif defined(FLB_SIMD_NEON) + return vmaxvq_u8(v) > 0x7F; +#else + return v & flb_vector8_broadcast(0x80); +#endif +} + +/* + * Return true if any elements in the vector are equal to the given scalar. + */ +static inline bool flb_vector8_has(const flb_vector8 v, const uint8_t c) +{ + bool result; + +#if defined(FLB_SIMD_NONE) + /* any bytes in v equal to c will evaluate to zero via XOR */ + result = flb_vector8_has_zero(v ^ flb_vector8_broadcast(c)); +#else + result = flb_vector8_is_highbit_set(flb_vector8_eq(v, flb_vector8_broadcast(c))); +#endif + + return result; +} + +static inline bool flb_vector8_has_le(const flb_vector8 v, const uint8_t c) +{ + bool result = false; + +#if defined(FLB_SIMD_NONE) + + /* + * To find bytes <= c, we can use bitwise operations to find bytes < c+1, + * but it only works if c+1 <= 128 and if the highest bit in v is not set. + * Adapted from + * https://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord + */ + if ((int64) v >= 0 && c < 0x80) + result = (v - flb_vector8_broadcast(c + 1)) & ~v & flb_vector8_broadcast(0x80); + else { + /* one byte at a time */ + int i; + for (i = 0; i < sizeof(flb_vector8); i++) + { + if (((const uint8 *) &v)[i] <= c) { + result = true; + break; + } + } + } +#else + /* + * Use saturating subtraction to find bytes <= c, which will present as + * NUL bytes. This approach is a workaround for the lack of unsigned + * comparison instructions on some architectures. + */ + result = flb_vector8_has_zero(flb_vector8_ssub(v, flb_vector8_broadcast(c))); +#endif + + return result; +} + +static inline char *flb_simd_info() +{ + #if defined(FLB_SIMD_SSE2) + return "SSE2"; + #elif defined(FLB_SIMD_NEON) + return "NEON"; + #elif defined(FLB_SIMD_NONE) + return "none"; + #elif defined(FLB_SIMD_DISABLED) + return "none (disabled)"; + #endif +} + +#endif /* FLB_SIMD_H */ From 47b22a2ec3cca917257a53933362b2131ca827c1 Mon Sep 17 00:00:00 2001 From: Eduardo Silva Date: Thu, 17 Oct 2024 19:38:49 -0600 Subject: [PATCH 3/8] utils: do JSON escaping with SIMD support (faster!) Signed-off-by: Eduardo Silva --- src/flb_utils.c | 373 +++++++++++++++++++++++++++++------------------- 1 file changed, 225 insertions(+), 148 deletions(-) diff --git a/src/flb_utils.c b/src/flb_utils.c index ba1661c1751..80cb3c6252d 100644 --- a/src/flb_utils.c +++ b/src/flb_utils.c @@ -35,6 +35,7 @@ #include #include #include +#include #ifdef FLB_HAVE_AWS_ERROR_REPORTER #include @@ -577,7 +578,7 @@ int64_t flb_utils_size_to_bytes(const char *size) if (tmp[0] == 'K') { /* set upper bound (2**64/KB)/2 to avoid overflows */ - if (val >= 9223372036854775 || val <= -9223372036854774) + if (val >= 9223372036854775.0 || val <= -9223372036854774.0) { return -1; } @@ -785,201 +786,277 @@ static const struct escape_seq json_escape_table[128] = { * to escape special characters and convert utf-8 byte characters to string * representation. */ -int flb_utils_write_str(char *buf, int *off, size_t size, - const char *str, size_t str_len) + + +int flb_utils_write_str(char *buf, int *off, size_t size, const char *str, size_t str_len) { - int i; - int b; - int ret; - int written = 0; - int required; - int len; - int hex_bytes; - int is_valid; - int utf_sequence_number; - int utf_sequence_length; - uint32_t codepoint; - uint32_t state = 0; + int i, b, ret, len, hex_bytes, utf_sequence_length, utf_sequence_number; + int is_valid, copypos = 0, vlen; + uint32_t codepoint, state = 0; char tmp[16]; size_t available; uint32_t c; char *p; uint8_t *s; + off_t offset = 0; + + available = size - *off; - available = (size - *off); - required = str_len; - if (available <= required) { + /* Ensure we have some minimum space in the buffer */ + if (available < str_len) { return FLB_FALSE; } p = buf + *off; - for (i = 0; i < str_len; i++) { - if ((available - written) < 2) { - return FLB_FALSE; - } + vlen = str_len & ~(sizeof(flb_vector8) - 1); // Align length for SIMD - c = (uint32_t) str[i]; + for (i = 0;;) { + /* SIMD optimization: Process chunk of input string */ + for (; i < vlen; i += sizeof(flb_vector8)) { + flb_vector8 chunk; + flb_vector8_load(&chunk, (const uint8_t *)&str[i]); - /* Use the lookup table for known escape sequences */ - if (c < 128 && json_escape_table[c].seq) { /* - * check the length for the string, for simple escaping is always - * 2 bytes and 6 bytes for unicode + * Look for the special characters we are interested in, + * if they are found we break the loop and escape them + * in a char-by-char basis. Otherwise the do a bulk copy */ - if (json_escape_table[c].seq[1] == 'u') { - len = 6; - } - else { - len = 2; + if (flb_vector8_has_le(chunk, (unsigned char)0x1F) || + flb_vector8_has(chunk, (unsigned char)'"') || + flb_vector8_has(chunk, (unsigned char)'\\') || + flb_vector8_has(chunk, (unsigned char)'\n') || + flb_vector8_has(chunk, (unsigned char)'\r') || + flb_vector8_has(chunk, (unsigned char)'\t') || + flb_vector8_has(chunk, (unsigned char)'\b') || + flb_vector8_has(chunk, (unsigned char)'\f')) { + break; } + } - if ((available - written) < len) { + /* Copy the chunk processed so far */ + if (copypos < i) { + /* check if we have enough space */ + if (available < i - copypos) { return FLB_FALSE; } - memcpy(p, json_escape_table[c].seq, len); - p += len; + /* copy and adjust pointers */ + memcpy(p, &str[copypos], i - copypos); + p += i - copypos; + offset += i - copypos; + available -= (i - copypos); + copypos = i; } - else if (c >= 0x80 && c <= 0xFFFF) { - hex_bytes = flb_utf8_len(str + i); - if (available - written < 6) { - return FLB_FALSE; - } - if (i + hex_bytes > str_len) { - break; /* skip truncated UTF-8 */ + /* Process remaining characters one by one */ + for (b = 0; b < sizeof(flb_vector8); b++) { + if (i == str_len) { + /* all characters has been processed */ + goto done; } - state = FLB_UTF8_ACCEPT; - codepoint = 0; + c = (uint32_t) str[i]; - for (b = 0; b < hex_bytes; b++) { - s = (unsigned char *) str + i + b; - ret = flb_utf8_decode(&state, &codepoint, *s); - if (ret == 0) { - break; + /* Use lookup table for escaping known sequences */ + if (c < 128 && json_escape_table[c].seq) { + /* + * All characters in the table have a lenght of 2 or 6 bytes, + * just check if the second byte starts with 'u' so we know + * it's unicode and needs 6 bytes of space. + */ + if (json_escape_table[c].seq[1] == 'u') { + len = 6; + } + else { + len = 2; } - } - if (state != FLB_UTF8_ACCEPT) { - /* Invalid UTF-8 hex, just skip utf-8 bytes */ - flb_warn("[pack] invalid UTF-8 bytes found, skipping bytes"); - } - else { - len = snprintf(tmp, sizeof(tmp) - 1, "\\u%.4x", codepoint); - if ((available - written) < len) { + /* check if we have anough space */ + if (available < len) { return FLB_FALSE; } - encoded_to_buf(p, tmp, len); + + /* copy the escape sequence */ + memcpy(p, json_escape_table[c].seq, len); p += len; + offset += len; + available -= len; } - i += (hex_bytes - 1); - } - else if (c > 0xFFFF) { - utf_sequence_length = flb_utf8_len(str + i); + /* Handle UTF-8 sequences from 0x80 to 0xFFFF */ + else if (c >= 0x80 && c <= 0xFFFF) { + hex_bytes = flb_utf8_len(&str[i]); + + /* Handle invalid or truncated sequence */ + if (hex_bytes == 0 || i + hex_bytes > str_len) { + /* check for the minimum space required */ + if (available < 3) { + return FLB_FALSE; + } + + /* insert replacement character (U+FFFD) */ + p[0] = 0xEF; + p[1] = 0xBF; + p[2] = 0xBD; + p += 3; + offset += 3; + available -= 3; + + /* skip the original byte */ + i++; + continue; + } - if (i + utf_sequence_length > str_len) { - break; /* skip truncated UTF-8 */ - } + // Decode UTF-8 sequence + state = FLB_UTF8_ACCEPT; + codepoint = 0; - is_valid = FLB_TRUE; - for (utf_sequence_number = 0; utf_sequence_number < utf_sequence_length; - utf_sequence_number++) { - /* Leading characters must start with bits 11 */ - if (utf_sequence_number == 0 && ((str[i] & 0xC0) != 0xC0)) { - /* Invalid unicode character. replace */ - flb_debug("[pack] unexpected UTF-8 leading byte, " - "substituting character with replacement character"); - tmp[utf_sequence_number] = str[i]; - ++i; /* Consume invalid leading byte */ - utf_sequence_length = utf_sequence_number + 1; - is_valid = FLB_FALSE; - break; + for (b = 0; b < hex_bytes; b++) { + s = (unsigned char *) &str[i + b]; + ret = flb_utf8_decode(&state, &codepoint, *s); + if (ret == 0) { + break; + } } - /* Trailing characters must start with bits 10 */ - else if (utf_sequence_number > 0 && ((str[i] & 0xC0) != 0x80)) { - /* Invalid unicode character. replace */ - flb_debug("[pack] unexpected UTF-8 continuation byte, " - "substituting character with replacement character"); - /* This byte, i, is the start of the next unicode character */ - utf_sequence_length = utf_sequence_number; - is_valid = FLB_FALSE; - break; + + if (state != FLB_UTF8_ACCEPT) { + flb_warn("[pack] Invalid UTF-8 bytes found, skipping."); + } + else { + len = snprintf(tmp, sizeof(tmp), "\\u%.4x", codepoint); + if (available < len) { + return FLB_FALSE; // Not enough space + } + memcpy(p, tmp, len); + p += len; + offset += len; + available -= len; } - tmp[utf_sequence_number] = str[i]; - ++i; + i += hex_bytes; } - --i; + /* Handle sequences beyond 0xFFFF */ + else if (c > 0xFFFF) { + utf_sequence_length = flb_utf8_len(str + i); - if (is_valid) { - if (available - written < utf_sequence_length) { - return FLB_FALSE; + /* skip truncated UTF-8 ? */ + if (i + utf_sequence_length > str_len) { + i++; + break; } - encoded_to_buf(p, tmp, utf_sequence_length); - p += utf_sequence_length; - } - else { - if (available - written < utf_sequence_length * 3) { - return FLB_FALSE; + is_valid = FLB_TRUE; + for (utf_sequence_number = 0; utf_sequence_number < utf_sequence_length; utf_sequence_number++) { + /* Leading characters must start with bits 11 */ + if (utf_sequence_number == 0 && ((str[i] & 0xC0) != 0xC0)) { + /* Invalid unicode character. replace */ + flb_debug("[pack] unexpected UTF-8 leading byte, " + "substituting character with replacement character"); + tmp[utf_sequence_number] = str[i]; + i++; /* Consume invalid leading byte */ + utf_sequence_length = utf_sequence_number + 1; + is_valid = FLB_FALSE; + break; + } + /* Trailing characters must start with bits 10 */ + else if (utf_sequence_number > 0 && ((str[i] & 0xC0) != 0x80)) { + /* Invalid unicode character. replace */ + flb_debug("[pack] unexpected UTF-8 continuation byte, " + "substituting character with replacement character"); + /* This byte, i, is the start of the next unicode character */ + utf_sequence_length = utf_sequence_number; + is_valid = FLB_FALSE; + break; + } + + tmp[utf_sequence_number] = str[i]; + ++i; } - /* - * Utf-8 sequence is invalid. Map fragments to private use area - * codepoints in range: - * 0x00 to - * 0xFF - */ - for (b = 0; b < utf_sequence_length; ++b) { + --i; + + if (is_valid) { + if (available < utf_sequence_length) { + return FLB_FALSE; // Not enough space + } + + encoded_to_buf(p, tmp, utf_sequence_length); + p += utf_sequence_length; + offset += utf_sequence_length; + available -= utf_sequence_length; + } + else { + if (available < utf_sequence_length * 3) { + return FLB_FALSE; + } + /* - * Utf-8 private block invalid hex mapping. Format unicode charpoint - * in the following format: - * - * +--------+--------+--------+ - * |1110PPPP|10PPPPHH|10HHHHHH| - * +--------+--------+--------+ - * - * Where: - * P is FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR bits (1 byte) - * H is Utf-8 fragment hex bits (1 byte) - * 1 is bit 1 - * 0 is bit 0 - */ - - /* unicode codepoint start */ - *p = 0xE0; - - /* print unicode private block header first 4 bits */ - *p |= FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR >> 4; - ++p; - - /* unicode codepoint middle */ - *p = 0x80; - - /* print end of unicode private block header last 4 bits */ - *p |= ((FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR << 2) & 0x3f); - - /* print hex fragment first 2 bits */ - *p |= (tmp[b] >> 6) & 0x03; - ++p; - - /* unicode codepoint middle */ - *p = 0x80; - - /* print hex fragment last 6 bits */ - *p |= tmp[b] & 0x3f; - ++p; + * Utf-8 sequence is invalid. Map fragments to private use area + * codepoints in range: + * 0x00 to + * 0xFF + */ + for (b = 0; b < utf_sequence_length; ++b) { + /* + * Utf-8 private block invalid hex mapping. Format unicode charpoint + * in the following format: + * + * +--------+--------+--------+ + * |1110PPPP|10PPPPHH|10HHHHHH| + * +--------+--------+--------+ + * + * Where: + * P is FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR bits (1 byte) + * H is Utf-8 fragment hex bits (1 byte) + * 1 is bit 1 + * 0 is bit 0 + */ + + /* unicode codepoint start */ + *p = 0xE0; + + /* print unicode private block header first 4 bits */ + *p |= FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR >> 4; + ++p; + + /* unicode codepoint middle */ + *p = 0x80; + + /* print end of unicode private block header last 4 bits */ + *p |= ((FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR << 2) & 0x3f); + + /* print hex fragment first 2 bits */ + *p |= (tmp[b] >> 6) & 0x03; + ++p; + + /* unicode codepoint middle */ + *p = 0x80; + + /* print hex fragment last 6 bits */ + *p |= tmp[b] & 0x3f; + ++p; + + offset += 3; + available -= 3; + } } } + else { + if (available < 1) { + return FLB_FALSE; // No space for a single byte + } + *p++ = c; + offset++; + available--; + } + + i++; } - else { - *p++ = c; - } - written = (p - (buf + *off)); + + copypos = i; } - *off += written; +done: + *off += offset; // Update the buffer offset return FLB_TRUE; } From 34f365ccd528ab8eb49eb699e8d15e5a5554dad5 Mon Sep 17 00:00:00 2001 From: Eduardo Silva Date: Sat, 19 Oct 2024 16:38:05 -0600 Subject: [PATCH 4/8] utils: code cleanup and fix comments Signed-off-by: Eduardo Silva --- src/flb_utils.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/flb_utils.c b/src/flb_utils.c index 80cb3c6252d..211632260e3 100644 --- a/src/flb_utils.c +++ b/src/flb_utils.c @@ -787,7 +787,6 @@ static const struct escape_seq json_escape_table[128] = { * representation. */ - int flb_utils_write_str(char *buf, int *off, size_t size, const char *str, size_t str_len) { int i, b, ret, len, hex_bytes, utf_sequence_length, utf_sequence_number; @@ -808,8 +807,9 @@ int flb_utils_write_str(char *buf, int *off, size_t size, const char *str, size_ } p = buf + *off; - vlen = str_len & ~(sizeof(flb_vector8) - 1); // Align length for SIMD + /* align length to the nearest multiple of the vector size for safe SIMD processing */ + vlen = str_len & ~(sizeof(flb_vector8) - 1); for (i = 0;;) { /* SIMD optimization: Process chunk of input string */ for (; i < vlen; i += sizeof(flb_vector8)) { @@ -821,14 +821,9 @@ int flb_utils_write_str(char *buf, int *off, size_t size, const char *str, size_ * if they are found we break the loop and escape them * in a char-by-char basis. Otherwise the do a bulk copy */ - if (flb_vector8_has_le(chunk, (unsigned char)0x1F) || - flb_vector8_has(chunk, (unsigned char)'"') || - flb_vector8_has(chunk, (unsigned char)'\\') || - flb_vector8_has(chunk, (unsigned char)'\n') || - flb_vector8_has(chunk, (unsigned char)'\r') || - flb_vector8_has(chunk, (unsigned char)'\t') || - flb_vector8_has(chunk, (unsigned char)'\b') || - flb_vector8_has(chunk, (unsigned char)'\f')) { + if (flb_vector8_has_le(chunk, (unsigned char) 0x1F) || + flb_vector8_has(chunk, (unsigned char) '"') || + flb_vector8_has(chunk, (unsigned char) '\\')) { break; } } @@ -850,7 +845,7 @@ int flb_utils_write_str(char *buf, int *off, size_t size, const char *str, size_ /* Process remaining characters one by one */ for (b = 0; b < sizeof(flb_vector8); b++) { - if (i == str_len) { + if (i >= str_len) { /* all characters has been processed */ goto done; } @@ -906,7 +901,7 @@ int flb_utils_write_str(char *buf, int *off, size_t size, const char *str, size_ continue; } - // Decode UTF-8 sequence + /* decode UTF-8 sequence */ state = FLB_UTF8_ACCEPT; codepoint = 0; @@ -922,6 +917,7 @@ int flb_utils_write_str(char *buf, int *off, size_t size, const char *str, size_ flb_warn("[pack] Invalid UTF-8 bytes found, skipping."); } else { + len = snprintf(tmp, sizeof(tmp), "\\u%.4x", codepoint); if (available < len) { return FLB_FALSE; // Not enough space @@ -1042,7 +1038,8 @@ int flb_utils_write_str(char *buf, int *off, size_t size, const char *str, size_ } else { if (available < 1) { - return FLB_FALSE; // No space for a single byte + /* no space for a single byte */ + return FLB_FALSE; } *p++ = c; offset++; @@ -1056,7 +1053,8 @@ int flb_utils_write_str(char *buf, int *off, size_t size, const char *str, size_ } done: - *off += offset; // Update the buffer offset + /* update the buffer offset */ + *off += offset; return FLB_TRUE; } From 173ddae45ffffa9251f7e803d415724d5f29d9f8 Mon Sep 17 00:00:00 2001 From: Eduardo Silva Date: Sat, 19 Oct 2024 16:38:41 -0600 Subject: [PATCH 5/8] simd: use new defs and fix fallback mechanism Signed-off-by: Eduardo Silva --- include/fluent-bit/flb_simd.h | 70 ++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 29 deletions(-) diff --git a/include/fluent-bit/flb_simd.h b/include/fluent-bit/flb_simd.h index 0f66d5e1bed..9ae53b115fe 100644 --- a/include/fluent-bit/flb_simd.h +++ b/include/fluent-bit/flb_simd.h @@ -23,8 +23,11 @@ #include #include +#include +#define UINT64CONST(x) (x##ULL) + /* Only enable SIMD support if it has not been explicity disabled */ -#ifndef FLB_SIMD_DISABLED +#ifdef FLB_HAVE_SIMD #if (defined(__x86_64__) || defined(_M_AMD64)) /* @@ -38,6 +41,7 @@ */ #include #define FLB_SIMD_SSE2 + typedef __m128i flb_vector8; typedef __m128i flb_vector32; @@ -65,11 +69,14 @@ typedef uint32x4_t flb_vector32; * two 32-bit integers using a uint64. */ #define FLB_SIMD_NONE -typedef uint64 flb_vector8; +typedef uint64_t flb_vector8; #endif #else #define FLB_SIMD_NONE + +/* Original code aims to handle this as a uint64_t to search */ +typedef uint8_t flb_vector8; #endif /* FLB_SIMD_DISABLED */ /* element-wise comparisons to a scalar */ @@ -88,7 +95,7 @@ static inline void flb_vector8_load(flb_vector8 *v, const uint8_t *s) #elif defined(FLB_SIMD_NEON) *v = vld1q_u8(s); #else - memcpy(v, s, sizeof(flb_vector8)); + memset(v, 0, sizeof(flb_vector8)); #endif } @@ -124,7 +131,7 @@ static inline flb_vector8 flb_vector8_ssub(const flb_vector8 v1, const flb_vecto return vqsubq_u8(v1, v2); #endif } -#endif /* ! USE_NO_SIMD */ +#endif /* ! FLB_SIMD_NONE */ /* * Return a vector with all bits set in each lane where the corresponding @@ -139,7 +146,7 @@ static inline flb_vector8 flb_vector8_eq(const flb_vector8 v1, const flb_vector8 return vceqq_u8(v1, v2); #endif } -#endif /* ! USE_NO_SIMD */ +#endif /* ! FLB_SIMD_NONE */ #ifndef FLB_SIMD_NONE static inline flb_vector32 flb_vector32_eq(const flb_vector32 v1, const flb_vector32 v2) @@ -150,7 +157,7 @@ static inline flb_vector32 flb_vector32_eq(const flb_vector32 v1, const flb_vect return vceqq_u32(v1, v2); #endif } -#endif +#endif /* ! FLB_SIMD_NONE */ /* * Create a vector with all elements set to the same value. @@ -185,11 +192,10 @@ static inline bool flb_vector8_is_highbit_set(const flb_vector8 v) */ static inline bool flb_vector8_has(const flb_vector8 v, const uint8_t c) { - bool result; + bool result = false; #if defined(FLB_SIMD_NONE) - /* any bytes in v equal to c will evaluate to zero via XOR */ - result = flb_vector8_has_zero(v ^ flb_vector8_broadcast(c)); + return flb_vector8_has_zero(v ^ flb_vector8_broadcast(c)); #else result = flb_vector8_is_highbit_set(flb_vector8_eq(v, flb_vector8_broadcast(c))); #endif @@ -202,26 +208,28 @@ static inline bool flb_vector8_has_le(const flb_vector8 v, const uint8_t c) bool result = false; #if defined(FLB_SIMD_NONE) - /* - * To find bytes <= c, we can use bitwise operations to find bytes < c+1, - * but it only works if c+1 <= 128 and if the highest bit in v is not set. - * Adapted from - * https://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord + * To find bytes <= c, we can use bitwise operations to find bytes < c+1, + * but it only works if c+1 <= 128 and if the highest bit in v is not set. + * + * Adapted from + * + * https://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord */ - if ((int64) v >= 0 && c < 0x80) + if ((int64_t) v >= 0 && c < 0x80) { result = (v - flb_vector8_broadcast(c + 1)) & ~v & flb_vector8_broadcast(0x80); + } else { - /* one byte at a time */ - int i; - for (i = 0; i < sizeof(flb_vector8); i++) - { - if (((const uint8 *) &v)[i] <= c) { + size_t i; + for (i = 0; i < sizeof(flb_vector8); i++) { + if (((const uint8_t *) &v)[i] <= c) { result = true; break; } } } + + return result; #else /* * Use saturating subtraction to find bytes <= c, which will present as @@ -236,15 +244,19 @@ static inline bool flb_vector8_has_le(const flb_vector8 v, const uint8_t c) static inline char *flb_simd_info() { - #if defined(FLB_SIMD_SSE2) - return "SSE2"; - #elif defined(FLB_SIMD_NEON) - return "NEON"; - #elif defined(FLB_SIMD_NONE) - return "none"; - #elif defined(FLB_SIMD_DISABLED) - return "none (disabled)"; + #ifdef FLB_HAVE_SIMD + #if defined(FLB_SIMD_SSE2) + return "SSE2"; + #elif defined(FLB_SIMD_NEON) + return "NEON"; + #elif defined(FLB_SIMD_NONE) + return "none"; + #else + return "unknown"; + #endif + #else + return "disabled"; #endif } -#endif /* FLB_SIMD_H */ +#endif /* FLB_HAVE_SIMD */ From 0a2bd90337bca07adb0043902b8c2f300176199d Mon Sep 17 00:00:00 2001 From: Eduardo Silva Date: Sat, 19 Oct 2024 16:43:21 -0600 Subject: [PATCH 6/8] build: add new build option FLB_SIMD (default: off) Signed-off-by: Eduardo Silva --- CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9e6c5ac2613..eff73766e7d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -141,6 +141,7 @@ option(FLB_SIGNV4 "Enable AWS Signv4 support" Yes) option(FLB_AWS "Enable AWS support" Yes) option(FLB_STATIC_CONF "Build binary using static configuration") option(FLB_STREAM_PROCESSOR "Enable Stream Processor" Yes) +option(FLB_SIMD "Enable SIMD support" No) option(FLB_CORO_STACK_SIZE "Set coroutine stack size") option(FLB_AVRO_ENCODER "Build with Avro encoding support" No) option(FLB_AWS_ERROR_REPORTER "Build with aws error reporting support" No) @@ -181,6 +182,10 @@ option(FLB_EVENT_LOOP_KQUEUE "Enable kqueue(2) event loop backend" No) option(FLB_EVENT_LOOP_SELECT "Enable select(2) event loop backend" No) option(FLB_EVENT_LOOP_LIBEVENT "Enable libevent event loop backend" No) +# SIMD support +if(FLB_SIMD) + FLB_DEFINITION(FLB_HAVE_SIMD) +endif() if(DEFINED FLB_NIGHTLY_BUILD AND NOT "${FLB_NIGHTLY_BUILD}" STREQUAL "") FLB_DEFINITION_VAL(FLB_NIGHTLY_BUILD ${FLB_NIGHTLY_BUILD}) From 2c341e56764a3cdd99dce681b2e60a3262609ba2 Mon Sep 17 00:00:00 2001 From: Eduardo Silva Date: Sat, 19 Oct 2024 16:43:43 -0600 Subject: [PATCH 7/8] bin: print SIMD status Signed-off-by: Eduardo Silva --- src/flb_engine.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/flb_engine.c b/src/flb_engine.c index 911c1389a79..bedc28477c5 100644 --- a/src/flb_engine.c +++ b/src/flb_engine.c @@ -54,6 +54,7 @@ #include #include #include +#include #ifdef FLB_HAVE_METRICS #include @@ -799,6 +800,9 @@ int flb_engine_start(struct flb_config *config) return -1; } + /* Internals */ + flb_info("[simd ] %s", flb_simd_info()); + /* Init Metrics engine */ cmt_initialize(); flb_info("[cmetrics] version=%s", cmt_version()); From 652ac2f046d4604c5844933a9d4f0b71094dab70 Mon Sep 17 00:00:00 2001 From: Eduardo Silva Date: Sat, 19 Oct 2024 16:44:14 -0600 Subject: [PATCH 8/8] workflows: build with FLB_SIMD=On|Off Signed-off-by: Eduardo Silva --- .github/workflows/unit-tests.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml index 0fa50f11890..36d1f85d841 100644 --- a/.github/workflows/unit-tests.yaml +++ b/.github/workflows/unit-tests.yaml @@ -42,6 +42,8 @@ jobs: - "-DFLB_COVERAGE=On" - "-DFLB_SANITIZE_MEMORY=On" - "-DFLB_SANITIZE_THREAD=On" + - "-DFLB_SIMD=On" + - "-DFLB_SIMD=Off" compiler: - gcc - clang