Skip to content

Commit 390f7ca

Browse files
committed
Merge branch 'fix/sdk-4908-fix-utf8-chars-csv-parser' into 'release/v8.5.0'
SDK-4908. [PWM] Fix an out of memory access bug when importing passwords with UTF-8 characters (Release v8.5.0) See merge request sdk/sdk!6294
2 parents 79b1020 + e08aae9 commit 390f7ca

File tree

2 files changed

+36
-35
lines changed

2 files changed

+36
-35
lines changed

include/mega/mega_csv.h

+30-34
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ SOFTWARE.
4444
#include <sstream>
4545
#include <string>
4646
#include <vector>
47+
#include <bitset>
4748

4849
/* Copyright 2017 https://github.com/mandreyel
4950
*
@@ -4831,11 +4832,11 @@ namespace csv {
48314832
STATIC_ASSERT(quote_escape_flag(ParseFlags::DELIMITER, true) == ParseFlags::NOT_SPECIAL);
48324833
STATIC_ASSERT(quote_escape_flag(ParseFlags::NEWLINE, true) == ParseFlags::NOT_SPECIAL);
48334834

4834-
/** An array which maps ASCII chars to a parsing flag */
4835-
using ParseFlagMap = std::array<ParseFlags, 256>;
4835+
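/** An array which maps every possible byte value (a char converted to unsigned char, so the bytes of multi-byte UTF-8 sequences are covered) to a parsing flag */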
4836+
using ParseFlagMap = std::array<ParseFlags, std::numeric_limits<unsigned char>::max() + 1>;
48364837

4837-
/** An array which maps ASCII chars to a flag indicating if it is whitespace */
4838-
using WhitespaceMap = std::array<bool, 256>;
4838+
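/** A bitset which maps every possible byte value (a char converted to unsigned char, so the bytes of multi-byte UTF-8 sequences are covered) to a flag indicating if it is whitespace */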
4839+
using WhitespaceMap = std::bitset<std::numeric_limits<unsigned char>::max() + 1>;
48394840
}
48404841

48414842
/** Integer indicating a requested column wasn't found. */
@@ -5856,24 +5857,28 @@ inline std::ostream& operator << (std::ostream& os, csv::CSVField const& value)
58565857

58575858
namespace csv {
58585859
namespace internals {
5860+
5861+
/** Helper constexpr function that returns a container of type Out with every element set to the given value
5862+
*/
5863+
template<typename T, typename Out>
5864+
HEDLEY_CONST CONSTEXPR_17 Out container_to_default(T&& value)
5865+
{
5866+
Out a{};
5867+
for (size_t i = 0; i < a.size(); ++i)
5868+
a[i] = value;
5869+
return a;
5870+
}
5871+
58595872
/** Create an array v indexed by each character's unsigned char
58605873
* value i (0 to 255), where v[i] labels that character according to
58615874
* the CSVReader::ParseFlags enum
58625875
*/
58635876
HEDLEY_CONST CONSTEXPR_17 ParseFlagMap make_parse_flags(char delimiter) {
5864-
std::array<ParseFlags, 256> ret = {};
5865-
for (int i = -128; i < 128; i++) {
5866-
const int arr_idx = i + 128;
5867-
char ch = char(i);
5868-
5869-
if (ch == delimiter)
5870-
ret[arr_idx] = ParseFlags::DELIMITER;
5871-
else if (ch == '\r' || ch == '\n')
5872-
ret[arr_idx] = ParseFlags::NEWLINE;
5873-
else
5874-
ret[arr_idx] = ParseFlags::NOT_SPECIAL;
5875-
}
5876-
5877+
ParseFlagMap ret = container_to_default<ParseFlagMap::value_type, ParseFlagMap>(
5878+
ParseFlags::NOT_SPECIAL);
5879+
ret[static_cast<unsigned char>(delimiter)] = ParseFlags::DELIMITER;
5880+
ret[static_cast<unsigned char>('\r')] = ParseFlags::NEWLINE;
5881+
ret[static_cast<unsigned char>('\n')] = ParseFlags::NEWLINE;
58775882
return ret;
58785883
}
58795884

@@ -5882,8 +5887,8 @@ namespace csv {
58825887
* the CSVReader::ParseFlags enum
58835888
*/
58845889
HEDLEY_CONST CONSTEXPR_17 ParseFlagMap make_parse_flags(char delimiter, char quote_char) {
5885-
std::array<ParseFlags, 256> ret = make_parse_flags(delimiter);
5886-
ret[(size_t)quote_char + 128] = ParseFlags::QUOTE;
5890+
ParseFlagMap ret = make_parse_flags(delimiter);
5891+
ret[static_cast<unsigned char>(quote_char)] = ParseFlags::QUOTE;
58875892
return ret;
58885893
}
58895894

@@ -5892,19 +5897,10 @@ namespace csv {
58925897
* c is a whitespace character
58935898
*/
58945899
HEDLEY_CONST CONSTEXPR_17 WhitespaceMap make_ws_flags(const char* ws_chars, size_t n_chars) {
5895-
std::array<bool, 256> ret = {};
5896-
for (int i = -128; i < 128; i++) {
5897-
const int arr_idx = i + 128;
5898-
char ch = char(i);
5899-
ret[arr_idx] = false;
5900-
5901-
for (size_t j = 0; j < n_chars; j++) {
5902-
if (ws_chars[j] == ch) {
5903-
ret[arr_idx] = true;
5904-
}
5905-
}
5900+
WhitespaceMap ret = container_to_default<bool, WhitespaceMap>(false);
5901+
for (size_t i = 0; i < n_chars; ++i) {
5902+
ret[static_cast<unsigned char>(ws_chars[i])] = true;
59065903
}
5907-
59085904
return ret;
59095905
}
59105906

@@ -6044,7 +6040,7 @@ namespace csv {
60446040
void end_feed();
60456041

60466042
CONSTEXPR_17 ParseFlags parse_flag(const char ch) const noexcept {
6047-
return _parse_flags.data()[ch + 128];
6043+
return _parse_flags.data()[static_cast<unsigned char>(ch)];
60486044
}
60496045

60506046
CONSTEXPR_17 ParseFlags compound_parse_flag(const char ch) const noexcept {
@@ -6108,7 +6104,7 @@ namespace csv {
61086104
RowCollection* _records = nullptr;
61096105

61106106
CONSTEXPR_17 bool ws_flag(const char ch) const noexcept {
6111-
return _ws_flags.data()[ch + 128];
6107+
return _ws_flags[static_cast<unsigned char>(ch)];
61126108
}
61136109

61146110
size_t& current_row_start() {
@@ -7771,7 +7767,7 @@ namespace csv {
77717767
if (value.empty()) {
77727768
bool prev_ch_quote = false;
77737769
for (size_t i = 0; i < field.length; i++) {
7774-
if (this->data->parse_flags[field_str[i] + 128] == ParseFlags::QUOTE) {
7770+
if (this->data->parse_flags[static_cast<unsigned char>(field_str[i])] == ParseFlags::QUOTE) {
77757771
if (prev_ch_quote) {
77767772
prev_ch_quote = false;
77777773
continue;

tests/unit/pwm_file_parser_test.cpp

+6-1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ test.com,https://test.com/,test3,"hello.12,34",
1717
test.com,https://test.com/,txema,hel\nlo.1234,""
1818
test2.com,https://test2.com/,test,hello.1234,
1919
,https://nopassname.com/,test,hello.1234,
20+
HeLLO😍🤣🥰😉🥰😌🥰😋😘😌,https://m.facebook.com/,😌,123,😍HeLLO😌
2021
)"};
2122
const std::vector<std::vector<std::string_view>> expected{
2223
{"foo.com", "https://foo.com/", "tx", R"(hola""\"\".,,)", ""},
@@ -25,7 +26,11 @@ test2.com,https://test2.com/,test,hello.1234,
2526
{"test.com", "https://test.com/", "txema", "hel\\nlo.1234", ""},
2627
{"test2.com", "https://test2.com/", "test", "hello.1234", ""},
2728
{"", "https://nopassname.com/", "test", "hello.1234", ""},
28-
};
29+
{"HeLLO😍🤣🥰😉🥰😌🥰😋😘😌",
30+
"https://m.facebook.com/",
31+
"😌",
32+
"123",
33+
"😍HeLLO😌"}};
2934
const std::string fname = "test.csv";
3035
sdk_test::LocalTempFile f{fname, fileContents};
3136

0 commit comments

Comments
 (0)