Skip to content

Commit 88d83bf

Browse files
authored
test(clp-s): Add end-to-end test case for compression and extraction. (#595)
1 parent 5881b9c commit 88d83bf

File tree

6 files changed

+219
-1
lines changed

6 files changed

+219
-1
lines changed

components/core/CMakeLists.txt

+53-1
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,42 @@ add_subdirectory(src/clp_s)
241241
add_subdirectory(src/reducer)
242242

243243
set(SOURCE_FILES_clp_s_unitTest
244+
src/clp_s/ArchiveReader.cpp
245+
src/clp_s/ArchiveReader.hpp
246+
src/clp_s/ArchiveWriter.cpp
247+
src/clp_s/ArchiveWriter.hpp
248+
src/clp_s/ColumnReader.cpp
249+
src/clp_s/ColumnReader.hpp
250+
src/clp_s/ColumnWriter.cpp
251+
src/clp_s/ColumnWriter.hpp
252+
src/clp_s/DictionaryEntry.cpp
253+
src/clp_s/DictionaryEntry.hpp
254+
src/clp_s/DictionaryWriter.cpp
255+
src/clp_s/DictionaryWriter.hpp
256+
src/clp_s/FileReader.cpp
257+
src/clp_s/FileReader.hpp
258+
src/clp_s/FileWriter.cpp
259+
src/clp_s/FileWriter.hpp
260+
src/clp_s/JsonConstructor.cpp
261+
src/clp_s/JsonConstructor.hpp
262+
src/clp_s/JsonFileIterator.cpp
263+
src/clp_s/JsonFileIterator.hpp
264+
src/clp_s/JsonParser.cpp
265+
src/clp_s/JsonParser.hpp
266+
src/clp_s/PackedStreamReader.cpp
267+
src/clp_s/PackedStreamReader.hpp
268+
src/clp_s/ReaderUtils.cpp
269+
src/clp_s/ReaderUtils.hpp
270+
src/clp_s/Schema.cpp
271+
src/clp_s/Schema.hpp
272+
src/clp_s/SchemaMap.cpp
273+
src/clp_s/SchemaMap.hpp
274+
src/clp_s/SchemaReader.cpp
275+
src/clp_s/SchemaReader.hpp
276+
src/clp_s/SchemaTree.cpp
277+
src/clp_s/SchemaTree.hpp
278+
src/clp_s/SchemaWriter.cpp
279+
src/clp_s/SchemaWriter.hpp
244280
src/clp_s/search/AndExpr.cpp
245281
src/clp_s/search/AndExpr.hpp
246282
src/clp_s/search/BooleanLiteral.cpp
@@ -273,11 +309,24 @@ set(SOURCE_FILES_clp_s_unitTest
273309
src/clp_s/search/StringLiteral.hpp
274310
src/clp_s/search/Transformation.hpp
275311
src/clp_s/search/Value.hpp
276-
src/clp_s/SchemaTree.hpp
312+
src/clp_s/TimestampDictionaryReader.cpp
313+
src/clp_s/TimestampDictionaryReader.hpp
314+
src/clp_s/TimestampDictionaryWriter.cpp
315+
src/clp_s/TimestampDictionaryWriter.hpp
316+
src/clp_s/TimestampEntry.cpp
317+
src/clp_s/TimestampEntry.hpp
277318
src/clp_s/TimestampPattern.cpp
278319
src/clp_s/TimestampPattern.hpp
279320
src/clp_s/Utils.cpp
280321
src/clp_s/Utils.hpp
322+
src/clp_s/VariableDecoder.cpp
323+
src/clp_s/VariableDecoder.hpp
324+
src/clp_s/VariableEncoder.cpp
325+
src/clp_s/VariableEncoder.hpp
326+
src/clp_s/ZstdCompressor.cpp
327+
src/clp_s/ZstdCompressor.hpp
328+
src/clp_s/ZstdDecompressor.cpp
329+
src/clp_s/ZstdDecompressor.hpp
281330
)
282331

283332
set(SOURCE_FILES_unitTest
@@ -499,6 +548,7 @@ set(SOURCE_FILES_unitTest
499548
tests/LogSuppressor.hpp
500549
tests/test-Array.cpp
501550
tests/test-BufferedFileReader.cpp
551+
tests/test-clp_s-end_to_end.cpp
502552
tests/test-EncodedVariableInterpreter.cpp
503553
tests/test-encoding_methods.cpp
504554
tests/test-ffi_IrUnitHandlerInterface.cpp
@@ -542,6 +592,8 @@ target_link_libraries(unitTest
542592
log_surgeon::log_surgeon
543593
LibArchive::LibArchive
544594
MariaDBClient::MariaDBClient
595+
${MONGOCXX_TARGET}
596+
simdjson
545597
spdlog::spdlog
546598
OpenSSL::Crypto
547599
${sqlite_LIBRARY_DEPENDENCIES}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
#include <sys/wait.h>
2+
3+
#include <cstdlib>
4+
#include <filesystem>
5+
#include <string>
6+
#include <string_view>
7+
#include <vector>
8+
9+
#include <Catch2/single_include/catch2/catch.hpp>
10+
#include <fmt/format.h>
11+
12+
#include "../src/clp_s/JsonConstructor.hpp"
13+
#include "../src/clp_s/JsonParser.hpp"
14+
15+
// Directory that `compress` writes archives into and `extract` reads them from. Used as a relative
// path, so it resolves against the test process's current working directory.
constexpr std::string_view cTestEndToEndArchiveDirectory{"test-end-to-end-archive"};
16+
// Directory passed to the JSON constructor as its output directory in `extract`.
constexpr std::string_view cTestEndToEndOutputDirectory{"test-end-to-end-out"};
17+
// File that `compare` writes the key-sorted, line-sorted form of the extracted JSON into.
constexpr std::string_view cTestEndToEndOutputSortedJson{"test-end-to-end_sorted.jsonl"};
18+
// Directory containing the test input file, relative to the directory of this source file.
constexpr std::string_view cTestEndToEndInputFileDirectory{"test_log_files"};
19+
// Name of the test input file inside `cTestEndToEndInputFileDirectory`.
constexpr std::string_view cTestEndToEndInputFile{"test_no_floats_sorted.jsonl"};
20+
21+
namespace {
22+
/**
23+
* A class that deletes the directories and files created by test cases, both before and after each
24+
* test case where the class is instantiated.
25+
*/
26+
class TestOutputCleaner {
27+
public:
28+
TestOutputCleaner() { delete_files(); }
29+
30+
~TestOutputCleaner() { delete_files(); }
31+
32+
// Delete copy & move constructors and assignment operators
33+
TestOutputCleaner(TestOutputCleaner const&) = delete;
34+
TestOutputCleaner(TestOutputCleaner&&) = delete;
35+
auto operator=(TestOutputCleaner const&) -> TestOutputCleaner& = delete;
36+
auto operator=(TestOutputCleaner&&) -> TestOutputCleaner& = delete;
37+
38+
private:
39+
static void delete_files() {
40+
std::filesystem::remove_all(cTestEndToEndArchiveDirectory);
41+
std::filesystem::remove_all(cTestEndToEndOutputDirectory);
42+
std::filesystem::remove(cTestEndToEndOutputSortedJson);
43+
}
44+
};
45+
46+
/**
 * @return The test input file's path relative to the directory containing this source file.
 */
auto get_test_input_path_relative_to_tests_dir() -> std::filesystem::path;
47+
/**
 * @return The test input file's path, built from the directory of this source file (`__FILE__`).
 */
auto get_test_input_local_path() -> std::string;
48+
/**
 * Compresses the test input file into the test archive directory.
 * @param structurize_arrays Value assigned to the parser option of the same name.
 */
void compress(bool structurize_arrays);
49+
/**
 * Extracts every archive in the test archive directory into the test output directory.
 * @return Path of the extracted file inside the test output directory.
 */
auto extract() -> std::filesystem::path;
50+
/**
 * Requires that the extracted JSON, once normalized with `jq` and `sort`, is identical to the test
 * input file.
 * @param extracted_json_path Path of the extracted JSON file.
 */
void compare(std::filesystem::path const& extracted_json_path);
51+
52+
auto get_test_input_path_relative_to_tests_dir() -> std::filesystem::path {
53+
return std::filesystem::path{cTestEndToEndInputFileDirectory} / cTestEndToEndInputFile;
54+
}
55+
56+
auto get_test_input_local_path() -> std::string {
57+
std::filesystem::path const current_file_path{__FILE__};
58+
auto const tests_dir{current_file_path.parent_path()};
59+
return (tests_dir / get_test_input_path_relative_to_tests_dir()).string();
60+
}
61+
62+
/**
 * Compresses the test input file into the test archive directory, requiring that parsing succeeds
 * and that the archive directory is non-empty afterwards.
 * @param structurize_arrays Value assigned to `JsonParserOption::structurize_arrays`.
 */
void compress(bool structurize_arrays) {
    constexpr auto cTargetEncodedSize = 8ULL * 1024 * 1024 * 1024;  // 8 GiB
    constexpr auto cMaxDocumentSize = 512ULL * 1024 * 1024;  // 512 MiB
    constexpr auto cMinTableSize = 1ULL * 1024 * 1024;  // 1 MiB
    constexpr auto cCompressionLevel = 3;
    constexpr auto cPrintArchiveStats = false;

    // Make sure the destination for the archives exists before the parser is configured.
    std::filesystem::create_directory(cTestEndToEndArchiveDirectory);
    REQUIRE((std::filesystem::is_directory(cTestEndToEndArchiveDirectory)));

    clp_s::JsonParserOption parser_option{};

    // Input and output locations
    parser_option.archives_dir = cTestEndToEndArchiveDirectory;
    parser_option.file_paths.push_back(get_test_input_local_path());

    // Size limits
    parser_option.min_table_size = cMinTableSize;
    parser_option.max_document_size = cMaxDocumentSize;
    parser_option.target_encoded_size = cTargetEncodedSize;

    // Remaining settings
    parser_option.structurize_arrays = structurize_arrays;
    parser_option.print_archive_stats = cPrintArchiveStats;
    parser_option.compression_level = cCompressionLevel;

    clp_s::JsonParser parser{parser_option};
    REQUIRE(parser.parse());
    parser.store();

    // At least one entry must have been written into the archive directory.
    REQUIRE((false == std::filesystem::is_empty(cTestEndToEndArchiveDirectory)));
}
88+
89+
auto extract() -> std::filesystem::path {
90+
constexpr auto cDefaultOrdered = false;
91+
constexpr auto cDefaultTargetOrderedChunkSize = 0;
92+
93+
std::filesystem::create_directory(cTestEndToEndOutputDirectory);
94+
REQUIRE(std::filesystem::is_directory(cTestEndToEndOutputDirectory));
95+
96+
clp_s::JsonConstructorOption constructor_option{};
97+
constructor_option.archives_dir = cTestEndToEndArchiveDirectory;
98+
constructor_option.output_dir = cTestEndToEndOutputDirectory;
99+
constructor_option.ordered = cDefaultOrdered;
100+
constructor_option.target_ordered_chunk_size = cDefaultTargetOrderedChunkSize;
101+
for (auto const& entry : std::filesystem::directory_iterator(constructor_option.archives_dir)) {
102+
if (false == entry.is_directory()) {
103+
// Skip non-directories
104+
continue;
105+
}
106+
107+
constructor_option.archive_id = entry.path().filename();
108+
clp_s::JsonConstructor constructor{constructor_option};
109+
constructor.store();
110+
}
111+
std::filesystem::path extracted_json_path{cTestEndToEndOutputDirectory};
112+
extracted_json_path /= "original";
113+
REQUIRE(std::filesystem::exists(extracted_json_path));
114+
115+
return extracted_json_path;
116+
}
117+
118+
// Silence the checks below since our use of `std::system` is safe in the context of testing.
119+
// NOLINTBEGIN(cert-env33-c,concurrency-mt-unsafe)
120+
void compare(std::filesystem::path const& extracted_json_path) {
121+
int result{std::system("command -v jq >/dev/null 2>&1")};
122+
REQUIRE((0 == result));
123+
auto command = fmt::format(
124+
"jq --sort-keys --compact-output '.' {} | sort > {}",
125+
extracted_json_path.string(),
126+
cTestEndToEndOutputSortedJson
127+
);
128+
result = std::system(command.c_str());
129+
REQUIRE((0 == result));
130+
131+
REQUIRE((false == std::filesystem::is_empty(cTestEndToEndOutputSortedJson)));
132+
133+
result = std::system("command -v diff >/dev/null 2>&1");
134+
REQUIRE((0 == result));
135+
command = fmt::format(
136+
"diff --unified {} {} > /dev/null",
137+
cTestEndToEndOutputSortedJson,
138+
get_test_input_local_path()
139+
);
140+
result = std::system(command.c_str());
141+
REQUIRE((true == WIFEXITED(result)));
142+
REQUIRE((0 == WEXITSTATUS(result)));
143+
}
144+
145+
// NOLINTEND(cert-env33-c,concurrency-mt-unsafe)
146+
} // namespace
147+
148+
// Compresses the test input file, extracts it again, and requires that the extracted JSON matches
// the input. The test body runs once for each generated value of `structurize_arrays`.
TEST_CASE("clp-s-compress-extract-no-floats", "[clp-s][end-to-end]") {
    auto const structurize_arrays = GENERATE(true, false);

    // Removes the test's outputs now and again when this object goes out of scope.
    TestOutputCleaner const test_cleanup;

    compress(structurize_arrays);
    compare(extract());
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_jq_losslessly_represents":9824299763229016,"int64_min_jq_losslessly_represents":-9007199254740992,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true}
2+
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_jq_losslessly_represents":9824299763229016,"int64_min_jq_losslessly_represents":-9007199254740992,"int8_max":127,"int8_min":-128,"nonempty_object":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_jq_losslessly_represents":9824299763229016,"int64_min_jq_losslessly_represents":-9007199254740992,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true}
3+
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_jq_losslessly_represents":9824299763229016,"int64_min_jq_losslessly_represents":-9007199254740992,"int8_max":127,"int8_min":-128,"nonempty_object":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_jq_losslessly_represents":9824299763229016,"int64_min_jq_losslessly_represents":-9007199254740992,"int8_max":127,"int8_min":-128,"non_empty_object2":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_jq_losslessly_represents":9824299763229016,"int64_min_jq_losslessly_represents":-9007199254740992,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true}
4+
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_jq_losslessly_represents":9824299763229016,"int64_min_jq_losslessly_represents":-9007199254740992,"int8_max":127,"int8_min":-128,"nonempty_array":[1,2,3,4,5],"null":null,"string":"short_string","true":true}

components/core/tools/scripts/lib_install/centos-stream-9/install-prebuilt-packages.sh

+2
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,11 @@ set -u
88

99
dnf install -y \
1010
cmake \
11+
diffutils \
1112
gcc-c++ \
1213
git \
1314
java-11-openjdk \
15+
jq \
1416
libarchive-devel \
1517
libcurl-devel \
1618
libzstd-devel \

components/core/tools/scripts/lib_install/ubuntu-focal/install-prebuilt-packages.sh

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
1717
gcc \
1818
gcc-10 \
1919
git \
20+
jq \
2021
libcurl4 \
2122
libcurl4-openssl-dev \
2223
libmariadb-dev \

components/core/tools/scripts/lib_install/ubuntu-jammy/install-prebuilt-packages.sh

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
1414
curl \
1515
build-essential \
1616
git \
17+
jq \
1718
libboost-filesystem-dev \
1819
libboost-iostreams-dev \
1920
libboost-program-options-dev \

0 commit comments

Comments
 (0)