Skip to content

Commit

Permalink
Merge branch 'main' into feat(clp-s)-JSON-to-IRv2
Browse files Browse the repository at this point in the history
  • Loading branch information
AVMatthews authored Jan 15, 2025
2 parents 82367fd + 1ecd9c7 commit 5fc76ed
Show file tree
Hide file tree
Showing 122 changed files with 6,269 additions and 937 deletions.
4 changes: 3 additions & 1 deletion components/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,8 @@ set(SOURCE_FILES_clp_s_unitTest
src/clp_s/FileReader.hpp
src/clp_s/FileWriter.cpp
src/clp_s/FileWriter.hpp
src/clp_s/InputConfig.cpp
src/clp_s/InputConfig.hpp
src/clp_s/JsonConstructor.cpp
src/clp_s/JsonConstructor.hpp
src/clp_s/JsonFileIterator.cpp
Expand Down Expand Up @@ -613,7 +615,7 @@ target_include_directories(unitTest
target_link_libraries(unitTest
PRIVATE
absl::flat_hash_map
Boost::filesystem Boost::iostreams Boost::program_options Boost::regex
Boost::filesystem Boost::iostreams Boost::program_options Boost::regex Boost::url
${CURL_LIBRARIES}
fmt::fmt
kql
Expand Down
4 changes: 2 additions & 2 deletions components/core/src/clp/BoundedReader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ class BoundedReader : public ReaderInterface {
* @return ErrorCode_EndOfFile on EOF or trying to read after hitting checkpoint
* @return ErrorCode_Success on success
*/
[[nodiscard]] auto
try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) -> ErrorCode override;
[[nodiscard]] auto try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read)
-> ErrorCode override;

/**
* This function is unsupported because BoundedReader can not delegate to a potentially
Expand Down
9 changes: 3 additions & 6 deletions components/core/src/clp/BufferReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,9 @@ auto BufferReader::try_get_pos(size_t& pos) -> ErrorCode {
return ErrorCode_Success;
}

auto BufferReader::try_read_to_delimiter(
char delim,
bool keep_delimiter,
bool append,
std::string& str
) -> ErrorCode {
auto
BufferReader::try_read_to_delimiter(char delim, bool keep_delimiter, bool append, std::string& str)
-> ErrorCode {
if (false == append) {
str.clear();
}
Expand Down
13 changes: 5 additions & 8 deletions components/core/src/clp/BufferReader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ class BufferReader : public ReaderInterface {
* @return ErrorCode_EndOfFile if the buffer doesn't contain any more data
* @return ErrorCode_Success on success
*/
[[nodiscard]] auto
try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) -> ErrorCode override;
[[nodiscard]] auto try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read)
-> ErrorCode override;

/**
* Tries to seek to the given position, relative to the beginning of the buffer
Expand All @@ -88,12 +88,9 @@ class BufferReader : public ReaderInterface {
* @param str Returns the content read from the buffer
* @return Same as BufferReader::try_read_to_delimiter(char, bool, std::string&, bool&, size_t&)
*/
[[nodiscard]] auto try_read_to_delimiter(
char delim,
bool keep_delimiter,
bool append,
std::string& str
) -> ErrorCode override;
[[nodiscard]] auto
try_read_to_delimiter(char delim, bool keep_delimiter, bool append, std::string& str)
-> ErrorCode override;

private:
// Methods
Expand Down
9 changes: 3 additions & 6 deletions components/core/src/clp/BufferedFileReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,12 +265,9 @@ auto BufferedFileReader::try_read(char* buf, size_t num_bytes_to_read, size_t& n
return ErrorCode_Success;
}

auto BufferedFileReader::try_read_to_delimiter(
char delim,
bool keep_delimiter,
bool append,
string& str
) -> ErrorCode {
auto
BufferedFileReader::try_read_to_delimiter(char delim, bool keep_delimiter, bool append, string& str)
-> ErrorCode {
if (-1 == m_fd) {
return ErrorCode_NotInit;
}
Expand Down
17 changes: 7 additions & 10 deletions components/core/src/clp/BufferedFileReader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,8 @@ class BufferedFileReader : public ReaderInterface {
* @return ErrorCode_NotInit if the file is not opened
* @return ErrorCode_Success on success
*/
[[nodiscard]] auto
try_peek_buffered_data(char const*& buf, size_t& peek_size) const -> ErrorCode;
[[nodiscard]] auto try_peek_buffered_data(char const*& buf, size_t& peek_size) const
-> ErrorCode;

/**
* Peeks the remaining buffered content without advancing the read head.
Expand Down Expand Up @@ -191,8 +191,8 @@ class BufferedFileReader : public ReaderInterface {
* @return ErrorCode_EndOfFile on EOF
* @return ErrorCode_Success on success
*/
[[nodiscard]] auto
try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) -> ErrorCode override;
[[nodiscard]] auto try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read)
-> ErrorCode override;

/**
* Tries to read up to an occurrence of the given delimiter
Expand All @@ -206,12 +206,9 @@ class BufferedFileReader : public ReaderInterface {
* @return Same as BufferReader::try_read_to_delimiter if it fails
* @return ErrorCode_Success on success
*/
[[nodiscard]] auto try_read_to_delimiter(
char delim,
bool keep_delimiter,
bool append,
std::string& str
) -> ErrorCode override;
[[nodiscard]] auto
try_read_to_delimiter(char delim, bool keep_delimiter, bool append, std::string& str)
-> ErrorCode override;

private:
// Methods
Expand Down
2 changes: 1 addition & 1 deletion components/core/src/clp/FileDescriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ FileDescriptor::~FileDescriptor() {
}

auto FileDescriptor::get_size() const -> size_t {
struct stat stat_result {};
struct stat stat_result{};

if (ErrorCode_Success != stat(stat_result)) {
throw OperationFailed(
Expand Down
4 changes: 2 additions & 2 deletions components/core/src/clp/FileDescriptorReader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ class FileDescriptorReader : public ReaderInterface {
* @return ErrorCode_EndOfFile on EOF
* @return ErrorCode_Success on success
*/
[[nodiscard]] auto
try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) -> ErrorCode override;
[[nodiscard]] auto try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read)
-> ErrorCode override;

/**
* Tries to seek to the given position, relative to the beginning of the file.
Expand Down
6 changes: 2 additions & 4 deletions components/core/src/clp/Grep.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -890,10 +890,8 @@ bool Grep::get_bounds_of_next_potential_var(
return (value_length != begin_pos);
}

void Grep::calculate_sub_queries_relevant_to_file(
File const& compressed_file,
vector<Query>& queries
) {
void
Grep::calculate_sub_queries_relevant_to_file(File const& compressed_file, vector<Query>& queries) {
for (auto& query : queries) {
query.make_sub_queries_relevant_to_segment(compressed_file.get_segment_id());
}
Expand Down
14 changes: 6 additions & 8 deletions components/core/src/clp/NetworkReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ extern "C" auto curl_progress_callback(
* @return On success, the number of bytes processed. If this is less than `nmemb`, the download
* will be aborted.
*/
extern "C" auto
curl_write_callback(char* ptr, size_t size, size_t nmemb, void* reader_ptr) -> size_t {
extern "C" auto curl_write_callback(char* ptr, size_t size, size_t nmemb, void* reader_ptr)
-> size_t {
return static_cast<NetworkReader*>(reader_ptr)->buffer_downloaded_data({ptr, size * nmemb});
}
} // namespace
Expand Down Expand Up @@ -252,10 +252,10 @@ auto NetworkReader::acquire_empty_buffer() -> void {
}
std::unique_lock<std::mutex> buffer_resource_lock{m_buffer_resource_mutex};
while (m_filled_buffer_queue.size() == m_buffer_pool_size) {
m_downloader_cv.wait(buffer_resource_lock);
if (is_abort_download_requested()) {
return;
}
m_downloader_cv.wait(buffer_resource_lock);
}
m_curr_downloader_buf.emplace(
m_buffer_pool.at(m_curr_downloader_buf_idx).data(),
Expand Down Expand Up @@ -304,11 +304,9 @@ auto NetworkReader::get_filled_buffer() -> void {
m_curr_reader_buf.emplace(next_reader_buffer);
}

auto NetworkReader::read_from_filled_buffers(
size_t num_bytes_to_read,
size_t& num_bytes_read,
char* dst
) -> ErrorCode {
auto
NetworkReader::read_from_filled_buffers(size_t num_bytes_to_read, size_t& num_bytes_read, char* dst)
-> ErrorCode {
num_bytes_read = 0;
std::optional<BufferView> dst_view;
if (nullptr != dst) {
Expand Down
12 changes: 5 additions & 7 deletions components/core/src/clp/NetworkReader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,8 @@ class NetworkReader : public ReaderInterface {
* @return ErrorCode_EndOfFile if there is no more buffered data.
* @return ErrorCode_Success on success.
*/
[[nodiscard]] auto
try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read) -> ErrorCode override {
[[nodiscard]] auto try_read(char* buf, size_t num_bytes_to_read, size_t& num_bytes_read)
-> ErrorCode override {
return read_from_filled_buffers(num_bytes_to_read, num_bytes_read, buf);
}

Expand Down Expand Up @@ -306,11 +306,9 @@ class NetworkReader : public ReaderInterface {
* @return ErrorCode_EndOfFile if the buffer doesn't contain any more data.
* @return ErrorCode_Success on success.
*/
[[nodiscard]] auto read_from_filled_buffers(
size_t num_bytes_to_read,
size_t& num_bytes_read,
char* dst
) -> ErrorCode;
[[nodiscard]] auto
read_from_filled_buffers(size_t num_bytes_to_read, size_t& num_bytes_read, char* dst)
-> ErrorCode;

/**
* Sets the download completion status with the return code from curl.
Expand Down
2 changes: 1 addition & 1 deletion components/core/src/clp/Thread.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class Thread {
};

// Constructors
Thread() : m_thread_running(false) {};
Thread() : m_thread_running(false) {}

// Destructor
virtual ~Thread();
Expand Down
51 changes: 0 additions & 51 deletions components/core/src/clp/Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,57 +88,6 @@ ErrorCode create_directory_structure(string const& path, mode_t mode) {
return ErrorCode_Success;
}

/**
 * Computes the parent directory of the given path. The path is first normalized with
 * get_unambiguous_path, then everything from the last '/' onwards is dropped.
 * @param path
 * @return "." if the normalized path contains no '/'
 * @return "/" if the only '/' in the normalized path is its first character
 * @return The normalized path truncated just before its last '/' otherwise
 */
string get_parent_directory_path(string const& path) {
    string parent_path = get_unambiguous_path(path);

    auto const separator_pos = parent_path.rfind('/');
    if (string::npos == separator_pos) {
        // No separator at all, so the parent is the current directory
        return ".";
    }
    if (0 == separator_pos) {
        // The only separator is the leading one, so the parent is the root
        return "/";
    }

    // Drop the last separator and the component that follows it
    parent_path.erase(separator_pos);
    return parent_path;
}

/**
 * Normalizes a path by removing "." components, resolving ".." components against the component
 * that precedes them, and collapsing consecutive '/' characters.
 * A ".." with no preceding component left to cancel is dropped. A leading '/' is preserved.
 * @param path
 * @return An empty string if the path is empty, otherwise the normalized path
 */
string get_unambiguous_path(string const& path) {
    if (path.empty()) {
        return {};
    }

    // Tokenize on '/', collapsing runs of adjacent separators
    vector<string> tokens;
    boost::split(tokens, path, boost::is_any_of("/"), boost::token_compress_on);

    // Walk the tokens from last to first so that every ".." cancels the nearest real component
    // that precedes it in the path
    list<string> kept_tokens;
    size_t num_pending_parent_refs = 0;
    for (auto it = tokens.crbegin(); tokens.crend() != it; ++it) {
        auto const& token = *it;
        if (token.empty() || "." == token) {
            continue;
        }
        if (".." == token) {
            ++num_pending_parent_refs;
            continue;
        }
        if (num_pending_parent_refs > 0) {
            // This component is cancelled by a later ".."
            --num_pending_parent_refs;
            continue;
        }
        kept_tokens.emplace_front(token);
    }

    // Rebuild the path: leading separator (if the input had one) followed by the kept components
    string normalized_path;
    if ('/' == path.front()) {
        normalized_path += '/';
    }
    if (false == kept_tokens.empty()) {
        normalized_path += boost::join(kept_tokens, "/");
    }
    return normalized_path;
}

ErrorCode read_list_of_paths(string const& list_path, vector<string>& paths) {
unique_ptr<FileReader> file_reader;
try {
Expand Down
22 changes: 0 additions & 22 deletions components/core/src/clp/Utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,28 +35,6 @@ ErrorCode create_directory(std::string const& path, mode_t mode, bool exist_ok);
*/
ErrorCode create_directory_structure(std::string const& path, mode_t mode);

/**
 * Gets the parent directory path for a given path. The path is normalized with
 * get_unambiguous_path before the parent is determined.
 * Corner cases:
 * - get_parent_directory_path("abc") = "."
 * - get_parent_directory_path(".") = "."
 * - get_parent_directory_path("..") = "."
 * - get_parent_directory_path("/") = "/"
 * - get_parent_directory_path("/.") = "/"
 * - get_parent_directory_path("/..") = "/"
 * - get_parent_directory_path("/abc") = "/"
 * @param path
 * @return Parent directory path
 */
std::string get_parent_directory_path(std::string const& path);

/**
 * Removes ".", "..", and consecutive "/" from a given path and returns the result.
 * Each ".." also removes the component that precedes it; a ".." with no preceding component left
 * to remove is dropped. A leading "/" is preserved, and an empty path yields an empty result.
 * @param path The given path
 * @return The unambiguous path
 */
std::string get_unambiguous_path(std::string const& path);

/**
* Read a list of paths from a file
* @param list_path
Expand Down
22 changes: 10 additions & 12 deletions components/core/src/clp/aws/AwsAuthenticationSigner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ namespace {
* @param timestamp
* @return The formatted date string.
*/
[[nodiscard]] auto get_formatted_date_string(std::chrono::system_clock::time_point const& timestamp
) -> string;
[[nodiscard]] auto get_formatted_date_string(std::chrono::system_clock::time_point const& timestamp)
-> string;

/**
* Gets the string to sign required by AWS Signature Version 4 protocol.
Expand Down Expand Up @@ -89,8 +89,8 @@ auto is_unreserved_characters(char c) -> bool {
return is_alphabet(c) || is_decimal_digit(c) || c == '-' || c == '_' || c == '.' || c == '~';
}

auto get_formatted_timestamp_string(std::chrono::system_clock::time_point const& timestamp
) -> string {
auto get_formatted_timestamp_string(std::chrono::system_clock::time_point const& timestamp)
-> string {
return fmt::format("{:%Y%m%dT%H%M%SZ}", timestamp);
}

Expand Down Expand Up @@ -203,10 +203,9 @@ S3Url::S3Url(string const& url) {
m_host = fmt::format("{}.s3.{}.{}", m_bucket, m_region, m_end_point);
}

auto AwsAuthenticationSigner::generate_presigned_url(
S3Url const& s3_url,
string& presigned_url
) const -> ErrorCode {
auto
AwsAuthenticationSigner::generate_presigned_url(S3Url const& s3_url, string& presigned_url) const
-> ErrorCode {
auto const s3_region = s3_url.get_region();

auto const now = std::chrono::system_clock::now();
Expand Down Expand Up @@ -245,10 +244,9 @@ auto AwsAuthenticationSigner::generate_presigned_url(
return ErrorCode_Success;
}

auto AwsAuthenticationSigner::get_canonical_query_string(
string_view scope,
string_view timestamp
) const -> string {
auto
AwsAuthenticationSigner::get_canonical_query_string(string_view scope, string_view timestamp) const
-> string {
auto const uri = fmt::format("{}/{}", m_access_key_id, scope);
return fmt::format(
"{}={}&{}={}&{}={}&{}={}&{}={}",
Expand Down
11 changes: 5 additions & 6 deletions components/core/src/clp/aws/AwsAuthenticationSigner.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ class AwsAuthenticationSigner {
* @return ErrorCode_Success on success.
* @return Same as `get_sha256_hash` and `AwsAuthenticationSigner::get_signature` on failure.
*/
[[nodiscard]] auto
generate_presigned_url(S3Url const& s3_url, std::string& presigned_url) const -> ErrorCode;
[[nodiscard]] auto generate_presigned_url(S3Url const& s3_url, std::string& presigned_url) const
-> ErrorCode;

private:
/**
Expand All @@ -92,10 +92,9 @@ class AwsAuthenticationSigner {
* @param timestamp
* @return The canonical query string.
*/
[[nodiscard]] auto get_canonical_query_string(
std::string_view scope,
std::string_view timestamp
) const -> std::string;
[[nodiscard]] auto
get_canonical_query_string(std::string_view scope, std::string_view timestamp) const
-> std::string;

/**
* Gets the signature signing key for the request.
Expand Down
Loading

0 comments on commit 5fc76ed

Please sign in to comment.