diff --git a/.github/workflows/csharp.yml b/.github/workflows/csharp.yml index 0ebeaeb5b0b..1842287fc77 100644 --- a/.github/workflows/csharp.yml +++ b/.github/workflows/csharp.yml @@ -98,8 +98,8 @@ jobs: run: ci/scripts/csharp_test.sh $(pwd) macos: - name: AMD64 macOS 13 C# ${{ matrix.dotnet }} - runs-on: macos-13 + name: AMD64 macOS 15 C# ${{ matrix.dotnet }} + runs-on: macos-15-intel if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 15 strategy: diff --git a/.github/workflows/js.yml b/.github/workflows/js.yml index da8202082c9..3db6dfa00b7 100644 --- a/.github/workflows/js.yml +++ b/.github/workflows/js.yml @@ -86,8 +86,8 @@ jobs: run: archery docker push debian-js macos: - name: AMD64 macOS 13 NodeJS ${{ matrix.node }} - runs-on: macos-13 + name: AMD64 macOS 15 NodeJS ${{ matrix.node }} + runs-on: macos-15-intel if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 45 strategy: diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt index a7f5f9dacff..836fecec960 100644 --- a/cpp/src/gandiva/CMakeLists.txt +++ b/cpp/src/gandiva/CMakeLists.txt @@ -64,7 +64,11 @@ set(SRC_FILES decimal_xlarge.cc engine.cc date_utils.cc - encrypt_utils.cc + encrypt_utils_common.cc + encrypt_utils_ecb.cc + encrypt_utils_cbc.cc + encrypt_utils_gcm.cc + encrypt_mode_dispatcher.cc expr_decomposer.cc expr_validator.cc expression.cc @@ -262,7 +266,10 @@ add_gandiva_test(internals-test llvm_generator_test.cc annotator_test.cc tree_expr_test.cc - encrypt_utils_test.cc + encrypt_utils_ecb_test.cc + encrypt_utils_cbc_test.cc + encrypt_utils_gcm_test.cc + encrypt_utils_common_test.cc expr_decomposer_test.cc exported_funcs_registry_test.cc expression_registry_test.cc diff --git a/cpp/src/gandiva/encrypt_mode_dispatcher.cc b/cpp/src/gandiva/encrypt_mode_dispatcher.cc new file mode 100644 index 00000000000..fad1c54ba9f --- /dev/null +++ b/cpp/src/gandiva/encrypt_mode_dispatcher.cc @@ -0,0 +1,138 @@ +// Licensed to the 
Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "gandiva/encrypt_mode_dispatcher.h" +#include "gandiva/encrypt_utils_ecb.h" +#include "gandiva/encrypt_utils_cbc.h" +#include "gandiva/encrypt_utils_gcm.h" +#include "arrow/util/string.h" +#include +#include +#include +#include + +namespace gandiva { + +// Supported encryption modes +static const std::vector SUPPORTED_MODES = { + AES_ECB_MODE, AES_ECB_PKCS7_MODE, AES_ECB_NONE_MODE, + AES_CBC_MODE, AES_CBC_PKCS7_MODE, AES_CBC_NONE_MODE, + AES_GCM_MODE +}; + +enum class EncryptionMode { + ECB, + ECB_PKCS7, + ECB_NONE, + CBC, + CBC_PKCS7, + CBC_NONE, + GCM, + UNKNOWN +}; + +EncryptionMode ParseEncryptionMode(std::string_view mode_str) { + if (mode_str == AES_ECB_MODE) return EncryptionMode::ECB; + if (mode_str == AES_ECB_PKCS7_MODE) return EncryptionMode::ECB_PKCS7; + if (mode_str == AES_ECB_NONE_MODE) return EncryptionMode::ECB_NONE; + if (mode_str == AES_CBC_MODE) return EncryptionMode::CBC; + if (mode_str == AES_CBC_PKCS7_MODE) return EncryptionMode::CBC_PKCS7; + if (mode_str == AES_CBC_NONE_MODE) return EncryptionMode::CBC_NONE; + if (mode_str == AES_GCM_MODE) return EncryptionMode::GCM; + return EncryptionMode::UNKNOWN; +} + +int32_t EncryptModeDispatcher::encrypt( + const 
char* plaintext, int32_t plaintext_len, const char* key, + int32_t key_len, const char* mode, int32_t mode_len, const char* iv, + int32_t iv_len, const char* fifth_argument, int32_t fifth_argument_len, + unsigned char* cipher) { + std::string mode_str = + arrow::internal::AsciiToUpper(std::string_view(mode, mode_len)); + + switch (ParseEncryptionMode(mode_str)) { + case EncryptionMode::ECB: + case EncryptionMode::ECB_PKCS7: + // Shorthand AES-ECB and explicit AES-ECB-PKCS7 both use ECB with PKCS7 padding + return aes_encrypt_ecb(plaintext, plaintext_len, key, key_len, true, cipher); + case EncryptionMode::ECB_NONE: + // ECB without padding + return aes_encrypt_ecb(plaintext, plaintext_len, key, key_len, false, cipher); + case EncryptionMode::CBC: + case EncryptionMode::CBC_PKCS7: + // Shorthand AES-CBC and explicit AES-CBC-PKCS7 both use CBC with PKCS7 + return aes_encrypt_cbc(plaintext, plaintext_len, key, key_len, + iv, iv_len, true, cipher); + case EncryptionMode::CBC_NONE: + // CBC without padding + return aes_encrypt_cbc(plaintext, plaintext_len, key, key_len, + iv, iv_len, false, cipher); + case EncryptionMode::GCM: + return aes_encrypt_gcm(plaintext, plaintext_len, key, key_len, + iv, iv_len, fifth_argument, fifth_argument_len, cipher); + case EncryptionMode::UNKNOWN: + default: { + std::string modes_str = arrow::internal::JoinStrings(SUPPORTED_MODES, ", "); + std::ostringstream oss; + oss << "Unsupported encryption mode: " << mode_str + << ". 
Supported modes: " << modes_str; + throw std::runtime_error(oss.str()); + } + } +} + +int32_t EncryptModeDispatcher::decrypt( + const char* ciphertext, int32_t ciphertext_len, const char* key, + int32_t key_len, const char* mode, int32_t mode_len, const char* iv, + int32_t iv_len, const char* fifth_argument, int32_t fifth_argument_len, + unsigned char* plaintext) { + std::string mode_str = + arrow::internal::AsciiToUpper(std::string_view(mode, mode_len)); + + switch (ParseEncryptionMode(mode_str)) { + case EncryptionMode::ECB: + case EncryptionMode::ECB_PKCS7: + // Shorthand AES-ECB and explicit AES-ECB-PKCS7 both use ECB with PKCS7 padding + return aes_decrypt_ecb(ciphertext, ciphertext_len, key, key_len, true, plaintext); + case EncryptionMode::ECB_NONE: + // ECB without padding + return aes_decrypt_ecb(ciphertext, ciphertext_len, key, key_len, false, plaintext); + case EncryptionMode::CBC: + case EncryptionMode::CBC_PKCS7: + // Shorthand AES-CBC and explicit AES-CBC-PKCS7 both use CBC with PKCS7 + return aes_decrypt_cbc(ciphertext, ciphertext_len, key, key_len, + iv, iv_len, true, plaintext); + case EncryptionMode::CBC_NONE: + // CBC without padding + return aes_decrypt_cbc(ciphertext, ciphertext_len, key, key_len, + iv, iv_len, false, plaintext); + case EncryptionMode::GCM: + return aes_decrypt_gcm(ciphertext, ciphertext_len, key, key_len, + iv, iv_len, fifth_argument, fifth_argument_len, plaintext); + case EncryptionMode::UNKNOWN: + default: { + std::string modes_str = arrow::internal::JoinStrings(SUPPORTED_MODES, ", "); + std::ostringstream oss; + oss << "Unsupported decryption mode: " << mode_str + << ". 
Supported modes: " << modes_str; + throw std::runtime_error(oss.str()); + } + } +} + +} // namespace gandiva + diff --git a/cpp/src/gandiva/encrypt_mode_dispatcher.h b/cpp/src/gandiva/encrypt_mode_dispatcher.h new file mode 100644 index 00000000000..20326845bd0 --- /dev/null +++ b/cpp/src/gandiva/encrypt_mode_dispatcher.h @@ -0,0 +1,83 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef GANDIVA_ENCRYPT_MODE_DISPATCHER_H +#define GANDIVA_ENCRYPT_MODE_DISPATCHER_H + +#include + +namespace gandiva { + +/** + * Dispatcher for AES encryption/decryption based on mode string. + * Routes calls to appropriate implementation. 
+ */ +class EncryptModeDispatcher { + public: + /** + * Encrypt data using the specified mode + * + * @param plaintext The data to encrypt + * @param plaintext_len Length of plaintext in bytes + * @param key The encryption key + * @param key_len Length of key in bytes + * @param mode Mode string + * @param mode_len Length of mode string in bytes + * @param iv The initialization vector (optional, only for modes that support it) + * @param iv_len Length of the IV in bytes + * @param fifth_argument Additional parameter (optional, only for modes that support it) + * @param fifth_argument_len Length of fifth_argument in bytes + * @param cipher Output buffer for encrypted data + * @return Length of encrypted data in bytes + * @throws std::runtime_error on encryption failure or unsupported mode + */ + static int32_t encrypt(const char* plaintext, int32_t plaintext_len, + const char* key, int32_t key_len, + const char* mode, int32_t mode_len, + const char* iv, int32_t iv_len, + const char* fifth_argument, int32_t fifth_argument_len, + unsigned char* cipher); + + /** + * Decrypt data using the specified mode + * + * @param ciphertext The data to decrypt + * @param ciphertext_len Length of ciphertext in bytes + * @param key The decryption key + * @param key_len Length of key in bytes + * @param mode Mode string + * @param mode_len Length of mode string in bytes + * @param iv The initialization vector (optional, only for modes that support it) + * @param iv_len Length of the IV in bytes + * @param fifth_argument Additional parameter (optional, only for modes that support it) + * @param fifth_argument_len Length of fifth_argument in bytes + * @param plaintext Output buffer for decrypted data + * @return Length of decrypted data in bytes + * @throws std::runtime_error on decryption failure or unsupported mode + */ + static int32_t decrypt(const char* ciphertext, int32_t ciphertext_len, + const char* key, int32_t key_len, + const char* mode, int32_t mode_len, + const char* iv, 
int32_t iv_len, + const char* fifth_argument, int32_t fifth_argument_len, + unsigned char* plaintext); +}; + +} // namespace gandiva + +#endif // GANDIVA_ENCRYPT_MODE_DISPATCHER_H + diff --git a/cpp/src/gandiva/encrypt_utils.cc b/cpp/src/gandiva/encrypt_utils.cc deleted file mode 100644 index 16c195d4944..00000000000 --- a/cpp/src/gandiva/encrypt_utils.cc +++ /dev/null @@ -1,124 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "gandiva/encrypt_utils.h" - -#include -#include - -namespace { -const EVP_CIPHER* get_cipher_algo(int32_t key_length) { - switch (key_length) { - case 16: - return EVP_aes_128_ecb(); - case 24: - return EVP_aes_192_ecb(); - case 32: - return EVP_aes_256_ecb(); - default: { - std::ostringstream oss; - oss << "unsupported key length: " << key_length; - throw std::runtime_error(oss.str()); - } - } -} -} // namespace - -namespace gandiva { -GANDIVA_EXPORT -int32_t aes_encrypt(const char* plaintext, int32_t plaintext_len, const char* key, - int32_t key_len, unsigned char* cipher) { - int32_t cipher_len = 0; - int32_t len = 0; - EVP_CIPHER_CTX* en_ctx = EVP_CIPHER_CTX_new(); - const EVP_CIPHER* cipher_algo = get_cipher_algo(key_len); - - if (!en_ctx) { - throw std::runtime_error("could not create a new evp cipher ctx for encryption"); - } - - if (!EVP_EncryptInit_ex(en_ctx, cipher_algo, nullptr, - reinterpret_cast(key), nullptr)) { - throw std::runtime_error("could not initialize evp cipher ctx for encryption"); - } - - if (!EVP_EncryptUpdate(en_ctx, cipher, &len, - reinterpret_cast(plaintext), - plaintext_len)) { - throw std::runtime_error("could not update evp cipher ctx for encryption"); - } - - cipher_len += len; - - if (!EVP_EncryptFinal_ex(en_ctx, cipher + len, &len)) { - throw std::runtime_error("could not finish evp cipher ctx for encryption"); - } - - cipher_len += len; - - EVP_CIPHER_CTX_free(en_ctx); - return cipher_len; -} - -GANDIVA_EXPORT -int32_t aes_decrypt(const char* ciphertext, int32_t ciphertext_len, const char* key, - int32_t key_len, unsigned char* plaintext) { - int32_t plaintext_len = 0; - int32_t len = 0; - EVP_CIPHER_CTX* de_ctx = EVP_CIPHER_CTX_new(); - const EVP_CIPHER* cipher_algo = get_cipher_algo(key_len); - - if (!de_ctx) { - throw std::runtime_error("could not create a new evp cipher ctx for decryption"); - } - - if (!EVP_DecryptInit_ex(de_ctx, cipher_algo, nullptr, - reinterpret_cast(key), nullptr)) { - throw 
std::runtime_error("could not initialize evp cipher ctx for decryption"); - } - - if (!EVP_DecryptUpdate(de_ctx, plaintext, &len, - reinterpret_cast(ciphertext), - ciphertext_len)) { - throw std::runtime_error("could not update evp cipher ctx for decryption"); - } - - plaintext_len += len; - - if (!EVP_DecryptFinal_ex(de_ctx, plaintext + len, &len)) { - throw std::runtime_error("could not finish evp cipher ctx for decryption"); - } - - plaintext_len += len; - - EVP_CIPHER_CTX_free(de_ctx); - return plaintext_len; -} - -const EVP_CIPHER* get_cipher_algo(int32_t key_length) { - switch (key_length) { - case 16: - return EVP_aes_128_ecb(); - case 24: - return EVP_aes_192_ecb(); - case 32: - return EVP_aes_256_ecb(); - default: - throw std::runtime_error("unsupported key length"); - } -} -} // namespace gandiva diff --git a/cpp/src/gandiva/encrypt_utils_cbc.cc b/cpp/src/gandiva/encrypt_utils_cbc.cc new file mode 100644 index 00000000000..04eb60c96a7 --- /dev/null +++ b/cpp/src/gandiva/encrypt_utils_cbc.cc @@ -0,0 +1,169 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "gandiva/encrypt_utils_cbc.h" +#include "gandiva/encrypt_utils_common.h" +#include +#include +#include +#include +#include +#include + +namespace gandiva { + +namespace { + +const EVP_CIPHER* get_cbc_cipher_algo(int32_t key_length) { + switch (key_length) { + case 16: + return EVP_aes_128_cbc(); + case 24: + return EVP_aes_192_cbc(); + case 32: + return EVP_aes_256_cbc(); + default: { + std::ostringstream oss; + oss << "Unsupported key length for AES-CBC: " << key_length + << " bytes. Supported lengths: 16, 24, 32 bytes"; + throw std::runtime_error(oss.str()); + } + } +} + +} // namespace + +GANDIVA_EXPORT +int32_t aes_encrypt_cbc(const char* plaintext, int32_t plaintext_len, const char* key, + int32_t key_len, const char* iv, int32_t iv_len, + bool use_padding, unsigned char* cipher) { + // Validate IV length + if (iv_len != 16) { + std::ostringstream oss; + oss << "Invalid IV length for AES-CBC: " << iv_len + << " bytes. IV must be exactly 16 bytes"; + throw std::runtime_error(oss.str()); + } + + int32_t cipher_len = 0; + int32_t len = 0; + EVP_CIPHER_CTX* en_ctx = EVP_CIPHER_CTX_new(); + const EVP_CIPHER* cipher_algo = get_cbc_cipher_algo(key_len); + + if (!en_ctx) { + throw std::runtime_error("Could not create EVP cipher context for encryption: " + + get_openssl_error_string()); + } + + if (!EVP_EncryptInit_ex(en_ctx, cipher_algo, nullptr, + reinterpret_cast(key), + reinterpret_cast(iv))) { + EVP_CIPHER_CTX_free(en_ctx); + throw std::runtime_error("Could not initialize EVP cipher context for encryption: " + + get_openssl_error_string()); + } + + int padding_flag = use_padding ? 
1 : 0; + if (!EVP_CIPHER_CTX_set_padding(en_ctx, padding_flag)) { + EVP_CIPHER_CTX_free(en_ctx); + throw std::runtime_error("Could not set padding mode for encryption: " + + get_openssl_error_string()); + } + + if (!EVP_EncryptUpdate(en_ctx, cipher, &len, + reinterpret_cast(plaintext), + plaintext_len)) { + EVP_CIPHER_CTX_free(en_ctx); + throw std::runtime_error("Could not update EVP cipher context for encryption: " + + get_openssl_error_string()); + } + + cipher_len += len; + + if (!EVP_EncryptFinal_ex(en_ctx, cipher + len, &len)) { + EVP_CIPHER_CTX_free(en_ctx); + throw std::runtime_error("Could not finalize EVP cipher context for encryption: " + + get_openssl_error_string()); + } + + cipher_len += len; + + EVP_CIPHER_CTX_free(en_ctx); + return cipher_len; +} + +GANDIVA_EXPORT +int32_t aes_decrypt_cbc(const char* ciphertext, int32_t ciphertext_len, const char* key, + int32_t key_len, const char* iv, int32_t iv_len, + bool use_padding, unsigned char* plaintext) { + // Validate IV length + if (iv_len != 16) { + std::ostringstream oss; + oss << "Invalid IV length for AES-CBC: " << iv_len + << " bytes. IV must be exactly 16 bytes"; + throw std::runtime_error(oss.str()); + } + + int32_t plaintext_len = 0; + int32_t len = 0; + EVP_CIPHER_CTX* de_ctx = EVP_CIPHER_CTX_new(); + const EVP_CIPHER* cipher_algo = get_cbc_cipher_algo(key_len); + + if (!de_ctx) { + throw std::runtime_error("Could not create EVP cipher context for decryption: " + + get_openssl_error_string()); + } + + if (!EVP_DecryptInit_ex(de_ctx, cipher_algo, nullptr, + reinterpret_cast(key), + reinterpret_cast(iv))) { + EVP_CIPHER_CTX_free(de_ctx); + throw std::runtime_error("Could not initialize EVP cipher context for decryption: " + + get_openssl_error_string()); + } + + int padding_flag = use_padding ? 
1 : 0; + if (!EVP_CIPHER_CTX_set_padding(de_ctx, padding_flag)) { + EVP_CIPHER_CTX_free(de_ctx); + throw std::runtime_error("Could not set padding mode for decryption: " + + get_openssl_error_string()); + } + + if (!EVP_DecryptUpdate(de_ctx, plaintext, &len, + reinterpret_cast(ciphertext), + ciphertext_len)) { + EVP_CIPHER_CTX_free(de_ctx); + throw std::runtime_error("Could not update EVP cipher context for decryption: " + + get_openssl_error_string()); + } + + plaintext_len += len; + + if (!EVP_DecryptFinal_ex(de_ctx, plaintext + len, &len)) { + EVP_CIPHER_CTX_free(de_ctx); + throw std::runtime_error("Could not finalize EVP cipher context for decryption: " + + get_openssl_error_string()); + } + + plaintext_len += len; + + EVP_CIPHER_CTX_free(de_ctx); + return plaintext_len; +} + +} // namespace gandiva + diff --git a/cpp/src/gandiva/encrypt_utils_cbc.h b/cpp/src/gandiva/encrypt_utils_cbc.h new file mode 100644 index 00000000000..b083d6f0a2d --- /dev/null +++ b/cpp/src/gandiva/encrypt_utils_cbc.h @@ -0,0 +1,70 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include +#include +#include "gandiva/visibility.h" + +namespace gandiva { + +// CBC mode identifiers +constexpr const char* AES_CBC_MODE = "AES-CBC"; +constexpr const char* AES_CBC_PKCS7_MODE = "AES-CBC-PKCS7"; +constexpr const char* AES_CBC_NONE_MODE = "AES-CBC-NONE"; + +/** + * Encrypt data using AES-CBC algorithm with explicit padding mode + * + * @param plaintext The data to encrypt + * @param plaintext_len Length of plaintext in bytes + * @param key The encryption key (16, 24, or 32 bytes for 128, 192, 256-bit keys) + * @param key_len Length of key in bytes + * @param iv The initialization vector (must be exactly 16 bytes) + * @param iv_len Length of IV in bytes (must be 16) + * @param use_padding Whether to use PKCS7 padding (true) or no padding (false) + * @param cipher Output buffer for encrypted data + * @return Length of encrypted data in bytes + * @throws std::runtime_error on encryption failure or invalid parameters + */ +GANDIVA_EXPORT +int32_t aes_encrypt_cbc(const char* plaintext, int32_t plaintext_len, const char* key, + int32_t key_len, const char* iv, int32_t iv_len, + bool use_padding, unsigned char* cipher); + +/** + * Decrypt data using AES-CBC algorithm with explicit padding mode + * + * @param ciphertext The data to decrypt + * @param ciphertext_len Length of ciphertext in bytes + * @param key The decryption key (16, 24, or 32 bytes for 128, 192, 256-bit keys) + * @param key_len Length of key in bytes + * @param iv The initialization vector (must be exactly 16 bytes) + * @param iv_len Length of IV in bytes (must be 16) + * @param use_padding Whether to use PKCS7 padding (true) or no padding (false) + * @param plaintext Output buffer for decrypted data + * @return Length of decrypted data in bytes + * @throws std::runtime_error on decryption failure or invalid parameters + */ +GANDIVA_EXPORT +int32_t aes_decrypt_cbc(const char* ciphertext, int32_t ciphertext_len, const char* key, + int32_t key_len, const char* iv, int32_t 
iv_len, + bool use_padding, unsigned char* plaintext); + +} // namespace gandiva + diff --git a/cpp/src/gandiva/encrypt_utils_cbc_test.cc b/cpp/src/gandiva/encrypt_utils_cbc_test.cc new file mode 100644 index 00000000000..8bf9227d65b --- /dev/null +++ b/cpp/src/gandiva/encrypt_utils_cbc_test.cc @@ -0,0 +1,157 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "gandiva/encrypt_utils_cbc.h" + +#include +#include +#include + +// Test PKCS#7 padding with 16-byte key +TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptPkcs7_16) { + auto* key = "12345678abcdefgh"; + auto* iv = "1234567890123456"; + auto* to_encrypt = "some test string"; + + auto key_len = static_cast(strlen(key)); + auto iv_len = static_cast(strlen(iv)); + auto to_encrypt_len = static_cast(strlen(to_encrypt)); + unsigned char cipher[64]; + + int32_t cipher_len = gandiva::aes_encrypt_cbc(to_encrypt, to_encrypt_len, key, key_len, + iv, iv_len, true, cipher); + + unsigned char decrypted[64]; + int32_t decrypted_len = gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), + cipher_len, key, key_len, iv, iv_len, + true, decrypted); + + EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), + std::string(reinterpret_cast(decrypted), decrypted_len)); +} + +// Test PKCS#7 padding with 24-byte key +TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptPkcs7_24) { + auto* key = "12345678abcdefgh12345678"; + auto* iv = "1234567890123456"; + auto* to_encrypt = "some\ntest\nstring"; + + auto key_len = static_cast(strlen(key)); + auto iv_len = static_cast(strlen(iv)); + auto to_encrypt_len = static_cast(strlen(to_encrypt)); + unsigned char cipher[64]; + + int32_t cipher_len = gandiva::aes_encrypt_cbc(to_encrypt, to_encrypt_len, key, key_len, + iv, iv_len, true, cipher); + + unsigned char decrypted[64]; + int32_t decrypted_len = gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), + cipher_len, key, key_len, iv, iv_len, + true, decrypted); + + EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), + std::string(reinterpret_cast(decrypted), decrypted_len)); +} + +// Test PKCS#7 padding with 32-byte key +TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptPkcs7_32) { + auto* key = "12345678abcdefgh12345678abcdefgh"; + auto* iv = "1234567890123456"; + auto* to_encrypt = "New\ntest\nstring"; + + auto key_len = static_cast(strlen(key)); + auto iv_len = static_cast(strlen(iv)); + auto 
to_encrypt_len = static_cast(strlen(to_encrypt)); + unsigned char cipher[64]; + + int32_t cipher_len = gandiva::aes_encrypt_cbc(to_encrypt, to_encrypt_len, key, key_len, + iv, iv_len, true, cipher); + + unsigned char decrypted[64]; + int32_t decrypted_len = gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), + cipher_len, key, key_len, iv, iv_len, + true, decrypted); + + EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), + std::string(reinterpret_cast(decrypted), decrypted_len)); +} + +// Test no-padding mode with block-aligned data (16 bytes) +TEST(TestAesCbcEncryptUtils, TestAesEncryptDecryptNoPadding_16) { + auto* key = "12345678abcdefgh"; + auto* iv = "1234567890123456"; + auto* to_encrypt = "1234567890123456"; // Exactly 16 bytes + + auto key_len = static_cast(strlen(key)); + auto iv_len = static_cast(strlen(iv)); + auto to_encrypt_len = static_cast(strlen(to_encrypt)); + unsigned char cipher[64]; + + int32_t cipher_len = gandiva::aes_encrypt_cbc(to_encrypt, to_encrypt_len, key, key_len, + iv, iv_len, false, cipher); + + unsigned char decrypted[64]; + int32_t decrypted_len = gandiva::aes_decrypt_cbc(reinterpret_cast(cipher), + cipher_len, key, key_len, iv, iv_len, + false, decrypted); + + EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), + std::string(reinterpret_cast(decrypted), decrypted_len)); +} + +// Test invalid IV length +TEST(TestAesCbcEncryptUtils, TestInvalidIVLength) { + auto* key = "12345678abcdefgh"; + auto* iv = "short"; // Too short + auto* to_encrypt = "test"; + + auto key_len = static_cast(strlen(key)); + auto iv_len = static_cast(strlen(iv)); + auto to_encrypt_len = static_cast(strlen(to_encrypt)); + unsigned char cipher[64]; + + try { + gandiva::aes_encrypt_cbc(to_encrypt, to_encrypt_len, key, key_len, + iv, iv_len, true, cipher); + FAIL() << "Expected std::runtime_error"; + } catch (const std::runtime_error& e) { + EXPECT_THAT(e.what(), testing::HasSubstr("Invalid IV length for AES-CBC")); + } +} + +// Test invalid key length 
+TEST(TestAesCbcEncryptUtils, TestInvalidKeyLength) { + auto* key = "short"; // Too short + auto* iv = "1234567890123456"; + auto* to_encrypt = "test"; + + auto key_len = static_cast(strlen(key)); + auto iv_len = static_cast(strlen(iv)); + auto to_encrypt_len = static_cast(strlen(to_encrypt)); + unsigned char cipher[64]; + + try { + gandiva::aes_encrypt_cbc(to_encrypt, to_encrypt_len, key, key_len, + iv, iv_len, true, cipher); + FAIL() << "Expected std::runtime_error"; + } catch (const std::runtime_error& e) { + EXPECT_THAT(e.what(), testing::HasSubstr("Unsupported key length for AES-CBC")); + } +} + + + diff --git a/cpp/src/gandiva/encrypt_utils_common.cc b/cpp/src/gandiva/encrypt_utils_common.cc new file mode 100644 index 00000000000..3213e0c6e1a --- /dev/null +++ b/cpp/src/gandiva/encrypt_utils_common.cc @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "gandiva/encrypt_utils_common.h" +#include +#include +#include + +namespace gandiva { + +std::string get_openssl_error_string() { + std::string error_string; + unsigned long error_code; + char error_buffer[256]; + + // Loop through all errors in the queue + while ((error_code = ERR_get_error()) != 0) { + if (!error_string.empty()) { + error_string += "; "; + } + ERR_error_string(error_code, error_buffer); + error_string += std::string(error_buffer); + } + + if (error_string.empty()) { + return "Unknown OpenSSL error"; + } + return error_string; +} + +} // namespace gandiva + diff --git a/cpp/src/gandiva/encrypt_utils.h b/cpp/src/gandiva/encrypt_utils_common.h similarity index 60% rename from cpp/src/gandiva/encrypt_utils.h rename to cpp/src/gandiva/encrypt_utils_common.h index 06e178fd65e..62dc14db348 100644 --- a/cpp/src/gandiva/encrypt_utils.h +++ b/cpp/src/gandiva/encrypt_utils_common.h @@ -15,26 +15,21 @@ // specific language governing permissions and limitations // under the License. -#pragma once +#ifndef GANDIVA_ENCRYPT_UTILS_COMMON_H +#define GANDIVA_ENCRYPT_UTILS_COMMON_H -#include -#include -#include "gandiva/visibility.h" +#include namespace gandiva { -/** - * Encrypt data using aes algorithm - **/ -GANDIVA_EXPORT -int32_t aes_encrypt(const char* plaintext, int32_t plaintext_len, const char* key, - int32_t key_len, unsigned char* cipher); - -/** - * Decrypt data using aes algorithm - **/ -GANDIVA_EXPORT -int32_t aes_decrypt(const char* ciphertext, int32_t ciphertext_len, const char* key, - int32_t key_len, unsigned char* plaintext); +/// @brief Get a human-readable error string from OpenSSL's error queue. +/// @details Retrieves all errors from the OpenSSL error queue and concatenates them +/// with "; " as a separator. This ensures complete error information is captured. +/// @return A string describing all OpenSSL errors in the queue, or "Unknown OpenSSL error" +/// if no error is available. 
+std::string get_openssl_error_string(); } // namespace gandiva + +#endif // GANDIVA_ENCRYPT_UTILS_COMMON_H + diff --git a/cpp/src/gandiva/encrypt_utils_common_test.cc b/cpp/src/gandiva/encrypt_utils_common_test.cc new file mode 100644 index 00000000000..de55758d537 --- /dev/null +++ b/cpp/src/gandiva/encrypt_utils_common_test.cc @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "gandiva/encrypt_utils_common.h" + +#include +#include +#include +#include + +// Test that get_openssl_error_string returns "Unknown OpenSSL error" when queue is empty +TEST(TestOpenSSLErrorUtils, TestEmptyErrorQueue) { + // Clear any existing errors + ERR_clear_error(); + + std::string error_string = gandiva::get_openssl_error_string(); + + EXPECT_EQ(error_string, "Unknown OpenSSL error"); +} + +// Test that get_openssl_error_string captures a single error +TEST(TestOpenSSLErrorUtils, TestSingleError) { + // Clear any existing errors + ERR_clear_error(); + + // Add a single error to the queue + ERR_raise(ERR_LIB_EVP, EVP_R_UNSUPPORTED_ALGORITHM); + + std::string error_string = gandiva::get_openssl_error_string(); + + // Verify that the error string is not empty and not the default message + EXPECT_NE(error_string, "Unknown OpenSSL error"); + EXPECT_GT(error_string.length(), 0); +} + +// Test that get_openssl_error_string captures multiple errors +TEST(TestOpenSSLErrorUtils, TestMultipleErrors) { + // Clear any existing errors + ERR_clear_error(); + + // Populate the OpenSSL error queue with multiple errors + ERR_raise(ERR_LIB_EVP, EVP_R_UNSUPPORTED_ALGORITHM); + ERR_raise(ERR_LIB_EVP, EVP_R_INVALID_KEY_LENGTH); + ERR_raise(ERR_LIB_EVP, EVP_R_INVALID_OPERATION); + + // Call our function to get all errors + std::string error_string = gandiva::get_openssl_error_string(); + + // Verify that the error string is not empty + EXPECT_NE(error_string, "Unknown OpenSSL error"); + + // Verify that all errors are captured (they should be separated by "; ") + // The exact error messages depend on OpenSSL version, so we just check + // that we got multiple errors (indicated by the separator) + EXPECT_THAT(error_string, testing::HasSubstr(";")); + + // Verify the error string contains meaningful content (not just separators) + EXPECT_GT(error_string.length(), 10); +} + +// Test that error queue is properly drained after calling get_openssl_error_string 
+TEST(TestOpenSSLErrorUtils, TestErrorQueueDrained) { + // Clear any existing errors + ERR_clear_error(); + + // Add errors to the queue + ERR_raise(ERR_LIB_EVP, EVP_R_UNSUPPORTED_ALGORITHM); + ERR_raise(ERR_LIB_EVP, EVP_R_INVALID_KEY_LENGTH); + + // Call our function to get all errors + std::string error_string = gandiva::get_openssl_error_string(); + + // Verify we got errors + EXPECT_NE(error_string, "Unknown OpenSSL error"); + + // Now call it again - the queue should be empty + std::string second_call = gandiva::get_openssl_error_string(); + + EXPECT_EQ(second_call, "Unknown OpenSSL error"); +} + diff --git a/cpp/src/gandiva/encrypt_utils_ecb.cc b/cpp/src/gandiva/encrypt_utils_ecb.cc new file mode 100644 index 00000000000..b4913e1c880 --- /dev/null +++ b/cpp/src/gandiva/encrypt_utils_ecb.cc @@ -0,0 +1,148 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "gandiva/encrypt_utils_ecb.h" +#include "gandiva/encrypt_utils_common.h" +#include +#include +#include +#include +#include + +namespace gandiva { + +namespace { + +const EVP_CIPHER* get_ecb_cipher_algo(int32_t key_length) { + switch (key_length) { + case 16: + return EVP_aes_128_ecb(); + case 24: + return EVP_aes_192_ecb(); + case 32: + return EVP_aes_256_ecb(); + default: { + std::ostringstream oss; + oss << "Unsupported key length for AES-ECB: " << key_length + << " bytes. Supported lengths: 16, 24, 32 bytes"; + throw std::runtime_error(oss.str()); + } + } +} + +} // namespace + +GANDIVA_EXPORT +int32_t aes_encrypt_ecb(const char* plaintext, int32_t plaintext_len, const char* key, + int32_t key_len, bool use_padding, unsigned char* cipher) { + int32_t cipher_len = 0; + int32_t len = 0; + EVP_CIPHER_CTX* en_ctx = EVP_CIPHER_CTX_new(); + const EVP_CIPHER* cipher_algo = get_ecb_cipher_algo(key_len); + + if (!en_ctx) { + throw std::runtime_error("Could not create EVP cipher context for encryption: " + + get_openssl_error_string()); + } + + if (!EVP_EncryptInit_ex(en_ctx, cipher_algo, nullptr, + reinterpret_cast(key), nullptr)) { + EVP_CIPHER_CTX_free(en_ctx); + throw std::runtime_error("Could not initialize EVP cipher context for encryption: " + + get_openssl_error_string()); + } + + int padding_flag = use_padding ? 
1 : 0; + if (!EVP_CIPHER_CTX_set_padding(en_ctx, padding_flag)) { + EVP_CIPHER_CTX_free(en_ctx); + throw std::runtime_error("Could not set padding mode for encryption: " + + get_openssl_error_string()); + } + + if (!EVP_EncryptUpdate(en_ctx, cipher, &len, + reinterpret_cast(plaintext), + plaintext_len)) { + EVP_CIPHER_CTX_free(en_ctx); + throw std::runtime_error("Could not update EVP cipher context for encryption: " + + get_openssl_error_string()); + } + + cipher_len += len; + + if (!EVP_EncryptFinal_ex(en_ctx, cipher + len, &len)) { + EVP_CIPHER_CTX_free(en_ctx); + throw std::runtime_error("Could not finalize EVP cipher context for encryption: " + + get_openssl_error_string()); + } + + cipher_len += len; + + EVP_CIPHER_CTX_free(en_ctx); + return cipher_len; +} + +GANDIVA_EXPORT +int32_t aes_decrypt_ecb(const char* ciphertext, int32_t ciphertext_len, const char* key, + int32_t key_len, bool use_padding, unsigned char* plaintext) { + int32_t plaintext_len = 0; + int32_t len = 0; + EVP_CIPHER_CTX* de_ctx = EVP_CIPHER_CTX_new(); + const EVP_CIPHER* cipher_algo = get_ecb_cipher_algo(key_len); + + if (!de_ctx) { + throw std::runtime_error("Could not create EVP cipher context for decryption: " + + get_openssl_error_string()); + } + + if (!EVP_DecryptInit_ex(de_ctx, cipher_algo, nullptr, + reinterpret_cast(key), nullptr)) { + EVP_CIPHER_CTX_free(de_ctx); + throw std::runtime_error("Could not initialize EVP cipher context for decryption: " + + get_openssl_error_string()); + } + + int padding_flag = use_padding ? 
1 : 0; + if (!EVP_CIPHER_CTX_set_padding(de_ctx, padding_flag)) { + EVP_CIPHER_CTX_free(de_ctx); + throw std::runtime_error("Could not set padding mode for decryption: " + + get_openssl_error_string()); + } + + if (!EVP_DecryptUpdate(de_ctx, plaintext, &len, + reinterpret_cast(ciphertext), + ciphertext_len)) { + EVP_CIPHER_CTX_free(de_ctx); + throw std::runtime_error("Could not update EVP cipher context for decryption: " + + get_openssl_error_string()); + } + + plaintext_len += len; + + if (!EVP_DecryptFinal_ex(de_ctx, plaintext + len, &len)) { + EVP_CIPHER_CTX_free(de_ctx); + throw std::runtime_error("Could not finalize EVP cipher context for decryption: " + + get_openssl_error_string()); + } + + plaintext_len += len; + + EVP_CIPHER_CTX_free(de_ctx); + return plaintext_len; +} + +} // namespace gandiva + diff --git a/cpp/src/gandiva/encrypt_utils_ecb.h b/cpp/src/gandiva/encrypt_utils_ecb.h new file mode 100644 index 00000000000..ba62bf3bea9 --- /dev/null +++ b/cpp/src/gandiva/encrypt_utils_ecb.h @@ -0,0 +1,70 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include +#include +#include "gandiva/visibility.h" + +namespace gandiva { + +// ECB mode identifiers +constexpr const char* AES_ECB_MODE = "AES-ECB"; +constexpr const char* AES_ECB_PKCS7_MODE = "AES-ECB-PKCS7"; +constexpr const char* AES_ECB_NONE_MODE = "AES-ECB-NONE"; + +/** + * Encrypt data using AES-ECB algorithm (legacy, insecure) + * + * WARNING: ECB mode is deterministic and should not be used for sensitive data. + * Use other encryption modes for better security. + * + * @param plaintext The data to encrypt + * @param plaintext_len Length of plaintext in bytes + * @param key The encryption key (16, 24, or 32 bytes for 128, 192, 256-bit keys) + * @param key_len Length of key in bytes + * @param use_padding Whether to use PKCS7 padding (true) or no padding (false) + * @param cipher Output buffer for encrypted data + * @return Length of encrypted data in bytes + * @throws std::runtime_error on encryption failure + */ +GANDIVA_EXPORT +int32_t aes_encrypt_ecb(const char* plaintext, int32_t plaintext_len, const char* key, + int32_t key_len, bool use_padding, unsigned char* cipher); + +/** + * Decrypt data using AES-ECB algorithm (legacy, insecure) + * + * WARNING: ECB mode is deterministic and should not be used for sensitive data. + * Use other encryption modes for better security. 
+ * + * @param ciphertext The data to decrypt + * @param ciphertext_len Length of ciphertext in bytes + * @param key The decryption key (16, 24, or 32 bytes for 128, 192, 256-bit keys) + * @param key_len Length of key in bytes + * @param use_padding Whether to use PKCS7 padding (true) or no padding (false) + * @param plaintext Output buffer for decrypted data + * @return Length of decrypted data in bytes + * @throws std::runtime_error on decryption failure + */ +GANDIVA_EXPORT +int32_t aes_decrypt_ecb(const char* ciphertext, int32_t ciphertext_len, const char* key, + int32_t key_len, bool use_padding, unsigned char* plaintext); + +} // namespace gandiva + diff --git a/cpp/src/gandiva/encrypt_utils_test.cc b/cpp/src/gandiva/encrypt_utils_ecb_test.cc similarity index 67% rename from cpp/src/gandiva/encrypt_utils_test.cc rename to cpp/src/gandiva/encrypt_utils_ecb_test.cc index 5bc4c3957f2..6f229209bfd 100644 --- a/cpp/src/gandiva/encrypt_utils_test.cc +++ b/cpp/src/gandiva/encrypt_utils_ecb_test.cc @@ -15,11 +15,12 @@ // specific language governing permissions and limitations // under the License. 
-#include "gandiva/encrypt_utils.h" +#include "gandiva/encrypt_utils_ecb.h" #include +#include -TEST(TestShaEncryptUtils, TestAesEncryptDecrypt) { +TEST(TestAesEcbEncryptUtils, TestAesEncryptDecrypt) { // 16 bytes key auto* key = "12345678abcdefgh"; auto* to_encrypt = "some test string"; @@ -29,12 +30,11 @@ TEST(TestShaEncryptUtils, TestAesEncryptDecrypt) { static_cast(strlen(reinterpret_cast(to_encrypt))); unsigned char cipher_1[64]; - int32_t cipher_1_len = - gandiva::aes_encrypt(to_encrypt, to_encrypt_len, key, key_len, cipher_1); + int32_t cipher_1_len = gandiva::aes_encrypt_ecb(to_encrypt, to_encrypt_len, key, key_len, true, cipher_1); unsigned char decrypted_1[64]; - int32_t decrypted_1_len = gandiva::aes_decrypt(reinterpret_cast(cipher_1), - cipher_1_len, key, key_len, decrypted_1); + int32_t decrypted_1_len = gandiva::aes_decrypt_ecb(reinterpret_cast(cipher_1), + cipher_1_len, key, key_len, true, decrypted_1); EXPECT_EQ(std::string(reinterpret_cast(to_encrypt), to_encrypt_len), std::string(reinterpret_cast(decrypted_1), decrypted_1_len)); @@ -48,12 +48,11 @@ TEST(TestShaEncryptUtils, TestAesEncryptDecrypt) { static_cast(strlen(reinterpret_cast(to_encrypt))); unsigned char cipher_2[64]; - int32_t cipher_2_len = - gandiva::aes_encrypt(to_encrypt, to_encrypt_len, key, key_len, cipher_2); + int32_t cipher_2_len = gandiva::aes_encrypt_ecb(to_encrypt, to_encrypt_len, key, key_len, true, cipher_2); unsigned char decrypted_2[64]; - int32_t decrypted_2_len = gandiva::aes_decrypt(reinterpret_cast(cipher_2), - cipher_2_len, key, key_len, decrypted_2); + int32_t decrypted_2_len = gandiva::aes_decrypt_ecb(reinterpret_cast(cipher_2), + cipher_2_len, key, key_len, true, decrypted_2); EXPECT_EQ(std::string(reinterpret_cast(to_encrypt), to_encrypt_len), std::string(reinterpret_cast(decrypted_2), decrypted_2_len)); @@ -67,12 +66,11 @@ TEST(TestShaEncryptUtils, TestAesEncryptDecrypt) { static_cast(strlen(reinterpret_cast(to_encrypt))); unsigned char cipher_3[64]; - int32_t 
cipher_3_len = - gandiva::aes_encrypt(to_encrypt, to_encrypt_len, key, key_len, cipher_3); + int32_t cipher_3_len = gandiva::aes_encrypt_ecb(to_encrypt, to_encrypt_len, key, key_len, true, cipher_3); unsigned char decrypted_3[64]; - int32_t decrypted_3_len = gandiva::aes_decrypt(reinterpret_cast(cipher_3), - cipher_3_len, key, key_len, decrypted_3); + int32_t decrypted_3_len = gandiva::aes_decrypt_ecb(reinterpret_cast(cipher_3), + cipher_3_len, key, key_len, true, decrypted_3); EXPECT_EQ(std::string(reinterpret_cast(to_encrypt), to_encrypt_len), std::string(reinterpret_cast(decrypted_3), decrypted_3_len)); @@ -90,12 +88,13 @@ TEST(TestShaEncryptUtils, TestAesEncryptDecrypt) { to_encrypt_len = static_cast(strlen(reinterpret_cast(to_encrypt))); unsigned char cipher_4[64]; - ASSERT_THROW( - { gandiva::aes_encrypt(to_encrypt, to_encrypt_len, key, key_len, cipher_4); }, - std::runtime_error); + ASSERT_THROW({ + gandiva::aes_encrypt_ecb(to_encrypt, to_encrypt_len, key, key_len, true, cipher_4); + }, std::runtime_error); - ASSERT_THROW({ gandiva::aes_decrypt(cipher, cipher_len, key, key_len, plain_text); }, - std::runtime_error); + ASSERT_THROW({ + gandiva::aes_decrypt_ecb(cipher, cipher_len, key, key_len, true, plain_text); + }, std::runtime_error); key = "12345678"; to_encrypt = "New\ntest\nstring"; @@ -104,9 +103,11 @@ TEST(TestShaEncryptUtils, TestAesEncryptDecrypt) { to_encrypt_len = static_cast(strlen(reinterpret_cast(to_encrypt))); unsigned char cipher_5[64]; - ASSERT_THROW( - { gandiva::aes_encrypt(to_encrypt, to_encrypt_len, key, key_len, cipher_5); }, - std::runtime_error); - ASSERT_THROW({ gandiva::aes_decrypt(cipher, cipher_len, key, key_len, plain_text); }, - std::runtime_error); + ASSERT_THROW({ + gandiva::aes_encrypt_ecb(to_encrypt, to_encrypt_len, key, key_len, true, cipher_5); + }, std::runtime_error); + ASSERT_THROW({ + gandiva::aes_decrypt_ecb(cipher, cipher_len, key, key_len, true, plain_text); + }, std::runtime_error); } + diff --git 
a/cpp/src/gandiva/encrypt_utils_gcm.cc b/cpp/src/gandiva/encrypt_utils_gcm.cc
new file mode 100644
index 00000000000..f028243da59
--- /dev/null
+++ b/cpp/src/gandiva/encrypt_utils_gcm.cc
@@ -0,0 +1,214 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gandiva/encrypt_utils_gcm.h"
+#include "gandiva/encrypt_utils_common.h"
+#include <openssl/evp.h>
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+
+namespace gandiva {
+
+namespace {
+
+// Deleter that lets std::unique_ptr own an EVP_CIPHER_CTX, so the context is
+// freed on every exit path, including when an error is thrown.
+struct GcmCtxDeleter {
+  void operator()(EVP_CIPHER_CTX* ctx) const { EVP_CIPHER_CTX_free(ctx); }
+};
+using GcmCtxPtr = std::unique_ptr<EVP_CIPHER_CTX, GcmCtxDeleter>;
+
+// Maps a key length in bytes to the matching AES-GCM cipher; throws otherwise.
+const EVP_CIPHER* get_gcm_cipher_algo(int32_t key_length) {
+  switch (key_length) {
+    case 16:
+      return EVP_aes_128_gcm();
+    case 24:
+      return EVP_aes_192_gcm();
+    case 32:
+      return EVP_aes_256_gcm();
+    default: {
+      std::ostringstream oss;
+      oss << "Unsupported key length for AES-GCM: " << key_length
+          << " bytes. Supported lengths: 16, 24, 32 bytes";
+      throw std::runtime_error(oss.str());
+    }
+  }
+}
+
+}  // namespace
+
+// Encrypts `plaintext` with AES-GCM into `cipher` and appends the GCM_TAG_LENGTH-byte
+// authentication tag. Returns the total number of bytes written, tag included.
+GANDIVA_EXPORT
+int32_t aes_encrypt_gcm(const char* plaintext, int32_t plaintext_len,
+                        const char* key, int32_t key_len, const char* iv,
+                        int32_t iv_len, const char* aad, int32_t aad_len,
+                        unsigned char* cipher) {
+  if (iv_len <= 0) {
+    throw std::runtime_error(
+        "Invalid IV length for AES-GCM: IV length must be greater than 0");
+  }
+
+  // Resolve the cipher first: a bad key length throws before anything is allocated.
+  const EVP_CIPHER* cipher_algo = get_gcm_cipher_algo(key_len);
+
+  GcmCtxPtr en_ctx(EVP_CIPHER_CTX_new());
+  if (!en_ctx) {
+    throw std::runtime_error("Could not create EVP cipher context for encryption: " +
+                             get_openssl_error_string());
+  }
+
+  // GCM setup is three steps: select the cipher, set the IV length, then pass the key
+  // and IV. Supplying the IV in the first call would use the default 12-byte IV
+  // length, because the length has not been set yet.
+  if (!EVP_EncryptInit_ex(en_ctx.get(), cipher_algo, nullptr, nullptr, nullptr)) {
+    throw std::runtime_error(
+        "Could not initialize EVP cipher context for encryption: " +
+        get_openssl_error_string());
+  }
+  if (!EVP_CIPHER_CTX_ctrl(en_ctx.get(), EVP_CTRL_GCM_SET_IVLEN, iv_len, nullptr)) {
+    throw std::runtime_error("Could not set GCM IV length: " +
+                             get_openssl_error_string());
+  }
+  if (!EVP_EncryptInit_ex(en_ctx.get(), nullptr, nullptr,
+                          reinterpret_cast<const unsigned char*>(key),
+                          reinterpret_cast<const unsigned char*>(iv))) {
+    throw std::runtime_error("Could not set key and IV for encryption: " +
+                             get_openssl_error_string());
+  }
+
+  int32_t len = 0;
+
+  // AAD is authenticated but not encrypted; a null output buffer marks the data as AAD.
+  if (aad != nullptr && aad_len > 0) {
+    if (!EVP_EncryptUpdate(en_ctx.get(), nullptr, &len,
+                           reinterpret_cast<const unsigned char*>(aad), aad_len)) {
+      throw std::runtime_error("Could not process AAD for encryption: " +
+                               get_openssl_error_string());
+    }
+  }
+
+  if (!EVP_EncryptUpdate(en_ctx.get(), cipher, &len,
+                         reinterpret_cast<const unsigned char*>(plaintext),
+                         plaintext_len)) {
+    throw std::runtime_error("Could not update EVP cipher context for encryption: " +
+                             get_openssl_error_string());
+  }
+  int32_t cipher_len = len;
+
+  if (!EVP_EncryptFinal_ex(en_ctx.get(), cipher + cipher_len, &len)) {
+    throw std::runtime_error("Could not finalize EVP cipher context for encryption: " +
+                             get_openssl_error_string());
+  }
+  cipher_len += len;
+
+  // The tag is written directly after the ciphertext.
+  if (!EVP_CIPHER_CTX_ctrl(en_ctx.get(), EVP_CTRL_GCM_GET_TAG, GCM_TAG_LENGTH,
+                           cipher + cipher_len)) {
+    throw std::runtime_error("Could not get GCM authentication tag: " +
+                             get_openssl_error_string());
+  }
+  return cipher_len + GCM_TAG_LENGTH;
+}
+
+// Decrypts AES-GCM `ciphertext` whose last GCM_TAG_LENGTH bytes are the authentication
+// tag. Returns the plaintext length; throws if the tag does not verify.
+GANDIVA_EXPORT
+int32_t aes_decrypt_gcm(const char* ciphertext, int32_t ciphertext_len,
+                        const char* key, int32_t key_len, const char* iv,
+                        int32_t iv_len, const char* aad, int32_t aad_len,
+                        unsigned char* plaintext) {
+  if (iv_len <= 0) {
+    throw std::runtime_error(
+        "Invalid IV length for AES-GCM: IV length must be greater than 0");
+  }
+
+  if (ciphertext_len < GCM_TAG_LENGTH) {
+    throw std::runtime_error(
+        "Ciphertext too short for AES-GCM: must be at least 16 bytes for tag");
+  }
+
+  // Resolve the cipher first: a bad key length throws before anything is allocated.
+  const EVP_CIPHER* cipher_algo = get_gcm_cipher_algo(key_len);
+
+  GcmCtxPtr de_ctx(EVP_CIPHER_CTX_new());
+  if (!de_ctx) {
+    throw std::runtime_error("Could not create EVP cipher context for decryption: " +
+                             get_openssl_error_string());
+  }
+
+  // Same order as encryption: cipher, then IV length, then key and IV.
+  if (!EVP_DecryptInit_ex(de_ctx.get(), cipher_algo, nullptr, nullptr, nullptr)) {
+    throw std::runtime_error(
+        "Could not initialize EVP cipher context for decryption: " +
+        get_openssl_error_string());
+  }
+  if (!EVP_CIPHER_CTX_ctrl(de_ctx.get(), EVP_CTRL_GCM_SET_IVLEN, iv_len, nullptr)) {
+    throw std::runtime_error("Could not set GCM IV length: " +
+                             get_openssl_error_string());
+  }
+  if (!EVP_DecryptInit_ex(de_ctx.get(), nullptr, nullptr,
+                          reinterpret_cast<const unsigned char*>(key),
+                          reinterpret_cast<const unsigned char*>(iv))) {
+    throw std::runtime_error("Could not set key and IV for decryption: " +
+                             get_openssl_error_string());
+  }
+
+  int32_t len = 0;
+
+  // AAD must match what was supplied at encryption time or the tag check fails.
+  if (aad != nullptr && aad_len > 0) {
+    if (!EVP_DecryptUpdate(de_ctx.get(), nullptr, &len,
+                           reinterpret_cast<const unsigned char*>(aad), aad_len)) {
+      throw std::runtime_error("Could not process AAD for decryption: " +
+                               get_openssl_error_string());
+    }
+  }
+
+  // The last GCM_TAG_LENGTH bytes of the input are the tag, not ciphertext.
+  const int32_t actual_ciphertext_len = ciphertext_len - GCM_TAG_LENGTH;
+  const unsigned char* tag =
+      reinterpret_cast<const unsigned char*>(ciphertext + actual_ciphertext_len);
+
+  // The ctrl call takes a non-const pointer, hence the const_cast.
+  if (!EVP_CIPHER_CTX_ctrl(de_ctx.get(), EVP_CTRL_GCM_SET_TAG, GCM_TAG_LENGTH,
+                           const_cast<unsigned char*>(tag))) {
+    throw std::runtime_error("Could not set GCM authentication tag: " +
+                             get_openssl_error_string());
+  }
+
+  if (!EVP_DecryptUpdate(de_ctx.get(), plaintext, &len,
+                         reinterpret_cast<const unsigned char*>(ciphertext),
+                         actual_ciphertext_len)) {
+    throw std::runtime_error("Could not update EVP cipher context for decryption: " +
+                             get_openssl_error_string());
+  }
+  int32_t plaintext_len = len;
+
+  // Final is where the tag is verified; a mismatch makes it fail.
+  if (!EVP_DecryptFinal_ex(de_ctx.get(), plaintext + plaintext_len, &len)) {
+    throw std::runtime_error("GCM tag verification failed or decryption error: " +
+                             get_openssl_error_string());
+  }
+  return plaintext_len + len;
+}
+
+}  // namespace gandiva
+
diff --git a/cpp/src/gandiva/encrypt_utils_gcm.h b/cpp/src/gandiva/encrypt_utils_gcm.h
new file mode 100644
index 00000000000..07a597af0b6
--- /dev/null
+++ b/cpp/src/gandiva/encrypt_utils_gcm.h
@@ -0,0 +1,73 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <openssl/evp.h>
+#include <cstdint>
+#include "gandiva/visibility.h"
+
+namespace gandiva {
+
+// GCM mode identifier
+constexpr const char* AES_GCM_MODE = "AES-GCM";
+
+// GCM authentication tag length in bytes
+constexpr int32_t GCM_TAG_LENGTH = 16;
+
+/**
+ * Encrypt data using AES-GCM algorithm
+ *
+ * @param plaintext The data to encrypt
+ * @param plaintext_len Length of plaintext in bytes
+ * @param key The encryption key (16, 24, or 32 bytes for 128, 192, 256-bit keys)
+ * @param key_len Length of key in bytes
+ * @param iv The initialization vector (variable length, typically 12 bytes)
+ * @param iv_len Length of IV in bytes
+ * @param aad Optional additional authenticated data (can be null)
+ * @param aad_len Length of AAD in bytes (0 if aad is null)
+ * @param cipher Output buffer for encrypted data (must be at least plaintext_len + 16 bytes)
+ * @return Length of encrypted data in bytes (plaintext_len + 16 for the tag)
+ * @throws std::runtime_error on encryption failure or invalid parameters
+ */
+GANDIVA_EXPORT
+int32_t aes_encrypt_gcm(const char* plaintext, int32_t plaintext_len, const char* key,
+                        int32_t key_len, const char* iv, int32_t iv_len,
+                        const char* aad, int32_t aad_len, unsigned char* cipher);
+
+/**
+ * Decrypt data using AES-GCM algorithm
+ *
+ * @param ciphertext The data to decrypt (includes 16-byte authentication tag at the end)
+ * @param ciphertext_len Length of ciphertext in bytes (includes tag)
+ * @param key The decryption key (16, 24, or 32 bytes for 128, 192, 256-bit keys)
+ * @param key_len Length of key in bytes
+ * @param iv The initialization vector (variable length, typically 12 bytes)
+ * @param iv_len Length of IV in bytes
+ * @param aad Optional additional authenticated data (can be null)
+ * @param aad_len Length of AAD in bytes (0 if aad is null)
+ * @param plaintext Output buffer for decrypted data
+ * @return Length of decrypted data in bytes (ciphertext_len - 16)
+ * @throws std::runtime_error on decryption failure, invalid parameters, or tag verification failure
+ */
+GANDIVA_EXPORT
+int32_t aes_decrypt_gcm(const char* ciphertext, int32_t ciphertext_len, const char* key,
+                        int32_t key_len, const char* iv, int32_t iv_len,
+                        const char* aad, int32_t aad_len, unsigned char* plaintext);
+
+}  // namespace gandiva
+
diff --git a/cpp/src/gandiva/encrypt_utils_gcm_test.cc b/cpp/src/gandiva/encrypt_utils_gcm_test.cc
new file mode 100644
index 00000000000..2156132bc62
--- /dev/null
+++ b/cpp/src/gandiva/encrypt_utils_gcm_test.cc
@@ -0,0 +1,162 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+ +#include "gandiva/encrypt_utils_gcm.h" + +#include +#include +#include + +// Test IV-only GCM with 16-byte key +TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptIvOnly_16) { + auto* key = "12345678abcdefgh"; + auto* iv = "123456789012"; // 12-byte IV + auto* to_encrypt = "some test string"; + + auto key_len = static_cast(strlen(key)); + auto iv_len = static_cast(strlen(iv)); + auto to_encrypt_len = static_cast(strlen(to_encrypt)); + unsigned char cipher[128]; + + int32_t cipher_len = gandiva::aes_encrypt_gcm(to_encrypt, to_encrypt_len, key, key_len, + iv, iv_len, nullptr, 0, cipher); + + // Ciphertext should be plaintext_len + 16 (tag) + EXPECT_EQ(cipher_len, to_encrypt_len + 16); + + unsigned char decrypted[128]; + int32_t decrypted_len = gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), + cipher_len, key, key_len, iv, iv_len, + nullptr, 0, decrypted); + + EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), + std::string(reinterpret_cast(decrypted), decrypted_len)); +} + +// Test IV + AAD GCM with 16-byte key +TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptWithAad_16) { + auto* key = "12345678abcdefgh"; + auto* iv = "123456789012"; + auto* to_encrypt = "some test string"; + auto* aad = "additional authenticated data"; + + auto key_len = static_cast(strlen(key)); + auto iv_len = static_cast(strlen(iv)); + auto to_encrypt_len = static_cast(strlen(to_encrypt)); + auto aad_len = static_cast(strlen(aad)); + unsigned char cipher[128]; + + int32_t cipher_len = gandiva::aes_encrypt_gcm(to_encrypt, to_encrypt_len, key, key_len, + iv, iv_len, aad, aad_len, cipher); + + EXPECT_EQ(cipher_len, to_encrypt_len + 16); + + unsigned char decrypted[128]; + int32_t decrypted_len = gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), + cipher_len, key, key_len, iv, iv_len, + aad, aad_len, decrypted); + + EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), + std::string(reinterpret_cast(decrypted), decrypted_len)); +} + +// Test IV-only GCM with 24-byte key 
+TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptIvOnly_24) { + auto* key = "12345678abcdefgh12345678"; + auto* iv = "123456789012"; + auto* to_encrypt = "test data"; + + auto key_len = static_cast(strlen(key)); + auto iv_len = static_cast(strlen(iv)); + auto to_encrypt_len = static_cast(strlen(to_encrypt)); + unsigned char cipher[128]; + + int32_t cipher_len = gandiva::aes_encrypt_gcm(to_encrypt, to_encrypt_len, key, key_len, + iv, iv_len, nullptr, 0, cipher); + + unsigned char decrypted[128]; + int32_t decrypted_len = gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), + cipher_len, key, key_len, iv, iv_len, + nullptr, 0, decrypted); + + EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), + std::string(reinterpret_cast(decrypted), decrypted_len)); +} + +// Test IV-only GCM with 32-byte key +TEST(TestAesGcmEncryptUtils, TestAesEncryptDecryptIvOnly_32) { + auto* key = "12345678abcdefgh12345678abcdefgh"; + auto* iv = "123456789012"; + auto* to_encrypt = "another test"; + + auto key_len = static_cast(strlen(key)); + auto iv_len = static_cast(strlen(iv)); + auto to_encrypt_len = static_cast(strlen(to_encrypt)); + unsigned char cipher[128]; + + int32_t cipher_len = gandiva::aes_encrypt_gcm(to_encrypt, to_encrypt_len, key, key_len, + iv, iv_len, nullptr, 0, cipher); + + unsigned char decrypted[128]; + int32_t decrypted_len = gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), + cipher_len, key, key_len, iv, iv_len, + nullptr, 0, decrypted); + + EXPECT_EQ(std::string(to_encrypt, to_encrypt_len), + std::string(reinterpret_cast(decrypted), decrypted_len)); +} + +// Test tag verification failure +TEST(TestAesGcmEncryptUtils, TestTagVerificationFailure) { + auto* key = "12345678abcdefgh"; + auto* iv = "123456789012"; + auto* to_encrypt = "some test string"; + + auto key_len = static_cast(strlen(key)); + auto iv_len = static_cast(strlen(iv)); + auto to_encrypt_len = static_cast(strlen(to_encrypt)); + unsigned char cipher[128]; + + int32_t cipher_len = 
gandiva::aes_encrypt_gcm(to_encrypt, to_encrypt_len, key, key_len, + iv, iv_len, nullptr, 0, cipher); + + // Corrupt the tag (last byte) + cipher[cipher_len - 1] ^= 0xFF; + + unsigned char decrypted[128]; + EXPECT_THROW(gandiva::aes_decrypt_gcm(reinterpret_cast(cipher), + cipher_len, key, key_len, iv, iv_len, + nullptr, 0, decrypted), + std::runtime_error); +} + +// Test invalid IV length +TEST(TestAesGcmEncryptUtils, TestInvalidIvLength) { + auto* key = "12345678abcdefgh"; + auto* iv = ""; // Empty IV + auto* to_encrypt = "some test string"; + + auto key_len = static_cast(strlen(key)); + auto iv_len = static_cast(strlen(iv)); + auto to_encrypt_len = static_cast(strlen(to_encrypt)); + unsigned char cipher[128]; + + EXPECT_THROW(gandiva::aes_encrypt_gcm(to_encrypt, to_encrypt_len, key, key_len, + iv, iv_len, nullptr, 0, cipher), + std::runtime_error); +} + diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc index 2bc6936d77b..7750421360e 100644 --- a/cpp/src/gandiva/function_registry_string.cc +++ b/cpp/src/gandiva/function_registry_string.cc @@ -494,12 +494,41 @@ std::vector GetStringFunctionRegistry() { kResultNullIfNull, "split_part", NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), + // ECB mode specific functions + // String-based signatures (UTF8, UTF8) -> UTF8 NativeFunction("aes_encrypt", {}, DataTypeVector{utf8(), utf8()}, utf8(), - kResultNullIfNull, "gdv_fn_aes_encrypt", + kResultNullIfNull, "gdv_fn_aes_encrypt_ecb_legacy", NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), NativeFunction("aes_decrypt", {}, DataTypeVector{utf8(), utf8()}, utf8(), - kResultNullIfNull, "gdv_fn_aes_decrypt", + kResultNullIfNull, "gdv_fn_aes_decrypt_ecb_legacy", + NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), + + // Parameters: data, key, mode (e.g. 
ECB mode) + NativeFunction("encrypt", {}, DataTypeVector{binary(), binary(), utf8()}, binary(), + kResultNullIfNull, "gdv_fn_encrypt_dispatcher_3args", + NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), + + NativeFunction("decrypt", {}, DataTypeVector{binary(), binary(), utf8()}, binary(), + kResultNullIfNull, "gdv_fn_decrypt_dispatcher_3args", + NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), + + // Parameters: data, key, mode, iv (e.g. CBC mode) + NativeFunction("encrypt", {}, DataTypeVector{binary(), binary(), utf8(), binary()}, binary(), + kResultNullIfNull, "gdv_fn_encrypt_dispatcher_4args", + NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), + + NativeFunction("decrypt", {}, DataTypeVector{binary(), binary(), utf8(), binary()}, binary(), + kResultNullIfNull, "gdv_fn_decrypt_dispatcher_4args", + NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), + + // Parameters: data, key, mode, iv, fifth_argument (e.g. GCM mode) + NativeFunction("encrypt", {}, DataTypeVector{binary(), binary(), utf8(), binary(), binary()}, binary(), + kResultNullIfNull, "gdv_fn_encrypt_dispatcher_5args", + NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), + + NativeFunction("decrypt", {}, DataTypeVector{binary(), binary(), utf8(), binary(), binary()}, binary(), + kResultNullIfNull, "gdv_fn_decrypt_dispatcher_5args", NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), NativeFunction("mask_first_n", {}, DataTypeVector{utf8(), int32()}, utf8(), diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc index 7a47f7491a4..070a1463a0e 100644 --- a/cpp/src/gandiva/gdv_function_stubs.cc +++ b/cpp/src/gandiva/gdv_function_stubs.cc @@ -19,6 +19,7 @@ #include +#include #include #include #include @@ -29,7 +30,9 @@ #include "arrow/util/double_conversion.h" #include "arrow/util/value_parsing.h" -#include "gandiva/encrypt_utils.h" +#include "gandiva/encrypt_utils_ecb.h" 
+#include "gandiva/encrypt_utils_cbc.h" +#include "gandiva/encrypt_mode_dispatcher.h" #include "gandiva/engine.h" #include "gandiva/exported_funcs.h" #include "gandiva/in_holder.h" @@ -307,93 +310,7 @@ CAST_NUMERIC_FROM_VARBINARY(double, arrow::DoubleType, FLOAT8) #undef GDV_FN_CAST_VARCHAR_INTEGER #undef GDV_FN_CAST_VARCHAR_REAL -GANDIVA_EXPORT -const char* gdv_fn_aes_encrypt(int64_t context, const char* data, int32_t data_len, - const char* key_data, int32_t key_data_len, - int32_t* out_len) { - if (data_len < 0) { - gdv_fn_context_set_error_msg(context, "Invalid data length to be encrypted"); - *out_len = 0; - return ""; - } - int64_t kAesBlockSize = 0; - if (key_data_len == 16 || key_data_len == 24 || key_data_len == 32) { - kAesBlockSize = static_cast(key_data_len); - } else { - std::ostringstream oss; - oss << "invalid key length: " << key_data_len; - gdv_fn_context_set_error_msg(context, oss.str().c_str()); - *out_len = 0; - return nullptr; - } - - *out_len = - static_cast(arrow::bit_util::RoundUpToPowerOf2(data_len, kAesBlockSize)); - char* ret = reinterpret_cast(gdv_fn_context_arena_malloc(context, *out_len)); - if (ret == nullptr) { - std::string err_msg = - "Could not allocate memory for returning aes encrypt cypher text"; - gdv_fn_context_set_error_msg(context, err_msg.data()); - *out_len = 0; - return nullptr; - } - - try { - *out_len = gandiva::aes_encrypt(data, data_len, key_data, key_data_len, - reinterpret_cast(ret)); - } catch (const std::runtime_error& e) { - gdv_fn_context_set_error_msg(context, e.what()); - *out_len = 0; - return nullptr; - } - - return ret; -} - -GANDIVA_EXPORT -const char* gdv_fn_aes_decrypt(int64_t context, const char* data, int32_t data_len, - const char* key_data, int32_t key_data_len, - int32_t* out_len) { - if (data_len < 0) { - gdv_fn_context_set_error_msg(context, "Invalid data length to be decrypted"); - *out_len = 0; - return ""; - } - - int64_t kAesBlockSize = 0; - if (key_data_len == 16 || key_data_len == 24 || 
key_data_len == 32) { - kAesBlockSize = static_cast(key_data_len); - } else { - std::ostringstream oss; - oss << "invalid key length: " << key_data_len; - gdv_fn_context_set_error_msg(context, oss.str().c_str()); - *out_len = 0; - return nullptr; - } - - *out_len = - static_cast(arrow::bit_util::RoundUpToPowerOf2(data_len, kAesBlockSize)); - char* ret = reinterpret_cast(gdv_fn_context_arena_malloc(context, *out_len)); - if (ret == nullptr) { - std::string err_msg = - "Could not allocate memory for returning aes encrypt cypher text"; - gdv_fn_context_set_error_msg(context, err_msg.data()); - *out_len = 0; - return nullptr; - } - - try { - *out_len = gandiva::aes_decrypt(data, data_len, key_data, key_data_len, - reinterpret_cast(ret)); - } catch (const std::runtime_error& e) { - gdv_fn_context_set_error_msg(context, e.what()); - *out_len = 0; - return nullptr; - } - ret[*out_len] = '\0'; - return ret; -} GANDIVA_EXPORT const char* gdv_mask_first_n_utf8_int32(int64_t context, const char* data, @@ -847,6 +764,158 @@ const char* gdv_mask_show_last_n_utf8_int32(int64_t context, const char* data, namespace gandiva { +// Legacy wrapper for string-based signatures (UTF8, UTF8) -> UTF8 +// This is called by the LLVM engine with string calling convention +// WARNING: This function is for backward compatibility only. Encrypted binary +// data is not guaranteed to be valid UTF-8. Use binary signatures for new code. 
+extern "C" GANDIVA_EXPORT
+const char* gdv_fn_aes_encrypt_ecb_legacy(int64_t context, const char* data,
+                                          int32_t data_len,
+                                          const char* key_data,
+                                          int32_t key_data_len,
+                                          int32_t* out_len) {
+  // This entry point is ECB-only, so the mode is fixed here instead of being
+  // taken from the caller; the actual encryption is delegated to the 3-arg
+  // dispatcher.
+  const char* mode = "AES-ECB";
+  int32_t mode_len = 7;
+  const char* cipher = gdv_fn_encrypt_dispatcher_3args(
+      context, data, data_len, key_data, key_data_len, mode, mode_len, out_len);
+  if (cipher == nullptr) {
+    // The dispatcher has already recorded the error and zeroed *out_len.
+    return nullptr;
+  }
+
+  // The string calling convention wants a trailing '\0'. The dispatcher's
+  // buffer is only data_len + 16 bytes, and PKCS7-padded output fills it
+  // completely when data_len is a multiple of the AES block size, so writing
+  // the terminator in place would land one byte past the allocation. Copy the
+  // ciphertext into a buffer that has room for the terminator instead.
+  // Note: the result may not be valid UTF-8.
+  const int32_t cipher_len = *out_len;
+  auto* terminated = reinterpret_cast<char*>(
+      gdv_fn_context_arena_malloc(context, cipher_len + 1));
+  if (terminated == nullptr) {
+    gdv_fn_context_set_error_msg(
+        context, "Memory allocation failed for encryption output");
+    *out_len = 0;
+    return nullptr;
+  }
+  for (int32_t i = 0; i < cipher_len; ++i) {
+    terminated[i] = cipher[i];
+  }
+  terminated[cipher_len] = '\0';
+  return terminated;
+}
+
+// Legacy wrapper for string-based signatures (UTF8, UTF8) -> UTF8
+// This is called by the LLVM engine with string calling convention
+// WARNING: This function is for backward compatibility only. Decrypted data
+// may not be valid UTF-8. Use binary signatures for new code.
+extern "C" GANDIVA_EXPORT +const char* gdv_fn_aes_decrypt_ecb_legacy(int64_t context, const char* data, + int32_t data_len, + const char* key_data, + int32_t key_data_len, + int32_t* out_len) { + // Delegate to the core implementation with ECB mode + // This function is ECB-only, so we enforce the mode + const char* mode = "AES-ECB"; + int32_t mode_len = 7; + const char* result = gdv_fn_decrypt_dispatcher_3args( + context, data, data_len, key_data, key_data_len, mode, mode_len, out_len); + + // Add null terminator for string compatibility + // Note: This may not be valid UTF-8, but it's needed for string handling + if (result != nullptr) { + char* mutable_result = const_cast(result); + mutable_result[*out_len] = '\0'; + } + + return result; +} + +// The 3- and 4-arg signatures exist to support optional IV and other arguments +extern "C" GANDIVA_EXPORT +const char* gdv_fn_encrypt_dispatcher_3args( + int64_t context, const char* data, int32_t data_len, const char* key_data, + int32_t key_data_len, const char* mode, int32_t mode_len, + int32_t* out_len) { + return gdv_fn_encrypt_dispatcher_5args( + context, data, data_len, key_data, key_data_len, mode, mode_len, nullptr, + 0, nullptr, 0, out_len); +} + +extern "C" GANDIVA_EXPORT +const char* gdv_fn_decrypt_dispatcher_3args( + int64_t context, const char* data, int32_t data_len, const char* key_data, + int32_t key_data_len, const char* mode, int32_t mode_len, + int32_t* out_len) { + return gdv_fn_decrypt_dispatcher_5args( + context, data, data_len, key_data, key_data_len, mode, mode_len, nullptr, + 0, nullptr, 0, out_len); +} + +extern "C" GANDIVA_EXPORT +const char* gdv_fn_encrypt_dispatcher_4args( + int64_t context, const char* data, int32_t data_len, const char* key_data, + int32_t key_data_len, const char* mode, int32_t mode_len, + const char* iv_data, int32_t iv_data_len, int32_t* out_len) { + return gdv_fn_encrypt_dispatcher_5args( + context, data, data_len, key_data, key_data_len, mode, mode_len, iv_data, + 
iv_data_len, nullptr, 0, out_len); +} + +extern "C" GANDIVA_EXPORT +const char* gdv_fn_decrypt_dispatcher_4args( + int64_t context, const char* data, int32_t data_len, const char* key_data, + int32_t key_data_len, const char* mode, int32_t mode_len, + const char* iv_data, int32_t iv_data_len, int32_t* out_len) { + return gdv_fn_decrypt_dispatcher_5args( + context, data, data_len, key_data, key_data_len, mode, mode_len, iv_data, + iv_data_len, nullptr, 0, out_len); +} + +extern "C" GANDIVA_EXPORT +const char* gdv_fn_encrypt_dispatcher_5args( + int64_t context, const char* data, int32_t data_len, const char* key_data, + int32_t key_data_len, const char* mode, int32_t mode_len, + const char* iv_data, int32_t iv_data_len, const char* fifth_argument, + int32_t fifth_argument_len, int32_t* out_len) { + try { + // Allocate extra 16 bytes for AES block padding (PKCS7 padding can add + // up to 16 bytes for a 128-bit block cipher) + // In cases of no-padding modes, this extra space is not used + auto* output = reinterpret_cast( + gdv_fn_context_arena_malloc(context, data_len + 16)); + if (output == nullptr) { + throw std::runtime_error( + "Memory allocation failed for encryption output"); + } + + int32_t cipher_len = EncryptModeDispatcher::encrypt( + data, data_len, key_data, key_data_len, mode, mode_len, iv_data, + iv_data_len, fifth_argument, fifth_argument_len, output); + + *out_len = cipher_len; + return reinterpret_cast(output); + } catch (const std::runtime_error& e) { + gdv_fn_context_set_error_msg(context, e.what()); + *out_len = 0; + return nullptr; + } +} + +extern "C" GANDIVA_EXPORT +const char* gdv_fn_decrypt_dispatcher_5args( + int64_t context, const char* data, int32_t data_len, const char* key_data, + int32_t key_data_len, const char* mode, int32_t mode_len, + const char* iv_data, int32_t iv_data_len, const char* fifth_argument, + int32_t fifth_argument_len, int32_t* out_len) { + try { + auto* output = reinterpret_cast( + gdv_fn_context_arena_malloc(context, 
data_len)); + if (output == nullptr) { + throw std::runtime_error( + "Memory allocation failed for decryption output"); + } + + int32_t plaintext_len = EncryptModeDispatcher::decrypt( + data, data_len, key_data, key_data_len, mode, mode_len, iv_data, + iv_data_len, fifth_argument, fifth_argument_len, output); + + *out_len = plaintext_len; + return reinterpret_cast(output); + } catch (const std::runtime_error& e) { + gdv_fn_context_set_error_msg(context, e.what()); + *out_len = 0; + return nullptr; + } +} + arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { std::vector args; auto types = engine->types(); @@ -1037,7 +1106,9 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { types->i8_ptr_type() /*return_type*/, args, reinterpret_cast(gdv_fn_base64_decode_utf8)); - // gdv_fn_aes_encrypt + // gdv_fn_aes_encrypt_ecb_legacy (wrapper for string-based signatures) + // Note: Mode is hardcoded internally as "ECB", not passed as parameter + // Function signature: (context, data, data_len, key_data, key_data_len, out_len) args = { types->i64_type(), // context types->i8_ptr_type(), // data @@ -1047,11 +1118,11 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { types->i32_ptr_type() // out_length }; - engine->AddGlobalMappingForFunc("gdv_fn_aes_encrypt", + engine->AddGlobalMappingForFunc("gdv_fn_aes_encrypt_ecb_legacy", types->i8_ptr_type() /*return_type*/, args, - reinterpret_cast(gdv_fn_aes_encrypt)); + reinterpret_cast(gdv_fn_aes_encrypt_ecb_legacy)); - // gdv_fn_aes_decrypt + // gdv_fn_aes_decrypt_ecb_legacy (wrapper for string-based signatures) args = { types->i64_type(), // context types->i8_ptr_type(), // data @@ -1061,9 +1132,140 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { types->i32_ptr_type() // out_length }; - engine->AddGlobalMappingForFunc("gdv_fn_aes_decrypt", + engine->AddGlobalMappingForFunc("gdv_fn_aes_decrypt_ecb_legacy", types->i8_ptr_type() 
/*return_type*/, args, - reinterpret_cast(gdv_fn_aes_decrypt)); + reinterpret_cast(gdv_fn_aes_decrypt_ecb_legacy)); + + args = { + types->i64_type(), // context + types->i8_ptr_type(), // data + types->i32_type(), // data_length + types->i8_ptr_type(), // key_data + types->i32_type(), // key_data_length + types->i8_ptr_type(), // mode (binary string) + types->i32_type(), // mode_length + types->i8_ptr_type(), // iv (binary string) + types->i32_type(), // iv_length + types->i8_ptr_type(), // fifth_argument (binary string) + types->i32_type(), // fifth_argument_length + types->i32_ptr_type() // out_length + }; + + // gdv_fn_encrypt_dispatcher_3args (data, key, mode) + args = { + types->i64_type(), // context + types->i8_ptr_type(), // data + types->i32_type(), // data_length + types->i8_ptr_type(), // key_data + types->i32_type(), // key_data_length + types->i8_ptr_type(), // mode (binary string) + types->i32_type(), // mode_length + types->i32_ptr_type() // out_length + }; + + engine->AddGlobalMappingForFunc( + "gdv_fn_encrypt_dispatcher_3args", + types->i8_ptr_type() /*return_type*/, args, + reinterpret_cast(gdv_fn_encrypt_dispatcher_3args)); + + // gdv_fn_decrypt_dispatcher_3args (data, key, mode) + args = { + types->i64_type(), // context + types->i8_ptr_type(), // data + types->i32_type(), // data_length + types->i8_ptr_type(), // key_data + types->i32_type(), // key_data_length + types->i8_ptr_type(), // mode (binary string) + types->i32_type(), // mode_length + types->i32_ptr_type() // out_length + }; + + engine->AddGlobalMappingForFunc( + "gdv_fn_decrypt_dispatcher_3args", + types->i8_ptr_type() /*return_type*/, args, + reinterpret_cast(gdv_fn_decrypt_dispatcher_3args)); + + // gdv_fn_encrypt_dispatcher_4args (data, key, mode, iv) + args = { + types->i64_type(), // context + types->i8_ptr_type(), // data + types->i32_type(), // data_length + types->i8_ptr_type(), // key_data + types->i32_type(), // key_data_length + types->i8_ptr_type(), // mode (binary 
string) + types->i32_type(), // mode_length + types->i8_ptr_type(), // iv (binary string) + types->i32_type(), // iv_length + types->i32_ptr_type() // out_length + }; + + engine->AddGlobalMappingForFunc( + "gdv_fn_encrypt_dispatcher_4args", + types->i8_ptr_type() /*return_type*/, args, + reinterpret_cast(gdv_fn_encrypt_dispatcher_4args)); + + // gdv_fn_decrypt_dispatcher_4args (data, key, mode, iv) + args = { + types->i64_type(), // context + types->i8_ptr_type(), // data + types->i32_type(), // data_length + types->i8_ptr_type(), // key_data + types->i32_type(), // key_data_length + types->i8_ptr_type(), // mode (binary string) + types->i32_type(), // mode_length + types->i8_ptr_type(), // iv (binary string) + types->i32_type(), // iv_length + types->i32_ptr_type() // out_length + }; + + engine->AddGlobalMappingForFunc( + "gdv_fn_decrypt_dispatcher_4args", + types->i8_ptr_type() /*return_type*/, args, + reinterpret_cast(gdv_fn_decrypt_dispatcher_4args)); + + // gdv_fn_encrypt_dispatcher_5args (data, key, mode, iv, + // fifth_argument) + args = { + types->i64_type(), // context + types->i8_ptr_type(), // data + types->i32_type(), // data_length + types->i8_ptr_type(), // key_data + types->i32_type(), // key_data_length + types->i8_ptr_type(), // mode (binary string) + types->i32_type(), // mode_length + types->i8_ptr_type(), // iv (binary string) + types->i32_type(), // iv_length + types->i8_ptr_type(), // fifth_argument (binary string) + types->i32_type(), // fifth_argument_length + types->i32_ptr_type() // out_length + }; + + engine->AddGlobalMappingForFunc( + "gdv_fn_encrypt_dispatcher_5args", + types->i8_ptr_type() /*return_type*/, args, + reinterpret_cast(gdv_fn_encrypt_dispatcher_5args)); + + // gdv_fn_decrypt_dispatcher_5args (data, key, mode, iv, + // fifth_argument) + args = { + types->i64_type(), // context + types->i8_ptr_type(), // data + types->i32_type(), // data_length + types->i8_ptr_type(), // key_data + types->i32_type(), // key_data_length + 
types->i8_ptr_type(), // mode (binary string) + types->i32_type(), // mode_length + types->i8_ptr_type(), // iv (binary string) + types->i32_type(), // iv_length + types->i8_ptr_type(), // fifth_argument (binary string) + types->i32_type(), // fifth_argument_length + types->i32_ptr_type() // out_length + }; + + engine->AddGlobalMappingForFunc( + "gdv_fn_decrypt_dispatcher_5args", + types->i8_ptr_type() /*return_type*/, args, + reinterpret_cast(gdv_fn_decrypt_dispatcher_5args)); // gdv_mask_first_n and gdv_mask_last_n std::vector mask_args = { @@ -1162,7 +1364,8 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { }; engine->AddGlobalMappingForFunc( - "gdv_fn_cast_intervalday_utf8_int32", types->i64_type() /*return_type*/, args, + "gdv_fn_cast_intervalday_utf8_int32", + types->i64_type() /*return_type*/, args, reinterpret_cast(gdv_fn_cast_intervalday_utf8_int32)); // gdv_fn_cast_intervalyear_utf8 @@ -1179,6 +1382,21 @@ arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const { types->i32_type() /*return_type*/, args, reinterpret_cast(gdv_fn_cast_intervalyear_utf8)); +#define ADD_MAPPING_FOR_NUMERIC_LIST_TYPE_POPULATE_FUNCTION( \ + LLVM_TYPE, DATA_TYPE) \ + args = {types->i64_type(), types->i8_ptr_type(), types->i32_ptr_type(), \ + types->i64_type(), types->LLVM_TYPE##_ptr_type(), \ + types->i32_type(), types->i32_ptr_type()}; \ + engine->AddGlobalMappingForFunc( \ + "gdv_fn_populate_list_" #DATA_TYPE "_vector", \ + types->i32_type() /*return_type*/, args, \ + reinterpret_cast(gdv_fn_populate_list_##DATA_TYPE##_vector)); + + ADD_MAPPING_FOR_NUMERIC_LIST_TYPE_POPULATE_FUNCTION(i32, int32_t) + ADD_MAPPING_FOR_NUMERIC_LIST_TYPE_POPULATE_FUNCTION(i64, int64_t) + ADD_MAPPING_FOR_NUMERIC_LIST_TYPE_POPULATE_FUNCTION(float, float) + ADD_MAPPING_FOR_NUMERIC_LIST_TYPE_POPULATE_FUNCTION(double, double) + // gdv_fn_cast_intervalyear_utf8_int32 args = { types->i64_type(), // context diff --git a/cpp/src/gandiva/gdv_function_stubs.h 
b/cpp/src/gandiva/gdv_function_stubs.h index 4113f261ad7..54480ac7f6f 100644 --- a/cpp/src/gandiva/gdv_function_stubs.h +++ b/cpp/src/gandiva/gdv_function_stubs.h @@ -189,14 +189,65 @@ float gdv_fn_castFLOAT4_varbinary(gdv_int64 context, const char* in, int32_t in_ GANDIVA_EXPORT double gdv_fn_castFLOAT8_varbinary(gdv_int64 context, const char* in, int32_t in_len); +// Legacy wrappers for string-based AES-ECB signatures GANDIVA_EXPORT -const char* gdv_fn_aes_encrypt(int64_t context, const char* data, int32_t data_len, - const char* key_data, int32_t key_data_len, - int32_t* out_len); +const char* gdv_fn_aes_encrypt_ecb_legacy(int64_t context, const char* data, + int32_t data_len, + const char* key_data, + int32_t key_data_len, + int32_t* out_len); + +GANDIVA_EXPORT +const char* gdv_fn_aes_decrypt_ecb_legacy(int64_t context, const char* data, + int32_t data_len, + const char* key_data, + int32_t key_data_len, + int32_t* out_len); + +// 3-argument dispatcher: (data, key, mode) +GANDIVA_EXPORT +const char* gdv_fn_encrypt_dispatcher_3args( + int64_t context, const char* data, int32_t data_len, + const char* key_data, int32_t key_data_len, const char* mode, + int32_t mode_len, int32_t* out_len); + +GANDIVA_EXPORT +const char* gdv_fn_decrypt_dispatcher_3args( + int64_t context, const char* data, int32_t data_len, + const char* key_data, int32_t key_data_len, const char* mode, + int32_t mode_len, int32_t* out_len); + +// 4-argument dispatcher: (data, key, mode, iv) +GANDIVA_EXPORT +const char* gdv_fn_encrypt_dispatcher_4args( + int64_t context, const char* data, int32_t data_len, + const char* key_data, int32_t key_data_len, const char* mode, + int32_t mode_len, const char* iv_data, int32_t iv_data_len, + int32_t* out_len); + +GANDIVA_EXPORT +const char* gdv_fn_decrypt_dispatcher_4args( + int64_t context, const char* data, int32_t data_len, + const char* key_data, int32_t key_data_len, const char* mode, + int32_t mode_len, const char* iv_data, int32_t iv_data_len, + 
int32_t* out_len); + +// 5-argument dispatcher: (data, key, mode, iv, fifth_argument) +GANDIVA_EXPORT +const char* gdv_fn_encrypt_dispatcher_5args( + int64_t context, const char* data, int32_t data_len, + const char* key_data, int32_t key_data_len, const char* mode, + int32_t mode_len, const char* iv_data, int32_t iv_data_len, + const char* fifth_argument, int32_t fifth_argument_len, + int32_t* out_len); + GANDIVA_EXPORT -const char* gdv_fn_aes_decrypt(int64_t context, const char* data, int32_t data_len, - const char* key_data, int32_t key_data_len, - int32_t* out_len); +const char* gdv_fn_decrypt_dispatcher_5args( + int64_t context, const char* data, int32_t data_len, + const char* key_data, int32_t key_data_len, const char* mode, + int32_t mode_len, const char* iv_data, int32_t iv_data_len, + const char* fifth_argument, int32_t fifth_argument_len, + int32_t* out_len); GANDIVA_EXPORT const char* gdv_mask_first_n_utf8_int32(int64_t context, const char* data, diff --git a/cpp/src/gandiva/gdv_function_stubs_test.cc b/cpp/src/gandiva/gdv_function_stubs_test.cc index cbe24b74236..bfb34eeb31d 100644 --- a/cpp/src/gandiva/gdv_function_stubs_test.cc +++ b/cpp/src/gandiva/gdv_function_stubs_test.cc @@ -23,6 +23,9 @@ #include "arrow/util/logging.h" #include "gandiva/execution_context.h" +#include "gandiva/encrypt_utils_ecb.h" +#include "gandiva/encrypt_utils_cbc.h" +#include "gandiva/encrypt_utils_gcm.h" namespace gandiva { @@ -1353,15 +1356,20 @@ TEST(TestGdvFnStubs, TestAesEncryptDecrypt16) { int32_t decrypted_len = 0; std::string data = "test string"; auto data_len = static_cast(data.length()); + std::string mode = AES_ECB_MODE; + auto mode_len = static_cast(mode.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); - const char* cipher = gdv_fn_aes_encrypt(ctx_ptr, data.c_str(), data_len, key16.c_str(), - key16_len, &cipher_len); - const char* decrypted_value = gdv_fn_aes_decrypt( - ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, &decrypted_len); + const char* 
cipher = gdv_fn_encrypt_dispatcher_3args( + ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), + mode_len, &cipher_len); + const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( + ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), + mode_len, &decrypted_len); EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), decrypted_len)); + std::string(reinterpret_cast(decrypted_value), + decrypted_len)); } TEST(TestGdvFnStubs, TestAesEncryptDecrypt24) { @@ -1372,16 +1380,21 @@ TEST(TestGdvFnStubs, TestAesEncryptDecrypt24) { int32_t decrypted_len = 0; std::string data = "test string"; auto data_len = static_cast(data.length()); + std::string mode = AES_ECB_MODE; + auto mode_len = static_cast(mode.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); - const char* cipher = gdv_fn_aes_encrypt(ctx_ptr, data.c_str(), data_len, key24.c_str(), - key24_len, &cipher_len); + const char* cipher = gdv_fn_encrypt_dispatcher_3args( + ctx_ptr, data.c_str(), data_len, key24.c_str(), key24_len, mode.c_str(), + mode_len, &cipher_len); - const char* decrypted_value = gdv_fn_aes_decrypt( - ctx_ptr, cipher, cipher_len, key24.c_str(), key24_len, &decrypted_len); + const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( + ctx_ptr, cipher, cipher_len, key24.c_str(), key24_len, mode.c_str(), + mode_len, &decrypted_len); EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), decrypted_len)); + std::string(reinterpret_cast(decrypted_value), + decrypted_len)); } TEST(TestGdvFnStubs, TestAesEncryptDecrypt32) { @@ -1392,16 +1405,21 @@ TEST(TestGdvFnStubs, TestAesEncryptDecrypt32) { int32_t decrypted_len = 0; std::string data = "test string"; auto data_len = static_cast(data.length()); + std::string mode = AES_ECB_MODE; + auto mode_len = static_cast(mode.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); - const char* cipher = gdv_fn_aes_encrypt(ctx_ptr, data.c_str(), data_len, key32.c_str(), - key32_len, &cipher_len); + const 
char* cipher = gdv_fn_encrypt_dispatcher_3args( + ctx_ptr, data.c_str(), data_len, key32.c_str(), key32_len, mode.c_str(), + mode_len, &cipher_len); - const char* decrypted_value = gdv_fn_aes_decrypt( - ctx_ptr, cipher, cipher_len, key32.c_str(), key32_len, &decrypted_len); + const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( + ctx_ptr, cipher, cipher_len, key32.c_str(), key32_len, mode.c_str(), + mode_len, &decrypted_len); EXPECT_EQ(data, - std::string(reinterpret_cast(decrypted_value), decrypted_len)); + std::string(reinterpret_cast(decrypted_value), + decrypted_len)); } TEST(TestGdvFnStubs, TestAesEncryptDecryptValidation) { @@ -1411,18 +1429,285 @@ TEST(TestGdvFnStubs, TestAesEncryptDecryptValidation) { int32_t decrypted_len = 0; std::string data = "test string"; auto data_len = static_cast(data.length()); + std::string mode = AES_ECB_MODE; + auto mode_len = static_cast(mode.length()); int64_t ctx_ptr = reinterpret_cast(&ctx); std::string cipher = "12345678abcdefgh12345678abcdefghb"; auto cipher_len = static_cast(cipher.length()); - gdv_fn_aes_encrypt(ctx_ptr, data.c_str(), data_len, key33.c_str(), key33_len, - &cipher_len); - EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("invalid key length")); + gdv_fn_encrypt_dispatcher_3args(ctx_ptr, data.c_str(), data_len, + key33.c_str(), key33_len, mode.c_str(), + mode_len, &cipher_len); + EXPECT_THAT(ctx.get_error(), + ::testing::HasSubstr("Unsupported key length for AES-ECB")); ctx.Reset(); - gdv_fn_aes_decrypt(ctx_ptr, cipher.c_str(), cipher_len, key33.c_str(), key33_len, - &decrypted_len); - EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("invalid key length")); + gdv_fn_decrypt_dispatcher_3args(ctx_ptr, cipher.c_str(), cipher_len, + key33.c_str(), key33_len, mode.c_str(), + mode_len, &decrypted_len); + EXPECT_THAT(ctx.get_error(), + ::testing::HasSubstr("Unsupported key length for AES-ECB")); ctx.Reset(); } + +// Tests for new mode-aware AES functions +TEST(TestGdvFnStubs, 
TestAesEncryptDecryptModeEcb) { + gandiva::ExecutionContext ctx; + std::string key16 = "12345678abcdefgh"; + auto key16_len = static_cast(key16.length()); + int32_t cipher_len = 0; + int32_t decrypted_len = 0; + std::string data = "test string"; + auto data_len = static_cast(data.length()); + std::string mode = AES_ECB_MODE; + auto mode_len = static_cast(mode.length()); + int64_t ctx_ptr = reinterpret_cast(&ctx); + + const char* cipher = gdv_fn_encrypt_dispatcher_3args( + ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), + mode_len, &cipher_len); + EXPECT_GT(cipher_len, 0); + + const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( + ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), + mode_len, &decrypted_len); + EXPECT_EQ(data, + std::string(reinterpret_cast(decrypted_value), + decrypted_len)); +} + +TEST(TestGdvFnStubs, TestAesEncryptDecryptModeValidation) { + gandiva::ExecutionContext ctx; + std::string key16 = "12345678abcdefgh"; + auto key16_len = static_cast(key16.length()); + int32_t cipher_len = 0; + int32_t decrypted_len = 0; + std::string data = "test string"; + auto data_len = static_cast(data.length()); + std::string invalid_mode = "AES-INVALID"; + auto invalid_mode_len = static_cast(invalid_mode.length()); + int64_t ctx_ptr = reinterpret_cast(&ctx); + + // Test encrypt with invalid mode + gdv_fn_encrypt_dispatcher_3args(ctx_ptr, data.c_str(), data_len, + key16.c_str(), key16_len, + invalid_mode.c_str(), invalid_mode_len, + &cipher_len); + EXPECT_THAT(ctx.get_error(), + ::testing::HasSubstr("Unsupported encryption mode")); + ctx.Reset(); + + // Test decrypt with invalid mode + std::string cipher = "12345678abcdefgh12345678abcdefgh"; + auto cipher_len_val = static_cast(cipher.length()); + gdv_fn_decrypt_dispatcher_3args(ctx_ptr, cipher.c_str(), cipher_len_val, + key16.c_str(), key16_len, + invalid_mode.c_str(), invalid_mode_len, + &decrypted_len); + EXPECT_THAT(ctx.get_error(), + 
::testing::HasSubstr("Unsupported decryption mode")); + ctx.Reset(); +} + +// Tests for AES-GCM mode +TEST(TestGdvFnStubs, TestAesEncryptDecryptGcmIvOnly) { + gandiva::ExecutionContext ctx; + std::string key16 = "12345678abcdefgh"; + auto key16_len = static_cast(key16.length()); + int32_t cipher_len = 0; + int32_t decrypted_len = 0; + std::string data = "test string"; + auto data_len = static_cast(data.length()); + std::string mode = AES_GCM_MODE; + auto mode_len = static_cast(mode.length()); + std::string iv = "123456789012"; + auto iv_len = static_cast(iv.length()); + int64_t ctx_ptr = reinterpret_cast(&ctx); + + const char* cipher = gdv_fn_encrypt_dispatcher_5args( + ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), + mode_len, iv.c_str(), iv_len, nullptr, 0, &cipher_len); + EXPECT_GT(cipher_len, 0); + + const char* decrypted_value = gdv_fn_decrypt_dispatcher_5args( + ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), + mode_len, iv.c_str(), iv_len, nullptr, 0, &decrypted_len); + + EXPECT_EQ(data, + std::string(reinterpret_cast(decrypted_value), + decrypted_len)); +} + +TEST(TestGdvFnStubs, TestAesEncryptDecryptGcmWithAad) { + gandiva::ExecutionContext ctx; + std::string key16 = "12345678abcdefgh"; + auto key16_len = static_cast(key16.length()); + int32_t cipher_len = 0; + int32_t decrypted_len = 0; + std::string data = "test string"; + auto data_len = static_cast(data.length()); + std::string mode = AES_GCM_MODE; + auto mode_len = static_cast(mode.length()); + std::string iv = "123456789012"; + auto iv_len = static_cast(iv.length()); + std::string aad = "additional authenticated data"; + auto aad_len = static_cast(aad.length()); + int64_t ctx_ptr = reinterpret_cast(&ctx); + + const char* cipher = gdv_fn_encrypt_dispatcher_5args( + ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), + mode_len, iv.c_str(), iv_len, aad.c_str(), aad_len, &cipher_len); + EXPECT_GT(cipher_len, 0); + + const char* 
decrypted_value = gdv_fn_decrypt_dispatcher_5args( + ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), + mode_len, iv.c_str(), iv_len, aad.c_str(), aad_len, &decrypted_len); + + EXPECT_EQ(data, + std::string(reinterpret_cast(decrypted_value), + decrypted_len)); +} + +// Tests for shorthand mode: AES-ECB (defaults to PKCS7) +TEST(TestGdvFnStubs, TestAesEncryptDecryptShorthandEcb) { + gandiva::ExecutionContext ctx; + std::string key16 = "12345678abcdefgh"; + auto key16_len = static_cast(key16.length()); + int32_t cipher_len = 0; + int32_t decrypted_len = 0; + std::string data = "test string"; + auto data_len = static_cast(data.length()); + std::string mode = AES_ECB_MODE; // Shorthand mode + auto mode_len = static_cast(mode.length()); + int64_t ctx_ptr = reinterpret_cast(&ctx); + + const char* cipher = gdv_fn_encrypt_dispatcher_3args( + ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), + mode_len, &cipher_len); + EXPECT_GT(cipher_len, 0); + + const char* decrypted_value = gdv_fn_decrypt_dispatcher_3args( + ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), + mode_len, &decrypted_len); + + EXPECT_EQ(data, + std::string(reinterpret_cast(decrypted_value), + decrypted_len)); +} + +// Tests for explicit mode: AES-ECB-PKCS7 +TEST(TestGdvFnStubs, TestAesEncryptDecryptExplicitEcbPkcs7) { + gandiva::ExecutionContext ctx; + std::string key16 = "12345678abcdefgh"; + auto key16_len = static_cast(key16.length()); + int32_t cipher_len = 0; + int32_t decrypted_len = 0; + std::string data = "test string"; + auto data_len = static_cast(data.length()); + std::string mode = AES_ECB_PKCS7_MODE; // Explicit mode + auto mode_len = static_cast(mode.length()); + int64_t ctx_ptr = reinterpret_cast(&ctx); + + const char* cipher = gdv_fn_encrypt_dispatcher_3args( + ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), + mode_len, &cipher_len); + EXPECT_GT(cipher_len, 0); + + const char* decrypted_value = 
gdv_fn_decrypt_dispatcher_3args( + ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), + mode_len, &decrypted_len); + + EXPECT_EQ(data, + std::string(reinterpret_cast(decrypted_value), + decrypted_len)); +} + +// Tests for shorthand mode: AES-CBC (defaults to PKCS7) +TEST(TestGdvFnStubs, TestAesEncryptDecryptShorthandCbc) { + gandiva::ExecutionContext ctx; + std::string key16 = "12345678abcdefgh"; + auto key16_len = static_cast(key16.length()); + int32_t cipher_len = 0; + int32_t decrypted_len = 0; + std::string data = "test string"; + auto data_len = static_cast(data.length()); + std::string mode = AES_CBC_MODE; // Shorthand mode + auto mode_len = static_cast(mode.length()); + std::string iv = "1234567890123456"; + auto iv_len = static_cast(iv.length()); + int64_t ctx_ptr = reinterpret_cast(&ctx); + + const char* cipher = gdv_fn_encrypt_dispatcher_4args( + ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), + mode_len, iv.c_str(), iv_len, &cipher_len); + EXPECT_GT(cipher_len, 0); + + const char* decrypted_value = gdv_fn_decrypt_dispatcher_4args( + ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), + mode_len, iv.c_str(), iv_len, &decrypted_len); + + EXPECT_EQ(data, + std::string(reinterpret_cast(decrypted_value), + decrypted_len)); +} + +// Tests for explicit mode: AES-CBC-PKCS7 +TEST(TestGdvFnStubs, TestAesEncryptDecryptExplicitCbcPkcs7) { + gandiva::ExecutionContext ctx; + std::string key16 = "12345678abcdefgh"; + auto key16_len = static_cast(key16.length()); + int32_t cipher_len = 0; + int32_t decrypted_len = 0; + std::string data = "test string"; + auto data_len = static_cast(data.length()); + std::string mode = AES_CBC_PKCS7_MODE; // Explicit mode + auto mode_len = static_cast(mode.length()); + std::string iv = "1234567890123456"; + auto iv_len = static_cast(iv.length()); + int64_t ctx_ptr = reinterpret_cast(&ctx); + + const char* cipher = gdv_fn_encrypt_dispatcher_4args( + ctx_ptr, data.c_str(), 
data_len, key16.c_str(), key16_len, mode.c_str(), + mode_len, iv.c_str(), iv_len, &cipher_len); + EXPECT_GT(cipher_len, 0); + + const char* decrypted_value = gdv_fn_decrypt_dispatcher_4args( + ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), + mode_len, iv.c_str(), iv_len, &decrypted_len); + + EXPECT_EQ(data, + std::string(reinterpret_cast(decrypted_value), + decrypted_len)); +} + +// Tests for explicit mode: AES-CBC-NONE (no padding) +TEST(TestGdvFnStubs, TestAesEncryptDecryptCbcNone) { + gandiva::ExecutionContext ctx; + std::string key16 = "12345678abcdefgh"; + auto key16_len = static_cast(key16.length()); + int32_t cipher_len = 0; + int32_t decrypted_len = 0; + // Use exactly 16 bytes (one block) for no-padding mode + std::string data = "1234567890123456"; + auto data_len = static_cast(data.length()); + std::string mode = AES_CBC_NONE_MODE; // No padding mode + auto mode_len = static_cast(mode.length()); + std::string iv = "1234567890123456"; + auto iv_len = static_cast(iv.length()); + int64_t ctx_ptr = reinterpret_cast(&ctx); + + const char* cipher = gdv_fn_encrypt_dispatcher_4args( + ctx_ptr, data.c_str(), data_len, key16.c_str(), key16_len, mode.c_str(), + mode_len, iv.c_str(), iv_len, &cipher_len); + EXPECT_GT(cipher_len, 0); + + const char* decrypted_value = gdv_fn_decrypt_dispatcher_4args( + ctx_ptr, cipher, cipher_len, key16.c_str(), key16_len, mode.c_str(), + mode_len, iv.c_str(), iv_len, &decrypted_len); + + EXPECT_EQ(data, + std::string(reinterpret_cast(decrypted_value), + decrypted_len)); +} + } // namespace gandiva diff --git a/dev/tasks/r/github.packages.yml b/dev/tasks/r/github.packages.yml index 4ffe3ef67d6..86a98e1c956 100644 --- a/dev/tasks/r/github.packages.yml +++ b/dev/tasks/r/github.packages.yml @@ -66,7 +66,7 @@ jobs: fail-fast: false matrix: platform: - - { runs_on: macos-13, arch: "x86_64" } + - { runs_on: macos-15-intel, arch: "x86_64" } - { runs_on: macos-14, arch: "arm64" } openssl: ['3.0', '1.1'] @@ -208,7 +208,7 
@@ jobs: matrix: platform: - { runs_on: 'windows-latest', name: "Windows"} - - { runs_on: macos-13, name: "macOS x86_64"} + - { runs_on: macos-15-intel, name: "macOS x86_64"} - { runs_on: macos-14, name: "macOS arm64" } r_version: [oldrel, release] steps: @@ -389,7 +389,7 @@ jobs: matrix: platform: - {runs_on: "ubuntu-latest", name: "Linux"} - - {runs_on: "macos-13" , name: "macOS"} + - {runs_on: "macos-15-intel" , name: "macOS"} steps: - name: Install R uses: r-lib/actions/setup-r@v2 diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 9a845c60add..0bd7cbde172 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -254,7 +254,7 @@ tasks: python_version: "{{ python_version }}" python_abi_tag: "{{ abi_tag }}" macos_deployment_target: "12.0" - runs_on: "macos-13" + runs_on: "macos-15-intel" vcpkg_arch: "amd64" artifacts: - pyarrow-{no_rc_version}-{{ python_tag }}-{{ abi_tag }}-macosx_12_0_x86_64.whl @@ -591,7 +591,7 @@ tasks: params: target: {{ target }} use_conda: True - github_runner: "macos-13" + github_runner: "macos-15-intel" {% endfor %} {% for target in ["cpp", @@ -605,7 +605,7 @@ tasks: template: verify-rc/github.macos.yml params: target: {{ target }} - github_runner: "macos-13" + github_runner: "macos-15-intel" {% endfor %} {% for target in ["cpp",