diff --git a/sdk/storage/azure-storage-blobs/src/rest_client.cpp b/sdk/storage/azure-storage-blobs/src/rest_client.cpp index 1678ac9b72..7742fd6087 100644 --- a/sdk/storage/azure-storage-blobs/src/rest_client.cpp +++ b/sdk/storage/azure-storage-blobs/src/rest_client.cpp @@ -5573,9 +5573,15 @@ namespace Azure { namespace Storage { namespace Blobs { response.LeaseDuration = Models::LeaseDurationType(pRawResponse->GetHeaders().at("x-ms-lease-duration")); } - response.LeaseState = Models::LeaseState(pRawResponse->GetHeaders().at("x-ms-lease-state")); - response.LeaseStatus - = Models::LeaseStatus(pRawResponse->GetHeaders().at("x-ms-lease-status")); + if (pRawResponse->GetHeaders().count("x-ms-lease-state") != 0) + { + response.LeaseState = Models::LeaseState(pRawResponse->GetHeaders().at("x-ms-lease-state")); + } + if (pRawResponse->GetHeaders().count("x-ms-lease-status") != 0) + { + response.LeaseStatus + = Models::LeaseStatus(pRawResponse->GetHeaders().at("x-ms-lease-status")); + } response.IsServerEncrypted = pRawResponse->GetHeaders().at("x-ms-server-encrypted") == std::string("true"); return Response(std::move(response), std::move(pRawResponse)); diff --git a/sdk/storage/azure-storage-blobs/swagger/README.md b/sdk/storage/azure-storage-blobs/swagger/README.md index 2b34011ca2..493c96ad29 100644 --- a/sdk/storage/azure-storage-blobs/swagger/README.md +++ b/sdk/storage/azure-storage-blobs/swagger/README.md @@ -1292,6 +1292,10 @@ directive: delete $[status_code].headers["x-ms-blob-content-md5"]; delete $[status_code].headers["x-ms-content-crc64"]; $[status_code].headers["x-ms-lease-duration"]["x-nullable"] = true; + $[status_code].headers["x-ms-lease-state"]["x-ms-client-default"] = ""; + $[status_code].headers["x-ms-lease-state"]["x-nullable"] = true; + $[status_code].headers["x-ms-lease-status"]["x-ms-client-default"] = ""; + $[status_code].headers["x-ms-lease-status"]["x-nullable"] = true; } ``` diff --git 
a/sdk/storage/azure-storage-files-datalake/inc/azure/storage/files/datalake/datalake_file_client.hpp b/sdk/storage/azure-storage-files-datalake/inc/azure/storage/files/datalake/datalake_file_client.hpp index 9a4e7c851c..a4ade4cc68 100644 --- a/sdk/storage/azure-storage-files-datalake/inc/azure/storage/files/datalake/datalake_file_client.hpp +++ b/sdk/storage/azure-storage-files-datalake/inc/azure/storage/files/datalake/datalake_file_client.hpp @@ -265,6 +265,19 @@ namespace Azure { namespace Storage { namespace Files { namespace DataLake { const ScheduleFileDeletionOptions& options = ScheduleFileDeletionOptions(), const Azure::Core::Context& context = Azure::Core::Context()) const; + /** + * @brief Returns the result of a query against the file. + * + * @param querySqlExpression The query expression in SQL. + * @param options Optional parameters to execute this function. + * @param context Context for cancelling long running operations. + * @return A QueryFileResult describing the query result. 
+ */ + Azure::Response Query( + const std::string& querySqlExpression, + const QueryFileOptions& options = QueryFileOptions(), + const Azure::Core::Context& context = Azure::Core::Context()) const; + private: explicit DataLakeFileClient( Azure::Core::Url fileUrl, diff --git a/sdk/storage/azure-storage-files-datalake/inc/azure/storage/files/datalake/datalake_options.hpp b/sdk/storage/azure-storage-files-datalake/inc/azure/storage/files/datalake/datalake_options.hpp index b0796cefde..4d73a3fe15 100644 --- a/sdk/storage/azure-storage-files-datalake/inc/azure/storage/files/datalake/datalake_options.hpp +++ b/sdk/storage/azure-storage-files-datalake/inc/azure/storage/files/datalake/datalake_options.hpp @@ -21,6 +21,8 @@ namespace Azure { namespace Storage { namespace Files { namespace DataLake { using PathHttpHeaders = Blobs::Models::BlobHttpHeaders; using ListFileSystemsIncludeFlags = Blobs::Models::ListBlobContainersIncludeFlags; using SignedIdentifier = Blobs::Models::SignedIdentifier; + using FileQueryArrowField = Blobs::Models::BlobQueryArrowField; + using FileQueryArrowFieldType = Blobs::Models::BlobQueryArrowFieldType; } // namespace Models using DownloadFileToOptions = Blobs::DownloadBlobToOptions; @@ -638,4 +640,35 @@ namespace Azure { namespace Storage { namespace Files { namespace DataLake { using ReleaseLeaseOptions = Blobs::ReleaseLeaseOptions; using ChangeLeaseOptions = Blobs::ChangeLeaseOptions; + using FileQueryInputTextOptions = Blobs::BlobQueryInputTextOptions; + using FileQueryOutputTextOptions = Blobs::BlobQueryOutputTextOptions; + using FileQueryError = Blobs::BlobQueryError; + + /** + * @brief Optional parameters for #Azure::Storage::Files::DataLake::DataLakeFileClient::Query. + */ + struct QueryFileOptions final + { + /** + * @brief Input text configuration. + */ + FileQueryInputTextOptions InputTextConfiguration; + /** + * @brief Output text configuration. 
+ */ + FileQueryOutputTextOptions OutputTextConfiguration; + /** + * @brief Optional conditions that must be met to perform this operation. + */ + PathAccessConditions AccessConditions; + /** + * @brief Callback for progress handling. + */ + std::function ProgressHandler; + /** + * @brief Callback for error handling. If you don't specify one, the default will be used, which + * will ignore all non-fatal errors and throw for fatal errors. + */ + std::function ErrorHandler; + }; }}}} // namespace Azure::Storage::Files::DataLake diff --git a/sdk/storage/azure-storage-files-datalake/inc/azure/storage/files/datalake/datalake_responses.hpp b/sdk/storage/azure-storage-files-datalake/inc/azure/storage/files/datalake/datalake_responses.hpp index e8b9b90ce8..bc076a4cae 100644 --- a/sdk/storage/azure-storage-files-datalake/inc/azure/storage/files/datalake/datalake_responses.hpp +++ b/sdk/storage/azure-storage-files-datalake/inc/azure/storage/files/datalake/datalake_responses.hpp @@ -449,6 +449,44 @@ namespace Azure { namespace Storage { namespace Files { namespace DataLake { using ScheduleFileDeletionResult = Blobs::Models::SetBlobExpiryResult; using CopyStatus = Blobs::Models::CopyStatus; + /** + * @brief Response type for #Azure::Storage::Files::DataLake::DataLakeFileClient::Query. + */ + struct QueryFileResult final + { + std::unique_ptr BodyStream; + /** + * Returns the date and time the file was last modified. Any operation that modifies the + * file, including an update of the file's metadata or properties, changes the last-modified + * time of the file. + */ + DateTime LastModified; + /** + * The ETag contains a value that you can use to perform operations conditionally. If the + * request version is 2011-08-18 or newer, the ETag value will be in quotes. + */ + Azure::ETag ETag; + /** + * When a file is leased, specifies whether the lease is of infinite or fixed duration. + */ + Nullable LeaseDuration; + /** + * Lease state of the file. 
+ */ + Models::LeaseState LeaseState; + /** + * The current lease status of the file. + */ + Models::LeaseStatus LeaseStatus; + /** + * The value of this header is set to true if the file data and application metadata are + * completely encrypted using the specified algorithm. Otherwise, the value is set to false + * (when the file is unencrypted, or if only parts of the file/application metadata are + * encrypted). + */ + bool IsServerEncrypted = bool(); + }; + /** * @brief The detailed information returned when downloading a file. */ diff --git a/sdk/storage/azure-storage-files-datalake/src/datalake_file_client.cpp b/sdk/storage/azure-storage-files-datalake/src/datalake_file_client.cpp index ded67cb674..407dbaaaa5 100644 --- a/sdk/storage/azure-storage-files-datalake/src/datalake_file_client.cpp +++ b/sdk/storage/azure-storage-files-datalake/src/datalake_file_client.cpp @@ -327,4 +327,33 @@ namespace Azure { namespace Storage { namespace Files { namespace DataLake { *m_pipeline, m_blobClient.m_blobUrl, protocolLayerOptions, context); } + Azure::Response DataLakeFileClient::Query( + const std::string& querySqlExpression, + const QueryFileOptions& options, + const Azure::Core::Context& context) const + { + Blobs::QueryBlobOptions blobOptions; + blobOptions.InputTextConfiguration = options.InputTextConfiguration; + blobOptions.OutputTextConfiguration = options.OutputTextConfiguration; + blobOptions.ErrorHandler = options.ErrorHandler; + blobOptions.ProgressHandler = options.ProgressHandler; + blobOptions.AccessConditions.IfMatch = options.AccessConditions.IfMatch; + blobOptions.AccessConditions.IfNoneMatch = options.AccessConditions.IfNoneMatch; + blobOptions.AccessConditions.IfModifiedSince = options.AccessConditions.IfModifiedSince; + blobOptions.AccessConditions.IfUnmodifiedSince = options.AccessConditions.IfUnmodifiedSince; + blobOptions.AccessConditions.LeaseId = options.AccessConditions.LeaseId; + auto response + = 
m_blobClient.AsBlockBlobClient().Query(querySqlExpression, blobOptions, context); + Models::QueryFileResult ret; + ret.BodyStream = std::move(response.Value.BodyStream); + ret.ETag = std::move(response.Value.ETag); + ret.LastModified = std::move(response.Value.LastModified); + ret.LeaseDuration = std::move(response.Value.LeaseDuration); + ret.LeaseState = std::move(response.Value.LeaseState); + ret.LeaseStatus = std::move(response.Value.LeaseStatus); + ret.IsServerEncrypted = response.Value.IsServerEncrypted; + return Azure::Response( + std::move(ret), std::move(response.RawResponse)); + } + }}}} // namespace Azure::Storage::Files::DataLake diff --git a/sdk/storage/azure-storage-files-datalake/test/ut/CMakeLists.txt b/sdk/storage/azure-storage-files-datalake/test/ut/CMakeLists.txt index a8d6fe32fe..ea2c7d2fd0 100644 --- a/sdk/storage/azure-storage-files-datalake/test/ut/CMakeLists.txt +++ b/sdk/storage/azure-storage-files-datalake/test/ut/CMakeLists.txt @@ -18,6 +18,7 @@ add_executable ( datalake_directory_client_test.hpp datalake_file_client_test.cpp datalake_file_client_test.hpp + datalake_file_query_test.cpp datalake_file_system_client_test.cpp datalake_file_system_client_test.hpp datalake_path_client_test.cpp diff --git a/sdk/storage/azure-storage-files-datalake/test/ut/datalake_file_client_test.cpp b/sdk/storage/azure-storage-files-datalake/test/ut/datalake_file_client_test.cpp index 099135cd54..327b6f1245 100644 --- a/sdk/storage/azure-storage-files-datalake/test/ut/datalake_file_client_test.cpp +++ b/sdk/storage/azure-storage-files-datalake/test/ut/datalake_file_client_test.cpp @@ -29,6 +29,7 @@ namespace Azure { namespace Storage { namespace Test { void DataLakeFileClientTest::SetUp() { DataLakeFileSystemClientTest::SetUp(); + CHECK_SKIP_TEST(); m_fileName = GetFileSystemValidName(); m_fileClient = std::make_shared( m_fileSystemClient->GetFileClient(m_fileName)); @@ -37,6 +38,7 @@ namespace Azure { namespace Storage { namespace Test { void 
DataLakeFileClientTest::TearDown() { + CHECK_SKIP_TEST(); m_fileSystemClient->GetFileClient(m_fileName).Delete(); DataLakeFileSystemClientTest::TearDown(); } diff --git a/sdk/storage/azure-storage-files-datalake/test/ut/datalake_file_query_test.cpp b/sdk/storage/azure-storage-files-datalake/test/ut/datalake_file_query_test.cpp new file mode 100644 index 0000000000..1aa6a8c3d8 --- /dev/null +++ b/sdk/storage/azure-storage-files-datalake/test/ut/datalake_file_query_test.cpp @@ -0,0 +1,430 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// SPDX-License-Identifier: MIT + +#include "datalake_file_client_test.hpp" + +#include +#include +#include + +// cspell:ignore sapote + +namespace Azure { namespace Storage { namespace Test { + + const std::string JsonQueryTestData = + R"json( +{"id": 100, "name": "oranges", "price": 100} +{"id": 101, "name": "limes", "price": 50} +{"id": 102, "name": "berries", "price": 199} +{"id": 103, "name": "apples", "price": 99} +{"id": 104, "name": "clementines", "price": 399} +{"id": 105, "name": "grapes", "price": 150} +{"id": 106, "name": "lemons", "price": 69} +{"id": 107, "name": "pears", "price": 100} +{"id": 108, "name": "cherries", "price": 281} +{"id": 109, "name": "coconut", "price": 178} +{"id": 110, "name": "bananas", "price": 39} +{"id": 111, "name": "peaches", "price": 117} +{"id": 112, "name": "sapote,\"mamey", "price": 50} +)json"; + + const std::string CsvQueryTestData = R"csv( +id,name,price +100,oranges,100 +101,limes,50 +102,berries,199 +103,apples,99 +104,clementines,399 +105,grapes,150 +106,lemons,69 +107,pears,100 +108,cherries,281 +109,coconut,178 +110,bananas,39 +111,peaches,117 +112,sapote\,mamey,50 +)csv"; + + const std::vector ParquetQueryTestData = Core::Convert::Base64Decode( + "UEFSMRUAFewBFewBLBUaFQAVBhUIAAACAAAAGgFkAAAAAAAAAGUAAAAAAAAAZgAAAAAAAABnAAAAAAAAAGgAAAAAAAAA" + "aQAAAAAAAABqAAAAAAAAAGsAAAAAAAAAbAAAAAAAAABtAAAAAAAAAG4AAAAAAAAAbwAAAAAAAABwAAAAAAAAAAAAAAAA" + 
"AAAAFQAVxAIVxAIsFRoVABUGFQgAAAIAAAAaAQcAAABvcmFuZ2VzBQAAAGxpbWVzBwAAAGJlcnJpZXMGAAAAYXBwbGVz" + "CwAAAGNsZW1lbnRpbmVzBgAAAGdyYXBlcwYAAABsZW1vbnMFAAAAcGVhcnMIAAAAY2hlcnJpZXMHAAAAY29jb251dAcA" + "AABiYW5hbmFzBwAAAHBlYWNoZXMOAAAAc2Fwb3RlLCJtYW1leSIAAAAAAAAAABUAFewBFewBLBUaFQAVBhUIAAACAAAA" + "GgFkAAAAAAAAADIAAAAAAAAAxwAAAAAAAABjAAAAAAAAAI8BAAAAAAAAlgAAAAAAAABFAAAAAAAAAGQAAAAAAAAAGQEA" + "AAAAAACyAAAAAAAAACcAAAAAAAAAdQAAAAAAAAAyAAAAAAAAAAAAAAAAAAAAFQIZTEgGc2NoZW1hFQYAFQQVgAEVAhgC" + "aWQAFQwlAhgEbmFtZSUAABUEFYABFQIYBXByaWNlABYaGRwZPCaaAhwVBBkVABkYAmlkFQAWGhaSAhaSAhkAFgg8GAhw" + "AAAAAAAAABgIZAAAAAAAAAAWAAAZHBUAFQAVAgAAACaEBRwVDBkVABkYBG5hbWUVABYaFuoCFuoCGQAWmgI8GA5zYXBv" + "dGUsIm1hbWV5IhgGYXBwbGVzFgAAGRwVABUAFQIAAAAmlgccFQQZFQAZGAVwcmljZRUAFhoWkgIWkgIZABaEBTwYCI8B" + "AAAAAAAAGAgnAAAAAAAAABYAABkcFQAVABUCAAAAFo4HFhoAGRwYBnBhbmRhcxiRBXsiY29sdW1uX2luZGV4ZXMiOiBb" + "eyJmaWVsZF9uYW1lIjogbnVsbCwgIm1ldGFkYXRhIjogbnVsbCwgIm5hbWUiOiBudWxsLCAibnVtcHlfdHlwZSI6ICJv" + "YmplY3QiLCAicGFuZGFzX3R5cGUiOiAibWl4ZWQtaW50ZWdlciJ9XSwgImNvbHVtbnMiOiBbeyJmaWVsZF9uYW1lIjog" + "ImlkIiwgIm1ldGFkYXRhIjogbnVsbCwgIm5hbWUiOiAiaWQiLCAibnVtcHlfdHlwZSI6ICJpbnQ2NCIsICJwYW5kYXNf" + "dHlwZSI6ICJpbnQ2NCJ9LCB7ImZpZWxkX25hbWUiOiAibmFtZSIsICJtZXRhZGF0YSI6IG51bGwsICJuYW1lIjogIm5h" + "bWUiLCAibnVtcHlfdHlwZSI6ICJvYmplY3QiLCAicGFuZGFzX3R5cGUiOiAidW5pY29kZSJ9LCB7ImZpZWxkX25hbWUi" + "OiAicHJpY2UiLCAibWV0YWRhdGEiOiBudWxsLCAibmFtZSI6ICJwcmljZSIsICJudW1weV90eXBlIjogImludDY0Iiwg" + "InBhbmRhc190eXBlIjogImludDY0In1dLCAiY3JlYXRvciI6IHsibGlicmFyeSI6ICJmYXN0cGFycXVldCIsICJ2ZXJz" + "aW9uIjogIjAuOC4xIn0sICJpbmRleF9jb2x1bW5zIjogW3sia2luZCI6ICJyYW5nZSIsICJuYW1lIjogbnVsbCwgInN0" + "YXJ0IjogMCwgInN0ZXAiOiAxLCAic3RvcCI6IDEzfV0sICJwYW5kYXNfdmVyc2lvbiI6ICIxLjQuMiIsICJwYXJ0aXRp" + "b25fY29sdW1ucyI6IFtdfQAYKmZhc3RwYXJxdWV0LXB5dGhvbiB2ZXJzaW9uIDAuOC4xIChidWlsZCAwKQDXAwAAUEFS" + "MQ=="); + + TEST_F(DataLakeFileClientTest, QueryJsonInputCsvOutput_LIVEONLY_) + { + auto const testName(GetTestName()); + auto client = 
m_fileSystemClient->GetFileClient(testName); + + client.UploadFrom( + reinterpret_cast(JsonQueryTestData.data()), JsonQueryTestData.size()); + + Files::DataLake::QueryFileOptions queryOptions; + queryOptions.InputTextConfiguration + = Files::DataLake::FileQueryInputTextOptions::CreateJsonTextOptions(); + + { + queryOptions.OutputTextConfiguration + = Files::DataLake::FileQueryOutputTextOptions::CreateCsvTextOptions(); + auto queryResponse + = client.Query("SELECT * from BlobStorage WHERE id > 101 AND price < 100;", queryOptions); + auto data = queryResponse.Value.BodyStream->ReadToEnd(); + EXPECT_EQ( + std::string(data.begin(), data.end()), + R"csv(103,apples,99 +106,lemons,69 +110,bananas,39 +112,"sapote,""mamey",50 +)csv"); + } + + { + queryOptions.OutputTextConfiguration + = Files::DataLake::FileQueryOutputTextOptions::CreateCsvTextOptions( + "|", ".", "[", "\\", true); + auto queryResponse + = client.Query("SELECT * from BlobStorage WHERE id > 101 AND price < 100;", queryOptions); + + auto data = queryResponse.Value.BodyStream->ReadToEnd(); + EXPECT_EQ( + std::string(data.begin(), data.end()), + R"csv(103.apples.99|106.lemons.69|110.bananas.39|112.sapote,"mamey.50|)csv"); + } + } + + TEST_F(DataLakeFileClientTest, QueryCsvInputJsonOutput_LIVEONLY_) + { + auto const testName(GetTestName()); + auto client = m_fileSystemClient->GetFileClient(testName); + + client.UploadFrom( + reinterpret_cast(CsvQueryTestData.data()), CsvQueryTestData.size()); + + Files::DataLake::QueryFileOptions queryOptions; + queryOptions.InputTextConfiguration + = Files::DataLake::FileQueryInputTextOptions::CreateCsvTextOptions( + "\n", ",", "\"", "\\", true); + queryOptions.OutputTextConfiguration + = Files::DataLake::FileQueryOutputTextOptions::CreateJsonTextOptions("|"); + auto queryResponse + = client.Query("SELECT * from BlobStorage WHERE id > 101 AND price < 100;", queryOptions); + + auto data = queryResponse.Value.BodyStream->ReadToEnd(); + EXPECT_EQ( + std::string(data.begin(), 
data.end()), + R"json({"id":"103","name":"apples","price":"99"}|{"id":"106","name":"lemons","price":"69"}|{"id":"110","name":"bananas","price":"39"}|{"id":"112","name":"sapote,mamey","price":"50"}|)json"); + } + + TEST_F(DataLakeFileClientTest, QueryCsvInputArrowOutput_LIVEONLY_) + { + auto const testName(GetTestName()); + auto client = m_fileSystemClient->GetFileClient(testName); + + client.UploadFrom( + reinterpret_cast(CsvQueryTestData.data()), CsvQueryTestData.size()); + + Files::DataLake::QueryFileOptions queryOptions; + queryOptions.InputTextConfiguration + = Files::DataLake::FileQueryInputTextOptions::CreateCsvTextOptions( + "\n", ",", "\"", "\\", true); + std::vector fields; + Files::DataLake::Models::FileQueryArrowField field; + field.Type = Files::DataLake::Models::FileQueryArrowFieldType::Int64; + field.Name = "id"; + fields.push_back(field); + field.Type = Files::DataLake::Models::FileQueryArrowFieldType::String; + field.Name = "name"; + fields.push_back(field); + field.Type = Files::DataLake::Models::FileQueryArrowFieldType::Decimal; + field.Name = "price"; + field.Precision = 10; + field.Scale = 2; + fields.push_back(field); + queryOptions.OutputTextConfiguration + = Files::DataLake::FileQueryOutputTextOptions::CreateArrowTextOptions(std::move(fields)); + auto queryResponse + = client.Query("SELECT * from BlobStorage WHERE id > 101 AND price < 100;", queryOptions); + + auto data = queryResponse.Value.BodyStream->ReadToEnd(); + const auto expectedData = Core::Convert::Base64Decode( + "/////" + "+gAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABBAAMAAAACAAIAAAABAAIAAAABAAAAAMAAACAAAAAQAAAAAQAAAC" + "c////AAABBxAAAAAgAAAABAAAAAAAAAAFAAAAcHJpY2UAAAAIAAwABAAIAAgAAAAKAAAAAgAAANT///" + "8AAAEFEAAAABwAAAAEAAAAAAAAAAQAAABuYW1lAAAAAAQABAAEAAAAEAAUAAgABgAHAAwAAAAQABAAAAAAAAEC" + "EAAAABwAAAAEAAAAAAAAAAIAAABpZAAACAAMAAgABwAIAAAAAAAAAUAAAAAAAAAA//////" + "AAAAAUAAAAAAAAAAwAGgAGAAUACAAMAAwAAAAAAwQAHAAAAAgAAAAAAAAAAAAAAAAACgAMAAAABAAIAAoAAACA" + 
"AAAABAAAAAcAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAQAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAgAAAAAAAAAAAAAAAAAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAMA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////" + "4AAAAFAAAAAAAAAAMABYABgAFAAgADAAMAAAAAAMEABgAAACYAAAAAAAAAAAACgAYAAwABAAIAAoAAACMAAAAE" + "AAAAAQAAAAAAAAAAAAAAAcAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgAAAAAAAAACAAAAAAAAAAAAAAAAA" + "AAAAgAAAAAAAAABQAAAAAAAAAOAAAAAAAAAAfAAAAAAAAAFgAAAAAAAAAAAAAAAAAAABYAAAAAAAAAEAAAAAAA" + "AAAAAAAAAMAAAAEAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAABnAAAAAAA" + "AAGoAAAAAAAAAbgAAAAAAAABwAAAAAAAAAAAAAAAGAAAADAAAABMAAAAfAAAAAAAAAGFwcGxlc2xlbW9uc2Jhb" + "mFuYXNzYXBvdGUsbWFtZXkAYwAAAAAAAAAAAAAAAAAAAEUAAAAAAAAAAAAAAAAAAAAnAAAAAAAAAAAAAAAAAAA" + "AMgAAAAAAAAAAAAAAAAAAAA=="); + EXPECT_EQ(data, expectedData); + } + + TEST_F(DataLakeFileClientTest, QueryParquetInputArrowOutput_LIVEONLY_) + { + auto const testName(GetTestName()); + auto client = m_fileSystemClient->GetFileClient(testName); + + client.UploadFrom(ParquetQueryTestData.data(), ParquetQueryTestData.size()); + + Files::DataLake::QueryFileOptions queryOptions; + queryOptions.InputTextConfiguration + = Files::DataLake::FileQueryInputTextOptions::CreateParquetTextOptions(); + std::vector fields; + Files::DataLake::Models::FileQueryArrowField field; + field.Type = Files::DataLake::Models::FileQueryArrowFieldType::Int64; + field.Name = "id"; + fields.push_back(field); + field.Type = Files::DataLake::Models::FileQueryArrowFieldType::String; + field.Name = "name"; + fields.push_back(field); + field.Type = Files::DataLake::Models::FileQueryArrowFieldType::Int64; + field.Name = "price"; + fields.push_back(field); + queryOptions.OutputTextConfiguration + = Files::DataLake::FileQueryOutputTextOptions::CreateArrowTextOptions(std::move(fields)); + auto queryResponse + = client.Query("SELECT * from BlobStorage WHERE id > 101 AND price < 100;", queryOptions); + 
auto data = queryResponse.Value.BodyStream->ReadToEnd(); + const auto expectedData = Core::Convert::Base64Decode( + "/////" + "+AAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABBAAMAAAACAAIAAAABAAIAAAABAAAAAMAAAB4AAAAOAAAAAQAAAC" + "k////AAABAhAAAAAYAAAABAAAAAAAAAAFAAAAcHJpY2UAAACY////AAAAAUAAAADU////" + "AAABBRAAAAAcAAAABAAAAAAAAAAEAAAAbmFtZQAAAAAEAAQABAAAABAAFAAIAAYABwAMAAAAEAAQAAAAAAABAh" + "AAAAAcAAAABAAAAAAAAAACAAAAaWQAAAgADAAIAAcACAAAAAAAAAFAAAAAAAAAAP/////" + "wAAAAFAAAAAAAAAAMABoABgAFAAgADAAMAAAAAAMEABwAAAAIAAAAAAAAAAAAAAAAAAoADAAAAAQACAAKAAAAg" + "AAAAAQAAAAHAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "AAAAEAAAAAAAAAAgAAAAAAAAAAAAAAAAAAAAIAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAADA" + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////" + "+AAAABQAAAAAAAAADAAWAAYABQAIAAwADAAAAAADBAAYAAAAIAAAAAAAAAAAAAoAGAAMAAQACAAKAAAAjAAAAB" + "AAAAABAAAAAAAAAAAAAAAHAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAIAAAAAAAAAAAAAAAA" + "AAAACAAAAAAAAAAIAAAAAAAAABAAAAAAAAAABgAAAAAAAAAYAAAAAAAAAAAAAAAAAAAAGAAAAAAAAAAIAAAAAA" + "AAAAAAAAADAAAAAQAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAZwAAAAAA" + "AAAAAAAABgAAAGFwcGxlcwAAYwAAAAAAAAD/////" + "+AAAABQAAAAAAAAADAAWAAYABQAIAAwADAAAAAADBAAYAAAAIAAAAAAAAAAAAAoAGAAMAAQACAAKAAAAjAAAAB" + "AAAAABAAAAAAAAAAAAAAAHAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAIAAAAAAAAAAAAAAAA" + "AAAACAAAAAAAAAAIAAAAAAAAABAAAAAAAAAABgAAAAAAAAAYAAAAAAAAAAAAAAAAAAAAGAAAAAAAAAAIAAAAAA" + "AAAAAAAAADAAAAAQAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAagAAAAAA" + "AAAAAAAABgAAAGxlbW9ucwAARQAAAAAAAAD/////" + "+AAAABQAAAAAAAAADAAWAAYABQAIAAwADAAAAAADBAAYAAAAIAAAAAAAAAAAAAoAGAAMAAQACAAKAAAAjAAAAB" + "AAAAABAAAAAAAAAAAAAAAHAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAIAAAAAAAAAAAAAAAA" + "AAAACAAAAAAAAAAIAAAAAAAAABAAAAAAAAAABwAAAAAAAAAYAAAAAAAAAAAAAAAAAAAAGAAAAAAAAAAIAAAAAA" + "AAAAAAAAADAAAAAQAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAbgAAAAAA" + 
"AAAAAAAABwAAAGJhbmFuYXMAJwAAAAAAAAD/////" + "+AAAABQAAAAAAAAADAAWAAYABQAIAAwADAAAAAADBAAYAAAAKAAAAAAAAAAAAAoAGAAMAAQACAAKAAAAjAAAAB" + "AAAAABAAAAAAAAAAAAAAAHAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAIAAAAAAAAAAAAAAAA" + "AAAACAAAAAAAAAAIAAAAAAAAABAAAAAAAAAADgAAAAAAAAAgAAAAAAAAAAAAAAAAAAAAIAAAAAAAAAAIAAAAAA" + "AAAAAAAAADAAAAAQAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAcAAAAAAA" + "AAAAAAAADgAAAHNhcG90ZSwibWFtZXkiAAAyAAAAAAAAAP////8AAAAA"); + EXPECT_EQ(data, expectedData); + } + + TEST_F(DataLakeFileClientTest, QueryWithError_LIVEONLY_) + { + auto const testName(GetTestName()); + auto client = m_fileSystemClient->GetFileClient(testName); + + const std::string malformedData = + R"json( +{"id": 100, "name": "oranges", "price": 100} +{"id": 101, "name": "limes", "price": "aa"} +{"id": 102, "name": "berries", "price": 199} +{"id": 103, "name": "apples", "price": "bb"} +{"id": 104, "name": "clementines", "price": 399} +xx +)json"; + client.UploadFrom(reinterpret_cast(malformedData.data()), malformedData.size()); + + Files::DataLake::QueryFileOptions queryOptions; + queryOptions.InputTextConfiguration + = Files::DataLake::FileQueryInputTextOptions::CreateJsonTextOptions(); + queryOptions.OutputTextConfiguration + = Files::DataLake::FileQueryOutputTextOptions::CreateJsonTextOptions(); + auto queryResponse = client.Query("SELECT * FROM BlobStorage WHERE price > 0;", queryOptions); + + try + { + auto data = queryResponse.Value.BodyStream->ReadToEnd(); + FAIL(); + } + catch (StorageException& e) + { + EXPECT_EQ(e.StatusCode, Core::Http::HttpStatusCode::Ok); + EXPECT_EQ(e.ReasonPhrase, "OK"); + EXPECT_FALSE(e.RequestId.empty()); + EXPECT_FALSE(e.ClientRequestId.empty()); + EXPECT_EQ(e.ErrorCode, "ParseError"); + EXPECT_FALSE(e.Message.empty()); + EXPECT_FALSE(std::string(e.what()).empty()); + } + + bool progressCallbackCalled = false; + queryOptions.ProgressHandler + = [&malformedData, &progressCallbackCalled](int64_t offset, int64_t totalBytes) { + 
EXPECT_EQ(totalBytes, static_cast(malformedData.size())); + EXPECT_TRUE(offset >= 0 && offset <= totalBytes); + progressCallbackCalled = true; + }; + int numNonFatalErrors = 0; + int numFatalErrors = 0; + queryOptions.ErrorHandler + = [&numNonFatalErrors, &numFatalErrors](Files::DataLake::FileQueryError e) { + if (e.IsFatal) + { + ++numFatalErrors; + } + else + { + ++numNonFatalErrors; + } + }; + queryResponse = client.Query("SELECT * FROM BlobStorage WHERE price > 0;", queryOptions); + queryResponse.Value.BodyStream->ReadToEnd(); + + EXPECT_EQ(numNonFatalErrors, 2); + EXPECT_EQ(numFatalErrors, 1); + EXPECT_TRUE(progressCallbackCalled); + } + + TEST_F(DataLakeFileClientTest, QueryDefaultInputOutput_LIVEONLY_) + { + auto const testName(GetTestName()); + auto client = m_fileSystemClient->GetFileClient(testName); + + const std::string csvData = "100,oranges,100"; + client.UploadFrom(reinterpret_cast(csvData.data()), csvData.size()); + auto queryResponse = client.Query("SELECT * from BlobStorage;"); + + auto data = queryResponse.Value.BodyStream->ReadToEnd(); + } + + TEST_F(DataLakeFileClientTest, QueryLargeBlob_LIVEONLY_) + { /* Uploads at least DataSize bytes of generated CSV, queries it with CSV input and JSON output, and compares the streamed result chunk by chunk against jsonData, which is built record-for-record alongside the CSV. */ + auto const testName(GetTestName()); + auto client = m_fileSystemClient->GetFileClient(testName); + + constexpr size_t DataSize = static_cast(32_MB); + + int recordCounter = 0; + std::string csvData; + std::string jsonData; + while (csvData.size() < DataSize) + { + std::string counter = std::to_string(recordCounter++); + std::string record = RandomString(static_cast(RandomInt(1, 3000))); + csvData += counter + "," + record + "\n"; + jsonData += "{\"_1\":\"" + counter + "\",\"_2\":\"" + record + "\"}\n"; + } + + client.UploadFrom(reinterpret_cast(csvData.data()), csvData.size()); + + Files::DataLake::QueryFileOptions queryOptions; + queryOptions.InputTextConfiguration + = Files::DataLake::FileQueryInputTextOptions::CreateCsvTextOptions(); + queryOptions.OutputTextConfiguration + = 
Files::DataLake::FileQueryOutputTextOptions::CreateJsonTextOptions(); + auto queryResponse = client.Query("SELECT * FROM BlobStorage;", queryOptions); + + size_t comparePos = 0; + std::vector readBuffer(4096); + while (true) + { + auto s = queryResponse.Value.BodyStream->Read(readBuffer.data(), readBuffer.size()); + if (s == 0) + { + break; + } + ASSERT_TRUE(comparePos + s <= jsonData.size()); + ASSERT_EQ( + std::string(readBuffer.begin(), readBuffer.begin() + s), jsonData.substr(comparePos, s)); + comparePos += s; + } /* NOTE(review): nothing after this loop compares comparePos with jsonData.size(), so a response that ends early would still pass; consider asserting equality here. */ + } + + TEST_F(DataLakeFileClientTest, QueryBlobAccessConditionLeaseId_LIVEONLY_) + { + auto const testName(GetTestName()); + auto client = m_fileSystemClient->GetFileClient(testName); + client.UploadFrom(nullptr, 0); + + Files::DataLake::DataLakeLeaseClient leaseClient( + client, Files::DataLake::DataLakeLeaseClient::CreateUniqueLeaseId()); + leaseClient.Acquire(Files::DataLake::DataLakeLeaseClient::InfiniteLeaseDuration); + + Files::DataLake::QueryFileOptions queryOptions; + queryOptions.AccessConditions.LeaseId + = Files::DataLake::DataLakeLeaseClient::CreateUniqueLeaseId(); + EXPECT_THROW(client.Query("SELECT * FROM BlobStorage;", queryOptions), StorageException); + + queryOptions.AccessConditions.LeaseId = leaseClient.GetLeaseId(); + EXPECT_NO_THROW(client.Query("SELECT * FROM BlobStorage;", queryOptions)); + } + + TEST_F(DataLakeFileClientTest, QueryBlobAccessConditionLastModifiedTime_LIVEONLY_) + { + auto const testName(GetTestName()); + auto client = m_fileSystemClient->GetFileClient(testName); + client.UploadFrom(nullptr, 0); + + auto lastModifiedTime = client.GetProperties().Value.LastModified; + auto timeBeforeStr = lastModifiedTime - std::chrono::seconds(2); + auto timeAfterStr = lastModifiedTime + std::chrono::seconds(2); + + Files::DataLake::QueryFileOptions queryOptions; + queryOptions.AccessConditions.IfModifiedSince = timeBeforeStr; + EXPECT_NO_THROW(client.Query("SELECT * FROM BlobStorage;", queryOptions)); + 
queryOptions.AccessConditions.IfModifiedSince = timeAfterStr; + EXPECT_THROW(client.Query("SELECT * FROM BlobStorage;", queryOptions), StorageException); + + queryOptions = Files::DataLake::QueryFileOptions(); + queryOptions.AccessConditions.IfUnmodifiedSince = timeBeforeStr; + EXPECT_THROW(client.Query("SELECT * FROM BlobStorage;", queryOptions), StorageException); + queryOptions.AccessConditions.IfUnmodifiedSince = timeAfterStr; + EXPECT_NO_THROW(client.Query("SELECT * FROM BlobStorage;", queryOptions)); + } + + TEST_F(DataLakeFileClientTest, QueryBlobAccessConditionETag_LIVEONLY_) + { + auto const testName(GetTestName()); + auto client = m_fileSystemClient->GetFileClient(testName); + client.UploadFrom(nullptr, 0); + + auto etag = client.GetProperties().Value.ETag; + + Files::DataLake::QueryFileOptions queryOptions; + queryOptions.AccessConditions.IfMatch = etag; + EXPECT_NO_THROW(client.Query("SELECT * FROM BlobStorage;", queryOptions)); + queryOptions.AccessConditions.IfMatch = DummyETag; + EXPECT_THROW(client.Query("SELECT * FROM BlobStorage;", queryOptions), StorageException); + + queryOptions = Files::DataLake::QueryFileOptions(); + queryOptions.AccessConditions.IfNoneMatch = DummyETag; + EXPECT_NO_THROW(client.Query("SELECT * FROM BlobStorage;", queryOptions)); + queryOptions.AccessConditions.IfNoneMatch = etag; + EXPECT_THROW(client.Query("SELECT * FROM BlobStorage;", queryOptions), StorageException); + } +}}} // namespace Azure::Storage::Test