From 24f625cee3a3b3db80b8024fd0f3fc00b809e698 Mon Sep 17 00:00:00 2001 From: Jinming Hu Date: Thu, 28 Apr 2022 14:59:19 +0800 Subject: [PATCH 01/18] storage data movement lib --- cmake-modules/FolderList.cmake | 3 + sdk/storage/CMakeLists.txt | 1 + .../azure-storage-datamovement/CHANGELOG.md | 11 + .../azure-storage-datamovement/CMakeLists.txt | 96 ++++ .../azure-storage-datamovement/NOTICE.txt | 452 ++++++++++++++++++ .../cgmanifest.json | 36 ++ .../storage/datamovement/blob_folder.hpp | 43 ++ .../datamovement/datamovement_options.hpp | 22 + .../datamovement/dll_import_export.hpp | 40 ++ .../storage/datamovement/job_properties.hpp | 24 + .../inc/azure/storage/datamovement/rtti.hpp | 36 ++ .../datamovement/storage_transfer_manager.hpp | 51 ++ .../src/private/package_version.hpp | 59 +++ .../src/storage_transfer_manager.cpp | 9 + .../azure-storage-datamovement/vcpkg.json | 25 + .../vcpkg/Config.cmake.in | 11 + .../vcpkg/portfile.cmake | 21 + .../vcpkg/vcpkg.json | 25 + 18 files changed, 965 insertions(+) create mode 100644 sdk/storage/azure-storage-datamovement/CHANGELOG.md create mode 100644 sdk/storage/azure-storage-datamovement/CMakeLists.txt create mode 100644 sdk/storage/azure-storage-datamovement/NOTICE.txt create mode 100644 sdk/storage/azure-storage-datamovement/cgmanifest.json create mode 100644 sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/blob_folder.hpp create mode 100644 sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/datamovement_options.hpp create mode 100644 sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/dll_import_export.hpp create mode 100644 sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/job_properties.hpp create mode 100644 sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/rtti.hpp create mode 100644 sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/storage_transfer_manager.hpp create mode 100644 sdk/storage/azure-storage-datamovement/src/private/package_version.hpp create mode 100644 sdk/storage/azure-storage-datamovement/src/storage_transfer_manager.cpp create mode 100644 sdk/storage/azure-storage-datamovement/vcpkg.json create mode 100644 sdk/storage/azure-storage-datamovement/vcpkg/Config.cmake.in create mode 100644 sdk/storage/azure-storage-datamovement/vcpkg/portfile.cmake create mode 100644 sdk/storage/azure-storage-datamovement/vcpkg/vcpkg.json diff --git a/cmake-modules/FolderList.cmake b/cmake-modules/FolderList.cmake index 8c3685b432..b9fb72c9b5 100644 --- a/cmake-modules/FolderList.cmake +++ b/cmake-modules/FolderList.cmake @@ -27,6 +27,9 @@ macro(GetFolderList project) elseif(${project} STREQUAL STORAGE_QUEUES) DownloadDepVersion(sdk/core azure-core 1.5.0) DownloadDepVersion(sdk/storage/azure-storage-common azure-storage-common 12.2.3) + elseif(${project} STREQUAL STORAGE_DATAMOVEMENT) + DownloadDepVersion(sdk/core azure-core 1.5.0) + DownloadDepVersion(sdk/storage/azure-storage-blobs azure-storage-blobs 12.4.0) endif() list(REMOVE_DUPLICATES BUILD_FOLDERS) endmacro() diff --git a/sdk/storage/CMakeLists.txt b/sdk/storage/CMakeLists.txt index a23f27eae9..e1e9056e65 100644 --- a/sdk/storage/CMakeLists.txt +++ b/sdk/storage/CMakeLists.txt @@ -10,3 +10,4 @@ add_subdirectory(azure-storage-blobs) add_subdirectory(azure-storage-files-datalake) add_subdirectory(azure-storage-files-shares) add_subdirectory(azure-storage-queues) +add_subdirectory(azure-storage-datamovement) diff --git a/sdk/storage/azure-storage-datamovement/CHANGELOG.md b/sdk/storage/azure-storage-datamovement/CHANGELOG.md new file mode 100644 index 0000000000..61f1277dcf --- /dev/null +++ b/sdk/storage/azure-storage-datamovement/CHANGELOG.md @@ -0,0 +1,11 @@ +# Release History + +## 12.0.0-beta.1 (Unreleased) + +### Features Added + +### Breaking Changes + +### Bugs Fixed + +### Other Changes \ No newline at end of file diff --git a/sdk/storage/azure-storage-datamovement/CMakeLists.txt b/sdk/storage/azure-storage-datamovement/CMakeLists.txt new file mode 100644 index 0000000000..ed4b2afc42 --- /dev/null +++ b/sdk/storage/azure-storage-datamovement/CMakeLists.txt @@ -0,0 +1,96 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# SPDX-License-Identifier: MIT + +cmake_minimum_required (VERSION 3.13) +project(azure-storage-datamovement LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD_REQUIRED True) +set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) + +option(FETCH_SOURCE_DEPS "build source dependencies" OFF) +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake-modules") + +include(AzureVcpkg) +include(AzureVersion) +include(AzureCodeCoverage) +include(AzureTransportAdapters) +include(AzureDoxygen) +include(AzureGlobalCompileOptions) +include(AzureConfigRTTI) +include(AzureBuildTargetForCI) + +az_vcpkg_integrate() + +if(FETCH_SOURCE_DEPS) + set(AZ_ALL_LIBRARIES ON) + include(FolderList) + SetCompileOptions(STORAGE_DATAMOVEMENT) + GetFolderList(STORAGE_DATAMOVEMENT) + foreach(oneFolder IN LISTS BUILD_FOLDERS) + message("add folder ${oneFolder}") + add_subdirectory(${oneFolder} EXCLUDE_FROM_ALL) + endforeach() +elseif(NOT AZ_ALL_LIBRARIES) + find_package(azure-storage-blobs-cpp "12.4.0" CONFIG QUIET) + if(NOT azure-storage-blobs-cpp_FOUND) + find_package(azure-storage-blobs-cpp "12.4.0" REQUIRED) + endif() +endif() + +set( + AZURE_STORAGE_DATAMOVEMENT_HEADER + inc/azure/storage/datamovement/blob_folder.hpp + inc/azure/storage/datamovement/datamovement_options.hpp + inc/azure/storage/datamovement/dll_import_export.hpp + inc/azure/storage/datamovement/job_properties.hpp + inc/azure/storage/datamovement/rtti.hpp + inc/azure/storage/datamovement/storage_transfer_manager.hpp + src/private/package_version.hpp +) + +set( + AZURE_STORAGE_DATAMOVEMENT_SOURCE + src/storage_transfer_manager.cpp +) + +add_library(azure-storage-datamovement ${AZURE_STORAGE_DATAMOVEMENT_HEADER} ${AZURE_STORAGE_DATAMOVEMENT_SOURCE}) +create_per_service_target_build(storage azure-storage-datamovement) + +# make sure that users can consume the project as a library. +add_library(Azure::azure-storage-datamovement ALIAS azure-storage-datamovement) + +target_include_directories( + azure-storage-datamovement + PUBLIC + $ + $ +) + +target_link_libraries(azure-storage-datamovement PUBLIC Azure::azure-storage-blobs) + +get_az_version("${CMAKE_CURRENT_SOURCE_DIR}/src/private/package_version.hpp") +generate_documentation(azure-storage-datamovement ${AZ_LIBRARY_VERSION}) + +az_vcpkg_export( + azure-storage-datamovement + STORAGE_BLOBS_BATCH + "azure/storage/datamovement/dll_import_export.hpp" + ) + +az_rtti_setup( + azure-storage-datamovement + STORAGE_BLOBS_BATCH + "azure/storage/datamovement/rtti.hpp" +) + +# coverage. Has no effect if BUILD_CODE_COVERAGE is OFF +create_code_coverage(storage azure-storage-datamovement azure-storage-test "tests?/*;samples?/*") + +if(BUILD_TESTING) + # add_subdirectory(test/ut) +endif() + +if(BUILD_SAMPLES) + # add_subdirectory(samples) +endif() diff --git a/sdk/storage/azure-storage-datamovement/NOTICE.txt b/sdk/storage/azure-storage-datamovement/NOTICE.txt new file mode 100644 index 0000000000..de6b8d8b72 --- /dev/null +++ b/sdk/storage/azure-storage-datamovement/NOTICE.txt @@ -0,0 +1,452 @@ +azure-storage-blobs + +NOTICES AND INFORMATION +Do Not Translate or Localize + +This software incorporates material from third parties. Microsoft makes certain +open source code available at https://3rdpartysource.microsoft.com, or you may +send a check or money order for US $5.00, including the product name, the open +source component name, and version number, to: + +Source Code Compliance Team +Microsoft Corporation +One Microsoft Way +Redmond, WA 98052 +USA + +Notwithstanding any other terms, you may reverse engineer this software to the +extent required to debug changes to any libraries licensed under the GNU Lesser +General Public License. + +------------------------------------------------------------------------------ + +Azure SDK for C++ uses third-party libraries or other resources that may be +distributed under licenses different than the Azure SDK for C++ software. + +In the event that we accidentally failed to list a required notice, please +bring it to our attention. Post an issue or email us: + + azcppsdkhelp@microsoft.com + +The attached notices are provided for information only. + + +License notice for CMake Modules AddGoogleTest +------------------------------------------------------------------- + +BSD 3-Clause License + +Copyright (c) 2017, University of Cincinnati +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +License notice for +------------------------------------------------------------------- + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +License notice for +------------------------------------------------------------------- + +The MIT License (MIT) + +Copyright (c) 2015 Max Woolf + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +License notice for +------------------------------------------------------------------- + +MIT License + +Copyright (c) 2016 Nicolas Seriot + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +License notice for +------------------------------------------------------------------- + +The MIT License (MIT) + +Copyright (c) 2016-2018 Viktor Kirilov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +License notice for +------------------------------------------------------------------- + +The MIT License + +Copyright (c) 2018-2019 Bryan Gillespie + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +License notice for +------------------------------------------------------------------- + +MIT License + +Copyright (c) 2015-2017 Niels Lohmann + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +License notice for cpplint +------------------------------------------------------------------- + +cpplint.py and its corresponding unit tests are Copyright (C) 2009 Google Inc. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +License notice for libcurl +------------------------------------------------------------------- + +COPYRIGHT AND PERMISSION NOTICE + +Copyright (c) 1996 - 2020, Daniel Stenberg, , and many +contributors, see the THANKS file. + +All rights reserved. + +Permission to use, copy, modify, and distribute this software for any purpose +with or without fee is hereby granted, provided that the above copyright +notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN +NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +OR OTHER DEALINGS IN THE SOFTWARE. + +Except as contained in this notice, the name of a copyright holder shall not +be used in advertising or otherwise to promote the sale, use or other dealings +in this Software without prior written authorization of the copyright holder. diff --git a/sdk/storage/azure-storage-datamovement/cgmanifest.json b/sdk/storage/azure-storage-datamovement/cgmanifest.json new file mode 100644 index 0000000000..728e7b7085 --- /dev/null +++ b/sdk/storage/azure-storage-datamovement/cgmanifest.json @@ -0,0 +1,36 @@ +{ + "Registrations": [ + { + "Component": { + "Type": "git", + "git": { + "RepositoryUrl": "https://github.com/cpplint/cpplint", + "CommitHash": "8456631ba3a12842da08238362a90d33fcf47062" + } + }, + "DevelopmentDependency": true + }, + { + "Component": { + "Type": "other", + "Other": { + "Name": "clang-format", + "Version": "9.0.0-2", + "DownloadUrl": "https://ubuntu.pkgs.org/18.04/ubuntu-updates-universe-amd64/clang-format-9_9-2~ubuntu18.04.2_amd64.deb.html" + } + }, + "DevelopmentDependency": true + }, + { + "Component": { + "Type": "other", + "Other": { + "Name": "doxygen", + "Version": "1.8.20", + "DownloadUrl": "http://doxygen.nl/files/doxygen-1.8.20-setup.exe" + } + }, + "DevelopmentDependency": true + } + ] +} diff --git a/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/blob_folder.hpp b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/blob_folder.hpp new file mode 100644 index 0000000000..95fa0bf82c --- /dev/null +++ b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/blob_folder.hpp @@ -0,0 +1,43 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// SPDX-License-Identifier: MIT + +#pragma once + +#include + +#include + +namespace Azure { namespace Storage { namespace DataMovement { + + class BlobFolder final { + public: + static BlobFolder CreateFromConnectionString( + const std::string& connectionString, + const std::string& blobContainerName, + const std::string& blobName, + const Blobs::BlobClientOptions& options = Blobs::BlobClientOptions()); + + explicit BlobFolder( + const std::string& blobUrl, + std::shared_ptr credential, + const Blobs::BlobClientOptions& options = Blobs::BlobClientOptions()); + + explicit BlobFolder( + const std::string& blobUrl, + std::shared_ptr credential, + const Blobs::BlobClientOptions& options = Blobs::BlobClientOptions()); + + explicit BlobFolder( + const std::string& blobUrl, + const Blobs::BlobClientOptions& options = Blobs::BlobClientOptions()); + + BlobFolder GetBlobFolder(const std::string& folderName); + + Blobs::BlobClient GetBlobClient(const std::string& blobName); + }; + + BlobFolder GetBlobFolderFromBlobContainer( + const Blobs::BlobContainerClient& blobContainerClient, + const std::string& folderName); + +}}} // namespace Azure::Storage::DataMovement diff --git a/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/datamovement_options.hpp b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/datamovement_options.hpp new file mode 100644 index 0000000000..c0603159fc --- /dev/null +++ b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/datamovement_options.hpp @@ -0,0 +1,22 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// SPDX-License-Identifier: MIT + +#pragma once + +#include + +namespace Azure { namespace Storage { namespace DataMovement { + + struct StorageTransferManagerOptions final + { + std::string TransferStateDirectoryPath; + }; + + struct UploadBlobOptions final + { + }; + + struct DownloadBlobOptions final + { + }; +}}} // namespace Azure::Storage::DataMovement diff --git a/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/dll_import_export.hpp b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/dll_import_export.hpp new file mode 100644 index 0000000000..3101965a95 --- /dev/null +++ b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/dll_import_export.hpp @@ -0,0 +1,40 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// SPDX-License-Identifier: MIT + +/** + * @file + * @brief DLL export macro. + */ + +// For explanation, see the comment in azure/core/dll_import_export.hpp + +#pragma once + +/** + * @def AZ_STORAGE_DATAMOVEMENT_DLLEXPORT + * @brief Applies DLL export attribute, when applicable. + * @note See https://docs.microsoft.com/cpp/cpp/dllexport-dllimport?view=msvc-160. + */ + +#if defined(AZ_STORAGE_DATAMOVEMENT_DLL) \ + || (0 /*@AZ_STORAGE_DATAMOVEMENT_DLL_INSTALLED_AS_PACKAGE@*/) +#define AZ_STORAGE_DATAMOVEMENT_BUILT_AS_DLL 1 +#else +#define AZ_STORAGE_DATAMOVEMENT_BUILT_AS_DLL 0 +#endif + +#if AZ_STORAGE_DATAMOVEMENT_BUILT_AS_DLL +#if defined(_MSC_VER) +#if defined(AZ_STORAGE_DATAMOVEMENT_BEING_BUILT) +#define AZ_STORAGE_DATAMOVEMENT_DLLEXPORT __declspec(dllexport) +#else // !defined(AZ_STORAGE_DATAMOVEMENT_BEING_BUILT) +#define AZ_STORAGE_DATAMOVEMENT_DLLEXPORT __declspec(dllimport) +#endif // AZ_STORAGE_DATAMOVEMENT_BEING_BUILT +#else // !defined(_MSC_VER) +#define AZ_STORAGE_DATAMOVEMENT_DLLEXPORT +#endif // _MSC_VER +#else // !AZ_STORAGE_DATAMOVEMENT_BUILT_AS_DLL +#define AZ_STORAGE_DATAMOVEMENT_DLLEXPORT +#endif // AZ_STORAGE_DATAMOVEMENT_BUILT_AS_DLL + +#undef AZ_STORAGE_DATAMOVEMENT_BUILT_AS_DLL diff --git a/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/job_properties.hpp b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/job_properties.hpp new file mode 100644 index 0000000000..34fba505c4 --- /dev/null +++ b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/job_properties.hpp @@ -0,0 +1,24 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// SPDX-License-Identifier: MIT + +#pragma once + +namespace Azure { namespace Storage { namespace DataMovement { + + enum class TransferType + { + SingleUpload = 0, + SingleDownload = 1, + DirectoryUpload = 2, + DirectoryDownload = 3, + }; + + struct JobProperties final + { + std::string JobId; + std::string SourceUrl; + std::string DestinationUrl; + TransferType Type; + }; + +}}} // namespace Azure::Storage::DataMovement diff --git a/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/rtti.hpp b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/rtti.hpp new file mode 100644 index 0000000000..79b1d02ae8 --- /dev/null +++ b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/rtti.hpp @@ -0,0 +1,36 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// SPDX-License-Identifier: MIT + +/** + * @file + * @brief Run-time type info enable or disable. + * + * @details When RTTI is enabled, defines a macro `AZ_STORAGE_DATAMOVEMENT_RTTI`. + * When the macro is not defined, RTTI is disabled. + * + * @details Each library has this header file. These headers are being configured by + * `az_rtti_setup()` CMake macro. CMake install will patch this file during installation, depending + * on the build flags. + */ + +#pragma once + +/** + * @def AZ_STORAGE_DATAMOVEMENT_RTTI + * @brief A macro indicating whether the code is built with RTTI or not. + * + * @details `AZ_RTTI` could be defined while building the Azure SDK with CMake, however, after + * the build is completed, that information is not preserved for the code that consumes Azure SDK + * headers, unless the code that consumes the SDK is the part of the same build process. To address + * this issue, CMake install would patch the header it places in the installation directory, so that + * condition: + * `#if defined(AZ_RTTI) || (0)` + * becomes, effectively, + * `#if defined(AZ_RTTI) || (0 + 1)` + * when the library was built with RTTI support, and will make no changes to the + * condition when it was not. + */ + +#if defined(AZ_RTTI) || (0 /*@AZ_STORAGE_DATAMOVEMENT_RTTI@*/) +#define AZ_STORAGE_DATAMOVEMENT_RTTI +#endif diff --git a/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/storage_transfer_manager.hpp b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/storage_transfer_manager.hpp new file mode 100644 index 0000000000..c4e2ff1aa0 --- /dev/null +++ b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/storage_transfer_manager.hpp @@ -0,0 +1,51 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// SPDX-License-Identifier: MIT + +#pragma once + +#include + +#include + +#include "azure/storage/datamovement/blob_folder.hpp" +#include "azure/storage/datamovement/datamovement_options.hpp" +#include "azure/storage/datamovement/job_properties.hpp" + +namespace Azure { namespace Storage { namespace DataMovement { + + class StorageTransferManager final { + public: + StorageTransferManager(const StorageTransferManagerOptions& options); + + JobProperties ScheduleUpload( + const std::string& sourceLocalPath, + const Blobs::BlobClient& destinationBlob, + const UploadBlobOptions& options = UploadBlobOptions()); + + JobProperties ScheduleUploadDirectory( + const std::string& sourceLocalPath, + const BlobFolder& destinationBlobFolder, + const UploadBlobOptions& options = UploadBlobOptions()); + + JobProperties ScheduleDownload( + const Blobs::BlobClient& sourceBlob, + const std::string& destinationLocalPath, + const DownloadBlobOptions& options = DownloadBlobOptions()); + + JobProperties ScheduleDownloadDirectory( + const BlobFolder& sourceBlobFolder, + const std::string& destinationLocalPath, + const DownloadBlobOptions& options = DownloadBlobOptions()); + + JobProperties GetJobProperties(const std::string& jobId); + void PauseJob(const std::string& jobId); + void PauseAllJobs(); + void ResumeJob(const std::string& jobId); + void ResumeAllJobs(); + void CancelJob(const std::string& jobId); + void CancelAllJobs(); + + private: + }; + +}}} // namespace Azure::Storage::DataMovement diff --git a/sdk/storage/azure-storage-datamovement/src/private/package_version.hpp b/sdk/storage/azure-storage-datamovement/src/private/package_version.hpp new file mode 100644 index 0000000000..e9e4524476 --- /dev/null +++ b/sdk/storage/azure-storage-datamovement/src/private/package_version.hpp @@ -0,0 +1,59 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// SPDX-License-Identifier: MIT + +/** + * @file + * @brief Provides version information. + */ + +#pragma once + +#define AZURE_STORAGE_DATAMOVEMENT_VERSION_MAJOR 1 +#define AZURE_STORAGE_DATAMOVEMENT_VERSION_MINOR 0 +#define AZURE_STORAGE_DATAMOVEMENT_VERSION_PATCH 0 +#define AZURE_STORAGE_DATAMOVEMENT_VERSION_PRERELEASE "beta.1" + +#define AZURE_STORAGE_DATAMOVEMENT_VERSION_ITOA_HELPER(i) #i +#define AZURE_STORAGE_DATAMOVEMENT_VERSION_ITOA(i) AZURE_STORAGE_DATAMOVEMENT_VERSION_ITOA_HELPER(i) + +namespace Azure { namespace Storage { namespace DataMovement { namespace _detail { + /** + * @brief Provides version information. + */ + class PackageVersion final { + public: + /// Major numeric identifier. + static constexpr int32_t Major = AZURE_STORAGE_DATAMOVEMENT_VERSION_MAJOR; + + /// Minor numeric identifier. + static constexpr int32_t Minor = AZURE_STORAGE_DATAMOVEMENT_VERSION_MINOR; + + /// Patch numeric identifier. + static constexpr int32_t Patch = AZURE_STORAGE_DATAMOVEMENT_VERSION_PATCH; + + /// Indicates whether the SDK is in a pre-release state. + static constexpr bool IsPreRelease + = sizeof(AZURE_STORAGE_DATAMOVEMENT_VERSION_PRERELEASE) != sizeof(""); + + /** + * @brief The version in string format used for telemetry following the `semver.org` standard + * (https://semver.org). + */ + static constexpr const char* ToString() + { + return IsPreRelease + ? AZURE_STORAGE_DATAMOVEMENT_VERSION_ITOA(AZURE_STORAGE_DATAMOVEMENT_VERSION_MAJOR) "." AZURE_STORAGE_DATAMOVEMENT_VERSION_ITOA( + AZURE_STORAGE_DATAMOVEMENT_VERSION_MINOR) "." AZURE_STORAGE_DATAMOVEMENT_VERSION_ITOA(AZURE_STORAGE_DATAMOVEMENT_VERSION_PATCH) "-" AZURE_STORAGE_DATAMOVEMENT_VERSION_PRERELEASE + : AZURE_STORAGE_DATAMOVEMENT_VERSION_ITOA(AZURE_STORAGE_DATAMOVEMENT_VERSION_MAJOR) "." AZURE_STORAGE_DATAMOVEMENT_VERSION_ITOA( + AZURE_STORAGE_DATAMOVEMENT_VERSION_MINOR) "." AZURE_STORAGE_DATAMOVEMENT_VERSION_ITOA(AZURE_STORAGE_DATAMOVEMENT_VERSION_PATCH); + } + }; +}}}} // namespace Azure::Storage::DataMovement::_detail + +#undef AZURE_STORAGE_DATAMOVEMENT_VERSION_ITOA_HELPER +#undef AZURE_STORAGE_DATAMOVEMENT_VERSION_ITOA + +#undef AZURE_STORAGE_DATAMOVEMENT_VERSION_MAJOR +#undef AZURE_STORAGE_DATAMOVEMENT_VERSION_MINOR +#undef AZURE_STORAGE_DATAMOVEMENT_VERSION_PATCH +#undef AZURE_STORAGE_DATAMOVEMENT_VERSION_PRERELEASE diff --git a/sdk/storage/azure-storage-datamovement/src/storage_transfer_manager.cpp b/sdk/storage/azure-storage-datamovement/src/storage_transfer_manager.cpp new file mode 100644 index 0000000000..9081e47d7d --- /dev/null +++ b/sdk/storage/azure-storage-datamovement/src/storage_transfer_manager.cpp @@ -0,0 +1,9 @@ +#include "azure/storage/datamovement/storage_transfer_manager.hpp" + +namespace Azure { namespace Storage { namespace DataMovement { + + StorageTransferManager::StorageTransferManager(const StorageTransferManagerOptions& options) { + (void)options; + } + +}}} // namespace Azure::Storage::DataMovement diff --git a/sdk/storage/azure-storage-datamovement/vcpkg.json b/sdk/storage/azure-storage-datamovement/vcpkg.json new file mode 100644 index 0000000000..a3162fb39c --- /dev/null +++ b/sdk/storage/azure-storage-datamovement/vcpkg.json @@ -0,0 +1,25 @@ +{ + "name": "azure-storage-datamovement-cpp", + "version-semver": "1.0.0-beta.1", + "description": [ + "Microsoft Azure Storage Data Movement SDK for C++", + "This library provides Azure Storage Data Movement SDK." + ], + "homepage": "https://github.com/Azure/azure-sdk-for-cpp/tree/main/sdk/storage/azure-storage-datamovement", + "license": "MIT", + "dependencies": [ + { + "name": "azure-storage-blobs-cpp", + "default-features": false, + "version>=": "12.4.0" + }, + { + "name": "vcpkg-cmake", + "host": true + }, + { + "name": "vcpkg-cmake-config", + "host": true + } + ] +} diff --git a/sdk/storage/azure-storage-datamovement/vcpkg/Config.cmake.in b/sdk/storage/azure-storage-datamovement/vcpkg/Config.cmake.in new file mode 100644 index 0000000000..8346a08aad --- /dev/null +++ b/sdk/storage/azure-storage-datamovement/vcpkg/Config.cmake.in @@ -0,0 +1,11 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# SPDX-License-Identifier: MIT + +@PACKAGE_INIT@ + +include(CMakeFindDependencyMacro) +find_dependency(azure-storage-blobs-cpp "12.4.0") + +include("${CMAKE_CURRENT_LIST_DIR}/azure-storage-datamovement-cppTargets.cmake") + +check_required_components("azure-storage-datamovement-cpp") diff --git a/sdk/storage/azure-storage-datamovement/vcpkg/portfile.cmake b/sdk/storage/azure-storage-datamovement/vcpkg/portfile.cmake new file mode 100644 index 0000000000..60322d883f --- /dev/null +++ b/sdk/storage/azure-storage-datamovement/vcpkg/portfile.cmake @@ -0,0 +1,21 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# SPDX-License-Identifier: MIT + +vcpkg_from_github( + OUT_SOURCE_PATH SOURCE_PATH + REPO Azure/azure-sdk-for-cpp + REF azure-storage-datamovement_@AZ_LIBRARY_VERSION@ + SHA512 0 +) + +vcpkg_cmake_configure( + SOURCE_PATH ${SOURCE_PATH}/sdk/storage/azure-storage-datamovement/ + OPTIONS + -DWARNINGS_AS_ERRORS=OFF +) + +vcpkg_cmake_install() +file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/include") +vcpkg_cmake_config_fixup() +file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/share") +vcpkg_copy_pdbs() diff --git a/sdk/storage/azure-storage-datamovement/vcpkg/vcpkg.json b/sdk/storage/azure-storage-datamovement/vcpkg/vcpkg.json new file mode 100644 index 0000000000..2d99d3201d --- /dev/null +++ b/sdk/storage/azure-storage-datamovement/vcpkg/vcpkg.json @@ -0,0 +1,25 @@ +{ + "name": "azure-storage-datamovement-cpp", + "version-semver": "@AZ_LIBRARY_VERSION@", + "description": [ + "Microsoft Azure Storage Data Movement SDK for C++", + "This library provides Azure Storage Data Movement SDK." + ], + "homepage": "https://github.com/Azure/azure-sdk-for-cpp/tree/main/sdk/storage/azure-storage-datamovement", + "license": "MIT", + "dependencies": [ + { + "name": "azure-storage-blobs-cpp", + "default-features": false, + "version>=": "12.4.0" + }, + { + "name": "vcpkg-cmake", + "host": true + }, + { + "name": "vcpkg-cmake-config", + "host": true + } + ] +} From 66a57feff8556c9db79627907d023df59c7e0718 Mon Sep 17 00:00:00 2001 From: Jinming Hu Date: Fri, 29 Apr 2022 08:51:58 +0800 Subject: [PATCH 02/18] clang-format --- .../src/storage_transfer_manager.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sdk/storage/azure-storage-datamovement/src/storage_transfer_manager.cpp b/sdk/storage/azure-storage-datamovement/src/storage_transfer_manager.cpp index 9081e47d7d..b5a783ce66 100644 --- a/sdk/storage/azure-storage-datamovement/src/storage_transfer_manager.cpp +++ b/sdk/storage/azure-storage-datamovement/src/storage_transfer_manager.cpp @@ -2,7 +2,8 @@ namespace Azure { namespace Storage { namespace DataMovement { - StorageTransferManager::StorageTransferManager(const StorageTransferManagerOptions& options) { + StorageTransferManager::StorageTransferManager(const StorageTransferManagerOptions& options) + { (void)options; } From c3a551fb80920eea8558eae5edb05b4877a1cc7f Mon Sep 17 00:00:00 2001 From: Jinming Hu Date: Sat, 30 Apr 2022 09:04:16 +0800 Subject: [PATCH 03/18] f --- .../azure-storage-datamovement/CMakeLists.txt | 12 ++++++------ .../inc/azure/storage/datamovement/blob_folder.hpp | 4 ++-- .../datamovement/storage_transfer_manager.hpp | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/sdk/storage/azure-storage-datamovement/CMakeLists.txt b/sdk/storage/azure-storage-datamovement/CMakeLists.txt index ed4b2afc42..01545eb526 100644 --- a/sdk/storage/azure-storage-datamovement/CMakeLists.txt +++ b/sdk/storage/azure-storage-datamovement/CMakeLists.txt @@ -1,6 +1,10 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # SPDX-License-Identifier: MIT +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake-modules") +include(AzureVcpkg) +az_vcpkg_integrate() + cmake_minimum_required (VERSION 3.13) project(azure-storage-datamovement LANGUAGES CXX) @@ -9,9 +13,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED True) set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) option(FETCH_SOURCE_DEPS "build source dependencies" OFF) -list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake-modules") -include(AzureVcpkg) include(AzureVersion) include(AzureCodeCoverage) include(AzureTransportAdapters) @@ -20,8 +22,6 @@ include(AzureGlobalCompileOptions) include(AzureConfigRTTI) include(AzureBuildTargetForCI) -az_vcpkg_integrate() - if(FETCH_SOURCE_DEPS) set(AZ_ALL_LIBRARIES ON) include(FolderList) @@ -74,13 +74,13 @@ generate_documentation(azure-storage-datamovement ${AZ_LIBRARY_VERSION}) az_vcpkg_export( azure-storage-datamovement - STORAGE_BLOBS_BATCH + STORAGE_DATAMOVEMENT "azure/storage/datamovement/dll_import_export.hpp" ) az_rtti_setup( azure-storage-datamovement - STORAGE_BLOBS_BATCH + STORAGE_DATAMOVEMENT "azure/storage/datamovement/rtti.hpp" ) diff --git a/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/blob_folder.hpp b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/blob_folder.hpp index 95fa0bf82c..e488787a8a 100644 --- a/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/blob_folder.hpp +++ b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/blob_folder.hpp @@ -31,9 +31,9 @@ namespace Azure { namespace Storage { namespace DataMovement { const std::string& blobUrl, const Blobs::BlobClientOptions& options = Blobs::BlobClientOptions()); - BlobFolder GetBlobFolder(const std::string& folderName); + BlobFolder GetBlobFolder(const std::string& folderName) const; - Blobs::BlobClient GetBlobClient(const std::string& blobName); + Blobs::BlobClient GetBlobClient(const std::string& blobName) const; }; BlobFolder GetBlobFolderFromBlobContainer( diff --git a/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/storage_transfer_manager.hpp b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/storage_transfer_manager.hpp index c4e2ff1aa0..da04f1f20d 100644 --- a/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/storage_transfer_manager.hpp +++ b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/storage_transfer_manager.hpp @@ -15,7 +15,7 @@ namespace Azure { namespace Storage { namespace DataMovement { class StorageTransferManager final { public: - StorageTransferManager(const StorageTransferManagerOptions& options); + explicit StorageTransferManager(const StorageTransferManagerOptions& options); JobProperties ScheduleUpload( const std::string& sourceLocalPath, From 04d6b9a616dcdf02dddab533b746c38486937643 Mon Sep 17 00:00:00 2001 From: Jinming Hu Date: Sun, 1 May 2022 11:57:39 +0800 Subject: [PATCH 04/18] task and scheduler --- .../azure-storage-datamovement/CMakeLists.txt | 3 + .../azure/storage/datamovement/scheduler.hpp | 71 +++++ .../inc/azure/storage/datamovement/task.hpp | 28 ++ .../src/scheduler.cpp | 276 ++++++++++++++++++ 4 files changed, 378 insertions(+) create mode 100644 sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/scheduler.hpp create mode 100644 sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/task.hpp create mode 100644 sdk/storage/azure-storage-datamovement/src/scheduler.cpp diff --git a/sdk/storage/azure-storage-datamovement/CMakeLists.txt b/sdk/storage/azure-storage-datamovement/CMakeLists.txt index 01545eb526..96aa9c81a8 100644 --- a/sdk/storage/azure-storage-datamovement/CMakeLists.txt +++ b/sdk/storage/azure-storage-datamovement/CMakeLists.txt @@ -45,12 +45,15 @@ set( inc/azure/storage/datamovement/dll_import_export.hpp inc/azure/storage/datamovement/job_properties.hpp inc/azure/storage/datamovement/rtti.hpp + inc/azure/storage/datamovement/scheduler.hpp inc/azure/storage/datamovement/storage_transfer_manager.hpp + inc/azure/storage/datamovement/task.hpp src/private/package_version.hpp ) set( AZURE_STORAGE_DATAMOVEMENT_SOURCE + src/scheduler.cpp src/storage_transfer_manager.cpp ) diff --git a/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/scheduler.hpp b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/scheduler.hpp new file mode 100644 index 0000000000..80605d2845 --- /dev/null +++ b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/scheduler.hpp @@ -0,0 +1,71 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "azure/storage/datamovement/task.hpp" + +namespace Azure { namespace Storage { namespace DataMovement { namespace _internal { + + using TaskQueue = std::queue; + + struct SchedulerOptions + { + Nullable NumThreads; // default: 2 * num cpus + Nullable MaxMemorySize; // default: 128MB * num threads + }; + + class Scheduler { + public: + explicit Scheduler(const SchedulerOptions& options); + ~Scheduler(); + + Scheduler(const Scheduler&) = delete; + Scheduler& operator=(const Scheduler&) = delete; + + void AddTask(Task&& task); + void AddTasks(std::vector&& tasks); + + // TODO: Pasue/Suspend and Resume + + private: + SchedulerOptions m_options; + + std::atomic m_stopped{false}; + + // resource left + std::atomic m_memoryLeft; + // TODO: other resource + + // pending tasks + TaskQueue m_pendingDiskIOTasks; + TaskQueue m_pendingNetworkUploadTasks; + TaskQueue m_pendingNetworkDownloadTasks; + std::mutex m_pendingTasksMutex; + std::condition_variable m_pendingTasksCv; + + std::thread m_schedulerThread; + + // ready tasks, should run asap + TaskQueue m_readyTasks; + std::mutex m_readyTasksMutex; + std::condition_variable m_readyTasksCv; + + TaskQueue m_readyDiskIOTasks; + std::mutex m_readyDiskIOTasksMutex; + std::condition_variable m_readyDiskIOTasksCv; + + std::vector m_workerThreads; + }; + +}}}} // namespace Azure::Storage::DataMovement::_internal diff --git a/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/task.hpp b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/task.hpp new file mode 100644 index 0000000000..4d2dc34fd6 --- /dev/null +++ b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/task.hpp @@ -0,0 +1,28 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// SPDX-License-Identifier: MIT + +#pragma once + +#include + +namespace Azure { namespace Storage { namespace DataMovement { namespace _internal { + enum class TaskType + { + DiskIO, + NetworkUpload, + NetworkDownload, + Other, // other tasks will be run ASAP + }; + + // Task should be idempotent + struct Task + { + TaskType Type; + + size_t MemoryCost = 0; + size_t MemoryGiveBack = 0; + + std::function func; // func shouldn't throw + }; + +}}}} // namespace Azure::Storage::DataMovement::_internal diff --git a/sdk/storage/azure-storage-datamovement/src/scheduler.cpp b/sdk/storage/azure-storage-datamovement/src/scheduler.cpp new file mode 100644 index 0000000000..9eb8504fcd --- /dev/null +++ b/sdk/storage/azure-storage-datamovement/src/scheduler.cpp @@ -0,0 +1,276 @@ +#include "azure/storage/datamovement/scheduler.hpp" + +#include +#include +#include + +#include + +#pragma warning(disable : 26110 26117) + +namespace Azure { namespace Storage { namespace DataMovement { namespace _internal { + + Scheduler::Scheduler(const SchedulerOptions& options) : m_options(options) + { + size_t numThreads = options.NumThreads.HasValue() ? options.NumThreads.Value() + : std::thread::hardware_concurrency() * 2; + AZURE_ASSERT(numThreads != 0); + size_t maxMemorySize = options.MaxMemorySize.HasValue() ? options.MaxMemorySize.Value() + : 128 * 1024 * 1024 * numThreads; + m_options.NumThreads = numThreads; + m_options.MaxMemorySize = maxMemorySize; + + m_memoryLeft = m_options.MaxMemorySize.Value(); + + auto workerFunc = [this](TaskQueue& q, std::mutex& m, std::condition_variable& cv) { + while (true) + { + std::unique_lock guard(m); + cv.wait( + guard, [this, &q] { return m_stopped.load(std::memory_order_relaxed) || !q.empty(); }); + if (m_stopped.load(std::memory_order_relaxed)) + { + break; + } + auto task = std::move(q.front()); + q.pop(); + guard.unlock(); + + task.func(); + } + }; + + m_workerThreads.reserve(numThreads + 1); + for (size_t i = 0; i < numThreads; ++i) + { + m_workerThreads.push_back(std::thread( + workerFunc, + std::ref(m_readyTasks), + std::ref(m_readyTasksMutex), + std::ref(m_readyTasksCv))); + } + m_workerThreads.push_back(std::thread( + workerFunc, + std::ref(m_readyDiskIOTasks), + std::ref(m_readyDiskIOTasksMutex), + std::ref(m_readyDiskIOTasksCv))); + + auto schedulerFunc = [this]() { + std::unique_lock guard(m_pendingTasksMutex); + while (true) + { + if (m_stopped.load(std::memory_order_relaxed)) + { + break; + } + + { + // schedule disk IO tasks + std::unique_lock readyTasksGuard(m_readyDiskIOTasksMutex, std::defer_lock); + int numScheduledTasks = 0; + while (!m_pendingDiskIOTasks.empty() + && m_pendingDiskIOTasks.front().MemoryCost + < m_memoryLeft.load(std::memory_order_relaxed)) + { + auto task = std::move(m_pendingDiskIOTasks.front()); + m_pendingDiskIOTasks.pop(); + m_memoryLeft.fetch_sub(task.MemoryCost); + if (!readyTasksGuard.owns_lock()) + { + readyTasksGuard.lock(); + } + m_readyDiskIOTasks.push(std::move(task)); + ++numScheduledTasks; + } + if (numScheduledTasks != 0) + { + readyTasksGuard.unlock(); + m_readyDiskIOTasksCv.notify_all(); + } + } + { + // schedule network tasks + std::unique_lock readyTasksGuard(m_readyTasksMutex, std::defer_lock); + int numScheduledTasks = 0; + while (true) + { + bool shouldBreak = true; + if (!m_pendingNetworkUploadTasks.empty()) + { + auto task = std::move(m_pendingNetworkUploadTasks.front()); + m_pendingNetworkUploadTasks.pop(); + if (!readyTasksGuard.owns_lock()) + { + readyTasksGuard.lock(); + } + m_readyTasks.push(std::move(task)); + ++numScheduledTasks; + shouldBreak = false; + } + if (!m_pendingNetworkDownloadTasks.empty()) + { + auto task = std::move(m_pendingNetworkDownloadTasks.front()); + m_pendingNetworkDownloadTasks.pop(); + if (!readyTasksGuard.owns_lock()) + { + readyTasksGuard.lock(); + } + m_readyTasks.push(std::move(task)); + ++numScheduledTasks; + shouldBreak = false; + } + if (shouldBreak) + { + break; + } + } + if (numScheduledTasks >= m_options.NumThreads.Value()) + { + readyTasksGuard.unlock(); + m_readyTasksCv.notify_all(); + } + else if (numScheduledTasks > 0) + { + readyTasksGuard.unlock(); + for (int i = 0; i < numScheduledTasks; ++i) + { + m_readyTasksCv.notify_one(); + } + } + } + + m_pendingTasksCv.wait(guard); + } + }; + + m_schedulerThread = std::thread(schedulerFunc); + } + + Scheduler::~Scheduler() + { + m_stopped.store(true, std::memory_order_relaxed); + m_pendingTasksCv.notify_one(); + m_readyDiskIOTasksCv.notify_all(); + m_readyTasksCv.notify_all(); + m_schedulerThread.join(); + for (auto& th : m_workerThreads) + { + th.join(); + } + } + + void Scheduler::AddTask(Task&& task) + { + if (task.Type == TaskType::DiskIO) + { + std::lock_guard guard(m_pendingTasksMutex); + m_pendingDiskIOTasks.push(std::move(task)); + m_pendingTasksCv.notify_one(); + } + else if (task.Type == TaskType::NetworkUpload) + { + std::lock_guard guard(m_pendingTasksMutex); + m_pendingNetworkUploadTasks.push(std::move(task)); + m_pendingTasksCv.notify_one(); + } + else if (task.Type == TaskType::NetworkDownload) + { + std::lock_guard guard(m_pendingTasksMutex); + m_pendingNetworkDownloadTasks.push(std::move(task)); + m_pendingTasksCv.notify_one(); + } + else if (task.Type == TaskType::Other) + { + std::lock_guard guard(m_readyTasksMutex); + m_memoryLeft.fetch_sub(task.MemoryCost); + m_readyTasks.push(std::move(task)); + m_readyTasksCv.notify_one(); + } + else + { + AZURE_UNREACHABLE_CODE(); + } + } + + void Scheduler::AddTasks(std::vector&& tasks) + { + std::vector validTaskBitmap(tasks.size(), false); + { + std::unique_lock guard(m_pendingTasksMutex, std::defer_lock); + int numTasksAdded = 0; + for (int i = 0; i < tasks.size(); ++i) + { + if (tasks[i].Type == TaskType::DiskIO) + { + if (!guard.owns_lock()) + { + guard.lock(); + } + m_pendingDiskIOTasks.push(std::move(tasks[i])); + ++numTasksAdded; + } + else if (tasks[i].Type == TaskType::NetworkUpload) + { + if (!guard.owns_lock()) + { + guard.lock(); + } + m_pendingNetworkUploadTasks.push(std::move(tasks[i])); + ++numTasksAdded; + } + else if (tasks[i].Type == TaskType::NetworkDownload) + { + if (!guard.owns_lock()) + { + guard.lock(); + } + m_pendingNetworkDownloadTasks.push(std::move(tasks[i])); + ++numTasksAdded; + } + else + { + validTaskBitmap[i] = true; + } + } + if (numTasksAdded > 0) + { + guard.unlock(); + m_pendingTasksCv.notify_one(); + } + } + { + std::unique_lock guard(m_readyTasksMutex, std::defer_lock); + int numTasksAdded = 0; + for (int i = 0; i < tasks.size(); ++i) + { + if (!validTaskBitmap[i]) + { + continue; + } + if (tasks[i].Type == TaskType::Other) + { + if (!guard.owns_lock()) + { + guard.lock(); + } + m_readyTasks.push(std::move(tasks[i])); + ++numTasksAdded; + } + } + if (numTasksAdded >= m_options.NumThreads.Value()) + { + guard.unlock(); + m_readyTasksCv.notify_all(); + } + else if (numTasksAdded > 0) + { + guard.unlock(); + for (int i = 0; i < numTasksAdded; ++i) + { + m_readyTasksCv.notify_one(); + } + } + } + } + +}}}} // namespace Azure::Storage::DataMovement::_internal From 3fd6e78816cf275629307cc8ac62aec23729a1ce Mon Sep 17 00:00:00 2001 From: Jinming Hu Date: Sun, 1 May 2022 23:28:19 +0800 Subject: [PATCH 05/18] file_io read --- .../azure/storage/common/internal/file_io.hpp | 10 +++-- .../azure-storage-common/src/file_io.cpp | 43 ++++++++++++++++++- 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/sdk/storage/azure-storage-common/inc/azure/storage/common/internal/file_io.hpp b/sdk/storage/azure-storage-common/inc/azure/storage/common/internal/file_io.hpp index 26c3d55b86..8a48526447 100644 --- a/sdk/storage/azure-storage-common/inc/azure/storage/common/internal/file_io.hpp +++ b/sdk/storage/azure-storage-common/inc/azure/storage/common/internal/file_io.hpp @@ -19,13 +19,16 @@ namespace Azure { namespace Storage { namespace _internal { class FileReader final { public: FileReader(const std::string& filename); - + FileReader(const FileReader&) = delete; + FileReader& operator=(const FileReader&) = delete; ~FileReader(); FileHandle GetHandle() const { return m_handle; } int64_t GetFileSize() const { return m_fileSize; } + size_t Read(uint8_t* buffer, size_t length, int64_t offset) const; + private: FileHandle m_handle; int64_t m_fileSize; @@ -34,12 +37,13 @@ namespace Azure { namespace Storage { namespace _internal { class FileWriter final { public: FileWriter(const std::string& filename); - + FileWriter(const FileWriter&) = delete; + FileWriter& operator=(const FileWriter&) = delete; ~FileWriter(); FileHandle GetHandle() const { return m_handle; } - void Write(const uint8_t* buffer, size_t length, int64_t offset); + void Write(const uint8_t* buffer, size_t length, int64_t offset) const; private: FileHandle m_handle; diff --git a/sdk/storage/azure-storage-common/src/file_io.cpp b/sdk/storage/azure-storage-common/src/file_io.cpp index 96cb8e1792..4ec7405c2d 100644 --- a/sdk/storage/azure-storage-common/src/file_io.cpp +++ b/sdk/storage/azure-storage-common/src/file_io.cpp @@ -22,6 +22,7 @@ #include #endif +#include #include #include @@ -87,6 +88,29 @@ namespace Azure { namespace Storage { namespace _internal { FileReader::~FileReader() { CloseHandle(static_cast(m_handle)); } + size_t FileReader::Read(uint8_t* buffer, size_t length, int64_t offset) const + { + length = std::min(length, m_fileSize - offset); + if (length > std::numeric_limits::max()) + { + throw std::runtime_error("Failed to read file."); + } + + OVERLAPPED overlapped; + std::memset(&overlapped, 0, sizeof(overlapped)); + overlapped.Offset = static_cast(static_cast(offset)); + overlapped.OffsetHigh = static_cast(static_cast(offset) >> 32); + + DWORD bytesRead; + BOOL ret = ReadFile( + static_cast(m_handle), buffer, static_cast(length), &bytesRead, &overlapped); + if (!ret) + { + throw std::runtime_error("Failed to read file."); + } + return bytesRead; + } + FileWriter::FileWriter(const std::string& filename) { int sizeNeeded = MultiByteToWideChar( @@ -138,7 +162,7 @@ namespace Azure { namespace Storage { namespace _internal { FileWriter::~FileWriter() { CloseHandle(static_cast(m_handle)); } - void FileWriter::Write(const uint8_t* buffer, size_t length, int64_t offset) + void FileWriter::Write(const uint8_t* buffer, size_t length, int64_t offset) const { if (length > std::numeric_limits::max()) { @@ -180,6 +204,21 @@ namespace Azure { namespace Storage { namespace _internal { FileReader::~FileReader() { close(m_handle); } + size_t FileReader::Read(uint8_t* buffer, size_t length, int64_t offset) const + { + if (offset > static_cast(std::numeric_limits::max())) + { + throw std::runtime_error("Failed to read file."); + } + length = std::min(length, m_fileSize - offset); + ssize_t bytesRead = pread(m_handle, buffer, length, static_cast(offset)); + if (bytesRead < 0) + { + throw std::runtime_error("Failed to read file."); + } + return bytesRead; + } + FileWriter::FileWriter(const std::string& filename) { m_handle = open( @@ -192,7 +231,7 @@ namespace Azure { namespace Storage { namespace _internal { FileWriter::~FileWriter() { close(m_handle); } - void FileWriter::Write(const uint8_t* buffer, size_t length, int64_t offset) + void FileWriter::Write(const uint8_t* buffer, size_t length, int64_t offset) const { if (offset > static_cast(std::numeric_limits::max())) { From f89cdbc901d14eb5acbd118ddb24f88f1e7389bb Mon Sep 17 00:00:00 2001 From: Jinming Hu Date: Sun, 1 May 2022 23:29:13 +0800 Subject: [PATCH 06/18] single upload task --- .../azure-storage-datamovement/CMakeLists.txt | 2 + .../datamovement/datamovement_options.hpp | 1 + .../datamovement/storage_transfer_manager.hpp | 3 + .../inc/azure/storage/datamovement/task.hpp | 21 +++- .../tasks/upload_blob_from_file_task.hpp | 68 +++++++++++++ .../src/scheduler.cpp | 28 +++--- .../src/storage_transfer_manager.cpp | 34 +++++++ .../src/tasks/upload_blob_from_file_task.cpp | 95 +++++++++++++++++++ 8 files changed, 238 insertions(+), 14 deletions(-) create mode 100644 sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/tasks/upload_blob_from_file_task.hpp create mode 100644 sdk/storage/azure-storage-datamovement/src/tasks/upload_blob_from_file_task.cpp diff --git a/sdk/storage/azure-storage-datamovement/CMakeLists.txt b/sdk/storage/azure-storage-datamovement/CMakeLists.txt index 96aa9c81a8..bbda951018 100644 --- a/sdk/storage/azure-storage-datamovement/CMakeLists.txt +++ b/sdk/storage/azure-storage-datamovement/CMakeLists.txt @@ -48,6 +48,7 @@ set( inc/azure/storage/datamovement/scheduler.hpp inc/azure/storage/datamovement/storage_transfer_manager.hpp inc/azure/storage/datamovement/task.hpp + inc/azure/storage/datamovement/tasks/upload_blob_from_file_task.hpp src/private/package_version.hpp ) @@ -55,6 +56,7 @@ set( AZURE_STORAGE_DATAMOVEMENT_SOURCE src/scheduler.cpp src/storage_transfer_manager.cpp + src/tasks/upload_blob_from_file_task.cpp ) add_library(azure-storage-datamovement ${AZURE_STORAGE_DATAMOVEMENT_HEADER} ${AZURE_STORAGE_DATAMOVEMENT_SOURCE}) diff --git a/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/datamovement_options.hpp b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/datamovement_options.hpp index c0603159fc..6868d289a7 100644 --- a/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/datamovement_options.hpp +++ b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/datamovement_options.hpp @@ -19,4 +19,5 @@ namespace Azure { namespace Storage { namespace DataMovement { struct DownloadBlobOptions final { }; + }}} // namespace Azure::Storage::DataMovement diff --git a/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/storage_transfer_manager.hpp b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/storage_transfer_manager.hpp index da04f1f20d..37e3b4a175 100644 --- a/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/storage_transfer_manager.hpp +++ b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/storage_transfer_manager.hpp @@ -10,6 +10,7 @@ #include "azure/storage/datamovement/blob_folder.hpp" #include "azure/storage/datamovement/datamovement_options.hpp" #include "azure/storage/datamovement/job_properties.hpp" +#include "azure/storage/datamovement/scheduler.hpp" namespace Azure { namespace Storage { namespace DataMovement { @@ -46,6 +47,8 @@ namespace Azure { namespace Storage { namespace DataMovement { void CancelAllJobs(); private: + StorageTransferManagerOptions m_options; + _internal::Scheduler m_scheduler; }; }}} // namespace Azure::Storage::DataMovement diff --git a/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/task.hpp b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/task.hpp index 4d2dc34fd6..43145651aa 100644 --- a/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/task.hpp +++ b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/task.hpp @@ -4,8 +4,14 @@ #pragma once #include +#include + +#include +#include namespace Azure { namespace Storage { namespace DataMovement { namespace _internal { + class Scheduler; + enum class TaskType { DiskIO, @@ -15,14 +21,25 @@ namespace Azure { namespace Storage { namespace DataMovement { namespace _intern }; // Task should be idempotent - struct Task + // Root task should be serializable and deserializable + struct TaskBase { + TaskBase(TaskType type, Scheduler* scheduler) : Type(type), m_scheduler(scheduler) {} TaskType Type; size_t MemoryCost = 0; size_t MemoryGiveBack = 0; - std::function func; // func shouldn't throw + virtual ~TaskBase() {} + virtual void Execute() = 0; + virtual std::string Serialize() { return std::string(); } + + protected: + Scheduler* m_scheduler; }; + using Task = std::unique_ptr; + + Task Deserialize(const char*); + }}}} // namespace Azure::Storage::DataMovement::_internal diff --git a/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/tasks/upload_blob_from_file_task.hpp b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/tasks/upload_blob_from_file_task.hpp new file mode 100644 index 0000000000..944c5a2d77 --- /dev/null +++ b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/tasks/upload_blob_from_file_task.hpp @@ -0,0 +1,68 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include + +#include + +#include "azure/storage/datamovement/task.hpp" + +namespace Azure { namespace Storage { namespace DataMovement { namespace _internal { + + struct UploadBlobFromFileTask : public TaskBase + { + UploadBlobFromFileTask( + TaskType type, + Scheduler* scheduler, + const std::string& source, + const Blobs::BlobClient& destination) + : TaskBase(type, scheduler), Context(std::make_shared(source, destination)) + { + } + + struct TaskContext + { + explicit TaskContext(std::string source, Blobs::BlobClient destination) + : Source(std::move(source)), Destination(std::move(destination)) + { + } + std::string Source; + Blobs::BlobClient Destination; + std::unique_ptr FileReader; + uint64_t FileSize{0}; + int NumBlocks{0}; + std::atomic NumStagedBlocks{0}; + }; + std::shared_ptr Context; + + void Execute() override; + }; + + struct ReadFileRangeToMemoryTask : public TaskBase + { + using TaskBase::TaskBase; + + std::shared_ptr Context; + int BlockId; + int64_t Offset; + size_t Length; + + void Execute() override; + }; + + struct StageBlockTask : public TaskBase + { + using TaskBase::TaskBase; + + std::shared_ptr Context; + int BlockId; + size_t Length; + std::unique_ptr Buffer; + + void Execute() override; + }; + +}}}} // namespace Azure::Storage::DataMovement::_internal \ No newline at end of file diff --git a/sdk/storage/azure-storage-datamovement/src/scheduler.cpp b/sdk/storage/azure-storage-datamovement/src/scheduler.cpp index 9eb8504fcd..5fadbd49bc 100644 --- a/sdk/storage/azure-storage-datamovement/src/scheduler.cpp +++ b/sdk/storage/azure-storage-datamovement/src/scheduler.cpp @@ -36,7 +36,11 @@ namespace Azure { namespace Storage { namespace DataMovement { namespace _intern q.pop(); guard.unlock(); - task.func(); + task->Execute(); + if (task->MemoryGiveBack != 0) + { + m_memoryLeft.fetch_add(task->MemoryGiveBack); + } } }; @@ -69,12 +73,12 @@ namespace Azure { namespace Storage { namespace DataMovement { namespace _intern std::unique_lock readyTasksGuard(m_readyDiskIOTasksMutex, std::defer_lock); int numScheduledTasks = 0; while (!m_pendingDiskIOTasks.empty() - && m_pendingDiskIOTasks.front().MemoryCost + && m_pendingDiskIOTasks.front()->MemoryCost < m_memoryLeft.load(std::memory_order_relaxed)) { auto task = std::move(m_pendingDiskIOTasks.front()); m_pendingDiskIOTasks.pop(); - m_memoryLeft.fetch_sub(task.MemoryCost); + m_memoryLeft.fetch_sub(task->MemoryCost); if (!readyTasksGuard.owns_lock()) { readyTasksGuard.lock(); @@ -161,28 +165,28 @@ namespace Azure { namespace Storage { namespace DataMovement { namespace _intern void Scheduler::AddTask(Task&& task) { - if (task.Type == TaskType::DiskIO) + if (task->Type == TaskType::DiskIO) { std::lock_guard guard(m_pendingTasksMutex); m_pendingDiskIOTasks.push(std::move(task)); m_pendingTasksCv.notify_one(); } - else if (task.Type == TaskType::NetworkUpload) + else if (task->Type == TaskType::NetworkUpload) { std::lock_guard guard(m_pendingTasksMutex); m_pendingNetworkUploadTasks.push(std::move(task)); m_pendingTasksCv.notify_one(); } - else if (task.Type == TaskType::NetworkDownload) + else if (task->Type == TaskType::NetworkDownload) { std::lock_guard guard(m_pendingTasksMutex); m_pendingNetworkDownloadTasks.push(std::move(task)); m_pendingTasksCv.notify_one(); } - else if (task.Type == TaskType::Other) + else if (task->Type == TaskType::Other) { std::lock_guard guard(m_readyTasksMutex); - m_memoryLeft.fetch_sub(task.MemoryCost); + m_memoryLeft.fetch_sub(task->MemoryCost); m_readyTasks.push(std::move(task)); m_readyTasksCv.notify_one(); } @@ -200,7 +204,7 @@ namespace Azure { namespace Storage { namespace DataMovement { namespace _intern int numTasksAdded = 0; for (int i = 0; i < tasks.size(); ++i) { - if (tasks[i].Type == TaskType::DiskIO) + if (tasks[i]->Type == TaskType::DiskIO) { if (!guard.owns_lock()) { @@ -209,7 +213,7 @@ namespace Azure { namespace Storage { namespace DataMovement { namespace _intern m_pendingDiskIOTasks.push(std::move(tasks[i])); ++numTasksAdded; } - else if (tasks[i].Type == TaskType::NetworkUpload) + else if (tasks[i]->Type == TaskType::NetworkUpload) { if (!guard.owns_lock()) { @@ -218,7 +222,7 @@ namespace Azure { namespace Storage { namespace DataMovement { namespace _intern m_pendingNetworkUploadTasks.push(std::move(tasks[i])); ++numTasksAdded; } - else if (tasks[i].Type == TaskType::NetworkDownload) + else if (tasks[i]->Type == TaskType::NetworkDownload) { if (!guard.owns_lock()) { @@ -247,7 +251,7 @@ namespace Azure { namespace Storage { namespace DataMovement { namespace _intern { continue; } - if (tasks[i].Type == TaskType::Other) + if (tasks[i]->Type == TaskType::Other) { if (!guard.owns_lock()) { diff --git a/sdk/storage/azure-storage-datamovement/src/storage_transfer_manager.cpp b/sdk/storage/azure-storage-datamovement/src/storage_transfer_manager.cpp index b5a783ce66..e6deb94025 100644 --- a/sdk/storage/azure-storage-datamovement/src/storage_transfer_manager.cpp +++ b/sdk/storage/azure-storage-datamovement/src/storage_transfer_manager.cpp @@ -1,10 +1,44 @@ #include "azure/storage/datamovement/storage_transfer_manager.hpp" +#include + +#include "azure/storage/datamovement/tasks/upload_blob_from_file_task.hpp" + namespace Azure { namespace Storage { namespace DataMovement { + namespace { + constexpr static const char* FileUrlScheme = "file://"; + + std::string GetFullPath(const std::string& relativePath) + { + // TODO: implement this + (void)relativePath; + return std::string(); + } + } // namespace + StorageTransferManager::StorageTransferManager(const StorageTransferManagerOptions& options) + : m_options(options), m_scheduler(_internal::SchedulerOptions{}) + { + } + + JobProperties StorageTransferManager::ScheduleUpload( + const std::string& sourceLocalPath, + const Blobs::BlobClient& destinationBlob, + const UploadBlobOptions& options) { (void)options; + auto jobProperties = JobProperties(); + jobProperties.JobId = Core::Uuid::CreateUuid().ToString(); + jobProperties.SourceUrl = FileUrlScheme + sourceLocalPath; + jobProperties.DestinationUrl = destinationBlob.GetUrl(); + jobProperties.Type = TransferType::SingleUpload; + + auto task = std::make_unique<_internal::UploadBlobFromFileTask>( + _internal::TaskType::NetworkUpload, &m_scheduler, sourceLocalPath, destinationBlob); + m_scheduler.AddTask(std::move(task)); + + return jobProperties; } }}} // namespace Azure::Storage::DataMovement diff --git a/sdk/storage/azure-storage-datamovement/src/tasks/upload_blob_from_file_task.cpp b/sdk/storage/azure-storage-datamovement/src/tasks/upload_blob_from_file_task.cpp new file mode 100644 index 0000000000..71806c154d --- /dev/null +++ b/sdk/storage/azure-storage-datamovement/src/tasks/upload_blob_from_file_task.cpp @@ -0,0 +1,95 @@ +#include "azure/storage/datamovement/tasks/upload_blob_from_file_task.hpp" + +#include + +#include +#include + +#include "azure/storage/datamovement/scheduler.hpp" + +namespace Azure { namespace Storage { namespace DataMovement { namespace _internal { + + namespace { + constexpr uint64_t SingleUploadThreshold = 4 * 1024 * 1024; + constexpr uint64_t ChunkSize = 4 * 1024 * 1024; + + std::string GetBlockId(int64_t id) + { + constexpr size_t BlockIdLength = 64; + std::string blockId = std::to_string(id); + blockId = std::string(BlockIdLength - blockId.length(), '0') + blockId; + return Azure::Core::Convert::Base64Encode( + std::vector(blockId.begin(), blockId.end())); + } + + } // namespace + + void UploadBlobFromFileTask::Execute() + { + if (!Context->FileReader) + { + Context->FileReader = std::make_unique(Context->Source); + } + const uint64_t fileSize = Context->FileReader->GetFileSize(); + Context->FileSize = fileSize; + + if (fileSize == 0) + { + Core::IO::MemoryBodyStream emptyStream(nullptr, 0); + Context->Destination.AsBlockBlobClient().Upload(emptyStream); + return; + } + + // TOOD: if file is small enough + + Context->NumBlocks = static_cast((fileSize + ChunkSize - 1) / ChunkSize); + std::vector subtasks; + for (int blockId = 0; blockId < Context->NumBlocks; ++blockId) + { + auto readFileRangeTask + = std::make_unique(TaskType::DiskIO, m_scheduler); + readFileRangeTask->Context = Context; + readFileRangeTask->BlockId = blockId; + readFileRangeTask->Offset = blockId * ChunkSize; + readFileRangeTask->Length = std::min(ChunkSize, fileSize - blockId * ChunkSize); + readFileRangeTask->MemoryCost = readFileRangeTask->Length; + subtasks.push_back(std::move(readFileRangeTask)); + } + m_scheduler->AddTasks(std::move(subtasks)); + } + + void ReadFileRangeToMemoryTask::Execute() + { + std::unique_ptr buffer = std::make_unique(Length); + size_t bytesRead = Context->FileReader->Read(buffer.get(), Length, Offset); + AZURE_ASSERT(bytesRead == Length); + + auto stageBlockTask = std::make_unique(TaskType::NetworkUpload, m_scheduler); + stageBlockTask->Context = Context; + stageBlockTask->BlockId = BlockId; + stageBlockTask->Buffer = std::move(buffer); + stageBlockTask->Length = Length; + stageBlockTask->MemoryGiveBack = MemoryCost; + + m_scheduler->AddTask(std::move(stageBlockTask)); + } + + void StageBlockTask::Execute() + { + const std::string blockId = GetBlockId(BlockId); + Core::IO::MemoryBodyStream contentStream(Buffer.get(), Length); + auto blockBlobClient = Context->Destination.AsBlockBlobClient(); + blockBlobClient.StageBlock(blockId, contentStream); + Buffer.reset(); + int numStagedBlocks = Context->NumStagedBlocks.fetch_add(1, std::memory_order_relaxed) + 1; + if (numStagedBlocks == Context->NumBlocks) + { + std::vector blockIds; + for (int i = 0; i < Context->NumBlocks; ++i) + { + blockIds.push_back(GetBlockId(i)); + } + blockBlobClient.CommitBlockList(blockIds); + } + } +}}}} // namespace Azure::Storage::DataMovement::_internal \ No newline at end of file From 93176e8472937abe28e4aeb055de366ce4e4ecbb Mon Sep 17 00:00:00 2001 From: Jinming Hu Date: Mon, 2 May 2022 09:50:36 +0800 Subject: [PATCH 07/18] 1 --- .../azure-storage-datamovement/src/private/package_version.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sdk/storage/azure-storage-datamovement/src/private/package_version.hpp b/sdk/storage/azure-storage-datamovement/src/private/package_version.hpp index e9e4524476..067bf10cd1 100644 --- a/sdk/storage/azure-storage-datamovement/src/private/package_version.hpp +++ b/sdk/storage/azure-storage-datamovement/src/private/package_version.hpp @@ -8,6 +8,8 @@ #pragma once +#include + #define AZURE_STORAGE_DATAMOVEMENT_VERSION_MAJOR 1 #define AZURE_STORAGE_DATAMOVEMENT_VERSION_MINOR 0 #define AZURE_STORAGE_DATAMOVEMENT_VERSION_PATCH 0 From ab4dc4f346b9c57640b80b032b59ce51e772a105 Mon Sep 17 00:00:00 2001 From: Jinming Hu Date: Mon, 2 May 2022 10:33:26 +0800 Subject: [PATCH 08/18] sample --- .../azure-storage-datamovement/CMakeLists.txt | 2 +- .../datamovement/storage_transfer_manager.hpp | 3 +- .../samples/CMakeLists.txt | 8 ++++ .../samples/datamovement_getting_started.cpp | 47 +++++++++++++++++++ 4 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 sdk/storage/azure-storage-datamovement/samples/CMakeLists.txt create mode 100644 sdk/storage/azure-storage-datamovement/samples/datamovement_getting_started.cpp diff --git a/sdk/storage/azure-storage-datamovement/CMakeLists.txt b/sdk/storage/azure-storage-datamovement/CMakeLists.txt index bbda951018..471e15e5e6 100644 --- a/sdk/storage/azure-storage-datamovement/CMakeLists.txt +++ b/sdk/storage/azure-storage-datamovement/CMakeLists.txt @@ -97,5 +97,5 @@ if(BUILD_TESTING) endif() if(BUILD_SAMPLES) - # add_subdirectory(samples) + add_subdirectory(samples) endif() diff --git a/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/storage_transfer_manager.hpp b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/storage_transfer_manager.hpp index 37e3b4a175..8d937f519f 100644 --- a/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/storage_transfer_manager.hpp +++ b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/storage_transfer_manager.hpp @@ -16,7 +16,8 @@ namespace Azure { namespace Storage { namespace DataMovement { class StorageTransferManager final { public: - explicit StorageTransferManager(const StorageTransferManagerOptions& options); + explicit StorageTransferManager( + const StorageTransferManagerOptions& options = StorageTransferManagerOptions()); JobProperties ScheduleUpload( const std::string& sourceLocalPath, diff --git a/sdk/storage/azure-storage-datamovement/samples/CMakeLists.txt b/sdk/storage/azure-storage-datamovement/samples/CMakeLists.txt new file mode 100644 index 0000000000..1abdccf3ce --- /dev/null +++ b/sdk/storage/azure-storage-datamovement/samples/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# SPDX-License-Identifier: MIT + +cmake_minimum_required (VERSION 3.13) + +add_executable(datamovement-getting-started datamovement_getting_started.cpp) +target_link_libraries(datamovement-getting-started PRIVATE azure-storage-datamovement get-env-helper) +create_per_service_target_build_for_sample(storage datamovement-getting-started) diff --git a/sdk/storage/azure-storage-datamovement/samples/datamovement_getting_started.cpp b/sdk/storage/azure-storage-datamovement/samples/datamovement_getting_started.cpp new file mode 100644 index 0000000000..3f303a3bf7 --- /dev/null +++ b/sdk/storage/azure-storage-datamovement/samples/datamovement_getting_started.cpp @@ -0,0 +1,47 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// SPDX-License-Identifier: MIT + +#include "get_env.hpp" + +#include + +#include + +std::string GetConnectionString() +{ + const static std::string ConnectionString = ""; + + if (!ConnectionString.empty()) + { + return ConnectionString; + } + const static std::string envConnectionString = std::getenv("AZURE_STORAGE_CONNECTION_STRING"); + if (!envConnectionString.empty()) + { + return envConnectionString; + } + throw std::runtime_error("Cannot find connection string."); +} + +int main() +{ + using namespace Azure::Storage::DataMovement; + using namespace Azure::Storage::Blobs; + + const std::string containerName = "sample-container"; + const std::string blobName = "sample-blob"; + const std::string localFile = "sample-localfile"; + + StorageTransferManager m; + + auto blobContainerClient + = BlobContainerClient::CreateFromConnectionString(GetConnectionString(), containerName); + blobContainerClient.CreateIfNotExists(); + auto blobClient = blobContainerClient.GetBlobClient(blobName); + + m.ScheduleUpload(localFile, blobClient); + + getchar(); + + return 0; +} From 8bf9ac8c4a2848e072d8d5b80176242d6cca3026 Mon Sep 17 00:00:00 2001 From: Jinming Hu Date: Mon, 2 May 2022 12:51:43 +0800 Subject: [PATCH 09/18] fix bug: scheduler is not woken up --- sdk/storage/azure-storage-datamovement/src/scheduler.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/storage/azure-storage-datamovement/src/scheduler.cpp b/sdk/storage/azure-storage-datamovement/src/scheduler.cpp index 5fadbd49bc..657f640787 100644 --- a/sdk/storage/azure-storage-datamovement/src/scheduler.cpp +++ b/sdk/storage/azure-storage-datamovement/src/scheduler.cpp @@ -40,6 +40,7 @@ namespace Azure { namespace Storage { namespace DataMovement { namespace _intern if (task->MemoryGiveBack != 0) { m_memoryLeft.fetch_add(task->MemoryGiveBack); + m_pendingTasksCv.notify_one(); } } }; From c526ad528a2d2c6dfef1305790925e46bc923e5c Mon Sep 17 00:00:00 2001 From: Jinming Hu Date: Mon, 2 May 2022 15:14:59 +0800 Subject: [PATCH 10/18] change parameter default values --- sdk/storage/azure-storage-datamovement/src/scheduler.cpp | 7 ++++--- .../src/tasks/upload_blob_from_file_task.cpp | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/sdk/storage/azure-storage-datamovement/src/scheduler.cpp b/sdk/storage/azure-storage-datamovement/src/scheduler.cpp index 657f640787..ecf1137a94 100644 --- a/sdk/storage/azure-storage-datamovement/src/scheduler.cpp +++ b/sdk/storage/azure-storage-datamovement/src/scheduler.cpp @@ -12,11 +12,12 @@ namespace Azure { namespace Storage { namespace DataMovement { namespace _intern Scheduler::Scheduler(const SchedulerOptions& options) : m_options(options) { - size_t numThreads = options.NumThreads.HasValue() ? options.NumThreads.Value() - : std::thread::hardware_concurrency() * 2; + size_t numThreads = options.NumThreads.HasValue() + ? options.NumThreads.Value() + : std::max(5, std::thread::hardware_concurrency()); AZURE_ASSERT(numThreads != 0); size_t maxMemorySize = options.MaxMemorySize.HasValue() ? options.MaxMemorySize.Value() - : 128 * 1024 * 1024 * numThreads; + : 128ULL * 1024 * 1024 * numThreads; m_options.NumThreads = numThreads; m_options.MaxMemorySize = maxMemorySize; diff --git a/sdk/storage/azure-storage-datamovement/src/tasks/upload_blob_from_file_task.cpp b/sdk/storage/azure-storage-datamovement/src/tasks/upload_blob_from_file_task.cpp index 71806c154d..465d3bcfef 100644 --- a/sdk/storage/azure-storage-datamovement/src/tasks/upload_blob_from_file_task.cpp +++ b/sdk/storage/azure-storage-datamovement/src/tasks/upload_blob_from_file_task.cpp @@ -11,7 +11,7 @@ namespace Azure { namespace Storage { namespace DataMovement { namespace _intern namespace { constexpr uint64_t SingleUploadThreshold = 4 * 1024 * 1024; - constexpr uint64_t ChunkSize = 4 * 1024 * 1024; + constexpr uint64_t ChunkSize = 8 * 1024 * 1024; std::string GetBlockId(int64_t id) { From c06e1a4ad83def290124c3036f4199f2e63f935c Mon Sep 17 00:00:00 2001 From: Jinming Hu Date: Mon, 2 May 2022 20:51:29 +0800 Subject: [PATCH 11/18] use two threads for disk IO --- .../azure-storage-datamovement/src/scheduler.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/sdk/storage/azure-storage-datamovement/src/scheduler.cpp b/sdk/storage/azure-storage-datamovement/src/scheduler.cpp index ecf1137a94..e748c67cca 100644 --- a/sdk/storage/azure-storage-datamovement/src/scheduler.cpp +++ b/sdk/storage/azure-storage-datamovement/src/scheduler.cpp @@ -55,11 +55,14 @@ namespace Azure { namespace Storage { namespace DataMovement { namespace _intern std::ref(m_readyTasksMutex), std::ref(m_readyTasksCv))); } - m_workerThreads.push_back(std::thread( - workerFunc, - std::ref(m_readyDiskIOTasks), - std::ref(m_readyDiskIOTasksMutex), - std::ref(m_readyDiskIOTasksCv))); + for (size_t i = 0; i < 2; ++i) + { + m_workerThreads.push_back(std::thread( + workerFunc, + std::ref(m_readyDiskIOTasks), + std::ref(m_readyDiskIOTasksMutex), + std::ref(m_readyDiskIOTasksCv))); + } auto schedulerFunc = [this]() { std::unique_lock guard(m_pendingTasksMutex); From bbe6f225fc2f26beec313fe6dedd2fc57b5a75f8 Mon Sep 17 00:00:00 2001 From: Jinming Hu Date: Mon, 2 May 2022 21:16:01 +0800 Subject: [PATCH 12/18] fix build warnings --- sdk/storage/azure-storage-common/src/file_io.cpp | 2 +- .../inc/azure/storage/datamovement/scheduler.hpp | 10 +++++----- .../inc/azure/storage/datamovement/task.hpp | 2 +- .../azure-storage-datamovement/src/scheduler.cpp | 10 ++++++---- .../src/tasks/upload_blob_from_file_task.cpp | 2 +- 5 files changed, 14 insertions(+), 12 deletions(-) diff --git a/sdk/storage/azure-storage-common/src/file_io.cpp b/sdk/storage/azure-storage-common/src/file_io.cpp index 4ec7405c2d..2eb04f98cc 100644 --- a/sdk/storage/azure-storage-common/src/file_io.cpp +++ b/sdk/storage/azure-storage-common/src/file_io.cpp @@ -90,7 +90,7 @@ namespace Azure { namespace Storage { namespace _internal { size_t FileReader::Read(uint8_t* buffer, size_t length, int64_t offset) const { - length = std::min(length, m_fileSize - offset); + length = std::min(length, std::max(0LL, m_fileSize - offset)); if (length > std::numeric_limits::max()) { throw std::runtime_error("Failed to read file."); diff --git a/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/scheduler.hpp b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/scheduler.hpp index 80605d2845..4d41fa0021 100644 --- a/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/scheduler.hpp +++ b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/scheduler.hpp @@ -3,13 +3,13 @@ #pragma once +#include +#include #include +#include #include #include -#include #include -#include -#include #include @@ -21,7 +21,7 @@ namespace Azure { namespace Storage { namespace DataMovement { namespace _intern struct SchedulerOptions { - Nullable NumThreads; // default: 2 * num cpus + Nullable NumThreads; // default: 2 * num cpus Nullable MaxMemorySize; // default: 128MB * num threads }; @@ -36,7 +36,7 @@ namespace Azure { namespace Storage { namespace DataMovement { namespace _intern void AddTask(Task&& task); void AddTasks(std::vector&& tasks); - // TODO: Pasue/Suspend and Resume + // TODO: Pause/Suspend and Resume private: SchedulerOptions m_options; diff --git a/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/task.hpp b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/task.hpp index 43145651aa..941a9f91eb 100644 --- a/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/task.hpp +++ b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/task.hpp @@ -21,7 +21,7 @@ namespace Azure { namespace Storage { namespace DataMovement { namespace _intern }; // Task should be idempotent - // Root task should be serializable and deserializable + // Root task should be serializable and de-serializable struct TaskBase { TaskBase(TaskType type, Scheduler* scheduler) : Type(type), m_scheduler(scheduler) {} diff --git a/sdk/storage/azure-storage-datamovement/src/scheduler.cpp b/sdk/storage/azure-storage-datamovement/src/scheduler.cpp index e748c67cca..46ed6f79a2 100644 --- a/sdk/storage/azure-storage-datamovement/src/scheduler.cpp +++ b/sdk/storage/azure-storage-datamovement/src/scheduler.cpp @@ -6,15 +6,17 @@ #include +#if defined(_MSC_VER) #pragma warning(disable : 26110 26117) +#endif namespace Azure { namespace Storage { namespace DataMovement { namespace _internal { Scheduler::Scheduler(const SchedulerOptions& options) : m_options(options) { - size_t numThreads = options.NumThreads.HasValue() + int numThreads = options.NumThreads.HasValue() ? options.NumThreads.Value() - : std::max(5, std::thread::hardware_concurrency()); + : std::max(5, std::thread::hardware_concurrency()); AZURE_ASSERT(numThreads != 0); size_t maxMemorySize = options.MaxMemorySize.HasValue() ? options.MaxMemorySize.Value() : 128ULL * 1024 * 1024 * numThreads; @@ -47,7 +49,7 @@ namespace Azure { namespace Storage { namespace DataMovement { namespace _intern }; m_workerThreads.reserve(numThreads + 1); - for (size_t i = 0; i < numThreads; ++i) + for (int i = 0; i < numThreads; ++i) { m_workerThreads.push_back(std::thread( workerFunc, @@ -207,7 +209,7 @@ namespace Azure { namespace Storage { namespace DataMovement { namespace _intern { std::unique_lock guard(m_pendingTasksMutex, std::defer_lock); int numTasksAdded = 0; - for (int i = 0; i < tasks.size(); ++i) + for (size_t i = 0; i < tasks.size(); ++i) { if (tasks[i]->Type == TaskType::DiskIO) { diff --git a/sdk/storage/azure-storage-datamovement/src/tasks/upload_blob_from_file_task.cpp b/sdk/storage/azure-storage-datamovement/src/tasks/upload_blob_from_file_task.cpp index 465d3bcfef..4d300eade1 100644 --- a/sdk/storage/azure-storage-datamovement/src/tasks/upload_blob_from_file_task.cpp +++ b/sdk/storage/azure-storage-datamovement/src/tasks/upload_blob_from_file_task.cpp @@ -40,7 +40,7 @@ namespace Azure { namespace Storage { namespace DataMovement { namespace _intern return; } - // TOOD: if file is small enough + // TODO: if file is small enough Context->NumBlocks = static_cast((fileSize + ChunkSize - 1) / ChunkSize); std::vector subtasks; From 5677152e8261d79b30a18724e873ce6024a4f484 Mon Sep 17 00:00:00 2001 From: Jinming Hu Date: Mon, 2 May 2022 21:23:24 +0800 Subject: [PATCH 13/18] fix warnings --- sdk/storage/azure-storage-common/src/file_io.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/storage/azure-storage-common/src/file_io.cpp b/sdk/storage/azure-storage-common/src/file_io.cpp index 2eb04f98cc..c4c3f65477 100644 --- a/sdk/storage/azure-storage-common/src/file_io.cpp +++ b/sdk/storage/azure-storage-common/src/file_io.cpp @@ -90,7 +90,7 @@ namespace Azure { namespace Storage { namespace _internal { size_t FileReader::Read(uint8_t* buffer, size_t length, int64_t offset) const { - length = std::min(length, std::max(0LL, m_fileSize - offset)); + length = std::min(length, std::max(0LL, m_fileSize - offset)); if (length > std::numeric_limits::max()) { throw std::runtime_error("Failed to read file."); From 6374b5b8dd60fb22a5bafb84e297b70d006ae080 Mon Sep 17 00:00:00 2001 From: Jinming Hu Date: Mon, 2 May 2022 22:26:56 +0800 Subject: [PATCH 14/18] impl GetFullPath --- .../azure-storage-common/src/file_io.cpp | 2 +- .../azure/storage/datamovement/scheduler.hpp | 2 +- .../samples/datamovement_getting_started.cpp | 4 +- .../src/scheduler.cpp | 2 +- .../src/storage_transfer_manager.cpp | 79 ++++++++++++++++++- .../src/tasks/upload_blob_from_file_task.cpp | 2 + 6 files changed, 83 insertions(+), 8 deletions(-) diff --git a/sdk/storage/azure-storage-common/src/file_io.cpp b/sdk/storage/azure-storage-common/src/file_io.cpp index c4c3f65477..0d05c075a2 100644 --- a/sdk/storage/azure-storage-common/src/file_io.cpp +++ b/sdk/storage/azure-storage-common/src/file_io.cpp @@ -90,7 +90,7 @@ namespace Azure { namespace Storage { namespace _internal { size_t FileReader::Read(uint8_t* buffer, size_t length, int64_t offset) const { - length = std::min(length, std::max(0LL, m_fileSize - offset)); + length = std::min(length, static_cast(std::max(0LL, m_fileSize - offset))); if (length > std::numeric_limits::max()) { throw std::runtime_error("Failed to read file."); diff --git a/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/scheduler.hpp b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/scheduler.hpp index 4d41fa0021..d9a873c5d5 100644 --- a/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/scheduler.hpp +++ b/sdk/storage/azure-storage-datamovement/inc/azure/storage/datamovement/scheduler.hpp @@ -21,7 +21,7 @@ namespace Azure { namespace Storage { namespace DataMovement { namespace _intern struct SchedulerOptions { - Nullable NumThreads; // default: 2 * num cpus + Nullable NumThreads; // default: num cpus, minimum 5 Nullable MaxMemorySize; // default: 128MB * num threads }; diff --git a/sdk/storage/azure-storage-datamovement/samples/datamovement_getting_started.cpp b/sdk/storage/azure-storage-datamovement/samples/datamovement_getting_started.cpp index 3f303a3bf7..53ea9b8271 100644 --- a/sdk/storage/azure-storage-datamovement/samples/datamovement_getting_started.cpp +++ b/sdk/storage/azure-storage-datamovement/samples/datamovement_getting_started.cpp @@ -39,7 +39,9 @@ int main() blobContainerClient.CreateIfNotExists(); auto blobClient = blobContainerClient.GetBlobClient(blobName); - m.ScheduleUpload(localFile, blobClient); + auto job = m.ScheduleUpload(localFile, blobClient); + std::cout << job.JobId << std::endl; + std::cout << job.SourceUrl << " -> " << job.DestinationUrl << std::endl; getchar(); diff --git a/sdk/storage/azure-storage-datamovement/src/scheduler.cpp b/sdk/storage/azure-storage-datamovement/src/scheduler.cpp index 46ed6f79a2..9f22058451 100644 --- a/sdk/storage/azure-storage-datamovement/src/scheduler.cpp +++ b/sdk/storage/azure-storage-datamovement/src/scheduler.cpp @@ -252,7 +252,7 @@ namespace Azure { namespace Storage { namespace DataMovement { namespace _intern { std::unique_lock guard(m_readyTasksMutex, std::defer_lock); int numTasksAdded = 0; - for (int i = 0; i < tasks.size(); ++i) + for (size_t i = 0; i < tasks.size(); ++i) { if (!validTaskBitmap[i]) { diff --git a/sdk/storage/azure-storage-datamovement/src/storage_transfer_manager.cpp b/sdk/storage/azure-storage-datamovement/src/storage_transfer_manager.cpp index e6deb94025..7908918b1c 100644 --- a/sdk/storage/azure-storage-datamovement/src/storage_transfer_manager.cpp +++ b/sdk/storage/azure-storage-datamovement/src/storage_transfer_manager.cpp @@ -1,5 +1,19 @@ #include "azure/storage/datamovement/storage_transfer_manager.hpp" +#include + +#if defined(AZ_PLATFORM_WINDOWS) +#if !defined(WIN32_LEAN_AND_MEAN) +#define WIN32_LEAN_AND_MEAN +#endif +#if !defined(NOMINMAX) +#define NOMINMAX +#endif +#include +#else +#include +#endif + #include #include "azure/storage/datamovement/tasks/upload_blob_from_file_task.hpp" @@ -11,9 +25,66 @@ namespace Azure { namespace Storage { namespace DataMovement { std::string GetFullPath(const std::string& relativePath) { - // TODO: implement this - (void)relativePath; - return std::string(); +#if defined(AZ_PLATFORM_WINDOWS) + int sizeNeeded = MultiByteToWideChar( + CP_UTF8, + MB_ERR_INVALID_CHARS, + relativePath.data(), + static_cast(relativePath.length()), + nullptr, + 0); + if (sizeNeeded == 0) + { + throw std::runtime_error("Invalid filename."); + } + std::wstring relativePathW(sizeNeeded, L'\0'); + if (MultiByteToWideChar( + CP_UTF8, + MB_ERR_INVALID_CHARS, + relativePath.data(), + static_cast(relativePath.length()), + &relativePathW[0], + sizeNeeded) + == 0) + { + throw std::runtime_error("Invalid filename."); + } + wchar_t absPathW[MAX_PATH]; + DWORD absPathWLength = GetFullPathNameW(relativePathW.data(), MAX_PATH, absPathW, nullptr); + if (absPathWLength == 0) + { + throw std::runtime_error("Failed to get absoluate path."); + } + std::replace(absPathW, absPathW + absPathWLength, L'\\', L'/'); + sizeNeeded = WideCharToMultiByte( + CP_UTF8, WC_ERR_INVALID_CHARS, &absPathW[0], absPathWLength, NULL, 0, NULL, NULL); + if (sizeNeeded == 0) + { + throw std::runtime_error("Invalid filename"); + } + std::string absPath(sizeNeeded, '\0'); + if (WideCharToMultiByte( + CP_UTF8, + WC_ERR_INVALID_CHARS, + &absPathW[0], + absPathWLength, + &absPath[0], + sizeNeeded, + NULL, + NULL) + == 0) + { + throw std::runtime_error("Invalid filename"); + } + return absPath; +#else + std::string absPath(PATH_MAX + 1, '\0'); + if (realpath(relativePath.data(), &absPath[0]) == nullptr) + { + throw std::runtime_error("Invalid filename"); + } + return absPath; +#endif } } // namespace @@ -30,7 +101,7 @@ namespace Azure { namespace Storage { namespace DataMovement { (void)options; auto jobProperties = JobProperties(); jobProperties.JobId = Core::Uuid::CreateUuid().ToString(); - jobProperties.SourceUrl = FileUrlScheme + sourceLocalPath; + jobProperties.SourceUrl = FileUrlScheme + GetFullPath(sourceLocalPath); jobProperties.DestinationUrl = destinationBlob.GetUrl(); jobProperties.Type = TransferType::SingleUpload; diff --git a/sdk/storage/azure-storage-datamovement/src/tasks/upload_blob_from_file_task.cpp b/sdk/storage/azure-storage-datamovement/src/tasks/upload_blob_from_file_task.cpp index 4d300eade1..dc369592d8 100644 --- a/sdk/storage/azure-storage-datamovement/src/tasks/upload_blob_from_file_task.cpp +++ b/sdk/storage/azure-storage-datamovement/src/tasks/upload_blob_from_file_task.cpp @@ -15,6 +15,8 @@ namespace Azure { namespace Storage { namespace DataMovement { namespace _intern std::string GetBlockId(int64_t id) { + // TODO: we want to add identifier in block ID, so that we resuming this job, we can pick up + // from where we left off. The identifier may include az-storage-dm and the file size constexpr size_t BlockIdLength = 64; std::string blockId = std::to_string(id); blockId = std::string(BlockIdLength - blockId.length(), '0') + blockId; From 9ddad2ecd34e207a6921fc4e74275f7e546b436c Mon Sep 17 00:00:00 2001 From: Jinming Hu Date: Mon, 2 May 2022 22:31:32 +0800 Subject: [PATCH 15/18] fix typo --- .../azure-storage-datamovement/src/storage_transfer_manager.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/storage/azure-storage-datamovement/src/storage_transfer_manager.cpp b/sdk/storage/azure-storage-datamovement/src/storage_transfer_manager.cpp index 7908918b1c..e87104a32a 100644 --- a/sdk/storage/azure-storage-datamovement/src/storage_transfer_manager.cpp +++ b/sdk/storage/azure-storage-datamovement/src/storage_transfer_manager.cpp @@ -53,7 +53,7 @@ namespace Azure { namespace Storage { namespace DataMovement { DWORD absPathWLength = GetFullPathNameW(relativePathW.data(), MAX_PATH, absPathW, nullptr); if (absPathWLength == 0) { - throw std::runtime_error("Failed to get absoluate path."); + throw std::runtime_error("Failed to get absolute path."); } std::replace(absPathW, absPathW + absPathWLength, L'\\', L'/'); sizeNeeded = WideCharToMultiByte( From 3750a1d6990a279477bb52176becd1d7b62282e0 Mon Sep 17 00:00:00 2001 From: Jinming Hu Date: Mon, 2 May 2022 22:42:03 +0800 Subject: [PATCH 16/18] fix warning on x86 --- .../src/tasks/upload_blob_from_file_task.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sdk/storage/azure-storage-datamovement/src/tasks/upload_blob_from_file_task.cpp b/sdk/storage/azure-storage-datamovement/src/tasks/upload_blob_from_file_task.cpp index dc369592d8..1cc02b7725 100644 --- a/sdk/storage/azure-storage-datamovement/src/tasks/upload_blob_from_file_task.cpp +++ b/sdk/storage/azure-storage-datamovement/src/tasks/upload_blob_from_file_task.cpp @@ -12,6 +12,7 @@ namespace Azure { namespace Storage { namespace DataMovement { namespace _intern namespace { constexpr uint64_t SingleUploadThreshold = 4 * 1024 * 1024; constexpr uint64_t ChunkSize = 8 * 1024 * 1024; + static_assert(ChunkSize < static_cast(std::numeric_limits::max()), ""); std::string GetBlockId(int64_t id) { @@ -53,7 +54,8 @@ namespace Azure { namespace Storage { namespace DataMovement { namespace _intern readFileRangeTask->Context = Context; readFileRangeTask->BlockId = blockId; readFileRangeTask->Offset = blockId * ChunkSize; - readFileRangeTask->Length = std::min(ChunkSize, fileSize - blockId * ChunkSize); + readFileRangeTask->Length + = static_cast(std::min(ChunkSize, fileSize - blockId * ChunkSize)); readFileRangeTask->MemoryCost = readFileRangeTask->Length; subtasks.push_back(std::move(readFileRangeTask)); } From 77fc4492a7eeea3f1e36d7450c003ce7c0534438 Mon Sep 17 00:00:00 2001 From: Jinming Hu Date: Mon, 2 May 2022 22:43:48 +0800 Subject: [PATCH 17/18] fix warning --- .../src/tasks/upload_blob_from_file_task.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/storage/azure-storage-datamovement/src/tasks/upload_blob_from_file_task.cpp b/sdk/storage/azure-storage-datamovement/src/tasks/upload_blob_from_file_task.cpp index 1cc02b7725..7ee704003b 100644 --- a/sdk/storage/azure-storage-datamovement/src/tasks/upload_blob_from_file_task.cpp +++ b/sdk/storage/azure-storage-datamovement/src/tasks/upload_blob_from_file_task.cpp @@ -44,6 +44,7 @@ namespace Azure { namespace Storage { namespace DataMovement { namespace _intern } // TODO: if file is small enough + (void)SingleUploadThreshold; Context->NumBlocks = static_cast((fileSize + ChunkSize - 1) / ChunkSize); std::vector subtasks; From 195ede9dabcca2059edc07ee273d4892ae8028f3 Mon Sep 17 00:00:00 2001 From: Jinming Hu Date: Mon, 2 May 2022 22:56:18 +0800 Subject: [PATCH 18/18] fix warning --- .../azure-storage-datamovement/src/storage_transfer_manager.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/storage/azure-storage-datamovement/src/storage_transfer_manager.cpp b/sdk/storage/azure-storage-datamovement/src/storage_transfer_manager.cpp index e87104a32a..544afeec3a 100644 --- a/sdk/storage/azure-storage-datamovement/src/storage_transfer_manager.cpp +++ b/sdk/storage/azure-storage-datamovement/src/storage_transfer_manager.cpp @@ -11,6 +11,7 @@ #endif #include #else +#include #include #endif