From 2423a28b6b1fa822339c0d9b866daf91b3a1fa3a Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Tue, 28 Oct 2025 17:22:38 +0000 Subject: [PATCH 01/32] Scala 2.13 skeleton --- .vscode/cspell.json | 1 + eng/.docsettings.yml | 1 + eng/pipelines/aggregate-reports.yml | 4 +- eng/versioning/external_dependencies.txt | 16 +- eng/versioning/version_client.txt | 1 + sdk/cosmos/azure-cosmos-spark_3-5/pom.xml | 16 +- .../azure-cosmos-spark_3-5_2-12/pom.xml | 12 ++ .../azure-cosmos-spark_3-5_2-13/CHANGELOG.md | 154 ++++++++++++++ .../CONTRIBUTING.md | 84 ++++++++ .../azure-cosmos-spark_3-5_2-13/README.md | 192 ++++++++++++++++++ .../azure-cosmos-spark_3-5_2-13/pom.xml | 165 +++++++++++++++ .../scalastyle_config.xml | 130 ++++++++++++ .../resources/azure-cosmos-spark.properties | 2 + ...osmos.spark.CosmosClientBuilderInterceptor | 1 + ...azure.cosmos.spark.CosmosClientInterceptor | 1 + ...cosmos.spark.WriteOnRetryCommitInterceptor | 1 + sdk/cosmos/azure-cosmos-spark_3/pom.xml | 57 ++++-- sdk/cosmos/ci.yml | 21 +- sdk/cosmos/pom.xml | 1 + sdk/cosmos/spark.databricks.yml | 2 +- sdk/cosmos/spark.yml | 20 +- 21 files changed, 846 insertions(+), 36 deletions(-) create mode 100644 sdk/cosmos/azure-cosmos-spark_3-5_2-13/CHANGELOG.md create mode 100644 sdk/cosmos/azure-cosmos-spark_3-5_2-13/CONTRIBUTING.md create mode 100644 sdk/cosmos/azure-cosmos-spark_3-5_2-13/README.md create mode 100644 sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml create mode 100644 sdk/cosmos/azure-cosmos-spark_3-5_2-13/scalastyle_config.xml create mode 100644 sdk/cosmos/azure-cosmos-spark_3-5_2-13/src/main/resources/azure-cosmos-spark.properties create mode 100644 sdk/cosmos/azure-cosmos-spark_3-5_2-13/src/test/resources/META-INF/services/com.azure.cosmos.spark.CosmosClientBuilderInterceptor create mode 100644 sdk/cosmos/azure-cosmos-spark_3-5_2-13/src/test/resources/META-INF/services/com.azure.cosmos.spark.CosmosClientInterceptor create mode 100644 sdk/cosmos/azure-cosmos-spark_3-5_2-13/src/test/resources/META-INF/services/com.azure.cosmos.spark.WriteOnRetryCommitInterceptor diff --git a/.vscode/cspell.json b/.vscode/cspell.json index 62a33b075301..f862bbf4559d 100644 --- a/.vscode/cspell.json +++ b/.vscode/cspell.json @@ -105,6 +105,7 @@ "sdk/cosmos/azure-cosmos-spark_3-4_2-12/**", "sdk/cosmos/azure-cosmos-spark_3-5/**", "sdk/cosmos/azure-cosmos-spark_3-5_2-12/**", + "sdk/cosmos/azure-cosmos-spark_3-5_2-13/**", "sdk/cosmos/azure-cosmos-spark-account-data-resolver-sample/**", "sdk/cosmos/fabric-cosmos-spark-auth_3/**", "sdk/cosmos/azure-cosmos-encryption/**", diff --git a/eng/.docsettings.yml b/eng/.docsettings.yml index b93948f76346..75a508cc75be 100644 --- a/eng/.docsettings.yml +++ b/eng/.docsettings.yml @@ -80,6 +80,7 @@ known_content_issues: - ['sdk/cosmos/azure-cosmos-spark_3-3_2-12/README.md', '#3113'] - ['sdk/cosmos/azure-cosmos-spark_3-4_2-12/README.md', '#3113'] - ['sdk/cosmos/azure-cosmos-spark_3-5_2-12/README.md', '#3113'] + - ['sdk/cosmos/azure-cosmos-spark_3-5_2-13/README.md', '#3113'] - ['sdk/cosmos/azure-cosmos-spark-account-data-resolver-sample/README.md', '#3113'] - ['sdk/cosmos/fabric-cosmos-spark-auth_3/README.md', '#3113'] - ['sdk/cosmos/azure-cosmos-spark_3_2-12/dev/README.md', '#3113'] diff --git a/eng/pipelines/aggregate-reports.yml b/eng/pipelines/aggregate-reports.yml index 9c69a195b44c..ce9a35950196 100644 --- a/eng/pipelines/aggregate-reports.yml +++ b/eng/pipelines/aggregate-reports.yml @@ -48,7 +48,7 @@ extends: displayName: 'Build all libraries that support Java $(JavaBuildVersion)' inputs: 
mavenPomFile: pom.xml - options: '$(DefaultOptions) -T 2C -DskipTests -Dgpg.skip -Dmaven.javadoc.skip=true -Dcodesnippet.skip=true -Dcheckstyle.skip=true -Dspotbugs.skip=true -Djacoco.skip=true -Drevapi.skip=true -Dshade.skip=true -Dspotless.skip=true -pl !com.azure.cosmos.spark:azure-cosmos-spark_3-3_2-12,!com.azure.cosmos.spark:azure-cosmos-spark_3-4_2-12,!com.azure.cosmos.spark:azure-cosmos-spark_3-5_2-12,!com.azure.cosmos.spark:azure-cosmos-spark-account-data-resolver-sample,!com.azure.cosmos.kafka:azure-cosmos-kafka-connect,!com.microsoft.azure:azure-batch,!com.microsoft.azure:azure-media' + options: '$(DefaultOptions) -T 2C -DskipTests -Dgpg.skip -Dmaven.javadoc.skip=true -Dcodesnippet.skip=true -Dcheckstyle.skip=true -Dspotbugs.skip=true -Djacoco.skip=true -Drevapi.skip=true -Dshade.skip=true -Dspotless.skip=true -pl !com.azure.cosmos.spark:azure-cosmos-spark_3-3_2-12,!com.azure.cosmos.spark:azure-cosmos-spark_3-4_2-12,!com.azure.cosmos.spark:azure-cosmos-spark_3-5_2-13,!com.azure.cosmos.spark:azure-cosmos-spark_3-5_2-12,!com.azure.cosmos.spark:azure-cosmos-spark-account-data-resolver-sample,!com.azure.cosmos.kafka:azure-cosmos-kafka-connect,!com.microsoft.azure:azure-batch,!com.microsoft.azure:azure-media' mavenOptions: '$(MemoryOptions) $(LoggingOptions)' javaHomeOption: 'JDKVersion' jdkVersionOption: $(JavaBuildVersion) @@ -60,7 +60,7 @@ extends: displayName: 'Build remaining libraries with Java $(FallbackJavaBuildVersion)' inputs: mavenPomFile: pom.xml - options: '$(DefaultOptions) -T 2C -DskipTests -Dgpg.skip -Dmaven.javadoc.skip=true -Dcodesnippet.skip=true -Dcheckstyle.skip=true -Dspotbugs.skip=true -Djacoco.skip=true -Drevapi.skip=true -Dspotless.skip=true -pl com.azure.cosmos.spark:azure-cosmos-spark_3-3_2-12,com.azure.cosmos.spark:azure-cosmos-spark_3-4_2-12,com.azure.cosmos.spark:azure-cosmos-spark_3-5_2-12,com.azure.cosmos.spark:azure-cosmos-spark-account-data-resolver-sample,com.azure.cosmos.kafka:azure-cosmos-kafka-connect,com.microsoft.azure:azure-batch,com.microsoft.azure:azure-media' + options: '$(DefaultOptions) -T 2C -DskipTests -Dgpg.skip -Dmaven.javadoc.skip=true -Dcodesnippet.skip=true -Dcheckstyle.skip=true -Dspotbugs.skip=true -Djacoco.skip=true -Drevapi.skip=true -Dspotless.skip=true -pl com.azure.cosmos.spark:azure-cosmos-spark_3-3_2-12,com.azure.cosmos.spark:azure-cosmos-spark_3-4_2-12,com.azure.cosmos.spark:azure-cosmos-spark_3-5_2-12,com.azure.cosmos.spark:azure-cosmos-spark_3-5_2-13,com.azure.cosmos.spark:azure-cosmos-spark-account-data-resolver-sample,com.azure.cosmos.kafka:azure-cosmos-kafka-connect,com.microsoft.azure:azure-batch,com.microsoft.azure:azure-media' mavenOptions: '$(MemoryOptions) $(LoggingOptions)' javaHomeOption: 'JDKVersion' jdkVersionOption: $(FallbackJavaBuildVersion) diff --git a/eng/versioning/external_dependencies.txt b/eng/versioning/external_dependencies.txt index f611e8eab3b6..7e734ed65a11 100644 --- a/eng/versioning/external_dependencies.txt +++ b/eng/versioning/external_dependencies.txt @@ -254,15 +254,25 @@ cosmos_com.microsoft.azure.synapse:synapseutils_2.12;1.5.4 ## Cosmos Spark connector under sdk\cosmos\azure-cosmos-spark_3-_2-12\pom.xml # Cosmos Spark connector runtime dependencies - provided by Spark runtime/host +cosmos-scala213-com.fasterxml.jackson.module:jackson-module-scala_2.13;2.18.4 cosmos-spark_3-3_org.apache.spark:spark-sql_2.12;3.3.0 +cosmos-scala213-spark_3-3_org.apache.spark:spark-sql_2.13;3.3.0 cosmos-spark_3-4_org.apache.spark:spark-sql_2.12;3.4.0 
+cosmos-scala213-spark_3-4_org.apache.spark:spark-sql_2.13;3.4.0 cosmos-spark_3-5_org.apache.spark:spark-sql_2.12;3.5.0 +cosmos-scala213-spark_3-5_org.apache.spark:spark-sql_2.13;3.5.0 cosmos-spark_3-3_org.apache.spark:spark-hive_2.12;3.3.0 +cosmos-scala213-spark_3-3_org.apache.spark:spark-hive_2.13;3.3.0 cosmos-spark_3-4_org.apache.spark:spark-hive_2.12;3.4.0 +cosmos-scala213-spark_3-4_org.apache.spark:spark-hive_2.13;3.4.0 cosmos-spark_3-5_org.apache.spark:spark-hive_2.12;3.5.0 +cosmos-scala213-spark_3-5_org.apache.spark:spark-hive_2.13;3.5.0 cosmos_org.scala-lang:scala-library;2.12.19 -cosmos_org.scala-lang.modules:scala-java8-compat_2.12;0.8.0 +cosmos-scala213_org.scala-lang:scala-library;2.13.17 +cosmos_org.scala-lang.modules:scala-java8-compat_2.12;0.9.1 +cosmos-scala213_org.scala-lang.modules:scala-java8-compat_2.13;0.9.1 cosmos_io.projectreactor:reactor-scala-extensions_2.12;0.8.0 +cosmos-scala213_io.projectreactor:reactor-scala-extensions_2.13;0.8.0 cosmos_commons-io:commons-io;2.4 cosmos_com.microsoft.azure:applicationinsights-core;2.6.4 cosmos_io.micrometer:micrometer-core;1.15.1 @@ -271,9 +281,13 @@ cosmos_io.micrometer:micrometer-registry-graphite;1.15.1 # Cosmos Spark connector tests only cosmos_org.scalatest:scalatest_2.12;3.2.2 +cosmos-scala213_org.scalatest:scalatest_2.13;3.2.2 cosmos_org.scalatest:scalatest-flatspec_2.12;3.2.3 +cosmos-scala213_org.scalatest:scalatest-flatspec_2.13;3.2.3 cosmos_org.scalactic:scalactic_2.12;3.2.3 +cosmos-scala213_org.scalactic:scalactic_2.13;3.2.3 cosmos_org.scalamock:scalamock_2.12;5.0.0 +cosmos-scala213_org.scalamock:scalamock_2.13;5.0.0 cosmos_com.globalmentor:hadoop-bare-naked-local-fs;0.1.0 cosmos_org.mockito:mockito-core;4.8.1 diff --git a/eng/versioning/version_client.txt b/eng/versioning/version_client.txt index 681af42a18b7..292529e4b015 100644 --- a/eng/versioning/version_client.txt +++ b/eng/versioning/version_client.txt @@ -112,6 +112,7 @@ com.azure:azure-cosmos-test;1.0.0-beta.15;1.0.0-beta.16 com.azure.cosmos.spark:azure-cosmos-spark_3-3_2-12;4.41.0;4.42.0-beta.1 com.azure.cosmos.spark:azure-cosmos-spark_3-4_2-12;4.41.0;4.42.0-beta.1 com.azure.cosmos.spark:azure-cosmos-spark_3-5_2-12;4.41.0;4.42.0-beta.1 +com.azure.cosmos.spark:azure-cosmos-spark_3-5_2-13;4.41.0;4.42.0-beta.1 com.azure.cosmos.spark:fabric-cosmos-spark-auth_3;1.1.0;1.2.0-beta.1 com.azure:azure-cosmos-tests;1.0.0-beta.1;1.0.0-beta.1 com.azure:azure-data-appconfiguration;1.8.4;1.9.0-beta.1 diff --git a/sdk/cosmos/azure-cosmos-spark_3-5/pom.xml b/sdk/cosmos/azure-cosmos-spark_3-5/pom.xml index 348d8c15caec..85337b09b220 100644 --- a/sdk/cosmos/azure-cosmos-spark_3-5/pom.xml +++ b/sdk/cosmos/azure-cosmos-spark_3-5/pom.xml @@ -24,6 +24,10 @@ ${cosmos.spark.skip} ${cosmos.spark.skip} 3.5 + 2.12 + 3.5.0 + 3.5.0 + 2.18.4 @@ -93,8 +97,8 @@ org.apache.spark - spark-sql_2.12 - 3.5.0 + spark-sql_${scala.binary.version} + ${spark35.version} io.netty @@ -109,8 +113,8 @@ org.apache.spark - spark-hive_2.12 - 3.5.0 + spark-hive_${scala.binary.version} + ${spark-hive-version} io.netty @@ -130,8 +134,8 @@ com.fasterxml.jackson.module - jackson-module-scala_2.12 - 2.18.4 + jackson-module-scala_${scala.binary.version} + ${scala-jackson.version} diff --git a/sdk/cosmos/azure-cosmos-spark_3-5_2-12/pom.xml b/sdk/cosmos/azure-cosmos-spark_3-5_2-12/pom.xml index 435e9fd672e5..d91dd06436ea 100644 --- a/sdk/cosmos/azure-cosmos-spark_3-5_2-12/pom.xml +++ b/sdk/cosmos/azure-cosmos-spark_3-5_2-12/pom.xml @@ -40,6 +40,18 @@ false + 2.12 + 2.12.19 + 3.3.0 + 3.4.0 + 3.5.0 + 0.9.1 + 0.8.0 + 
3.2.2 + 3.2.3 + 3.2.3 + 5.0.0 + 2.18.4 diff --git a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CHANGELOG.md b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CHANGELOG.md new file mode 100644 index 000000000000..94c2ec2d138d --- /dev/null +++ b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CHANGELOG.md @@ -0,0 +1,154 @@ +## Release History + +### 4.42.0-beta.1 (Unreleased) + +#### Features Added + +#### Breaking Changes + +#### Bugs Fixed + +#### Other Changes + +### 4.41.0 (2025-10-21) + +#### Features Added +* Added support `spark.cosmos.write.strategy` value `ItemPatchIfExists` which allows gracefully ignoring documents/patch-instructions when the document does not exist (anymore). - See [47034](https://github.com/Azure/azure-sdk-for-java/pull/47034) +* Added support to optionally omit info about spark environment and/or machine-info for driver/executors from `UserAgent` header via new config `spark.cosmos.userAgent.format` (allowed values are `SparkEnvAndWorkers` (default value), `OnlySparkEnv` and `NoSparkEnv`. - See [47047](https://github.com/Azure/azure-sdk-for-java/pull/47047) + +### 4.40.0 (2025-09-27) + +#### Features Added +* Added support for feed range cache refresh interval config. - See [46759](https://github.com/Azure/azure-sdk-for-java/pull/46759) + +#### Other Changes +* Added improvement to reduce partition planning time for large containers. - See [46727](https://github.com/Azure/azure-sdk-for-java/pull/46727) + +### 4.39.0 (2025-09-05) + +#### Bugs Fixed +* Reverted known issue due to shading log4j (which was introduced in 4.38.1). - See [PR 46546](https://github.com/Azure/azure-sdk-for-java/pull/46546) and [PR 46608](https://github.com/Azure/azure-sdk-for-java/pull/46608) +* Added change feed performance monitoring which is used to improve end lsn calculation in `CosmosPartitionPlanner`. - See [PR 46320](https://github.com/Azure/azure-sdk-for-java/pull/46320) +* Added `spark.cosmos.auth.aad.audience` as a valid configuration option to allow using AAD tokens with custom audiences. - See [PR 46554](https://github.com/Azure/azure-sdk-for-java/pull/46554) + +### 4.38.1 (2025-08-22) + +**NOTE: This version has a known issue due to shading log4j - Please use more recent versions >= 4.38.2 or 4.38.0 instead** + +#### Other Changes +* Added log4j-core to the list of shaded packages to avoid conflicts when customers use log4j in a different version. **NOTE: This change caused known issue - Please use a more recent version instead** - See [PR 45924](https://github.com/Azure/azure-sdk-for-java/pull/46451) + +### 4.38.0 (2025-07-31) + +#### Features Added +* Added telemetry support by adding OTEL span attribute naming schemes, introducing Azure Monitor integration, and sampled diagnostics. - See [PR 45924](https://github.com/Azure/azure-sdk-for-java/pull/45924) + +#### Other Changes +* Added compatibility with CosmosDB Fabric Native Accounts using the `FabricAccountDataResolver` for authentication. - See [PR 45890](https://github.com/Azure/azure-sdk-for-java/pull/45890) + +### 4.37.2 (2025-05-14) + +#### Features Added +* Added option to use the connector in non-public Azure clouds. - See [PR 45310](https://github.com/Azure/azure-sdk-for-java/pull/45310) + +#### Bugs Fixed +* Fixed an issue during bulk write operations that could result in failing the Spark job in `BulkWriter.flushAndClose` too eagerly in certain cases. 
- See [PR 44992](https://github.com/Azure/azure-sdk-for-java/pull/44992)
+* Fixed a hang issue in `CosmosPagedIterable#handle` by preventing race conditions in the underlying subscription of `Flux`. - See [PR 45290](https://github.com/Azure/azure-sdk-for-java/pull/45290)
+
+### 4.37.1 (2025-03-04)
+
+#### Features Added
+* Added config option `spark.cosmos.read.responseContinuationTokenLimitInKb` to reduce query continuation token size. - See [PR 44480](https://github.com/Azure/azure-sdk-for-java/pull/44480)
+
+### 4.37.0 (2025-02-20)
+
+#### Other Changes
+* Updated the netty dependency.
+
+### 4.36.1 (2025-02-08)
+
+#### Bugs Fixed
+* Fixed an issue in change feed where, under certain rare race conditions, records could be skipped and excessive requests were prefetched. - See [PR 43788](https://github.com/Azure/azure-sdk-for-java/pull/43788)
+
+### 4.36.0 (2025-01-14)
+> [!IMPORTANT]
+> We strongly recommend using version 4.36.0 or above, especially when using the all versions and deletes change feed.
+
+#### Features Added
+* Added the UDFs `GetFeedRangesForContainer` and `GetOverlappingFeedRange` to ease mapping of a Cosmos partition key to a Databricks table partition key. - See [PR 43092](https://github.com/Azure/azure-sdk-for-java/pull/43092)
+
+#### Bugs Fixed
+* Added null checking for previous images for deletes in the full fidelity change feed. - See [PR 43483](https://github.com/Azure/azure-sdk-for-java/pull/43483)
+
+#### Other Changes
+* Added options to fine-tune settings for bulk operations. - See [PR 43509](https://github.com/Azure/azure-sdk-for-java/pull/43509)
+
+### 4.35.0 (2024-11-27)
+
+#### Bugs Fixed
+* Fixed an issue when using `ChangeFeed` that caused some Cosmos partitions to not be fully processed in some cases. - See [PR 42553](https://github.com/Azure/azure-sdk-for-java/pull/42553)
+
+### 4.34.0 (2024-10-10)
+
+#### Bugs Fixed
+* Fixed an issue to avoid a transient `IllegalArgumentException` due to duplicate JSON properties for the `uniqueKeyPolicy` property in `DocumentCollection`. - See [PR 41608](https://github.com/Azure/azure-sdk-for-java/pull/41608) and [PR 42244](https://github.com/Azure/azure-sdk-for-java/pull/42244)
+
+### 4.33.1 (2024-08-23)
+
+#### Bugs Fixed
+* Fixed an issue to avoid a transient `IllegalArgumentException` due to duplicate JSON properties for the `uniqueKeyPolicy` property. - See [PR 41608](https://github.com/Azure/azure-sdk-for-java/pull/41608)
+
+#### Other Changes
+* Added retries on a new `BulkWriter` instance when the first attempt to commit times out for bulk write jobs. - See [PR 41553](https://github.com/Azure/azure-sdk-for-java/pull/41553)
+
+### 4.33.0 (2024-06-22)
+
+#### Features Added
+* Added a service trait `CosmosClientBuilderInterceptor` to allow intercepting and customizing the `CosmosClient` creation. - See [PR 40714](https://github.com/Azure/azure-sdk-for-java/pull/40714)
+
+#### Bugs Fixed
+* Fixed a race condition that resulted in retries for bulk writes not always being re-enqueued. - See [PR 40714](https://github.com/Azure/azure-sdk-for-java/pull/40714)
+
+### 4.32.1 (2024-06-07)
+
+#### Other Changes
+* Added retries when retrieving new pages for query or readMany operations times out, to avoid unbounded awaits. - See [PR 40506](https://github.com/Azure/azure-sdk-for-java/pull/40506)
+* Ensured that no statistics are reported when custom queries via `spark.cosmos.read.customQuery` are used. 
- See [PR 40506](https://github.com/Azure/azure-sdk-for-java/pull/40506)
+
+### 4.32.0 (2024-05-24)
+
+#### Features Added
+* Added config option `spark.cosmos.auth.aad.clientCertPemBase64` to allow using SPN (Service Principal Name) authentication with a certificate instead of a client secret. - See [PR 40325](https://github.com/Azure/azure-sdk-for-java/pull/40325)
+* Added config option `spark.cosmos.accountDataResolverServiceName` to allow specifying which `AccountDataResolver` trait implementation to use if there are multiple on the class path. - See [PR 40325](https://github.com/Azure/azure-sdk-for-java/pull/40325)
+
+#### Bugs Fixed
+* Fixed an issue where `SHOW DATABASES IN` only returned one database even though multiple databases exist. - See [PR 40277](https://github.com/Azure/azure-sdk-for-java/pull/40277)
+* Fixed an issue where `SHOW TABLES FROM` only returned one container even though multiple containers exist. - See [PR 40277](https://github.com/Azure/azure-sdk-for-java/pull/40277)
+* Fixed UserAgent encoding when the suffix contains non-ASCII characters. - See [PR 40293](https://github.com/Azure/azure-sdk-for-java/pull/40293)
+
+#### Other Changes
+* Added a robustness improvement to avoid client-side parsing errors `java.lang.IllegalArgumentException: Unable to parse JSON` when the Gateway returns a duplicate `uniqueKeyPolicy` in IndexPolicy (invalid JSON). - See [PR 40306](https://github.com/Azure/azure-sdk-for-java/pull/40306)
+
+### 4.31.0 (2024-05-20)
+
+#### Features Added
+* Added capability in azure-cosmos-spark to allow the Spark environment to support access tokens via `AccountDataResolver`. - See [PR 40079](https://github.com/Azure/azure-sdk-for-java/pull/40079)
+
+### 4.30.0 (2024-04-27)
+
+#### Features Added
+* Added capability to use (and enforce) the native netty transport. The native transport is more efficient - especially when the number of TCP connections in use is high. - See [PR 39834](https://github.com/Azure/azure-sdk-for-java/pull/39834)
+* Added ManagedIdentity authentication support for azure-cosmos-spark in Databricks. - See [PR 39870](https://github.com/Azure/azure-sdk-for-java/pull/39870)
+
+### 4.29.0 (2024-04-16)
+
+#### Features Added
+* Added Spark 3.5 support. - See [PR 39395](https://github.com/Azure/azure-sdk-for-java/pull/39395)
+
+#### Bugs Fixed
+* Fixed an issue causing failures when using change feed in batch mode with a batch location, `ChangeFeedBatch.planInputPartitions` is called multiple times (for example because the physical query plan gets retrieved), and some changes have been made in the monitored container between those calls. - See [PR 39635](https://github.com/Azure/azure-sdk-for-java/pull/39635)
+* Made the `AccountDataResolver` trait public again. - See [PR 39736](https://github.com/Azure/azure-sdk-for-java/pull/39736)
+
+#### Other Changes
+* Optimized the partitioning strategy implementation details to avoid unnecessarily high RU usage. - See [PR 39438](https://github.com/Azure/azure-sdk-for-java/pull/39438)
+
+### NOTE: See CHANGELOG.md in the 3.1, 3.2, 3.3 and 3.4 projects for changes prior to 4.29.0
diff --git a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CONTRIBUTING.md b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CONTRIBUTING.md
new file mode 100644
index 000000000000..7ada95d0a4e3
--- /dev/null
+++ b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CONTRIBUTING.md
@@ -0,0 +1,84 @@
+# Contributing
+This instruction is guideline for building and code contribution. 
+ +## Prequisites +- JDK 8 and above +- [Maven](https://maven.apache.org/) 3.0 and above + +## Build from source +To build the project, run maven commands. + +```bash +git clone https://github.com/Azure/azure-sdk-for-java.git +cd sdk/cosmos/azure-cosmos-spark_3-5_2-12 +mvn clean install +``` + +## Test +There are integration tests on azure and on emulator to trigger integration test execution +against Azure Cosmos DB and against +[Azure Cosmos DB Emulator](https://docs.microsoft.com/azure/cosmos-db/local-emulator), you need to +follow the link to set up emulator before test execution. + +- Run unit tests +```bash +mvn clean install -Dgpg.skip +``` + +- Run integration tests + - on Azure + > **NOTE** Please note that integration test against Azure requires Azure Cosmos DB Document + API and will automatically create a Cosmos database in your Azure subscription, then there + will be **Azure usage fee.** + + Integration tests will require a Azure Subscription. If you don't already have an Azure + subscription, you can activate your + [MSDN subscriber benefits](https://azure.microsoft.com/pricing/member-offers/msdn-benefits-details/) + or sign up for a [free Azure account](https://azure.microsoft.com/free/). + + 1. Create an Azure Cosmos DB on Azure. + - Go to [Azure portal](https://portal.azure.com/) and click +New. + - Click Databases, and then click Azure Cosmos DB to create your database. + - Navigate to the database you have created, and click Access keys and copy your + URI and access keys for your database. + + 2. Set environment variables ACCOUNT_HOST, ACCOUNT_KEY and SECONDARY_ACCOUNT_KEY, where value + of them are Cosmos account URI, primary key and secondary key. + + So set the + second group environment variables NEW_ACCOUNT_HOST, NEW_ACCOUNT_KEY and + NEW_SECONDARY_ACCOUNT_KEY, the two group environment variables can be same. + 3. Run maven command with `integration-test-azure` profile. + + ```bash + set ACCOUNT_HOST=your-cosmos-account-uri + set ACCOUNT_KEY=your-cosmos-account-primary-key + set SECONDARY_ACCOUNT_KEY=your-cosmos-account-secondary-key + + set NEW_ACCOUNT_HOST=your-cosmos-account-uri + set NEW_ACCOUNT_KEY=your-cosmos-account-primary-key + set NEW_SECONDARY_ACCOUNT_KEY=your-cosmos-account-secondary-key + mvnw -P integration-test-azure clean install + ``` + + - on Emulator + + Setup Azure Cosmos DB Emulator by following + [this instruction](https://docs.microsoft.com/azure/cosmos-db/local-emulator), and set + associated environment variables. Then run test with: + ```bash + mvnw -P integration-test-emulator install + ``` + + +- Skip tests execution +```bash +mvn clean install -Dgpg.skip-DskipTests +``` + +## Version management +Developing version naming convention is like `0.1.2-beta.1`. Release version naming convention is like `0.1.2`. + +## Contribute to code +Contribution is welcome. Please follow +[this instruction](https://github.com/Azure/azure-sdk-for-java/blob/main/CONTRIBUTING.md) to contribute code. diff --git a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/README.md b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/README.md new file mode 100644 index 000000000000..250fba8bb05e --- /dev/null +++ b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/README.md @@ -0,0 +1,192 @@ +# Azure Cosmos DB OLTP Spark 3 connector + +## Azure Cosmos DB OLTP Spark 3 connector for Spark 3.5 +**Azure Cosmos DB OLTP Spark connector** provides Apache Spark support for Azure Cosmos DB using +the [SQL API][sql_api_query]. 
+[Azure Cosmos DB][cosmos_introduction] is a globally-distributed database service which allows +developers to work with data using a variety of standard APIs, such as SQL, MongoDB, Cassandra, Graph, and Table. + +If you have any feedback or ideas on how to improve your experience please let us know here: +https://github.com/Azure/azure-sdk-for-java/issues/new + +### Documentation + +- [Getting started](https://aka.ms/azure-cosmos-spark-3-quickstart) +- [Catalog API](https://aka.ms/azure-cosmos-spark-3-catalog-api) +- [Configuration Parameter Reference](https://aka.ms/azure-cosmos-spark-3-config) + +[//]: # (//TODO: add more sections) +[//]: # (//TODO: Enable Client Logging) +[//]: # (//TODO: Examples) +[//]: # (//TODO: Next steps) +[//]: # (//TODO: Key concepts) +[//]: # (//TODO: Azure Cosmos DB Partition) +[//]: # (//TODO: Troubleshooting) + +### Version Compatibility + +#### azure-cosmos-spark_3-5_2-12 +| Connector | Supported Spark Versions | Minimum Java Version | Supported Scala Versions | Supported Databricks Runtimes | Supported Fabric Runtimes | +|-----------|--------------------------|-----------------------|---------------------------|-------------------------------|---------------------------| +| 4.41.0 | 3.5.0 | [8, 11] | 2.12 | 14.\*, 15.\*, 16.4 LTS | 1.3.\* | +| 4.40.0 | 3.5.0 | [8, 11] | 2.12 | 14.\*, 15.\* | | +| 4.39.0 | 3.5.0 | [8, 11] | 2.12 | 14.\*, 15.\* | | +| 4.38.0 | 3.5.0 | [8, 11] | 2.12 | 14.\*, 15.\* | | +| 4.37.2 | 3.5.0 | [8, 11] | 2.12 | 14.\*, 15.\* | | +| 4.37.1 | 3.5.0 | [8, 11] | 2.12 | 14.\*, 15.\* | | +| 4.37.0 | 3.5.0 | [8, 11] | 2.12 | 14.\*, 15.\* | | +| 4.36.1 | 3.5.0 | [8, 11] | 2.12 | 14.\*, 15.\* | | +| 4.36.0 | 3.5.0 | [8, 11] | 2.12 | 14.\*, 15.\* | | +| 4.35.0 | 3.5.0 | [8, 11] | 2.12 | 14.\*, 15.\* | | +| 4.34.0 | 3.5.0 | [8, 11] | 2.12 | 14.\*, 15.\* | | +| 4.33.1 | 3.5.0 | [8, 11] | 2.12 | 14.\*, 15.\* | | +| 4.33.0 | 3.5.0 | [8, 11] | 2.12 | 14.\*, 15.\* | | +| 4.32.1 | 3.5.0 | [8, 11] | 2.12 | 14.\*, 15.\* | | +| 4.32.0 | 3.5.0 | [8, 11] | 2.12 | 14.\*, 15.\* | | +| 4.31.0 | 3.5.0 | [8, 11] | 2.12 | 14.\*, 15.\* | | +| 4.30.0 | 3.5.0 | [8, 11] | 2.12 | 14.\*, 15.\* | | +| 4.29.0 | 3.5.0 | [8, 11] | 2.12 | 14.\*, 15.\* | | + +Note: Java 8 prior to version 8u371 support is deprecated as of Spark 3.5.0. When using the Scala API, it is necessary for applications +to use the same version of Scala that Spark was compiled for. 
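+
+Because the artifact name encodes the Scala binary version (`_2-12` vs. `_2-13`), it helps to
+confirm which flavor a given cluster needs before picking a coordinate. A minimal sketch from a
+notebook or `spark-shell` session - assuming `spark` is the `SparkSession` the shell provides:
+
+```scala
+// Scala binary version the runtime was compiled against, e.g. "version 2.13.17";
+// a 2.13.x runtime maps to the azure-cosmos-spark_3-5_2-13 artifact,
+// a 2.12.x runtime to azure-cosmos-spark_3-5_2-12.
+println(scala.util.Properties.versionString)
+
+// Spark version, which maps to the "_3-5" part of the artifact name.
+println(spark.version)
+```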
+ +#### azure-cosmos-spark_3-4_2-12 +| Connector | Supported Spark Versions | Supported JVM Versions | Supported Scala Versions | Supported Databricks Runtimes | Supported Fabric Runtimes | +|-----------|--------------------------|------------------------|--------------------------|-------------------------------|---------------------------| +| 4.41.0 | 3.4.0 - 3.4.1 | [8, 11] | 2.12 | 13.\* | | +| 4.40.0 | 3.4.0 - 3.4.1 | [8, 11] | 2.12 | 13.\* | | +| 4.39.0 | 3.4.0 - 3.4.1 | [8, 11] | 2.12 | 13.\* | | +| 4.38.0 | 3.4.0 - 3.4.1 | [8, 11] | 2.12 | 13.\* | | +| 4.37.2 | 3.4.0 - 3.4.1 | [8, 11] | 2.12 | 13.\* | | +| 4.37.1 | 3.4.0 - 3.4.1 | [8, 11] | 2.12 | 13.\* | | +| 4.37.0 | 3.4.0 - 3.4.1 | [8, 11] | 2.12 | 13.\* | | +| 4.36.1 | 3.4.0 - 3.4.1 | [8, 11] | 2.12 | 13.\* | | +| 4.36.0 | 3.4.0 | [8, 11] | 2.12 | 13.* | | +| 4.35.0 | 3.4.0 | [8, 11] | 2.12 | 13.* | | +| 4.34.0 | 3.4.0 | [8, 11] | 2.12 | 13.* | | +| 4.33.1 | 3.4.0 | [8, 11] | 2.12 | 13.* | | +| 4.33.0 | 3.4.0 | [8, 11] | 2.12 | 13.* | | +| 4.32.1 | 3.4.0 | [8, 11] | 2.12 | 13.* | | +| 4.32.0 | 3.4.0 | [8, 11] | 2.12 | 13.* | | +| 4.31.0 | 3.4.0 | [8, 11] | 2.12 | 13.* | | +| 4.30.0 | 3.4.0 | [8, 11] | 2.12 | 13.* | | +| 4.29.0 | 3.4.0 | [8, 11] | 2.12 | 13.* | | +| 4.28.4 | 3.4.0 | [8, 11] | 2.12 | 13.* | | +| 4.28.3 | 3.4.0 | [8, 11] | 2.12 | 13.* | | +| 4.28.2 | 3.4.0 | [8, 11] | 2.12 | 13.* | | +| 4.28.1 | 3.4.0 | [8, 11] | 2.12 | 13.* | | +| 4.28.0 | 3.4.0 | [8, 11] | 2.12 | 13.* | | +| 4.27.1 | 3.4.0 | [8, 11] | 2.12 | 13.* | | +| 4.27.0 | 3.4.0 | [8, 11] | 2.12 | 13.* | | +| 4.26.1 | 3.4.0 | [8, 11] | 2.12 | 13.* | | +| 4.26.0 | 3.4.0 | [8, 11] | 2.12 | 13.* | | +| 4.25.1 | 3.4.0 | [8, 11] | 2.12 | 13.* | | +| 4.25.0 | 3.4.0 | [8, 11] | 2.12 | 13.* | | +| 4.24.1 | 3.4.0 | [8, 11] | 2.12 | 13.* | | +| 4.24.0 | 3.4.0 | [8, 11] | 2.12 | 13.* | | +| 4.23.0 | 3.4.0 | [8, 11] | 2.12 | 13.* | | +| 4.22.0 | 3.4.0 | [8, 11] | 2.12 | 13.* | | +| 4.21.1 | 3.4.0 | [8, 11] | 2.12 | 13.* | | +| 4.21.0 | 3.4.0 | [8, 11] | 2.12 | 13.* | | + +#### azure-cosmos-spark_3-3_2-12 +| Connector | Supported Spark Versions | Supported JVM Versions | Supported Scala Versions | Supported Databricks Runtimes | +|-----------|--------------------------|------------------------|--------------------------|-------------------------------| +| 4.41.0 | 3.3.0 - 3.3.2 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.40.0 | 3.3.0 - 3.3.2 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.39.0 | 3.3.0 - 3.3.2 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.38.0 | 3.3.0 - 3.3.2 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.37.2 | 3.3.0 - 3.3.2 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.37.1 | 3.3.0 - 3.3.2 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.37.0 | 3.3.0 - 3.3.2 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.36.1 | 3.3.0 - 3.3.2 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.36.0 | 3.3.0 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.35.0 | 3.3.0 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.34.0 | 3.3.0 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.33.1 | 3.3.0 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.33.0 | 3.3.0 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.32.1 | 3.3.0 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.32.0 | 3.3.0 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.31.0 | 3.3.0 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.30.0 | 3.3.0 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.29.0 | 3.3.0 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.28.4 | 3.3.0 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.28.3 | 3.3.0 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.28.2 | 3.3.0 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.28.1 | 3.3.0 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.28.0 | 3.3.0 | [8, 11] | 
2.12 | 11.\*, 12.\* | +| 4.27.1 | 3.3.0 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.27.0 | 3.3.0 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.26.1 | 3.3.0 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.26.0 | 3.3.0 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.25.1 | 3.3.0 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.25.0 | 3.3.0 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.24.1 | 3.3.0 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.24.0 | 3.3.0 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.23.0 | 3.3.0 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.22.0 | 3.3.0 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.21.1 | 3.3.0 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.21.0 | 3.3.0 | [8, 11] | 2.12 | 11.\*, 12.\* | +| 4.20.0 | 3.3.0 | [8, 11] | 2.12 | 11.\* | +| 4.19.0 | 3.3.0 | [8, 11] | 2.12 | 11.\* | +| 4.18.2 | 3.3.0 | [8, 11] | 2.12 | 11.\* | +| 4.18.1 | 3.3.0 | [8, 11] | 2.12 | 11.\* | +| 4.18.0 | 3.3.0 | [8, 11] | 2.12 | 11.\* | +| 4.17.2 | 3.3.0 | [8, 11] | 2.12 | 11.\* | +| 4.17.0 | 3.3.0 | [8, 11] | 2.12 | 11.\* | +| 4.16.0 | 3.3.0 | [8, 11] | 2.12 | 11.\* | +| 4.15.0 | 3.3.0 | [8, 11] | 2.12 | 11.\* | + +### Download + +You can use the maven coordinate of the jar to auto install the Spark Connector to your Databricks Runtime from Maven: +`com.azure.cosmos.spark:azure-cosmos-spark_3-5_2-12:4.41.0` + +You can also integrate against Cosmos DB Spark Connector in your SBT project: +```scala +libraryDependencies += "com.azure.cosmos.spark" % "azure-cosmos-spark_3-5_2-12" % "4.41.0" +``` + +Cosmos DB Spark Connector is available on [Maven Central Repo](https://central.sonatype.com/search?namespace=com.azure.cosmos.spark). + +#### General + +If you encounter any bug, please file an issue [here](https://github.com/Azure/azure-sdk-for-java/issues/new). + +To suggest a new feature or changes that could be made, file an issue the same way you would for a bug. + +### License +This project is under MIT license and uses and repackages other third party libraries as an uber jar. +See [NOTICE.txt](https://github.com/Azure/azure-sdk-for-java/blob/main/NOTICE.txt). + +### Contributing + +This project welcomes contributions and suggestions. Most contributions require you to agree to a +[Contributor License Agreement (CLA)][cla] declaring that you have the right to, and actually do, grant us the rights +to use your contribution. + +When you submit a pull request, a CLA-bot will automatically determine whether you need to provide a CLA and decorate +the PR appropriately (e.g., label, comment). Simply follow the instructions provided by the bot. You will only need to +do this once across all repos using our CLA. + +This project has adopted the [Microsoft Open Source Code of Conduct][coc]. For more information see the [Code of Conduct FAQ][coc_faq] +or contact [opencode@microsoft.com][coc_contact] with any additional questions or comments. 
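+
+For an SBT project that cross-builds against both Scala lines, the artifact suffix used in the
+Download section above can also be derived from the Scala binary version instead of being
+hard-coded. A sketch - assuming both connector flavors are published under the same release
+version:
+
+```scala
+// build.sbt (sketch): pick the matching connector artifact per Scala binary version.
+crossScalaVersions := Seq("2.12.19", "2.13.17")
+
+libraryDependencies += {
+  // scalaBinaryVersion.value is "2.12" or "2.13"; the artifact id uses "2-12"/"2-13".
+  val suffix = scalaBinaryVersion.value.replace('.', '-')
+  "com.azure.cosmos.spark" % s"azure-cosmos-spark_3-5_$suffix" % "4.41.0"
+}
+```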
+ + +[source_code]: src +[cosmos_introduction]: https://learn.microsoft.com/azure/cosmos-db/ +[cosmos_docs]: https://learn.microsoft.com/azure/cosmos-db/introduction +[jdk]: https://learn.microsoft.com/java/azure/jdk/?view=azure-java-stable +[maven]: https://maven.apache.org/ +[cla]: https://cla.microsoft.com +[coc]: https://opensource.microsoft.com/codeofconduct/ +[coc_faq]: https://opensource.microsoft.com/codeofconduct/faq/ +[coc_contact]: mailto:opencode@microsoft.com +[azure_subscription]: https://azure.microsoft.com/free/ +[samples]: https://github.com/Azure/azure-sdk-for-java/tree/main/sdk/spring/azure-spring-data-cosmos/src/samples/java/com/azure/spring/data/cosmos +[sql_api_query]: https://learn.microsoft.com/azure/cosmos-db/sql-api-sql-query +[local_emulator]: https://learn.microsoft.com/azure/cosmos-db/local-emulator +[local_emulator_export_ssl_certificates]: https://learn.microsoft.com/azure/cosmos-db/local-emulator-export-ssl-certificates +[azure_cosmos_db_partition]: https://learn.microsoft.com/azure/cosmos-db/partition-data +[sql_queries_in_cosmos]: https://learn.microsoft.com/azure/cosmos-db/tutorial-query-sql-api +[sql_queries_getting_started]: https://learn.microsoft.com/azure/cosmos-db/sql-query-getting-started diff --git a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml new file mode 100644 index 000000000000..db9db939027e --- /dev/null +++ b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml @@ -0,0 +1,165 @@ + + + 4.0.0 + + com.azure.cosmos.spark + azure-cosmos-spark_3-5 + 0.0.1-beta.1 + ../azure-cosmos-spark_3-5 + + com.azure.cosmos.spark + azure-cosmos-spark_3-5_2-13 + 4.42.0-beta.1 + jar + https://github.com/Azure/azure-sdk-for-java/tree/main/sdk/cosmos/azure-cosmos-spark_3-5_2-13 + OLTP Spark 3.5 Connector for Azure Cosmos DB SQL API + OLTP Spark 3.5 Connector for Azure Cosmos DB SQL API + + scm:git:https://github.com/Azure/azure-sdk-for-java.git/sdk/cosmos/azure-cosmos-spark_3-5_2-13 + + https://github.com/Azure/azure-sdk-for-java/sdk/cosmos/azure-cosmos-spark_3-5_2-13 + + + Microsoft Corporation + http://microsoft.com + + + + The MIT License (MIT) + http://opensource.org/licenses/MIT + repo + + + + + microsoft + Microsoft Corporation + + + + false + 2.13 + 2.13.17 + 3.3.0 + 3.4.0 + 3.5.0 + 0.9.1 + 0.8.0 + 3.2.2 + 3.2.3 + 3.2.3 + 5.0.0 + 2.18.4 + + + + + org.codehaus.mojo + build-helper-maven-plugin + 3.6.1 + + + add-sources + generate-sources + + add-source + + + + ${basedir}/../azure-cosmos-spark_3/src/main/scala + ${basedir}/../azure-cosmos-spark_3-5/src/main/scala + ${basedir}/src/main/scala + + + + + add-test-sources + generate-test-sources + + add-test-source + + + + ${basedir}/../azure-cosmos-spark_3/src/test/scala + ${basedir}/../azure-cosmos-spark_3-5/src/test/scala + ${basedir}/src/test/scala + + + + + add-resources + generate-resources + + add-resource + + + + ${basedir}/../azure-cosmos-spark_3/src/main/resources + ${basedir}/../azure-cosmos-spark_3-5/src/main/resources + ${basedir}/src/main/resources + + + + + + + org.apache.maven.plugins + maven-enforcer-plugin + 3.6.1 + + + + + + + spark-e2e_3-5 + + + ${basedir}/scalastyle_config.xml + + + spark-e2e_3-5 + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 3.5.3 + + + **/*.* + **/*Test.* + **/*Suite.* + **/*Spec.* + + true + + + + org.scalatest + scalatest-maven-plugin + 2.1.0 + + ${project.build.directory}/surefire-reports + . 
+ SparkTestSuite.txt + (ITest|Test|Spec|Suite) + + + + test + + test + + + + + + + + + diff --git a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/scalastyle_config.xml b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/scalastyle_config.xml new file mode 100644 index 000000000000..7a8ad2823fb8 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/scalastyle_config.xml @@ -0,0 +1,130 @@ + + Scalastyle standard configuration + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/src/main/resources/azure-cosmos-spark.properties b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/src/main/resources/azure-cosmos-spark.properties new file mode 100644 index 000000000000..ca812989b4f2 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/src/main/resources/azure-cosmos-spark.properties @@ -0,0 +1,2 @@ +name=${project.artifactId} +version=${project.version} diff --git a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/src/test/resources/META-INF/services/com.azure.cosmos.spark.CosmosClientBuilderInterceptor b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/src/test/resources/META-INF/services/com.azure.cosmos.spark.CosmosClientBuilderInterceptor new file mode 100644 index 000000000000..0d43a5bfc657 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/src/test/resources/META-INF/services/com.azure.cosmos.spark.CosmosClientBuilderInterceptor @@ -0,0 +1 @@ +com.azure.cosmos.spark.TestCosmosClientBuilderInterceptor \ No newline at end of file diff --git a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/src/test/resources/META-INF/services/com.azure.cosmos.spark.CosmosClientInterceptor b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/src/test/resources/META-INF/services/com.azure.cosmos.spark.CosmosClientInterceptor new file mode 100644 index 000000000000..e2239720776d --- /dev/null +++ b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/src/test/resources/META-INF/services/com.azure.cosmos.spark.CosmosClientInterceptor @@ -0,0 +1 @@ +com.azure.cosmos.spark.TestFaultInjectionClientInterceptor \ No newline at end of file diff --git a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/src/test/resources/META-INF/services/com.azure.cosmos.spark.WriteOnRetryCommitInterceptor b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/src/test/resources/META-INF/services/com.azure.cosmos.spark.WriteOnRetryCommitInterceptor new file mode 100644 index 000000000000..c60cbf2f14e4 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/src/test/resources/META-INF/services/com.azure.cosmos.spark.WriteOnRetryCommitInterceptor @@ -0,0 +1 @@ +com.azure.cosmos.spark.TestWriteOnRetryCommitInterceptor \ No newline at end of file diff --git a/sdk/cosmos/azure-cosmos-spark_3/pom.xml b/sdk/cosmos/azure-cosmos-spark_3/pom.xml index 366a6e81be5f..db7a5ac42bde 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/pom.xml +++ b/sdk/cosmos/azure-cosmos-spark_3/pom.xml @@ -38,7 +38,20 @@ ${cosmos.spark.skip} ${cosmos.spark.skip} - 3.1 + 3.3 + + 2.12 + 2.12.19 + 3.3.0 + 3.4.0 + 3.5.0 + 0.9.1 + 0.8.0 + 3.2.2 + 3.2.3 + 3.2.3 + 5.0.0 + 2.18.4 @@ -78,13 +91,13 @@ org.scala-lang.modules - scala-java8-compat_2.12 - 0.8.0 + scala-java8-compat_${scala.binary.version} + ${scala-java8-compat.version} io.projectreactor - reactor-scala-extensions_2.12 - 0.8.0 + reactor-scala-extensions_${scala.binary.version} + ${reactor-scala-extensions.version} io.projectreactor @@ -176,26 +189,26 @@ 
org.scalatest - scalatest_2.12 - 3.2.2 + scalatest_${scala.binary.version} + ${scalatest.version} test org.scalatest - scalatest-flatspec_2.12 - 3.2.3 + scalatest-flatspec_${scala.binary.version} + ${scalatest-flatspec.version} test org.scalactic - scalactic_2.12 - 3.2.3 + scalactic_${scala.binary.version} + ${scalactic.version} test org.scalamock - scalamock_2.12 - 5.0.0 + scalamock_${scala.binary.version} + ${scalamock.version} test @@ -285,21 +298,21 @@ org.apache.commons:commons-lang3:[3.18.0] org.slf4j:slf4j-api:[1.7.36] - org.apache.spark:spark-sql_2.12:[3.3.0] - org.apache.spark:spark-sql_2.12:[3.4.0] - org.apache.spark:spark-sql_2.12:[3.5.0] + org.apache.spark:spark-sql_${scala.binary.version}:[${spark33.version}] + org.apache.spark:spark-sql_${scala.binary.version}:[${spark34.version}] + org.apache.spark:spark-sql_${scala.binary.version}:[${spark35.version}] commons-io:commons-io:[2.4] - org.scala-lang:scala-library:[2.12.19] - org.scala-lang.modules:scala-java8-compat_2.12:[0.8.0] - io.projectreactor:reactor-scala-extensions_2.12:[0.8.0] - org.scalatest:scalatest_2.12:[3.2.2] + org.scala-lang:scala-library:[${scala.version}] + org.scala-lang.modules:scala-java8-compat_${scala.binary.version}:[${scala-java8-compat.version}] + io.projectreactor:reactor-scala-extensions_${scala.binary.version}:[${reactor-scala-extensions.version}] + org.scalatest:scalatest_${scala.binary.version}:[${scalatest.version}] org.apache.maven.plugins:maven-antrun-plugin:[3.1.0] net.alchim31.maven:scala-maven-plugin:[4.8.1] org.scalastyle:scalastyle-maven-plugin:[1.0.0] com.fasterxml.jackson.core:jackson-databind:[2.18.4] com.fasterxml.jackson.datatype:jackson-datatype-jsr310:[2.18.4] com.fasterxml.jackson.module:jackson-module-afterburner:[2.18.4] - com.fasterxml.jackson.module:jackson-module-scala_2.12:[2.18.4] + com.fasterxml.jackson.module:jackson-module-scala_${scala.binary.version}:[${scala-jackson.version}] io.micrometer:micrometer-registry-azure-monitor:[1.15.1] io.micrometer:micrometer-core:[1.15.1] com.microsoft.azure:applicationinsights-core:[2.6.4] @@ -388,7 +401,7 @@ 1.8 1.8 - 2.12.19 + ${scala.version} diff --git a/sdk/cosmos/ci.yml b/sdk/cosmos/ci.yml index 2f094d0ec3b9..f74abd73ec96 100644 --- a/sdk/cosmos/ci.yml +++ b/sdk/cosmos/ci.yml @@ -18,6 +18,7 @@ trigger: - sdk/cosmos/azure-cosmos-spark_3-4_2-12/ - sdk/cosmos/azure-cosmos-spark_3-5/ - sdk/cosmos/azure-cosmos-spark_3-5_2-12/ + - sdk/cosmos/azure-cosmos-spark_3-5_2-13/ - sdk/cosmos/fabric-cosmos-spark-auth_3/ - sdk/cosmos/azure-cosmos-test/ - sdk/cosmos/azure-cosmos-tests/ @@ -33,6 +34,7 @@ trigger: - sdk/cosmos/azure-cosmos-spark_3-3_2-12/pom.xml - sdk/cosmos/azure-cosmos-spark_3-4_2-12/pom.xml - sdk/cosmos/azure-cosmos-spark_3-5_2-12/pom.xml + - sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml - sdk/cosmos/azure-cosmos-spark_3-5/pom.xml - sdk/cosmos/fabric-cosmos-spark-auth_3/pom.xml - sdk/cosmos/azure-cosmos-kafka-connect/pom.xml @@ -59,6 +61,7 @@ pr: - sdk/cosmos/azure-cosmos-spark_3-4_2-12/ - sdk/cosmos/azure-cosmos-spark_3-5/ - sdk/cosmos/azure-cosmos-spark_3-5_2-12/ + - sdk/cosmos/azure-cosmos-spark_3-5_2-13/ - sdk/cosmos/fabric-cosmos-spark-auth_3/ - sdk/cosmos/faq/ - sdk/cosmos/azure-cosmos-kafka-connect/ @@ -72,6 +75,7 @@ pr: - sdk/cosmos/azure-cosmos-spark_3-4_2-12/pom.xml - sdk/cosmos/azure-cosmos-spark_3-5/pom.xml - sdk/cosmos/azure-cosmos-spark_3-5_2-12/pom.xml + - sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml - sdk/cosmos/fabric-cosmos-spark-auth_3/pom.xml - sdk/cosmos/azure-cosmos-test/pom.xml - 
sdk/cosmos/azure-cosmos-tests/pom.xml @@ -93,10 +97,14 @@ parameters: displayName: 'azure-cosmos-spark_3-4_2-12' type: boolean default: true - - name: release_azurecosmosspark35 + - name: release_azurecosmosspark35-scala212 displayName: 'azure-cosmos-spark_3-5_2-12' type: boolean default: true + - name: release_azurecosmosspark35-scala213 + displayName: 'azure-cosmos-spark_3-5_2-13' + type: boolean + default: true - name: release_fabriccosmossparkauth3 displayName: 'fabric-cosmos-spark-auth_3' type: boolean @@ -140,11 +148,18 @@ extends: releaseInBatch: ${{ parameters.release_azurecosmosspark34 }} - name: azure-cosmos-spark_3-5_2-12 groupId: com.azure.cosmos.spark - safeName: azurecosmosspark35 + safeName: azurecosmosspark35scala212 + uberJar: true + skipPublishDocGithubIo: true + skipPublishDocMs: true + releaseInBatch: ${{ parameters.release_azurecosmosspark35-scala212 }} + - name: azure-cosmos-spark_3-5_2-13 + groupId: com.azure.cosmos.spark + safeName: azurecosmosspark35scala213 uberJar: true skipPublishDocGithubIo: true skipPublishDocMs: true - releaseInBatch: ${{ parameters.release_azurecosmosspark35 }} + releaseInBatch: ${{ release_azurecosmosspark35-scala213 }} - name: fabric-cosmos-spark-auth_3 groupId: com.azure.cosmos.spark safeName: fabriccosmossparkauth3 diff --git a/sdk/cosmos/pom.xml b/sdk/cosmos/pom.xml index c486b2fe9849..81b2db7ca53e 100644 --- a/sdk/cosmos/pom.xml +++ b/sdk/cosmos/pom.xml @@ -18,6 +18,7 @@ azure-cosmos-spark_3-4_2-12 azure-cosmos-spark_3-5 azure-cosmos-spark_3-5_2-12 + azure-cosmos-spark_3-5_2-13 azure-cosmos-test azure-cosmos-tests azure-cosmos-kafka-connect diff --git a/sdk/cosmos/spark.databricks.yml b/sdk/cosmos/spark.databricks.yml index 50b10d49190f..e124fc7e01e8 100644 --- a/sdk/cosmos/spark.databricks.yml +++ b/sdk/cosmos/spark.databricks.yml @@ -135,6 +135,6 @@ stages: fi env: JAR_URL: '${{ parameters.JarReadOnlySasUri }}' - JAR_NAME: 'azure-cosmos-spark_3-5_2-12-latest-ci-candidate.jar' + JAR_NAME: '${{ parameters.SparkVersion }}-latest-ci-candidate.jar' JAR_CHECK_SUM: $(JarCheckSum) AVOID_DBFS: ${{ parameters.AvoidDBFS }} diff --git a/sdk/cosmos/spark.yml b/sdk/cosmos/spark.yml index 465a66078343..6e8f97e120c0 100644 --- a/sdk/cosmos/spark.yml +++ b/sdk/cosmos/spark.yml @@ -82,4 +82,22 @@ stages: AvoidDBFS: true JarStorageAccountKey: $(spark-databricks-cosmos-spn-clientIdCert) JarReadOnlySasUri: $(spark-databricks-cosmos-spn-clientCertBase64) - + - template: /sdk/cosmos/spark.databricks.yml + parameters: + CosmosEndpointMsi: $(spark-databricks-cosmos-endpoint-msi) + CosmosEndpoint: $(spark-databricks-cosmos-endpoint) + CosmosKey: $(spark-databricks-cosmos-key) + DatabricksEndpoint: $(spark-databricks-endpoint-with-msi) + SubscriptionId: '8fba6d4f-7c37-4d13-9063-fd58ad2b86e2' + TenantId: '72f988bf-86f1-41af-91ab-2d7cd011db47' + ResourceGroupName: 'oltp-spark-ci' + ClientId: $(spark-databricks-cosmos-spn-clientId) + ClientSecret: $(spark-databricks-cosmos-spn-clientSecret) + CosmosContainerName: 'sampleContainer7' + CosmosDatabaseName: 'sampleDB7' + DatabricksToken: $(spark-databricks-token-with-msi) + SparkVersion: 'azure-cosmos-spark_3-5_2-13' + ClusterName: 'oltp-ci-spark35-2workers-ds3v2-16.4-scala_2.13' + AvoidDBFS: true + JarStorageAccountKey: $(spark-databricks-cosmos-spn-clientIdCert) + JarReadOnlySasUri: $(spark-databricks-cosmos-spn-clientCertBase64) From 27ddfcb482ed2ea792d281f30ddf5c4859b70df2 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Wed, 31 Dec 2025 12:25:44 +0100 Subject: [PATCH 02/32] Update sdk/cosmos/ci.yml 
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- sdk/cosmos/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cosmos/ci.yml b/sdk/cosmos/ci.yml index f74abd73ec96..a8c5cfb73cff 100644 --- a/sdk/cosmos/ci.yml +++ b/sdk/cosmos/ci.yml @@ -159,7 +159,7 @@ extends: uberJar: true skipPublishDocGithubIo: true skipPublishDocMs: true - releaseInBatch: ${{ release_azurecosmosspark35-scala213 }} + releaseInBatch: ${{ parameters.release_azurecosmosspark35-scala213 }} - name: fabric-cosmos-spark-auth_3 groupId: com.azure.cosmos.spark safeName: fabriccosmossparkauth3 From 95ccdb42d82c20e35f51b0a163021f972283907e Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Wed, 31 Dec 2025 12:26:31 +0100 Subject: [PATCH 03/32] Update sdk/cosmos/azure-cosmos-spark_3/pom.xml Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- sdk/cosmos/azure-cosmos-spark_3/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cosmos/azure-cosmos-spark_3/pom.xml b/sdk/cosmos/azure-cosmos-spark_3/pom.xml index c99821ec7ee3..5a2e19ae7b6c 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/pom.xml +++ b/sdk/cosmos/azure-cosmos-spark_3/pom.xml @@ -38,7 +38,7 @@ ${cosmos.spark.skip} ${cosmos.spark.skip} - 3.3 + 3.1 2.12 2.12.19 From 04aa7445e495fac17fae3e505f468224ac10c6a9 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Wed, 31 Dec 2025 12:29:06 +0100 Subject: [PATCH 04/32] Update sdk/cosmos/azure-cosmos-spark_3-5_2-13/CONTRIBUTING.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- sdk/cosmos/azure-cosmos-spark_3-5_2-13/CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CONTRIBUTING.md b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CONTRIBUTING.md index 7ada95d0a4e3..dade0cc03421 100644 --- a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CONTRIBUTING.md +++ b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CONTRIBUTING.md @@ -1,7 +1,7 @@ # Contributing This instruction is guideline for building and code contribution. 
-## Prequisites +## Prerequisites - JDK 8 and above - [Maven](https://maven.apache.org/) 3.0 and above From c464e57b95b75464acb4052cd9a76a513819285b Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Wed, 31 Dec 2025 12:29:24 +0100 Subject: [PATCH 05/32] Update sdk/cosmos/azure-cosmos-spark_3-5_2-13/CONTRIBUTING.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- sdk/cosmos/azure-cosmos-spark_3-5_2-13/CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CONTRIBUTING.md b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CONTRIBUTING.md index dade0cc03421..a8b1ac89996c 100644 --- a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CONTRIBUTING.md +++ b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CONTRIBUTING.md @@ -73,7 +73,7 @@ mvn clean install -Dgpg.skip - Skip tests execution ```bash -mvn clean install -Dgpg.skip-DskipTests +mvn clean install -Dgpg.skip -DskipTests ``` ## Version management From 774640f57e5224ddfc12d544f1cb42222f434040 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Wed, 31 Dec 2025 11:30:26 +0000 Subject: [PATCH 06/32] Update pom.xml --- sdk/cosmos/azure-cosmos-spark_3/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cosmos/azure-cosmos-spark_3/pom.xml b/sdk/cosmos/azure-cosmos-spark_3/pom.xml index 5a2e19ae7b6c..c99821ec7ee3 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/pom.xml +++ b/sdk/cosmos/azure-cosmos-spark_3/pom.xml @@ -38,7 +38,7 @@ ${cosmos.spark.skip} ${cosmos.spark.skip} - 3.1 + 3.3 2.12 2.12.19 From 434b78d029b405b997a74b281875c0101c60e98b Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Wed, 31 Dec 2025 11:38:03 +0000 Subject: [PATCH 07/32] md fixes --- sdk/cosmos/azure-cosmos-spark_3-5_2-13/CONTRIBUTING.md | 2 +- sdk/cosmos/azure-cosmos-spark_3/dev/README.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CONTRIBUTING.md b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CONTRIBUTING.md index a8b1ac89996c..6949e20fb69d 100644 --- a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CONTRIBUTING.md +++ b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CONTRIBUTING.md @@ -10,7 +10,7 @@ To build the project, run maven commands. 
```bash git clone https://github.com/Azure/azure-sdk-for-java.git -cd sdk/cosmos/azure-cosmos-spark_3-5_2-12 +cd sdk/cosmos/azure-cosmos-spark_3-5_2-13 mvn clean install ``` diff --git a/sdk/cosmos/azure-cosmos-spark_3/dev/README.md b/sdk/cosmos/azure-cosmos-spark_3/dev/README.md index d15bf4113612..30ba9fbfc0db 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/dev/README.md +++ b/sdk/cosmos/azure-cosmos-spark_3/dev/README.md @@ -46,6 +46,7 @@ mvn -e -DskipTests -Dgpg.skip -Dmaven.javadoc.skip=true -Dcodesnippet.skip=true mvn -e -DskipTests -Dgpg.skip -Dmaven.javadoc.skip=true -Dcodesnippet.skip=true -Dspotbugs.skip=true -Dcheckstyle.skip=true -Drevapi.skip=true -pl ,azure-cosmos-spark_3-3_2-12 clean install mvn -e -DskipTests -Dgpg.skip -Dmaven.javadoc.skip=true -Dcodesnippet.skip=true -Dspotbugs.skip=true -Dcheckstyle.skip=true -Drevapi.skip=true -pl ,azure-cosmos-spark_3-4_2-12 clean install mvn -e -DskipTests -Dgpg.skip -Dmaven.javadoc.skip=true -Dcodesnippet.skip=true -Dspotbugs.skip=true -Dcheckstyle.skip=true -Drevapi.skip=true -pl ,azure-cosmos-spark_3-5_2-12 clean install +mvn -e -DskipTests -Dgpg.skip -Dmaven.javadoc.skip=true -Dcodesnippet.skip=true -Dspotbugs.skip=true -Dcheckstyle.skip=true -Drevapi.skip=true -pl ,azure-cosmos-spark_3-5_2-13 clean install ``` Take these files: From 94fb11fc730f699d51fe949c9c27fdd2acbdcc52 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Wed, 31 Dec 2025 12:58:32 +0000 Subject: [PATCH 08/32] Fixing wrong versions --- .../azure-cosmos-spark-account-data-resolver-sample/pom.xml | 2 +- sdk/cosmos/fabric-cosmos-spark-auth_3/pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-spark-account-data-resolver-sample/pom.xml b/sdk/cosmos/azure-cosmos-spark-account-data-resolver-sample/pom.xml index 5f8aa27d8f28..1b9c26e3eb2c 100644 --- a/sdk/cosmos/azure-cosmos-spark-account-data-resolver-sample/pom.xml +++ b/sdk/cosmos/azure-cosmos-spark-account-data-resolver-sample/pom.xml @@ -281,7 +281,7 @@ org.apache.spark:spark-sql_2.12:[3.5.0] commons-io:commons-io:[2.4] org.scala-lang:scala-library:[2.12.19] - org.scala-lang.modules:scala-java8-compat_2.12:[0.8.0] + org.scala-lang.modules:scala-java8-compat_2.12:[0.9.1] io.projectreactor:reactor-scala-extensions_2.12:[0.8.0] org.scalatest:scalatest_2.12:[3.2.2] org.apache.maven.plugins:maven-antrun-plugin:[3.1.0] diff --git a/sdk/cosmos/fabric-cosmos-spark-auth_3/pom.xml b/sdk/cosmos/fabric-cosmos-spark-auth_3/pom.xml index 94d7f17c86ad..45524afbe974 100644 --- a/sdk/cosmos/fabric-cosmos-spark-auth_3/pom.xml +++ b/sdk/cosmos/fabric-cosmos-spark-auth_3/pom.xml @@ -176,7 +176,7 @@ org.slf4j:slf4j-api:[1.7.36] org.scala-lang:scala-library:[2.12.19] - org.scala-lang.modules:scala-java8-compat_2.12:[0.8.0] + org.scala-lang.modules:scala-java8-compat_2.12:[0.9.1] org.scalatest:scalatest_2.12:[3.2.2] org.apache.maven.plugins:maven-antrun-plugin:[3.1.0] org.scalastyle:scalastyle-maven-plugin:[1.0.0] From a127d71b3820930bde3ed363c0ae9ac069d712f6 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Mon, 5 Jan 2026 08:44:40 +0000 Subject: [PATCH 09/32] Update pom.xml --- sdk/cosmos/azure-cosmos-spark_3/pom.xml | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-spark_3/pom.xml b/sdk/cosmos/azure-cosmos-spark_3/pom.xml index 457f1cdb02d0..86d1b6819ac3 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/pom.xml +++ b/sdk/cosmos/azure-cosmos-spark_3/pom.xml @@ -292,20 +292,25 @@ org.apache.commons:commons-lang3:[3.18.0] 
org.slf4j:slf4j-api:[1.7.36] - org.apache.spark:spark-sql_${scala.binary.version}:[${spark33.version}] - org.apache.spark:spark-sql_${scala.binary.version}:[${spark34.version}] - org.apache.spark:spark-sql_${scala.binary.version}:[${spark35.version}] + org.apache.spark:spark-sql_2.12:[${spark33.version}] + org.apache.spark:spark-sql_2.12:[${spark34.version}] + org.apache.spark:spark-sql_2.12:[${spark35.version}] + org.apache.spark:spark-sql_2.13:[${spark35.version}] org.scala-lang:scala-library:[${scala.version}] - org.scala-lang.modules:scala-java8-compat_${scala.binary.version}:[${scala-java8-compat.version}] - io.projectreactor:reactor-scala-extensions_${scala.binary.version}:[${reactor-scala-extensions.version}] - org.scalatest:scalatest_${scala.binary.version}:[${scalatest.version}] + org.scala-lang.modules:scala-java8-compat_2.12:[${scala-java8-compat.version}] + org.scala-lang.modules:scala-java8-compat_2.13:[${scala-java8-compat.version}] + io.projectreactor:reactor-scala-extensions_2.12:[${reactor-scala-extensions.version}] + io.projectreactor:reactor-scala-extensions_2.13:[${reactor-scala-extensions.version}] + org.scalatest:scalatest_2.12:[${scalatest.version}] + org.scalatest:scalatest_2.13:[${scalatest.version}] org.apache.maven.plugins:maven-antrun-plugin:[3.1.0] net.alchim31.maven:scala-maven-plugin:[4.8.1] org.scalastyle:scalastyle-maven-plugin:[1.0.0] com.fasterxml.jackson.core:jackson-databind:[2.18.4] com.fasterxml.jackson.datatype:jackson-datatype-jsr310:[2.18.4] com.fasterxml.jackson.module:jackson-module-afterburner:[2.18.4] - com.fasterxml.jackson.module:jackson-module-scala_${scala.binary.version}:[${scala-jackson.version}] + com.fasterxml.jackson.module:jackson-module-scala_2.12:[${scala-jackson.version}] + com.fasterxml.jackson.module:jackson-module-scala_2.13:[${scala-jackson.version}] io.micrometer:micrometer-registry-azure-monitor:[1.15.1] io.micrometer:micrometer-core:[1.15.1] com.microsoft.azure:applicationinsights-core:[2.6.4] From 46e6abcfc8f89ccd2c608385d770c54938252661 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Mon, 5 Jan 2026 10:33:54 +0000 Subject: [PATCH 10/32] Fix versioning powershell script to exclude checks for versions specified via variables --- eng/versioning/pom_file_version_scanner.ps1 | 164 ++++++++++-------- .../pom.xml | 2 +- 2 files changed, 94 insertions(+), 72 deletions(-) diff --git a/eng/versioning/pom_file_version_scanner.ps1 b/eng/versioning/pom_file_version_scanner.ps1 index 38546e5e02c9..654188d7deb4 100644 --- a/eng/versioning/pom_file_version_scanner.ps1 +++ b/eng/versioning/pom_file_version_scanner.ps1 @@ -723,31 +723,46 @@ Get-ChildItem -Path $Path -Filter pom*.xml -Recurse -File | ForEach-Object { $potentialLogMessage = Join-With-NewLine $potentialLogMessage "Error: dependency is missing version element for groupId=$($groupId), artifactId=$($artifactId) should be " continue } - if ($versionNode.NextSibling -and $versionNode.NextSibling.NodeType -eq "Comment") + + if ($versionNode.FirstChild.Value.StartsWith('${')) { - # unfortunately because there are POM exceptions we need to wildcard the group which may be - # something like _groupId - if ($versionNode.NextSibling.Value.Trim() -notmatch "{x-version-update;(.+)?$($groupId):$($artifactId);\w+}") + # skip version checks when they have been intentionally applied via variables + } + else + { + if ($versionNode.NextSibling -and $versionNode.NextSibling.NodeType -eq "Comment") { - $hasError = $true - $potentialLogMessage = Join-With-NewLine $potentialLogMessage "Error: 
dependency version update tag for groupId=$($groupId), artifactId=$($artifactId) should be " + # unfortunately because there are POM exceptions we need to wildcard the group which may be + # something like _groupId + if ($versionNode.FirstChild.Value.StartsWith('${')) + { + # skip version checks when they have been intentionally applied via variables + } + else + { + if ($versionNode.NextSibling.Value.Trim() -notmatch "{x-version-update;(.+)?$($groupId):$($artifactId);\w+}") + { + $hasError = $true + $potentialLogMessage = Join-With-NewLine $potentialLogMessage "Error: dependency version update tag for groupId=$($groupId), artifactId=$($artifactId) should be " + } + else + { + # verify the version tag and version are correct + $retVal = Test-Dependency-Tag-And-Version $libHash $extDepHash $versionNode.InnerText.Trim() $versionNode.NextSibling.Value $artifactsPerSDHashSet + if ($retVal) + { + $hasError = $true + $potentialLogMessage = Join-With-NewLine $potentialLogMessage $retVal + } + } + } } else { - # verify the version tag and version are correct - $retVal = Test-Dependency-Tag-And-Version $libHash $extDepHash $versionNode.InnerText.Trim() $versionNode.NextSibling.Value $artifactsPerSDHashSet - if ($retVal) - { - $hasError = $true - $potentialLogMessage = Join-With-NewLine $potentialLogMessage $retVal - } + $hasError = $true + $potentialLogMessage = Join-With-NewLine $potentialLogMessage "Error: Missing dependency version update tag for groupId=$($groupId), artifactId=$($artifactId). The tag should be " } - } - else - { - $hasError = $true - $potentialLogMessage = Join-With-NewLine $potentialLogMessage "Error: Missing dependency version update tag for groupId=$($groupId), artifactId=$($artifactId). The tag should be " - } + } } # Verify every plugin has a group, artifact and version # Verify every dependency has a group, artifact and version @@ -882,80 +897,87 @@ Get-ChildItem -Path $Path -Filter pom*.xml -Recurse -File | ForEach-Object { $groupId = $split[0] $artifactId = $split[1] $version = $split[2] - # The groupId match has to be able to deal with _ for external dependency exceptions - if (!$includeNode.NextSibling -or $includeNode.NextSibling.NodeType -ne "Comment") - { - $hasError = $true - $potentialLogMessage = Join-With-NewLine $potentialLogMessage "Error: is missing the update tag which should be " - } - elseif ($includeNode.NextSibling.Value.Trim() -notmatch "{x-include-update;(.+)?$($groupId):$($artifactId);(current|dependency|external_dependency)}") - { - $hasError = $true - $potentialLogMessage = Join-With-NewLine $potentialLogMessage "Error: version update tag for $($includeNode.InnerText) should be " + + if ($version.StartsWith('[${')) { + # skip version checks when they have been intentionally applied via variables } else { - # verify that the version is formatted correctly - if (!$version.StartsWith("[") -or !$version.EndsWith("]")) + # The groupId match has to be able to deal with _ for external dependency exceptions + if (!$includeNode.NextSibling -or $includeNode.NextSibling.NodeType -ne "Comment") { $hasError = $true - $potentialLogMessage = Join-With-NewLine $potentialLogMessage "Error: the version entry '$($version)' for '$($rawIncludeText)' is not formatted correctly. The include version needs to of the form '[]', the braces lock the include to a specific version for these entries. 
-->" + $potentialLogMessage = Join-With-NewLine $potentialLogMessage "Error: is missing the update tag which should be " + } + elseif ($includeNode.NextSibling.Value.Trim() -notmatch "{x-include-update;(.+)?$($groupId):$($artifactId);(current|dependency|external_dependency)}") + { + $hasError = $true + $potentialLogMessage = Join-With-NewLine $potentialLogMessage "Error: version update tag for $($includeNode.InnerText) should be " } - # verify the version has the correct value else { - $versionWithoutBraces = $version.Substring(1, $version.Length -2) - # the key into the dependency has needs to be created from the tag's group/artifact - # entries in case it's an external dependency entry. Because this has already - # been validated for format, grab the group:artifact - $depKey = $includeNode.NextSibling.Value.Trim().Split(";")[1] - $depType = $includeNode.NextSibling.Value.Trim().Split(";")[2] - $depType = $depType.Substring(0, $depType.IndexOf("}")) - if ($depType -eq $DependencyTypeExternal) + # verify that the version is formatted correctly + if (!$version.StartsWith("[") -or !$version.EndsWith("]")) { - if ($extDepHash.ContainsKey($depKey)) - { - if ($versionWithoutBraces -ne $extDepHash[$depKey].ver) - { - $hasError = $true - $potentialLogMessage = Join-With-NewLine $potentialLogMessage "Error: $($depKey)'s version is '$($versionWithoutBraces)' but the external_dependency version is listed as $($extDepHash[$depKey].ver)" - } - } - else - { - $hasError = $true - $potentialLogMessage = Join-With-NewLine $potentialLogMessage "Error: the groupId:artifactId entry '$($depKey)' for '$($rawIncludeText)' is not a valid external dependency. Please verify the entry exists in the external_dependencies.txt file. -->" - } + $hasError = $true + $potentialLogMessage = Join-With-NewLine $potentialLogMessage "Error: the version entry '$($version)' for '$($rawIncludeText)' is not formatted correctly. The include version needs to of the form '[]', the braces lock the include to a specific version for these entries. -->" } + # verify the version has the correct value else { - # If the tag isn't external_dependency then verify it exists in the library hash - if (!$libHash.ContainsKey($depKey)) - { - $hasError = $true - return "Error: $($depKey)'s dependency type is '$($depType)' but the dependency does not exist in any of the version_*.txt files. Should this be an external_dependency? Please ensure the dependency type is correct or the dependency is added to the appropriate file." - - } - if ($depType -eq $DependencyTypeDependency) + $versionWithoutBraces = $version.Substring(1, $version.Length -2) + # the key into the dependency has needs to be created from the tag's group/artifact + # entries in case it's an external dependency entry. 
Because this has already + # been validated for format, grab the group:artifact + $depKey = $includeNode.NextSibling.Value.Trim().Split(";")[1] + $depType = $includeNode.NextSibling.Value.Trim().Split(";")[2] + $depType = $depType.Substring(0, $depType.IndexOf("}")) + if ($depType -eq $DependencyTypeExternal) { - if ($versionWithoutBraces -ne $libHash[$depKey].depVer) + if ($extDepHash.ContainsKey($depKey)) + { + if ($versionWithoutBraces -ne $extDepHash[$depKey].ver) + { + $hasError = $true + $potentialLogMessage = Join-With-NewLine $potentialLogMessage "Error: $($depKey)'s version is '$($versionWithoutBraces)' but the external_dependency version is listed as $($extDepHash[$depKey].ver)" + } + } + else { $hasError = $true - return "Error: $($depKey)'s is '$($versionString)' but the dependency version is listed as $($libHash[$depKey].depVer)" + $potentialLogMessage = Join-With-NewLine $potentialLogMessage "Error: the groupId:artifactId entry '$($depKey)' for '$($rawIncludeText)' is not a valid external dependency. Please verify the entry exists in the external_dependencies.txt file. -->" } } - elseif ($depType -eq $DependencyTypeCurrent) + else { - # Verify that none of the 'current' dependencies are using a groupId that starts with 'unreleased_' or 'beta_' - if ($depKey.StartsWith('unreleased_') -or $depKey.StartsWith('beta_')) + # If the tag isn't external_dependency then verify it exists in the library hash + if (!$libHash.ContainsKey($depKey)) { $hasError = $true - return "Error: $($versionUpdateString) is using an unreleased_ or beta_ dependency and trying to set current value. Only dependency versions can be set with an unreleased or beta dependency." + return "Error: $($depKey)'s dependency type is '$($depType)' but the dependency does not exist in any of the version_*.txt files. Should this be an external_dependency? Please ensure the dependency type is correct or the dependency is added to the appropriate file." + } - if ($versionWithoutBraces -ne $libHash[$depKey].curVer) + if ($depType -eq $DependencyTypeDependency) { - $hasError = $true - return "Error: $($depKey)'s is '$($versionString)' but the current version is listed as $($libHash[$depKey].curVer)" + if ($versionWithoutBraces -ne $libHash[$depKey].depVer) + { + $hasError = $true + return "Error: $($depKey)'s is '$($versionString)' but the dependency version is listed as $($libHash[$depKey].depVer)" + } + } + elseif ($depType -eq $DependencyTypeCurrent) + { + # Verify that none of the 'current' dependencies are using a groupId that starts with 'unreleased_' or 'beta_' + if ($depKey.StartsWith('unreleased_') -or $depKey.StartsWith('beta_')) + { + $hasError = $true + return "Error: $($versionUpdateString) is using an unreleased_ or beta_ dependency and trying to set current value. Only dependency versions can be set with an unreleased or beta dependency." 
+ } + if ($versionWithoutBraces -ne $libHash[$depKey].curVer) + { + $hasError = $true + return "Error: $($depKey)'s is '$($versionString)' but the current version is listed as $($libHash[$depKey].curVer)" + } } } } diff --git a/sdk/cosmos/azure-cosmos-spark-account-data-resolver-sample/pom.xml b/sdk/cosmos/azure-cosmos-spark-account-data-resolver-sample/pom.xml index 1b9c26e3eb2c..c594cbfb2f1f 100644 --- a/sdk/cosmos/azure-cosmos-spark-account-data-resolver-sample/pom.xml +++ b/sdk/cosmos/azure-cosmos-spark-account-data-resolver-sample/pom.xml @@ -103,7 +103,7 @@ org.scala-lang.modules scala-java8-compat_2.12 - 0.8.0 + 0.9.1 io.projectreactor From 9a46e02de74c02bb71d747a1b773e85f88922666 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Mon, 5 Jan 2026 11:26:51 +0000 Subject: [PATCH 11/32] Removing unused external dependencies --- eng/versioning/external_dependencies.txt | 7 ------- sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml | 4 ---- 2 files changed, 11 deletions(-) diff --git a/eng/versioning/external_dependencies.txt b/eng/versioning/external_dependencies.txt index ee94143ed5de..4d55c3d58d0d 100644 --- a/eng/versioning/external_dependencies.txt +++ b/eng/versioning/external_dependencies.txt @@ -249,19 +249,12 @@ cosmos_com.microsoft.azure.synapse:synapseutils_2.12;1.5.4 ## Cosmos Spark connector under sdk\cosmos\azure-cosmos-spark_3-_2-12\pom.xml # Cosmos Spark connector runtime dependencies - provided by Spark runtime/host -cosmos-scala213-com.fasterxml.jackson.module:jackson-module-scala_2.13;2.18.4 cosmos-spark_3-3_org.apache.spark:spark-sql_2.12;3.3.0 -cosmos-scala213-spark_3-3_org.apache.spark:spark-sql_2.13;3.3.0 cosmos-spark_3-4_org.apache.spark:spark-sql_2.12;3.4.0 -cosmos-scala213-spark_3-4_org.apache.spark:spark-sql_2.13;3.4.0 cosmos-spark_3-5_org.apache.spark:spark-sql_2.12;3.5.0 -cosmos-scala213-spark_3-5_org.apache.spark:spark-sql_2.13;3.5.0 cosmos-spark_3-3_org.apache.spark:spark-hive_2.12;3.3.0 -cosmos-scala213-spark_3-3_org.apache.spark:spark-hive_2.13;3.3.0 cosmos-spark_3-4_org.apache.spark:spark-hive_2.12;3.4.0 -cosmos-scala213-spark_3-4_org.apache.spark:spark-hive_2.13;3.4.0 cosmos-spark_3-5_org.apache.spark:spark-hive_2.12;3.5.0 -cosmos-scala213-spark_3-5_org.apache.spark:spark-hive_2.13;3.5.0 cosmos_org.scala-lang:scala-library;2.12.19 cosmos-scala213_org.scala-lang:scala-library;2.13.17 cosmos_org.scala-lang.modules:scala-java8-compat_2.12;0.9.1 diff --git a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml index db9db939027e..18519c4dc94e 100644 --- a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml +++ b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml @@ -42,16 +42,12 @@ false 2.13 2.13.17 - 3.3.0 - 3.4.0 - 3.5.0 0.9.1 0.8.0 3.2.2 3.2.3 3.2.3 5.0.0 - 2.18.4 From 7ed73493f448446b1963289d074031fd2831f17f Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Mon, 5 Jan 2026 15:03:46 +0000 Subject: [PATCH 12/32] Update ci.yml --- sdk/cosmos/ci.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sdk/cosmos/ci.yml b/sdk/cosmos/ci.yml index a8c5cfb73cff..2e1c845dae37 100644 --- a/sdk/cosmos/ci.yml +++ b/sdk/cosmos/ci.yml @@ -97,11 +97,11 @@ parameters: displayName: 'azure-cosmos-spark_3-4_2-12' type: boolean default: true - - name: release_azurecosmosspark35-scala212 + - name: release_azurecosmosspark35_scala212 displayName: 'azure-cosmos-spark_3-5_2-12' type: boolean default: true - - name: release_azurecosmosspark35-scala213 + - name: release_azurecosmosspark35_scala213 displayName: 
'azure-cosmos-spark_3-5_2-13' type: boolean default: true @@ -152,14 +152,14 @@ extends: uberJar: true skipPublishDocGithubIo: true skipPublishDocMs: true - releaseInBatch: ${{ parameters.release_azurecosmosspark35-scala212 }} + releaseInBatch: ${{ parameters.release_azurecosmosspark35_scala212 }} - name: azure-cosmos-spark_3-5_2-13 groupId: com.azure.cosmos.spark safeName: azurecosmosspark35scala213 uberJar: true skipPublishDocGithubIo: true skipPublishDocMs: true - releaseInBatch: ${{ parameters.release_azurecosmosspark35-scala213 }} + releaseInBatch: ${{ parameters.release_azurecosmosspark35_scala213 }} - name: fabric-cosmos-spark-auth_3 groupId: com.azure.cosmos.spark safeName: fabriccosmossparkauth3 From dc3ae96201b7bb6b9d852f57d3f5576a34867271 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Mon, 5 Jan 2026 15:33:29 +0000 Subject: [PATCH 13/32] Update pom.xml --- sdk/cosmos/azure-cosmos-spark_3/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cosmos/azure-cosmos-spark_3/pom.xml b/sdk/cosmos/azure-cosmos-spark_3/pom.xml index 86d1b6819ac3..3dc770578e67 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/pom.xml +++ b/sdk/cosmos/azure-cosmos-spark_3/pom.xml @@ -65,7 +65,7 @@ org.scala-lang scala-library - 2.12.19 + ${scala.version} provided From bae1de224de160ccf9054c4b868b461b9dc12c2b Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Mon, 5 Jan 2026 18:21:44 +0000 Subject: [PATCH 14/32] Fixing scala 2.13 incompatibilities --- .../cosmos/spark/CosmosCatalogBase.scala | 2 +- .../cosmos/spark/CosmosClientMetrics.scala | 2 +- .../spark/CosmosTableSchemaInferrer.scala | 2 +- .../com/azure/cosmos/spark/PointWriter.scala | 12 ++-- ...ientIOErrorsRetryingReadManyIterator.scala | 4 +- .../CosmosCatalogCosmosSDKClient.scala | 61 +++++++++---------- .../azure/cosmos/spark/CosmosConfigSpec.scala | 2 +- .../spark/CosmosPartitionPlannerITest.scala | 4 +- .../cosmos/spark/FilterAnalyzerSpec.scala | 6 +- .../cosmos/spark/SparkE2EBulkWriteITest.scala | 2 +- .../spark/SparkE2EChangeFeedITest.scala | 4 +- .../com/azure/cosmos/spark/TestUtils.scala | 5 +- 12 files changed, 51 insertions(+), 55 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosCatalogBase.scala b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosCatalogBase.scala index 27a69b012eef..3da6ea1cb0b9 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosCatalogBase.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosCatalogBase.scala @@ -604,7 +604,7 @@ class CosmosCatalogBase v.viewName.equals(viewName)) match { case Some(existingView) => val updatedViewDefinitionsSnapshot: Array[ViewDefinition] = - (ArrayBuffer(viewDefinitions: _*) - existingView).toArray + ArrayBuffer(viewDefinitions: _*).filterNot(_ == existingView).toArray if (viewRepositorySnapshot.add( lastBatchId + 1, diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosClientMetrics.scala b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosClientMetrics.scala index 99f906ff4a2e..05defc0884ed 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosClientMetrics.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosClientMetrics.scala @@ -80,7 +80,7 @@ private[spark] object CosmosClientMetrics extends BasicLoggingTrait { override protected def nullGaugeValue: java.lang.Double = 
Double.NaN - override protected def close(): Unit = { + override def close(): Unit = { super.close() slf4JReporter match { diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosTableSchemaInferrer.scala b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosTableSchemaInferrer.scala index 41af8d70cef9..0b7d46dae134 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosTableSchemaInferrer.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosTableSchemaInferrer.scala @@ -163,7 +163,7 @@ private object CosmosTableSchemaInferrer .limit(cosmosInferenceConfig.inferSchemaSamplingSize) .collect(Collectors.toList[ObjectNode]()) - schema = Some(inferSchema(feedResponseList.asScala, + schema = Some(inferSchema(feedResponseList.asScala.toSeq, cosmosInferenceConfig.inferSchemaQuery.isDefined || cosmosInferenceConfig.includeSystemProperties, cosmosInferenceConfig.inferSchemaQuery.isDefined || cosmosInferenceConfig.includeTimestamp, cosmosInferenceConfig.allowNullForInferredProperties)) diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/PointWriter.scala b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/PointWriter.scala index 45d45e033e53..8f07bf5339d5 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/PointWriter.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/PointWriter.scala @@ -145,7 +145,7 @@ private class PointWriter(container: CosmosAsyncContainer, executeAsync(() => createWithRetry(partitionKeyValue, objectNode, createOperation)) .onComplete { case Success(_) => - promise.success(Unit) + promise.success(()) pendingPointWrites.remove(promise.future) log.logItemWriteCompletion(createOperation) case Failure(e) => @@ -167,7 +167,7 @@ private class PointWriter(container: CosmosAsyncContainer, executeAsync(() => upsertWithRetry(partitionKeyValue, objectNode, upsertOperation)) .onComplete { case Success(_) => - promise.success(Unit) + promise.success(()) pendingPointWrites.remove(promise.future) log.logItemWriteCompletion(upsertOperation) case Failure(e) => @@ -191,7 +191,7 @@ private class PointWriter(container: CosmosAsyncContainer, executeAsync(() => deleteWithRetry(partitionKeyValue, objectNode, onlyIfNotModified, deleteOperation)) .onComplete { case Success(_) => - promise.success(Unit) + promise.success(()) pendingPointWrites.remove(promise.future) log.logItemWriteCompletion(deleteOperation) case Failure(e) => @@ -214,7 +214,7 @@ private class PointWriter(container: CosmosAsyncContainer, executeAsync(() => patchWithRetry(partitionKeyValue, objectNode, patchOperation, ignoreNotFound)) .onComplete { case Success(_) => - promise.success(Unit) + promise.success(()) pendingPointWrites.remove(promise.future) log.logItemWriteCompletion(patchOperation) case Failure(e) => @@ -241,7 +241,7 @@ private class PointWriter(container: CosmosAsyncContainer, executeAsync(() => replaceIfNotModifiedWithRetry(partitionKeyValue, objectNode, etag, replaceOperation)) .onComplete { case Success(_) => - promise.success(Unit) + promise.success(()) pendingPointWrites.remove(promise.future) log.logItemWriteCompletion(replaceOperation) case Failure(e) => @@ -648,7 +648,7 @@ private class PointWriter(container: CosmosAsyncContainer, override def call(): Unit = { try { work() - future.complete(Unit) + future.complete(()) } catch { case e: Exception => future.completeExceptionally(e) 
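The mechanical edits in this commit trace back to two Scala 2.13 behavior changes: scala.Seq is now an alias for scala.collection.immutable.Seq, so the mutable Buffer returned by asScala no longer satisfies Seq parameters without an explicit .toSeq, and passing the Unit companion object (as in promise.success(Unit)) where a unit value is expected only compiles via value discarding, which warns and fails strict builds; the unit value itself is written (). A minimal standalone sketch of both fixes (hypothetical names; not code from the connector):

import scala.concurrent.Promise
import scala.jdk.CollectionConverters._

// Illustrative sketch only; names are hypothetical and not taken from the connector.
object Scala213MigrationSketch {
  // Under Scala 2.13, Seq[String] here means immutable.Seq[String].
  def countRows(rows: Seq[String]): Int = rows.size

  def main(args: Array[String]): Unit = {
    val javaRows = java.util.Arrays.asList("a", "b", "c")

    // asScala yields a mutable.Buffer; in 2.12 that conformed to scala.Seq,
    // but in 2.13 an explicit .toSeq is needed to obtain an immutable Seq.
    val count = countRows(javaRows.asScala.toSeq)

    val promise = Promise[Unit]()
    // promise.success(Unit) would pass the Unit companion object and only
    // compile via value discarding (a warning, fatal in strict builds);
    // the unit value is ().
    promise.success(())

    println(s"count=$count, completed=${promise.isCompleted}")
  }
}
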
diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/TransientIOErrorsRetryingReadManyIterator.scala b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/TransientIOErrorsRetryingReadManyIterator.scala index 39053c668574..bcfdb694cb82 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/TransientIOErrorsRetryingReadManyIterator.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/TransientIOErrorsRetryingReadManyIterator.scala @@ -78,14 +78,14 @@ private[spark] class TransientIOErrorsRetryingReadManyIterator[TSparkRow] * * @return true (more records exist), false (no more records exist), None (unknown call should be repeated) */ - private def hasNextInternalCore(readManyFilterList: List[CosmosItemIdentity]): Option[Boolean] = { + private def hasNextInternalCore(readManyFilterList: Seq[CosmosItemIdentity]): Option[Boolean] = { val feedResponse = try { Await.result( Future { ImplementationBridgeHelpers .CosmosAsyncContainerHelper .getCosmosAsyncContainerAccessor - .readMany(container, readManyFilterList.asJava, queryOptionsWithEnd2EndTimeout, classType) + .readMany(container, readManyFilterList.toList.asJava, queryOptionsWithEnd2EndTimeout, classType) .block() }(TransientIOErrorsRetryingReadManyIterator.executionContext), maxPageRetrievalTimeout) diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/catalog/CosmosCatalogCosmosSDKClient.scala b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/catalog/CosmosCatalogCosmosSDKClient.scala index d4e7e07f73f7..0ee2162eabd0 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/catalog/CosmosCatalogCosmosSDKClient.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/catalog/CosmosCatalogCosmosSDKClient.scala @@ -4,7 +4,8 @@ package com.azure.cosmos.spark.catalog import com.azure.cosmos.CosmosAsyncClient -import com.azure.cosmos.models.{CosmosContainerProperties, ExcludedPath, FeedRange, IncludedPath, IndexingMode, IndexingPolicy, ModelBridgeInternal, PartitionKeyDefinition, PartitionKeyDefinitionVersion, PartitionKind, SparkModelBridgeInternal, ThroughputProperties} +import com.azure.cosmos.models.{CosmosContainerProperties => ModelsCosmosContainerProperties, ExcludedPath, FeedRange, IncludedPath, IndexingMode, IndexingPolicy, ModelBridgeInternal, PartitionKeyDefinition, PartitionKeyDefinitionVersion, PartitionKind, SparkModelBridgeInternal, ThroughputProperties} +import com.azure.cosmos.spark.catalog.{CosmosContainerProperties => CatalogCosmosContainerProperties} import com.azure.cosmos.spark.diagnostics.BasicLoggingTrait import com.azure.cosmos.spark.{ContainerFeedRangesCache, CosmosConstants, Exceptions} import org.apache.spark.sql.connector.catalog.{NamespaceChange, TableChange} @@ -21,6 +22,8 @@ import java.util.Collections import scala.collection.JavaConverters._ // scalastyle:on underscore.import + + private[spark] case class CosmosCatalogCosmosSDKClient(cosmosAsyncClient: CosmosAsyncClient) extends CosmosCatalogClient with BasicLoggingTrait { @@ -80,15 +83,15 @@ private[spark] case class CosmosCatalogCosmosSDKClient(cosmosAsyncClient: Cosmos val partitionKeyDefinition = getPartitionKeyDefinition(containerProperties) val indexingPolicy = getIndexingPolicy(containerProperties) - val cosmosContainerProperties = new CosmosContainerProperties(containerName, partitionKeyDefinition) + val cosmosContainerProperties = new 
ModelsCosmosContainerProperties(containerName, partitionKeyDefinition) cosmosContainerProperties.setIndexingPolicy(indexingPolicy) - CosmosContainerProperties.getDefaultTtlInSeconds(containerProperties) match { + CatalogCosmosContainerProperties.getDefaultTtlInSeconds(containerProperties) match { case Some(ttl) => cosmosContainerProperties.setDefaultTimeToLiveInSeconds(ttl) case None => } - CosmosContainerProperties.getAnalyticalStoreTtlInSeconds(containerProperties) match { + CatalogCosmosContainerProperties.getAnalyticalStoreTtlInSeconds(containerProperties) match { case Some(ttl) => cosmosContainerProperties.setAnalyticalStoreTimeToLiveInSeconds(ttl) case None => } @@ -147,15 +150,15 @@ private[spark] case class CosmosCatalogCosmosSDKClient(cosmosAsyncClient: Cosmos cosmosAsyncClient.getDatabase(databaseName).read().asScala.`then`() private def getIndexingPolicy(containerProperties: Map[String, String]): IndexingPolicy = { - val indexingPolicySpecification = CosmosContainerProperties.getIndexingPolicy(containerProperties) + val indexingPolicySpecification = CatalogCosmosContainerProperties.getIndexingPolicy(containerProperties) //scalastyle:on multiple.string.literals - if (CosmosContainerProperties.AllPropertiesIndexingPolicyName.equalsIgnoreCase(indexingPolicySpecification)) { + if (CatalogCosmosContainerProperties.AllPropertiesIndexingPolicyName.equalsIgnoreCase(indexingPolicySpecification)) { new IndexingPolicy() .setAutomatic(true) .setIndexingMode(IndexingMode.CONSISTENT) .setIncludedPaths(util.Arrays.asList(new IncludedPath("/*"))) .setExcludedPaths(util.Arrays.asList(new ExcludedPath(raw"""/"_etag"/?"""))) - } else if (CosmosContainerProperties.OnlySystemPropertiesIndexingPolicyName.equalsIgnoreCase(indexingPolicySpecification)) { + } else if (CatalogCosmosContainerProperties.OnlySystemPropertiesIndexingPolicyName.equalsIgnoreCase(indexingPolicySpecification)) { new IndexingPolicy() .setAutomatic(true) .setIndexingMode(IndexingMode.CONSISTENT) @@ -168,42 +171,38 @@ private[spark] case class CosmosCatalogCosmosSDKClient(cosmosAsyncClient: Cosmos } private def getPartitionKeyDefinition(containerProperties: Map[String, String]): PartitionKeyDefinition = { - val partitionKeyPath = CosmosContainerProperties.getPartitionKeyPath(containerProperties) + val partitionKeyPath = CatalogCosmosContainerProperties.getPartitionKeyPath(containerProperties) val partitionKeyDef = new PartitionKeyDefinition val paths = new util.ArrayList[String] val pathList = partitionKeyPath.split(",").toList if (pathList.size >= 2) { - partitionKeyDef.setKind(CosmosContainerProperties.getPartitionKeyKind(containerProperties) match { - case Some(pkKind) => { - if (pkKind == PartitionKind.HASH.toString) { - throw new IllegalArgumentException("PartitionKind HASH is not supported for multi-hash partition key") - } - PartitionKind.MULTI_HASH - } + partitionKeyDef.setKind(CatalogCosmosContainerProperties.getPartitionKeyKind(containerProperties) match { + case Some(pkKind) => + if (pkKind == PartitionKind.HASH.toString) { + throw new IllegalArgumentException("PartitionKind HASH is not supported for multi-hash partition key") + } + PartitionKind.MULTI_HASH case None => PartitionKind.MULTI_HASH }) - partitionKeyDef.setVersion(CosmosContainerProperties.getPartitionKeyVersion(containerProperties) match { + partitionKeyDef.setVersion(CatalogCosmosContainerProperties.getPartitionKeyVersion(containerProperties) match { case Some(pkVersion) => - { - if (pkVersion == PartitionKeyDefinitionVersion.V1.toString) { - throw new 
IllegalArgumentException("PartitionKeyVersion V1 is not supported for multi-hash partition key") - } - PartitionKeyDefinitionVersion.V2 + if (pkVersion == PartitionKeyDefinitionVersion.V1.toString) { + throw new IllegalArgumentException("PartitionKeyVersion V1 is not supported for multi-hash partition key") } + PartitionKeyDefinitionVersion.V2 case None => PartitionKeyDefinitionVersion.V2 }) pathList.foreach(path => paths.add(path.trim)) } else { - partitionKeyDef.setKind(CosmosContainerProperties.getPartitionKeyKind(containerProperties) match { - case Some(pkKind) => { - if (pkKind == PartitionKind.MULTI_HASH.toString) { - throw new IllegalArgumentException("PartitionKind MULTI_HASH is not supported for single-hash partition key") - } - PartitionKind.HASH - } + partitionKeyDef.setKind(CatalogCosmosContainerProperties.getPartitionKeyKind(containerProperties) match { + case Some(pkKind) => + if (pkKind == PartitionKind.MULTI_HASH.toString) { + throw new IllegalArgumentException("PartitionKind MULTI_HASH is not supported for single-hash partition key") + } + PartitionKind.HASH case None => PartitionKind.HASH }) - CosmosContainerProperties.getPartitionKeyVersion(containerProperties) match { + CatalogCosmosContainerProperties.getPartitionKeyVersion(containerProperties) match { case Some(pkVersion) => partitionKeyDef.setVersion(PartitionKeyDefinitionVersion.valueOf(pkVersion)) case None => } @@ -278,10 +277,10 @@ private[spark] case class CosmosCatalogCosmosSDKClient(cosmosAsyncClient: Cosmos // scalastyle:off method.length private def generateTblProperties ( - metadata: (CosmosContainerProperties, List[FeedRange], Option[(ThroughputProperties, Boolean)]) + metadata: (ModelsCosmosContainerProperties, List[FeedRange], Option[(ThroughputProperties, Boolean)]) ): util.HashMap[String, String] = { - val containerProperties: CosmosContainerProperties = metadata._1 + val containerProperties: ModelsCosmosContainerProperties = metadata._1 val feedRanges: List[FeedRange] = metadata._2 val throughputPropertiesOption: Option[(ThroughputProperties, Boolean)] = metadata._3 diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/CosmosConfigSpec.scala b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/CosmosConfigSpec.scala index c73bdb027fc4..0144b468582b 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/CosmosConfigSpec.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/CosmosConfigSpec.scala @@ -252,7 +252,7 @@ class CosmosConfigSpec extends UnitSpec with BasicLoggingTrait { case otherError: Throwable => throw otherError } - val userCfgMissingArmEndpoint = userConfig.toMap.filterKeys(_ != "spark.cosmos.account.azureEnvironment.mANagement") + val userCfgMissingArmEndpoint = userConfig.toMap.filter(_._1 != "spark.cosmos.account.azureEnvironment.mANagement") try { CosmosAccountConfig.parseCosmosAccountConfig(userCfgMissingArmEndpoint) throw new IllegalStateException("Should never reach here when ARM endpoint config is missing") diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/CosmosPartitionPlannerITest.scala b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/CosmosPartitionPlannerITest.scala index 78262b17455f..8826c6d46fe4 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/CosmosPartitionPlannerITest.scala +++ 
b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/CosmosPartitionPlannerITest.scala @@ -20,7 +20,7 @@ import java.util import java.util.UUID import java.util.concurrent.atomic.{AtomicInteger, AtomicLong} import scala.collection.mutable.ArrayBuffer -import scala.jdk.CollectionConverters.asScalaBufferConverter +import scala.jdk.CollectionConverters._ class CosmosPartitionPlannerITest extends UnitSpec @@ -386,7 +386,7 @@ class CosmosPartitionPlannerITest val alwaysThrow = false partitions.foreach { - case _: CosmosInputPartition => Unit + case _: CosmosInputPartition => () case _ => assert(alwaysThrow, "Unexpected partition type") } partitions should have size expectedPartitionCount diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/FilterAnalyzerSpec.scala b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/FilterAnalyzerSpec.scala index 659dd0782399..949384815fb1 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/FilterAnalyzerSpec.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/FilterAnalyzerSpec.scala @@ -53,7 +53,7 @@ class FilterAnalyzerSpec extends UnitSpec { EqualTo("physicist", "Schrodinger"), In("isCatAlive", Array(true, false))) val analyzedQuery = filterProcessorWithoutCustomQuery.analyze(filters) analyzedQuery.filtersNotSupportedByCosmos shouldBe empty - analyzedQuery.filtersToBePushedDownToCosmos.toIterable should contain theSameElementsAs filters.toList + analyzedQuery.filtersToBePushedDownToCosmos.toArray should contain theSameElementsAs filters.toList val query = analyzedQuery.cosmosParametrizedQuery query.queryText shouldEqual "SELECT * FROM r WHERE r['physicist']=@param0 AND r['isCatAlive'] IN (@param1,@param2)" @@ -223,7 +223,7 @@ class FilterAnalyzerSpec extends UnitSpec { EqualTo("physicist", "Schrodinger"), In("isCatAlive", Array(true, false))) val analyzedQuery = filterProcessorWithCustomQuery.analyze(filters) analyzedQuery.filtersToBePushedDownToCosmos shouldBe empty - analyzedQuery.filtersNotSupportedByCosmos.toIterable should contain theSameElementsAs filters.toList + analyzedQuery.filtersNotSupportedByCosmos.toArray should contain theSameElementsAs filters.toList val query = analyzedQuery.cosmosParametrizedQuery query.queryText shouldEqual queryText @@ -238,7 +238,7 @@ class FilterAnalyzerSpec extends UnitSpec { val analyzedFilters = filterProcessorWithoutCustomQuery.analyze(filters) analyzedFilters.filtersToBePushedDownToCosmos shouldBe empty - analyzedFilters.filtersNotSupportedByCosmos.toIterable should contain theSameElementsAs filters.toList + analyzedFilters.filtersNotSupportedByCosmos.toArray should contain theSameElementsAs filters.toList analyzedFilters.cosmosParametrizedQuery.queryText shouldEqual QueryFilterAnalyzer.rootParameterizedQuery.queryText analyzedFilters.readManyFiltersOpt.isDefined shouldBe false } diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/SparkE2EBulkWriteITest.scala b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/SparkE2EBulkWriteITest.scala index 0ad43de74e30..a1be10199ed9 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/SparkE2EBulkWriteITest.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/SparkE2EBulkWriteITest.scala @@ -114,7 +114,7 @@ class SparkE2EBulkWriteITest toBeIngested += s"record_$i" } - val df = toBeIngested.toDF("id") + val df = toBeIngested.toSeq.toDF("id") 
var bytesWrittenSnapshot = 0L
 var recordsWrittenSnapshot = 0L
diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/SparkE2EChangeFeedITest.scala b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/SparkE2EChangeFeedITest.scala
index 6c82a92f59be..e3c2263e5f37 100644
--- a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/SparkE2EChangeFeedITest.scala
+++ b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/SparkE2EChangeFeedITest.scala
@@ -20,7 +20,7 @@ import java.time.Duration
 import java.util.UUID
 import scala.collection.mutable
 import scala.collection.mutable.ArrayBuffer
-import scala.jdk.CollectionConverters.asScalaBufferConverter
+import scala.jdk.CollectionConverters._

 class SparkE2EChangeFeedITest
 extends IntegrationSpec
@@ -536,7 +536,7 @@ class SparkE2EChangeFeedITest
 val collectedFrame = groupedFrame.collect()
 collectedFrame.foreach(row => {
 val wrappedArray = row.get(1).asInstanceOf[mutable.WrappedArray[String]]
- val array = wrappedArray.array
+ val array: Array[String] = wrappedArray.toArray
 row.get(0) match {
 case "create" =>
 validateArraysUnordered(createdObjectIds, array)
diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/TestUtils.scala b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/TestUtils.scala
index 8eadf4261287..e5040944932a 100644
--- a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/TestUtils.scala
+++ b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/TestUtils.scala
@@ -25,10 +25,7 @@ import java.util.UUID
 import java.util.concurrent.atomic.AtomicInteger
 import javax.annotation.concurrent.NotThreadSafe
 import scala.collection.mutable.ListBuffer
-import scala.jdk.CollectionConverters.iterableAsScalaIterableConverter
-// scalastyle:off underscore.import
-import scala.collection.JavaConverters._
-// scalastyle:on underscore.import
+import scala.jdk.CollectionConverters._

 // extending class will have a pre-created spark session
 @NotThreadSafe // marking this as not thread safe because we have to stop Spark Context in some unit tests
From 4844572d679bc6bc0e663fb8527b3e9d71df3376 Mon Sep 17 00:00:00 2001
From: Fabian Meiswinkel
Date: Mon, 5 Jan 2026 19:13:53 +0000
Subject: [PATCH 15/32] Update pom.xml

---
 sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml
index 18519c4dc94e..b9b6c15275a2 100644
--- a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml
+++ b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml
@@ -11,7 +11,7 @@
 com.azure.cosmos.spark
 azure-cosmos-spark_3-5_2-13
- 4.42.0-beta.1
+ 4.43.0-beta.1
 jar
 https://github.com/Azure/azure-sdk-for-java/tree/main/sdk/cosmos/azure-cosmos-spark_3-5_2-13
 OLTP Spark 3.5 Connector for Azure Cosmos DB SQL API
From e4627578bef3bc6aaf42feb5643e997c8e8854ef Mon Sep 17 00:00:00 2001
From: Fabian Meiswinkel
Date: Mon, 5 Jan 2026 21:22:31 +0000
Subject: [PATCH 16/32] Fixing CI tests for Spark 3.5 to use Java 17

---
 .../templates/stages/cosmos-emulator-matrix.json | 15 +++++++++++----
 sdk/cosmos/azure-cosmos-spark_3-5_2-12/pom.xml | 4 ++--
 sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml | 4 ++--
 3 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/eng/pipelines/templates/stages/cosmos-emulator-matrix.json index 
b3cc2b8d46d0..e4e955cb72f0 100644 --- a/eng/pipelines/templates/stages/cosmos-emulator-matrix.json +++ b/eng/pipelines/templates/stages/cosmos-emulator-matrix.json @@ -72,17 +72,24 @@ "AdditionalArgs": "-DACCOUNT_HOST=https://localhost:8081/ -Dhadoop.home.dir=D:/Hadoop -DCOSMOS.AZURE_COSMOS_DISABLE_NON_STREAMING_ORDER_BY=true" }, "Spark 3.5 Integration Tests targeting Cosmos Emulator - Java 8'": { - "ProfileFlag": "-Dspark-e2e_3-5", + "ProfileFlag": "-Dspark-e2e_3-5_2-12", "PROTOCOLS": "[\"Tcp\"]", "DESIRED_CONSISTENCIES": "[\"Session\"]", "JavaTestVersion": "1.8", "AdditionalArgs": "-DACCOUNT_HOST=https://localhost:8081/ -Dhadoop.home.dir=D:/Hadoop -DCOSMOS.AZURE_COSMOS_DISABLE_NON_STREAMING_ORDER_BY=true" }, - "Spark 3.5 Integration Tests targeting Cosmos Emulator - Java 11'": { - "ProfileFlag": "-Dspark-e2e_3-5", + "Spark 3.5, Scala 2.12 Integration Tests targeting Cosmos Emulator - Java 17'": { + "ProfileFlag": "-Dspark-e2e_3-5_2-12", "PROTOCOLS": "[\"Tcp\"]", "DESIRED_CONSISTENCIES": "[\"Session\"]", - "JavaTestVersion": "1.11", + "JavaTestVersion": "1.17", + "AdditionalArgs": "-DACCOUNT_HOST=https://localhost:8081/ -Dhadoop.home.dir=D:/Hadoop -DCOSMOS.AZURE_COSMOS_DISABLE_NON_STREAMING_ORDER_BY=true" + }, + "Spark 3.5, Scala 2.13 Integration Tests targeting Cosmos Emulator - Java 17'": { + "ProfileFlag": "-Dspark-e2e_3-5_2-13", + "PROTOCOLS": "[\"Tcp\"]", + "DESIRED_CONSISTENCIES": "[\"Session\"]", + "JavaTestVersion": "1.17", "AdditionalArgs": "-DACCOUNT_HOST=https://localhost:8081/ -Dhadoop.home.dir=D:/Hadoop -DCOSMOS.AZURE_COSMOS_DISABLE_NON_STREAMING_ORDER_BY=true" }, "Kafka Integration Tests targeting Cosmos Emulator - Java 11": { diff --git a/sdk/cosmos/azure-cosmos-spark_3-5_2-12/pom.xml b/sdk/cosmos/azure-cosmos-spark_3-5_2-12/pom.xml index d8d599e6124c..92c59fa5a576 100644 --- a/sdk/cosmos/azure-cosmos-spark_3-5_2-12/pom.xml +++ b/sdk/cosmos/azure-cosmos-spark_3-5_2-12/pom.xml @@ -114,13 +114,13 @@ - spark-e2e_3-5 + spark-e2e_3-5_2-12 ${basedir}/scalastyle_config.xml - spark-e2e_3-5 + spark-e2e_3-5_2-12 diff --git a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml index b9b6c15275a2..3ecb80ba9158 100644 --- a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml +++ b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml @@ -110,13 +110,13 @@ - spark-e2e_3-5 + spark-e2e_3-5_2-13 ${basedir}/scalastyle_config.xml - spark-e2e_3-5 + spark-e2e_3-5_2-13 From f5d395d3dba3b4032fb9d19b56f1e4c4d7c6fb33 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Mon, 5 Jan 2026 21:39:02 +0000 Subject: [PATCH 17/32] Update CHANGELOG.md --- sdk/cosmos/azure-cosmos-spark_3-5_2-13/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CHANGELOG.md b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CHANGELOG.md index 94c2ec2d138d..1d4b34011d1d 100644 --- a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CHANGELOG.md @@ -1,6 +1,6 @@ ## Release History -### 4.42.0-beta.1 (Unreleased) +### 4.43.0-beta.1 (Unreleased) #### Features Added From abc9cef8e2ed84b662a6a394dd8b6dd4f142c952 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Tue, 6 Jan 2026 11:40:42 +0000 Subject: [PATCH 18/32] Fixing illegal access error when using Java 17 --- sdk/cosmos/azure-cosmos-spark_3-5_2-12/pom.xml | 1 + sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml | 1 + 2 files changed, 2 insertions(+) diff --git a/sdk/cosmos/azure-cosmos-spark_3-5_2-12/pom.xml 
b/sdk/cosmos/azure-cosmos-spark_3-5_2-12/pom.xml index 92c59fa5a576..fd5ec183748f 100644 --- a/sdk/cosmos/azure-cosmos-spark_3-5_2-12/pom.xml +++ b/sdk/cosmos/azure-cosmos-spark_3-5_2-12/pom.xml @@ -160,6 +160,7 @@ scalatest-maven-plugin 2.1.0 + --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -Djdk.reflect.useDirectMethodHandle=false ${project.build.directory}/surefire-reports . SparkTestSuite.txt diff --git a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml index 3ecb80ba9158..2afda3852f13 100644 --- a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml +++ b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml @@ -140,6 +140,7 @@ scalatest-maven-plugin 2.1.0 + --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -Djdk.reflect.useDirectMethodHandle=false ${project.build.directory}/surefire-reports . 
SparkTestSuite.txt From e91c96b27fe8efd4858d5b664ea8340f8c989b5e Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Tue, 6 Jan 2026 12:34:33 +0000 Subject: [PATCH 19/32] Making --add-opens conditional on Java version --- sdk/cosmos/azure-cosmos-spark_3-5_2-12/pom.xml | 11 ++++++++++- sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml | 11 ++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-spark_3-5_2-12/pom.xml b/sdk/cosmos/azure-cosmos-spark_3-5_2-12/pom.xml index fd5ec183748f..5bbe86d16d50 100644 --- a/sdk/cosmos/azure-cosmos-spark_3-5_2-12/pom.xml +++ b/sdk/cosmos/azure-cosmos-spark_3-5_2-12/pom.xml @@ -160,7 +160,7 @@ scalatest-maven-plugin 2.1.0 - --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -Djdk.reflect.useDirectMethodHandle=false + ${scalatest.argLine} ${project.build.directory}/surefire-reports . SparkTestSuite.txt @@ -178,5 +178,14 @@ + + java9-plus + + [9,) + + + --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -Djdk.reflect.useDirectMethodHandle=false + + diff --git a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml index 2afda3852f13..c91292394edc 100644 --- a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml +++ b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/pom.xml @@ -140,7 +140,7 @@ scalatest-maven-plugin 2.1.0 - --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -Djdk.reflect.useDirectMethodHandle=false + ${scalatest.argLine} 
${project.build.directory}/surefire-reports . SparkTestSuite.txt @@ -158,5 +158,14 @@ + + java9-plus + + [9,) + + + --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -Djdk.reflect.useDirectMethodHandle=false + + From a7d4741bcc4b18c231f9623a42a0ebc1fc4600e9 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Tue, 6 Jan 2026 17:38:19 +0000 Subject: [PATCH 20/32] Update spark.yml --- sdk/cosmos/spark.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cosmos/spark.yml b/sdk/cosmos/spark.yml index 6e8f97e120c0..23e8ce6d079c 100644 --- a/sdk/cosmos/spark.yml +++ b/sdk/cosmos/spark.yml @@ -100,4 +100,4 @@ stages: ClusterName: 'oltp-ci-spark35-2workers-ds3v2-16.4-scala_2.13' AvoidDBFS: true JarStorageAccountKey: $(spark-databricks-cosmos-spn-clientIdCert) - JarReadOnlySasUri: $(spark-databricks-cosmos-spn-clientCertBase64) + JarReadOnlySasUri: $(spark-databricks-token) From 69a0ebb8d0ac0df984552da797c900257f103811 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Tue, 6 Jan 2026 19:26:58 +0000 Subject: [PATCH 21/32] Fixing Spark live tests --- .../test-databricks/databricks-jar-install.sh | 12 +++++++----- sdk/cosmos/spark.databricks.yml | 6 +++++- sdk/cosmos/spark.yml | 10 ++++++++++ 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-spark_3/test-databricks/databricks-jar-install.sh b/sdk/cosmos/azure-cosmos-spark_3/test-databricks/databricks-jar-install.sh index d361a49014ef..d4e91556e72c 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/test-databricks/databricks-jar-install.sh +++ b/sdk/cosmos/azure-cosmos-spark_3/test-databricks/databricks-jar-install.sh @@ -3,7 +3,9 @@ CLUSTER_NAME=$1 AVOID_DBFS=$2 JARPATH=$3 -STORAGE_ACCOUNT_KEY=$4 +STORAGE_ACCOUNT_NAME=$4 +STORAGE_ACCOUNT_KEY=$5 +JAR_NAME=$6 [[ -z "$CLUSTER_NAME" ]] && exit 1 [[ -z "$JARPATH" ]] && exit 1 @@ -45,13 +47,13 @@ echo "Avoid DBFS: $AVOID_DBFS" # DATABRICKS_RUNTIME_VERSION is not populated in the environment and version comparison is messy in bash # Using cluster name for the cluster that was created with 16.4 if [[ "${AVOID_DBFS,,}" == "true" ]]; then - account=oltpsparkcijarstore + account=$STORAGE_ACCOUNT_NAME - echo "Uploading jar '$JARPATH/$JARFILE' to Azure Storage account oltpsparkcijarstore (ephemeral tenant) container jarstore BLOB jars/azure-cosmos-spark_3-5_2-12-latest-ci-candidate.jar" - az storage blob upload --account-name oltpsparkcijarstore --account-key $STORAGE_ACCOUNT_KEY --container-name jarstore --name jars/azure-cosmos-spark_3-5_2-12-latest-ci-candidate.jar --file $JARPATH/$JARFILE --type block --overwrite true --only-show-errors + echo "Uploading jar '$JARPATH/$JARFILE' to Azure Storage account $STORAGE_ACCOUNT_NAME (ephemeral tenant) container jarstore BLOB jars/$JAR_NAME" + az storage blob upload --account-name $STORAGE_ACCOUNT_NAME --account-key 
$STORAGE_ACCOUNT_KEY --container-name jarstore --name jars/ --file $JARPATH/$JARFILE --type block --overwrite true --only-show-errors if [ $? -eq 0 ]; then - echo "Successfully uploaded JAR to oltpsparkcijarstore (ephemeral tenant)." + echo "Successfully uploaded JAR to $STORAGE_ACCOUNT_NAME (ephemeral tenant)." echo "Rebooting cluster to install new library via init script" else echo "Failed to upload JAR to Workspace Files." diff --git a/sdk/cosmos/spark.databricks.yml b/sdk/cosmos/spark.databricks.yml index e124fc7e01e8..e7044941e9c9 100644 --- a/sdk/cosmos/spark.databricks.yml +++ b/sdk/cosmos/spark.databricks.yml @@ -34,6 +34,10 @@ parameters: type: string - name: JarReadOnlySasUri type: string + - name: JarStorageAccountName + type: string + - name: JarName + type: string stages: - stage: displayName: 'Spark Databricks integration ${{ parameters.ClusterName }} - ${{ parameters.SparkVersion }}' @@ -91,7 +95,7 @@ stages: displayName: Importing Jars inputs: filePath: $(build.sourcesdirectory)/sdk/cosmos/azure-cosmos-spark_3/test-databricks/databricks-jar-install.sh - arguments: '${{ parameters.ClusterName }} ${{ parameters.AvoidDBFS }} $(build.sourcesdirectory)/sdk/cosmos/${{ parameters.SparkVersion }}/target ${{ parameters.JarStorageAccountKey }}' + arguments: '${{ parameters.ClusterName }} ${{ parameters.AvoidDBFS }} $(build.sourcesdirectory)/sdk/cosmos/${{ parameters.SparkVersion }}/target ${{ parameters.JarStorageAccountName }} ${{ parameters.JarStorageAccountKey }} ${{ parameters.JarName }}' - task: Bash@3 displayName: Importing and executing notebooks inputs: diff --git a/sdk/cosmos/spark.yml b/sdk/cosmos/spark.yml index 23e8ce6d079c..0dcfd657d0ae 100644 --- a/sdk/cosmos/spark.yml +++ b/sdk/cosmos/spark.yml @@ -26,6 +26,8 @@ stages: ClusterName: 'oltp-ci-spark33-2workers-ds3v2' JarStorageAccountKey: $(spark-databricks-cosmos-spn-clientIdCert) JarReadOnlySasUri: $(spark-databricks-cosmos-spn-clientCertBase64) + JarStorageAccountName: 'oltpsparkcijarstore1225' + JarName: 'azure-cosmos-spark_3-5_2-12-latest-ci-candidate.jar' - template: /sdk/cosmos/spark.databricks.yml parameters: CosmosEndpointMsi: $(spark-databricks-cosmos-endpoint-msi) @@ -44,6 +46,8 @@ stages: ClusterName: 'oltp-ci-spark34-2workers-ds3v2' JarStorageAccountKey: $(spark-databricks-cosmos-spn-clientIdCert) JarReadOnlySasUri: $(spark-databricks-cosmos-spn-clientCertBase64) + JarStorageAccountName: 'oltpsparkcijarstore1225' + JarName: 'azure-cosmos-spark_3-5_2-12-latest-ci-candidate.jar' - template: /sdk/cosmos/spark.databricks.yml parameters: CosmosEndpointMsi: $(spark-databricks-cosmos-endpoint-msi) @@ -63,6 +67,8 @@ stages: AvoidDBFS: false JarStorageAccountKey: $(spark-databricks-cosmos-spn-clientIdCert) JarReadOnlySasUri: $(spark-databricks-cosmos-spn-clientCertBase64) + JarStorageAccountName: 'oltpsparkcijarstore1225' + JarName: 'azure-cosmos-spark_3-5_2-12-latest-ci-candidate.jar' - template: /sdk/cosmos/spark.databricks.yml parameters: CosmosEndpointMsi: $(spark-databricks-cosmos-endpoint-msi) @@ -82,6 +88,8 @@ stages: AvoidDBFS: true JarStorageAccountKey: $(spark-databricks-cosmos-spn-clientIdCert) JarReadOnlySasUri: $(spark-databricks-cosmos-spn-clientCertBase64) + JarStorageAccountName: 'oltpsparkcijarstore1225' + JarName: 'azure-cosmos-spark_3-5_2-12-latest-ci-candidate.jar' - template: /sdk/cosmos/spark.databricks.yml parameters: CosmosEndpointMsi: $(spark-databricks-cosmos-endpoint-msi) @@ -101,3 +109,5 @@ stages: AvoidDBFS: true JarStorageAccountKey: $(spark-databricks-cosmos-spn-clientIdCert) 
JarReadOnlySasUri: $(spark-databricks-token) + JarStorageAccountName: 'oltpsparkcijarstore1225' + JarName: 'azure-cosmos-spark_3-5_2-13-latest-ci-candidate.jar' From 1d374a11dfabd060ac955744b5a6cb9691af1ab1 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Tue, 6 Jan 2026 20:52:34 +0000 Subject: [PATCH 22/32] Update databricks-jar-install.sh --- .../test-databricks/databricks-jar-install.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cosmos/azure-cosmos-spark_3/test-databricks/databricks-jar-install.sh b/sdk/cosmos/azure-cosmos-spark_3/test-databricks/databricks-jar-install.sh index d4e91556e72c..d0a16daa4f67 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/test-databricks/databricks-jar-install.sh +++ b/sdk/cosmos/azure-cosmos-spark_3/test-databricks/databricks-jar-install.sh @@ -50,7 +50,7 @@ if [[ "${AVOID_DBFS,,}" == "true" ]]; then account=$STORAGE_ACCOUNT_NAME echo "Uploading jar '$JARPATH/$JARFILE' to Azure Storage account $STORAGE_ACCOUNT_NAME (ephemeral tenant) container jarstore BLOB jars/$JAR_NAME" - az storage blob upload --account-name $STORAGE_ACCOUNT_NAME --account-key $STORAGE_ACCOUNT_KEY --container-name jarstore --name jars/ --file $JARPATH/$JARFILE --type block --overwrite true --only-show-errors + az storage blob upload --account-name $STORAGE_ACCOUNT_NAME --account-key $STORAGE_ACCOUNT_KEY --container-name jarstore --name jars/$JAR_NAME --file $JARPATH/$JARFILE --type block --overwrite true --only-show-errors if [ $? -eq 0 ]; then echo "Successfully uploaded JAR to $STORAGE_ACCOUNT_NAME (ephemeral tenant)." From 14ce26f2bb86a98b773b0b385f88aa2045382112 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Tue, 6 Jan 2026 22:32:48 +0000 Subject: [PATCH 23/32] Update TransientIOErrorsRetryingReadManyIterator.scala --- .../spark/TransientIOErrorsRetryingReadManyIterator.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/TransientIOErrorsRetryingReadManyIterator.scala b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/TransientIOErrorsRetryingReadManyIterator.scala index bcfdb694cb82..c51c5c1226e1 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/TransientIOErrorsRetryingReadManyIterator.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/TransientIOErrorsRetryingReadManyIterator.scala @@ -59,7 +59,7 @@ private[spark] class TransientIOErrorsRetryingReadManyIterator[TSparkRow] while (returnValue.isEmpty) { if (readManyFilterBatchIterator.hasNext) { // fetch items for the next readMany filter batch - val readManyFilterBatch = readManyFilterBatchIterator.next() + val readManyFilterBatch = readManyFilterBatchIterator.next().toList returnValue = TransientErrorsRetryPolicy.executeWithRetry( () => hasNextInternalCore(readManyFilterBatch), @@ -78,14 +78,14 @@ private[spark] class TransientIOErrorsRetryingReadManyIterator[TSparkRow] * * @return true (more records exist), false (no more records exist), None (unknown call should be repeated) */ - private def hasNextInternalCore(readManyFilterList: Seq[CosmosItemIdentity]): Option[Boolean] = { + private def hasNextInternalCore(readManyFilterList: List[CosmosItemIdentity]): Option[Boolean] = { val feedResponse = try { Await.result( Future { ImplementationBridgeHelpers .CosmosAsyncContainerHelper .getCosmosAsyncContainerAccessor - .readMany(container, readManyFilterList.toList.asJava, queryOptionsWithEnd2EndTimeout, classType) + 
.readMany(container, readManyFilterList.asJava, queryOptionsWithEnd2EndTimeout, classType) .block() }(TransientIOErrorsRetryingReadManyIterator.executionContext), maxPageRetrievalTimeout) From db41e2c55c727feaa070efc0015e110ac9d21bd3 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Wed, 7 Jan 2026 01:42:05 +0000 Subject: [PATCH 24/32] Update CosmosItemIdentityHelper.scala --- .../spark/CosmosItemIdentityHelper.scala | 35 +++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosItemIdentityHelper.scala b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosItemIdentityHelper.scala index bc149f9623c8..cceda1f294c0 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosItemIdentityHelper.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosItemIdentityHelper.scala @@ -6,6 +6,7 @@ package com.azure.cosmos.spark import com.azure.cosmos.implementation.routing.PartitionKeyInternal import com.azure.cosmos.implementation.{ImplementationBridgeHelpers, Utils} import com.azure.cosmos.models.{CosmosItemIdentity, PartitionKey} +import com.fasterxml.jackson.databind.JsonNode import java.util @@ -32,7 +33,20 @@ private[spark] object CosmosItemIdentityHelper { case cosmosItemIdentityStringRegx(idValue, pkValue) => val partitionKeyValue = Utils.parse(pkValue, classOf[Object]) partitionKeyValue match { - case arrayList: util.ArrayList[Object] => Some(createCosmosItemIdentityWithMultiHashPartitionKey(idValue, arrayList.toArray)) + case arrayList: util.ArrayList[_] => + // Convert Jackson JsonNode objects to their primitive values + // This is necessary because Utils.parse returns JsonNode instances when deserializing from JSON + // In Scala 2.13, the deprecated JavaConverters behaves differently, so we need explicit conversion + val pkValuesArray = new Array[Object](arrayList.size()) + var i = 0 + while (i < arrayList.size()) { + pkValuesArray(i) = arrayList.get(i) match { + case node: JsonNode => convertJsonNodeToPrimitive(node) + case other => other.asInstanceOf[Object] + } + i += 1 + } + Some(createCosmosItemIdentityWithMultiHashPartitionKey(idValue, pkValuesArray)) case _ => Some(new CosmosItemIdentity(new PartitionKey(partitionKeyValue), idValue)) } case _ => None @@ -44,8 +58,25 @@ private[spark] object CosmosItemIdentityHelper { ImplementationBridgeHelpers .PartitionKeyHelper .getPartitionKeyAccessor - .toPartitionKey(PartitionKeyInternal.fromObjectArray(pkValuesArray, false)) + .toPartitionKey(PartitionKeyInternal.fromObjectArray(pkValuesArray, true)) new CosmosItemIdentity(partitionKey, idValue) } + + private[this] def convertJsonNodeToPrimitive(node: JsonNode): Object = { + if (node.isTextual) { + node.asText() + } else if (node.isBoolean) { + Boolean.box(node.asBoolean()) + } else if (node.isInt || node.isLong) { + Long.box(node.asLong()) + } else if (node.isDouble || node.isFloat) { + Double.box(node.asDouble()) + } else if (node.isNull) { + null + } else { + // For any other type, return the node itself and let the partition key logic handle it + node + } + } } From b29c7a810f0d3e843139038633201afd26876de1 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Wed, 7 Jan 2026 10:24:25 +0000 Subject: [PATCH 25/32] Update CosmosItemIdentityHelper.scala --- .../spark/CosmosItemIdentityHelper.scala | 39 ++++++++++--------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git 
a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosItemIdentityHelper.scala b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosItemIdentityHelper.scala index cceda1f294c0..93fba346309d 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosItemIdentityHelper.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosItemIdentityHelper.scala @@ -31,23 +31,23 @@ private[spark] object CosmosItemIdentityHelper { def tryParseCosmosItemIdentity(cosmosItemIdentityString: String): Option[CosmosItemIdentity] = { cosmosItemIdentityString match { case cosmosItemIdentityStringRegx(idValue, pkValue) => - val partitionKeyValue = Utils.parse(pkValue, classOf[Object]) - partitionKeyValue match { - case arrayList: util.ArrayList[_] => - // Convert Jackson JsonNode objects to their primitive values - // This is necessary because Utils.parse returns JsonNode instances when deserializing from JSON - // In Scala 2.13, the deprecated JavaConverters behaves differently, so we need explicit conversion - val pkValuesArray = new Array[Object](arrayList.size()) - var i = 0 - while (i < arrayList.size()) { - pkValuesArray(i) = arrayList.get(i) match { - case node: JsonNode => convertJsonNodeToPrimitive(node) - case other => other.asInstanceOf[Object] - } - i += 1 - } - Some(createCosmosItemIdentityWithMultiHashPartitionKey(idValue, pkValuesArray)) - case _ => Some(new CosmosItemIdentity(new PartitionKey(partitionKeyValue), idValue)) + // Parse the partition key value from JSON string + // Use JsonNode first, then convert to ArrayList to avoid Scala 2.12/2.13 differences + val pkValueNode = objectMapper.readTree(pkValue) + + if (pkValueNode.isArray) { + // Multi-value partition key (hierarchical) + val pkValuesArray = new Array[Object](pkValueNode.size()) + var i = 0 + while (i < pkValueNode.size()) { + pkValuesArray(i) = convertJsonNodeToPrimitive(pkValueNode.get(i)) + i += 1 + } + Some(createCosmosItemIdentityWithMultiHashPartitionKey(idValue, pkValuesArray)) + } else { + // Single value partition key + val primitiveValue = convertJsonNodeToPrimitive(pkValueNode) + Some(new CosmosItemIdentity(new PartitionKey(primitiveValue), idValue)) } case _ => None } @@ -75,8 +75,9 @@ private[spark] object CosmosItemIdentityHelper { } else if (node.isNull) { null } else { - // For any other type, return the node itself and let the partition key logic handle it - node + throw new IllegalArgumentException( + s"Invalid partition key value: partition keys must be primitive values (string, number, boolean, or null), got JsonNode type: ${node.getNodeType}" + ) } } } From 7b5e13138617f829d3224d733f4041a7e6d01772 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Wed, 7 Jan 2026 12:17:12 +0000 Subject: [PATCH 26/32] Fixing scala 2.13 jsonNode serialization incompatibility --- .../stages/cosmos-emulator-matrix.json | 2 +- .../azure-cosmos-spark_3-5_2-13/CHANGELOG.md | 143 +----------------- .../spark/CosmosItemIdentityHelper.scala | 5 +- 3 files changed, 6 insertions(+), 144 deletions(-) diff --git a/eng/pipelines/templates/stages/cosmos-emulator-matrix.json b/eng/pipelines/templates/stages/cosmos-emulator-matrix.json index e4e955cb72f0..d1a2da8a0c64 100644 --- a/eng/pipelines/templates/stages/cosmos-emulator-matrix.json +++ b/eng/pipelines/templates/stages/cosmos-emulator-matrix.json @@ -71,7 +71,7 @@ "JavaTestVersion": "1.11", "AdditionalArgs": "-DACCOUNT_HOST=https://localhost:8081/ -Dhadoop.home.dir=D:/Hadoop 
-DCOSMOS.AZURE_COSMOS_DISABLE_NON_STREAMING_ORDER_BY=true"
},
- "Spark 3.5 Integration Tests targeting Cosmos Emulator - Java 8'": {
+ "Spark 3.5, Scala 2.12 Integration Tests targeting Cosmos Emulator - Java 8": {
"ProfileFlag": "-Dspark-e2e_3-5_2-12",
"PROTOCOLS": "[\"Tcp\"]",
"DESIRED_CONSISTENCIES": "[\"Session\"]",
diff --git a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CHANGELOG.md b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CHANGELOG.md
index 1d4b34011d1d..6ae4d7ce01d5 100644
--- a/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CHANGELOG.md
+++ b/sdk/cosmos/azure-cosmos-spark_3-5_2-13/CHANGELOG.md
@@ -10,145 +10,4 @@
#### Other Changes

-### 4.41.0 (2025-10-21)
-
-#### Features Added
-* Added support `spark.cosmos.write.strategy` value `ItemPatchIfExists` which allows gracefully ignoring documents/patch-instructions when the document does not exist (anymore).
- See [47034](https://github.com/Azure/azure-sdk-for-java/pull/47034)
-* Added support to optionally omit info about spark environment and/or machine-info for driver/executors from `UserAgent` header via new config `spark.cosmos.userAgent.format` (allowed values are `SparkEnvAndWorkers` (default value), `OnlySparkEnv` and `NoSparkEnv`.
- See [47047](https://github.com/Azure/azure-sdk-for-java/pull/47047)
-
-### 4.40.0 (2025-09-27)
-
-#### Features Added
-* Added support for feed range cache refresh interval config.
- See [46759](https://github.com/Azure/azure-sdk-for-java/pull/46759)
-
-#### Other Changes
-* Added improvement to reduce partition planning time for large containers.
- See [46727](https://github.com/Azure/azure-sdk-for-java/pull/46727)
-
-### 4.39.0 (2025-09-05)
-
-#### Bugs Fixed
-* Reverted known issue due to shading log4j (which was introduced in 4.38.1).
- See [PR 46546](https://github.com/Azure/azure-sdk-for-java/pull/46546) and [PR 46608](https://github.com/Azure/azure-sdk-for-java/pull/46608)
-* Added change feed performance monitoring which is used to improve end lsn calculation in `CosmosPartitionPlanner`.
- See [PR 46320](https://github.com/Azure/azure-sdk-for-java/pull/46320)
-* Added `spark.cosmos.auth.aad.audience` as a valid configuration option to allow using AAD tokens with custom audiences.
- See [PR 46554](https://github.com/Azure/azure-sdk-for-java/pull/46554)
-
-### 4.38.1 (2025-08-22)
-
-**NOTE: This version has a known issue due to shading log4j - Please use more recent versions >= 4.38.2 or 4.38.0 instead**
-
-#### Other Changes
-* Added log4j-core to the list of shaded packages to avoid conflicts when customers use log4j in a different version. **NOTE: This change caused known issue - Please use a more recent version instead**
- See [PR 45924](https://github.com/Azure/azure-sdk-for-java/pull/46451)
-
-### 4.38.0 (2025-07-31)
-
-#### Features Added
-* Added telemetry support by adding OTEL span attribute naming schemes, introducing Azure Monitor integration, and sampled diagnostics.
- See [PR 45924](https://github.com/Azure/azure-sdk-for-java/pull/45924)
-
-#### Other Changes
-* Added compatibility with CosmosDB Fabric Native Accounts using the `FabricAccountDataResolver` for authentication.
- See [PR 45890](https://github.com/Azure/azure-sdk-for-java/pull/45890)
-
-### 4.37.2 (2025-05-14)
-
-#### Features Added
-* Added option to use the connector in non-public Azure clouds.
- See [PR 45310](https://github.com/Azure/azure-sdk-for-java/pull/45310) - -#### Bugs Fixed -* Fixed an issue during bulk write operations that could result in failing the Spark job in `BulkWriter.flushAndClose` too eagerly in certain cases. - See [PR 44992](https://github.com/Azure/azure-sdk-for-java/pull/44992) -* Fixed hang issue in `CosmosPagedIterable#handle` by preventing race conditions in underlying subscription of `Flux`. - [PR 45290](https://github.com/Azure/azure-sdk-for-java/pull/45290) - -### 4.37.1 (2025-03-04) - -#### Features Added -* Added config option `spark.cosmos.read.responseContinuationTokenLimitInKb` to reduce query continuation token size. - See [PR 44480](https://github.com/Azure/azure-sdk-for-java/pull/44480) - -### 4.37.0 (2025-02-20) - -#### Other Changes -* Updated netty dependency - -### 4.36.1 (2025-02-08) - -#### Bugs Fixed -* Fixed an issue in change feed where under certain rare race conditions records could be skipped and excessive requests are prefetched. - See [PR 43788](https://github.com/Azure/azure-sdk-for-java/pull/43788) - -### 4.36.0 (2025-01-14) -> [!IMPORTANT] -> We strongly recommend our customers to use version 4.36.0 and above especially if using all versions and deletes change feed. - -#### Features Added -* Added the udfs `GetFeedRangesForContainer` and `GetOverlappingFeedRange` to ease mapping of cosmos partition key to databricks table partition key. - See [PR 43092](https://github.com/Azure/azure-sdk-for-java/pull/43092) - -#### Bugs Fixed -* Added null checking for previous images for deletes in full fidelity change feed. - See [PR 43483](https://github.com/Azure/azure-sdk-for-java/pull/43483) - -#### Other Changes -* Added options to fine-tune settings for bulk operations. - [PR 43509](https://github.com/Azure/azure-sdk-for-java/pull/43509) - -### 4.35.0 (2024-11-27) - -#### Bugs Fixed -* Fixed an issue when using `ChangeFeed` causing some cosmos partitions to not be fully processed in some cases. - See [PR 42553](https://github.com/Azure/azure-sdk-for-java/pull/42553) - -### 4.34.0 (2024-10-10) -#### Bugs Fixed -* Fixed an issue to avoid transient `IllegalArgumentException` due to duplicate json properties for the `uniqueKeyPolicy` property in `DocumentCollection`. - See [PR 41608](https://github.com/Azure/azure-sdk-for-java/pull/41608) and [PR 42244](https://github.com/Azure/azure-sdk-for-java/pull/42244) - -### 4.33.1 (2024-08-23) - -#### Bugs Fixed -* Fixed an issue to avoid transient `IllegalArgumentException` due to duplicate json properties for the `uniqueKeyPolicy` property. - See [PR 41608](https://github.com/Azure/azure-sdk-for-java/pull/41608) - -#### Other Changes -* Added retries on a new `BulkWriter` instance when first attempt to commit times out for bulk write jobs. - See [PR 41553](https://github.com/Azure/azure-sdk-for-java/pull/41553) - -### 4.33.0 (2024-06-22) - -#### Features Added -* Added a service trait `CosmosClientBuilderInterceptor` to allow intercepting and customizing the CosmosClient creation. - See [PR 40714](https://github.com/Azure/azure-sdk-for-java/pull/40714) - -#### Bugs Fixed -* Fixed a race condition resulting in not always re-enqueueing retries for bulk writes. - See [PR 40714](https://github.com/Azure/azure-sdk-for-java/pull/40714) - -### 4.32.1 (2024-06-07) - -#### Other Changes -* Added retries when retrieving new pages for query or readMany operations are timing out to avoid unbounded awaits. 
- See [PR 40506](https://github.com/Azure/azure-sdk-for-java/pull/40506) -* Ensured that no statistics are reported when custom queries via `spark.cosmos.read.customQuery` are used. - See [PR 40506](https://github.com/Azure/azure-sdk-for-java/pull/40506) - -### 4.32.0 (2024-05-24) - -#### Features Added -* Added config option `spark.cosmos.auth.aad.clientCertPemBase64` to allow using SPN (ServicePrincipal name) authentication with certificate instead of client secret. - See [PR 40325](https://github.com/Azure/azure-sdk-for-java/pull/40325) -* Added config option `spark.cosmos.accountDataResolverServiceName` to allow specifying which `AccountDataResolver` trait implementation to use if there are multiple on the class path. - See [PR 40325](https://github.com/Azure/azure-sdk-for-java/pull/40325) - -#### Bugs Fixed -* Fixed an issue where `SHOW DATABASES IN` only return one database even though multiple databases exist. - See [PR 40277](https://github.com/Azure/azure-sdk-for-java/pull/40277) -* Fixed an issue where `SHOW TABLES FROM` only return one container even though multiple containers exist. - See [PR 40277](https://github.com/Azure/azure-sdk-for-java/pull/40277) -* Fixed UserAgent encoding when the suffix contains non-ASCII characters. - See[PR 40293](https://github.com/Azure/azure-sdk-for-java/pull/40293) - -#### Other Changes -* Added robustness improvement to avoid client-side parsing errors `java.lang.IllegalArgumentException: Unable to parse JSON` when Gateway returns duplicate `unqiueKeyPolicy` in IndexPolicy (invalid json) - See[PR 40306](https://github.com/Azure/azure-sdk-for-java/pull/40306) - -### 4.31.0 (2024-05-20) - -#### Features Added -* Added capability in azure-cosmos-spark to allow the spark environment to support access tokens via AccountDataResolver. - See [PR 40079](https://github.com/Azure/azure-sdk-for-java/pull/40079) - -### 4.30.0 (2024-04-27) - -#### Features Added -* Added capability to use (and enforce) native netty transport. The native transport is more efficient - esepcially when the number of TCP connections being used is high. - See [PR 39834](https://github.com/Azure/azure-sdk-for-java/pull/39834) -* Added ManagedIdentity authentication support for azure-cosmos-spark in Databricks. - See [PR 39870](https://github.com/Azure/azure-sdk-for-java/pull/39870) - -### 4.29.0 (2024-04-16) - -#### Features Added -* Spark 3.5 support: - See [PR 39395](https://github.com/Azure/azure-sdk-for-java/pull/39395). - -#### Bugs Fixed -* Fixed an issue causing failures when using change feed in batch mode with a batch location and `ChangeFeedBatch.planInputPartitions` is called multiple times (for example because physcial query plan gets retrieved) and some changes have been made in the monitored container between those calls). - See [PR 39635](https://github.com/Azure/azure-sdk-for-java/pull/39635) -* Made `AccountDataResolver` trait public again. - See [PR 39736](https://github.com/Azure/azure-sdk-for-java/pull/39736) - -#### Other Changes -* Optimized the partitioning strategy implementation details to avoid unnecessarily high RU usage. 
- See [PR 39438](https://github.com/Azure/azure-sdk-for-java/pull/39438)
-
-### NOTE: See CHANGELOG.md in 3.1, 3.2, 3.3 and 3.4 projects for changes prior to 4.29.0
+### NOTE: See CHANGELOG.md in the 3.3, 3.4 and 3.5 Scala 2.12 projects for changes prior to 4.43.0
diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosItemIdentityHelper.scala b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosItemIdentityHelper.scala
index 93fba346309d..9c0e72c8293d 100644
--- a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosItemIdentityHelper.scala
+++ b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosItemIdentityHelper.scala
@@ -25,7 +25,10 @@ private[spark] object CosmosItemIdentityHelper {
private val objectMapper = Utils.getSimpleObjectMapper

def getCosmosItemIdentityValueString(id: String, partitionKeyValue: List[Object]): String = {
-    s"id($id).pk(${objectMapper.writeValueAsString(partitionKeyValue.asJava)})"
+    // Explicitly create a Java ArrayList to avoid Scala 2.12/2.13 differences in .asJava behavior
+    val javaList = new util.ArrayList[Object](partitionKeyValue.size)
+    partitionKeyValue.foreach(value => javaList.add(value))
+    s"id($id).pk(${objectMapper.writeValueAsString(javaList)})"
}

def tryParseCosmosItemIdentity(cosmosItemIdentityString: String): Option[CosmosItemIdentity] = {

From ceeeb86d414163a5607cf0b465a977816ecad4e1 Mon Sep 17 00:00:00 2001
From: Fabian Meiswinkel
Date: Wed, 7 Jan 2026 15:06:55 +0000
Subject: [PATCH 27/32] Update CosmosItemIdentityHelper.scala

---
 .../spark/CosmosItemIdentityHelper.scala | 57 +++++--------------
 1 file changed, 15 insertions(+), 42 deletions(-)

diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosItemIdentityHelper.scala b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosItemIdentityHelper.scala
index 9c0e72c8293d..0b2c30e35560 100644
--- a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosItemIdentityHelper.scala
+++ b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosItemIdentityHelper.scala
@@ -6,7 +6,7 @@ package com.azure.cosmos.spark
import com.azure.cosmos.implementation.routing.PartitionKeyInternal
import com.azure.cosmos.implementation.{ImplementationBridgeHelpers, Utils}
import com.azure.cosmos.models.{CosmosItemIdentity, PartitionKey}
-import com.fasterxml.jackson.databind.JsonNode
+import com.azure.cosmos.spark.diagnostics.BasicLoggingTrait

import java.util

@@ -14,7 +14,7 @@ import java.util
import scala.collection.JavaConverters._
// scalastyle:on underscore.import

-private[spark] object CosmosItemIdentityHelper {
+private[spark] object CosmosItemIdentityHelper extends BasicLoggingTrait {

// pattern will be recognized
// 1.
id(idValue).pk(partitionKeyValue) // @@ -25,32 +25,23 @@ private[spark] object CosmosItemIdentityHelper { private val objectMapper = Utils.getSimpleObjectMapper def getCosmosItemIdentityValueString(id: String, partitionKeyValue: List[Object]): String = { - // Explicitly create a Java ArrayList to avoid Scala 2.12/2.13 differences in .asJava behavior - val javaList = new util.ArrayList[Object](partitionKeyValue.size) - partitionKeyValue.foreach(value => javaList.add(value)) - s"id($id).pk(${objectMapper.writeValueAsString(javaList)})" + val result = s"id($id).pk(${objectMapper.writeValueAsString(partitionKeyValue.asJava)})" + logInfo(s"getCosmosItemIdentityValueString (id") + for (pkValueItem <- partitionKeyValue) { + logInfo(s"pkValueItem: ${partitionKeyValue.getClass.getName} - $pkValueItem") + } + logInfo(result) + result } def tryParseCosmosItemIdentity(cosmosItemIdentityString: String): Option[CosmosItemIdentity] = { + logInfo(s"tryParseCosmosItemIdentity - $cosmosItemIdentityString") cosmosItemIdentityString match { case cosmosItemIdentityStringRegx(idValue, pkValue) => - // Parse the partition key value from JSON string - // Use JsonNode first, then convert to ArrayList to avoid Scala 2.12/2.13 differences - val pkValueNode = objectMapper.readTree(pkValue) - - if (pkValueNode.isArray) { - // Multi-value partition key (hierarchical) - val pkValuesArray = new Array[Object](pkValueNode.size()) - var i = 0 - while (i < pkValueNode.size()) { - pkValuesArray(i) = convertJsonNodeToPrimitive(pkValueNode.get(i)) - i += 1 - } - Some(createCosmosItemIdentityWithMultiHashPartitionKey(idValue, pkValuesArray)) - } else { - // Single value partition key - val primitiveValue = convertJsonNodeToPrimitive(pkValueNode) - Some(new CosmosItemIdentity(new PartitionKey(primitiveValue), idValue)) + val partitionKeyValue = Utils.parse(pkValue, classOf[Object]) + partitionKeyValue match { + case arrayList: util.ArrayList[Object] => Some(createCosmosItemIdentityWithMultiHashPartitionKey(idValue, arrayList.toArray)) + case _ => Some(new CosmosItemIdentity(new PartitionKey(partitionKeyValue), idValue)) } case _ => None } @@ -61,26 +52,8 @@ private[spark] object CosmosItemIdentityHelper { ImplementationBridgeHelpers .PartitionKeyHelper .getPartitionKeyAccessor - .toPartitionKey(PartitionKeyInternal.fromObjectArray(pkValuesArray, true)) + .toPartitionKey(PartitionKeyInternal.fromObjectArray(pkValuesArray, false)) new CosmosItemIdentity(partitionKey, idValue) } - - private[this] def convertJsonNodeToPrimitive(node: JsonNode): Object = { - if (node.isTextual) { - node.asText() - } else if (node.isBoolean) { - Boolean.box(node.asBoolean()) - } else if (node.isInt || node.isLong) { - Long.box(node.asLong()) - } else if (node.isDouble || node.isFloat) { - Double.box(node.asDouble()) - } else if (node.isNull) { - null - } else { - throw new IllegalArgumentException( - s"Invalid partition key value: partition keys must be primitive values (string, number, boolean, or null), got JsonNode type: ${node.getNodeType}" - ) - } - } } From 5bb3fa3e9bcf6ab539a852b51f87077eed192158 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Wed, 7 Jan 2026 15:29:26 +0000 Subject: [PATCH 28/32] Update CosmosItemIdentityHelper.scala --- .../scala/com/azure/cosmos/spark/CosmosItemIdentityHelper.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosItemIdentityHelper.scala 
b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosItemIdentityHelper.scala index 0b2c30e35560..b52538ff5ab0 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosItemIdentityHelper.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosItemIdentityHelper.scala @@ -28,7 +28,7 @@ private[spark] object CosmosItemIdentityHelper extends BasicLoggingTrait { val result = s"id($id).pk(${objectMapper.writeValueAsString(partitionKeyValue.asJava)})" logInfo(s"getCosmosItemIdentityValueString (id") for (pkValueItem <- partitionKeyValue) { - logInfo(s"pkValueItem: ${partitionKeyValue.getClass.getName} - $pkValueItem") + logInfo(s"pkValueItem: ${pkValueItem.getClass.getName} - $pkValueItem") } logInfo(result) result From 05b86baef28fc7fa359cd73cfaf3eb97cfac6a47 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Wed, 7 Jan 2026 16:00:02 +0000 Subject: [PATCH 29/32] Fixing scala 2.13 regression --- .../azure/cosmos/spark/CosmosItemIdentityHelper.scala | 9 +-------- .../com/azure/cosmos/spark/ItemsPartitionReader.scala | 2 +- .../cosmos/spark/ItemsPartitionReaderWithReadMany.scala | 2 +- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosItemIdentityHelper.scala b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosItemIdentityHelper.scala index b52538ff5ab0..c91b732d7b63 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosItemIdentityHelper.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/CosmosItemIdentityHelper.scala @@ -25,17 +25,10 @@ private[spark] object CosmosItemIdentityHelper extends BasicLoggingTrait { private val objectMapper = Utils.getSimpleObjectMapper def getCosmosItemIdentityValueString(id: String, partitionKeyValue: List[Object]): String = { - val result = s"id($id).pk(${objectMapper.writeValueAsString(partitionKeyValue.asJava)})" - logInfo(s"getCosmosItemIdentityValueString (id") - for (pkValueItem <- partitionKeyValue) { - logInfo(s"pkValueItem: ${pkValueItem.getClass.getName} - $pkValueItem") - } - logInfo(result) - result + s"id($id).pk(${objectMapper.writeValueAsString(partitionKeyValue.asJava)})" } def tryParseCosmosItemIdentity(cosmosItemIdentityString: String): Option[CosmosItemIdentity] = { - logInfo(s"tryParseCosmosItemIdentity - $cosmosItemIdentityString") cosmosItemIdentityString match { case cosmosItemIdentityStringRegx(idValue, pkValue) => val partitionKeyValue = Utils.parse(pkValue, classOf[Object]) diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/ItemsPartitionReader.scala b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/ItemsPartitionReader.scala index 44027bafbe7e..98db2bbc2f00 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/ItemsPartitionReader.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/ItemsPartitionReader.scala @@ -196,7 +196,7 @@ private case class ItemsPartitionReader ((_: ObjectNode) => { CosmosItemIdentityHelper.getCosmosItemIdentityValueString( idValue, - ModelBridgeInternal.getPartitionKeyInternal(pkValue).toObjectArray.toList) + ModelBridgeInternal.getPartitionKeyInternal(pkValue).toObjectArray.toSeq.toList) }) ) diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/ItemsPartitionReaderWithReadMany.scala 
b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/ItemsPartitionReaderWithReadMany.scala index 7b0d62bb7a42..1c17ffa34ec6 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/ItemsPartitionReaderWithReadMany.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/ItemsPartitionReaderWithReadMany.scala @@ -177,7 +177,7 @@ private[spark] case class ItemsPartitionReaderWithReadMany ((_: ObjectNode) => { CosmosItemIdentityHelper.getCosmosItemIdentityValueString( idValue, - ModelBridgeInternal.getPartitionKeyInternal(partitionKey).toObjectArray.toList) + ModelBridgeInternal.getPartitionKeyInternal(partitionKey).toObjectArray.toSeq.toList) }) ) From 894ef1acd7f1df605c468d31135813f6d3686516 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Wed, 7 Jan 2026 18:14:59 +0000 Subject: [PATCH 30/32] Fixing Scala 2.13 test failure --- .../com/azure/cosmos/spark/ItemsPartitionReader.scala | 2 +- .../cosmos/spark/ItemsPartitionReaderWithReadMany.scala | 2 +- .../cosmos/spark/udf/GetCosmosItemIdentityValue.scala | 4 ++++ .../com/azure/cosmos/spark/SparkE2EChangeFeedITest.scala | 8 ++++++-- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/ItemsPartitionReader.scala b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/ItemsPartitionReader.scala index 98db2bbc2f00..44027bafbe7e 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/ItemsPartitionReader.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/ItemsPartitionReader.scala @@ -196,7 +196,7 @@ private case class ItemsPartitionReader ((_: ObjectNode) => { CosmosItemIdentityHelper.getCosmosItemIdentityValueString( idValue, - ModelBridgeInternal.getPartitionKeyInternal(pkValue).toObjectArray.toSeq.toList) + ModelBridgeInternal.getPartitionKeyInternal(pkValue).toObjectArray.toList) }) ) diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/ItemsPartitionReaderWithReadMany.scala b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/ItemsPartitionReaderWithReadMany.scala index 1c17ffa34ec6..7b0d62bb7a42 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/ItemsPartitionReaderWithReadMany.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/ItemsPartitionReaderWithReadMany.scala @@ -177,7 +177,7 @@ private[spark] case class ItemsPartitionReaderWithReadMany ((_: ObjectNode) => { CosmosItemIdentityHelper.getCosmosItemIdentityValueString( idValue, - ModelBridgeInternal.getPartitionKeyInternal(partitionKey).toObjectArray.toSeq.toList) + ModelBridgeInternal.getPartitionKeyInternal(partitionKey).toObjectArray.toList) }) ) diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/udf/GetCosmosItemIdentityValue.scala b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/udf/GetCosmosItemIdentityValue.scala index f8c2a7b7dac9..43f311a902c8 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/udf/GetCosmosItemIdentityValue.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/udf/GetCosmosItemIdentityValue.scala @@ -22,7 +22,11 @@ class GetCosmosItemIdentityValue extends UDF2[String, Object, String] { partitionKeyValue match { // for subpartitions case case wrappedArray: mutable.WrappedArray[Any] => + // Spark with Scala 2.12 
uses WrappedArray in DataFrame for arrays CosmosItemIdentityHelper.getCosmosItemIdentityValueString(id, wrappedArray.map(_.asInstanceOf[Object]).toList) + case wrappedArraySeq: scala.collection.immutable.ArraySeq[Any] => + // Spark with Scala 2.13 uses ArraySeq in DataFrame for arrays + CosmosItemIdentityHelper.getCosmosItemIdentityValueString(id, wrappedArraySeq.map(_.asInstanceOf[Object]).toList) case _ => CosmosItemIdentityHelper.getCosmosItemIdentityValueString(id, List(partitionKeyValue)) } } diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/SparkE2EChangeFeedITest.scala b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/SparkE2EChangeFeedITest.scala index e3c2263e5f37..baedd2754192 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/SparkE2EChangeFeedITest.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/SparkE2EChangeFeedITest.scala @@ -18,6 +18,7 @@ import java.io.{BufferedReader, InputStreamReader} import java.nio.file.Paths import java.time.Duration import java.util.UUID +import scala.collection.immutable.ArraySeq import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import scala.jdk.CollectionConverters._ @@ -535,8 +536,11 @@ class SparkE2EChangeFeedITest val collectedFrame = groupedFrame.collect() collectedFrame.foreach(row => { - val wrappedArray = row.get(1).asInstanceOf[mutable.WrappedArray[String]] - val array: Array[String] = wrappedArray.toArray + val array = row.get(1) match { + case wrappedArray: mutable.WrappedArray[String] => wrappedArray.toArray + case wrappedArraySeq: ArraySeq[String] => wrappedArraySeq.toArray + case _ => throw new IllegalArgumentException("Unexpected type of array") + } row.get(0) match { case "create" => validateArraysUnordered(createdObjectIds, array) From d93ceaa91214fd9db75bcc64aeed33a2acae4464 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Wed, 7 Jan 2026 18:48:17 +0000 Subject: [PATCH 31/32] Fixing build break --- .../cosmos/spark/udf/GetCosmosItemIdentityValue.scala | 10 +++------- .../azure/cosmos/spark/SparkE2EChangeFeedITest.scala | 5 +---- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/udf/GetCosmosItemIdentityValue.scala b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/udf/GetCosmosItemIdentityValue.scala index 43f311a902c8..45259f6b6a66 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/udf/GetCosmosItemIdentityValue.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/main/scala/com/azure/cosmos/spark/udf/GetCosmosItemIdentityValue.scala @@ -20,13 +20,9 @@ class GetCosmosItemIdentityValue extends UDF2[String, Object, String] { requireNotNull(partitionKeyValue, "partitionKeyValue") partitionKeyValue match { - // for subpartitions case - case wrappedArray: mutable.WrappedArray[Any] => - // Spark with Scala 2.12 uses WrappedArray in DataFrame for arrays - CosmosItemIdentityHelper.getCosmosItemIdentityValueString(id, wrappedArray.map(_.asInstanceOf[Object]).toList) - case wrappedArraySeq: scala.collection.immutable.ArraySeq[Any] => - // Spark with Scala 2.13 uses ArraySeq in DataFrame for arrays - CosmosItemIdentityHelper.getCosmosItemIdentityValueString(id, wrappedArraySeq.map(_.asInstanceOf[Object]).toList) + // for subpartitions case - Seq covers both WrappedArray (Scala 2.12) and ArraySeq (Scala 2.13) + case seq: Seq[Any] => + 
CosmosItemIdentityHelper.getCosmosItemIdentityValueString(id, seq.map(_.asInstanceOf[Object]).toList) case _ => CosmosItemIdentityHelper.getCosmosItemIdentityValueString(id, List(partitionKeyValue)) } } diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/SparkE2EChangeFeedITest.scala b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/SparkE2EChangeFeedITest.scala index baedd2754192..0fcc1f17b217 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/SparkE2EChangeFeedITest.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/SparkE2EChangeFeedITest.scala @@ -18,8 +18,6 @@ import java.io.{BufferedReader, InputStreamReader} import java.nio.file.Paths import java.time.Duration import java.util.UUID -import scala.collection.immutable.ArraySeq -import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import scala.jdk.CollectionConverters._ @@ -537,8 +535,7 @@ class SparkE2EChangeFeedITest val collectedFrame = groupedFrame.collect() collectedFrame.foreach(row => { val array = row.get(1) match { - case wrappedArray: mutable.WrappedArray[String] => wrappedArray.toArray - case wrappedArraySeq: ArraySeq[String] => wrappedArraySeq.toArray + case seq: Seq[String] => seq.toArray case _ => throw new IllegalArgumentException("Unexpected type of array") } row.get(0) match { From 1ae02f79c23b35d5b12f8a4f80afb0ffbcc6b950 Mon Sep 17 00:00:00 2001 From: Fabian Meiswinkel Date: Wed, 7 Jan 2026 19:01:08 +0000 Subject: [PATCH 32/32] Update SparkE2EChangeFeedITest.scala --- .../com/azure/cosmos/spark/SparkE2EChangeFeedITest.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/SparkE2EChangeFeedITest.scala b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/SparkE2EChangeFeedITest.scala index 0fcc1f17b217..85d46a8e4032 100644 --- a/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/SparkE2EChangeFeedITest.scala +++ b/sdk/cosmos/azure-cosmos-spark_3/src/test/scala/com/azure/cosmos/spark/SparkE2EChangeFeedITest.scala @@ -18,6 +18,7 @@ import java.io.{BufferedReader, InputStreamReader} import java.nio.file.Paths import java.time.Duration import java.util.UUID +import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import scala.jdk.CollectionConverters._ @@ -247,7 +248,6 @@ class SparkE2EChangeFeedITest val cosmosMasterKey = TestConfigurations.MASTER_KEY CosmosClientMetrics.meterRegistry.isDefined shouldEqual true - val meterRegistry = CosmosClientMetrics.meterRegistry.get val container = cosmosClient.getDatabase(cosmosDatabase).getContainer(cosmosContainer) val sinkContainerName = cosmosClient @@ -860,7 +860,7 @@ class SparkE2EChangeFeedITest hdfs.copyToLocalFile(true, new Path(startOffsetFileLocation), new Path(startOffsetFileBackupLocation)) hdfs.exists(new Path(startOffsetFileLocation)) shouldEqual false - var remainingFromLastBatchOfTen = 10; + var remainingFromLastBatchOfTen = 10 while(remainingFromLastBatchOfTen > 0) { hdfs.copyToLocalFile(true, new Path(startOffsetFileBackupLocation), new Path(startOffsetFileLocation)) hdfs.delete(new Path(latestOffsetFileLocation), true)
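
A closing note on the Scala 2.13 port: the churn across patches 30 through 32 comes down to one cross-build fact that the connector's own comments call out. Spark hands array columns to a UDF as scala.collection.mutable.WrappedArray when built against Scala 2.12, but as scala.collection.immutable.ArraySeq against Scala 2.13, and both are Seq subtypes. The standalone sketch below is not taken from the connector; the object SeqMatchSketch and the helper describePartitionKey are illustrative names, and it assumes only the Scala standard library, where Array#toSeq happens to produce exactly those two wrapper types on 2.12 and 2.13 respectively. It shows why the single `case seq: Seq[Any]` branch that patch 31 settles on covers both runtimes:

object SeqMatchSketch {

  // Mirrors the shape of the fix in GetCosmosItemIdentityValue: a single `Seq`
  // case covers mutable.WrappedArray (Scala 2.12) and immutable.ArraySeq (Scala 2.13).
  def describePartitionKey(partitionKeyValue: Any): String =
    partitionKeyValue match {
      case seq: scala.collection.Seq[_] =>
        // hierarchical (multi-value) partition key
        seq.map(v => s"$v").mkString("[", ", ", "]")
      case single =>
        // single-value partition key
        s"$single"
    }

  def main(args: Array[String]): Unit = {
    // Array#toSeq yields mutable.WrappedArray on 2.12 and immutable.ArraySeq on 2.13;
    // either way the value lands in the `Seq` case above.
    val pkValues = Array[Any]("tenantId", Long.box(42)).toSeq
    println(pkValues.getClass.getName)         // wrapper type differs per Scala version
    println(describePartitionKey(pkValues))    // [tenantId, 42]
    println(describePartitionKey("simple-pk")) // simple-pk
  }
}

That scala.collection.immutable.ArraySeq exists only in the 2.13 standard library is presumably also what the "Fixing build break" commit repairs: the explicit ArraySeq match introduced in patch 30 cannot compile against 2.12, so patch 31 collapses both wrapper cases into the common Seq match.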