diff --git a/cloud_gcp/BUILD.bazel b/cloud_gcp/BUILD.bazel
index 70b446d067..0fd0b7ddff 100644
--- a/cloud_gcp/BUILD.bazel
+++ b/cloud_gcp/BUILD.bazel
@@ -33,6 +33,7 @@ shared_deps = [
         maven_artifact("ch.qos.reload4j:reload4j"),
         maven_artifact("org.threeten:threetenbp"),
         maven_artifact("org.apache.kafka:kafka-clients"),
+        maven_artifact_with_suffix("org.apache.spark:spark-mllib"),
         maven_artifact("com.google.cloud.spark:spark-3.5-bigquery"),
         maven_artifact_with_suffix("org.apache.iceberg:iceberg-spark-runtime-3.5"),
         maven_artifact("org.objenesis:objenesis"),
diff --git a/cloud_gcp/src/main/scala/ai/chronon/integrations/cloud_gcp/DelegatingBigQueryMetastoreCatalog.scala b/cloud_gcp/src/main/scala/ai/chronon/integrations/cloud_gcp/DelegatingBigQueryMetastoreCatalog.scala
index a9dd17c8ae..71af318126 100644
--- a/cloud_gcp/src/main/scala/ai/chronon/integrations/cloud_gcp/DelegatingBigQueryMetastoreCatalog.scala
+++ b/cloud_gcp/src/main/scala/ai/chronon/integrations/cloud_gcp/DelegatingBigQueryMetastoreCatalog.scala
@@ -1,26 +1,33 @@
 package ai.chronon.integrations.cloud_gcp
 
-import com.google.cloud.bigquery.{
+import ai.chronon.api.Extensions.StringOps
+import com.google.cloud.spark.bigquery.repackaged.com.google.cloud.bigquery.{
   BigQuery,
   BigQueryOptions,
   ExternalTableDefinition,
+  FormatOptions,
+  Schema,
   StandardTableDefinition,
   TableDefinition,
-  TableId
+  TableId,
+  TableInfo,
+  HivePartitioningOptions
 }
-import com.google.cloud.spark.bigquery.BigQueryCatalog
+
+import com.google.cloud.spark.bigquery.{BigQueryCatalog, SchemaConverters, SchemaConvertersConfiguration}
 import org.apache.iceberg.spark.SparkCatalog
 import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.analysis.NoSuchTableException
 import org.apache.spark.sql.connector.catalog._
 import org.apache.spark.sql.connector.catalog.functions.UnboundFunction
 import org.apache.spark.sql.connector.expressions.Transform
+import org.apache.spark.sql.connector.expressions.Expressions
 import org.apache.spark.sql.connector.read.ScanBuilder
 import org.apache.spark.sql.connector.write.{LogicalWriteInfo, WriteBuilder}
 import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
 import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetTable
 import org.apache.spark.sql.types.StructType
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
-import org.apache.spark.sql.catalyst.analysis.NoSuchTableException
 
 import java.util
 import scala.jdk.CollectionConverters._
@@ -35,6 +42,7 @@ import scala.util.Try
  */
 class DelegatingTable(internalTable: Table,
                       additionalProperties: Map[String, String],
+                      schema: Option[StructType] = None,
                       partitioning: Option[Array[Transform]] = None)
     extends Table
     with SupportsRead
@@ -42,9 +50,9 @@ class DelegatingTable(internalTable: Table,
 
   override def name(): String = internalTable.name
 
-  override def schema(): StructType = internalTable.schema
+  override def schema(): StructType = schema.getOrElse(internalTable.schema)
 
-  override def capabilities(): util.Set[TableCapability] = internalTable.capabilities()
+  override def capabilities(): util.Set[TableCapability] = (internalTable.capabilities().asScala.toList :+ TableCapability.OVERWRITE_DYNAMIC).toSet.asJava
 
   override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder =
     internalTable.asInstanceOf[SupportsRead].newScanBuilder(options)
@@ -60,8 +68,15 @@ class DelegatingTable(internalTable: Table,
 }
 
 object DelegatingTable {
-  def apply(table: Table, additionalProperties: Map[String, String] = Map.empty): Table =
-    new DelegatingTable(table, additionalProperties = additionalProperties)
+
+  def apply(table: Table,
+            additionalProperties: Map[String, String] = Map.empty,
+            schema: Option[StructType] = None,
+            partitioning: Option[Array[Transform]] = None): Table =
+    new DelegatingTable(table,
+                        additionalProperties = additionalProperties,
+                        schema = schema,
+                        partitioning = partitioning)
 }
 
 /** Galactus catalog that allows us to interact with BigQuery metastore as a spark catalog. This allows for
@@ -135,15 +150,27 @@ class DelegatingBigQueryMetastoreCatalog extends TableCatalog with SupportsNames
           require(uris.size == 1, s"External table ${table} can be backed by only one URI.")
           uris.head.replaceAll("/\\*\\.parquet$", "")
         }
+        val partitionColumns = externalTable.getHivePartitioningOptions.getFields.asScala.toList
+        val maybePartitionColumns = partitionColumns match {
+          case Nil => None
+          case _   => Option(partitionColumns)
+        }
+
+        val schemaConverter = SchemaConverters.from(SchemaConvertersConfiguration.createDefault())
+        val sparkSchema = schemaConverter.toSpark(externalTable.getSchema)
         val fileBasedTable = ParquetTable(tId.toString,
                                           SparkSession.active,
                                           CaseInsensitiveStringMap.empty(),
                                           List(uri),
-                                          None,
+                                          Option(sparkSchema),
                                           classOf[ParquetFileFormat])
-        DelegatingTable(fileBasedTable,
-                        Map(TableCatalog.PROP_EXTERNAL -> "true", TableCatalog.PROP_LOCATION -> uri))
+        DelegatingTable(
+          fileBasedTable,
+          Map(TableCatalog.PROP_EXTERNAL -> "true", TableCatalog.PROP_LOCATION -> uri),
+          schema = Option(sparkSchema),
+          partitioning = maybePartitionColumns.map((cols) => cols.map(Expressions.identity).toArray)
+        )
       }
     case _: StandardTableDefinition => {
       //todo(tchow): Support partitioning
@@ -167,10 +194,65 @@ class DelegatingBigQueryMetastoreCatalog extends TableCatalog with SupportsNames
                            partitions: Array[Transform],
                            properties: util.Map[String, String]): Table = {
     val provider = properties.get(TableCatalog.PROP_PROVIDER)
-    if (provider.toUpperCase != "ICEBERG") {
-      throw new UnsupportedOperationException("Only creating iceberg tables supported.")
+    provider.toUpperCase match {
+      case "ICEBERG" => icebergCatalog.createTable(ident, schema, partitions, properties)
+      case "PARQUET" => {
+
+        val rootLocation = properties.get(TableCatalog.PROP_LOCATION)
+
+        val fullTableName = SparkBQUtils.toTableString(ident)
+        val tableId = SparkBQUtils.toTableId(ident)
+        val uri = rootLocation.stripSuffix("/") + f"/${fullTableName.sanitize}"
+        val glob = uri.stripSuffix("/") + "/*.parquet"
+
+        val schemaConverter = SchemaConverters.from(SchemaConvertersConfiguration.createDefault())
+
+        val bqSchema: Schema = schemaConverter.toBigQuerySchema(schema)
+        val baseTableDef = ExternalTableDefinition
+          .newBuilder(glob, bqSchema, FormatOptions.parquet())
+          .setAutodetect(false)
+
+        if (partitions.nonEmpty) {
+          assert(
+            partitions.forall(_.name.equals("identity")),
+            s"Only identity partitioning is supported for external tables. Received unsupported partition spec: ${partitions}"
+          )
+          val identityPartitions = partitions.map((t) => t.children().head.toString).toList
+          val noPartitions = bqSchema.getFields.asScala.filterNot((f) => identityPartitions.contains(f.getName)).toArray
+          val filteredBQSchema = Schema.of(noPartitions: _*)
+          baseTableDef.setSchema(filteredBQSchema)
+
+          val partitionPathSegment = identityPartitions.map((p) => f"{${p}:STRING}").mkString("/")
+          val explicitPartitionedUri = uri + s"/${partitionPathSegment}"
+          val hivePartitions = HivePartitioningOptions
+            .newBuilder()
+            .setFields(identityPartitions.asJava)
+            .setSourceUriPrefix(explicitPartitionedUri)
+            .setMode("CUSTOM")
+            .build()
+          baseTableDef.setHivePartitioningOptions(hivePartitions)
+        }
+
+        val shadedTableId = scala
+          .Option(tableId.getProject)
+          .map(TableId.of(_, tableId.getDataset, tableId.getTable))
+          .getOrElse(TableId.of(tableId.getDataset, tableId.getTable))
+
+        val tableInfo = TableInfo.newBuilder(shadedTableId, baseTableDef.build).build()
+
+        // Create this out of band, and then pass a parquet table back to Spark.
+        bigQueryClient.create(tableInfo)
+
+        val fileBasedTable = ParquetTable(tableId.toString,
+                                          SparkSession.active,
+                                          CaseInsensitiveStringMap.empty(),
+                                          List(uri),
+                                          Option(schema),
+                                          classOf[ParquetFileFormat])
+        DelegatingTable(fileBasedTable, Map(TableCatalog.PROP_EXTERNAL -> "true", TableCatalog.PROP_LOCATION -> uri))
+      }
+      case unsupported => throw new UnsupportedOperationException(s"Unsupported format: ${unsupported}")
     }
-    icebergCatalog.createTable(ident, schema, partitions, properties)
   }
 
   override def alterTable(ident: Identifier, changes: TableChange*): Table = {
diff --git a/cloud_gcp/src/main/scala/ai/chronon/integrations/cloud_gcp/SparkBQUtils.scala b/cloud_gcp/src/main/scala/ai/chronon/integrations/cloud_gcp/SparkBQUtils.scala
index 154be6a55b..a2ac36ca89 100644
--- a/cloud_gcp/src/main/scala/ai/chronon/integrations/cloud_gcp/SparkBQUtils.scala
+++ b/cloud_gcp/src/main/scala/ai/chronon/integrations/cloud_gcp/SparkBQUtils.scala
@@ -2,12 +2,25 @@ package ai.chronon.integrations.cloud_gcp
 
 import com.google.cloud.bigquery.connector.common.BigQueryUtil
 import org.apache.spark.sql.SparkSession
 import com.google.cloud.bigquery.TableId
+import org.apache.spark.sql.connector.catalog.Identifier
 
 object SparkBQUtils {
 
+  def toTableString(ident: Identifier): String = {
+    (ident.namespace() :+ ident.name).mkString(".")
+  }
+
   def toTableId(tableName: String)(implicit spark: SparkSession): TableId = {
     val parseIdentifier = spark.sessionState.sqlParser.parseMultipartIdentifier(tableName)
-    val shadedTid = BigQueryUtil.parseTableId(parseIdentifier.mkString("."))
+    parseBQIdentifier(parseIdentifier.mkString("."))
+  }
+
+  def toTableId(tableIdentifier: Identifier): TableId = {
+    val stringified = (tableIdentifier.namespace :+ tableIdentifier.name).mkString(".")
+    parseBQIdentifier(stringified)
+  }
+
+  private def parseBQIdentifier(parsedIdentifier: String) = {
+    val shadedTid = BigQueryUtil.parseTableId(parsedIdentifier)
     scala
       .Option(shadedTid.getProject)
       .map(TableId.of(_, shadedTid.getDataset, shadedTid.getTable))
diff --git a/cloud_gcp/src/test/scala/ai/chronon/integrations/cloud_gcp/BigQueryCatalogTest.scala b/cloud_gcp/src/test/scala/ai/chronon/integrations/cloud_gcp/BigQueryCatalogTest.scala
index adeb23f7f5..29cca4f18d 100644
--- a/cloud_gcp/src/test/scala/ai/chronon/integrations/cloud_gcp/BigQueryCatalogTest.scala
+++ b/cloud_gcp/src/test/scala/ai/chronon/integrations/cloud_gcp/BigQueryCatalogTest.scala
@@ -40,17 +40,19 @@ class BigQueryCatalogTest extends AnyFlatSpec with MockitoSugar {
       "spark.hadoop.fs.AbstractFileSystem.gs.impl" -> classOf[GoogleHadoopFS].getName,
       "spark.sql.catalogImplementation" -> "in-memory",
-//      "spark.sql.defaultCatalog" -> "default_iceberg",
-//      "spark.sql.catalog.default_iceberg" -> classOf[DelegatingBigQueryMetastoreCatalog].getName,
-//      "spark.sql.catalog.default_iceberg.catalog-impl" -> classOf[BQMSCatalog].getName,
-//      "spark.sql.catalog.default_iceberg.io-impl" -> classOf[ResolvingFileIO].getName,
-//      "spark.sql.catalog.default_iceberg.warehouse" -> "gs://zipline-warehouse-canary/data/tables/",
-//      "spark.sql.catalog.default_iceberg.gcp_location" -> "us-central1",
-//      "spark.sql.catalog.default_iceberg.gcp_project" -> "canary-443022",
-//      "spark.kryo.registrator" -> classOf[ChrononIcebergKryoRegistrator].getName,
-//      "spark.sql.defaultUrlStreamHandlerFactory.enabled" -> false.toString,
-//
+      "spark.sql.defaultCatalog" -> "default_iceberg",
+      "spark.sql.catalog.default_iceberg" -> classOf[DelegatingBigQueryMetastoreCatalog].getName,
+      "spark.sql.catalog.default_iceberg.catalog-impl" -> classOf[BQMSCatalog].getName,
+      "spark.sql.catalog.default_iceberg.io-impl" -> classOf[ResolvingFileIO].getName,
+      "spark.sql.catalog.default_iceberg.warehouse" -> "gs://zipline-warehouse-canary/data/tables/",
+      "spark.sql.catalog.default_iceberg.gcp_location" -> "us-central1",
+      "spark.sql.catalog.default_iceberg.gcp_project" -> "canary-443022",
+      "spark.kryo.registrator" -> classOf[ChrononIcebergKryoRegistrator].getName,
+      "spark.sql.defaultUrlStreamHandlerFactory.enabled" -> false.toString,
+      // "spark.sql.catalog.default_bigquery" -> classOf[BigQueryCatalog].getName,
+      "spark.chronon.table_write.format" -> "PARQUET",
+      "spark.chronon.table_write.warehouse" -> "gs://zipline-warehouse-canary/data/tables/",
     ))
   )
 
   lazy val tableUtils: TableUtils = TableUtils(spark)
@@ -113,6 +115,18 @@ class BigQueryCatalogTest extends AnyFlatSpec with MockitoSugar {
     println(allParts)
   }
 
+  it should "create external parquet table" in {
+    val externalTable = "default_iceberg.data.tchow_external_parquet"
+
+    val testDf = spark.createDataFrame(Seq((1, "2021-01-01"))).toDF("id", "ds")
+    if (!tableUtils.tableReachable(externalTable)) {
+      tableUtils.createTable(testDf, externalTable, List("ds"), Map.empty[String, String], "PARQUET")
+    }
+    tableUtils.insertPartitions(testDf, externalTable, Map.empty[String, String], List("ds"))
+    val roundTripped = tableUtils.loadTable(externalTable)
+    println(roundTripped)
+  }
+
   it should "integration testing bigquery partitions" ignore {
     // TODO(tchow): This test is ignored because it requires a running BigQuery instance. Need to figure out stubbing locally.
// to run, set `GOOGLE_APPLICATION_CREDENTIALS= diff --git a/maven_install.json b/maven_install.json index 83187e6b0e..90ed413da6 100755 --- a/maven_install.json +++ b/maven_install.json @@ -1,7 +1,7 @@ { "__AUTOGENERATED_FILE_DO_NOT_MODIFY_THIS_FILE_MANUALLY": "THERE_IS_NO_DATA_ONLY_ZUUL", - "__INPUT_ARTIFACTS_HASH": -1991826856, - "__RESOLVED_ARTIFACTS_HASH": 327110684, + "__INPUT_ARTIFACTS_HASH": -2097971931, + "__RESOLVED_ARTIFACTS_HASH": -1216426280, "artifacts": { "ant:ant": { "shasums": { @@ -431,6 +431,13 @@ }, "version": "1.3.9-1" }, + "com.github.wendykierp:JTransforms": { + "shasums": { + "jar": "d9dffa3e27793040dccb97b054d95267df46e669c396bf1ca4f3b085069bc2d5", + "sources": "4fe7287b6464dd568dbe0cd2bf4082662ff4db28884e7ed9cbba6ca8512d5cc1" + }, + "version": "3.1" + }, "com.google.android:annotations": { "shasums": { "jar": "ba734e1e84c09d615af6a09d33034b4f0442f8772dec120efb376d86a565ae15", @@ -1215,6 +1222,13 @@ }, "version": "1.18.0" }, + "com.sun.istack:istack-commons-runtime": { + "shasums": { + "jar": "4ffabb06be454a05e4398e20c77fa2b6308d4b88dfbef7ca30a76b5b7d5505ef", + "sources": "4874ce5789c8684debd0099c8fa4ec8e00545626af4c9890760f0cb6fdf6d4db" + }, + "version": "3.0.8" + }, "com.sun.jersey.contribs:jersey-guice": { "shasums": { "jar": "8653320f79f20a369bfb9e6ff84be9315843bc4a048b37e1df13c777c7cfcb33", @@ -1418,6 +1432,27 @@ }, "version": "1.5.4" }, + "dev.ludovic.netlib:arpack": { + "shasums": { + "jar": "ab788e4b75875d2e448e530ee8bdb2f7161a973cc966f64f290b59840ee93e9e", + "sources": "c1337816f61ff70035eaa25b97056e84420c2f6fcddf3f01fe129ee9c91cf6ab" + }, + "version": "3.0.3" + }, + "dev.ludovic.netlib:blas": { + "shasums": { + "jar": "f69f56fbcc0020d92bbe56e28e49ae201319d59c71c272736ce41869c2642bd5", + "sources": "a4d521046b4608a0e3c362b86178355ed761f54b0a7d1efbd5b3b3d99be79982" + }, + "version": "3.0.3" + }, + "dev.ludovic.netlib:lapack": { + "shasums": { + "jar": "079125dee3a93f544ae451a6e7b2e5f6bd4022e1fe5c9377a93e1c6fcbfbe20d", + "sources": "e75585f7d5ca4b5fb01192d66dc45a54c561d9b359f43b5d9145d0e5587b4e46" + }, + "version": "3.0.3" + }, "dnsjava:dnsjava": { "shasums": { "jar": "f9def222ef5c0406216663ca45ca0950781541e22558b217aa779f36fbd24ab5", @@ -2520,6 +2555,13 @@ }, "version": "0.10.9.9" }, + "net.sourceforge.f2j:arpack_combined_all": { + "shasums": { + "jar": "9964fb948ef213548a79b23dd480af9d72f1450824fa006bbfea211ac1ffa6dc", + "sources": null + }, + "version": "0.1" + }, "org.antlr:ST4": { "shasums": { "jar": "17cc49dc535a0fbe58c3a8634e774572bed31eb73415e9ce9d2703b977bf356f", @@ -3434,6 +3476,20 @@ }, "version": "3.5.3" }, + "org.apache.spark:spark-graphx_2.12": { + "shasums": { + "jar": "dd9d1eda26ae0496e6fb66f13acdc9b270e2327d28890a747a388f5ba0eaed99", + "sources": "7879b1a8888fbe39a63bd11951586b4684afa42b7da211a17b3b4382b2e2e2ec" + }, + "version": "3.5.3" + }, + "org.apache.spark:spark-graphx_2.13": { + "shasums": { + "jar": "0a8aa4674b64d3ac4b00f7e23c0eae82e851e921235528c3093cda8f670b29ab", + "sources": "ee7144728b747ece5fbdc29d8eb399fa582f88826d09d8d743136db00bd2091e" + }, + "version": "3.5.3" + }, "org.apache.spark:spark-hive_2.12": { "shasums": { "jar": "5e55f49b3b6992f60188c93cd516f8d5766438168aab2dffe586293e4e2fc0c3", @@ -3476,6 +3532,34 @@ }, "version": "3.5.3" }, + "org.apache.spark:spark-mllib-local_2.12": { + "shasums": { + "jar": "1183635dd0b101162fb019fbb68a149a167738463f085e705ff5a3d3d285ed68", + "sources": "2b417ebe3f663db16619de473e1747f90aca863545bc00b14d37e6d5d9c48a4c" + }, + "version": "3.5.3" + }, + 
"org.apache.spark:spark-mllib-local_2.13": { + "shasums": { + "jar": "7da20fb2cafefa3b7fd1a9096add60adf65c195c3379de701503bbdbed0976e0", + "sources": "059a68ec08e1c104c3145edb4b65a5b6ccb5e6b6dea76327f906ded4c51a9e5e" + }, + "version": "3.5.3" + }, + "org.apache.spark:spark-mllib_2.12": { + "shasums": { + "jar": "bf68b5e0390d5976b6e74eb71a33c65324c4dae5279131e5af8bd29d58b6363a", + "sources": "2eaf9ac621703371f2f29eb9f82e8df026c5cf6f2e75f8155da7adb763e8ed78" + }, + "version": "3.5.3" + }, + "org.apache.spark:spark-mllib_2.13": { + "shasums": { + "jar": "25b9cf9a217f1053e6d3a3073a187320c7c26c1410beadd77aa70b0005a56a1c", + "sources": "1f0a4d5519849f5fc00e3c3473f319e9deb38b357810e94725aa74e93e2d9a24" + }, + "version": "3.5.3" + }, "org.apache.spark:spark-network-common_2.12": { "shasums": { "jar": "c43989a30376b07d4c323a7484430484dd805f78bb9d0331f15bd994260b00bb", @@ -3952,6 +4036,13 @@ }, "version": "1.0.3" }, + "org.glassfish.jaxb:jaxb-runtime": { + "shasums": { + "jar": "e6e0a1e89fb6ff786279e6a0082d5cef52dc2ebe67053d041800737652b4fd1b", + "sources": "b85c2e314e6565e7dc4fb7e00ec8688c1c0fe9c9f7275125fb8dee43898f3743" + }, + "version": "2.3.2" + }, "org.glassfish.jersey.containers:jersey-container-servlet": { "shasums": { "jar": "f16fc54ca51c0964a9a187e277dce0fa36b296c3d02b02bd685589b5f2766ed2", @@ -4421,6 +4512,34 @@ }, "version": "3.2.15" }, + "org.scalanlp:breeze-macros_2.12": { + "shasums": { + "jar": "44232567834518b9e36b34d6b3d6bc10875e89ae1a0794af4b365364cfd502aa", + "sources": "7f2459c0a170e9400cb7e0a925ae446a3802d2f3f44bf9d9f7f6d88b53b06aad" + }, + "version": "2.1.0" + }, + "org.scalanlp:breeze-macros_2.13": { + "shasums": { + "jar": "fbc20705e9319113d9a664b31041f519d646bb21fefa9cce57a471553ef5d586", + "sources": "7f2459c0a170e9400cb7e0a925ae446a3802d2f3f44bf9d9f7f6d88b53b06aad" + }, + "version": "2.1.0" + }, + "org.scalanlp:breeze_2.12": { + "shasums": { + "jar": "d42b74280c0b4f6442ec98f4ae9dbefd0956ff4f631120ef724ef4077caf84a7", + "sources": "f751c3927984dbc2aa8e32772778593e84f8f3f4e01c6461a15dd79d767ada10" + }, + "version": "2.1.0" + }, + "org.scalanlp:breeze_2.13": { + "shasums": { + "jar": "f31200270502b533d27867fb712b1deaea5f30a1ceb76c6156c26ea1d1a46148", + "sources": "92f759371bd2a7eeedee9fd34a2def3606568704ca6a031aa4cf394917590114" + }, + "version": "2.1.0" + }, "org.scalatest:scalatest-compatible": { "shasums": { "jar": "b48a2931333d9522f72f8f74dbf632587103b19620fb1cfea59e9e1147cffc78", @@ -4715,6 +4834,20 @@ }, "version": "1.9" }, + "org.typelevel:algebra_2.12": { + "shasums": { + "jar": "cc7cf938ef59017e1a1c8fa3bcde6f5170871fd32c3728681cd722287ec9e054", + "sources": "2ae1eafd9bdc81c67b5ddb3827910997d3355ef3b905b5ff34427deefe6fd79f" + }, + "version": "2.0.1" + }, + "org.typelevel:algebra_2.13": { + "shasums": { + "jar": "3eae539541fabfd50db5184b608c63aaefd2cc78f790102f5891178ead3f3c93", + "sources": "16a834a5c1bcaafd36727e629bd8de216d3eab718238d20b7ac6bc9199d76e8a" + }, + "version": "2.8.0" + }, "org.typelevel:cats-core_2.12": { "shasums": { "jar": "f3b4d616d46b46e46618ee384e79467cbb225692256f6ef389ddb9f960f6f6ea", @@ -4757,6 +4890,62 @@ }, "version": "1.6.0" }, + "org.typelevel:spire-macros_2.12": { + "shasums": { + "jar": "8e9865c977b67f4d7d915000d220d33dc71a4f341cc65151754cd50d73bfd72f", + "sources": "d32c2596b325b00d6c8441b3245900c6424e6e1d8eb940f2bf05bb5f913ebee7" + }, + "version": "0.17.0" + }, + "org.typelevel:spire-macros_2.13": { + "shasums": { + "jar": "0b74f86163207e5ce23aca69ebc50cea5a0162dddb7d9f6829552c78012b36b3", + "sources": 
"d54532fe6df13f027af8d2b864775d6bd41f77e4c1b69d383c9772500d4f3c1f" + }, + "version": "0.18.0" + }, + "org.typelevel:spire-platform_2.12": { + "shasums": { + "jar": "a3c3173db28070589b22e57e2167930a2948f070ae9e648181be57828d139722", + "sources": "31b06a42facee7d7f251012c06372364c35f3ca42d1b2f9bfbcdd155cdd1c563" + }, + "version": "0.17.0" + }, + "org.typelevel:spire-platform_2.13": { + "shasums": { + "jar": "bb2f5ff5d3180fc2f47df71133570c740acb9f9b3d917f3fa0c76727fac86240", + "sources": "5285c29a2bae091af19fb6faee17e4926df6b0d9ce70643cfb040d3b3a5a73d5" + }, + "version": "0.18.0" + }, + "org.typelevel:spire-util_2.12": { + "shasums": { + "jar": "9886ccc662820ae8aa4f91d0f089b244e6c403f5a9cbc1c9a01394d14d364fb8", + "sources": "572514a0388f46abca758285de651c58b2008f4b229d7ae6aec89e50f716d553" + }, + "version": "0.17.0" + }, + "org.typelevel:spire-util_2.13": { + "shasums": { + "jar": "61d9aae682d7854597e23b01befea3db42af10bc5db033ddb0662afbeee1ecf0", + "sources": "d107a65b0f646d8d8d6ad945d8d939c83ebd443510ab5fd972b88f644c37bb76" + }, + "version": "0.18.0" + }, + "org.typelevel:spire_2.12": { + "shasums": { + "jar": "940079da1407165462089e6d802e410494f8c906689b8252418fde202a2e9dc0", + "sources": "00b088b4e6f80cd8e1059b3b22b716f195ae4e72d41a7b258e7780a0a09d695b" + }, + "version": "0.17.0" + }, + "org.typelevel:spire_2.13": { + "shasums": { + "jar": "2b23c48f8ef13fde720dc0f276cf36abf42a3f069582d62cf208720c327a8a86", + "sources": "a0e641c376f82804100e3a0efe5c0d6902181b1afa4706d70e11b2be9e981b97" + }, + "version": "0.18.0" + }, "org.xerial.snappy:snappy-java": { "shasums": { "jar": "0f3f1857ed33116583f480b4df5c0218836c47bfbc9c6221c0d73f356decf37b", @@ -4778,6 +4967,13 @@ }, "version": "2.0.8" }, + "pl.edu.icm:JLargeArrays": { + "shasums": { + "jar": "6dca5ab23e1fdb9190a257c04687a9ea19111c36b6ec9478bce6b2a128ca1aeb", + "sources": "b22995c404825bd16fc357867d6f0823a173173b8dd03d64306f41066edff127" + }, + "version": "1.5" + }, "ru.vyarus:generics-resolver": { "shasums": { "jar": "0e4fc6f7ee079f357ecdae4e51a1a66c1f130cbf64b2778541b24f432830ddf1", @@ -5251,6 +5447,10 @@ "com.sun.jersey:jersey-core", "com.sun.xml.bind:jaxb-impl" ], + "com.github.wendykierp:JTransforms": [ + "org.apache.commons:commons-math3", + "pl.edu.icm:JLargeArrays" + ], "com.google.api-client:google-api-client": [ "com.google.auth:google-auth-library-credentials", "com.google.auth:google-auth-library-oauth2-http", @@ -6342,6 +6542,15 @@ "commons-el:commons-el": [ "commons-logging:commons-logging" ], + "dev.ludovic.netlib:arpack": [ + "net.sourceforge.f2j:arpack_combined_all" + ], + "dev.ludovic.netlib:blas": [ + "net.sourceforge.f2j:arpack_combined_all" + ], + "dev.ludovic.netlib:lapack": [ + "net.sourceforge.f2j:arpack_combined_all" + ], "dnsjava:dnsjava": [ "org.slf4j:slf4j-api" ], @@ -7872,6 +8081,22 @@ "org.xerial.snappy:snappy-java", "oro:oro" ], + "org.apache.spark:spark-graphx_2.12": [ + "dev.ludovic.netlib:blas", + "net.sourceforge.f2j:arpack_combined_all", + "org.apache.spark:spark-core_2.12", + "org.apache.spark:spark-mllib-local_2.12", + "org.apache.spark:spark-tags_2.12", + "org.apache.xbean:xbean-asm9-shaded" + ], + "org.apache.spark:spark-graphx_2.13": [ + "dev.ludovic.netlib:blas", + "net.sourceforge.f2j:arpack_combined_all", + "org.apache.spark:spark-core_2.13", + "org.apache.spark:spark-mllib-local_2.13", + "org.apache.spark:spark-tags_2.13", + "org.apache.xbean:xbean-asm9-shaded" + ], "org.apache.spark:spark-hive_2.12": [ "com.google.code.findbugs:jsr305", "commons-codec:commons-codec", @@ -7941,6 +8166,51 @@ 
"org.apache.spark:spark-launcher_2.13": [ "org.apache.spark:spark-tags_2.13" ], + "org.apache.spark:spark-mllib-local_2.12": [ + "dev.ludovic.netlib:blas", + "org.apache.commons:commons-math3", + "org.apache.spark:spark-tags_2.12", + "org.scalanlp:breeze_2.12" + ], + "org.apache.spark:spark-mllib-local_2.13": [ + "dev.ludovic.netlib:blas", + "org.apache.commons:commons-math3", + "org.apache.spark:spark-tags_2.13", + "org.scala-lang.modules:scala-collection-compat_2.13", + "org.scalanlp:breeze_2.13" + ], + "org.apache.spark:spark-mllib_2.12": [ + "dev.ludovic.netlib:arpack", + "dev.ludovic.netlib:blas", + "dev.ludovic.netlib:lapack", + "org.apache.commons:commons-math3", + "org.apache.spark:spark-core_2.12", + "org.apache.spark:spark-graphx_2.12", + "org.apache.spark:spark-mllib-local_2.12", + "org.apache.spark:spark-sql_2.12", + "org.apache.spark:spark-streaming_2.12", + "org.apache.spark:spark-tags_2.12", + "org.glassfish.jaxb:jaxb-runtime", + "org.scala-lang.modules:scala-parser-combinators_2.12", + "org.scalanlp:breeze_2.12" + ], + "org.apache.spark:spark-mllib_2.13": [ + "dev.ludovic.netlib:arpack", + "dev.ludovic.netlib:blas", + "dev.ludovic.netlib:lapack", + "org.apache.commons:commons-math3", + "org.apache.spark:spark-core_2.13", + "org.apache.spark:spark-graphx_2.13", + "org.apache.spark:spark-mllib-local_2.13", + "org.apache.spark:spark-sql_2.13", + "org.apache.spark:spark-streaming_2.13", + "org.apache.spark:spark-tags_2.13", + "org.glassfish.jaxb:jaxb-runtime", + "org.scala-lang.modules:scala-collection-compat_2.13", + "org.scala-lang.modules:scala-parallel-collections_2.13", + "org.scala-lang.modules:scala-parser-combinators_2.13", + "org.scalanlp:breeze_2.13" + ], "org.apache.spark:spark-network-common_2.12": [ "com.fasterxml.jackson.core:jackson-annotations", "com.fasterxml.jackson.core:jackson-databind", @@ -8182,6 +8452,10 @@ "org.glassfish.hk2:hk2-utils": [ "org.glassfish.hk2.external:jakarta.inject" ], + "org.glassfish.jaxb:jaxb-runtime": [ + "com.sun.istack:istack-commons-runtime", + "jakarta.xml.bind:jakarta.xml.bind-api" + ], "org.glassfish.jersey.containers:jersey-container-servlet": [ "jakarta.ws.rs:jakarta.ws.rs-api", "org.glassfish.jersey.containers:jersey-container-servlet-core", @@ -8326,6 +8600,36 @@ "org.roaringbitmap:RoaringBitmap": [ "org.roaringbitmap:shims" ], + "org.scalanlp:breeze-macros_2.12": [ + "org.typelevel:spire_2.12" + ], + "org.scalanlp:breeze-macros_2.13": [ + "org.typelevel:spire_2.13" + ], + "org.scalanlp:breeze_2.12": [ + "com.github.wendykierp:JTransforms", + "dev.ludovic.netlib:arpack", + "dev.ludovic.netlib:blas", + "dev.ludovic.netlib:lapack", + "net.sf.opencsv:opencsv", + "net.sourceforge.f2j:arpack_combined_all", + "org.apache.commons:commons-math3", + "org.scalanlp:breeze-macros_2.12", + "org.slf4j:slf4j-api", + "org.typelevel:spire_2.12" + ], + "org.scalanlp:breeze_2.13": [ + "com.github.wendykierp:JTransforms", + "dev.ludovic.netlib:arpack", + "dev.ludovic.netlib:blas", + "dev.ludovic.netlib:lapack", + "net.sf.opencsv:opencsv", + "net.sourceforge.f2j:arpack_combined_all", + "org.apache.commons:commons-math3", + "org.scalanlp:breeze-macros_2.13", + "org.slf4j:slf4j-api", + "org.typelevel:spire_2.13" + ], "org.scalatest:scalatest-core_2.12": [ "org.scala-lang.modules:scala-xml_2.12", "org.scalactic:scalactic_2.12", @@ -8473,12 +8777,57 @@ "org.rnorth.duct-tape:duct-tape", "org.slf4j:slf4j-api" ], + "org.typelevel:algebra_2.12": [ + "org.typelevel:cats-kernel_2.12" + ], + "org.typelevel:algebra_2.13": [ + 
"org.typelevel:cats-kernel_2.13" + ], "org.typelevel:cats-core_2.12": [ "org.typelevel:cats-kernel_2.12" ], "org.typelevel:cats-core_2.13": [ "org.typelevel:cats-kernel_2.13" ], + "org.typelevel:spire-macros_2.12": [ + "org.typelevel:algebra_2.12" + ], + "org.typelevel:spire-macros_2.13": [ + "org.typelevel:algebra_2.13" + ], + "org.typelevel:spire-platform_2.12": [ + "org.typelevel:algebra_2.12", + "org.typelevel:spire-macros_2.12", + "org.typelevel:spire-util_2.12" + ], + "org.typelevel:spire-platform_2.13": [ + "org.typelevel:algebra_2.13", + "org.typelevel:spire-macros_2.13", + "org.typelevel:spire-util_2.13" + ], + "org.typelevel:spire-util_2.12": [ + "org.typelevel:algebra_2.12", + "org.typelevel:spire-macros_2.12" + ], + "org.typelevel:spire-util_2.13": [ + "org.typelevel:algebra_2.13", + "org.typelevel:spire-macros_2.13" + ], + "org.typelevel:spire_2.12": [ + "org.typelevel:algebra_2.12", + "org.typelevel:spire-macros_2.12", + "org.typelevel:spire-platform_2.12", + "org.typelevel:spire-util_2.12" + ], + "org.typelevel:spire_2.13": [ + "org.typelevel:algebra_2.13", + "org.typelevel:spire-macros_2.13", + "org.typelevel:spire-platform_2.13", + "org.typelevel:spire-util_2.13" + ], + "pl.edu.icm:JLargeArrays": [ + "org.apache.commons:commons-math3" + ], "software.amazon.awssdk:apache-client": [ "commons-codec:commons-codec", "org.apache.httpcomponents:httpclient", @@ -9540,6 +9889,13 @@ "com.github.stephenc.findbugs:findbugs-annotations": [ "edu.umd.cs.findbugs.annotations" ], + "com.github.wendykierp:JTransforms": [ + "org.jtransforms.dct", + "org.jtransforms.dht", + "org.jtransforms.dst", + "org.jtransforms.fft", + "org.jtransforms.utils" + ], "com.google.android:annotations": [ "android.annotation" ], @@ -10992,6 +11348,11 @@ "com.squareup.kotlinpoet.jvm", "com.squareup.kotlinpoet.tags" ], + "com.sun.istack:istack-commons-runtime": [ + "com.sun.istack", + "com.sun.istack.localization", + "com.sun.istack.logging" + ], "com.sun.jersey.contribs:jersey-guice": [ "com.sun.jersey.guice", "com.sun.jersey.guice.spi.container", @@ -11323,6 +11684,15 @@ "org.apache.commons.pool", "org.apache.commons.pool.impl" ], + "dev.ludovic.netlib:arpack": [ + "dev.ludovic.netlib.arpack" + ], + "dev.ludovic.netlib:blas": [ + "dev.ludovic.netlib.blas" + ], + "dev.ludovic.netlib:lapack": [ + "dev.ludovic.netlib.lapack" + ], "dnsjava:dnsjava": [ "org.xbill.DNS", "org.xbill.DNS.config", @@ -12813,6 +13183,14 @@ "py4j.model", "py4j.reflection" ], + "net.sourceforge.f2j:arpack_combined_all": [ + "org.j_paine.formatter", + "org.netlib.arpack", + "org.netlib.blas", + "org.netlib.err", + "org.netlib.lapack", + "org.netlib.util" + ], "org.antlr:ST4": [ "org.stringtemplate.v4", "org.stringtemplate.v4.compiler", @@ -20510,6 +20888,22 @@ "org.sparkproject.spark_core.protobuf", "org.sparkproject.spark_core.protobuf.compiler" ], + "org.apache.spark:spark-graphx_2.12": [ + "org.apache.spark.graphx", + "org.apache.spark.graphx.impl", + "org.apache.spark.graphx.lib", + "org.apache.spark.graphx.util", + "org.apache.spark.graphx.util.collection", + "org.apache.spark.unused" + ], + "org.apache.spark:spark-graphx_2.13": [ + "org.apache.spark.graphx", + "org.apache.spark.graphx.impl", + "org.apache.spark.graphx.lib", + "org.apache.spark.graphx.util", + "org.apache.spark.graphx.util.collection", + "org.apache.spark.unused" + ], "org.apache.spark:spark-hive_2.12": [ "org.apache.hadoop.hive.ql.io", "org.apache.hadoop.hive.ql.io.orc", @@ -20546,6 +20940,184 @@ "org.apache.spark.launcher", "org.apache.spark.unused" ], + 
"org.apache.spark:spark-mllib-local_2.12": [ + "org.apache.spark.ml.impl", + "org.apache.spark.ml.linalg", + "org.apache.spark.ml.stat.distribution", + "org.apache.spark.unused" + ], + "org.apache.spark:spark-mllib-local_2.13": [ + "org.apache.spark.ml.impl", + "org.apache.spark.ml.linalg", + "org.apache.spark.ml.stat.distribution", + "org.apache.spark.unused" + ], + "org.apache.spark:spark-mllib_2.12": [ + "org.apache.spark.ml", + "org.apache.spark.ml.ann", + "org.apache.spark.ml.attribute", + "org.apache.spark.ml.classification", + "org.apache.spark.ml.clustering", + "org.apache.spark.ml.evaluation", + "org.apache.spark.ml.feature", + "org.apache.spark.ml.fpm", + "org.apache.spark.ml.image", + "org.apache.spark.ml.linalg", + "org.apache.spark.ml.optim", + "org.apache.spark.ml.optim.aggregator", + "org.apache.spark.ml.optim.loss", + "org.apache.spark.ml.param", + "org.apache.spark.ml.param.shared", + "org.apache.spark.ml.python", + "org.apache.spark.ml.r", + "org.apache.spark.ml.recommendation", + "org.apache.spark.ml.regression", + "org.apache.spark.ml.source.image", + "org.apache.spark.ml.source.libsvm", + "org.apache.spark.ml.stat", + "org.apache.spark.ml.tree", + "org.apache.spark.ml.tree.impl", + "org.apache.spark.ml.tuning", + "org.apache.spark.ml.util", + "org.apache.spark.mllib", + "org.apache.spark.mllib.api.python", + "org.apache.spark.mllib.classification", + "org.apache.spark.mllib.classification.impl", + "org.apache.spark.mllib.clustering", + "org.apache.spark.mllib.evaluation", + "org.apache.spark.mllib.evaluation.binary", + "org.apache.spark.mllib.feature", + "org.apache.spark.mllib.fpm", + "org.apache.spark.mllib.linalg", + "org.apache.spark.mllib.linalg.distributed", + "org.apache.spark.mllib.optimization", + "org.apache.spark.mllib.pmml", + "org.apache.spark.mllib.pmml.export", + "org.apache.spark.mllib.random", + "org.apache.spark.mllib.rdd", + "org.apache.spark.mllib.recommendation", + "org.apache.spark.mllib.regression", + "org.apache.spark.mllib.regression.impl", + "org.apache.spark.mllib.stat", + "org.apache.spark.mllib.stat.correlation", + "org.apache.spark.mllib.stat.distribution", + "org.apache.spark.mllib.stat.test", + "org.apache.spark.mllib.tree", + "org.apache.spark.mllib.tree.configuration", + "org.apache.spark.mllib.tree.impurity", + "org.apache.spark.mllib.tree.loss", + "org.apache.spark.mllib.tree.model", + "org.apache.spark.mllib.util", + "org.apache.spark.unused", + "org.sparkproject.dmg.pmml", + "org.sparkproject.dmg.pmml.adapters", + "org.sparkproject.dmg.pmml.association", + "org.sparkproject.dmg.pmml.baseline", + "org.sparkproject.dmg.pmml.bayesian_network", + "org.sparkproject.dmg.pmml.clustering", + "org.sparkproject.dmg.pmml.gaussian_process", + "org.sparkproject.dmg.pmml.general_regression", + "org.sparkproject.dmg.pmml.mining", + "org.sparkproject.dmg.pmml.naive_bayes", + "org.sparkproject.dmg.pmml.nearest_neighbor", + "org.sparkproject.dmg.pmml.neural_network", + "org.sparkproject.dmg.pmml.regression", + "org.sparkproject.dmg.pmml.rule_set", + "org.sparkproject.dmg.pmml.scorecard", + "org.sparkproject.dmg.pmml.sequence", + "org.sparkproject.dmg.pmml.support_vector_machine", + "org.sparkproject.dmg.pmml.text", + "org.sparkproject.dmg.pmml.time_series", + "org.sparkproject.dmg.pmml.tree", + "org.sparkproject.jpmml.model", + "org.sparkproject.jpmml.model.annotations", + "org.sparkproject.jpmml.model.filters", + "org.sparkproject.jpmml.model.inlinetable", + "org.sparkproject.jpmml.model.visitors" + ], + "org.apache.spark:spark-mllib_2.13": [ + 
"org.apache.spark.ml", + "org.apache.spark.ml.ann", + "org.apache.spark.ml.attribute", + "org.apache.spark.ml.classification", + "org.apache.spark.ml.clustering", + "org.apache.spark.ml.evaluation", + "org.apache.spark.ml.feature", + "org.apache.spark.ml.fpm", + "org.apache.spark.ml.image", + "org.apache.spark.ml.linalg", + "org.apache.spark.ml.optim", + "org.apache.spark.ml.optim.aggregator", + "org.apache.spark.ml.optim.loss", + "org.apache.spark.ml.param", + "org.apache.spark.ml.param.shared", + "org.apache.spark.ml.python", + "org.apache.spark.ml.r", + "org.apache.spark.ml.recommendation", + "org.apache.spark.ml.regression", + "org.apache.spark.ml.source.image", + "org.apache.spark.ml.source.libsvm", + "org.apache.spark.ml.stat", + "org.apache.spark.ml.tree", + "org.apache.spark.ml.tree.impl", + "org.apache.spark.ml.tuning", + "org.apache.spark.ml.util", + "org.apache.spark.mllib", + "org.apache.spark.mllib.api.python", + "org.apache.spark.mllib.classification", + "org.apache.spark.mllib.classification.impl", + "org.apache.spark.mllib.clustering", + "org.apache.spark.mllib.evaluation", + "org.apache.spark.mllib.evaluation.binary", + "org.apache.spark.mllib.feature", + "org.apache.spark.mllib.fpm", + "org.apache.spark.mllib.linalg", + "org.apache.spark.mllib.linalg.distributed", + "org.apache.spark.mllib.optimization", + "org.apache.spark.mllib.pmml", + "org.apache.spark.mllib.pmml.export", + "org.apache.spark.mllib.random", + "org.apache.spark.mllib.rdd", + "org.apache.spark.mllib.recommendation", + "org.apache.spark.mllib.regression", + "org.apache.spark.mllib.regression.impl", + "org.apache.spark.mllib.stat", + "org.apache.spark.mllib.stat.correlation", + "org.apache.spark.mllib.stat.distribution", + "org.apache.spark.mllib.stat.test", + "org.apache.spark.mllib.tree", + "org.apache.spark.mllib.tree.configuration", + "org.apache.spark.mllib.tree.impurity", + "org.apache.spark.mllib.tree.loss", + "org.apache.spark.mllib.tree.model", + "org.apache.spark.mllib.util", + "org.apache.spark.unused", + "org.sparkproject.dmg.pmml", + "org.sparkproject.dmg.pmml.adapters", + "org.sparkproject.dmg.pmml.association", + "org.sparkproject.dmg.pmml.baseline", + "org.sparkproject.dmg.pmml.bayesian_network", + "org.sparkproject.dmg.pmml.clustering", + "org.sparkproject.dmg.pmml.gaussian_process", + "org.sparkproject.dmg.pmml.general_regression", + "org.sparkproject.dmg.pmml.mining", + "org.sparkproject.dmg.pmml.naive_bayes", + "org.sparkproject.dmg.pmml.nearest_neighbor", + "org.sparkproject.dmg.pmml.neural_network", + "org.sparkproject.dmg.pmml.regression", + "org.sparkproject.dmg.pmml.rule_set", + "org.sparkproject.dmg.pmml.scorecard", + "org.sparkproject.dmg.pmml.sequence", + "org.sparkproject.dmg.pmml.support_vector_machine", + "org.sparkproject.dmg.pmml.text", + "org.sparkproject.dmg.pmml.time_series", + "org.sparkproject.dmg.pmml.tree", + "org.sparkproject.jpmml.model", + "org.sparkproject.jpmml.model.annotations", + "org.sparkproject.jpmml.model.filters", + "org.sparkproject.jpmml.model.inlinetable", + "org.sparkproject.jpmml.model.visitors" + ], "org.apache.spark:spark-network-common_2.12": [ "org.apache.spark.network", "org.apache.spark.network.buffer", @@ -21846,6 +22418,33 @@ "org.glassfish.hk2:osgi-resource-locator": [ "org.glassfish.hk2.osgiresourcelocator" ], + "org.glassfish.jaxb:jaxb-runtime": [ + "com.sun.xml.bind", + "com.sun.xml.bind.annotation", + "com.sun.xml.bind.api", + "com.sun.xml.bind.api.impl", + "com.sun.xml.bind.marshaller", + "com.sun.xml.bind.unmarshaller", + 
"com.sun.xml.bind.util", + "com.sun.xml.bind.v2", + "com.sun.xml.bind.v2.bytecode", + "com.sun.xml.bind.v2.model.annotation", + "com.sun.xml.bind.v2.model.core", + "com.sun.xml.bind.v2.model.impl", + "com.sun.xml.bind.v2.model.nav", + "com.sun.xml.bind.v2.model.runtime", + "com.sun.xml.bind.v2.model.util", + "com.sun.xml.bind.v2.runtime", + "com.sun.xml.bind.v2.runtime.output", + "com.sun.xml.bind.v2.runtime.property", + "com.sun.xml.bind.v2.runtime.reflect", + "com.sun.xml.bind.v2.runtime.reflect.opt", + "com.sun.xml.bind.v2.runtime.unmarshaller", + "com.sun.xml.bind.v2.schemagen", + "com.sun.xml.bind.v2.schemagen.episode", + "com.sun.xml.bind.v2.schemagen.xmlschema", + "com.sun.xml.bind.v2.util" + ], "org.glassfish.jersey.containers:jersey-container-servlet": [ "org.glassfish.jersey.servlet.async", "org.glassfish.jersey.servlet.init", @@ -22551,6 +23150,84 @@ "org.scalactic.exceptions", "org.scalactic.source" ], + "org.scalanlp:breeze-macros_2.12": [ + "breeze.gymnastics", + "breeze.macros" + ], + "org.scalanlp:breeze-macros_2.13": [ + "breeze.gymnastics", + "breeze.macros" + ], + "org.scalanlp:breeze_2.12": [ + "breeze.collection.compat", + "breeze.collection.immutable", + "breeze.collection.mutable", + "breeze.compat", + "breeze.features", + "breeze.generic", + "breeze.integrate", + "breeze.integrate.quasimontecarlo", + "breeze.interpolation", + "breeze.io", + "breeze.linalg", + "breeze.linalg.operators", + "breeze.linalg.support", + "breeze.math", + "breeze.numerics", + "breeze.numerics.constants", + "breeze.numerics.financial", + "breeze.numerics.units", + "breeze.optimize", + "breeze.optimize.flow", + "breeze.optimize.linear", + "breeze.optimize.proximal", + "breeze.polynomial", + "breeze.signal", + "breeze.signal.support", + "breeze.stats", + "breeze.stats.distributions", + "breeze.stats.hypothesis", + "breeze.stats.mcmc", + "breeze.stats.random", + "breeze.stats.regression", + "breeze.storage", + "breeze.util" + ], + "org.scalanlp:breeze_2.13": [ + "breeze.collection.compat", + "breeze.collection.immutable", + "breeze.collection.mutable", + "breeze.compat", + "breeze.features", + "breeze.generic", + "breeze.integrate", + "breeze.integrate.quasimontecarlo", + "breeze.interpolation", + "breeze.io", + "breeze.linalg", + "breeze.linalg.operators", + "breeze.linalg.support", + "breeze.math", + "breeze.numerics", + "breeze.numerics.constants", + "breeze.numerics.financial", + "breeze.numerics.units", + "breeze.optimize", + "breeze.optimize.flow", + "breeze.optimize.linear", + "breeze.optimize.proximal", + "breeze.polynomial", + "breeze.signal", + "breeze.signal.support", + "breeze.stats", + "breeze.stats.distributions", + "breeze.stats.hypothesis", + "breeze.stats.mcmc", + "breeze.stats.random", + "breeze.stats.regression", + "breeze.storage", + "breeze.util" + ], "org.scalatest:scalatest-compatible": [ "org.scalatest.compatible" ], @@ -23173,6 +23850,54 @@ "org.tukaani.xz.rangecoder", "org.tukaani.xz.simple" ], + "org.typelevel:algebra_2.12": [ + "algebra", + "algebra.instances", + "algebra.instances.all", + "algebra.instances.array", + "algebra.instances.bigDecimal", + "algebra.instances.bigInt", + "algebra.instances.bitSet", + "algebra.instances.boolean", + "algebra.instances.byte", + "algebra.instances.char", + "algebra.instances.int", + "algebra.instances.list", + "algebra.instances.long", + "algebra.instances.map", + "algebra.instances.option", + "algebra.instances.set", + "algebra.instances.short", + "algebra.instances.string", + "algebra.instances.tuple", + 
"algebra.instances.unit", + "algebra.lattice", + "algebra.ring" + ], + "org.typelevel:algebra_2.13": [ + "algebra", + "algebra.instances", + "algebra.instances.all", + "algebra.instances.array", + "algebra.instances.bigDecimal", + "algebra.instances.bigInt", + "algebra.instances.bitSet", + "algebra.instances.boolean", + "algebra.instances.byte", + "algebra.instances.char", + "algebra.instances.int", + "algebra.instances.list", + "algebra.instances.long", + "algebra.instances.map", + "algebra.instances.option", + "algebra.instances.set", + "algebra.instances.short", + "algebra.instances.string", + "algebra.instances.tuple", + "algebra.instances.unit", + "algebra.lattice", + "algebra.ring" + ], "org.typelevel:cats-core_2.12": [ "cats", "cats.arrow", @@ -23277,6 +24002,71 @@ "org.typelevel:jawn-parser_2.13": [ "org.typelevel.jawn" ], + "org.typelevel:spire-macros_2.12": [ + "spire.macros", + "spire.macros.machinist", + "spire.scalacompat" + ], + "org.typelevel:spire-macros_2.13": [ + "spire.macros", + "spire.macros.machinist" + ], + "org.typelevel:spire-platform_2.12": [ + "spire", + "spire.scalacompat" + ], + "org.typelevel:spire-platform_2.13": [ + "spire" + ], + "org.typelevel:spire-util_2.12": [ + "spire.scalacompat", + "spire.util" + ], + "org.typelevel:spire-util_2.13": [ + "spire.util" + ], + "org.typelevel:spire_2.12": [ + "spire", + "spire.algebra", + "spire.algebra.free", + "spire.algebra.lattice", + "spire.algebra.partial", + "spire.macros", + "spire.macros.fpf", + "spire.math", + "spire.math.interval", + "spire.math.poly", + "spire.math.prime", + "spire.optional", + "spire.optional.unicode", + "spire.random", + "spire.random.rng", + "spire.scalacompat", + "spire.std", + "spire.syntax", + "spire.syntax.std" + ], + "org.typelevel:spire_2.13": [ + "spire", + "spire.algebra", + "spire.algebra.free", + "spire.algebra.lattice", + "spire.algebra.partial", + "spire.macros", + "spire.macros.fpf", + "spire.math", + "spire.math.interval", + "spire.math.poly", + "spire.math.prime", + "spire.optional", + "spire.optional.unicode", + "spire.random", + "spire.random.rng", + "spire.scalacompat", + "spire.std", + "spire.syntax", + "spire.syntax.std" + ], "org.xerial.snappy:snappy-java": [ "org.xerial.snappy", "org.xerial.snappy.buffer", @@ -23315,6 +24105,9 @@ "org.apache.oro.text.regex", "org.apache.oro.util" ], + "pl.edu.icm:JLargeArrays": [ + "pl.edu.icm.jlargearrays" + ], "ru.vyarus:generics-resolver": [ "ru.vyarus.java.generics.resolver", "ru.vyarus.java.generics.resolver.context", @@ -23788,6 +24581,8 @@ "com.github.pjfanning:jersey-json:jar:sources", "com.github.stephenc.findbugs:findbugs-annotations", "com.github.stephenc.findbugs:findbugs-annotations:jar:sources", + "com.github.wendykierp:JTransforms", + "com.github.wendykierp:JTransforms:jar:sources", "com.google.android:annotations", "com.google.android:annotations:jar:sources", "com.google.api-client:google-api-client", @@ -24011,6 +24806,8 @@ "com.squareup:javapoet:jar:sources", "com.squareup:kotlinpoet-jvm", "com.squareup:kotlinpoet-jvm:jar:sources", + "com.sun.istack:istack-commons-runtime", + "com.sun.istack:istack-commons-runtime:jar:sources", "com.sun.jersey.contribs:jersey-guice", "com.sun.jersey.contribs:jersey-guice:jar:sources", "com.sun.jersey:jersey-client", @@ -24069,6 +24866,12 @@ "commons-net:commons-net:jar:sources", "commons-pool:commons-pool", "commons-pool:commons-pool:jar:sources", + "dev.ludovic.netlib:arpack", + "dev.ludovic.netlib:arpack:jar:sources", + "dev.ludovic.netlib:blas", + 
"dev.ludovic.netlib:blas:jar:sources", + "dev.ludovic.netlib:lapack", + "dev.ludovic.netlib:lapack:jar:sources", "dnsjava:dnsjava", "dnsjava:dnsjava:jar:sources", "io.airlift:aircompressor", @@ -24388,6 +25191,7 @@ "net.sf.opencsv:opencsv:jar:sources", "net.sf.py4j:py4j", "net.sf.py4j:py4j:jar:sources", + "net.sourceforge.f2j:arpack_combined_all", "org.antlr:ST4", "org.antlr:ST4:jar:sources", "org.antlr:antlr-runtime", @@ -24639,6 +25443,10 @@ "org.apache.spark:spark-core_2.12:jar:sources", "org.apache.spark:spark-core_2.13", "org.apache.spark:spark-core_2.13:jar:sources", + "org.apache.spark:spark-graphx_2.12", + "org.apache.spark:spark-graphx_2.12:jar:sources", + "org.apache.spark:spark-graphx_2.13", + "org.apache.spark:spark-graphx_2.13:jar:sources", "org.apache.spark:spark-hive_2.12", "org.apache.spark:spark-hive_2.12:jar:sources", "org.apache.spark:spark-hive_2.13", @@ -24651,6 +25459,14 @@ "org.apache.spark:spark-launcher_2.12:jar:sources", "org.apache.spark:spark-launcher_2.13", "org.apache.spark:spark-launcher_2.13:jar:sources", + "org.apache.spark:spark-mllib-local_2.12", + "org.apache.spark:spark-mllib-local_2.12:jar:sources", + "org.apache.spark:spark-mllib-local_2.13", + "org.apache.spark:spark-mllib-local_2.13:jar:sources", + "org.apache.spark:spark-mllib_2.12", + "org.apache.spark:spark-mllib_2.12:jar:sources", + "org.apache.spark:spark-mllib_2.13", + "org.apache.spark:spark-mllib_2.13:jar:sources", "org.apache.spark:spark-network-common_2.12", "org.apache.spark:spark-network-common_2.12:jar:sources", "org.apache.spark:spark-network-common_2.13", @@ -24785,6 +25601,8 @@ "org.glassfish.hk2:hk2-utils:jar:sources", "org.glassfish.hk2:osgi-resource-locator", "org.glassfish.hk2:osgi-resource-locator:jar:sources", + "org.glassfish.jaxb:jaxb-runtime", + "org.glassfish.jaxb:jaxb-runtime:jar:sources", "org.glassfish.jersey.containers:jersey-container-servlet", "org.glassfish.jersey.containers:jersey-container-servlet-core", "org.glassfish.jersey.containers:jersey-container-servlet-core:jar:sources", @@ -24919,6 +25737,14 @@ "org.scalactic:scalactic_2.12:jar:sources", "org.scalactic:scalactic_2.13", "org.scalactic:scalactic_2.13:jar:sources", + "org.scalanlp:breeze-macros_2.12", + "org.scalanlp:breeze-macros_2.12:jar:sources", + "org.scalanlp:breeze-macros_2.13", + "org.scalanlp:breeze-macros_2.13:jar:sources", + "org.scalanlp:breeze_2.12", + "org.scalanlp:breeze_2.12:jar:sources", + "org.scalanlp:breeze_2.13", + "org.scalanlp:breeze_2.13:jar:sources", "org.scalatest:scalatest-compatible", "org.scalatest:scalatest-compatible:jar:sources", "org.scalatest:scalatest-core_2.12", @@ -25003,6 +25829,10 @@ "org.threeten:threetenbp:jar:sources", "org.tukaani:xz", "org.tukaani:xz:jar:sources", + "org.typelevel:algebra_2.12", + "org.typelevel:algebra_2.12:jar:sources", + "org.typelevel:algebra_2.13", + "org.typelevel:algebra_2.13:jar:sources", "org.typelevel:cats-core_2.12", "org.typelevel:cats-core_2.12:jar:sources", "org.typelevel:cats-core_2.13", @@ -25015,12 +25845,30 @@ "org.typelevel:jawn-parser_2.12:jar:sources", "org.typelevel:jawn-parser_2.13", "org.typelevel:jawn-parser_2.13:jar:sources", + "org.typelevel:spire-macros_2.12", + "org.typelevel:spire-macros_2.12:jar:sources", + "org.typelevel:spire-macros_2.13", + "org.typelevel:spire-macros_2.13:jar:sources", + "org.typelevel:spire-platform_2.12", + "org.typelevel:spire-platform_2.12:jar:sources", + "org.typelevel:spire-platform_2.13", + "org.typelevel:spire-platform_2.13:jar:sources", + "org.typelevel:spire-util_2.12", + 
"org.typelevel:spire-util_2.12:jar:sources", + "org.typelevel:spire-util_2.13", + "org.typelevel:spire-util_2.13:jar:sources", + "org.typelevel:spire_2.12", + "org.typelevel:spire_2.12:jar:sources", + "org.typelevel:spire_2.13", + "org.typelevel:spire_2.13:jar:sources", "org.xerial.snappy:snappy-java", "org.xerial.snappy:snappy-java:jar:sources", "org.yaml:snakeyaml", "org.yaml:snakeyaml:jar:sources", "oro:oro", "oro:oro:jar:sources", + "pl.edu.icm:JLargeArrays", + "pl.edu.icm:JLargeArrays:jar:sources", "ru.vyarus:generics-resolver", "ru.vyarus:generics-resolver:jar:sources", "software.amazon.awssdk:annotations", @@ -25212,6 +26060,8 @@ "com.github.pjfanning:jersey-json:jar:sources", "com.github.stephenc.findbugs:findbugs-annotations", "com.github.stephenc.findbugs:findbugs-annotations:jar:sources", + "com.github.wendykierp:JTransforms", + "com.github.wendykierp:JTransforms:jar:sources", "com.google.android:annotations", "com.google.android:annotations:jar:sources", "com.google.api-client:google-api-client", @@ -25435,6 +26285,8 @@ "com.squareup:javapoet:jar:sources", "com.squareup:kotlinpoet-jvm", "com.squareup:kotlinpoet-jvm:jar:sources", + "com.sun.istack:istack-commons-runtime", + "com.sun.istack:istack-commons-runtime:jar:sources", "com.sun.jersey.contribs:jersey-guice", "com.sun.jersey.contribs:jersey-guice:jar:sources", "com.sun.jersey:jersey-client", @@ -25493,6 +26345,12 @@ "commons-net:commons-net:jar:sources", "commons-pool:commons-pool", "commons-pool:commons-pool:jar:sources", + "dev.ludovic.netlib:arpack", + "dev.ludovic.netlib:arpack:jar:sources", + "dev.ludovic.netlib:blas", + "dev.ludovic.netlib:blas:jar:sources", + "dev.ludovic.netlib:lapack", + "dev.ludovic.netlib:lapack:jar:sources", "dnsjava:dnsjava", "dnsjava:dnsjava:jar:sources", "io.airlift:aircompressor", @@ -25812,6 +26670,7 @@ "net.sf.opencsv:opencsv:jar:sources", "net.sf.py4j:py4j", "net.sf.py4j:py4j:jar:sources", + "net.sourceforge.f2j:arpack_combined_all", "org.antlr:ST4", "org.antlr:ST4:jar:sources", "org.antlr:antlr-runtime", @@ -26063,6 +26922,10 @@ "org.apache.spark:spark-core_2.12:jar:sources", "org.apache.spark:spark-core_2.13", "org.apache.spark:spark-core_2.13:jar:sources", + "org.apache.spark:spark-graphx_2.12", + "org.apache.spark:spark-graphx_2.12:jar:sources", + "org.apache.spark:spark-graphx_2.13", + "org.apache.spark:spark-graphx_2.13:jar:sources", "org.apache.spark:spark-hive_2.12", "org.apache.spark:spark-hive_2.12:jar:sources", "org.apache.spark:spark-hive_2.13", @@ -26075,6 +26938,14 @@ "org.apache.spark:spark-launcher_2.12:jar:sources", "org.apache.spark:spark-launcher_2.13", "org.apache.spark:spark-launcher_2.13:jar:sources", + "org.apache.spark:spark-mllib-local_2.12", + "org.apache.spark:spark-mllib-local_2.12:jar:sources", + "org.apache.spark:spark-mllib-local_2.13", + "org.apache.spark:spark-mllib-local_2.13:jar:sources", + "org.apache.spark:spark-mllib_2.12", + "org.apache.spark:spark-mllib_2.12:jar:sources", + "org.apache.spark:spark-mllib_2.13", + "org.apache.spark:spark-mllib_2.13:jar:sources", "org.apache.spark:spark-network-common_2.12", "org.apache.spark:spark-network-common_2.12:jar:sources", "org.apache.spark:spark-network-common_2.13", @@ -26209,6 +27080,8 @@ "org.glassfish.hk2:hk2-utils:jar:sources", "org.glassfish.hk2:osgi-resource-locator", "org.glassfish.hk2:osgi-resource-locator:jar:sources", + "org.glassfish.jaxb:jaxb-runtime", + "org.glassfish.jaxb:jaxb-runtime:jar:sources", "org.glassfish.jersey.containers:jersey-container-servlet", 
"org.glassfish.jersey.containers:jersey-container-servlet-core", "org.glassfish.jersey.containers:jersey-container-servlet-core:jar:sources", @@ -26343,6 +27216,14 @@ "org.scalactic:scalactic_2.12:jar:sources", "org.scalactic:scalactic_2.13", "org.scalactic:scalactic_2.13:jar:sources", + "org.scalanlp:breeze-macros_2.12", + "org.scalanlp:breeze-macros_2.12:jar:sources", + "org.scalanlp:breeze-macros_2.13", + "org.scalanlp:breeze-macros_2.13:jar:sources", + "org.scalanlp:breeze_2.12", + "org.scalanlp:breeze_2.12:jar:sources", + "org.scalanlp:breeze_2.13", + "org.scalanlp:breeze_2.13:jar:sources", "org.scalatest:scalatest-compatible", "org.scalatest:scalatest-compatible:jar:sources", "org.scalatest:scalatest-core_2.12", @@ -26427,6 +27308,10 @@ "org.threeten:threetenbp:jar:sources", "org.tukaani:xz", "org.tukaani:xz:jar:sources", + "org.typelevel:algebra_2.12", + "org.typelevel:algebra_2.12:jar:sources", + "org.typelevel:algebra_2.13", + "org.typelevel:algebra_2.13:jar:sources", "org.typelevel:cats-core_2.12", "org.typelevel:cats-core_2.12:jar:sources", "org.typelevel:cats-core_2.13", @@ -26439,12 +27324,30 @@ "org.typelevel:jawn-parser_2.12:jar:sources", "org.typelevel:jawn-parser_2.13", "org.typelevel:jawn-parser_2.13:jar:sources", + "org.typelevel:spire-macros_2.12", + "org.typelevel:spire-macros_2.12:jar:sources", + "org.typelevel:spire-macros_2.13", + "org.typelevel:spire-macros_2.13:jar:sources", + "org.typelevel:spire-platform_2.12", + "org.typelevel:spire-platform_2.12:jar:sources", + "org.typelevel:spire-platform_2.13", + "org.typelevel:spire-platform_2.13:jar:sources", + "org.typelevel:spire-util_2.12", + "org.typelevel:spire-util_2.12:jar:sources", + "org.typelevel:spire-util_2.13", + "org.typelevel:spire-util_2.13:jar:sources", + "org.typelevel:spire_2.12", + "org.typelevel:spire_2.12:jar:sources", + "org.typelevel:spire_2.13", + "org.typelevel:spire_2.13:jar:sources", "org.xerial.snappy:snappy-java", "org.xerial.snappy:snappy-java:jar:sources", "org.yaml:snakeyaml", "org.yaml:snakeyaml:jar:sources", "oro:oro", "oro:oro:jar:sources", + "pl.edu.icm:JLargeArrays", + "pl.edu.icm:JLargeArrays:jar:sources", "ru.vyarus:generics-resolver", "ru.vyarus:generics-resolver:jar:sources", "software.amazon.awssdk:annotations", @@ -31281,6 +32184,54 @@ "org.apache.spark.sql.hive.orc.OrcFileFormat" ] }, + "org.apache.spark:spark-mllib_2.12": { + "org.apache.spark.ml.util.MLFormatRegister": [ + "org.apache.spark.ml.clustering.InternalKMeansModelWriter", + "org.apache.spark.ml.clustering.PMMLKMeansModelWriter", + "org.apache.spark.ml.regression.InternalLinearRegressionModelWriter", + "org.apache.spark.ml.regression.PMMLLinearRegressionModelWriter" + ], + "org.apache.spark.sql.sources.DataSourceRegister": [ + "org.apache.spark.ml.source.image.ImageFileFormat", + "org.apache.spark.ml.source.libsvm.LibSVMFileFormat" + ] + }, + "org.apache.spark:spark-mllib_2.12:jar:sources": { + "org.apache.spark.ml.util.MLFormatRegister": [ + "org.apache.spark.ml.clustering.InternalKMeansModelWriter", + "org.apache.spark.ml.clustering.PMMLKMeansModelWriter", + "org.apache.spark.ml.regression.InternalLinearRegressionModelWriter", + "org.apache.spark.ml.regression.PMMLLinearRegressionModelWriter" + ], + "org.apache.spark.sql.sources.DataSourceRegister": [ + "org.apache.spark.ml.source.image.ImageFileFormat", + "org.apache.spark.ml.source.libsvm.LibSVMFileFormat" + ] + }, + "org.apache.spark:spark-mllib_2.13": { + "org.apache.spark.ml.util.MLFormatRegister": [ + 
"org.apache.spark.ml.clustering.InternalKMeansModelWriter", + "org.apache.spark.ml.clustering.PMMLKMeansModelWriter", + "org.apache.spark.ml.regression.InternalLinearRegressionModelWriter", + "org.apache.spark.ml.regression.PMMLLinearRegressionModelWriter" + ], + "org.apache.spark.sql.sources.DataSourceRegister": [ + "org.apache.spark.ml.source.image.ImageFileFormat", + "org.apache.spark.ml.source.libsvm.LibSVMFileFormat" + ] + }, + "org.apache.spark:spark-mllib_2.13:jar:sources": { + "org.apache.spark.ml.util.MLFormatRegister": [ + "org.apache.spark.ml.clustering.InternalKMeansModelWriter", + "org.apache.spark.ml.clustering.PMMLKMeansModelWriter", + "org.apache.spark.ml.regression.InternalLinearRegressionModelWriter", + "org.apache.spark.ml.regression.PMMLLinearRegressionModelWriter" + ], + "org.apache.spark.sql.sources.DataSourceRegister": [ + "org.apache.spark.ml.source.image.ImageFileFormat", + "org.apache.spark.ml.source.libsvm.LibSVMFileFormat" + ] + }, "org.apache.spark:spark-sql_2.12": { "org.apache.spark.deploy.history.EventFilterBuilder": [ "org.apache.spark.sql.execution.history.SQLEventFilterBuilder" @@ -33041,6 +33992,16 @@ "org.jvnet.hk2.external.generator.ServiceLocatorGeneratorImpl" ] }, + "org.glassfish.jaxb:jaxb-runtime": { + "javax.xml.bind.JAXBContext": [ + "com.sun.xml.bind.v2.ContextFactory" + ] + }, + "org.glassfish.jaxb:jaxb-runtime:jar:sources": { + "javax.xml.bind.JAXBContext": [ + "com.sun.xml.bind.v2.ContextFactory" + ] + }, "org.glassfish.jersey.containers:jersey-container-servlet": { "javax.servlet.ServletContainerInitializer": [ "org.glassfish.jersey.servlet.init.JerseyServletContainerInitializer" @@ -33295,6 +34256,7 @@ "io.netty:netty-all:jar:sources", "io.netty:netty-resolver-dns-native-macos:jar:sources", "javax.servlet:jsp-api:jar:sources", + "net.sourceforge.f2j:arpack_combined_all:jar:sources", "org.apache.curator:apache-curator:jar:sources", "org.apache.curator:apache-curator:pom:sources", "org.apache.derby:derby:jar:sources", diff --git a/spark/src/main/scala/ai/chronon/spark/TableUtils.scala b/spark/src/main/scala/ai/chronon/spark/TableUtils.scala index 58a296e894..a0c9883527 100644 --- a/spark/src/main/scala/ai/chronon/spark/TableUtils.scala +++ b/spark/src/main/scala/ai/chronon/spark/TableUtils.scala @@ -76,6 +76,7 @@ class TableUtils(@transient val sparkSession: SparkSession) extends Serializable sparkSession.conf.get("spark.chronon.table_write.cache.blocking", "false").toBoolean private val tableWriteFormat = sparkSession.conf.get("spark.chronon.table_write.format", "").toLowerCase + private val tableWriteWarehouse = sparkSession.conf.get("spark.chronon.table_write.warehouse", "").toLowerCase // transient because the format provider is not always serializable. 
// for example, BigQueryImpl during reflecting with bq flavor @@ -216,7 +217,13 @@ class TableUtils(@transient val sparkSession: SparkSession) extends Serializable try { sql( CreationUtils - .createTableSql(tableName, df.schema, partitionColumns, tableProperties, fileFormat, tableWriteFormat)) + .createTableSql(tableName, + df.schema, + partitionColumns, + tableProperties, + fileFormat, + tableWriteFormat, + tableWriteWarehouse)) } catch { case _: TableAlreadyExistsException => logger.info(s"Table $tableName already exists, skipping creation") diff --git a/spark/src/main/scala/ai/chronon/spark/format/CreationUtils.scala b/spark/src/main/scala/ai/chronon/spark/format/CreationUtils.scala index b9332585af..27ac96d8bb 100644 --- a/spark/src/main/scala/ai/chronon/spark/format/CreationUtils.scala +++ b/spark/src/main/scala/ai/chronon/spark/format/CreationUtils.scala @@ -11,7 +11,8 @@ object CreationUtils { partitionColumns: List[String], tableProperties: Map[String, String], fileFormatString: String, - tableTypeString: String): String = { + tableTypeString: String, + locationString: String = ""): String = { require( tableTypeString.isEmpty || ALLOWED_TABLE_TYPES.contains(tableTypeString.toLowerCase), @@ -27,6 +28,7 @@ object CreationUtils { | ${noPartitions.toDDL} |) |${if (tableTypeString.isEmpty) "" else f"USING ${tableTypeString}"} + |${if (locationString.isEmpty) "" else f"LOCATION '$locationString'"} |""".stripMargin val partitionFragment = if (partitionColumns != null && partitionColumns.nonEmpty) { diff --git a/tools/build_rules/dependencies/maven_repository.bzl b/tools/build_rules/dependencies/maven_repository.bzl index c964a14f1f..7695b26558 100644 --- a/tools/build_rules/dependencies/maven_repository.bzl +++ b/tools/build_rules/dependencies/maven_repository.bzl @@ -191,12 +191,14 @@ maven_repository = repository( "org.apache.spark:spark-hive_2.12:3.5.3", "org.apache.spark:spark-streaming_2.12:3.5.3", "org.apache.spark:spark-avro_2.12:3.5.3", + "org.apache.spark:spark-mllib_2.12:3.5.3", # Spark artifacts - for scala 2.13 "org.apache.spark:spark-sql_2.13:3.5.3", "org.apache.spark:spark-hive_2.13:3.5.3", "org.apache.spark:spark-streaming_2.13:3.5.3", "org.apache.spark:spark-avro_2.13:3.5.3", + "org.apache.spark:spark-mllib_2.13:3.5.3", # Circe - for scala 2.12 "io.circe:circe-core_2.12:0.14.9",
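For reviewers trying the change out: the test configuration above wires DelegatingBigQueryMetastoreCatalog in as the default session catalog. Below is a minimal sketch of the same wiring in application code, assuming the imports of the test file (BQMSCatalog, ResolvingFileIO, DelegatingBigQueryMetastoreCatalog) are in scope; the bucket, project, and location are the example values from the test, not required settings.

    import org.apache.spark.sql.SparkSession

    // Wires the delegating catalog in as the session default; values mirror the test above.
    val spark = SparkSession
      .builder()
      .config("spark.sql.defaultCatalog", "default_iceberg")
      .config("spark.sql.catalog.default_iceberg", classOf[DelegatingBigQueryMetastoreCatalog].getName)
      .config("spark.sql.catalog.default_iceberg.catalog-impl", classOf[BQMSCatalog].getName)
      .config("spark.sql.catalog.default_iceberg.io-impl", classOf[ResolvingFileIO].getName)
      .config("spark.sql.catalog.default_iceberg.warehouse", "gs://zipline-warehouse-canary/data/tables/")
      .config("spark.sql.catalog.default_iceberg.gcp_location", "us-central1")
      .config("spark.sql.catalog.default_iceberg.gcp_project", "canary-443022")
      // New in this change: route Chronon table writes to external parquet under a warehouse root.
      .config("spark.chronon.table_write.format", "PARQUET")
      .config("spark.chronon.table_write.warehouse", "gs://zipline-warehouse-canary/data/tables/")
      .getOrCreate()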
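The PARQUET branch of createTable registers the new table in BigQuery as an external table with CUSTOM hive partitioning. A sketch of the path templates it builds, with illustrative values for the URI and a single "ds" partition column:

    // Illustrative values; mirrors the template construction in the PARQUET branch.
    val uri = "gs://zipline-warehouse-canary/data/tables/data_tchow_external_parquet"
    val identityPartitions = List("ds")

    // BigQuery CUSTOM hive-partitioning templates take the form {column:TYPE}.
    val partitionPathSegment = identityPartitions.map(p => s"{${p}:STRING}").mkString("/")
    val explicitPartitionedUri = uri + s"/$partitionPathSegment"
    // => gs://.../data_tchow_external_parquet/{ds:STRING}

    // The external table definition itself points BigQuery at the parquet glob under the root.
    val glob = uri.stripSuffix("/") + "/*.parquet"
    // => gs://.../data_tchow_external_parquet/*.parquet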
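DelegatingTable appends TableCapability.OVERWRITE_DYNAMIC to the wrapped table's capabilities, which is what allows dynamic partition overwrites against these tables. A hypothetical write against the table created in the test, assuming the session and testDf from the sketches above:

    // With dynamic partition overwrite, only the partitions present in testDf are replaced.
    spark.conf.set("spark.sql.sources.partitionOverwriteMode", "dynamic")
    testDf.writeTo("data.tchow_external_parquet").overwritePartitions()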
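The new SparkBQUtils helpers map Spark connector identifiers onto BigQuery coordinates. A hypothetical round trip (project, dataset, and table names are made up):

    import org.apache.spark.sql.connector.catalog.Identifier

    val ident = Identifier.of(Array("some-project", "some_dataset"), "some_table")

    SparkBQUtils.toTableString(ident)
    // => "some-project.some_dataset.some_table"

    SparkBQUtils.toTableId(ident)
    // => TableId.of("some-project", "some_dataset", "some_table");
    //    with no project segment, parseBQIdentifier falls back to TableId.of(dataset, table).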
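Finally, the locationString parameter threads the configured warehouse through to the generated DDL. Roughly, with illustrative arguments (the exact clause ordering and column list follow CreationUtils):

    // Sketch only: table name, schema, and path are illustrative.
    val ddl = CreationUtils.createTableSql(
      "data.tchow_external_parquet",
      testDf.schema,
      List("ds"),
      Map.empty,
      "PARQUET",
      "PARQUET",
      "gs://zipline-warehouse-canary/data/tables/"
    )
    // CREATE TABLE data.tchow_external_parquet (
    //     id INT
    // )
    // USING PARQUET
    // LOCATION 'gs://zipline-warehouse-canary/data/tables/'
    // ... followed by the partition/property fragments as before.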