From 15dc6a617da83d80b5cb2cd8e5b737970be4b5da Mon Sep 17 00:00:00 2001 From: SemyonSinchenko Date: Wed, 21 Feb 2024 23:51:09 +0100 Subject: [PATCH 01/18] Refactoring of the GraphAr Spark - split datasources and core GraphAr - introduce Maven profiles for different versions of Spark - small fixes of PySpark part due to new naming and paths - new pom.xml files for subprojects On branch 320-datasources-refactoring Changes to be committed: modified: pyspark/tests/conftest.py new file: spark/datasources-32/pom.xml renamed: spark/src/main/java/com/alibaba/graphar/GeneralParams.java -> spark/datasources-32/src/main/java/com/alibaba/graphar/GeneralParams.java renamed: spark/src/main/scala/com/alibaba/graphar/datasources/GarCommitProtocol.scala -> spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarCommitProtocol.scala renamed: spark/src/main/scala/com/alibaba/graphar/datasources/GarDataSource.scala -> spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarDataSource.scala renamed: spark/src/main/scala/com/alibaba/graphar/datasources/GarScan.scala -> spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarScan.scala renamed: spark/src/main/scala/com/alibaba/graphar/datasources/GarScanBuilder.scala -> spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarScanBuilder.scala renamed: spark/src/main/scala/com/alibaba/graphar/datasources/GarTable.scala -> spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarTable.scala renamed: spark/src/main/scala/com/alibaba/graphar/datasources/GarWriterBuilder.scala -> spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarWriterBuilder.scala renamed: spark/src/main/scala/com/alibaba/graphar/datasources/csv/CSVWriterBuilder.scala -> spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/csv/CSVWriterBuilder.scala renamed: spark/src/main/scala/com/alibaba/graphar/datasources/orc/OrcOutputWriter.scala -> spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/orc/OrcOutputWriter.scala renamed: spark/src/main/scala/com/alibaba/graphar/datasources/orc/OrcWriteBuilder.scala -> spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/orc/OrcWriteBuilder.scala renamed: spark/src/main/scala/com/alibaba/graphar/datasources/parquet/ParquetWriterBuilder.scala -> spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/parquet/ParquetWriterBuilder.scala new file: spark/graphar/pom.xml new file: spark/graphar/src/main/java/com/alibaba/graphar/GeneralParams.java renamed: spark/src/main/scala/com/alibaba/graphar/EdgeInfo.scala -> spark/graphar/src/main/scala/com/alibaba/graphar/EdgeInfo.scala renamed: spark/src/main/scala/com/alibaba/graphar/GraphInfo.scala -> spark/graphar/src/main/scala/com/alibaba/graphar/GraphInfo.scala renamed: spark/src/main/scala/com/alibaba/graphar/VertexInfo.scala -> spark/graphar/src/main/scala/com/alibaba/graphar/VertexInfo.scala renamed: spark/src/main/scala/com/alibaba/graphar/example/GraphAr2Nebula.scala -> spark/graphar/src/main/scala/com/alibaba/graphar/example/GraphAr2Nebula.scala renamed: spark/src/main/scala/com/alibaba/graphar/example/GraphAr2Neo4j.scala -> spark/graphar/src/main/scala/com/alibaba/graphar/example/GraphAr2Neo4j.scala renamed: spark/src/main/scala/com/alibaba/graphar/example/Nebula2GraphAr.scala -> spark/graphar/src/main/scala/com/alibaba/graphar/example/Nebula2GraphAr.scala renamed: spark/src/main/scala/com/alibaba/graphar/example/Neo4j2GraphAr.scala -> spark/graphar/src/main/scala/com/alibaba/graphar/example/Neo4j2GraphAr.scala renamed: spark/src/main/scala/com/alibaba/graphar/graph/GraphReader.scala -> spark/graphar/src/main/scala/com/alibaba/graphar/graph/GraphReader.scala renamed: spark/src/main/scala/com/alibaba/graphar/graph/GraphTransformer.scala -> spark/graphar/src/main/scala/com/alibaba/graphar/graph/GraphTransformer.scala renamed: spark/src/main/scala/com/alibaba/graphar/graph/GraphWriter.scala -> spark/graphar/src/main/scala/com/alibaba/graphar/graph/GraphWriter.scala renamed: spark/src/main/scala/com/alibaba/graphar/importer/Neo4j.scala -> spark/graphar/src/main/scala/com/alibaba/graphar/importer/Neo4j.scala renamed: spark/src/main/scala/com/alibaba/graphar/reader/EdgeReader.scala -> spark/graphar/src/main/scala/com/alibaba/graphar/reader/EdgeReader.scala renamed: spark/src/main/scala/com/alibaba/graphar/reader/VertexReader.scala -> spark/graphar/src/main/scala/com/alibaba/graphar/reader/VertexReader.scala renamed: spark/src/main/scala/com/alibaba/graphar/util/DataFrameConcat.scala -> spark/graphar/src/main/scala/com/alibaba/graphar/util/DataFrameConcat.scala renamed: spark/src/main/scala/com/alibaba/graphar/util/FileSystem.scala -> spark/graphar/src/main/scala/com/alibaba/graphar/util/FileSystem.scala renamed: spark/src/main/scala/com/alibaba/graphar/util/IndexGenerator.scala -> spark/graphar/src/main/scala/com/alibaba/graphar/util/IndexGenerator.scala renamed: spark/src/main/scala/com/alibaba/graphar/util/Patitioner.scala -> spark/graphar/src/main/scala/com/alibaba/graphar/util/Patitioner.scala renamed: spark/src/main/scala/com/alibaba/graphar/util/Utils.scala -> spark/graphar/src/main/scala/com/alibaba/graphar/util/Utils.scala renamed: spark/src/main/scala/com/alibaba/graphar/writer/EdgeWriter.scala -> spark/graphar/src/main/scala/com/alibaba/graphar/writer/EdgeWriter.scala renamed: spark/src/main/scala/com/alibaba/graphar/writer/VertexWriter.scala -> spark/graphar/src/main/scala/com/alibaba/graphar/writer/VertexWriter.scala new file: spark/graphar/src/test/resources/gar-test renamed: spark/src/test/scala/com/alibaba/graphar/ComputeExample.scala -> spark/graphar/src/test/scala/com/alibaba/graphar/ComputeExample.scala renamed: spark/src/test/scala/com/alibaba/graphar/TestGraphInfo.scala -> spark/graphar/src/test/scala/com/alibaba/graphar/TestGraphInfo.scala renamed: spark/src/test/scala/com/alibaba/graphar/TestGraphReader.scala -> spark/graphar/src/test/scala/com/alibaba/graphar/TestGraphReader.scala renamed: spark/src/test/scala/com/alibaba/graphar/TestGraphTransformer.scala -> spark/graphar/src/test/scala/com/alibaba/graphar/TestGraphTransformer.scala renamed: spark/src/test/scala/com/alibaba/graphar/TestGraphWriter.scala -> spark/graphar/src/test/scala/com/alibaba/graphar/TestGraphWriter.scala renamed: spark/src/test/scala/com/alibaba/graphar/TestIndexGenerator.scala -> spark/graphar/src/test/scala/com/alibaba/graphar/TestIndexGenerator.scala renamed: spark/src/test/scala/com/alibaba/graphar/TestReader.scala -> spark/graphar/src/test/scala/com/alibaba/graphar/TestReader.scala renamed: spark/src/test/scala/com/alibaba/graphar/TestWriter.scala -> spark/graphar/src/test/scala/com/alibaba/graphar/TestWriter.scala renamed: spark/src/test/scala/com/alibaba/graphar/TransformExample.scala -> spark/graphar/src/test/scala/com/alibaba/graphar/TransformExample.scala modified: spark/pom.xml deleted: spark/src/test/resources/gar-test --- pyspark/tests/conftest.py | 2 +- spark/datasources-32/pom.xml | 188 +++++++++++ .../com/alibaba/graphar/GeneralParams.java | 0 .../datasources/GarCommitProtocol.scala | 0 .../graphar/datasources/GarDataSource.scala | 21 +- .../alibaba/graphar/datasources/GarScan.scala | 0 .../graphar/datasources/GarScanBuilder.scala | 0 .../graphar/datasources/GarTable.scala | 0 .../datasources/GarWriterBuilder.scala | 0 .../datasources/csv/CSVWriterBuilder.scala | 0 .../datasources/orc/OrcOutputWriter.scala | 0 .../datasources/orc/OrcWriteBuilder.scala | 0 .../parquet/ParquetWriterBuilder.scala | 0 spark/graphar/pom.xml | 306 +++++++++++++++++ .../com/alibaba/graphar/GeneralParams.java | 39 +++ .../scala/com/alibaba/graphar/EdgeInfo.scala | 0 .../scala/com/alibaba/graphar/GraphInfo.scala | 0 .../com/alibaba/graphar/VertexInfo.scala | 0 .../graphar/example/GraphAr2Nebula.scala | 0 .../graphar/example/GraphAr2Neo4j.scala | 0 .../graphar/example/Nebula2GraphAr.scala | 0 .../graphar/example/Neo4j2GraphAr.scala | 0 .../alibaba/graphar/graph/GraphReader.scala | 0 .../graphar/graph/GraphTransformer.scala | 0 .../alibaba/graphar/graph/GraphWriter.scala | 0 .../com/alibaba/graphar/importer/Neo4j.scala | 0 .../alibaba/graphar/reader/EdgeReader.scala | 0 .../alibaba/graphar/reader/VertexReader.scala | 0 .../graphar/util/DataFrameConcat.scala | 0 .../com/alibaba/graphar/util/FileSystem.scala | 0 .../alibaba/graphar/util/IndexGenerator.scala | 0 .../com/alibaba/graphar/util/Patitioner.scala | 0 .../com/alibaba/graphar/util/Utils.scala | 0 .../alibaba/graphar/writer/EdgeWriter.scala | 0 .../alibaba/graphar/writer/VertexWriter.scala | 0 spark/graphar/src/test/resources/gar-test | 1 + .../com/alibaba/graphar/ComputeExample.scala | 0 .../com/alibaba/graphar/TestGraphInfo.scala | 0 .../com/alibaba/graphar/TestGraphReader.scala | 0 .../graphar/TestGraphTransformer.scala | 0 .../com/alibaba/graphar/TestGraphWriter.scala | 0 .../alibaba/graphar/TestIndexGenerator.scala | 0 .../com/alibaba/graphar/TestReader.scala | 0 .../com/alibaba/graphar/TestWriter.scala | 0 .../alibaba/graphar/TransformExample.scala | 0 spark/pom.xml | 311 ++---------------- spark/src/test/resources/gar-test | 1 - 47 files changed, 582 insertions(+), 287 deletions(-) create mode 100644 spark/datasources-32/pom.xml rename spark/{ => datasources-32}/src/main/java/com/alibaba/graphar/GeneralParams.java (100%) rename spark/{ => datasources-32}/src/main/scala/com/alibaba/graphar/datasources/GarCommitProtocol.scala (100%) rename spark/{ => datasources-32}/src/main/scala/com/alibaba/graphar/datasources/GarDataSource.scala (90%) rename spark/{ => datasources-32}/src/main/scala/com/alibaba/graphar/datasources/GarScan.scala (100%) rename spark/{ => datasources-32}/src/main/scala/com/alibaba/graphar/datasources/GarScanBuilder.scala (100%) rename spark/{ => datasources-32}/src/main/scala/com/alibaba/graphar/datasources/GarTable.scala (100%) rename spark/{ => datasources-32}/src/main/scala/com/alibaba/graphar/datasources/GarWriterBuilder.scala (100%) rename spark/{ => datasources-32}/src/main/scala/com/alibaba/graphar/datasources/csv/CSVWriterBuilder.scala (100%) rename spark/{ => datasources-32}/src/main/scala/com/alibaba/graphar/datasources/orc/OrcOutputWriter.scala (100%) rename spark/{ => datasources-32}/src/main/scala/com/alibaba/graphar/datasources/orc/OrcWriteBuilder.scala (100%) rename spark/{ => datasources-32}/src/main/scala/com/alibaba/graphar/datasources/parquet/ParquetWriterBuilder.scala (100%) create mode 100644 spark/graphar/pom.xml create mode 100644 spark/graphar/src/main/java/com/alibaba/graphar/GeneralParams.java rename spark/{ => graphar}/src/main/scala/com/alibaba/graphar/EdgeInfo.scala (100%) rename spark/{ => graphar}/src/main/scala/com/alibaba/graphar/GraphInfo.scala (100%) rename spark/{ => graphar}/src/main/scala/com/alibaba/graphar/VertexInfo.scala (100%) rename spark/{ => graphar}/src/main/scala/com/alibaba/graphar/example/GraphAr2Nebula.scala (100%) rename spark/{ => graphar}/src/main/scala/com/alibaba/graphar/example/GraphAr2Neo4j.scala (100%) rename spark/{ => graphar}/src/main/scala/com/alibaba/graphar/example/Nebula2GraphAr.scala (100%) rename spark/{ => graphar}/src/main/scala/com/alibaba/graphar/example/Neo4j2GraphAr.scala (100%) rename spark/{ => graphar}/src/main/scala/com/alibaba/graphar/graph/GraphReader.scala (100%) rename spark/{ => graphar}/src/main/scala/com/alibaba/graphar/graph/GraphTransformer.scala (100%) rename spark/{ => graphar}/src/main/scala/com/alibaba/graphar/graph/GraphWriter.scala (100%) rename spark/{ => graphar}/src/main/scala/com/alibaba/graphar/importer/Neo4j.scala (100%) rename spark/{ => graphar}/src/main/scala/com/alibaba/graphar/reader/EdgeReader.scala (100%) rename spark/{ => graphar}/src/main/scala/com/alibaba/graphar/reader/VertexReader.scala (100%) rename spark/{ => graphar}/src/main/scala/com/alibaba/graphar/util/DataFrameConcat.scala (100%) rename spark/{ => graphar}/src/main/scala/com/alibaba/graphar/util/FileSystem.scala (100%) rename spark/{ => graphar}/src/main/scala/com/alibaba/graphar/util/IndexGenerator.scala (100%) rename spark/{ => graphar}/src/main/scala/com/alibaba/graphar/util/Patitioner.scala (100%) rename spark/{ => graphar}/src/main/scala/com/alibaba/graphar/util/Utils.scala (100%) rename spark/{ => graphar}/src/main/scala/com/alibaba/graphar/writer/EdgeWriter.scala (100%) rename spark/{ => graphar}/src/main/scala/com/alibaba/graphar/writer/VertexWriter.scala (100%) create mode 120000 spark/graphar/src/test/resources/gar-test rename spark/{ => graphar}/src/test/scala/com/alibaba/graphar/ComputeExample.scala (100%) rename spark/{ => graphar}/src/test/scala/com/alibaba/graphar/TestGraphInfo.scala (100%) rename spark/{ => graphar}/src/test/scala/com/alibaba/graphar/TestGraphReader.scala (100%) rename spark/{ => graphar}/src/test/scala/com/alibaba/graphar/TestGraphTransformer.scala (100%) rename spark/{ => graphar}/src/test/scala/com/alibaba/graphar/TestGraphWriter.scala (100%) rename spark/{ => graphar}/src/test/scala/com/alibaba/graphar/TestIndexGenerator.scala (100%) rename spark/{ => graphar}/src/test/scala/com/alibaba/graphar/TestReader.scala (100%) rename spark/{ => graphar}/src/test/scala/com/alibaba/graphar/TestWriter.scala (100%) rename spark/{ => graphar}/src/test/scala/com/alibaba/graphar/TransformExample.scala (100%) delete mode 120000 spark/src/test/resources/gar-test diff --git a/pyspark/tests/conftest.py b/pyspark/tests/conftest.py index 61bdd9591..3a3eeb4ec 100644 --- a/pyspark/tests/conftest.py +++ b/pyspark/tests/conftest.py @@ -17,7 +17,7 @@ import pytest from pyspark.sql import SparkSession -JARS_PATH = Path(__file__).parent.parent.parent.joinpath("spark").joinpath("target") +JARS_PATH = Path(__file__).parent.parent.parent.joinpath("spark").joinpath("graphar").joinpath("target") GRAPHAR_SHADED_JAR_PATH = None for jar_file in JARS_PATH.glob("*.jar"): diff --git a/spark/datasources-32/pom.xml b/spark/datasources-32/pom.xml new file mode 100644 index 000000000..e1af90c01 --- /dev/null +++ b/spark/datasources-32/pom.xml @@ -0,0 +1,188 @@ + + + + + 4.0.0 + + + com.alibaba + graphar + ${graphar.version} + + + com.alibaba + graphar-datasources + ${graphar.version} + jar + + + + org.apache.spark + spark-core_${scala.binary.version} + ${spark.version} + provided + + + org.apache.spark + spark-streaming_${scala.binary.version} + ${spark.version} + provided + + + org.apache.spark + spark-mllib_${scala.binary.version} + ${spark.version} + provided + + + org.apache.spark + spark-sql_${scala.binary.version} + ${spark.version} + provided + + + org.apache.spark + spark-hive_${scala.binary.version} + ${spark.version} + provided + + + + + + + org.scala-tools + maven-scala-plugin + 2.15.2 + + ${scala.version} + + -target:jvm-1.8 + + + -Xss4096K + + + + + scala-compile + + compile + + + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + scala-test-compile + + testCompile + + + + + + net.alchim31.maven + scala-maven-plugin + 4.8.0 + + + + compile + testCompile + + + + + + -Xms64m + -Xmx1024m + + + -Ywarn-unused + + + + org.scalameta + semanticdb-scalac_2.12.10 + 4.3.24 + + + + + + com.diffplug.spotless + spotless-maven-plugin + 2.20.0 + + + + + + + 1.13.0 + + + + + + ${project.basedir}/.scalafmt.conf + + + + + + io.github.evis + scalafix-maven-plugin_2.13 + 0.1.8_0.11.0 + + + org.apache.maven.plugins + maven-source-plugin + + + attach-sources + + jar + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + + + attach-javadocs + + jar + + + + + + maven-site-plugin + 3.7.1 + + + + diff --git a/spark/src/main/java/com/alibaba/graphar/GeneralParams.java b/spark/datasources-32/src/main/java/com/alibaba/graphar/GeneralParams.java similarity index 100% rename from spark/src/main/java/com/alibaba/graphar/GeneralParams.java rename to spark/datasources-32/src/main/java/com/alibaba/graphar/GeneralParams.java diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/GarCommitProtocol.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarCommitProtocol.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/datasources/GarCommitProtocol.scala rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarCommitProtocol.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/GarDataSource.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarDataSource.scala similarity index 90% rename from spark/src/main/scala/com/alibaba/graphar/datasources/GarDataSource.scala rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarDataSource.scala index 1b6d28645..e10754951 100644 --- a/spark/src/main/scala/com/alibaba/graphar/datasources/GarDataSource.scala +++ b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarDataSource.scala @@ -17,6 +17,7 @@ package com.alibaba.graphar.datasources import scala.collection.JavaConverters._ +import scala.util.matching.Regex import java.util import com.fasterxml.jackson.databind.ObjectMapper @@ -34,7 +35,6 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.sql.sources.DataSourceRegister import org.apache.spark.sql.connector.expressions.Transform -import com.alibaba.graphar.util.Utils object GarUtils @@ -42,6 +42,23 @@ object GarUtils * GarDataSource is a class to provide gar files as the data source for spark. */ class GarDataSource extends TableProvider with DataSourceRegister { + private val REDACTION_REPLACEMENT_TEXT = "*********(redacted)" + + /** + * Redact the sensitive information in the given string. + */ + // Copy of redact from graphar Utils + private def redact(regex: Option[Regex], text: String): String = { + regex match { + case None => text + case Some(r) => + if (text == null || text.isEmpty) { + text + } else { + r.replaceAllIn(text, REDACTION_REPLACEMENT_TEXT) + } + } + } /** The default fallback file format is Parquet. */ def fallbackFileFormat: Class[_ <: FileFormat] = classOf[ParquetFileFormat] @@ -80,7 +97,7 @@ class GarDataSource extends TableProvider with DataSourceRegister { val name = shortName() + " " + paths .map(qualifiedPathName(_, hadoopConf)) .mkString(",") - Utils.redact(sparkSession.sessionState.conf.stringRedactionPattern, name) + redact(sparkSession.sessionState.conf.stringRedactionPattern, name) } private def qualifiedPathName( diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/GarScan.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarScan.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/datasources/GarScan.scala rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarScan.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/GarScanBuilder.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarScanBuilder.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/datasources/GarScanBuilder.scala rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarScanBuilder.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/GarTable.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarTable.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/datasources/GarTable.scala rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarTable.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/GarWriterBuilder.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarWriterBuilder.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/datasources/GarWriterBuilder.scala rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarWriterBuilder.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/csv/CSVWriterBuilder.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/csv/CSVWriterBuilder.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/datasources/csv/CSVWriterBuilder.scala rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/csv/CSVWriterBuilder.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/orc/OrcOutputWriter.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/orc/OrcOutputWriter.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/datasources/orc/OrcOutputWriter.scala rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/orc/OrcOutputWriter.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/orc/OrcWriteBuilder.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/orc/OrcWriteBuilder.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/datasources/orc/OrcWriteBuilder.scala rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/orc/OrcWriteBuilder.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/parquet/ParquetWriterBuilder.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/parquet/ParquetWriterBuilder.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/datasources/parquet/ParquetWriterBuilder.scala rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/parquet/ParquetWriterBuilder.scala diff --git a/spark/graphar/pom.xml b/spark/graphar/pom.xml new file mode 100644 index 000000000..792c111ba --- /dev/null +++ b/spark/graphar/pom.xml @@ -0,0 +1,306 @@ + + + + + 4.0.0 + + + com.alibaba + graphar + ${graphar.version} + + + com.alibaba + graphar-commons + ${graphar.version} + jar + + + + com.alibaba + graphar-datasources + ${graphar.version} + + + org.apache.spark + spark-core_${scala.binary.version} + ${spark.version} + provided + + + org.apache.spark + spark-streaming_${scala.binary.version} + ${spark.version} + provided + + + org.apache.spark + spark-mllib_${scala.binary.version} + ${spark.version} + provided + + + org.apache.spark + spark-sql_${scala.binary.version} + ${spark.version} + provided + + + org.apache.spark + spark-hive_${scala.binary.version} + ${spark.version} + provided + + + org.scalatest + scalatest_${scala.binary.version} + 3.1.1 + provided + + + org.scala-lang + scala-library + ${scala.version} + provided + + + org.yaml + snakeyaml + 2.0 + + + com.aliyun.odps + hadoop-fs-oss + ${cupid.sdk.version} + + + org.apache.hadoop + hadoop-common + + + + + com.aliyun.odps + odps-spark-datasource_2.11 + ${cupid.sdk.version} + + + net.jpountz.lz4 + lz4 + + + + + com.aliyun.odps + cupid-sdk + ${cupid.sdk.version} + provided + + + org.neo4j + neo4j-connector-apache-spark_2.12 + 5.0.0_for_spark_3 + + + com.vesoft + nebula-spark-connector_3.0 + 3.6.0 + + + org.scala-lang.modules + scala-collection-compat_2.12 + 2.1.1 + + + + + + org.scala-tools + maven-scala-plugin + 2.15.2 + + ${scala.version} + + -target:jvm-1.8 + + + -Xss4096K + + + + + scala-compile + + compile + + + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + scala-test-compile + + testCompile + + + + + + org.scalatest + scalatest-maven-plugin + 2.0.0 + + + test + + test + + + + + + org.apache.maven.plugins + maven-shade-plugin + 2.1 + + + package + + shade + + + false + true + + + + *:* + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + **/log4j.properties + + + + + + reference.conf + + + + + + + + net.alchim31.maven + scala-maven-plugin + 4.8.0 + + + + compile + testCompile + + + + + + -Xms64m + -Xmx1024m + + + -Ywarn-unused + + + + org.scalameta + semanticdb-scalac_2.12.10 + 4.3.24 + + + + + + com.diffplug.spotless + spotless-maven-plugin + 2.20.0 + + + + + + + 1.13.0 + + + + + + ${project.basedir}/.scalafmt.conf + + + + + + io.github.evis + scalafix-maven-plugin_2.13 + 0.1.8_0.11.0 + + + org.apache.maven.plugins + maven-source-plugin + + + attach-sources + + jar + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + + + attach-javadocs + + jar + + + + + + maven-site-plugin + 3.7.1 + + + + diff --git a/spark/graphar/src/main/java/com/alibaba/graphar/GeneralParams.java b/spark/graphar/src/main/java/com/alibaba/graphar/GeneralParams.java new file mode 100644 index 000000000..798f7d1ab --- /dev/null +++ b/spark/graphar/src/main/java/com/alibaba/graphar/GeneralParams.java @@ -0,0 +1,39 @@ +/* + * Copyright 2022-2023 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.graphar; + +import org.apache.spark.storage.StorageLevel; + +/** General constant parameters for graphar. */ +public class GeneralParams { + // column name + public static final String vertexIndexCol = "_graphArVertexIndex"; + public static final String srcIndexCol = "_graphArSrcIndex"; + public static final String dstIndexCol = "_graphArDstIndex"; + public static final String offsetCol = "_graphArOffset"; + public static final String primaryCol = "_graphArPrimary"; + public static final String vertexChunkIndexCol = "_graphArVertexChunkIndex"; + public static final String edgeIndexCol = "_graphArEdgeIndex"; + public static final String regularSeparator = "_"; + public static final String offsetStartChunkIndexKey = "_graphar_offset_start_chunk_index"; + public static final String aggNumListOfEdgeChunkKey = "_graphar_agg_num_list_of_edge_chunk"; + public static final Long defaultVertexChunkSize = 262144L; // 2^18 + public static final Long defaultEdgeChunkSize = 4194304L; // 2^22 + public static final String defaultFileType = "parquet"; + public static final String defaultVersion = "v1"; + public static final StorageLevel defaultStorageLevel = StorageLevel.MEMORY_AND_DISK_SER(); +} diff --git a/spark/src/main/scala/com/alibaba/graphar/EdgeInfo.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/EdgeInfo.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/EdgeInfo.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/EdgeInfo.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/GraphInfo.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/GraphInfo.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/GraphInfo.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/GraphInfo.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/VertexInfo.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/VertexInfo.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/VertexInfo.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/VertexInfo.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/example/GraphAr2Nebula.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/example/GraphAr2Nebula.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/example/GraphAr2Nebula.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/example/GraphAr2Nebula.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/example/GraphAr2Neo4j.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/example/GraphAr2Neo4j.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/example/GraphAr2Neo4j.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/example/GraphAr2Neo4j.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/example/Nebula2GraphAr.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/example/Nebula2GraphAr.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/example/Nebula2GraphAr.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/example/Nebula2GraphAr.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/example/Neo4j2GraphAr.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/example/Neo4j2GraphAr.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/example/Neo4j2GraphAr.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/example/Neo4j2GraphAr.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/graph/GraphReader.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/graph/GraphReader.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/graph/GraphReader.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/graph/GraphReader.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/graph/GraphTransformer.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/graph/GraphTransformer.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/graph/GraphTransformer.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/graph/GraphTransformer.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/graph/GraphWriter.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/graph/GraphWriter.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/graph/GraphWriter.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/graph/GraphWriter.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/importer/Neo4j.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/importer/Neo4j.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/importer/Neo4j.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/importer/Neo4j.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/reader/EdgeReader.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/reader/EdgeReader.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/reader/EdgeReader.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/reader/EdgeReader.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/reader/VertexReader.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/reader/VertexReader.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/reader/VertexReader.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/reader/VertexReader.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/util/DataFrameConcat.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/util/DataFrameConcat.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/util/DataFrameConcat.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/util/DataFrameConcat.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/util/FileSystem.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/util/FileSystem.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/util/FileSystem.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/util/FileSystem.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/util/IndexGenerator.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/util/IndexGenerator.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/util/IndexGenerator.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/util/IndexGenerator.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/util/Patitioner.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/util/Patitioner.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/util/Patitioner.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/util/Patitioner.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/util/Utils.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/util/Utils.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/util/Utils.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/util/Utils.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/writer/EdgeWriter.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/writer/EdgeWriter.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/writer/EdgeWriter.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/writer/EdgeWriter.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/writer/VertexWriter.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/writer/VertexWriter.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/writer/VertexWriter.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/writer/VertexWriter.scala diff --git a/spark/graphar/src/test/resources/gar-test b/spark/graphar/src/test/resources/gar-test new file mode 120000 index 000000000..0d198537a --- /dev/null +++ b/spark/graphar/src/test/resources/gar-test @@ -0,0 +1 @@ +/home/sem/github/GraphAr/testing \ No newline at end of file diff --git a/spark/src/test/scala/com/alibaba/graphar/ComputeExample.scala b/spark/graphar/src/test/scala/com/alibaba/graphar/ComputeExample.scala similarity index 100% rename from spark/src/test/scala/com/alibaba/graphar/ComputeExample.scala rename to spark/graphar/src/test/scala/com/alibaba/graphar/ComputeExample.scala diff --git a/spark/src/test/scala/com/alibaba/graphar/TestGraphInfo.scala b/spark/graphar/src/test/scala/com/alibaba/graphar/TestGraphInfo.scala similarity index 100% rename from spark/src/test/scala/com/alibaba/graphar/TestGraphInfo.scala rename to spark/graphar/src/test/scala/com/alibaba/graphar/TestGraphInfo.scala diff --git a/spark/src/test/scala/com/alibaba/graphar/TestGraphReader.scala b/spark/graphar/src/test/scala/com/alibaba/graphar/TestGraphReader.scala similarity index 100% rename from spark/src/test/scala/com/alibaba/graphar/TestGraphReader.scala rename to spark/graphar/src/test/scala/com/alibaba/graphar/TestGraphReader.scala diff --git a/spark/src/test/scala/com/alibaba/graphar/TestGraphTransformer.scala b/spark/graphar/src/test/scala/com/alibaba/graphar/TestGraphTransformer.scala similarity index 100% rename from spark/src/test/scala/com/alibaba/graphar/TestGraphTransformer.scala rename to spark/graphar/src/test/scala/com/alibaba/graphar/TestGraphTransformer.scala diff --git a/spark/src/test/scala/com/alibaba/graphar/TestGraphWriter.scala b/spark/graphar/src/test/scala/com/alibaba/graphar/TestGraphWriter.scala similarity index 100% rename from spark/src/test/scala/com/alibaba/graphar/TestGraphWriter.scala rename to spark/graphar/src/test/scala/com/alibaba/graphar/TestGraphWriter.scala diff --git a/spark/src/test/scala/com/alibaba/graphar/TestIndexGenerator.scala b/spark/graphar/src/test/scala/com/alibaba/graphar/TestIndexGenerator.scala similarity index 100% rename from spark/src/test/scala/com/alibaba/graphar/TestIndexGenerator.scala rename to spark/graphar/src/test/scala/com/alibaba/graphar/TestIndexGenerator.scala diff --git a/spark/src/test/scala/com/alibaba/graphar/TestReader.scala b/spark/graphar/src/test/scala/com/alibaba/graphar/TestReader.scala similarity index 100% rename from spark/src/test/scala/com/alibaba/graphar/TestReader.scala rename to spark/graphar/src/test/scala/com/alibaba/graphar/TestReader.scala diff --git a/spark/src/test/scala/com/alibaba/graphar/TestWriter.scala b/spark/graphar/src/test/scala/com/alibaba/graphar/TestWriter.scala similarity index 100% rename from spark/src/test/scala/com/alibaba/graphar/TestWriter.scala rename to spark/graphar/src/test/scala/com/alibaba/graphar/TestWriter.scala diff --git a/spark/src/test/scala/com/alibaba/graphar/TransformExample.scala b/spark/graphar/src/test/scala/com/alibaba/graphar/TransformExample.scala similarity index 100% rename from spark/src/test/scala/com/alibaba/graphar/TransformExample.scala rename to spark/graphar/src/test/scala/com/alibaba/graphar/TransformExample.scala diff --git a/spark/pom.xml b/spark/pom.xml index 3cb7c5e1b..172d9e255 100644 --- a/spark/pom.xml +++ b/spark/pom.xml @@ -21,288 +21,33 @@ com.alibaba graphar - 0.1.0-SNAPSHOT + ${graphar.version} + pom - - graphar - UTF-8 - UTF-8 - 2.12.10 - 2.12 - 512m - 1024m - 3.2.2 - 1.8 - 1.8 - 3.3.8-public - - - - org.apache.spark - spark-core_${scala.binary.version} - ${spark.version} - provided - - - org.apache.spark - spark-streaming_${scala.binary.version} - ${spark.version} - provided - - - org.apache.spark - spark-mllib_${scala.binary.version} - ${spark.version} - provided - - - org.apache.spark - spark-sql_${scala.binary.version} - ${spark.version} - provided - - - org.apache.spark - spark-hive_${scala.binary.version} - ${spark.version} - provided - - - org.scalatest - scalatest_${scala.binary.version} - 3.1.1 - provided - - - org.scala-lang - scala-library - ${scala.version} - provided - - - org.yaml - snakeyaml - 2.0 - - - com.aliyun.odps - hadoop-fs-oss - ${cupid.sdk.version} - - - org.apache.hadoop - hadoop-common - - - - - com.aliyun.odps - odps-spark-datasource_2.11 - ${cupid.sdk.version} - - - net.jpountz.lz4 - lz4 - - - - - com.aliyun.odps - cupid-sdk - ${cupid.sdk.version} - provided - - - org.neo4j - neo4j-connector-apache-spark_2.12 - 5.0.0_for_spark_3 - - - com.vesoft - nebula-spark-connector_3.0 - 3.6.0 - - - org.scala-lang.modules - scala-collection-compat_2.12 - 2.1.1 - - - - - - org.scala-tools - maven-scala-plugin - 2.15.2 - - ${scala.version} - - -target:jvm-1.8 - - - -Xss4096K - - - - - scala-compile - - compile - - - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - scala-test-compile - - testCompile - - - - - - org.scalatest - scalatest-maven-plugin - 2.0.0 - - - test - - test - - - - - - org.apache.maven.plugins - maven-shade-plugin - 2.1 - - - package - - shade - - - false - true - - - - *:* - - - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - **/log4j.properties - - - - - - reference.conf - - - - - - - - net.alchim31.maven - scala-maven-plugin - 4.8.0 - - - - compile - testCompile - - - - - - -Xms64m - -Xmx1024m - - - -Ywarn-unused - - - - org.scalameta - semanticdb-scalac_2.12.10 - 4.3.24 - - - - - - com.diffplug.spotless - spotless-maven-plugin - 2.20.0 - - - - - - - 1.13.0 - - - - - - ${project.basedir}/.scalafmt.conf - - - - - - io.github.evis - scalafix-maven-plugin_2.13 - 0.1.8_0.11.0 - - - org.apache.maven.plugins - maven-source-plugin - - - attach-sources - - jar - - - - - - org.apache.maven.plugins - maven-javadoc-plugin - - - attach-javadocs - - jar - - - - - - maven-site-plugin - 3.7.1 - - - - jar + + + datasources-32 + + graphar + UTF-8 + UTF-8 + 2.12.10 + 2.12 + 512m + 1024m + 3.2.2 + 1.8 + 1.8 + 3.3.8-public + 0.1.0-SNAPSHOT + + + graphar + datasources-32 + + + true + + + diff --git a/spark/src/test/resources/gar-test b/spark/src/test/resources/gar-test deleted file mode 120000 index 1166084dd..000000000 --- a/spark/src/test/resources/gar-test +++ /dev/null @@ -1 +0,0 @@ -../../../../testing \ No newline at end of file From f19f6a3adec53708869aeaf01608630ac6fa298f Mon Sep 17 00:00:00 2001 From: SemyonSinchenko Date: Thu, 22 Feb 2024 00:02:42 +0100 Subject: [PATCH 02/18] Fix spotless:check On branch 320-datasources-refactoring Changes to be committed: new file: spark/datasources-32/.scalafmt.conf modified: spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarDataSource.scala new file: spark/graphar/.scalafmt.conf modified: spark/graphar/pom.xml modified: spark/pom.xml --- spark/datasources-32/.scalafmt.conf | 1 + .../graphar/datasources/GarDataSource.scala | 1 - spark/graphar/.scalafmt.conf | 1 + spark/graphar/pom.xml | 21 ---------------- spark/pom.xml | 25 +++++++++++++++++++ 5 files changed, 27 insertions(+), 22 deletions(-) create mode 120000 spark/datasources-32/.scalafmt.conf create mode 120000 spark/graphar/.scalafmt.conf diff --git a/spark/datasources-32/.scalafmt.conf b/spark/datasources-32/.scalafmt.conf new file mode 120000 index 000000000..27ac71e06 --- /dev/null +++ b/spark/datasources-32/.scalafmt.conf @@ -0,0 +1 @@ +/home/sem/github/GraphAr/spark/.scalafmt.conf \ No newline at end of file diff --git a/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarDataSource.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarDataSource.scala index e10754951..d4fe44fd1 100644 --- a/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarDataSource.scala +++ b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarDataSource.scala @@ -35,7 +35,6 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.sql.sources.DataSourceRegister import org.apache.spark.sql.connector.expressions.Transform - object GarUtils /** diff --git a/spark/graphar/.scalafmt.conf b/spark/graphar/.scalafmt.conf new file mode 120000 index 000000000..27ac71e06 --- /dev/null +++ b/spark/graphar/.scalafmt.conf @@ -0,0 +1 @@ +/home/sem/github/GraphAr/spark/.scalafmt.conf \ No newline at end of file diff --git a/spark/graphar/pom.xml b/spark/graphar/pom.xml index 792c111ba..75ca9ff82 100644 --- a/spark/graphar/pom.xml +++ b/spark/graphar/pom.xml @@ -247,27 +247,6 @@ - - com.diffplug.spotless - spotless-maven-plugin - 2.20.0 - - - - - - - 1.13.0 - - - - - - ${project.basedir}/.scalafmt.conf - - - - io.github.evis scalafix-maven-plugin_2.13 diff --git a/spark/pom.xml b/spark/pom.xml index 172d9e255..ffa6af1b8 100644 --- a/spark/pom.xml +++ b/spark/pom.xml @@ -50,4 +50,29 @@ + + + + com.diffplug.spotless + spotless-maven-plugin + 2.20.0 + + + + + + + 1.13.0 + + + + + + ${project.basedir}/.scalafmt.conf + + + + + + From 435be1316669771092f48fa080ea70f8ef5eeced Mon Sep 17 00:00:00 2001 From: SemyonSinchenko Date: Thu, 22 Feb 2024 00:06:06 +0100 Subject: [PATCH 03/18] Fix broken symlins On branch 320-datasources-refactoring Changes to be committed: typechange: spark/datasources-32/.scalafmt.conf typechange: spark/graphar/.scalafmt.conf --- spark/datasources-32/.scalafmt.conf | 9 ++++++++- spark/graphar/.scalafmt.conf | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) mode change 120000 => 100644 spark/datasources-32/.scalafmt.conf mode change 120000 => 100644 spark/graphar/.scalafmt.conf diff --git a/spark/datasources-32/.scalafmt.conf b/spark/datasources-32/.scalafmt.conf deleted file mode 120000 index 27ac71e06..000000000 --- a/spark/datasources-32/.scalafmt.conf +++ /dev/null @@ -1 +0,0 @@ -/home/sem/github/GraphAr/spark/.scalafmt.conf \ No newline at end of file diff --git a/spark/datasources-32/.scalafmt.conf b/spark/datasources-32/.scalafmt.conf new file mode 100644 index 000000000..a79ab2962 --- /dev/null +++ b/spark/datasources-32/.scalafmt.conf @@ -0,0 +1,8 @@ +version = "3.0.6" + +align.preset = some +runner.dialect = scala212 +maxColumn = 80 +docstrings.style = Asterisk +docstrings.removeEmpty = true +project.git = true diff --git a/spark/graphar/.scalafmt.conf b/spark/graphar/.scalafmt.conf deleted file mode 120000 index 27ac71e06..000000000 --- a/spark/graphar/.scalafmt.conf +++ /dev/null @@ -1 +0,0 @@ -/home/sem/github/GraphAr/spark/.scalafmt.conf \ No newline at end of file diff --git a/spark/graphar/.scalafmt.conf b/spark/graphar/.scalafmt.conf new file mode 100644 index 000000000..a79ab2962 --- /dev/null +++ b/spark/graphar/.scalafmt.conf @@ -0,0 +1,8 @@ +version = "3.0.6" + +align.preset = some +runner.dialect = scala212 +maxColumn = 80 +docstrings.style = Asterisk +docstrings.removeEmpty = true +project.git = true From a8d0312a243b782f01f25b4921d9a90eb447a882 Mon Sep 17 00:00:00 2001 From: SemyonSinchenko Date: Thu, 22 Feb 2024 00:09:14 +0100 Subject: [PATCH 04/18] Fix license ignore On branch 320-datasources-refactoring Changes to be committed: modified: .licenserc.yaml --- .licenserc.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.licenserc.yaml b/.licenserc.yaml index c6234a1f0..7dcd938a5 100644 --- a/.licenserc.yaml +++ b/.licenserc.yaml @@ -33,10 +33,10 @@ header: - 'pre-commit-config.yaml' - 'docs' - '**/.gitignore' - - 'spark/.scalafix.conf' - - 'spark/.scalafmt.conf' + - '**/.scalafix.conf' + - '**/.scalafmt.conf' - 'cpp/apidoc' - - 'spark/src/main/scala/com/alibaba/graphar/datasources' + - 'spark/**/src/main/scala/com/alibaba/graphar/datasources' - '*.md' - '*.rst' - '**/*.json' From 657882351f0cf206585d24c173ede10226661554 Mon Sep 17 00:00:00 2001 From: SemyonSinchenko Date: Thu, 22 Feb 2024 00:13:43 +0100 Subject: [PATCH 05/18] Fix broken symlinks On branch 320-datasources-refactoring Changes to be committed: modified: spark/graphar/src/test/resources/gar-test --- spark/graphar/src/test/resources/gar-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spark/graphar/src/test/resources/gar-test b/spark/graphar/src/test/resources/gar-test index 0d198537a..4ce4f440f 120000 --- a/spark/graphar/src/test/resources/gar-test +++ b/spark/graphar/src/test/resources/gar-test @@ -1 +1 @@ -/home/sem/github/GraphAr/testing \ No newline at end of file +../../../../../testing/ \ No newline at end of file From e81beb23e12792daa537168c46ce763a84a5e8c8 Mon Sep 17 00:00:00 2001 From: SemyonSinchenko Date: Thu, 22 Feb 2024 00:15:14 +0100 Subject: [PATCH 06/18] Fix broken symlinks again On branch 320-datasources-refactoring Changes to be committed: typechange: spark/datasources-32/.scalafmt.conf typechange: spark/graphar/.scalafmt.conf --- spark/datasources-32/.scalafmt.conf | 9 +-------- spark/graphar/.scalafmt.conf | 9 +-------- 2 files changed, 2 insertions(+), 16 deletions(-) mode change 100644 => 120000 spark/datasources-32/.scalafmt.conf mode change 100644 => 120000 spark/graphar/.scalafmt.conf diff --git a/spark/datasources-32/.scalafmt.conf b/spark/datasources-32/.scalafmt.conf deleted file mode 100644 index a79ab2962..000000000 --- a/spark/datasources-32/.scalafmt.conf +++ /dev/null @@ -1,8 +0,0 @@ -version = "3.0.6" - -align.preset = some -runner.dialect = scala212 -maxColumn = 80 -docstrings.style = Asterisk -docstrings.removeEmpty = true -project.git = true diff --git a/spark/datasources-32/.scalafmt.conf b/spark/datasources-32/.scalafmt.conf new file mode 120000 index 000000000..4cb05e831 --- /dev/null +++ b/spark/datasources-32/.scalafmt.conf @@ -0,0 +1 @@ +../.scalafmt.conf \ No newline at end of file diff --git a/spark/graphar/.scalafmt.conf b/spark/graphar/.scalafmt.conf deleted file mode 100644 index a79ab2962..000000000 --- a/spark/graphar/.scalafmt.conf +++ /dev/null @@ -1,8 +0,0 @@ -version = "3.0.6" - -align.preset = some -runner.dialect = scala212 -maxColumn = 80 -docstrings.style = Asterisk -docstrings.removeEmpty = true -project.git = true diff --git a/spark/graphar/.scalafmt.conf b/spark/graphar/.scalafmt.conf new file mode 120000 index 000000000..4cb05e831 --- /dev/null +++ b/spark/graphar/.scalafmt.conf @@ -0,0 +1 @@ +../.scalafmt.conf \ No newline at end of file From 16c7a0878e93cd49ceb7b77646838cf160e377c9 Mon Sep 17 00:00:00 2001 From: SemyonSinchenko Date: Thu, 22 Feb 2024 00:25:23 +0100 Subject: [PATCH 07/18] Fix license ignore && fix broken path to GraphAr JAR On branch 320-datasources-refactoring Changes to be committed: modified: .licenserc.yaml modified: spark/scripts/run-graphar2nebula.sh modified: spark/scripts/run-graphar2neo4j.sh modified: spark/scripts/run-nebula2graphar.sh modified: spark/scripts/run-neo4j2graphar.sh --- .licenserc.yaml | 2 +- spark/scripts/run-graphar2nebula.sh | 2 +- spark/scripts/run-graphar2neo4j.sh | 2 +- spark/scripts/run-nebula2graphar.sh | 2 +- spark/scripts/run-neo4j2graphar.sh | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.licenserc.yaml b/.licenserc.yaml index 7dcd938a5..926e6b7ec 100644 --- a/.licenserc.yaml +++ b/.licenserc.yaml @@ -24,7 +24,7 @@ header: - 'LICENSE' - 'NOTICE' - 'testing' - - 'spark/src/test/resources' + - 'spark/graphar/src/test/resources' - 'java/src/test/resources' - '.licenserc.yaml' - '.gitignore' diff --git a/spark/scripts/run-graphar2nebula.sh b/spark/scripts/run-graphar2nebula.sh index 5094885f7..7a47df44d 100755 --- a/spark/scripts/run-graphar2nebula.sh +++ b/spark/scripts/run-graphar2nebula.sh @@ -16,7 +16,7 @@ set -eu cur_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -jar_file="${cur_dir}/../target/graphar-0.1.0-SNAPSHOT-shaded.jar" +jar_file="${cur_dir}/../graphar/target/graphar-commons-0.1.0-SNAPSHOT-shaded.jar" graph_info_path="${GRAPH_INFO_PATH:-/tmp/graphar/nebula2graphar/basketballplayergraph.graph.yml}" spark-submit --class com.alibaba.graphar.example.GraphAr2Nebula ${jar_file} \ diff --git a/spark/scripts/run-graphar2neo4j.sh b/spark/scripts/run-graphar2neo4j.sh index 25150999a..d350387a2 100755 --- a/spark/scripts/run-graphar2neo4j.sh +++ b/spark/scripts/run-graphar2neo4j.sh @@ -17,7 +17,7 @@ set -eu cur_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -jar_file="${cur_dir}/../target/graphar-0.1.0-SNAPSHOT-shaded.jar" +jar_file="${cur_dir}/../graphar/target/graphar-commons-0.1.0-SNAPSHOT-shaded.jar" graph_info_path="${GRAPH_INFO_PATH:-/tmp/graphar/neo4j2graphar/MovieGraph.graph.yml}" spark-submit --class com.alibaba.graphar.example.GraphAr2Neo4j ${jar_file} \ diff --git a/spark/scripts/run-nebula2graphar.sh b/spark/scripts/run-nebula2graphar.sh index 3204b1177..898859402 100755 --- a/spark/scripts/run-nebula2graphar.sh +++ b/spark/scripts/run-nebula2graphar.sh @@ -16,7 +16,7 @@ set -eu cur_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -jar_file="${cur_dir}/../target/graphar-0.1.0-SNAPSHOT-shaded.jar" +jar_file="${cur_dir}/../graphar/target/graphar-commons-0.1.0-SNAPSHOT-shaded.jar" vertex_chunk_size=100 edge_chunk_size=1024 diff --git a/spark/scripts/run-neo4j2graphar.sh b/spark/scripts/run-neo4j2graphar.sh index 16358838f..e18e7c9af 100755 --- a/spark/scripts/run-neo4j2graphar.sh +++ b/spark/scripts/run-neo4j2graphar.sh @@ -17,7 +17,7 @@ set -eu cur_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -jar_file="${cur_dir}/../target/graphar-0.1.0-SNAPSHOT-shaded.jar" +jar_file="${cur_dir}/../graphar/target/graphar-commons-0.1.0-SNAPSHOT-shaded.jar" vertex_chunk_size=100 edge_chunk_size=1024 From 01518c07050ad74f01204a38e81a7180e2a4b99c Mon Sep 17 00:00:00 2001 From: SemyonSinchenko Date: Thu, 22 Feb 2024 00:36:16 +0100 Subject: [PATCH 08/18] Final fix of license-ignore glob On branch 320-datasources-refactoring Changes to be committed: modified: .licenserc.yaml --- .licenserc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.licenserc.yaml b/.licenserc.yaml index 926e6b7ec..904f96985 100644 --- a/.licenserc.yaml +++ b/.licenserc.yaml @@ -36,7 +36,7 @@ header: - '**/.scalafix.conf' - '**/.scalafmt.conf' - 'cpp/apidoc' - - 'spark/**/src/main/scala/com/alibaba/graphar/datasources' + - 'spark/datasources-*/src/main/scala/com/alibaba/graphar/datasources' - '*.md' - '*.rst' - '**/*.json' From 167068187e1644e11fa864373371215f0f3f66cb Mon Sep 17 00:00:00 2001 From: SemyonSinchenko Date: Thu, 22 Feb 2024 00:39:59 +0100 Subject: [PATCH 09/18] Fix broken path to the JAR in neo4j importer On branch 320-datasources-refactoring Changes to be committed: modified: spark/import/neo4j.sh --- spark/import/neo4j.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spark/import/neo4j.sh b/spark/import/neo4j.sh index 7389567ec..d7ddaadc4 100755 --- a/spark/import/neo4j.sh +++ b/spark/import/neo4j.sh @@ -17,8 +17,8 @@ set -eu cur_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -jar_file="${cur_dir}/../target/graphar-0.1.0-SNAPSHOT-shaded.jar" +jar_file="${cur_dir}/../graphar/target/graphar-commons-0.1.0-SNAPSHOT-shaded.jar" conf_path="$(readlink -f $1)" spark-submit --class com.alibaba.graphar.importer.Neo4j ${jar_file} \ - ${conf_path} \ No newline at end of file + ${conf_path} From e5874352dcbffaac44759f1a9dba922765aa69e4 Mon Sep 17 00:00:00 2001 From: SemyonSinchenko Date: Thu, 22 Feb 2024 12:54:00 +0100 Subject: [PATCH 10/18] Final fix licenserc && symlink to GeneralParams On branch 320-datasources-refactoring Changes to be committed: modified: .licenserc.yaml typechange: spark/datasources-32/src/main/java/com/alibaba/graphar/GeneralParams.java --- .licenserc.yaml | 2 +- .../com/alibaba/graphar/GeneralParams.java | 40 +------------------ 2 files changed, 2 insertions(+), 40 deletions(-) mode change 100644 => 120000 spark/datasources-32/src/main/java/com/alibaba/graphar/GeneralParams.java diff --git a/.licenserc.yaml b/.licenserc.yaml index 904f96985..45b89c4cf 100644 --- a/.licenserc.yaml +++ b/.licenserc.yaml @@ -36,7 +36,7 @@ header: - '**/.scalafix.conf' - '**/.scalafmt.conf' - 'cpp/apidoc' - - 'spark/datasources-*/src/main/scala/com/alibaba/graphar/datasources' + - 'spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources' - '*.md' - '*.rst' - '**/*.json' diff --git a/spark/datasources-32/src/main/java/com/alibaba/graphar/GeneralParams.java b/spark/datasources-32/src/main/java/com/alibaba/graphar/GeneralParams.java deleted file mode 100644 index 798f7d1ab..000000000 --- a/spark/datasources-32/src/main/java/com/alibaba/graphar/GeneralParams.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2022-2023 Alibaba Group Holding Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.alibaba.graphar; - -import org.apache.spark.storage.StorageLevel; - -/** General constant parameters for graphar. */ -public class GeneralParams { - // column name - public static final String vertexIndexCol = "_graphArVertexIndex"; - public static final String srcIndexCol = "_graphArSrcIndex"; - public static final String dstIndexCol = "_graphArDstIndex"; - public static final String offsetCol = "_graphArOffset"; - public static final String primaryCol = "_graphArPrimary"; - public static final String vertexChunkIndexCol = "_graphArVertexChunkIndex"; - public static final String edgeIndexCol = "_graphArEdgeIndex"; - public static final String regularSeparator = "_"; - public static final String offsetStartChunkIndexKey = "_graphar_offset_start_chunk_index"; - public static final String aggNumListOfEdgeChunkKey = "_graphar_agg_num_list_of_edge_chunk"; - public static final Long defaultVertexChunkSize = 262144L; // 2^18 - public static final Long defaultEdgeChunkSize = 4194304L; // 2^22 - public static final String defaultFileType = "parquet"; - public static final String defaultVersion = "v1"; - public static final StorageLevel defaultStorageLevel = StorageLevel.MEMORY_AND_DISK_SER(); -} diff --git a/spark/datasources-32/src/main/java/com/alibaba/graphar/GeneralParams.java b/spark/datasources-32/src/main/java/com/alibaba/graphar/GeneralParams.java new file mode 120000 index 000000000..972663dd8 --- /dev/null +++ b/spark/datasources-32/src/main/java/com/alibaba/graphar/GeneralParams.java @@ -0,0 +1 @@ +../../../../../../../graphar/src/main/java/com/alibaba/graphar/GeneralParams.java \ No newline at end of file From a94d7b8c18b133a8de8e93efd838644ca7545b77 Mon Sep 17 00:00:00 2001 From: SemyonSinchenko Date: Thu, 22 Feb 2024 13:02:01 +0100 Subject: [PATCH 11/18] Fix scala:doc missing plugin On branch 320-datasources-refactoring Changes to be committed: modified: spark/pom.xml --- spark/pom.xml | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/spark/pom.xml b/spark/pom.xml index ffa6af1b8..4fc8235ab 100644 --- a/spark/pom.xml +++ b/spark/pom.xml @@ -73,6 +73,41 @@ + + org.scala-tools + maven-scala-plugin + 2.15.2 + + ${scala.version} + + -target:jvm-1.8 + + + -Xss4096K + + + + + scala-compile + + compile + + + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + scala-test-compile + + testCompile + + + + From 74c3d3e80bdee36e96710f4d9b00473541b11d88 Mon Sep 17 00:00:00 2001 From: SemyonSinchenko Date: Thu, 22 Feb 2024 13:20:34 +0100 Subject: [PATCH 12/18] Trying stuff until it works? - added direct export of JAVA_HOME=JAVA_11 because it works for tests On branch 320-datasources-refactoring Changes to be committed: modified: docs/Makefile --- docs/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/Makefile b/docs/Makefile index 3dfb507d0..f5fb9eb98 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -37,6 +37,7 @@ cpp-apidoc: .PHONY: spark-apidoc spark-apidoc: cd $(ROOTDIR)/spark && \ + export JAVA_HOME=$JAVA_HOME_11_X64 && \ mvn scala:doc .PHONY: html From 63bfd525d5f1a5765498c4f641031947156935b7 Mon Sep 17 00:00:00 2001 From: SemyonSinchenko Date: Thu, 22 Feb 2024 13:27:24 +0100 Subject: [PATCH 13/18] Fix syntax && add --no-transfer-progress --no-transfer-progress should make GHA logs little more readable On branch 320-datasources-refactoring Changes to be committed: modified: docs/Makefile --- docs/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/Makefile b/docs/Makefile index f5fb9eb98..670fa877c 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -37,8 +37,8 @@ cpp-apidoc: .PHONY: spark-apidoc spark-apidoc: cd $(ROOTDIR)/spark && \ - export JAVA_HOME=$JAVA_HOME_11_X64 && \ - mvn scala:doc + export JAVA_HOME=$(JAVA_HOME_11_X64) && \ + mvn --no-transfer-progress scala:doc .PHONY: html html: cpp-apidoc spark-apidoc @@ -46,7 +46,7 @@ html: cpp-apidoc spark-apidoc rm -fr $(BUILDDIR)/html/spark/reference cp -fr $(ROOTDIR)/spark/target/site/scaladocs $(BUILDDIR)/html/spark/reference/ cd $(ROOTDIR)/java && \ - mvn -P javadoc javadoc:aggregate \ + mvn --no-transfer-progress -P javadoc javadoc:aggregate \ -Dmaven.antrun.skip=true \ -DskipTests \ -Djavadoc.output.directory=$(ROOTDIR)/docs/$(BUILDDIR)/html/java/ \ From d9deb15f19964644b19fd19264aa0725a7d4d168 Mon Sep 17 00:00:00 2001 From: SemyonSinchenko Date: Thu, 22 Feb 2024 13:29:49 +0100 Subject: [PATCH 14/18] Update a little docs Makefile On branch 320-datasources-refactoring Changes to be committed: modified: docs/Makefile --- docs/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/Makefile b/docs/Makefile index 670fa877c..bc58d9579 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -46,7 +46,7 @@ html: cpp-apidoc spark-apidoc rm -fr $(BUILDDIR)/html/spark/reference cp -fr $(ROOTDIR)/spark/target/site/scaladocs $(BUILDDIR)/html/spark/reference/ cd $(ROOTDIR)/java && \ - mvn --no-transfer-progress -P javadoc javadoc:aggregate \ + mvn --no-transfer-progress -P javadoc javadoc:aggregate \ -Dmaven.antrun.skip=true \ -DskipTests \ -Djavadoc.output.directory=$(ROOTDIR)/docs/$(BUILDDIR)/html/java/ \ @@ -67,7 +67,7 @@ html-poetry: rm -fr $(BUILDDIR)/html/spark/reference cp -fr $(ROOTDIR)/spark/target/site/scaladocs $(BUILDDIR)/html/spark/reference/ cd $(ROOTDIR)/java && \ - mvn -P javadoc javadoc:aggregate \ + mvn --no-transfer-progress -P javadoc javadoc:aggregate \ -Dmaven.antrun.skip=true \ -DskipTests \ -Djavadoc.output.directory=$(ROOTDIR)/docs/$(BUILDDIR)/html/java/ \ From 5753387e2e2be09e9cf1e9546d59119698b48c8c Mon Sep 17 00:00:00 2001 From: SemyonSinchenko Date: Thu, 22 Feb 2024 13:37:54 +0100 Subject: [PATCH 15/18] Update CI a little - add --no-transfer-progress to every mvn command in CI - move JAVA_HOME export from Makefile level to CI level On branch 320-datasources-refactoring Changes to be committed: modified: .github/workflows/docs.yml modified: .github/workflows/spark.yaml modified: docs/Makefile modified: pyspark/Makefile --- .github/workflows/docs.yml | 1 + .github/workflows/spark.yaml | 8 ++++---- docs/Makefile | 1 - pyspark/Makefile | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 2edf17383..eb9327ce6 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -76,6 +76,7 @@ jobs: - name: Generate Doc run: | + export JAVA_HOME=${JAVA_HOME_11_X64} pushd docs make html popd diff --git a/.github/workflows/spark.yaml b/.github/workflows/spark.yaml index 8e9729bf8..e315bdf0d 100644 --- a/.github/workflows/spark.yaml +++ b/.github/workflows/spark.yaml @@ -46,21 +46,21 @@ jobs: run: | export JAVA_HOME=${JAVA_HOME_11_X64} pushd spark - mvn spotless:check + mvn --no-transfer-progress spotless:check popd - name: Build GraphAr Spark run: | export JAVA_HOME=${JAVA_HOME_11_X64} pushd spark - mvn clean package -DskipTests -Dspotless.check.skip=true + mvn --no-transfer-progress clean package -DskipTests -Dspotless.check.skip=true popd - name: Run test run: | export JAVA_HOME=${JAVA_HOME_11_X64} pushd spark - mvn test -Dspotless.check.skip=true + mvn --no-transfer-progress test -Dspotless.check.skip=true popd - name: Run Neo4j2GraphAr example @@ -135,4 +135,4 @@ jobs: ./neo4j.sh neo4j.json # stop and clean - popd \ No newline at end of file + popd diff --git a/docs/Makefile b/docs/Makefile index bc58d9579..79159c5d2 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -37,7 +37,6 @@ cpp-apidoc: .PHONY: spark-apidoc spark-apidoc: cd $(ROOTDIR)/spark && \ - export JAVA_HOME=$(JAVA_HOME_11_X64) && \ mvn --no-transfer-progress scala:doc .PHONY: html diff --git a/pyspark/Makefile b/pyspark/Makefile index f8f7fde8e..2e0a6c9c7 100644 --- a/pyspark/Makefile +++ b/pyspark/Makefile @@ -15,7 +15,7 @@ .PHONY: install_test install_test: export JAVA_HOME=${JAVA_HOME_11_X64} - cd ../spark && mvn clean package -DskipTests -Dspotless.check.skip=true && cd ../pyspark + cd ../spark && mvn --no-transfer-progress clean package -DskipTests -Dspotless.check.skip=true && cd ../pyspark export PYSPARK_HADOOP_VERSION=3.2 poetry install --with=spark,tests From 12064b0002b49a70e09d554df62f03ff5c2aba39 Mon Sep 17 00:00:00 2001 From: SemyonSinchenko Date: Thu, 22 Feb 2024 13:44:27 +0100 Subject: [PATCH 16/18] Try to package first before scala:doc On branch 320-datasources-refactoring Changes to be committed: modified: docs/Makefile --- docs/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/Makefile b/docs/Makefile index 79159c5d2..be08f8cfd 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -37,6 +37,7 @@ cpp-apidoc: .PHONY: spark-apidoc spark-apidoc: cd $(ROOTDIR)/spark && \ + mvn --no-transfer-progress clean package -DskipTests -Dspotless.check.skip=true && \ mvn --no-transfer-progress scala:doc .PHONY: html From 78b2a7e3f88dc76fa7fb89adcdb10de60e54adda Mon Sep 17 00:00:00 2001 From: SemyonSinchenko Date: Thu, 22 Feb 2024 20:43:53 +0100 Subject: [PATCH 17/18] Try to fix scala:doc On branch 320-datasources-refactoring Changes to be committed: modified: docs/Makefile --- docs/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Makefile b/docs/Makefile index be08f8cfd..1fec1e200 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -37,7 +37,7 @@ cpp-apidoc: .PHONY: spark-apidoc spark-apidoc: cd $(ROOTDIR)/spark && \ - mvn --no-transfer-progress clean package -DskipTests -Dspotless.check.skip=true && \ + mvn --no-transfer-progress clean install -DskipTests -Dspotless.check.skip=true && \ mvn --no-transfer-progress scala:doc .PHONY: html From 4597ad01ea9bac1a946d55700a8206d25436c050 Mon Sep 17 00:00:00 2001 From: SemyonSinchenko Date: Thu, 22 Feb 2024 20:59:59 +0100 Subject: [PATCH 18/18] Fix spark target path On branch 320-datasources-refactoring Changes to be committed: modified: docs/Makefile --- docs/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/Makefile b/docs/Makefile index 1fec1e200..f8fda06d0 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -44,7 +44,7 @@ spark-apidoc: html: cpp-apidoc spark-apidoc $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html rm -fr $(BUILDDIR)/html/spark/reference - cp -fr $(ROOTDIR)/spark/target/site/scaladocs $(BUILDDIR)/html/spark/reference/ + cp -fr $(ROOTDIR)/spark/graphar/target/site/scaladocs $(BUILDDIR)/html/spark/reference/ cd $(ROOTDIR)/java && \ mvn --no-transfer-progress -P javadoc javadoc:aggregate \ -Dmaven.antrun.skip=true \ @@ -65,7 +65,7 @@ html-poetry: cd $(ROOTDIR)/pyspark && \ poetry run bash -c "cd $(ROOTDIR)/docs && $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html" rm -fr $(BUILDDIR)/html/spark/reference - cp -fr $(ROOTDIR)/spark/target/site/scaladocs $(BUILDDIR)/html/spark/reference/ + cp -fr $(ROOTDIR)/spark/graphar/target/site/scaladocs $(BUILDDIR)/html/spark/reference/ cd $(ROOTDIR)/java && \ mvn --no-transfer-progress -P javadoc javadoc:aggregate \ -Dmaven.antrun.skip=true \