diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 2edf17383..eb9327ce6 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -76,6 +76,7 @@ jobs: - name: Generate Doc run: | + export JAVA_HOME=${JAVA_HOME_11_X64} pushd docs make html popd diff --git a/.github/workflows/spark.yaml b/.github/workflows/spark.yaml index 8e9729bf8..e315bdf0d 100644 --- a/.github/workflows/spark.yaml +++ b/.github/workflows/spark.yaml @@ -46,21 +46,21 @@ jobs: run: | export JAVA_HOME=${JAVA_HOME_11_X64} pushd spark - mvn spotless:check + mvn --no-transfer-progress spotless:check popd - name: Build GraphAr Spark run: | export JAVA_HOME=${JAVA_HOME_11_X64} pushd spark - mvn clean package -DskipTests -Dspotless.check.skip=true + mvn --no-transfer-progress clean package -DskipTests -Dspotless.check.skip=true popd - name: Run test run: | export JAVA_HOME=${JAVA_HOME_11_X64} pushd spark - mvn test -Dspotless.check.skip=true + mvn --no-transfer-progress test -Dspotless.check.skip=true popd - name: Run Neo4j2GraphAr example @@ -135,4 +135,4 @@ jobs: ./neo4j.sh neo4j.json # stop and clean - popd \ No newline at end of file + popd diff --git a/.licenserc.yaml b/.licenserc.yaml index c6234a1f0..45b89c4cf 100644 --- a/.licenserc.yaml +++ b/.licenserc.yaml @@ -24,7 +24,7 @@ header: - 'LICENSE' - 'NOTICE' - 'testing' - - 'spark/src/test/resources' + - 'spark/graphar/src/test/resources' - 'java/src/test/resources' - '.licenserc.yaml' - '.gitignore' @@ -33,10 +33,10 @@ header: - 'pre-commit-config.yaml' - 'docs' - '**/.gitignore' - - 'spark/.scalafix.conf' - - 'spark/.scalafmt.conf' + - '**/.scalafix.conf' + - '**/.scalafmt.conf' - 'cpp/apidoc' - - 'spark/src/main/scala/com/alibaba/graphar/datasources' + - 'spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources' - '*.md' - '*.rst' - '**/*.json' diff --git a/docs/Makefile b/docs/Makefile index 3dfb507d0..f8fda06d0 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -37,15 +37,16 @@ cpp-apidoc: .PHONY: spark-apidoc spark-apidoc: cd $(ROOTDIR)/spark && \ - mvn scala:doc + mvn --no-transfer-progress clean install -DskipTests -Dspotless.check.skip=true && \ + mvn --no-transfer-progress scala:doc .PHONY: html html: cpp-apidoc spark-apidoc $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html rm -fr $(BUILDDIR)/html/spark/reference - cp -fr $(ROOTDIR)/spark/target/site/scaladocs $(BUILDDIR)/html/spark/reference/ + cp -fr $(ROOTDIR)/spark/graphar/target/site/scaladocs $(BUILDDIR)/html/spark/reference/ cd $(ROOTDIR)/java && \ - mvn -P javadoc javadoc:aggregate \ + mvn --no-transfer-progress -P javadoc javadoc:aggregate \ -Dmaven.antrun.skip=true \ -DskipTests \ -Djavadoc.output.directory=$(ROOTDIR)/docs/$(BUILDDIR)/html/java/ \ @@ -64,9 +65,9 @@ html-poetry: cd $(ROOTDIR)/pyspark && \ poetry run bash -c "cd $(ROOTDIR)/docs && $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html" rm -fr $(BUILDDIR)/html/spark/reference - cp -fr $(ROOTDIR)/spark/target/site/scaladocs $(BUILDDIR)/html/spark/reference/ + cp -fr $(ROOTDIR)/spark/graphar/target/site/scaladocs $(BUILDDIR)/html/spark/reference/ cd $(ROOTDIR)/java && \ - mvn -P javadoc javadoc:aggregate \ + mvn --no-transfer-progress -P javadoc javadoc:aggregate \ -Dmaven.antrun.skip=true \ -DskipTests \ -Djavadoc.output.directory=$(ROOTDIR)/docs/$(BUILDDIR)/html/java/ \ diff --git a/pyspark/Makefile b/pyspark/Makefile index f8f7fde8e..2e0a6c9c7 100644 --- a/pyspark/Makefile +++ b/pyspark/Makefile @@ -15,7 +15,7 @@ .PHONY: install_test install_test: export JAVA_HOME=${JAVA_HOME_11_X64} - cd ../spark && mvn clean package -DskipTests -Dspotless.check.skip=true && cd ../pyspark + cd ../spark && mvn --no-transfer-progress clean package -DskipTests -Dspotless.check.skip=true && cd ../pyspark export PYSPARK_HADOOP_VERSION=3.2 poetry install --with=spark,tests diff --git a/pyspark/tests/conftest.py b/pyspark/tests/conftest.py index 61bdd9591..3a3eeb4ec 100644 --- a/pyspark/tests/conftest.py +++ b/pyspark/tests/conftest.py @@ -17,7 +17,7 @@ import pytest from pyspark.sql import SparkSession -JARS_PATH = Path(__file__).parent.parent.parent.joinpath("spark").joinpath("target") +JARS_PATH = Path(__file__).parent.parent.parent.joinpath("spark").joinpath("graphar").joinpath("target") GRAPHAR_SHADED_JAR_PATH = None for jar_file in JARS_PATH.glob("*.jar"): diff --git a/spark/datasources-32/.scalafmt.conf b/spark/datasources-32/.scalafmt.conf new file mode 120000 index 000000000..4cb05e831 --- /dev/null +++ b/spark/datasources-32/.scalafmt.conf @@ -0,0 +1 @@ +../.scalafmt.conf \ No newline at end of file diff --git a/spark/datasources-32/pom.xml b/spark/datasources-32/pom.xml new file mode 100644 index 000000000..e1af90c01 --- /dev/null +++ b/spark/datasources-32/pom.xml @@ -0,0 +1,188 @@ + + + + + 4.0.0 + + + com.alibaba + graphar + ${graphar.version} + + + com.alibaba + graphar-datasources + ${graphar.version} + jar + + + + org.apache.spark + spark-core_${scala.binary.version} + ${spark.version} + provided + + + org.apache.spark + spark-streaming_${scala.binary.version} + ${spark.version} + provided + + + org.apache.spark + spark-mllib_${scala.binary.version} + ${spark.version} + provided + + + org.apache.spark + spark-sql_${scala.binary.version} + ${spark.version} + provided + + + org.apache.spark + spark-hive_${scala.binary.version} + ${spark.version} + provided + + + + + + + org.scala-tools + maven-scala-plugin + 2.15.2 + + ${scala.version} + + -target:jvm-1.8 + + + -Xss4096K + + + + + scala-compile + + compile + + + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + scala-test-compile + + testCompile + + + + + + net.alchim31.maven + scala-maven-plugin + 4.8.0 + + + + compile + testCompile + + + + + + -Xms64m + -Xmx1024m + + + -Ywarn-unused + + + + org.scalameta + semanticdb-scalac_2.12.10 + 4.3.24 + + + + + + com.diffplug.spotless + spotless-maven-plugin + 2.20.0 + + + + + + + 1.13.0 + + + + + + ${project.basedir}/.scalafmt.conf + + + + + + io.github.evis + scalafix-maven-plugin_2.13 + 0.1.8_0.11.0 + + + org.apache.maven.plugins + maven-source-plugin + + + attach-sources + + jar + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + + + attach-javadocs + + jar + + + + + + maven-site-plugin + 3.7.1 + + + + diff --git a/spark/datasources-32/src/main/java/com/alibaba/graphar/GeneralParams.java b/spark/datasources-32/src/main/java/com/alibaba/graphar/GeneralParams.java new file mode 120000 index 000000000..972663dd8 --- /dev/null +++ b/spark/datasources-32/src/main/java/com/alibaba/graphar/GeneralParams.java @@ -0,0 +1 @@ +../../../../../../../graphar/src/main/java/com/alibaba/graphar/GeneralParams.java \ No newline at end of file diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/GarCommitProtocol.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarCommitProtocol.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/datasources/GarCommitProtocol.scala rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarCommitProtocol.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/GarDataSource.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarDataSource.scala similarity index 90% rename from spark/src/main/scala/com/alibaba/graphar/datasources/GarDataSource.scala rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarDataSource.scala index 1b6d28645..d4fe44fd1 100644 --- a/spark/src/main/scala/com/alibaba/graphar/datasources/GarDataSource.scala +++ b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarDataSource.scala @@ -17,6 +17,7 @@ package com.alibaba.graphar.datasources import scala.collection.JavaConverters._ +import scala.util.matching.Regex import java.util import com.fasterxml.jackson.databind.ObjectMapper @@ -34,14 +35,29 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.sql.sources.DataSourceRegister import org.apache.spark.sql.connector.expressions.Transform -import com.alibaba.graphar.util.Utils - object GarUtils /** * GarDataSource is a class to provide gar files as the data source for spark. */ class GarDataSource extends TableProvider with DataSourceRegister { + private val REDACTION_REPLACEMENT_TEXT = "*********(redacted)" + + /** + * Redact the sensitive information in the given string. + */ + // Copy of redact from graphar Utils + private def redact(regex: Option[Regex], text: String): String = { + regex match { + case None => text + case Some(r) => + if (text == null || text.isEmpty) { + text + } else { + r.replaceAllIn(text, REDACTION_REPLACEMENT_TEXT) + } + } + } /** The default fallback file format is Parquet. */ def fallbackFileFormat: Class[_ <: FileFormat] = classOf[ParquetFileFormat] @@ -80,7 +96,7 @@ class GarDataSource extends TableProvider with DataSourceRegister { val name = shortName() + " " + paths .map(qualifiedPathName(_, hadoopConf)) .mkString(",") - Utils.redact(sparkSession.sessionState.conf.stringRedactionPattern, name) + redact(sparkSession.sessionState.conf.stringRedactionPattern, name) } private def qualifiedPathName( diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/GarScan.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarScan.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/datasources/GarScan.scala rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarScan.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/GarScanBuilder.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarScanBuilder.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/datasources/GarScanBuilder.scala rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarScanBuilder.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/GarTable.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarTable.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/datasources/GarTable.scala rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarTable.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/GarWriterBuilder.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarWriterBuilder.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/datasources/GarWriterBuilder.scala rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarWriterBuilder.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/csv/CSVWriterBuilder.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/csv/CSVWriterBuilder.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/datasources/csv/CSVWriterBuilder.scala rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/csv/CSVWriterBuilder.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/orc/OrcOutputWriter.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/orc/OrcOutputWriter.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/datasources/orc/OrcOutputWriter.scala rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/orc/OrcOutputWriter.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/orc/OrcWriteBuilder.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/orc/OrcWriteBuilder.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/datasources/orc/OrcWriteBuilder.scala rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/orc/OrcWriteBuilder.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/parquet/ParquetWriterBuilder.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/parquet/ParquetWriterBuilder.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/datasources/parquet/ParquetWriterBuilder.scala rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/parquet/ParquetWriterBuilder.scala diff --git a/spark/graphar/.scalafmt.conf b/spark/graphar/.scalafmt.conf new file mode 120000 index 000000000..4cb05e831 --- /dev/null +++ b/spark/graphar/.scalafmt.conf @@ -0,0 +1 @@ +../.scalafmt.conf \ No newline at end of file diff --git a/spark/graphar/pom.xml b/spark/graphar/pom.xml new file mode 100644 index 000000000..75ca9ff82 --- /dev/null +++ b/spark/graphar/pom.xml @@ -0,0 +1,285 @@ + + + + + 4.0.0 + + + com.alibaba + graphar + ${graphar.version} + + + com.alibaba + graphar-commons + ${graphar.version} + jar + + + + com.alibaba + graphar-datasources + ${graphar.version} + + + org.apache.spark + spark-core_${scala.binary.version} + ${spark.version} + provided + + + org.apache.spark + spark-streaming_${scala.binary.version} + ${spark.version} + provided + + + org.apache.spark + spark-mllib_${scala.binary.version} + ${spark.version} + provided + + + org.apache.spark + spark-sql_${scala.binary.version} + ${spark.version} + provided + + + org.apache.spark + spark-hive_${scala.binary.version} + ${spark.version} + provided + + + org.scalatest + scalatest_${scala.binary.version} + 3.1.1 + provided + + + org.scala-lang + scala-library + ${scala.version} + provided + + + org.yaml + snakeyaml + 2.0 + + + com.aliyun.odps + hadoop-fs-oss + ${cupid.sdk.version} + + + org.apache.hadoop + hadoop-common + + + + + com.aliyun.odps + odps-spark-datasource_2.11 + ${cupid.sdk.version} + + + net.jpountz.lz4 + lz4 + + + + + com.aliyun.odps + cupid-sdk + ${cupid.sdk.version} + provided + + + org.neo4j + neo4j-connector-apache-spark_2.12 + 5.0.0_for_spark_3 + + + com.vesoft + nebula-spark-connector_3.0 + 3.6.0 + + + org.scala-lang.modules + scala-collection-compat_2.12 + 2.1.1 + + + + + + org.scala-tools + maven-scala-plugin + 2.15.2 + + ${scala.version} + + -target:jvm-1.8 + + + -Xss4096K + + + + + scala-compile + + compile + + + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + scala-test-compile + + testCompile + + + + + + org.scalatest + scalatest-maven-plugin + 2.0.0 + + + test + + test + + + + + + org.apache.maven.plugins + maven-shade-plugin + 2.1 + + + package + + shade + + + false + true + + + + *:* + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + **/log4j.properties + + + + + + reference.conf + + + + + + + + net.alchim31.maven + scala-maven-plugin + 4.8.0 + + + + compile + testCompile + + + + + + -Xms64m + -Xmx1024m + + + -Ywarn-unused + + + + org.scalameta + semanticdb-scalac_2.12.10 + 4.3.24 + + + + + + io.github.evis + scalafix-maven-plugin_2.13 + 0.1.8_0.11.0 + + + org.apache.maven.plugins + maven-source-plugin + + + attach-sources + + jar + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + + + attach-javadocs + + jar + + + + + + maven-site-plugin + 3.7.1 + + + + diff --git a/spark/src/main/java/com/alibaba/graphar/GeneralParams.java b/spark/graphar/src/main/java/com/alibaba/graphar/GeneralParams.java similarity index 100% rename from spark/src/main/java/com/alibaba/graphar/GeneralParams.java rename to spark/graphar/src/main/java/com/alibaba/graphar/GeneralParams.java diff --git a/spark/src/main/scala/com/alibaba/graphar/EdgeInfo.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/EdgeInfo.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/EdgeInfo.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/EdgeInfo.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/GraphInfo.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/GraphInfo.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/GraphInfo.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/GraphInfo.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/VertexInfo.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/VertexInfo.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/VertexInfo.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/VertexInfo.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/example/GraphAr2Nebula.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/example/GraphAr2Nebula.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/example/GraphAr2Nebula.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/example/GraphAr2Nebula.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/example/GraphAr2Neo4j.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/example/GraphAr2Neo4j.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/example/GraphAr2Neo4j.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/example/GraphAr2Neo4j.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/example/Nebula2GraphAr.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/example/Nebula2GraphAr.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/example/Nebula2GraphAr.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/example/Nebula2GraphAr.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/example/Neo4j2GraphAr.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/example/Neo4j2GraphAr.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/example/Neo4j2GraphAr.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/example/Neo4j2GraphAr.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/graph/GraphReader.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/graph/GraphReader.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/graph/GraphReader.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/graph/GraphReader.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/graph/GraphTransformer.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/graph/GraphTransformer.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/graph/GraphTransformer.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/graph/GraphTransformer.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/graph/GraphWriter.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/graph/GraphWriter.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/graph/GraphWriter.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/graph/GraphWriter.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/importer/Neo4j.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/importer/Neo4j.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/importer/Neo4j.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/importer/Neo4j.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/reader/EdgeReader.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/reader/EdgeReader.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/reader/EdgeReader.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/reader/EdgeReader.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/reader/VertexReader.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/reader/VertexReader.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/reader/VertexReader.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/reader/VertexReader.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/util/DataFrameConcat.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/util/DataFrameConcat.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/util/DataFrameConcat.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/util/DataFrameConcat.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/util/FileSystem.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/util/FileSystem.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/util/FileSystem.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/util/FileSystem.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/util/IndexGenerator.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/util/IndexGenerator.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/util/IndexGenerator.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/util/IndexGenerator.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/util/Patitioner.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/util/Patitioner.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/util/Patitioner.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/util/Patitioner.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/util/Utils.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/util/Utils.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/util/Utils.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/util/Utils.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/writer/EdgeWriter.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/writer/EdgeWriter.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/writer/EdgeWriter.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/writer/EdgeWriter.scala diff --git a/spark/src/main/scala/com/alibaba/graphar/writer/VertexWriter.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/writer/VertexWriter.scala similarity index 100% rename from spark/src/main/scala/com/alibaba/graphar/writer/VertexWriter.scala rename to spark/graphar/src/main/scala/com/alibaba/graphar/writer/VertexWriter.scala diff --git a/spark/graphar/src/test/resources/gar-test b/spark/graphar/src/test/resources/gar-test new file mode 120000 index 000000000..4ce4f440f --- /dev/null +++ b/spark/graphar/src/test/resources/gar-test @@ -0,0 +1 @@ +../../../../../testing/ \ No newline at end of file diff --git a/spark/src/test/scala/com/alibaba/graphar/ComputeExample.scala b/spark/graphar/src/test/scala/com/alibaba/graphar/ComputeExample.scala similarity index 100% rename from spark/src/test/scala/com/alibaba/graphar/ComputeExample.scala rename to spark/graphar/src/test/scala/com/alibaba/graphar/ComputeExample.scala diff --git a/spark/src/test/scala/com/alibaba/graphar/TestGraphInfo.scala b/spark/graphar/src/test/scala/com/alibaba/graphar/TestGraphInfo.scala similarity index 100% rename from spark/src/test/scala/com/alibaba/graphar/TestGraphInfo.scala rename to spark/graphar/src/test/scala/com/alibaba/graphar/TestGraphInfo.scala diff --git a/spark/src/test/scala/com/alibaba/graphar/TestGraphReader.scala b/spark/graphar/src/test/scala/com/alibaba/graphar/TestGraphReader.scala similarity index 100% rename from spark/src/test/scala/com/alibaba/graphar/TestGraphReader.scala rename to spark/graphar/src/test/scala/com/alibaba/graphar/TestGraphReader.scala diff --git a/spark/src/test/scala/com/alibaba/graphar/TestGraphTransformer.scala b/spark/graphar/src/test/scala/com/alibaba/graphar/TestGraphTransformer.scala similarity index 100% rename from spark/src/test/scala/com/alibaba/graphar/TestGraphTransformer.scala rename to spark/graphar/src/test/scala/com/alibaba/graphar/TestGraphTransformer.scala diff --git a/spark/src/test/scala/com/alibaba/graphar/TestGraphWriter.scala b/spark/graphar/src/test/scala/com/alibaba/graphar/TestGraphWriter.scala similarity index 100% rename from spark/src/test/scala/com/alibaba/graphar/TestGraphWriter.scala rename to spark/graphar/src/test/scala/com/alibaba/graphar/TestGraphWriter.scala diff --git a/spark/src/test/scala/com/alibaba/graphar/TestIndexGenerator.scala b/spark/graphar/src/test/scala/com/alibaba/graphar/TestIndexGenerator.scala similarity index 100% rename from spark/src/test/scala/com/alibaba/graphar/TestIndexGenerator.scala rename to spark/graphar/src/test/scala/com/alibaba/graphar/TestIndexGenerator.scala diff --git a/spark/src/test/scala/com/alibaba/graphar/TestReader.scala b/spark/graphar/src/test/scala/com/alibaba/graphar/TestReader.scala similarity index 100% rename from spark/src/test/scala/com/alibaba/graphar/TestReader.scala rename to spark/graphar/src/test/scala/com/alibaba/graphar/TestReader.scala diff --git a/spark/src/test/scala/com/alibaba/graphar/TestWriter.scala b/spark/graphar/src/test/scala/com/alibaba/graphar/TestWriter.scala similarity index 100% rename from spark/src/test/scala/com/alibaba/graphar/TestWriter.scala rename to spark/graphar/src/test/scala/com/alibaba/graphar/TestWriter.scala diff --git a/spark/src/test/scala/com/alibaba/graphar/TransformExample.scala b/spark/graphar/src/test/scala/com/alibaba/graphar/TransformExample.scala similarity index 100% rename from spark/src/test/scala/com/alibaba/graphar/TransformExample.scala rename to spark/graphar/src/test/scala/com/alibaba/graphar/TransformExample.scala diff --git a/spark/import/neo4j.sh b/spark/import/neo4j.sh index 7389567ec..d7ddaadc4 100755 --- a/spark/import/neo4j.sh +++ b/spark/import/neo4j.sh @@ -17,8 +17,8 @@ set -eu cur_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -jar_file="${cur_dir}/../target/graphar-0.1.0-SNAPSHOT-shaded.jar" +jar_file="${cur_dir}/../graphar/target/graphar-commons-0.1.0-SNAPSHOT-shaded.jar" conf_path="$(readlink -f $1)" spark-submit --class com.alibaba.graphar.importer.Neo4j ${jar_file} \ - ${conf_path} \ No newline at end of file + ${conf_path} diff --git a/spark/pom.xml b/spark/pom.xml index 3cb7c5e1b..4fc8235ab 100644 --- a/spark/pom.xml +++ b/spark/pom.xml @@ -21,115 +21,58 @@ com.alibaba graphar - 0.1.0-SNAPSHOT + ${graphar.version} + pom - - graphar - UTF-8 - UTF-8 - 2.12.10 - 2.12 - 512m - 1024m - 3.2.2 - 1.8 - 1.8 - 3.3.8-public - - - - org.apache.spark - spark-core_${scala.binary.version} - ${spark.version} - provided - - - org.apache.spark - spark-streaming_${scala.binary.version} - ${spark.version} - provided - - - org.apache.spark - spark-mllib_${scala.binary.version} - ${spark.version} - provided - - - org.apache.spark - spark-sql_${scala.binary.version} - ${spark.version} - provided - - - org.apache.spark - spark-hive_${scala.binary.version} - ${spark.version} - provided - - - org.scalatest - scalatest_${scala.binary.version} - 3.1.1 - provided - - - org.scala-lang - scala-library - ${scala.version} - provided - - - org.yaml - snakeyaml - 2.0 - - - com.aliyun.odps - hadoop-fs-oss - ${cupid.sdk.version} - - - org.apache.hadoop - hadoop-common - - - - - com.aliyun.odps - odps-spark-datasource_2.11 - ${cupid.sdk.version} - - - net.jpountz.lz4 - lz4 - - - - - com.aliyun.odps - cupid-sdk - ${cupid.sdk.version} - provided - - - org.neo4j - neo4j-connector-apache-spark_2.12 - 5.0.0_for_spark_3 - - - com.vesoft - nebula-spark-connector_3.0 - 3.6.0 - - - org.scala-lang.modules - scala-collection-compat_2.12 - 2.1.1 - - + + + datasources-32 + + graphar + UTF-8 + UTF-8 + 2.12.10 + 2.12 + 512m + 1024m + 3.2.2 + 1.8 + 1.8 + 3.3.8-public + 0.1.0-SNAPSHOT + + + graphar + datasources-32 + + + true + + + + + com.diffplug.spotless + spotless-maven-plugin + 2.20.0 + + + + + + + 1.13.0 + + + + + + ${project.basedir}/.scalafmt.conf + + + + org.scala-tools maven-scala-plugin @@ -165,144 +108,6 @@ - - org.scalatest - scalatest-maven-plugin - 2.0.0 - - - test - - test - - - - - - org.apache.maven.plugins - maven-shade-plugin - 2.1 - - - package - - shade - - - false - true - - - - *:* - - - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - **/log4j.properties - - - - - - reference.conf - - - - - - - - net.alchim31.maven - scala-maven-plugin - 4.8.0 - - - - compile - testCompile - - - - - - -Xms64m - -Xmx1024m - - - -Ywarn-unused - - - - org.scalameta - semanticdb-scalac_2.12.10 - 4.3.24 - - - - - - com.diffplug.spotless - spotless-maven-plugin - 2.20.0 - - - - - - - 1.13.0 - - - - - - ${project.basedir}/.scalafmt.conf - - - - - - io.github.evis - scalafix-maven-plugin_2.13 - 0.1.8_0.11.0 - - - org.apache.maven.plugins - maven-source-plugin - - - attach-sources - - jar - - - - - - org.apache.maven.plugins - maven-javadoc-plugin - - - attach-javadocs - - jar - - - - - - maven-site-plugin - 3.7.1 - - jar diff --git a/spark/scripts/run-graphar2nebula.sh b/spark/scripts/run-graphar2nebula.sh index 5094885f7..7a47df44d 100755 --- a/spark/scripts/run-graphar2nebula.sh +++ b/spark/scripts/run-graphar2nebula.sh @@ -16,7 +16,7 @@ set -eu cur_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -jar_file="${cur_dir}/../target/graphar-0.1.0-SNAPSHOT-shaded.jar" +jar_file="${cur_dir}/../graphar/target/graphar-commons-0.1.0-SNAPSHOT-shaded.jar" graph_info_path="${GRAPH_INFO_PATH:-/tmp/graphar/nebula2graphar/basketballplayergraph.graph.yml}" spark-submit --class com.alibaba.graphar.example.GraphAr2Nebula ${jar_file} \ diff --git a/spark/scripts/run-graphar2neo4j.sh b/spark/scripts/run-graphar2neo4j.sh index 25150999a..d350387a2 100755 --- a/spark/scripts/run-graphar2neo4j.sh +++ b/spark/scripts/run-graphar2neo4j.sh @@ -17,7 +17,7 @@ set -eu cur_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -jar_file="${cur_dir}/../target/graphar-0.1.0-SNAPSHOT-shaded.jar" +jar_file="${cur_dir}/../graphar/target/graphar-commons-0.1.0-SNAPSHOT-shaded.jar" graph_info_path="${GRAPH_INFO_PATH:-/tmp/graphar/neo4j2graphar/MovieGraph.graph.yml}" spark-submit --class com.alibaba.graphar.example.GraphAr2Neo4j ${jar_file} \ diff --git a/spark/scripts/run-nebula2graphar.sh b/spark/scripts/run-nebula2graphar.sh index 3204b1177..898859402 100755 --- a/spark/scripts/run-nebula2graphar.sh +++ b/spark/scripts/run-nebula2graphar.sh @@ -16,7 +16,7 @@ set -eu cur_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -jar_file="${cur_dir}/../target/graphar-0.1.0-SNAPSHOT-shaded.jar" +jar_file="${cur_dir}/../graphar/target/graphar-commons-0.1.0-SNAPSHOT-shaded.jar" vertex_chunk_size=100 edge_chunk_size=1024 diff --git a/spark/scripts/run-neo4j2graphar.sh b/spark/scripts/run-neo4j2graphar.sh index 16358838f..e18e7c9af 100755 --- a/spark/scripts/run-neo4j2graphar.sh +++ b/spark/scripts/run-neo4j2graphar.sh @@ -17,7 +17,7 @@ set -eu cur_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -jar_file="${cur_dir}/../target/graphar-0.1.0-SNAPSHOT-shaded.jar" +jar_file="${cur_dir}/../graphar/target/graphar-commons-0.1.0-SNAPSHOT-shaded.jar" vertex_chunk_size=100 edge_chunk_size=1024 diff --git a/spark/src/test/resources/gar-test b/spark/src/test/resources/gar-test deleted file mode 120000 index 1166084dd..000000000 --- a/spark/src/test/resources/gar-test +++ /dev/null @@ -1 +0,0 @@ -../../../../testing \ No newline at end of file