diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 2edf17383..eb9327ce6 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -76,6 +76,7 @@ jobs:
- name: Generate Doc
run: |
+ export JAVA_HOME=${JAVA_HOME_11_X64}
pushd docs
make html
popd
diff --git a/.github/workflows/spark.yaml b/.github/workflows/spark.yaml
index 8e9729bf8..e315bdf0d 100644
--- a/.github/workflows/spark.yaml
+++ b/.github/workflows/spark.yaml
@@ -46,21 +46,21 @@ jobs:
run: |
export JAVA_HOME=${JAVA_HOME_11_X64}
pushd spark
- mvn spotless:check
+ mvn --no-transfer-progress spotless:check
popd
- name: Build GraphAr Spark
run: |
export JAVA_HOME=${JAVA_HOME_11_X64}
pushd spark
- mvn clean package -DskipTests -Dspotless.check.skip=true
+ mvn --no-transfer-progress clean package -DskipTests -Dspotless.check.skip=true
popd
- name: Run test
run: |
export JAVA_HOME=${JAVA_HOME_11_X64}
pushd spark
- mvn test -Dspotless.check.skip=true
+ mvn --no-transfer-progress test -Dspotless.check.skip=true
popd
- name: Run Neo4j2GraphAr example
@@ -135,4 +135,4 @@ jobs:
./neo4j.sh neo4j.json
# stop and clean
- popd
\ No newline at end of file
+ popd
diff --git a/.licenserc.yaml b/.licenserc.yaml
index c6234a1f0..45b89c4cf 100644
--- a/.licenserc.yaml
+++ b/.licenserc.yaml
@@ -24,7 +24,7 @@ header:
- 'LICENSE'
- 'NOTICE'
- 'testing'
- - 'spark/src/test/resources'
+ - 'spark/graphar/src/test/resources'
- 'java/src/test/resources'
- '.licenserc.yaml'
- '.gitignore'
@@ -33,10 +33,10 @@ header:
- 'pre-commit-config.yaml'
- 'docs'
- '**/.gitignore'
- - 'spark/.scalafix.conf'
- - 'spark/.scalafmt.conf'
+ - '**/.scalafix.conf'
+ - '**/.scalafmt.conf'
- 'cpp/apidoc'
- - 'spark/src/main/scala/com/alibaba/graphar/datasources'
+ - 'spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources'
- '*.md'
- '*.rst'
- '**/*.json'
diff --git a/docs/Makefile b/docs/Makefile
index 3dfb507d0..f8fda06d0 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -37,15 +37,16 @@ cpp-apidoc:
.PHONY: spark-apidoc
spark-apidoc:
cd $(ROOTDIR)/spark && \
- mvn scala:doc
+ mvn --no-transfer-progress clean install -DskipTests -Dspotless.check.skip=true && \
+ mvn --no-transfer-progress scala:doc
.PHONY: html
html: cpp-apidoc spark-apidoc
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
rm -fr $(BUILDDIR)/html/spark/reference
- cp -fr $(ROOTDIR)/spark/target/site/scaladocs $(BUILDDIR)/html/spark/reference/
+ cp -fr $(ROOTDIR)/spark/graphar/target/site/scaladocs $(BUILDDIR)/html/spark/reference/
cd $(ROOTDIR)/java && \
- mvn -P javadoc javadoc:aggregate \
+ mvn --no-transfer-progress -P javadoc javadoc:aggregate \
-Dmaven.antrun.skip=true \
-DskipTests \
-Djavadoc.output.directory=$(ROOTDIR)/docs/$(BUILDDIR)/html/java/ \
@@ -64,9 +65,9 @@ html-poetry:
cd $(ROOTDIR)/pyspark && \
poetry run bash -c "cd $(ROOTDIR)/docs && $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html"
rm -fr $(BUILDDIR)/html/spark/reference
- cp -fr $(ROOTDIR)/spark/target/site/scaladocs $(BUILDDIR)/html/spark/reference/
+ cp -fr $(ROOTDIR)/spark/graphar/target/site/scaladocs $(BUILDDIR)/html/spark/reference/
cd $(ROOTDIR)/java && \
- mvn -P javadoc javadoc:aggregate \
+ mvn --no-transfer-progress -P javadoc javadoc:aggregate \
-Dmaven.antrun.skip=true \
-DskipTests \
-Djavadoc.output.directory=$(ROOTDIR)/docs/$(BUILDDIR)/html/java/ \
diff --git a/pyspark/Makefile b/pyspark/Makefile
index f8f7fde8e..2e0a6c9c7 100644
--- a/pyspark/Makefile
+++ b/pyspark/Makefile
@@ -15,7 +15,7 @@
.PHONY: install_test
install_test:
export JAVA_HOME=${JAVA_HOME_11_X64}
- cd ../spark && mvn clean package -DskipTests -Dspotless.check.skip=true && cd ../pyspark
+ cd ../spark && mvn --no-transfer-progress clean package -DskipTests -Dspotless.check.skip=true && cd ../pyspark
export PYSPARK_HADOOP_VERSION=3.2
poetry install --with=spark,tests
diff --git a/pyspark/tests/conftest.py b/pyspark/tests/conftest.py
index 61bdd9591..3a3eeb4ec 100644
--- a/pyspark/tests/conftest.py
+++ b/pyspark/tests/conftest.py
@@ -17,7 +17,7 @@
import pytest
from pyspark.sql import SparkSession
-JARS_PATH = Path(__file__).parent.parent.parent.joinpath("spark").joinpath("target")
+JARS_PATH = Path(__file__).parent.parent.parent / "spark" / "graphar" / "target"
GRAPHAR_SHADED_JAR_PATH = None
for jar_file in JARS_PATH.glob("*.jar"):
diff --git a/spark/datasources-32/.scalafmt.conf b/spark/datasources-32/.scalafmt.conf
new file mode 120000
index 000000000..4cb05e831
--- /dev/null
+++ b/spark/datasources-32/.scalafmt.conf
@@ -0,0 +1 @@
+../.scalafmt.conf
\ No newline at end of file
diff --git a/spark/datasources-32/pom.xml b/spark/datasources-32/pom.xml
new file mode 100644
index 000000000..e1af90c01
--- /dev/null
+++ b/spark/datasources-32/pom.xml
@@ -0,0 +1,188 @@
+
+
+
+
+ 4.0.0
+
+
+ com.alibaba
+ graphar
+ ${graphar.version}
+
+
+ com.alibaba
+ graphar-datasources
+ ${graphar.version}
+ jar
+
+
+
+ org.apache.spark
+ spark-core_${scala.binary.version}
+ ${spark.version}
+ provided
+
+
+ org.apache.spark
+ spark-streaming_${scala.binary.version}
+ ${spark.version}
+ provided
+
+
+ org.apache.spark
+ spark-mllib_${scala.binary.version}
+ ${spark.version}
+ provided
+
+
+ org.apache.spark
+ spark-sql_${scala.binary.version}
+ ${spark.version}
+ provided
+
+
+ org.apache.spark
+ spark-hive_${scala.binary.version}
+ ${spark.version}
+ provided
+
+
+
+
+
+
+ org.scala-tools
+ maven-scala-plugin
+ 2.15.2
+
+ ${scala.version}
+
+ -target:jvm-1.8
+
+
+ -Xss4096K
+
+
+
+
+ scala-compile
+
+ compile
+
+
+
+ META-INF/*.SF
+ META-INF/*.DSA
+ META-INF/*.RSA
+
+
+
+
+ scala-test-compile
+
+ testCompile
+
+
+
+
+
+ net.alchim31.maven
+ scala-maven-plugin
+ 4.8.0
+
+
+
+ compile
+ testCompile
+
+
+
+
+
+ -Xms64m
+ -Xmx1024m
+
+
+ -Ywarn-unused
+
+
+
+ org.scalameta
+ semanticdb-scalac_2.12.10
+ 4.3.24
+
+
+
+
+
+ com.diffplug.spotless
+ spotless-maven-plugin
+ 2.20.0
+
+
+
+
+
+
+ 1.13.0
+
+
+
+
+
+ ${project.basedir}/.scalafmt.conf
+
+
+
+
+
+ io.github.evis
+ scalafix-maven-plugin_2.13
+ 0.1.8_0.11.0
+
+
+ org.apache.maven.plugins
+ maven-source-plugin
+
+
+ attach-sources
+
+ jar
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-javadoc-plugin
+
+
+ attach-javadocs
+
+ jar
+
+
+
+
+
+ maven-site-plugin
+ 3.7.1
+
+
+
+
diff --git a/spark/datasources-32/src/main/java/com/alibaba/graphar/GeneralParams.java b/spark/datasources-32/src/main/java/com/alibaba/graphar/GeneralParams.java
new file mode 120000
index 000000000..972663dd8
--- /dev/null
+++ b/spark/datasources-32/src/main/java/com/alibaba/graphar/GeneralParams.java
@@ -0,0 +1 @@
+../../../../../../../graphar/src/main/java/com/alibaba/graphar/GeneralParams.java
\ No newline at end of file
diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/GarCommitProtocol.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarCommitProtocol.scala
similarity index 100%
rename from spark/src/main/scala/com/alibaba/graphar/datasources/GarCommitProtocol.scala
rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarCommitProtocol.scala
diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/GarDataSource.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarDataSource.scala
similarity index 90%
rename from spark/src/main/scala/com/alibaba/graphar/datasources/GarDataSource.scala
rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarDataSource.scala
index 1b6d28645..d4fe44fd1 100644
--- a/spark/src/main/scala/com/alibaba/graphar/datasources/GarDataSource.scala
+++ b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarDataSource.scala
@@ -17,6 +17,7 @@
package com.alibaba.graphar.datasources
import scala.collection.JavaConverters._
+import scala.util.matching.Regex
import java.util
import com.fasterxml.jackson.databind.ObjectMapper
@@ -34,14 +35,29 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap
import org.apache.spark.sql.sources.DataSourceRegister
import org.apache.spark.sql.connector.expressions.Transform
-import com.alibaba.graphar.util.Utils
-
object GarUtils
/**
* GarDataSource is a class to provide gar files as the data source for spark.
*/
class GarDataSource extends TableProvider with DataSourceRegister {
+ private val REDACTION_REPLACEMENT_TEXT = "*********(redacted)"
+
+ /**
+ * Replace every match of `regex` in `text` with REDACTION_REPLACEMENT_TEXT;
+ * a None regex or null/empty text returns `text` as is (copy of Utils.redact).
+ */
+ private def redact(regex: Option[Regex], text: String): String = {
+ regex match {
+ case None => text
+ case Some(r) =>
+ if (text == null || text.isEmpty) {
+ text
+ } else {
+ r.replaceAllIn(text, REDACTION_REPLACEMENT_TEXT)
+ }
+ }
+ }
/** The default fallback file format is Parquet. */
def fallbackFileFormat: Class[_ <: FileFormat] = classOf[ParquetFileFormat]
@@ -80,7 +96,7 @@ class GarDataSource extends TableProvider with DataSourceRegister {
val name = shortName() + " " + paths
.map(qualifiedPathName(_, hadoopConf))
.mkString(",")
- Utils.redact(sparkSession.sessionState.conf.stringRedactionPattern, name)
+ redact(sparkSession.sessionState.conf.stringRedactionPattern, name)
}
private def qualifiedPathName(
diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/GarScan.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarScan.scala
similarity index 100%
rename from spark/src/main/scala/com/alibaba/graphar/datasources/GarScan.scala
rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarScan.scala
diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/GarScanBuilder.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarScanBuilder.scala
similarity index 100%
rename from spark/src/main/scala/com/alibaba/graphar/datasources/GarScanBuilder.scala
rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarScanBuilder.scala
diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/GarTable.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarTable.scala
similarity index 100%
rename from spark/src/main/scala/com/alibaba/graphar/datasources/GarTable.scala
rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarTable.scala
diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/GarWriterBuilder.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarWriterBuilder.scala
similarity index 100%
rename from spark/src/main/scala/com/alibaba/graphar/datasources/GarWriterBuilder.scala
rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/GarWriterBuilder.scala
diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/csv/CSVWriterBuilder.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/csv/CSVWriterBuilder.scala
similarity index 100%
rename from spark/src/main/scala/com/alibaba/graphar/datasources/csv/CSVWriterBuilder.scala
rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/csv/CSVWriterBuilder.scala
diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/orc/OrcOutputWriter.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/orc/OrcOutputWriter.scala
similarity index 100%
rename from spark/src/main/scala/com/alibaba/graphar/datasources/orc/OrcOutputWriter.scala
rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/orc/OrcOutputWriter.scala
diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/orc/OrcWriteBuilder.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/orc/OrcWriteBuilder.scala
similarity index 100%
rename from spark/src/main/scala/com/alibaba/graphar/datasources/orc/OrcWriteBuilder.scala
rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/orc/OrcWriteBuilder.scala
diff --git a/spark/src/main/scala/com/alibaba/graphar/datasources/parquet/ParquetWriterBuilder.scala b/spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/parquet/ParquetWriterBuilder.scala
similarity index 100%
rename from spark/src/main/scala/com/alibaba/graphar/datasources/parquet/ParquetWriterBuilder.scala
rename to spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources/parquet/ParquetWriterBuilder.scala
diff --git a/spark/graphar/.scalafmt.conf b/spark/graphar/.scalafmt.conf
new file mode 120000
index 000000000..4cb05e831
--- /dev/null
+++ b/spark/graphar/.scalafmt.conf
@@ -0,0 +1 @@
+../.scalafmt.conf
\ No newline at end of file
diff --git a/spark/graphar/pom.xml b/spark/graphar/pom.xml
new file mode 100644
index 000000000..75ca9ff82
--- /dev/null
+++ b/spark/graphar/pom.xml
@@ -0,0 +1,285 @@
+
+
+
+
+ 4.0.0
+
+
+ com.alibaba
+ graphar
+ ${graphar.version}
+
+
+ com.alibaba
+ graphar-commons
+ ${graphar.version}
+ jar
+
+
+
+ com.alibaba
+ graphar-datasources
+ ${graphar.version}
+
+
+ org.apache.spark
+ spark-core_${scala.binary.version}
+ ${spark.version}
+ provided
+
+
+ org.apache.spark
+ spark-streaming_${scala.binary.version}
+ ${spark.version}
+ provided
+
+
+ org.apache.spark
+ spark-mllib_${scala.binary.version}
+ ${spark.version}
+ provided
+
+
+ org.apache.spark
+ spark-sql_${scala.binary.version}
+ ${spark.version}
+ provided
+
+
+ org.apache.spark
+ spark-hive_${scala.binary.version}
+ ${spark.version}
+ provided
+
+
+ org.scalatest
+ scalatest_${scala.binary.version}
+ 3.1.1
+ provided
+
+
+ org.scala-lang
+ scala-library
+ ${scala.version}
+ provided
+
+
+ org.yaml
+ snakeyaml
+ 2.0
+
+
+ com.aliyun.odps
+ hadoop-fs-oss
+ ${cupid.sdk.version}
+
+
+ org.apache.hadoop
+ hadoop-common
+
+
+
+
+ com.aliyun.odps
+ odps-spark-datasource_2.11
+ ${cupid.sdk.version}
+
+
+ net.jpountz.lz4
+ lz4
+
+
+
+
+ com.aliyun.odps
+ cupid-sdk
+ ${cupid.sdk.version}
+ provided
+
+
+ org.neo4j
+ neo4j-connector-apache-spark_2.12
+ 5.0.0_for_spark_3
+
+
+ com.vesoft
+ nebula-spark-connector_3.0
+ 3.6.0
+
+
+ org.scala-lang.modules
+ scala-collection-compat_2.12
+ 2.1.1
+
+
+
+
+
+ org.scala-tools
+ maven-scala-plugin
+ 2.15.2
+
+ ${scala.version}
+
+ -target:jvm-1.8
+
+
+ -Xss4096K
+
+
+
+
+ scala-compile
+
+ compile
+
+
+
+ META-INF/*.SF
+ META-INF/*.DSA
+ META-INF/*.RSA
+
+
+
+
+ scala-test-compile
+
+ testCompile
+
+
+
+
+
+ org.scalatest
+ scalatest-maven-plugin
+ 2.0.0
+
+
+ test
+
+ test
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-shade-plugin
+ 2.1
+
+
+ package
+
+ shade
+
+
+ false
+ true
+
+
+
+ *:*
+
+
+
+
+ *:*
+
+ META-INF/*.SF
+ META-INF/*.DSA
+ META-INF/*.RSA
+ **/log4j.properties
+
+
+
+
+
+ reference.conf
+
+
+
+
+
+
+
+ net.alchim31.maven
+ scala-maven-plugin
+ 4.8.0
+
+
+
+ compile
+ testCompile
+
+
+
+
+
+ -Xms64m
+ -Xmx1024m
+
+
+ -Ywarn-unused
+
+
+
+ org.scalameta
+ semanticdb-scalac_2.12.10
+ 4.3.24
+
+
+
+
+
+ io.github.evis
+ scalafix-maven-plugin_2.13
+ 0.1.8_0.11.0
+
+
+ org.apache.maven.plugins
+ maven-source-plugin
+
+
+ attach-sources
+
+ jar
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-javadoc-plugin
+
+
+ attach-javadocs
+
+ jar
+
+
+
+
+
+ maven-site-plugin
+ 3.7.1
+
+
+
+
diff --git a/spark/src/main/java/com/alibaba/graphar/GeneralParams.java b/spark/graphar/src/main/java/com/alibaba/graphar/GeneralParams.java
similarity index 100%
rename from spark/src/main/java/com/alibaba/graphar/GeneralParams.java
rename to spark/graphar/src/main/java/com/alibaba/graphar/GeneralParams.java
diff --git a/spark/src/main/scala/com/alibaba/graphar/EdgeInfo.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/EdgeInfo.scala
similarity index 100%
rename from spark/src/main/scala/com/alibaba/graphar/EdgeInfo.scala
rename to spark/graphar/src/main/scala/com/alibaba/graphar/EdgeInfo.scala
diff --git a/spark/src/main/scala/com/alibaba/graphar/GraphInfo.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/GraphInfo.scala
similarity index 100%
rename from spark/src/main/scala/com/alibaba/graphar/GraphInfo.scala
rename to spark/graphar/src/main/scala/com/alibaba/graphar/GraphInfo.scala
diff --git a/spark/src/main/scala/com/alibaba/graphar/VertexInfo.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/VertexInfo.scala
similarity index 100%
rename from spark/src/main/scala/com/alibaba/graphar/VertexInfo.scala
rename to spark/graphar/src/main/scala/com/alibaba/graphar/VertexInfo.scala
diff --git a/spark/src/main/scala/com/alibaba/graphar/example/GraphAr2Nebula.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/example/GraphAr2Nebula.scala
similarity index 100%
rename from spark/src/main/scala/com/alibaba/graphar/example/GraphAr2Nebula.scala
rename to spark/graphar/src/main/scala/com/alibaba/graphar/example/GraphAr2Nebula.scala
diff --git a/spark/src/main/scala/com/alibaba/graphar/example/GraphAr2Neo4j.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/example/GraphAr2Neo4j.scala
similarity index 100%
rename from spark/src/main/scala/com/alibaba/graphar/example/GraphAr2Neo4j.scala
rename to spark/graphar/src/main/scala/com/alibaba/graphar/example/GraphAr2Neo4j.scala
diff --git a/spark/src/main/scala/com/alibaba/graphar/example/Nebula2GraphAr.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/example/Nebula2GraphAr.scala
similarity index 100%
rename from spark/src/main/scala/com/alibaba/graphar/example/Nebula2GraphAr.scala
rename to spark/graphar/src/main/scala/com/alibaba/graphar/example/Nebula2GraphAr.scala
diff --git a/spark/src/main/scala/com/alibaba/graphar/example/Neo4j2GraphAr.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/example/Neo4j2GraphAr.scala
similarity index 100%
rename from spark/src/main/scala/com/alibaba/graphar/example/Neo4j2GraphAr.scala
rename to spark/graphar/src/main/scala/com/alibaba/graphar/example/Neo4j2GraphAr.scala
diff --git a/spark/src/main/scala/com/alibaba/graphar/graph/GraphReader.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/graph/GraphReader.scala
similarity index 100%
rename from spark/src/main/scala/com/alibaba/graphar/graph/GraphReader.scala
rename to spark/graphar/src/main/scala/com/alibaba/graphar/graph/GraphReader.scala
diff --git a/spark/src/main/scala/com/alibaba/graphar/graph/GraphTransformer.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/graph/GraphTransformer.scala
similarity index 100%
rename from spark/src/main/scala/com/alibaba/graphar/graph/GraphTransformer.scala
rename to spark/graphar/src/main/scala/com/alibaba/graphar/graph/GraphTransformer.scala
diff --git a/spark/src/main/scala/com/alibaba/graphar/graph/GraphWriter.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/graph/GraphWriter.scala
similarity index 100%
rename from spark/src/main/scala/com/alibaba/graphar/graph/GraphWriter.scala
rename to spark/graphar/src/main/scala/com/alibaba/graphar/graph/GraphWriter.scala
diff --git a/spark/src/main/scala/com/alibaba/graphar/importer/Neo4j.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/importer/Neo4j.scala
similarity index 100%
rename from spark/src/main/scala/com/alibaba/graphar/importer/Neo4j.scala
rename to spark/graphar/src/main/scala/com/alibaba/graphar/importer/Neo4j.scala
diff --git a/spark/src/main/scala/com/alibaba/graphar/reader/EdgeReader.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/reader/EdgeReader.scala
similarity index 100%
rename from spark/src/main/scala/com/alibaba/graphar/reader/EdgeReader.scala
rename to spark/graphar/src/main/scala/com/alibaba/graphar/reader/EdgeReader.scala
diff --git a/spark/src/main/scala/com/alibaba/graphar/reader/VertexReader.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/reader/VertexReader.scala
similarity index 100%
rename from spark/src/main/scala/com/alibaba/graphar/reader/VertexReader.scala
rename to spark/graphar/src/main/scala/com/alibaba/graphar/reader/VertexReader.scala
diff --git a/spark/src/main/scala/com/alibaba/graphar/util/DataFrameConcat.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/util/DataFrameConcat.scala
similarity index 100%
rename from spark/src/main/scala/com/alibaba/graphar/util/DataFrameConcat.scala
rename to spark/graphar/src/main/scala/com/alibaba/graphar/util/DataFrameConcat.scala
diff --git a/spark/src/main/scala/com/alibaba/graphar/util/FileSystem.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/util/FileSystem.scala
similarity index 100%
rename from spark/src/main/scala/com/alibaba/graphar/util/FileSystem.scala
rename to spark/graphar/src/main/scala/com/alibaba/graphar/util/FileSystem.scala
diff --git a/spark/src/main/scala/com/alibaba/graphar/util/IndexGenerator.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/util/IndexGenerator.scala
similarity index 100%
rename from spark/src/main/scala/com/alibaba/graphar/util/IndexGenerator.scala
rename to spark/graphar/src/main/scala/com/alibaba/graphar/util/IndexGenerator.scala
diff --git a/spark/src/main/scala/com/alibaba/graphar/util/Patitioner.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/util/Patitioner.scala
similarity index 100%
rename from spark/src/main/scala/com/alibaba/graphar/util/Patitioner.scala
rename to spark/graphar/src/main/scala/com/alibaba/graphar/util/Patitioner.scala
diff --git a/spark/src/main/scala/com/alibaba/graphar/util/Utils.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/util/Utils.scala
similarity index 100%
rename from spark/src/main/scala/com/alibaba/graphar/util/Utils.scala
rename to spark/graphar/src/main/scala/com/alibaba/graphar/util/Utils.scala
diff --git a/spark/src/main/scala/com/alibaba/graphar/writer/EdgeWriter.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/writer/EdgeWriter.scala
similarity index 100%
rename from spark/src/main/scala/com/alibaba/graphar/writer/EdgeWriter.scala
rename to spark/graphar/src/main/scala/com/alibaba/graphar/writer/EdgeWriter.scala
diff --git a/spark/src/main/scala/com/alibaba/graphar/writer/VertexWriter.scala b/spark/graphar/src/main/scala/com/alibaba/graphar/writer/VertexWriter.scala
similarity index 100%
rename from spark/src/main/scala/com/alibaba/graphar/writer/VertexWriter.scala
rename to spark/graphar/src/main/scala/com/alibaba/graphar/writer/VertexWriter.scala
diff --git a/spark/graphar/src/test/resources/gar-test b/spark/graphar/src/test/resources/gar-test
new file mode 120000
index 000000000..4ce4f440f
--- /dev/null
+++ b/spark/graphar/src/test/resources/gar-test
@@ -0,0 +1 @@
+../../../../../testing/
\ No newline at end of file
diff --git a/spark/src/test/scala/com/alibaba/graphar/ComputeExample.scala b/spark/graphar/src/test/scala/com/alibaba/graphar/ComputeExample.scala
similarity index 100%
rename from spark/src/test/scala/com/alibaba/graphar/ComputeExample.scala
rename to spark/graphar/src/test/scala/com/alibaba/graphar/ComputeExample.scala
diff --git a/spark/src/test/scala/com/alibaba/graphar/TestGraphInfo.scala b/spark/graphar/src/test/scala/com/alibaba/graphar/TestGraphInfo.scala
similarity index 100%
rename from spark/src/test/scala/com/alibaba/graphar/TestGraphInfo.scala
rename to spark/graphar/src/test/scala/com/alibaba/graphar/TestGraphInfo.scala
diff --git a/spark/src/test/scala/com/alibaba/graphar/TestGraphReader.scala b/spark/graphar/src/test/scala/com/alibaba/graphar/TestGraphReader.scala
similarity index 100%
rename from spark/src/test/scala/com/alibaba/graphar/TestGraphReader.scala
rename to spark/graphar/src/test/scala/com/alibaba/graphar/TestGraphReader.scala
diff --git a/spark/src/test/scala/com/alibaba/graphar/TestGraphTransformer.scala b/spark/graphar/src/test/scala/com/alibaba/graphar/TestGraphTransformer.scala
similarity index 100%
rename from spark/src/test/scala/com/alibaba/graphar/TestGraphTransformer.scala
rename to spark/graphar/src/test/scala/com/alibaba/graphar/TestGraphTransformer.scala
diff --git a/spark/src/test/scala/com/alibaba/graphar/TestGraphWriter.scala b/spark/graphar/src/test/scala/com/alibaba/graphar/TestGraphWriter.scala
similarity index 100%
rename from spark/src/test/scala/com/alibaba/graphar/TestGraphWriter.scala
rename to spark/graphar/src/test/scala/com/alibaba/graphar/TestGraphWriter.scala
diff --git a/spark/src/test/scala/com/alibaba/graphar/TestIndexGenerator.scala b/spark/graphar/src/test/scala/com/alibaba/graphar/TestIndexGenerator.scala
similarity index 100%
rename from spark/src/test/scala/com/alibaba/graphar/TestIndexGenerator.scala
rename to spark/graphar/src/test/scala/com/alibaba/graphar/TestIndexGenerator.scala
diff --git a/spark/src/test/scala/com/alibaba/graphar/TestReader.scala b/spark/graphar/src/test/scala/com/alibaba/graphar/TestReader.scala
similarity index 100%
rename from spark/src/test/scala/com/alibaba/graphar/TestReader.scala
rename to spark/graphar/src/test/scala/com/alibaba/graphar/TestReader.scala
diff --git a/spark/src/test/scala/com/alibaba/graphar/TestWriter.scala b/spark/graphar/src/test/scala/com/alibaba/graphar/TestWriter.scala
similarity index 100%
rename from spark/src/test/scala/com/alibaba/graphar/TestWriter.scala
rename to spark/graphar/src/test/scala/com/alibaba/graphar/TestWriter.scala
diff --git a/spark/src/test/scala/com/alibaba/graphar/TransformExample.scala b/spark/graphar/src/test/scala/com/alibaba/graphar/TransformExample.scala
similarity index 100%
rename from spark/src/test/scala/com/alibaba/graphar/TransformExample.scala
rename to spark/graphar/src/test/scala/com/alibaba/graphar/TransformExample.scala
diff --git a/spark/import/neo4j.sh b/spark/import/neo4j.sh
index 7389567ec..d7ddaadc4 100755
--- a/spark/import/neo4j.sh
+++ b/spark/import/neo4j.sh
@@ -17,8 +17,9 @@
set -eu
cur_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-jar_file="${cur_dir}/../target/graphar-0.1.0-SNAPSHOT-shaded.jar"
-conf_path="$(readlink -f $1)"
+jar_file="${cur_dir}/../graphar/target/graphar-commons-0.1.0-SNAPSHOT-shaded.jar"
+# Quote every expansion so paths containing spaces stay a single argument.
+conf_path="$(readlink -f "$1")"
-spark-submit --class com.alibaba.graphar.importer.Neo4j ${jar_file} \
- ${conf_path}
\ No newline at end of file
+spark-submit --class com.alibaba.graphar.importer.Neo4j "${jar_file}" \
+ "${conf_path}"
diff --git a/spark/pom.xml b/spark/pom.xml
index 3cb7c5e1b..4fc8235ab 100644
--- a/spark/pom.xml
+++ b/spark/pom.xml
@@ -21,115 +21,58 @@
com.alibaba
graphar
- 0.1.0-SNAPSHOT
+ ${graphar.version}
+ pom
-
- graphar
- UTF-8
- UTF-8
- 2.12.10
- 2.12
- 512m
- 1024m
- 3.2.2
- 1.8
- 1.8
- 3.3.8-public
-
-
-
- org.apache.spark
- spark-core_${scala.binary.version}
- ${spark.version}
- provided
-
-
- org.apache.spark
- spark-streaming_${scala.binary.version}
- ${spark.version}
- provided
-
-
- org.apache.spark
- spark-mllib_${scala.binary.version}
- ${spark.version}
- provided
-
-
- org.apache.spark
- spark-sql_${scala.binary.version}
- ${spark.version}
- provided
-
-
- org.apache.spark
- spark-hive_${scala.binary.version}
- ${spark.version}
- provided
-
-
- org.scalatest
- scalatest_${scala.binary.version}
- 3.1.1
- provided
-
-
- org.scala-lang
- scala-library
- ${scala.version}
- provided
-
-
- org.yaml
- snakeyaml
- 2.0
-
-
- com.aliyun.odps
- hadoop-fs-oss
- ${cupid.sdk.version}
-
-
- org.apache.hadoop
- hadoop-common
-
-
-
-
- com.aliyun.odps
- odps-spark-datasource_2.11
- ${cupid.sdk.version}
-
-
- net.jpountz.lz4
- lz4
-
-
-
-
- com.aliyun.odps
- cupid-sdk
- ${cupid.sdk.version}
- provided
-
-
- org.neo4j
- neo4j-connector-apache-spark_2.12
- 5.0.0_for_spark_3
-
-
- com.vesoft
- nebula-spark-connector_3.0
- 3.6.0
-
-
- org.scala-lang.modules
- scala-collection-compat_2.12
- 2.1.1
-
-
+
+
+ datasources-32
+
+ graphar
+ UTF-8
+ UTF-8
+ 2.12.10
+ 2.12
+ 512m
+ 1024m
+ 3.2.2
+ 1.8
+ 1.8
+ 3.3.8-public
+ 0.1.0-SNAPSHOT
+
+
+ graphar
+ datasources-32
+
+
+ true
+
+
+
+
+ com.diffplug.spotless
+ spotless-maven-plugin
+ 2.20.0
+
+
+
+
+
+
+ 1.13.0
+
+
+
+
+
+ ${project.basedir}/.scalafmt.conf
+
+
+
+
org.scala-tools
maven-scala-plugin
@@ -165,144 +108,6 @@
-
- org.scalatest
- scalatest-maven-plugin
- 2.0.0
-
-
- test
-
- test
-
-
-
-
-
- org.apache.maven.plugins
- maven-shade-plugin
- 2.1
-
-
- package
-
- shade
-
-
- false
- true
-
-
-
- *:*
-
-
-
-
- *:*
-
- META-INF/*.SF
- META-INF/*.DSA
- META-INF/*.RSA
- **/log4j.properties
-
-
-
-
-
- reference.conf
-
-
-
-
-
-
-
- net.alchim31.maven
- scala-maven-plugin
- 4.8.0
-
-
-
- compile
- testCompile
-
-
-
-
-
- -Xms64m
- -Xmx1024m
-
-
- -Ywarn-unused
-
-
-
- org.scalameta
- semanticdb-scalac_2.12.10
- 4.3.24
-
-
-
-
-
- com.diffplug.spotless
- spotless-maven-plugin
- 2.20.0
-
-
-
-
-
-
- 1.13.0
-
-
-
-
-
- ${project.basedir}/.scalafmt.conf
-
-
-
-
-
- io.github.evis
- scalafix-maven-plugin_2.13
- 0.1.8_0.11.0
-
-
- org.apache.maven.plugins
- maven-source-plugin
-
-
- attach-sources
-
- jar
-
-
-
-
-
- org.apache.maven.plugins
- maven-javadoc-plugin
-
-
- attach-javadocs
-
- jar
-
-
-
-
-
- maven-site-plugin
- 3.7.1
-
- jar
diff --git a/spark/scripts/run-graphar2nebula.sh b/spark/scripts/run-graphar2nebula.sh
index 5094885f7..7a47df44d 100755
--- a/spark/scripts/run-graphar2nebula.sh
+++ b/spark/scripts/run-graphar2nebula.sh
@@ -16,7 +16,7 @@
set -eu
cur_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-jar_file="${cur_dir}/../target/graphar-0.1.0-SNAPSHOT-shaded.jar"
+jar_file="${cur_dir}/../graphar/target/graphar-commons-0.1.0-SNAPSHOT-shaded.jar"
graph_info_path="${GRAPH_INFO_PATH:-/tmp/graphar/nebula2graphar/basketballplayergraph.graph.yml}"
spark-submit --class com.alibaba.graphar.example.GraphAr2Nebula ${jar_file} \
diff --git a/spark/scripts/run-graphar2neo4j.sh b/spark/scripts/run-graphar2neo4j.sh
index 25150999a..d350387a2 100755
--- a/spark/scripts/run-graphar2neo4j.sh
+++ b/spark/scripts/run-graphar2neo4j.sh
@@ -17,7 +17,7 @@
set -eu
cur_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-jar_file="${cur_dir}/../target/graphar-0.1.0-SNAPSHOT-shaded.jar"
+jar_file="${cur_dir}/../graphar/target/graphar-commons-0.1.0-SNAPSHOT-shaded.jar"
graph_info_path="${GRAPH_INFO_PATH:-/tmp/graphar/neo4j2graphar/MovieGraph.graph.yml}"
spark-submit --class com.alibaba.graphar.example.GraphAr2Neo4j ${jar_file} \
diff --git a/spark/scripts/run-nebula2graphar.sh b/spark/scripts/run-nebula2graphar.sh
index 3204b1177..898859402 100755
--- a/spark/scripts/run-nebula2graphar.sh
+++ b/spark/scripts/run-nebula2graphar.sh
@@ -16,7 +16,7 @@
set -eu
cur_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-jar_file="${cur_dir}/../target/graphar-0.1.0-SNAPSHOT-shaded.jar"
+jar_file="${cur_dir}/../graphar/target/graphar-commons-0.1.0-SNAPSHOT-shaded.jar"
vertex_chunk_size=100
edge_chunk_size=1024
diff --git a/spark/scripts/run-neo4j2graphar.sh b/spark/scripts/run-neo4j2graphar.sh
index 16358838f..e18e7c9af 100755
--- a/spark/scripts/run-neo4j2graphar.sh
+++ b/spark/scripts/run-neo4j2graphar.sh
@@ -17,7 +17,7 @@
set -eu
cur_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-jar_file="${cur_dir}/../target/graphar-0.1.0-SNAPSHOT-shaded.jar"
+jar_file="${cur_dir}/../graphar/target/graphar-commons-0.1.0-SNAPSHOT-shaded.jar"
vertex_chunk_size=100
edge_chunk_size=1024
diff --git a/spark/src/test/resources/gar-test b/spark/src/test/resources/gar-test
deleted file mode 120000
index 1166084dd..000000000
--- a/spark/src/test/resources/gar-test
+++ /dev/null
@@ -1 +0,0 @@
-../../../../testing
\ No newline at end of file