From 12a0ca98adf6bac0a1cdf19ab29dd4015cc7bed9 Mon Sep 17 00:00:00 2001 From: acezen Date: Thu, 1 Feb 2024 11:31:19 +0800 Subject: [PATCH 1/2] Fix neo4j generate DataFrame got wrong data type bug Signed-off-by: acezen --- .../com/alibaba/graphar/example/Neo4j2GraphAr.scala | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/spark/src/main/scala/com/alibaba/graphar/example/Neo4j2GraphAr.scala b/spark/src/main/scala/com/alibaba/graphar/example/Neo4j2GraphAr.scala index 49dcf3ce6..ed9c024ae 100644 --- a/spark/src/main/scala/com/alibaba/graphar/example/Neo4j2GraphAr.scala +++ b/spark/src/main/scala/com/alibaba/graphar/example/Neo4j2GraphAr.scala @@ -72,9 +72,12 @@ object Neo4j2GraphAr { spark: SparkSession ): Unit = { // read vertices with label "Person" from Neo4j as a DataFrame + // Note: set "schema.flatten.limit" to 1 to avoid select null to make the property type as string, + // If APOC is installed, you can use apoc to get the property type val person_df = spark.read .format("org.neo4j.spark.DataSource") .option("query", "MATCH (n:Person) RETURN n.name AS name, n.born as born") + .option("schema.flatten.limit", 1) .load() // put into writer, vertex label is "Person" writer.PutVertexData("Person", person_df) @@ -86,6 +89,7 @@ object Neo4j2GraphAr { "query", "MATCH (n:Movie) RETURN n.title AS title, n.tagline as tagline" ) + .option("schema.flatten.limit", 1) .load() // put into writer, vertex label is "Movie" writer.PutVertexData("Movie", movie_df) @@ -97,6 +101,7 @@ object Neo4j2GraphAr { "query", "MATCH (a:Person)-[r:PRODUCED]->(b:Movie) return a.name as src, b.title as dst" ) + .option("schema.flatten.limit", 1) .load() // put into writer, source vertex label is "Person", edge label is "PRODUCED" // target vertex label is "Movie" @@ -109,6 +114,7 @@ object Neo4j2GraphAr { "query", "MATCH (a:Person)-[r:ACTED_IN]->(b:Movie) return a.name as src, b.title as dst" ) + .option("schema.flatten.limit", 1) .load() // put into writer, source vertex label is 
"Person", edge label is "ACTED_IN" // target vertex label is "Movie" @@ -121,6 +127,7 @@ object Neo4j2GraphAr { "query", "MATCH (a:Person)-[r:DIRECTED]->(b:Movie) return a.name as src, b.title as dst" ) + .option("schema.flatten.limit", 1) .load() // put into writer, source vertex label is "Person", edge label is "DIRECTED" // target vertex label is "Movie" @@ -133,6 +140,7 @@ object Neo4j2GraphAr { "query", "MATCH (a:Person)-[r:FOLLOWS]->(b:Person) return a.name as src, b.name as dst" ) + .option("schema.flatten.limit", 1) .load() // put into writer, source vertex label is "Person", edge label is "FOLLOWS" // target vertex label is "Person" @@ -145,6 +153,7 @@ object Neo4j2GraphAr { "query", "MATCH (a:Person)-[r:REVIEWED]->(b:Movie) return a.name as src, b.title as dst, r.rating as rating, r.summary as summary" ) + .option("schema.flatten.limit", 1) .load() // put into writer, source vertex label is "Person", edge label is "REVIEWED" // target vertex label is "Movie" @@ -157,6 +166,7 @@ object Neo4j2GraphAr { "query", "MATCH (a:Person)-[r:WROTE]->(b:Movie) return a.name as src, b.title as dst" ) + .option("schema.flatten.limit", 1) .load() // put into writer, source vertex label is "Person", edge label is "WROTE" // target vertex label is "Movie" From e4e56657007ff10b7f8e01cd8e71ac34682f6488 Mon Sep 17 00:00:00 2001 From: acezen Date: Thu, 1 Feb 2024 17:08:53 +0800 Subject: [PATCH 2/2] Update comment --- .../scala/com/alibaba/graphar/example/Neo4j2GraphAr.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spark/src/main/scala/com/alibaba/graphar/example/Neo4j2GraphAr.scala b/spark/src/main/scala/com/alibaba/graphar/example/Neo4j2GraphAr.scala index ed9c024ae..a7196d730 100644 --- a/spark/src/main/scala/com/alibaba/graphar/example/Neo4j2GraphAr.scala +++ b/spark/src/main/scala/com/alibaba/graphar/example/Neo4j2GraphAr.scala @@ -72,8 +72,8 @@ object Neo4j2GraphAr { spark: SparkSession ): Unit = { // read vertices with label "Person" from 
Neo4j as a DataFrame - // Note: set "schema.flatten.limit" to 1 to avoid select null to make the property type as string, - // If APOC is installed, you can use apoc to get the property type + // Note: set "schema.flatten.limit" to 1 to avoid, as far as possible, sampling a null record and inferring the property type as string, + // if you want perfect type inference, consider using APOC. val person_df = spark.read .format("org.neo4j.spark.DataSource") .option("query", "MATCH (n:Person) RETURN n.name AS name, n.born as born")